Skip to content
Snippets Groups Projects
Commit 339b53a1 authored by Cheng Lian's avatar Cheng Lian
Browse files

[SPARK-19737][SQL] New analysis rule for reporting unregistered functions...

[SPARK-19737][SQL] New analysis rule for reporting unregistered functions without relying on relation resolution

## What changes were proposed in this pull request?

This PR adds a new `Once` analysis rule batch consisting of a single analysis rule, `LookupFunctions`, that performs a simple existence check on `UnresolvedFunction`s without actually resolving them.

The benefit of this rule is that it doesn't require function arguments to be resolved first and therefore doesn't rely on relation resolution, which may incur potentially expensive partition/schema discovery cost.

Please refer to [SPARK-19737][1] for more details about the motivation.

## How was this patch tested?

New test case added in `SessionCatalogSuite`.

[1]: https://issues.apache.org/jira/browse/SPARK-19737

Author: Cheng Lian <lian@databricks.com>

Closes #17168 from liancheng/spark-19737-lookup-functions.
parent 2a0bc867
No related branches found
No related tags found
No related merge requests found
......@@ -117,6 +117,8 @@ class Analyzer(
Batch("Hints", fixedPoint,
new ResolveHints.ResolveBroadcastHints(conf),
ResolveHints.RemoveAllHints),
Batch("Simple Sanity Check", Once,
LookupFunctions),
Batch("Substitution", fixedPoint,
CTESubstitution,
WindowsSubstitution,
......@@ -1038,6 +1040,25 @@ class Analyzer(
}
}
/**
 * Checks whether a function identifier referenced by an [[UnresolvedFunction]] is defined in the
 * function registry. Note that this rule doesn't try to resolve the [[UnresolvedFunction]]. It
 * only performs a simple existence check according to the function identifier to quickly identify
 * undefined functions without triggering relation resolution, which may incur a potentially
 * expensive partition/schema discovery process in some cases.
 *
 * Throws a [[NoSuchFunctionException]], positioned at the offending expression, for the first
 * [[UnresolvedFunction]] whose identifier the catalog does not know. If the identifier carries
 * no database qualifier, the exception names the session's current database.
 *
 * @see [[ResolveFunctions]]
 * @see https://issues.apache.org/jira/browse/SPARK-19737
 */
object LookupFunctions extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions {
    case f: UnresolvedFunction if !catalog.functionExists(f.name) =>
      withPosition(f) {
        // Report the session's current database for unqualified names instead of assuming
        // "default", so the message stays accurate after the current database has changed.
        val database = f.name.database.getOrElse(catalog.getCurrentDatabase)
        throw new NoSuchFunctionException(database, f.name.funcName)
      }
  }
}
/**
* Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s.
*/
......
......@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.catalog
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, SimpleCatalystConf, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
......@@ -1196,4 +1196,25 @@ class SessionCatalogSuite extends PlanTest {
catalog.listFunctions("unknown_db", "func*")
}
}
test("SPARK-19737: detect undefined functions without triggering relation resolution") {
  import org.apache.spark.sql.catalyst.dsl.plans._

  // Exercise the analyzer under both case-sensitivity settings.
  for (caseSensitive <- Seq(true, false)) {
    val conf = SimpleCatalystConf(caseSensitive)
    val sessionCatalog = new SessionCatalog(newBasicCatalog(), new SimpleFunctionRegistry, conf)
    val analyzer = new Analyzer(sessionCatalog, conf)

    // The plan references both an undefined table and an undefined function; the analyzer
    // should report the undefined function rather than the undefined table first.
    val error = intercept[AnalysisException] {
      val undefinedFn = UnresolvedFunction("undefined_fn", Nil, isDistinct = false)
      val plan = UnresolvedRelation(TableIdentifier("undefined_table")).select(undefinedFn)
      analyzer.execute(plan)
    }

    assert(error.getMessage.contains("Undefined function: 'undefined_fn'"))
  }
}
}
......@@ -199,6 +199,11 @@ private[sql] class HiveSessionCatalog(
}
}
// TODO: Remove this method after implementing Spark native "histogram_numeric".
/**
 * Reports a function as existing when either the parent catalog's check succeeds or the bare
 * function name (ignoring any database qualifier) appears in `hiveFunctions`.
 */
override def functionExists(name: FunctionIdentifier): Boolean =
  super.functionExists(name) || hiveFunctions.contains(name.funcName)
/** List of functions we pass over to Hive. Note that over time this list should go to 0. */
// We have a list of Hive built-in functions that we do not support. So, we will check
// Hive's function registry and lazily load needed functions into our own function registry.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment