Commit 2eaabfa4 authored by Dongjoon Hyun, committed by Reynold Xin

[SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions for decimal param lookups

## What changes were proposed in this pull request?

This PR adds a fallback lookup that casts `DecimalType` arguments to `DoubleType` when resolving external functions that declare `double`-type parameters.

**Reported Error Scenarios**
```scala
scala> sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
org.apache.spark.sql.AnalysisException: ... No matching method for class org.apache.hadoop.hive.ql.udf.UDAFPercentile with (int, decimal(38,18)). Possible choices: _FUNC_(bigint, array<double>)  _FUNC_(bigint, double)  ; line 1 pos 7

scala> sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 T(value)")
org.apache.spark.sql.AnalysisException: ... Only a float/double or float/double array argument is accepted as parameter 2, but decimal(38,18) was passed instead.; line 1 pos 7
```
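
The fix wraps the existing lookup in a retry: if resolution throws a non-fatal exception, every `DecimalType` argument is cast to `DoubleType` and the lookup runs once more (see the `HiveSessionCatalog` diff below). Here is a minimal, self-contained sketch of that pattern; `FallbackLookupSketch`, `Arg`, `lookup`, and `lookup0` are illustrative stand-ins, not Spark APIs:

```scala
import scala.util.control.NonFatal

object FallbackLookupSketch {
  // Illustrative stand-ins for Catalyst's types and expressions; not Spark APIs.
  sealed trait DataType
  case object DecimalType extends DataType
  case object DoubleType extends DataType
  final case class Arg(sqlText: String, dataType: DataType)

  // A toy resolver that, like UDAFPercentile, only accepts double parameters.
  private def lookup0(fn: String, args: Seq[Arg]): String = {
    if (args.exists(_.dataType == DecimalType)) {
      throw new IllegalArgumentException(s"No matching method for $fn with decimal arguments")
    }
    s"$fn(${args.map(_.sqlText).mkString(", ")})"
  }

  // The pattern from the patch: try as-is, then retry with decimals cast to double.
  def lookup(fn: String, args: Seq[Arg]): String =
    try {
      lookup0(fn, args)
    } catch {
      case NonFatal(_) =>
        val coerced = args.map {
          case a if a.dataType == DecimalType =>
            Arg(s"cast(${a.sqlText} as double)", DoubleType)
          case a => a
        }
        lookup0(fn, coerced)
    }

  def main(argv: Array[String]): Unit = {
    // The literal 0.5 arrives as a decimal; the fallback rewrites it.
    println(lookup("percentile", Seq(Arg("value", DoubleType), Arg("0.5", DecimalType))))
    // Prints: percentile(value, cast(0.5 as double))
  }
}
```

One consequence of catching `NonFatal` is that any analysis failure, not only a parameter-type mismatch, triggers the retry; if the second attempt also fails, its exception propagates unchanged.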

## How was this patch tested?

Pass the Jenkins tests (including a new test case).

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #13930 from dongjoon-hyun/SPARK-16228.
parent 23c58653
**HiveSessionCatalog.scala**
```diff
@@ -30,12 +30,13 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DecimalType, DoubleType}
 import org.apache.spark.util.Utils
@@ -163,6 +164,19 @@ private[sql] class HiveSessionCatalog(
   }
 
   override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
+    try {
+      lookupFunction0(name, children)
+    } catch {
+      case NonFatal(_) =>
+        // SPARK-16228 ExternalCatalog may recognize `double`-type only.
+        val newChildren = children.map { child =>
+          if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
+        }
+        lookupFunction0(name, newChildren)
+    }
+  }
+
+  private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
     // TODO: Once lookupFunction accepts a FunctionIdentifier, we should refactor this method to
     // if (super.functionExists(name)) {
     //   super.lookupFunction(name, children)
```
**HiveUDFSuite.scala**
```diff
@@ -142,6 +142,13 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
       sql("SELECT array(max(key), max(key)) FROM src").collect().toSeq)
   }
 
+  test("SPARK-16228 Percentile needs explicit cast to double") {
+    sql("select percentile(value, cast(0.5 as double)) from values 1,2,3 T(value)")
+    sql("select percentile_approx(value, cast(0.5 as double)) from values 1.0,2.0,3.0 T(value)")
+    sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
+    sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 T(value)")
+  }
+
   test("Generic UDAF aggregates") {
     checkAnswer(sql("SELECT ceiling(percentile_approx(key, 0.99999D)) FROM src LIMIT 1"),
       sql("SELECT max(key) FROM src LIMIT 1").collect().toSeq)
```