Skip to content
Snippets Groups Projects
Commit 3aa4e464, authored by Takeshi Yamamuro and committed by Xiao Li
Browse files

[SPARK-20416][SQL] Print UDF names in EXPLAIN

## What changes were proposed in this pull request?
This PR adds `withName` to `UserDefinedFunction` so that UDF names are printed in EXPLAIN output.

## How was this patch tested?
Added tests in `UDFSuite`.

Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #17712 from maropu/SPARK-20416.
parent 8c67aa7f
No related branches found
No related tags found
No related merge requests found
......@@ -114,7 +114,7 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String
val bucketizer: UserDefinedFunction = udf { (feature: Double) =>
Bucketizer.binarySearchForBuckets($(splits), feature, keepInvalid)
}
}.withName("bucketizer")
val newCol = bucketizer(filteredDataset($(inputCol)).cast(DoubleType))
val newField = prepOutputField(filteredDataset.schema)
......
......@@ -47,6 +47,7 @@ case class UserDefinedFunction protected[sql] (
dataType: DataType,
inputTypes: Option[Seq[DataType]]) {
private var _nameOption: Option[String] = None
private var _nullable: Boolean = true
/**
......@@ -67,15 +68,27 @@ case class UserDefinedFunction protected[sql] (
dataType,
exprs.map(_.expr),
inputTypes.getOrElse(Nil),
udfName = _nameOption,
nullable = _nullable))
}
/**
 * Produces a duplicate of this UDF that also carries over its mutable settings.
 *
 * The case-class `copy()` only replicates the constructor parameters, so the
 * name and nullability kept in the private vars are transferred by hand.
 */
private def copyAll(): UserDefinedFunction = {
  val duplicate = copy()
  duplicate._nullable = _nullable
  duplicate._nameOption = _nameOption
  duplicate
}
/**
 * Sets the name of this UserDefinedFunction.
 *
 * The name is stored on this instance itself (no copy is made) and the very
 * same instance is returned. A `null` name is recorded as "no name", because
 * the value is wrapped with `Option(...)`.
 *
 * @param name the name to attach to this UDF
 * @return this instance, now carrying the given name
 * @since 2.3.0
 */
def withName(name: String): this.type = {
  _nameOption = Option(name)
  this
}
/**
* Updates UserDefinedFunction with a given nullability.
*
......
......@@ -263,10 +263,12 @@ class UDFSuite extends QueryTest with SharedSQLContext {
val sparkPlan = spark.sessionState.executePlan(explain).executedPlan
sparkPlan.executeCollect().map(_.getString(0).trim).headOption.getOrElse("")
}
val udf1 = "myUdf1"
val udf2 = "myUdf2"
spark.udf.register(udf1, (n: Int) => { n + 1 })
spark.udf.register(udf2, (n: Int) => { n * 1 })
assert(explainStr(sql("SELECT myUdf1(myUdf2(1))")).contains(s"UDF:$udf1(UDF:$udf2(1))"))
val udf1Name = "myUdf1"
val udf2Name = "myUdf2"
val udf1 = spark.udf.register(udf1Name, (n: Int) => n + 1)
val udf2 = spark.udf.register(udf2Name, (n: Int) => n * 1)
assert(explainStr(sql("SELECT myUdf1(myUdf2(1))")).contains(s"UDF:$udf1Name(UDF:$udf2Name(1))"))
assert(explainStr(spark.range(1).select(udf1(udf2(functions.lit(1)))))
.contains(s"UDF:$udf1Name(UDF:$udf2Name(1))"))
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment