[SPARK-19338][SQL] Add UDF names in explain

## What changes were proposed in this pull request? This PR adds an optional field for the UDF name to `ScalaUDF`. When that field is set, `DataFrame#explain` prints the name. ## How was this patch tested? Added a test in `UDFSuite`. Author: Takeshi YAMAMURO <linguin.m.s@gmail.com> Closes #16707 from maropu/SPARK-19338. (cherry picked from commit 9f523d31) Signed-off-by: gatorsmile <gatorsmile@gmail.com>

[SPARK-19338][SQL] Add UDF names in explain
b12a76a4 · Takeshi YAMAMURO · gatorsmile · 0d7e3852 · b12a76a4 · b12a76a4
Commit b12a76a4 authored 8 years ago by Takeshi YAMAMURO Committed by gatorsmile 8 years ago
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1904,7 +1904,7 @@ class Analyzer(

      case p => p transformExpressionsUp {

-        case udf @ ScalaUDF(func, _, inputs, _) =>
+        case udf @ ScalaUDF(func, _, inputs, _, _) =>
          val parameterTypes = ScalaReflection.getParameterTypes(func)
          assert(parameterTypes.length == inputs.length)


--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
@@ -35,17 +35,20 @@ import org.apache.spark.sql.types.DataType
 *                    not want to perform coercion, simply use "Nil". Note that it would've been
 *                    better to use Option of Seq[DataType] so we can use "None" as the case for no
 *                    type coercion. However, that would require more refactoring of the codebase.
+ * @param udfName   The user-specified name of this UDF.
 */
 case class ScalaUDF(
    function: AnyRef,
    dataType: DataType,
    children: Seq[Expression],
-    inputTypes: Seq[DataType] = Nil)
+    inputTypes: Seq[DataType] = Nil,
+    udfName: Option[String] = None)
  extends Expression with ImplicitCastInputTypes with NonSQLExpression {

  override def nullable: Boolean = true

-  override def toString: String = s"UDF(${children.mkString(", ")})"
+  override def toString: String =
+    s"${udfName.map(name => s"UDF:$name").getOrElse("UDF")}(${children.mkString(", ")})"

  // scalastyle:off line.size.limit


--- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
@@ -17,6 +17,7 @@

 package org.apache.spark.sql

+import org.apache.spark.sql.execution.command.ExplainCommand
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.test.SQLTestData._

@@ -248,4 +249,17 @@ class UDFSuite extends QueryTest with SharedSQLContext {
      sql("SELECT tmp.t.* FROM (SELECT testDataFunc(a, b) AS t from testData2) tmp").toDF(),
      testData2)
  }
+
+  test("SPARK-19338 Provide identical names for UDFs in the EXPLAIN output") {
+    def explainStr(df: DataFrame): String = {
+      val explain = ExplainCommand(df.queryExecution.logical, extended = false)
+      val sparkPlan = spark.sessionState.executePlan(explain).executedPlan
+      sparkPlan.executeCollect().map(_.getString(0).trim).headOption.getOrElse("")
+    }
+    val udf1 = "myUdf1"
+    val udf2 = "myUdf2"
+    spark.udf.register(udf1, (n: Int) => { n + 1 })
+    spark.udf.register(udf2, (n: Int) => { n * 1 })
+    assert(explainStr(sql("SELECT myUdf1(myUdf2(1))")).contains(s"UDF:$udf1(UDF:$udf2(1))"))
+  }
 }