From cd2fed70129ba601f8c849a93eeb44a5d69c2402 Mon Sep 17 00:00:00 2001 From: Yong Tang <yong.tang.github@outlook.com> Date: Sat, 9 Apr 2016 13:54:30 -0700 Subject: [PATCH] [SPARK-14335][SQL] Describe function command returns wrong output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? …because some of built-in functions are not in function registry. This change fixes issues in the `describe function` command where some of the outputs still show Hive's function because some built-in functions are not in FunctionRegistry. The following built-in functions have been added to FunctionRegistry: ``` - ! * / & % ^ + < <= <=> = == > >= | ~ and in like not or rlike when ``` The following listed functions are not added, but hard-coded in `commands.scala` (hvanhovell): ``` != <> between case ``` Below are the existing results of the above functions that have not been added: ``` spark-sql> describe function `!=`; Function: <> Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual Usage: a <> b - Returns TRUE if a is not equal to b ``` ``` spark-sql> describe function `<>`; Function: <> Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual Usage: a <> b - Returns TRUE if a is not equal to b ``` ``` spark-sql> describe function `between`; Function: between Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween Usage: between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c ``` ``` spark-sql> describe function `case`; Function: case Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - When a = b, returns c; when a = d, return e; else return f ``` ## How was this patch tested? Existing tests passed. Additional test cases added. Author: Yong Tang <yong.tang.github@outlook.com> Closes #12128 from yongtang/SPARK-14335. 
--- .../catalyst/analysis/FunctionRegistry.scala | 33 +++++++++++++- .../sql/execution/command/commands.scala | 44 +++++++++++++------ .../sql/hive/execution/SQLQuerySuite.scala | 30 ++++++++++--- 3 files changed, 86 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index f239b33e44..f2abf136da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -171,6 +171,7 @@ object FunctionRegistry { expression[Rand]("rand"), expression[Randn]("randn"), expression[CreateStruct]("struct"), + expression[CaseWhen]("when"), // math functions expression[Acos]("acos"), @@ -217,6 +218,12 @@ object FunctionRegistry { expression[Tan]("tan"), expression[Tanh]("tanh"), + expression[Add]("+"), + expression[Subtract]("-"), + expression[Multiply]("*"), + expression[Divide]("/"), + expression[Remainder]("%"), + // aggregate functions expression[HyperLogLogPlusPlus]("approx_count_distinct"), expression[Average]("avg"), @@ -257,6 +264,7 @@ object FunctionRegistry { expression[Lower]("lcase"), expression[Length]("length"), expression[Levenshtein]("levenshtein"), + expression[Like]("like"), expression[Lower]("lower"), expression[StringLocate]("locate"), expression[StringLPad]("lpad"), @@ -267,6 +275,7 @@ object FunctionRegistry { expression[RegExpReplace]("regexp_replace"), expression[StringRepeat]("repeat"), expression[StringReverse]("reverse"), + expression[RLike]("rlike"), expression[StringRPad]("rpad"), expression[StringTrimRight]("rtrim"), expression[SoundEx]("soundex"), @@ -343,7 +352,29 @@ object FunctionRegistry { expression[NTile]("ntile"), expression[Rank]("rank"), expression[DenseRank]("dense_rank"), - expression[PercentRank]("percent_rank") + 
expression[PercentRank]("percent_rank"), + + // predicates + expression[And]("and"), + expression[In]("in"), + expression[Not]("not"), + expression[Or]("or"), + + expression[EqualNullSafe]("<=>"), + expression[EqualTo]("="), + expression[EqualTo]("=="), + expression[GreaterThan](">"), + expression[GreaterThanOrEqual](">="), + expression[LessThan]("<"), + expression[LessThanOrEqual]("<="), + expression[Not]("!"), + + // bitwise + expression[BitwiseAnd]("&"), + expression[BitwiseNot]("~"), + expression[BitwiseOr]("|"), + expression[BitwiseXor]("^") + ) val builtin: SimpleFunctionRegistry = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 3fd2a93d29..5d00c805a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -483,20 +483,38 @@ case class DescribeFunction( } override def run(sqlContext: SQLContext): Seq[Row] = { - sqlContext.sessionState.functionRegistry.lookupFunction(functionName) match { - case Some(info) => - val result = - Row(s"Function: ${info.getName}") :: - Row(s"Class: ${info.getClassName}") :: - Row(s"Usage: ${replaceFunctionName(info.getUsage(), info.getName)}") :: Nil - - if (isExtended) { - result :+ Row(s"Extended Usage:\n${replaceFunctionName(info.getExtended, info.getName)}") - } else { - result - } + // Hard code "<>", "!=", "between", and "case" for now as there is no corresponding functions. 
+ functionName.toLowerCase match { + case "<>" => + Row(s"Function: $functionName") :: + Row(s"Usage: a <> b - Returns TRUE if a is not equal to b") :: Nil + case "!=" => + Row(s"Function: $functionName") :: + Row(s"Usage: a != b - Returns TRUE if a is not equal to b") :: Nil + case "between" => + Row(s"Function: between") :: + Row(s"Usage: a [NOT] BETWEEN b AND c - " + + s"evaluate if a is [not] in between b and c") :: Nil + case "case" => + Row(s"Function: case") :: + Row(s"Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " + + s"When a = b, returns c; when a = d, return e; else return f") :: Nil + case _ => sqlContext.sessionState.functionRegistry.lookupFunction(functionName) match { + case Some(info) => + val result = + Row(s"Function: ${info.getName}") :: + Row(s"Class: ${info.getClassName}") :: + Row(s"Usage: ${replaceFunctionName(info.getUsage(), info.getName)}") :: Nil + + if (isExtended) { + result :+ + Row(s"Extended Usage:\n${replaceFunctionName(info.getExtended, info.getName)}") + } else { + result + } - case None => Seq(Row(s"Function: $functionName not found.")) + case None => Seq(Row(s"Function: $functionName not found.")) + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 14a1d4cd30..d7ec85c15d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -203,8 +203,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(sql("SHOW functions abc.abs"), Row("abs")) checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) - // TODO: Re-enable this test after we fix SPARK-14335. 
- // checkAnswer(sql("SHOW functions `~`"), Row("~")) + checkAnswer(sql("SHOW functions `~`"), Row("~")) checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) checkAnswer(sql("SHOW functions `weekofyea*`"), Row("weekofyear")) // this probably will failed if we add more function with `sha` prefixing. @@ -236,11 +235,28 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkExistence(sql("describe functioN abcadf"), true, "Function: abcadf not found.") - // TODO: Re-enable this test after we fix SPARK-14335. - // checkExistence(sql("describe functioN `~`"), true, - // "Function: ~", - // "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot", - // "Usage: ~ n - Bitwise not") + checkExistence(sql("describe functioN `~`"), true, + "Function: ~", + "Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot", + "Usage: To be added.") + + // Hard coded describe functions + checkExistence(sql("describe function `<>`"), true, + "Function: <>", + "Usage: a <> b - Returns TRUE if a is not equal to b") + + checkExistence(sql("describe function `!=`"), true, + "Function: !=", + "Usage: a != b - Returns TRUE if a is not equal to b") + + checkExistence(sql("describe function `between`"), true, + "Function: between", + "Usage: a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c") + + checkExistence(sql("describe function `case`"), true, + "Function: case", + "Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " + + "When a = b, returns c; when a = d, return e; else return f") } test("SPARK-5371: union with null and sum") { -- GitLab