From 98e9419784a9ad5096cfd563fa9a433786a90bd4 Mon Sep 17 00:00:00 2001 From: ravipesala <ravindra.pesala@huawei.com> Date: Thu, 20 Nov 2014 15:34:03 -0800 Subject: [PATCH] [SPARK-4513][SQL] Support relational operator '<=>' in Spark SQL The relational operator '<=>' (null-safe equality) is not supported in Spark SQL, although the same operator works in Spark HiveQL. This patch adds '<=>' as a lexer delimiter and parses it to EqualNullSafe. Author: ravipesala <ravindra.pesala@huawei.com> Closes #3387 from ravipesala/<=> and squashes the following commits: 7198e90 [ravipesala] Supporting relational operator '<=>' in Spark SQL --- .../apache/spark/sql/catalyst/SparkSQLParser.scala | 2 +- .../org/apache/spark/sql/catalyst/SqlParser.scala | 1 + .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala index f5c19ee69c..b198ed9936 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala @@ -61,7 +61,7 @@ class SqlLexical(val keywords: Seq[String]) extends StdLexical { delimiters += ( "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")", - ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~" + ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>" ) override lazy val token: Parser[Token] = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index affef276c2..dc1d349f10 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -234,6 +234,7 @@ class SqlParser extends AbstractSparkSQLParser { | termExpression ~ (">=" ~> termExpression) ^^ { case e1 ~ e2 => GreaterThanOrEqual(e1, 
e2) } | termExpression ~ ("!=" ~> termExpression) ^^ { case e1 ~ e2 => Not(EqualTo(e1, e2)) } | termExpression ~ ("<>" ~> termExpression) ^^ { case e1 ~ e2 => Not(EqualTo(e1, e2)) } + | termExpression ~ ("<=>" ~> termExpression) ^^ { case e1 ~ e2 => EqualNullSafe(e1, e2) } | termExpression ~ NOT.? ~ (BETWEEN ~> termExpression) ~ (AND ~> termExpression) ^^ { case e ~ not ~ el ~ eu => val betweenExpr: Expression = And(GreaterThanOrEqual(e, el), LessThanOrEqual(e, eu)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index a63515464c..0a96831c76 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -973,4 +973,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { checkAnswer(sql("SELECT a.b + 1 FROM data GROUP BY a.b + 1"), 2) dropTempTable("data") } + + test("Supporting relational operator '<=>' in Spark SQL") { + val nullCheckData = TestData(1, "1") :: TestData(2, null) :: Nil + sparkContext.parallelize(nullCheckData).registerTempTable("nulldata1") + sparkContext.parallelize(nullCheckData).registerTempTable("nulldata2") + /* '<=>' parses to EqualNullSafe, so the row with a null value (key 2) must join as well. */ + checkAnswer(sql("SELECT nulldata1.key FROM nulldata1 join " + + "nulldata2 on nulldata1.value <=> nulldata2.value"), + (1 to 2).map(i => Seq(i))) + dropTempTable("nulldata1") + dropTempTable("nulldata2") + } } -- GitLab