From 01e10c9fef51c69ecf3060929c62d113cfc672b9 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Tue, 23 Feb 2016 15:16:59 +0800
Subject: [PATCH] [SPARK-13236] SQL Generation for Set Operations

This PR is to implement SQL generation for the following three set operations:
- Union Distinct
- Intersect
- Except

liancheng Thanks!

Author: gatorsmile <gatorsmile@gmail.com>
Author: xiaoli <lixiao1983@gmail.com>
Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local>

Closes #11195 from gatorsmile/setOpSQLGen.
---
 .../apache/spark/sql/hive/SQLBuilder.scala    | 14 +++++++++--
 .../sql/hive/LogicalPlanToSQLSuite.scala      | 24 ++++++++++++++++---
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 32f17f41c9..e66cc127ea 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
  * supported by this builder (yet).
  */
 class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Logging {
-  require(logicalPlan.resolved, "SQLBuilder only supports resloved logical query plans")
+  require(logicalPlan.resolved, "SQLBuilder only supports resolved logical query plans")
 
   def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext)
 
@@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
       }
       build(toSQL(p.child), whereOrHaving, p.condition.sql)
 
+    case p @ Distinct(u: Union) if u.children.length > 1 =>
+      val childrenSql = u.children.map(c => s"(${toSQL(c)})")
+      childrenSql.mkString(" UNION DISTINCT ")
+
     case p: Union if p.children.length > 1 =>
-      val childrenSql = p.children.map(toSQL(_))
+      val childrenSql = p.children.map(c => s"(${toSQL(c)})")
       childrenSql.mkString(" UNION ALL ")
 
+    case p: Intersect =>
+      build("(" + toSQL(p.left), ") INTERSECT (", toSQL(p.right) + ")")
+
+    case p: Except =>
+      build("(" + toSQL(p.left), ") EXCEPT (", toSQL(p.right) + ")")
+
     case p: SubqueryAlias =>
       p.child match {
         // Persisted data source relation
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index b162adf215..28559eac8d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
     checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0")
   }
 
+  test("union distinct") {
+    checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0")
+  }
+
+  // Parser is unable to parse the following query:
+  // SELECT  `u_1`.`id`
+  // FROM (((SELECT  `t0`.`id` FROM `default`.`t0`)
+  // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`))
+  // UNION ALL (SELECT  `t0`.`id` FROM `default`.`t0`)) AS u_1
+  test("three-child union") {
+    checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
+  }
+
+  test("intersect") {
+    checkHiveQl("SELECT * FROM t0 INTERSECT SELECT * FROM t0")
+  }
+
+  test("except") {
+    checkHiveQl("SELECT * FROM t0 EXCEPT SELECT * FROM t0")
+  }
+
   test("self join") {
     checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
   }
@@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
     checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key")
   }
 
-  test("three-child union") {
-    checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
-  }
 
   test("case") {
     checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0")
-- 
GitLab