From 01e10c9fef51c69ecf3060929c62d113cfc672b9 Mon Sep 17 00:00:00 2001 From: gatorsmile <gatorsmile@gmail.com> Date: Tue, 23 Feb 2016 15:16:59 +0800 Subject: [PATCH] [SPARK-13236] SQL Generation for Set Operations This PR is to implement SQL generation for the following three set operations: - Union Distinct - Intersect - Except liancheng Thanks! Author: gatorsmile <gatorsmile@gmail.com> Author: xiaoli <lixiao1983@gmail.com> Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local> Closes #11195 from gatorsmile/setOpSQLGen. --- .../apache/spark/sql/hive/SQLBuilder.scala | 14 +++++++++-- .../sql/hive/LogicalPlanToSQLSuite.scala | 24 ++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 32f17f41c9..e66cc127ea 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation * supported by this builder (yet). */ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Logging { - require(logicalPlan.resolved, "SQLBuilder only supports resloved logical query plans") + require(logicalPlan.resolved, "SQLBuilder only supports resolved logical query plans") def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext) @@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi } build(toSQL(p.child), whereOrHaving, p.condition.sql) + case p @ Distinct(u: Union) if u.children.length > 1 => + val childrenSql = u.children.map(c => s"(${toSQL(c)})") + childrenSql.mkString(" UNION DISTINCT ") + case p: Union if p.children.length > 1 => - val childrenSql = p.children.map(toSQL(_)) + val childrenSql = p.children.map(c => s"(${toSQL(c)})") childrenSql.mkString(" UNION ALL ") + case p: Intersect => + build("(" + toSQL(p.left), ") INTERSECT (", toSQL(p.right) + ")") + + case p: Except => + build("(" + toSQL(p.left), ") EXCEPT (", toSQL(p.right) + ")") + case p: SubqueryAlias => p.child match { // Persisted data source relation diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index b162adf215..28559eac8d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0") } + test("union distinct") { + checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0") + } + + // Parser is unable to parse the following query: + // SELECT `u_1`.`id` + // FROM (((SELECT `t0`.`id` FROM `default`.`t0`) + // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) + // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1 + test("three-child union") { + checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0") + } + + test("intersect") { + checkHiveQl("SELECT * FROM t0 INTERSECT SELECT * FROM t0") + } + + test("except") { + checkHiveQl("SELECT * FROM t0 EXCEPT SELECT * FROM t0") + } + test("self join") { checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key") } @@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key") } - test("three-child union") { - checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0") - } test("case") { checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0") -- GitLab