Skip to content
Snippets Groups Projects
Commit 01e10c9f authored by gatorsmile, committed by Cheng Lian
Browse files

[SPARK-13236] SQL Generation for Set Operations

This PR implements SQL generation for the following three set operations:
- Union Distinct
- Intersect
- Except

liancheng Thanks!

Author: gatorsmile <gatorsmile@gmail.com>
Author: xiaoli <lixiao1983@gmail.com>
Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local>

Closes #11195 from gatorsmile/setOpSQLGen.
parent 9dd5399d
No related branches found
No related tags found
No related merge requests found
...@@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation ...@@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
* supported by this builder (yet). * supported by this builder (yet).
*/ */
class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Logging { class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Logging {
require(logicalPlan.resolved, "SQLBuilder only supports resloved logical query plans") require(logicalPlan.resolved, "SQLBuilder only supports resolved logical query plans")
def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext) def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext)
...@@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi ...@@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
} }
build(toSQL(p.child), whereOrHaving, p.condition.sql) build(toSQL(p.child), whereOrHaving, p.condition.sql)
case p @ Distinct(u: Union) if u.children.length > 1 =>
val childrenSql = u.children.map(c => s"(${toSQL(c)})")
childrenSql.mkString(" UNION DISTINCT ")
case p: Union if p.children.length > 1 => case p: Union if p.children.length > 1 =>
val childrenSql = p.children.map(toSQL(_)) val childrenSql = p.children.map(c => s"(${toSQL(c)})")
childrenSql.mkString(" UNION ALL ") childrenSql.mkString(" UNION ALL ")
case p: Intersect =>
build("(" + toSQL(p.left), ") INTERSECT (", toSQL(p.right) + ")")
case p: Except =>
build("(" + toSQL(p.left), ") EXCEPT (", toSQL(p.right) + ")")
case p: SubqueryAlias => case p: SubqueryAlias =>
p.child match { p.child match {
// Persisted data source relation // Persisted data source relation
......
...@@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { ...@@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0") checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0")
} }
test("union distinct") {
checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0")
}
// The parser is unable to parse the following query (nested, parenthesized UNION ALL):
// SELECT `u_1`.`id`
// FROM (((SELECT `t0`.`id` FROM `default`.`t0`)
// UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`))
// UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1
test("three-child union") {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
}
test("intersect") {
checkHiveQl("SELECT * FROM t0 INTERSECT SELECT * FROM t0")
}
test("except") {
checkHiveQl("SELECT * FROM t0 EXCEPT SELECT * FROM t0")
}
test("self join") { test("self join") {
checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key") checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
} }
...@@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { ...@@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key") checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key")
} }
test("three-child union") {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
}
test("case") { test("case") {
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0") checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment