From 6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb Mon Sep 17 00:00:00 2001 From: Wenchen Fan <wenchen@databricks.com> Date: Fri, 18 Mar 2016 10:16:48 +0800 Subject: [PATCH] [SPARK-13976][SQL] do not remove sub-queries added by user when generate SQL ## What changes were proposed in this pull request? We haven't figured out the corrected logical to add sub-queries yet, so we should not clear all sub-queries before generate SQL. This PR changed the logic to only remove sub-queries above table relation. an example for this bug, original SQL: `SELECT a FROM (SELECT a FROM tbl) t WHERE a = 1` before this PR, we will generate: ``` SELECT attr_1 AS a FROM SELECT attr_1 FROM ( SELECT a AS attr_1 FROM tbl ) AS sub_q0 WHERE attr_1 = 1 ``` We missed a sub-query and this SQL string is illegal. After this PR, we will generate: ``` SELECT attr_1 AS a FROM ( SELECT attr_1 FROM ( SELECT a AS attr_1 FROM tbl ) AS sub_q0 WHERE attr_1 = 1 ) AS t ``` TODO: for long term, we should find a way to add sub-queries correctly, so that arbitrary logical plans can be converted to SQL string. ## How was this patch tested? `LogicalPlanToSQLSuite` Author: Wenchen Fan <wenchen@databricks.com> Closes #11786 from cloud-fan/bug-fix. --- .../scala/org/apache/spark/sql/hive/SQLBuilder.scala | 12 ++++++++++-- .../spark/sql/hive/LogicalPlanToSQLSuite.scala | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 9fa8474226..da05905a89 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -390,8 +390,6 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi // `Aggregate`s. CollapseProject), Batch("Recover Scoping Info", Once, - // Remove all sub queries, as we will insert new ones when it's necessary. - EliminateSubqueryAliases, // A logical plan is allowed to have same-name outputs with different qualifiers(e.g. the // `Join` operator). However, this kind of plan can't be put under a sub query as we will // erase and assign a new qualifier to all outputs and make it impossible to distinguish @@ -400,6 +398,10 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi // qualifiers, as attributes have unique names now and we don't need qualifiers to resolve // ambiguity. NormalizedAttribute, + // Our analyzer will add one or more sub-queries above table relation, this rule removes + // these sub-queries so that next rule can combine adjacent table relation and sample to + // SQLTable. + RemoveSubqueriesAboveSQLTable, // Finds the table relations and wrap them with `SQLTable`s. If there are any `Sample` // operators on top of a table relation, merge the sample information into `SQLTable` of // that table relation, as we can only convert table sample to standard SQL string. @@ -418,6 +420,12 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi } } + object RemoveSubqueriesAboveSQLTable extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + case SubqueryAlias(_, t @ ExtractSQLTable(_)) => t + } + } + object ResolveSQLTable extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown { case Sample(lowerBound, upperBound, _, _, ExtractSQLTable(table)) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index f3cb6f8511..f86eba6349 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -737,4 +737,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { |LIMIT 5 """.stripMargin) } + + test("filter after subquery") { + checkHiveQl("SELECT a FROM (SELECT key + 1 AS a FROM parquet_t1) t WHERE a > 5") + } } -- GitLab