diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b6a4686bb9ec9acbdde700d4b28c91f021c171eb..4d725904bc9b9bd1c081cba4734e3277c876a8e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -751,15 +751,17 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * hooks. */ override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) { - // The unaliased subqueries in the FROM clause are disallowed. Instead of rejecting it in - // parser rules, we handle it here in order to provide better error message. - if (ctx.strictIdentifier == null) { - throw new ParseException("The unaliased subqueries in the FROM clause are not supported.", - ctx) + val alias = if (ctx.strictIdentifier == null) { + // For un-aliased subqueries, use a default alias name that is not likely to conflict with + // normal subquery names, so that parent operators can only access the columns in subquery by + // unqualified names. Users can still use this special qualifier to access columns if they + // know it, but that's not recommended. + "__auto_generated_subquery_name" + } else { + ctx.strictIdentifier.getText } - aliasPlan(ctx.strictIdentifier, - plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)) + SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8649603b1a9f5dbfef5470105471bf72dfa9b42c..9b440cd99f994b173602634890f44a933307e699 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -253,7 +253,7 @@ abstract class LogicalPlan // More than one match. 
case ambiguousReferences => - val referenceNames = ambiguousReferences.map(_._1).mkString(", ") + val referenceNames = ambiguousReferences.map(_._1.qualifiedName).mkString(", ") throw new AnalysisException( s"Reference '$name' is ambiguous, could be: $referenceNames.") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 5b2573fa4d60108274d300b5effb6e03eed56ebb..6dad097041a15ffd4a3286f0c2a991c8fe0a1efd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -450,19 +450,6 @@ class PlanParserSuite extends AnalysisTest { | (select id from t0)) as u_1 """.stripMargin, plan.union(plan).union(plan).as("u_1").select('id)) - - } - - test("aliased subquery") { - val errMsg = "The unaliased subqueries in the FROM clause are not supported" - - assertEqual("select a from (select id as a from t0) tt", - table("t0").select('id.as("a")).as("tt").select('a)) - intercept("select a from (select id as a from t0)", errMsg) - - assertEqual("from (select id as a from t0) tt select a", - table("t0").select('id.as("a")).as("tt").select('a)) - intercept("from (select id as a from t0) select a", errMsg) } test("scalar sub-query") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index bc2120727dac2103b7b094475c43e46b062bd407..1e1384549a410897bc1d9340692b98c125ccf7e3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -34,7 +34,7 @@ SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), FROM testData; -- Aggregate with foldable input and multiple distinct groups. 
-SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t GROUP BY a; +SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a; -- Aliases in SELECT could be used in GROUP BY SELECT a AS k, COUNT(b) FROM testData GROUP BY k; diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index df555bdc1976d8082049c79d6752b3fd9330b963..f21912a0427168eca2c6f0fba80195797777dfc0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -21,7 +21,7 @@ SELECT * FROM testdata LIMIT true; SELECT * FROM testdata LIMIT 'a'; -- limit within a subquery -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) t WHERE id > 3; +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3; -- limit ALL SELECT * FROM testdata WHERE key < 3 LIMIT ALL; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 20c03906640379c44a6bd7cf95f3cef3109c2ef7..c95f4817b7ce0b6b753789e38a32835b63d63056 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -7,7 +7,7 @@ select 'a' || 'b' || 'c'; -- Check if catalyst combine nested `Concat`s EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t; +FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)); -- replace function select replace('abc', 'b', '123'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql index 42f84e9748713f6361b9c661446dbc8c6303237c..5c371d2305ac85449aaedddb54525d2c22584442 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql @@ -394,7 +394,7 @@ FROM (SELECT * FROM t1)) t4 WHERE t4.t2b IN (SELECT Min(t3b) FROM t3 - WHERE t4.t2a = t3a)) T; + WHERE t4.t2a = t3a)); -- UNION, UNION ALL, UNION DISTINCT, INTERSECT and EXCEPT for NOT IN -- TC 01.12 diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql index f3f0c7622ccdb14331a367bae99da73aada212e6..e22cade93679282d218d10ae548d286e34605dbd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql @@ -23,7 +23,7 @@ AND t2b = (SELECT max(avg) FROM (SELECT t2b, avg(t2b) avg FROM t2 WHERE t2a = t1.t1b - ) T + ) ) ; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index dbe8d76d2f117ed9c17e66645ba987c222fa948f..fb0d07fbdace7a6f43dd61298abffa88a84c7a5a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -19,7 +19,7 @@ AND c.cv = (SELECT max(avg) FROM (SELECT 
c1.cv, avg(c1.cv) avg FROM c c1 WHERE c1.ck = p.pk - GROUP BY c1.cv) T); + GROUP BY c1.cv)); create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/union.sql b/sql/core/src/test/resources/sql-tests/inputs/union.sql index 63bc044535e4d9f89121e89a04dc2a7570ba9bab..e57d69eaad0333baf54c3d7757a414eaccb70a83 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/union.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/union.sql @@ -5,7 +5,7 @@ CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2); SELECT * FROM (SELECT * FROM t1 UNION ALL - SELECT * FROM t1) T; + SELECT * FROM t1); -- Type Coerced Union SELECT * @@ -13,7 +13,7 @@ FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2 UNION ALL - SELECT * FROM t2) T; + SELECT * FROM t2); -- Regression test for SPARK-18622 SELECT a diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 9e60e592c2bd1f91ebdef95cbb9c492c7e3fec91..b5a4f5c2bf65456faec15c88d17dc6de94d67bf3 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -72,7 +72,7 @@ SELECT i1 FROM t1, mydb1.t1 struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 9 @@ -81,7 +81,7 @@ SELECT t1.i1 FROM t1, mydb1.t1 struct<> -- !query 9 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 10 @@ -99,7 +99,7 @@ SELECT i1 FROM t1, mydb2.t1 struct<> -- !query 11 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 12 @@ -108,7 +108,7 @@ SELECT t1.i1 FROM t1, mydb2.t1 struct<> -- !query 12 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 13 @@ -125,7 +125,7 @@ SELECT i1 FROM t1, mydb1.t1 struct<> -- !query 14 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 15 @@ -134,7 +134,7 @@ SELECT t1.i1 FROM t1, mydb1.t1 struct<> -- !query 15 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 16 @@ -143,7 +143,7 @@ SELECT i1 FROM t1, mydb2.t1 struct<> -- !query 16 output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 'i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 17 @@ -152,7 +152,7 @@ SELECT t1.i1 FROM t1, mydb2.t1 struct<> -- !query 17 output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: i1#x, i1#x.; line 1 pos 7 +Reference 't1.i1' is ambiguous, could be: t1.i1, t1.i1.; line 1 pos 7 -- !query 18 diff --git 
a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index e23ebd4e822fa68e4ebfab4c0b0b916a55e90726..986bb01c13fe4caa92e270211a22f681a7f072b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -134,7 +134,7 @@ struct<skewness(CAST(a AS DOUBLE)):double,kurtosis(CAST(a AS DOUBLE)):double,min -- !query 14 -SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) t GROUP BY a +SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a -- !query 14 schema struct<count(DISTINCT b):bigint,count(DISTINCT b, c):bigint> -- !query 14 output diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index afdd6df2a57142eee4998e6cd1b944cd1fd3df04..146abe6cbd05851b3ea4691538e0dce54531ece4 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -93,7 +93,7 @@ The limit expression must be integer type, but got string; -- !query 10 -SELECT * FROM (SELECT * FROM range(10) LIMIT 5) t WHERE id > 3 +SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 -- !query 10 schema struct<id:bigint> -- !query 10 output diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 52eb554edf89e5a86ff03409e941818300950ea8..b0ae9d775d968a414d7bb00dc941644f8509f53e 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -30,20 +30,20 @@ abc -- !query 3 EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) t +FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) -- !query 3 schema struct<plan:string> -- !query 3 output == Parsed Logical Plan == 'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x] -+- 'SubqueryAlias t ++- 'SubqueryAlias __auto_generated_subquery_name +- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x] +- 'UnresolvedTableValuedFunction range, [10] == Analyzed Logical Plan == col: string Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x] -+- SubqueryAlias t ++- SubqueryAlias __auto_generated_subquery_name +- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL] +- Range (0, 10, step=1, splits=None) diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out index 5780f49648ec74c324e07ecd9a85f5da58f2f93c..e06f9206d3401fecc50a7ae26a25a632e2aa38a0 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out @@ -496,7 +496,7 @@ FROM (SELECT * FROM t1)) t4 WHERE t4.t2b IN (SELECT Min(t3b) FROM t3 - WHERE t4.t2a = t3a)) T + WHERE t4.t2a = t3a)) -- !query 13 schema struct<t2a:string,t2b:smallint,t2c:int,t2d:bigint,t2e:float,t2f:double,t2g:decimal(2,-2),t2h:timestamp,t2i:date> -- !query 13 output diff 
--git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index ca3930b33e06d8ee9815aecb27e1d4143ec59c60..e4b1a2dbc675c227941173e1e998372e2090bd22 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -40,7 +40,7 @@ AND t2b = (SELECT max(avg) FROM (SELECT t2b, avg(t2b) avg FROM t2 WHERE t2a = t1.t1b - ) T + ) ) -- !query 3 schema struct<> diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out index 1d5dddca76a17d2796d06d9b4cb27fa17d855776..8b29300e71f9032da96fb58279f86ac1e627cf8c 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -39,7 +39,7 @@ AND c.cv = (SELECT max(avg) FROM (SELECT c1.cv, avg(c1.cv) avg FROM c c1 WHERE c1.ck = p.pk - GROUP BY c1.cv) T) + GROUP BY c1.cv)) -- !query 3 schema struct<pk:int,cv:int> -- !query 3 output diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out index 14553557d1ffc73b17db0c478f8bc9950107a408..50370df349168b2eea7847b9d2fac799afc2d3b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out @@ -37,26 +37,14 @@ struct<key:int,value:string> -- !query 4 SELECT * FROM (SELECT * FROM testData) WHERE key = 1 -- !query 4 schema -struct<> +struct<key:int,value:string> -- !query 4 output -org.apache.spark.sql.catalyst.parser.ParseException - -The unaliased subqueries in the FROM clause are not supported.(line 1, pos 14) - -== SQL == -SELECT * FROM (SELECT * FROM testData) WHERE key = 1 ---------------^^^ +1 1 -- !query 5 FROM (SELECT * FROM testData WHERE key = 1) SELECT * -- !query 5 schema -struct<> +struct<key:int,value:string> -- !query 5 output -org.apache.spark.sql.catalyst.parser.ParseException - -The unaliased subqueries in the FROM clause are not supported.(line 1, pos 5) - -== SQL == -FROM (SELECT * FROM testData WHERE key = 1) SELECT * ------^^^ +1 1 diff --git a/sql/core/src/test/resources/sql-tests/results/union.sql.out b/sql/core/src/test/resources/sql-tests/results/union.sql.out index 865b3aed65d700ab02663e596d2a0155cb5cb166..d123b7fdbe0cf27f52357ee1456fd75b74c0d824 100644 --- a/sql/core/src/test/resources/sql-tests/results/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/union.sql.out @@ -22,7 +22,7 @@ struct<> SELECT * FROM (SELECT * FROM t1 UNION ALL - SELECT * FROM t1) T + SELECT * FROM t1) -- !query 2 schema struct<c1:int,c2:string> -- !query 2 output @@ -38,7 +38,7 @@ FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2 UNION ALL - SELECT * FROM t2) T + SELECT * FROM t2) -- !query 3 schema struct<c1:decimal(11,1),c2:string> -- !query 3 output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 
506cc2548e260a6073924bdbbb5b8534b0a0f407..3e4f61943159945a2da0ab55bfc443c1f380903d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -631,13 +631,13 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext val ds2 = sql( """ - |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt + |SELECT * FROM (SELECT c1, max(c1) FROM t1 GROUP BY c1) |WHERE - |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1) + |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) |OR |EXISTS (SELECT c1 FROM t3) |OR - |tt.c1 IN (SELECT c1 FROM t4) + |c1 IN (SELECT c1 FROM t4) """.stripMargin) assert(getNumInMemoryRelations(ds2) == 4) } @@ -683,20 +683,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext Seq(1).toDF("c1").createOrReplaceTempView("t1") Seq(2).toDF("c1").createOrReplaceTempView("t2") - sql( + val sql1 = """ |SELECT * FROM t1 |WHERE |NOT EXISTS (SELECT * FROM t2) - """.stripMargin).cache() + """.stripMargin + sql(sql1).cache() - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |NOT EXISTS (SELECT * FROM t2) - """.stripMargin) + val cachedDs = sql(sql1) assert(getNumInMemoryRelations(cachedDs) == 1) // Additional predicate in the subquery plan should cause a cache miss @@ -717,20 +712,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext Seq(1).toDF("c1").createOrReplaceTempView("t2") // Simple correlated predicate in subquery - sql( + val sqlText = """ |SELECT * FROM t1 |WHERE |t1.c1 in (SELECT t2.c1 FROM t2 where t1.c1 = t2.c1) - """.stripMargin).cache() + """.stripMargin + sql(sqlText).cache() - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |t1.c1 in (SELECT t2.c1 FROM t2 where t1.c1 = t2.c1) - """.stripMargin) + val cachedDs = sql(sqlText) assert(getNumInMemoryRelations(cachedDs) == 1) } } @@ -741,22 +731,16 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext spark.catalog.cacheTable("t1") // underlying table t1 is cached as well as the query that refers to it. 
- val ds = - sql( + val sqlText = """ |SELECT * FROM t1 |WHERE |NOT EXISTS (SELECT * FROM t1) - """.stripMargin) + """.stripMargin + val ds = sql(sqlText) assert(getNumInMemoryRelations(ds) == 2) - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |NOT EXISTS (SELECT * FROM t1) - """.stripMargin).cache() + val cachedDs = sql(sqlText).cache() assert(getNumInMemoryTablesRecursively(cachedDs.queryExecution.sparkPlan) == 3) } } @@ -769,45 +753,31 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext Seq(1).toDF("c1").createOrReplaceTempView("t4") // Nested predicate subquery - sql( + val sql1 = """ |SELECT * FROM t1 |WHERE |c1 IN (SELECT c1 FROM t2 WHERE c1 IN (SELECT c1 FROM t3 WHERE c1 = 1)) - """.stripMargin).cache() + """.stripMargin + sql(sql1).cache() - val cachedDs = - sql( - """ - |SELECT * FROM t1 - |WHERE - |c1 IN (SELECT c1 FROM t2 WHERE c1 IN (SELECT c1 FROM t3 WHERE c1 = 1)) - """.stripMargin) + val cachedDs = sql(sql1) assert(getNumInMemoryRelations(cachedDs) == 1) // Scalar subquery and predicate subquery - sql( + val sql2 = """ - |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt + |SELECT * FROM (SELECT c1, max(c1) FROM t1 GROUP BY c1) |WHERE - |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1) + |c1 = (SELECT max(c1) FROM t2 GROUP BY c1) |OR |EXISTS (SELECT c1 FROM t3) |OR - |tt.c1 IN (SELECT c1 FROM t4) - """.stripMargin).cache() + |c1 IN (SELECT c1 FROM t4) + """.stripMargin + sql(sql2).cache() - val cachedDs2 = - sql( - """ - |SELECT * FROM (SELECT max(c1) as c1 FROM t1 GROUP BY c1) tt - |WHERE - |tt.c1 = (SELECT max(c1) FROM t2 GROUP BY c1) - |OR - |EXISTS (SELECT c1 FROM t3) - |OR - |tt.c1 IN (SELECT c1 FROM t4) - """.stripMargin) + val cachedDs2 = sql(sql2) assert(getNumInMemoryRelations(cachedDs2) == 1) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 5171aaebc990780f22c989ac7564064d9884d510..472ff7385b194a2e760e323d4f295f05ae80a080 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2638,4 +2638,17 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-21335: support un-aliased subquery") { + withTempView("v") { + Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v") + checkAnswer(sql("SELECT i from (SELECT i FROM v)"), Row(1)) + + val e = intercept[AnalysisException](sql("SELECT v.i from (SELECT i FROM v)")) + assert(e.message == + "cannot resolve '`v.i`' given input columns: [__auto_generated_subquery_name.i]") + + checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index c0a3b5add313a05c622a9b086b32fe539707074f..7bcb419e8df6c5ca3abc2251a80b86995e4bc8d2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -112,7 +112,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext { | with t4 as (select 1 as d, 3 as e) | select * from t4 cross join t2 where t2.b = t4.d | ) - | select a from (select 1 as a union all select 2 as a) t + | select a from (select 1 as a union all select 2 as a) | where a = (select max(d) from t3) """.stripMargin), Array(Row(1)) @@ -606,8 +606,8 @@ class SubquerySuite 
extends QueryTest with SharedSQLContext { | select cntPlusOne + 1 as cntPlusTwo from ( | select cnt + 1 as cntPlusOne from ( | select sum(r.c) s, count(*) cnt from r where l.a = r.c having cnt = 0 - | ) t1 - | ) t2 + | ) + | ) |) = 2""".stripMargin), Row(1) :: Row(1) :: Row(null) :: Row(null) :: Nil) } @@ -655,7 +655,7 @@ class SubquerySuite extends QueryTest with SharedSQLContext { """ | select c1 from onerow t1 | where exists (select 1 - | from (select 1 as c1 from onerow t2 LIMIT 1) t2 + | from (select c1 from onerow t2 LIMIT 1) t2 | where t1.c1=t2.c1)""".stripMargin), Row(1) :: Nil) }
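
Reviewer note (not part of the patch): the behavioral core of this change is that an un-aliased subquery in the FROM clause no longer raises a ParseException; the parser now wraps it in a `SubqueryAlias` named `__auto_generated_subquery_name`, so its columns resolve by unqualified name (or, discouraged, via that synthetic qualifier). The sketch below is a minimal standalone reproduction of the new SQLQuerySuite test; the object name `UnaliasedSubqueryExample` and the `local[*]` session setup are assumptions for illustration, while the view `v` and the three queries mirror the test added in this diff.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object UnaliasedSubqueryExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("SPARK-21335 un-aliased subquery sketch")
      .getOrCreate()
    import spark.implicits._

    Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v")

    // Previously: ParseException ("The unaliased subqueries in the FROM clause are
    // not supported."). Now the subquery gets the synthetic alias and resolves fine.
    spark.sql("SELECT i FROM (SELECT i FROM v)").show()

    // The inner view name is not a valid qualifier above the subquery boundary.
    try {
      spark.sql("SELECT v.i FROM (SELECT i FROM v)").collect()
    } catch {
      // Expected message (per the new test):
      //   cannot resolve '`v.i`' given input columns: [__auto_generated_subquery_name.i]
      case e: AnalysisException => println(e.getMessage)
    }

    // The synthetic qualifier itself still works, though the code comment in
    // AstBuilder explicitly discourages relying on it.
    spark.sql("SELECT __auto_generated_subquery_name.i FROM (SELECT i FROM v)").show()

    spark.stop()
  }
}
```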
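Reviewer note (not part of the patch): the `LogicalPlan.resolve` change switches the ambiguity error from the internal `name#exprId` form to `qualifiedName`, which is why the golden files now show `t1.i1, t1.i1` instead of `i1#x, i1#x`. A small hedged sketch of how that message surfaces; the view names `t1a`/`t1b` and the session setup are illustrative assumptions, not taken from the patch.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object AmbiguousReferenceMessageExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("ambiguous reference message sketch")
      .getOrCreate()
    import spark.implicits._

    // Two relations that both expose a column named i1.
    Seq(1).toDF("i1").createOrReplaceTempView("t1a")
    Seq(2).toDF("i1").createOrReplaceTempView("t1b")

    try {
      // 'i1' matches an attribute from each side of the join, so resolution fails.
      spark.sql("SELECT i1 FROM t1a, t1b").collect()
    } catch {
      // With this patch the message uses qualified names, e.g.:
      //   Reference 'i1' is ambiguous, could be: t1a.i1, t1b.i1.
      // rather than the previous expression-ID form (i1#x, i1#x).
      case e: AnalysisException => println(e.getMessage)
    }

    spark.stop()
  }
}
```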