// Patch summary: adds EXCEPT and INTERSECT as set operators alongside UNION.
//  - SparkSqlLexer.g: adds the keyword token KW_EXCEPT.
//  - SparkSqlParser.g: adds the tree tokens TOK_EXCEPT and TOK_INTERSECT, adds two
//    setOperator alternatives for them, and changes the setOpSelectStatement rewrites to
//    root the tree at the matched operator ($u) instead of a hard-coded TOK_UNIONALL.
//  - CatalystQl.scala: maps TOK_UNIONDISTINCT to Distinct(Union(..)), TOK_EXCEPT to
//    Except(..) and TOK_INTERSECT to Intersect(..); drops the HIVE-9039 comment.
// NOTE(review): the parser references KW_INTERSECT but this patch adds no lexer rule for
//   it -- presumably it is already defined in SparkSqlLexer.g; confirm.
// NOTE(review): for UNION DISTINCT the grammar rewrite still wraps the operator in a
//   TOK_SELECTDI subquery, and CatalystQl now also wraps it in Distinct -- looks like the
//   de-duplication is applied twice; confirm this is intended.
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g index e01e7101d0b7e6eda9fedb9a93e394059121349a..44a63fbef258c44b6465eb1a6bd3c31f6191b937 100644 --- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g +++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g @@ -103,6 +103,7 @@ KW_CLUSTER: 'CLUSTER'; KW_DISTRIBUTE: 'DISTRIBUTE'; KW_SORT: 'SORT'; KW_UNION: 'UNION'; +KW_EXCEPT: 'EXCEPT'; KW_LOAD: 'LOAD'; KW_EXPORT: 'EXPORT'; KW_IMPORT: 'IMPORT'; diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g index 4afce3090f739aa42878af65af961f2cafe583c5..cf8a56566d32d17d07f3883effdddbe24cce0ad6 100644 --- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g +++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g @@ -88,6 +88,8 @@ TOK_DISTRIBUTEBY; TOK_SORTBY; TOK_UNIONALL; TOK_UNIONDISTINCT; +TOK_EXCEPT; +TOK_INTERSECT; TOK_JOIN; TOK_LEFTOUTERJOIN; TOK_RIGHTOUTERJOIN; @@ -2122,6 +2124,8 @@ setOperator @after { popMsg(state); } : KW_UNION KW_ALL -> ^(TOK_UNIONALL) | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT) + | KW_EXCEPT -> ^(TOK_EXCEPT) + | KW_INTERSECT -> ^(TOK_INTERSECT) ; queryStatementExpression[boolean topLevel] @@ -2242,7 +2246,7 @@ setOpSelectStatement[CommonTree t, boolean topLevel] ^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY - ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b) + ^($u {$setOpSelectStatement.tree} $b) {adaptor.create(Identifier, generateUnionAlias())} ) ) @@ -2252,12 +2256,12 @@ setOpSelectStatement[CommonTree t, boolean topLevel] ) ) -> {$setOpSelectStatement.tree != null && $u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}? 
- ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b) + ^($u {$setOpSelectStatement.tree} $b) -> {$setOpSelectStatement.tree == null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? ^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY - ^(TOK_UNIONALL {$t} $b) + ^($u {$t} $b) {adaptor.create(Identifier, generateUnionAlias())} ) ) @@ -2266,7 +2270,7 @@ setOpSelectStatement[CommonTree t, boolean topLevel] ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF)) ) ) - -> ^(TOK_UNIONALL {$t} $b) + -> ^($u {$t} $b) )+ o=orderByClause? c=clusterByClause? diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala index 42bdf25b61ea5a8305f27613e7507d338142acfe..1eda4a9a9764460435ecea3cb3b0df247916e5cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala @@ -399,9 +399,14 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C // return With plan if there is CTE cteRelations.map(With(query, _)).getOrElse(query) - // HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding TOK_UNIONDISTINCT case Token("TOK_UNIONALL", left :: right :: Nil) => Union(nodeToPlan(left), nodeToPlan(right)) + case Token("TOK_UNIONDISTINCT", left :: right :: Nil) => + Distinct(Union(nodeToPlan(left), nodeToPlan(right))) + case Token("TOK_EXCEPT", left :: right :: Nil) => + Except(nodeToPlan(left), nodeToPlan(right)) + case Token("TOK_INTERSECT", left :: right :: Nil) => + Intersect(nodeToPlan(left), nodeToPlan(right)) case _ => noParseRule("Plan", node) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala new file mode 100644 index 0000000000000000000000000000000000000000..0fee97fb0718cc6c028c089dd3c62ca99b0ea157 --- /dev/null +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst + +import org.apache.spark.sql.catalyst.plans.PlanTest + +/** + * Smoke test for the set operators: createPlan is called on UNION ALL, UNION DISTINCT, + * UNION, EXCEPT and INTERSECT queries, and the returned plans are not inspected. + */ +class CatalystQlSuite extends PlanTest { + + test("parse union/except/intersect") { + val parser = new CatalystQl() + parser.createPlan("select * from t1 union all select * from t2") + parser.createPlan("select * from t1 union distinct select * from t2") + parser.createPlan("select * from t1 union select * from t2") + parser.createPlan("select * from t1 except select * from t2") + parser.createPlan("select * from t1 intersect select * from t2") + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 98e22c2e2c1b0ff50738ed17d1eda885cf6da3f4..fa99289b4197148dcc9981a9f0238fec1a6f0e6b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -787,6 +787,24 @@ class HiveQuerySuite extends HiveComparisonTest with 
BeforeAndAfter { assert(sql("select key from src having key > 490").collect().size < 100) } + test("union/except/intersect") { + /* Runs the query through sql() and collects every result row. */ + def rowsOf(query: String): Array[Row] = sql(query).collect() + + /* UNION ALL keeps the duplicate row. */ + assertResult(Array(Row(1), Row(1)))(rowsOf("select 1 as a union all select 1 as a")) + + /* UNION DISTINCT and bare UNION both collapse the duplicate. */ + assertResult(Array(Row(1)))(rowsOf("select 1 as a union distinct select 1 as a")) + assertResult(Array(Row(1)))(rowsOf("select 1 as a union select 1 as a")) + + /* EXCEPT yields no rows because the only row also appears on the right side. */ + assertResult(Array())(rowsOf("select 1 as a except select 1 as a")) + + /* INTERSECT keeps the row shared by both sides. */ + assertResult(Array(Row(1)))(rowsOf("select 1 as a intersect select 1 as a")) + } + test("SPARK-5383 alias for udfs with multi output columns") { assert( sql("select stack(2, key, value, key, value) as (a, b) from src limit 5")