From 9ade77c3fa2e1bf436b79368a97d5980c12fe215 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <simonh@tw.ibm.com> Date: Thu, 28 Jul 2016 22:33:33 +0800 Subject: [PATCH] [SPARK-16639][SQL] The query with having condition that contains grouping by column should work ## What changes were proposed in this pull request? A query whose HAVING condition references a GROUP BY expression currently fails during analysis. E.g., create table tbl(a int, b string); select count(b) from tbl group by a + 1 having a + 1 = 2; The HAVING condition should be able to reference the GROUP BY expressions. ## How was this patch tested? Jenkins tests. Author: Liang-Chi Hsieh <simonh@tw.ibm.com> Closes #14296 from viirya/having-contains-grouping-column. --- .../sql/catalyst/analysis/Analyzer.scala | 15 ++++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 22 ++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 61162ccdba..2efa997ff2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -246,7 +246,7 @@ class Analyzer( }.isDefined } - private def hasGroupingFunction(e: Expression): Boolean = { + private[sql] def hasGroupingFunction(e: Expression): Boolean = { e.collectFirst { case g: Grouping => g case g: GroupingID => g @@ -1207,6 +1207,19 @@ class Analyzer( val alias = Alias(ae, ae.toString)() aggregateExpressions += alias alias.toAttribute + // Grouping functions are handled in the rule [[ResolveGroupingAnalytics]]. 
+ case e: Expression if grouping.exists(_.semanticEquals(e)) && + !ResolveGroupingAnalytics.hasGroupingFunction(e) && + !aggregate.output.exists(_.semanticEquals(e)) => + e match { + case ne: NamedExpression => + aggregateExpressions += ne + ne.toAttribute + case _ => + val alias = Alias(e, e.toString)() + aggregateExpressions += alias + alias.toAttribute + } } // Push the aggregate expressions into the aggregate (if any). diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 06cc2a5057..d89bda1e48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -39,11 +39,23 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { setupTestData() test("having clause") { - Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v") - .createOrReplaceTempView("hav") - checkAnswer( - sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"), - Row("one", 6) :: Row("three", 3) :: Nil) + withTempView("hav") { + Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v") + .createOrReplaceTempView("hav") + checkAnswer( + sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"), + Row("one", 6) :: Row("three", 3) :: Nil) + } + } + + test("having condition contains grouping column") { + withTempView("hav") { + Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v") + .createOrReplaceTempView("hav") + checkAnswer( + sql("SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2"), + Row(1) :: Nil) + } } test("SPARK-8010: promote numeric to string") { -- GitLab