From 49ea0e9d7ce805d312d94a5b2936eec2053bc052 Mon Sep 17 00:00:00 2001 From: Dilip Biswal <dbiswal@us.ibm.com> Date: Wed, 21 Oct 2015 11:10:32 -0700 Subject: [PATCH] [SPARK-10534] [SQL] ORDER BY clause allows only columns that are present in the select projection list Find out the missing attributes by recursively looking at the sort order expression and rest of the code takes care of projecting them out. Added description from cloud-fan I wanna explain a bit more about this bug. When we resolve sort ordering, we will use a special method, which only resolves UnresolvedAttributes and UnresolvedExtractValue. However, for something like Floor('a), even the 'a is resolved, the floor expression may still being unresolved as data type mismatch(for example, 'a is string type and Floor need double type), thus can't pass this filter, and we can't push down this missing attribute 'a Author: Dilip Biswal <dbiswal@us.ibm.com> Closes #9123 from dilipbiswal/SPARK-10534. --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e6046055bf..9237f2f3dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -482,7 +482,7 @@ class Analyzer( val newOrdering = resolveSortOrders(ordering, grandchild, throws = true) // Construct a set that contains all of the attributes that we need to evaluate the // ordering. - val requiredAttributes = AttributeSet(newOrdering.filter(_.resolved)) + val requiredAttributes = AttributeSet(newOrdering).filter(_.resolved) // Figure out which ones are missing from the projection, so that we can add them and // remove them after the sort. val missingInProject = requiredAttributes -- child.output diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index ec05cfa63c..24af8483a7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -143,4 +143,14 @@ class AnalysisSuite extends AnalysisTest { plan = testRelation.select(CreateStructUnsafe(Seq(a, (a + 1).as("a+1"))).as("col")) checkAnalysis(plan, plan) } + + test("SPARK-10534: resolve attribute references in order by clause") { + val a = testRelation2.output(0) + val c = testRelation2.output(2) + + val plan = testRelation2.select('c).orderBy(Floor('a).asc) + val expected = testRelation2.select(c, a).orderBy(Floor(a.cast(DoubleType)).asc).select(c) + + checkAnalysis(plan, expected) + } } -- GitLab