Skip to content
Snippets Groups Projects
Commit 2cbb3e43 authored by Daoyuan Wang's avatar Daoyuan Wang Committed by Michael Armbrust
Browse files

[SPARK-5642] [SQL] Apply column pruning on unused aggregation fields

select k from (select key k, max(value) v from src group by k) t

Author: Daoyuan Wang <daoyuan.wang@intel.com>
Author: Michael Armbrust <michael@databricks.com>

Closes #4415 from adrian-wang/groupprune and squashes the following commits:

5d2d8a3 [Daoyuan Wang] address Michael's comments
61f8ef7 [Daoyuan Wang] add a unit test
80ddcc6 [Daoyuan Wang] keep project
b69d385 [Daoyuan Wang] add a prune rule for grouping set
parent 5d3cc6b3
No related branches found
No related tags found
No related merge requests found
...@@ -119,6 +119,15 @@ object ColumnPruning extends Rule[LogicalPlan] { ...@@ -119,6 +119,15 @@ object ColumnPruning extends Rule[LogicalPlan] {
case a @ Aggregate(_, _, child) if (child.outputSet -- a.references).nonEmpty => case a @ Aggregate(_, _, child) if (child.outputSet -- a.references).nonEmpty =>
a.copy(child = Project(a.references.toSeq, child)) a.copy(child = Project(a.references.toSeq, child))
// Prune unused aggregate expressions: when a Project sits on top of an Aggregate
// and does not reference every output of that Aggregate, drop the aggregate
// expressions (e.g. an unused max(value)) whose results the Project never reads.
// The guard `(a.outputSet -- p.references).nonEmpty` ensures this case only fires
// when there is actually something to prune, so the rule reaches a fixed point.
case p @ Project(projectList, a @ Aggregate(groupingExpressions, aggregateExpressions, child))
if (a.outputSet -- p.references).nonEmpty =>
// Keep the Project on top (its projectList may rename/reorder columns) and
// rebuild the Aggregate with only the expressions the Project references.
Project(
projectList,
Aggregate(
groupingExpressions,
// Retain only aggregate expressions whose output attribute is referenced above.
aggregateExpressions.filter(e => p.references.contains(e)),
child))
// Eliminate unneeded attributes from either side of a Join. // Eliminate unneeded attributes from either side of a Join.
case Project(projectList, Join(left, right, joinType, condition)) => case Project(projectList, Join(left, right, joinType, condition)) =>
// Collect the list of all references required either above or to evaluate the condition. // Collect the list of all references required either above or to evaluate the condition.
......
...@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer ...@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer
import org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
import org.apache.spark.sql.catalyst.expressions.Explode import org.apache.spark.sql.catalyst.expressions.{Count, Explode}
import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.{PlanTest, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.plans.{PlanTest, LeftOuter, RightOuter}
import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.rules._
...@@ -37,7 +37,8 @@ class FilterPushdownSuite extends PlanTest { ...@@ -37,7 +37,8 @@ class FilterPushdownSuite extends PlanTest {
CombineFilters, CombineFilters,
PushPredicateThroughProject, PushPredicateThroughProject,
PushPredicateThroughJoin, PushPredicateThroughJoin,
PushPredicateThroughGenerate) :: Nil PushPredicateThroughGenerate,
ColumnPruning) :: Nil
} }
val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val testRelation = LocalRelation('a.int, 'b.int, 'c.int)
...@@ -58,6 +59,38 @@ class FilterPushdownSuite extends PlanTest { ...@@ -58,6 +59,38 @@ class FilterPushdownSuite extends PlanTest {
comparePlans(optimized, correctAnswer) comparePlans(optimized, correctAnswer)
} }
test("column pruning for group") {
  // A query that aggregates two columns but only selects the grouping key:
  // the Count('b) aggregate is dead and should be pruned by ColumnPruning.
  val input =
    testRelation
      .groupBy('a)('a, Count('b))
      .select('a)

  // Expected plan: 'b is pruned below the Aggregate and Count('b) is dropped.
  val expected =
    testRelation
      .select('a)
      .groupBy('a)('a)
      .select('a)
      .analyze

  comparePlans(Optimize(input.analyze), expected)
}
test("column pruning for group with alias") {
  // Same pruning scenario as above, but the grouping key is selected through
  // an alias ('a as 'c); pruning must resolve references through the alias.
  val input =
    testRelation
      .groupBy('a)('a as 'c, Count('b))
      .select('c)

  // Expected plan: unused Count('b) removed, 'b pruned beneath the Aggregate,
  // while the alias 'c survives in both the Aggregate output and the Project.
  val expected =
    testRelation
      .select('a)
      .groupBy('a)('a as 'c)
      .select('c)
      .analyze

  comparePlans(Optimize(input.analyze), expected)
}
// After this line is unimplemented. // After this line is unimplemented.
test("simple push down") { test("simple push down") {
val originalQuery = val originalQuery =
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment