Skip to content
Snippets Groups Projects
Commit d8f528ce authored by Andrew Ray's avatar Andrew Ray Committed by Yin Huai
Browse files

[SPARK-13749][SQL][FOLLOW-UP] Faster pivot implementation for many distinct...

[SPARK-13749][SQL][FOLLOW-UP] Faster pivot implementation for many distinct values with two phase aggregation

## What changes were proposed in this pull request?

This is a follow up PR for #11583. It makes 3 lazy vals into just vals and adds unit test coverage.

## How was this patch tested?

Existing unit tests and additional unit tests.

Author: Andrew Ray <ray.andrew@gmail.com>

Closes #12861 from aray/fast-pivot-follow-up.
parent bb9ab56b
No related branches found
No related tags found
No related merge requests found
...@@ -141,12 +141,12 @@ case class PivotFirst( ...@@ -141,12 +141,12 @@ case class PivotFirst(
copy(mutableAggBufferOffset = newMutableAggBufferOffset) copy(mutableAggBufferOffset = newMutableAggBufferOffset)
override lazy val aggBufferAttributes: Seq[AttributeReference] = override val aggBufferAttributes: Seq[AttributeReference] =
pivotIndex.toList.sortBy(_._2).map(kv => AttributeReference(kv._1.toString, valueDataType)()) pivotIndex.toList.sortBy(_._2).map(kv => AttributeReference(kv._1.toString, valueDataType)())
override lazy val aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes) override val aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes)
override lazy val inputAggBufferAttributes: Seq[AttributeReference] = override val inputAggBufferAttributes: Seq[AttributeReference] =
aggBufferAttributes.map(_.newInstance()) aggBufferAttributes.map(_.newInstance())
} }
...@@ -180,4 +180,21 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{ ...@@ -180,4 +180,21 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{
) )
} }
test("pivot with datatype not supported by PivotFirst") {
checkAnswer(
complexData.groupBy().pivot("b", Seq(true, false)).agg(max("a")),
Row(Seq(1, 1, 1), Seq(2, 2, 2)) :: Nil
)
}
test("pivot with datatype not supported by PivotFirst 2") {
checkAnswer(
courseSales.withColumn("e", expr("array(earnings, 7.0d)"))
.groupBy("year")
.pivot("course", Seq("dotNET", "Java"))
.agg(min($"e")),
Row(2012, Seq(5000.0, 7.0), Seq(20000.0, 7.0)) ::
Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil
)
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment