Skip to content
Snippets Groups Projects
Commit 6235132a authored by Michael Armbrust, committed by Herman van Hovell
Browse files

[SPARK-20567] Lazily bind in GenerateExec

It is not valid to eagerly bind the generator to the child's output, because doing so causes failures when we attempt to canonicalize the plan (which replaces the attribute references with dummies).

Author: Michael Armbrust <michael@databricks.com>

Closes #17838 from marmbrus/fixBindExplode.
parent b946f316
No related branches found
No related tags found
No related merge requests found
......@@ -78,7 +78,7 @@ case class GenerateExec(
override def outputPartitioning: Partitioning = child.outputPartitioning
val boundGenerator: Generator = BindReferences.bindReference(generator, child.output)
// Bound lazily on purpose (SPARK-20567): binding eagerly against child.output causes failures
// when the plan is canonicalized, since canonicalization replaces attribute references with dummies.
lazy val boundGenerator: Generator = BindReferences.bindReference(generator, child.output)
protected override def doExecute(): RDD[InternalRow] = {
// boundGenerator.terminate() should be triggered after all of the rows in the partition
......
......@@ -69,6 +69,22 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with BeforeAndAfte
)
}
test("count distinct") {
  // Streaming source of (key, values) pairs.
  val source = MemoryStream[(Int, Seq[Int])]

  // Explode the sequence column into one row per element, keeping the original columns.
  val exploded = source.toDF().select($"*", explode($"_2") as 'value)

  // Per key, count the distinct exploded values via the size of the collected set.
  val distinctCounts = exploded
    .groupBy($"_1")
    .agg(size(collect_set($"value")))
    .as[(Int, Int)]

  // Key 1 carries two distinct values (1 and 2), so the last batch must be (1, 2).
  testStream(distinctCounts, Update)(
    AddData(source, (1, Seq(1, 2))),
    CheckLastBatch((1, 2))
  )
}
test("simple count, complete mode") {
val inputData = MemoryStream[Int]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment