From 2837bf8548db7e9d43f6eefedf5a73feb22daedb Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Oct 2014 17:54:02 -0700
Subject: [PATCH] [SPARK-3798][SQL] Store the output of a generator in a val

This prevents it from changing during serialization, leading to corrupted results.

Author: Michael Armbrust <michael@databricks.com>

Closes #2656 from marmbrus/generateBug and squashes the following commits:

efa32eb [Michael Armbrust] Store the output of a generator in a val. This prevents it from changing during serialization.
---
 .../main/scala/org/apache/spark/sql/execution/Generate.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
index c386fd121c..38877c28de 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
@@ -39,7 +39,8 @@ case class Generate(
     child: SparkPlan)
   extends UnaryNode {
 
-  protected def generatorOutput: Seq[Attribute] = {
+  // This must be a val since the generator output expr ids are not preserved by serialization.
+  protected val generatorOutput: Seq[Attribute] = {
     if (join && outer) {
       generator.output.map(_.withNullability(true))
     } else {
@@ -62,7 +63,7 @@ case class Generate(
           newProjection(child.output ++ nullValues, child.output)
 
         val joinProjection =
-          newProjection(child.output ++ generator.output, child.output ++ generator.output)
+          newProjection(child.output ++ generatorOutput, child.output ++ generatorOutput)
         val joinedRow = new JoinedRow
 
         iter.flatMap {row =>
-- 
GitLab