From 18bcbbdd84e80222d1d29530831c6d68d02e7593 Mon Sep 17 00:00:00 2001
From: Nong Li <nong@databricks.com>
Date: Wed, 10 Feb 2016 23:52:19 -0800
Subject: [PATCH] [SPARK-13270][SQL] Remove extra new lines in whole stage
 codegen and include pipeline plan in comments.

Author: Nong Li <nong@databricks.com>

Closes #11155 from nongli/spark-13270.
---
 .../expressions/codegen/CodeFormatter.scala        | 14 ++++++++++++++
 .../spark/sql/execution/WholeStageCodegen.scala    |  8 ++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 9b8b6382d7..9d99bbffbe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -25,6 +25,20 @@ package org.apache.spark.sql.catalyst.expressions.codegen
  */
 object CodeFormatter {
   def format(code: String): String = new CodeFormatter().addLines(code).result()
+  def stripExtraNewLines(input: String): String = {
+    val code = new StringBuilder
+    var lastLine: String = "dummy"
+    input.split('\n').foreach { l =>
+      val line = l.trim()
+      val skip = line == "" && (lastLine == "" || lastLine.endsWith("{"))
+      if (!skip) {
+        code.append(line)
+        code.append("\n")
+      }
+      lastLine = line
+    }
+    code.result()
+  }
 }
 
 private class CodeFormatter {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala
index b200239c94..30f74fc14f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala
@@ -237,6 +237,9 @@ case class WholeStageCodegen(plan: CodegenSupport, children: Seq[SparkPlan])
         return new GeneratedIterator(references);
       }
 
+      /** Codegened pipeline for:
+        * ${plan.treeString.trim}
+        */
       class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator {
 
         private Object[] references;
@@ -256,8 +259,9 @@ case class WholeStageCodegen(plan: CodegenSupport, children: Seq[SparkPlan])
       """
 
     // try to compile, helpful for debug
-    // println(s"${CodeFormatter.format(source)}")
-    CodeGenerator.compile(source)
+    val cleanedSource = CodeFormatter.stripExtraNewLines(source)
+    // println(s"${CodeFormatter.format(cleanedSource)}")
+    CodeGenerator.compile(cleanedSource)
 
     plan.upstream().mapPartitions { iter =>
 
-- 
GitLab