From 18bcbbdd84e80222d1d29530831c6d68d02e7593 Mon Sep 17 00:00:00 2001 From: Nong Li <nong@databricks.com> Date: Wed, 10 Feb 2016 23:52:19 -0800 Subject: [PATCH] [SPARK-13270][SQL] Remove extra new lines in whole stage codegen and include pipeline plan in comments. Author: Nong Li <nong@databricks.com> Closes #11155 from nongli/spark-13270. --- .../expressions/codegen/CodeFormatter.scala | 14 ++++++++++++++ .../spark/sql/execution/WholeStageCodegen.scala | 8 ++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 9b8b6382d7..9d99bbffbe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -25,6 +25,20 @@ package org.apache.spark.sql.catalyst.expressions.codegen */ object CodeFormatter { def format(code: String): String = new CodeFormatter().addLines(code).result() + def stripExtraNewLines(input: String): String = { + val code = new StringBuilder + var lastLine: String = "dummy" + input.split('\n').foreach { l => + val line = l.trim() + val skip = line == "" && (lastLine == "" || lastLine.endsWith("{")) + if (!skip) { + code.append(line) + code.append("\n") + } + lastLine = line + } + code.result() + } } private class CodeFormatter { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala index b200239c94..30f74fc14f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala @@ -237,6 +237,9 @@ case class WholeStageCodegen(plan: CodegenSupport, children: Seq[SparkPlan]) return new GeneratedIterator(references); } + /** Codegened pipeline for: + * ${plan.treeString.trim} + */ class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator { private Object[] references; @@ -256,8 +259,9 @@ case class WholeStageCodegen(plan: CodegenSupport, children: Seq[SparkPlan]) """ // try to compile, helpful for debug - // println(s"${CodeFormatter.format(source)}") - CodeGenerator.compile(source) + val cleanedSource = CodeFormatter.stripExtraNewLines(source) + // println(s"${CodeFormatter.format(cleanedSource)}") + CodeGenerator.compile(cleanedSource) plan.upstream().mapPartitions { iter => -- GitLab