Skip to content
Snippets Groups Projects
Commit fa1af0af authored by Dongjoon Hyun's avatar Dongjoon Hyun Committed by Reynold Xin
Browse files

[SPARK-14251][SQL] Add SQL command for printing out generated code for debugging

## What changes were proposed in this pull request?

This PR implements the `EXPLAIN CODEGEN` SQL command, which returns the generated code just like `debugCodegen` does. In `spark-shell`, we no longer need to `import` the debug module. In `spark-sql`, we can use this SQL command now.

**Before**
```
scala> import org.apache.spark.sql.execution.debug._
scala> sql("select 'a' as a group by 1").debugCodegen()
Found 2 WholeStageCodegen subtrees.
== Subtree 1 / 2 ==
...

Generated code:
...

== Subtree 2 / 2 ==
...

Generated code:
...
```

**After**
```
scala> sql("explain extended codegen select 'a' as a group by 1").collect().foreach(println)
[Found 2 WholeStageCodegen subtrees.]
[== Subtree 1 / 2 ==]
...
[]
[Generated code:]
...
[]
[== Subtree 2 / 2 ==]
...
[]
[Generated code:]
...
```

## How was this patch tested?

Pass the Jenkins tests (including new test cases)

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #12099 from dongjoon-hyun/SPARK-14251.
parent 877dc712
No related branches found
No related tags found
No related merge requests found
...@@ -584,7 +584,7 @@ frameBound ...@@ -584,7 +584,7 @@ frameBound
explainOption explainOption
: LOGICAL | FORMATTED | EXTENDED : LOGICAL | FORMATTED | EXTENDED | CODEGEN
; ;
transactionMode transactionMode
...@@ -633,7 +633,7 @@ nonReserved ...@@ -633,7 +633,7 @@ nonReserved
| DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED
| EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS
| GROUPING | CUBE | ROLLUP | GROUPING | CUBE | ROLLUP
| EXPLAIN | FORMAT | LOGICAL | FORMATTED | EXPLAIN | FORMAT | LOGICAL | FORMATTED | CODEGEN
| TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF
| SET | SET
| VIEW | REPLACE | VIEW | REPLACE
...@@ -724,6 +724,7 @@ DESCRIBE: 'DESCRIBE'; ...@@ -724,6 +724,7 @@ DESCRIBE: 'DESCRIBE';
EXPLAIN: 'EXPLAIN'; EXPLAIN: 'EXPLAIN';
FORMAT: 'FORMAT'; FORMAT: 'FORMAT';
LOGICAL: 'LOGICAL'; LOGICAL: 'LOGICAL';
CODEGEN: 'CODEGEN';
CAST: 'CAST'; CAST: 'CAST';
SHOW: 'SHOW'; SHOW: 'SHOW';
TABLES: 'TABLES'; TABLES: 'TABLES';
......
...@@ -136,7 +136,8 @@ class SparkSqlAstBuilder extends AstBuilder { ...@@ -136,7 +136,8 @@ class SparkSqlAstBuilder extends AstBuilder {
// Create the explain comment. // Create the explain comment.
val statement = plan(ctx.statement) val statement = plan(ctx.statement)
if (isExplainableStatement(statement)) { if (isExplainableStatement(statement)) {
ExplainCommand(statement, extended = options.exists(_.EXTENDED != null)) ExplainCommand(statement, extended = options.exists(_.EXTENDED != null),
codegen = options.exists(_.CODEGEN != null))
} else { } else {
ExplainCommand(OneRowRelation) ExplainCommand(OneRowRelation)
} }
......
...@@ -28,10 +28,10 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} ...@@ -28,10 +28,10 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.debug._
import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
/** /**
* A logical command that is executed for its side-effects. `RunnableCommand`s are * A logical command that is executed for its side-effects. `RunnableCommand`s are
* wrapped in `ExecutedCommand` during execution. * wrapped in `ExecutedCommand` during execution.
...@@ -237,15 +237,22 @@ case class ExplainCommand( ...@@ -237,15 +237,22 @@ case class ExplainCommand(
logicalPlan: LogicalPlan, logicalPlan: LogicalPlan,
override val output: Seq[Attribute] = override val output: Seq[Attribute] =
Seq(AttributeReference("plan", StringType, nullable = true)()), Seq(AttributeReference("plan", StringType, nullable = true)()),
extended: Boolean = false) extended: Boolean = false,
codegen: Boolean = false)
extends RunnableCommand { extends RunnableCommand {
// Run through the optimizer to generate the physical plan. // Run through the optimizer to generate the physical plan.
override def run(sqlContext: SQLContext): Seq[Row] = try { override def run(sqlContext: SQLContext): Seq[Row] = try {
// TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties. // TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties.
val queryExecution = sqlContext.executePlan(logicalPlan) val queryExecution = sqlContext.executePlan(logicalPlan)
val outputString = if (extended) queryExecution.toString else queryExecution.simpleString val outputString =
if (codegen) {
codegenString(queryExecution.executedPlan)
} else if (extended) {
queryExecution.toString
} else {
queryExecution.simpleString
}
outputString.split("\n").map(Row(_)) outputString.split("\n").map(Row(_))
} catch { case cause: TreeNodeException[_] => } catch { case cause: TreeNodeException[_] =>
("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_)) ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_))
......
...@@ -48,6 +48,25 @@ package object debug { ...@@ -48,6 +48,25 @@ package object debug {
// scalastyle:on println // scalastyle:on println
} }
/**
 * Renders every [[WholeStageCodegen]] subtree found in `plan`, together with the
 * Java source it generates, as one human-readable string.
 *
 * @param plan the physical plan to scan for whole-stage-codegen subtrees
 * @return a report listing each subtree followed by its formatted generated code
 */
def codegenString(plan: SparkPlan): String = {
  // Collect the distinct WholeStageCodegen roots. `transform` returns each node
  // unchanged, so the plan itself is left untouched; the set only dedupes.
  val subtrees = new collection.mutable.HashSet[WholeStageCodegen]()
  plan transform {
    case w: WholeStageCodegen =>
      subtrees += w
      w
    case other => other
  }
  val report = new StringBuilder(s"Found ${subtrees.size} WholeStageCodegen subtrees.\n")
  subtrees.toSeq.zipWithIndex.foreach { case (subtree, index) =>
    report.append(s"== Subtree ${index + 1} / ${subtrees.size} ==\n")
    report.append(subtree)
    report.append("\nGenerated code:\n")
    // doCodeGen returns (ctx, source); only the source text is reported.
    val (_, source) = subtree.doCodeGen()
    report.append(s"${CodeFormatter.format(source)}\n")
  }
  report.toString
}
/** /**
* Augments [[SQLContext]] with debug methods. * Augments [[SQLContext]] with debug methods.
*/ */
...@@ -81,28 +100,7 @@ package object debug { ...@@ -81,28 +100,7 @@ package object debug {
* WholeStageCodegen subtree). * WholeStageCodegen subtree).
*/ */
def debugCodegen(): Unit = { def debugCodegen(): Unit = {
debugPrint(debugCodegenString()) debugPrint(codegenString(query.queryExecution.executedPlan))
}
/** Visible for testing. */
def debugCodegenString(): String = {
val plan = query.queryExecution.executedPlan
val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
plan transform {
case s: WholeStageCodegen =>
codegenSubtrees += s
s
case s => s
}
var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
output += s
output += "\nGenerated code:\n"
val (_, source) = s.doCodeGen()
output += s"${CodeFormatter.format(source)}\n"
}
output
} }
} }
...@@ -123,6 +121,7 @@ package object debug { ...@@ -123,6 +121,7 @@ package object debug {
/** /**
* A collection of metrics for each column of output. * A collection of metrics for each column of output.
*
* @param elementTypes the actual runtime types for the output. Useful when there are bugs * @param elementTypes the actual runtime types for the output. Useful when there are bugs
* causing the wrong data to be projected. * causing the wrong data to be projected.
*/ */
......
...@@ -27,7 +27,7 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext { ...@@ -27,7 +27,7 @@ class DebuggingSuite extends SparkFunSuite with SharedSQLContext {
} }
test("debugCodegen") { test("debugCodegen") {
val res = sqlContext.range(10).groupBy("id").count().debugCodegenString() val res = codegenString(sqlContext.range(10).groupBy("id").count().queryExecution.executedPlan)
assert(res.contains("Subtree 1 / 2")) assert(res.contains("Subtree 1 / 2"))
assert(res.contains("Subtree 2 / 2")) assert(res.contains("Subtree 2 / 2"))
assert(res.contains("Object[]")) assert(res.contains("Object[]"))
......
...@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier ...@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.{BucketSpec, DataSource, LogicalRelation} import org.apache.spark.sql.execution.datasources.{BucketSpec, DataSource, LogicalRelation}
import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.hive.HiveContext
......
...@@ -101,4 +101,33 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto ...@@ -101,4 +101,33 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
"Physical Plan should not contain Subquery since it's eliminated by optimizer") "Physical Plan should not contain Subquery since it's eliminated by optimizer")
} }
} }
test("EXPLAIN CODEGEN command") {
  // Fragments of generated code that every codegen explain must contain.
  val generatedCodeMarkers = Seq(
    "WholeStageCodegen",
    "Generated code:",
    "/* 001 */ public Object generate(Object[] references) {",
    "/* 002 */ return new GeneratedIterator(references);",
    "/* 003 */ }")

  // EXPLAIN CODEGEN prints generated code only — no physical plan section.
  checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), true, generatedCodeMarkers: _*)
  checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), false,
    "== Physical Plan ==")

  // CODEGEN wins over EXTENDED: still generated code only, no plan sections.
  checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), true, generatedCodeMarkers: _*)
  checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), false,
    "== Parsed Logical Plan ==",
    "== Analyzed Logical Plan ==",
    "== Optimized Logical Plan ==",
    "== Physical Plan ==")
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment