From 6de41e951fd6172ab7d603474abded0ee7417cde Mon Sep 17 00:00:00 2001 From: Zhenhua Wang <wzh_zju@163.com> Date: Fri, 2 Jun 2017 17:36:00 -0700 Subject: [PATCH] [SPARK-17078][SQL][FOLLOWUP] Simplify explain cost command ## What changes were proposed in this pull request? Usually, when using the explain cost command, users want to see the stats of the plan. Since stats are only shown in the optimized plan, it is more direct and convenient to include only the optimized plan and the physical plan in the output. ## How was this patch tested? Enhanced existing test. Author: Zhenhua Wang <wzh_zju@163.com> Closes #18190 from wzhfy/simplifyExplainCost. --- .../spark/sql/execution/QueryExecution.scala | 28 +++++++++---------- .../sql/execution/command/commands.scala | 2 +- .../sql/hive/execution/HiveExplainSuite.scala | 6 ++++ 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 1ba9a79446..34998cbd61 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -200,11 +200,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { """.stripMargin.trim } - override def toString: String = completeString(appendStats = false) - - def toStringWithStats: String = completeString(appendStats = true) - - private def completeString(appendStats: Boolean): String = { + override def toString: String = { def output = Utils.truncatedString( analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ") val analyzedPlan = Seq( @@ -212,25 +208,29 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { stringOrError(analyzed.treeString(verbose = true)) ).filter(_.nonEmpty).mkString("\n") - val optimizedPlanString = if (appendStats) { - // trigger to compute stats for 
logical plans - optimizedPlan.stats(sparkSession.sessionState.conf) - optimizedPlan.treeString(verbose = true, addSuffix = true) - } else { - optimizedPlan.treeString(verbose = true) - } - s"""== Parsed Logical Plan == |${stringOrError(logical.treeString(verbose = true))} |== Analyzed Logical Plan == |$analyzedPlan |== Optimized Logical Plan == - |${stringOrError(optimizedPlanString)} + |${stringOrError(optimizedPlan.treeString(verbose = true))} |== Physical Plan == |${stringOrError(executedPlan.treeString(verbose = true))} """.stripMargin.trim } + def stringWithStats: String = { + // trigger to compute stats for logical plans + optimizedPlan.stats(sparkSession.sessionState.conf) + + // only show optimized logical plan and physical plan + s"""== Optimized Logical Plan == + |${stringOrError(optimizedPlan.treeString(verbose = true, addSuffix = true))} + |== Physical Plan == + |${stringOrError(executedPlan.treeString(verbose = true))} + """.stripMargin.trim + } + /** A special namespace for commands that can be used to debug query execution. 
*/ // scalastyle:off object debug { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 99d81c49f1..2d82fcf4da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -127,7 +127,7 @@ case class ExplainCommand( } else if (extended) { queryExecution.toString } else if (cost) { - queryExecution.toStringWithStats + queryExecution.stringWithStats } else { queryExecution.simpleString } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala index aa1ca29090..3066a4f305 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala @@ -29,6 +29,12 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto import testImplicits._ test("show cost in explain command") { + // For readability, we only show optimized plan and physical plan in explain cost command + checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), + "Optimized Logical Plan", "Physical Plan") + checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), + "Parsed Logical Plan", "Analyzed Logical Plan") + // Only has sizeInBytes before ANALYZE command checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), "sizeInBytes") checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), "rowCount") -- GitLab