From 570647267055cbe33291232b375e08fa1f5d8e7a Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Tue, 10 May 2016 11:53:37 +0200
Subject: [PATCH] [SPARK-15215][SQL] Fix Explain Parsing and Output

#### What changes were proposed in this pull request?
This PR addresses a few existing issues in `EXPLAIN`:
- The `EXPLAIN` options `LOGICAL | FORMATTED | EXTENDED | CODEGEN` should match zero or one time, not zero or more times; with this change, the parser no longer allows more than one option in a single command (see the sketch after this list).
- The option `LOGICAL` is not supported; issue an exception when users specify it.
- The output of `EXPLAIN` contains a spurious empty line when the output of the analyzed plan is empty. We should remove it. For example:
  ```
  == Parsed Logical Plan ==
  CreateTable CatalogTable(`t`,CatalogTableType(MANAGED),CatalogStorageFormat(None,Some(org.apache.hadoop.mapred.TextInputFormat),Some(org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat),None,false,Map()),List(CatalogColumn(col,int,true,None)),List(),List(),List(),-1,,1462725171656,-1,Map(),None,None,None), false

  == Analyzed Logical Plan ==

  CreateTable CatalogTable(`t`,CatalogTableType(MANAGED),CatalogStorageFormat(None,Some(org.apache.hadoop.mapred.TextInputFormat),Some(org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat),None,false,Map()),List(CatalogColumn(col,int,true,None)),List(),List(),List(),-1,,1462725171656,-1,Map(),None,None,None), false

  == Optimized Logical Plan ==
  CreateTable CatalogTable(`t`,CatalogTableType(MANAGED),CatalogStorageFormat(None,Some(org.apache.hadoop.mapred.TextInputFormat),Some(org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat),None,false,Map()),List(CatalogColumn(col,int,true,None)),List(),List(),List(),-1,,1462725171656,-1,Map(),None,None,None), false
  ...
  ```
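
As a rough illustration of the intended behavior, the following sketch shows the new parsing rules from a local Spark session. This is not part of the patch: the session setup is illustrative, and surfacing unsupported options as a `ParseException` reflects how this patch reports them.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.parser.ParseException

// Minimal sketch, assuming a local 2.0-style SparkSession.
val spark = SparkSession.builder().master("local[*]").getOrCreate()

// Zero or one option still parses.
spark.sql("EXPLAIN SELECT 1").show(false)            // physical plan only
spark.sql("EXPLAIN EXTENDED SELECT 1").show(false)   // parsed/analyzed/optimized/physical plans

// LOGICAL (or more than one option) is now rejected at parse time.
try {
  spark.sql("EXPLAIN LOGICAL SELECT 1")
} catch {
  case e: ParseException => println(s"rejected as expected: ${e.getMessage}")
}
```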

#### How was this patch tested?
Added and modified a few test cases in `PlanParserSuite` and `HiveExplainSuite`.
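
The new checks follow the standard ScalaTest `intercept` pattern already used in these suites; a simplified sketch of the kind of assertion involved (suite boilerplate omitted, `test`, `intercept`, and `sql` come from the surrounding test harness):

```scala
import org.apache.spark.sql.catalyst.parser.ParseException

// Simplified sketch of the added coverage; the full versions are in
// PlanParserSuite and HiveExplainSuite in the diff below.
test("EXPLAIN accepts at most one option") {
  intercept[ParseException] {
    sql("EXPLAIN EXTENDED CODEGEN SELECT 1")   // two options -> rejected at parse time
  }
}
```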

Author: gatorsmile <gatorsmile@gmail.com>

Closes #12991 from gatorsmile/explainCreateTable.
---
 .../spark/sql/catalyst/parser/SqlBase.g4      |  7 +------
 .../sql/catalyst/parser/PlanParserSuite.scala |  5 +++++
 .../spark/sql/execution/QueryExecution.scala  |  5 +++--
 .../spark/sql/execution/SparkSqlParser.scala  | 14 ++++++++-----
 .../sql/execution/command/commands.scala      |  2 +-
 .../sql/hive/execution/HiveExplainSuite.scala | 20 +++++--------------
 6 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index ee27d69ab3..ffb7a097ee 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -92,7 +92,7 @@ statement
     | CREATE TEMPORARY? FUNCTION qualifiedName AS className=STRING
         (USING resource (',' resource)*)?                              #createFunction
     | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName              #dropFunction
-    | EXPLAIN explainOption* statement                                 #explain
+    | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN)? statement    #explain
     | SHOW TABLES ((FROM | IN) db=identifier)?
         (LIKE? pattern=STRING)?                                        #showTables
     | SHOW DATABASES (LIKE pattern=STRING)?                            #showDatabases
@@ -587,11 +587,6 @@ frameBound
     | expression boundType=(PRECEDING | FOLLOWING)
     ;
 
-
-explainOption
-    : LOGICAL | FORMATTED | EXTENDED | CODEGEN
-    ;
-
 qualifiedName
     : identifier ('.' identifier)*
     ;
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index f25e3fb404..25d87d93be 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -48,6 +48,11 @@ class PlanParserSuite extends PlanTest {
     assertEqual("SELECT * FROM a", plan)
   }
 
+  test("explain") {
+    intercept("EXPLAIN logical SELECT 1", "Unsupported SQL statement")
+    intercept("EXPLAIN formatted SELECT 1", "Unsupported SQL statement")
+  }
+
   test("show functions") {
     assertEqual("show functions", ShowFunctions(None, None))
     assertEqual("show functions foo", ShowFunctions(None, Some("foo")))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index 3e772286e0..cb3c46a98b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -208,12 +208,13 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
   override def toString: String = {
     def output =
       analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ")
+    val analyzedPlan =
+      Seq(stringOrError(output), stringOrError(analyzed)).filter(_.nonEmpty).mkString("\n")
 
     s"""== Parsed Logical Plan ==
        |${stringOrError(logical)}
        |== Analyzed Logical Plan ==
-       |${stringOrError(output)}
-       |${stringOrError(analyzed)}
+       |$analyzedPlan
        |== Optimized Logical Plan ==
        |${stringOrError(optimizedPlan)}
        |== Physical Plan ==
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a85ac16295..086282d07c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -212,18 +212,22 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
 
   /**
    * Create an [[ExplainCommand]] logical plan.
+   * The syntax of using this command in SQL is:
+   * {{{
+   *   EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ...
+   * }}}
    */
   override def visitExplain(ctx: ExplainContext): LogicalPlan = withOrigin(ctx) {
-    val options = ctx.explainOption.asScala
-    if (options.exists(_.FORMATTED != null)) {
+    if (ctx.FORMATTED != null) {
       throw operationNotAllowed("EXPLAIN FORMATTED", ctx)
     }
+    if (ctx.LOGICAL != null) {
+      throw operationNotAllowed("EXPLAIN LOGICAL", ctx)
+    }
 
-    // Create the explain comment.
     val statement = plan(ctx.statement)
     if (isExplainableStatement(statement)) {
-      ExplainCommand(statement, extended = options.exists(_.EXTENDED != null),
-        codegen = options.exists(_.CODEGEN != null))
+      ExplainCommand(statement, extended = ctx.EXTENDED != null, codegen = ctx.CODEGEN != null)
     } else {
       ExplainCommand(OneRowRelation)
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index 5f9287b3b5..576e12a94b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -85,7 +85,7 @@ private[sql] case class ExecutedCommandExec(cmd: RunnableCommand) extends SparkP
  * (but do NOT actually execute it).
  *
  * {{{
- *   EXPLAIN (EXTENDED|CODEGEN) SELECT * FROM ...
+ *   EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ...
  * }}}
  *
  * @param logicalPlan plan to explain
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index 542de724cc..17422ca1a0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.test.SQLTestUtils
 
@@ -86,7 +87,7 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
            |CREATE TABLE t1
            |AS
            |SELECT * FROM jt
-      """.stripMargin).collect().map(_.mkString).mkString
+         """.stripMargin).collect().map(_.mkString).mkString
 
       val shouldContain =
         "== Parsed Logical Plan ==" :: "== Analyzed Logical Plan ==" :: "Subquery" ::
@@ -115,19 +116,8 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
       "== Physical Plan =="
     )
 
-    checkKeywordsExist(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"),
-      "WholeStageCodegen",
-      "Generated code:",
-      "/* 001 */ public Object generate(Object[] references) {",
-      "/* 002 */   return new GeneratedIterator(references);",
-      "/* 003 */ }"
-    )
-
-    checkKeywordsNotExist(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"),
-      "== Parsed Logical Plan ==",
-      "== Analyzed Logical Plan ==",
-      "== Optimized Logical Plan ==",
-      "== Physical Plan =="
-    )
+    intercept[ParseException] {
+      sql("EXPLAIN EXTENDED CODEGEN SELECT 1")
+    }
   }
 }
-- 
GitLab