Skip to content
Snippets Groups Projects
Commit a9ec033c authored by Zongheng Yang's avatar Zongheng Yang Committed by Michael Armbrust
Browse files

[SPARK-1704][SQL] Fully support EXPLAIN commands as SchemaRDD.

This PR attempts to resolve [SPARK-1704](https://issues.apache.org/jira/browse/SPARK-1704) by introducing a physical plan for EXPLAIN commands, which just prints out the debug string (containing various SparkSQL's plans) of the corresponding QueryExecution for the actual query.

Author: Zongheng Yang <zongheng.y@gmail.com>

Closes #1003 from concretevitamin/explain-cmd and squashes the following commits:

5b7911f [Zongheng Yang] Add a regression test.
1bfa379 [Zongheng Yang] Modify output().
719ada9 [Zongheng Yang] Override otherCopyArgs for ExplainCommandPhysical.
4318fd7 [Zongheng Yang] Make all output one Row.
439c6ab [Zongheng Yang] Minor cleanups.
408f574 [Zongheng Yang] SPARK-1704: Add CommandStrategy and ExplainCommandPhysical.
parent c6e041d1
No related branches found
No related tags found
No related merge requests found
...@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical ...@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.types.StructType import org.apache.spark.sql.catalyst.types.{StringType, StructType}
import org.apache.spark.sql.catalyst.trees import org.apache.spark.sql.catalyst.trees
abstract class LogicalPlan extends QueryPlan[LogicalPlan] { abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
...@@ -102,7 +102,7 @@ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { ...@@ -102,7 +102,7 @@ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] {
*/ */
abstract class Command extends LeafNode { abstract class Command extends LeafNode {
self: Product => self: Product =>
def output = Seq.empty def output: Seq[Attribute] = Seq.empty
} }
/** /**
...@@ -115,7 +115,9 @@ case class NativeCommand(cmd: String) extends Command ...@@ -115,7 +115,9 @@ case class NativeCommand(cmd: String) extends Command
* Returned by a parser when the users only wants to see what query plan would be executed, without * Returned by a parser when the users only wants to see what query plan would be executed, without
* actually performing the execution. * actually performing the execution.
*/ */
case class ExplainCommand(plan: LogicalPlan) extends Command case class ExplainCommand(plan: LogicalPlan) extends Command {
override def output = Seq(AttributeReference("plan", StringType, nullable = false)())
}
/** /**
* A logical plan node with single child. * A logical plan node with single child.
......
...@@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext) ...@@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
val sparkContext = self.sparkContext val sparkContext = self.sparkContext
val strategies: Seq[Strategy] = val strategies: Seq[Strategy] =
CommandStrategy(self) ::
TakeOrdered :: TakeOrdered ::
PartialAggregation :: PartialAggregation ::
LeftSemiJoin :: LeftSemiJoin ::
...@@ -256,6 +257,11 @@ class SQLContext(@transient val sparkContext: SparkContext) ...@@ -256,6 +257,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil
} }
// TODO: or should we make QueryExecution protected[sql]?
protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution {
val logical = plan
}
/** /**
* The primary workflow for executing relational queries using Spark. Designed to allow easy * The primary workflow for executing relational queries using Spark. Designed to allow easy
* access to the intermediate phases of query execution for developers. * access to the intermediate phases of query execution for developers.
......
...@@ -233,4 +233,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { ...@@ -233,4 +233,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case _ => Nil case _ => Nil
} }
} }
// TODO: this should be merged with SPARK-1508's SetCommandStrategy
case class CommandStrategy(context: SQLContext) extends Strategy {
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
case logical.ExplainCommand(child) =>
val qe = context.mkQueryExecution(child)
Seq(execution.ExplainCommandPhysical(qe.executedPlan, plan.output)(context))
case _ => Nil
}
}
} }
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, Row}
import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
(@transient context: SQLContext) extends UnaryNode {
def execute(): RDD[Row] = {
val planString = new GenericRow(Array[Any](child.toString))
context.sparkContext.parallelize(Seq(planString))
}
override def otherCopyArgs = context :: Nil
}
...@@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { ...@@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
val hiveContext = self val hiveContext = self
override val strategies: Seq[Strategy] = Seq( override val strategies: Seq[Strategy] = Seq(
CommandStrategy(self),
TakeOrdered, TakeOrdered,
ParquetOperations, ParquetOperations,
HiveTableScans, HiveTableScans,
...@@ -304,7 +305,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { ...@@ -304,7 +305,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
*/ */
def stringResult(): Seq[String] = analyzed match { def stringResult(): Seq[String] = analyzed match {
case NativeCommand(cmd) => runSqlHive(cmd) case NativeCommand(cmd) => runSqlHive(cmd)
case ExplainCommand(plan) => new QueryExecution { val logical = plan }.toString.split("\n") case ExplainCommand(plan) => mkQueryExecution(plan).toString.split("\n")
case query => case query =>
val result: Seq[Seq[Any]] = toRdd.collect().toSeq val result: Seq[Seq[Any]] = toRdd.collect().toSeq
// We need the types so we can output struct field names // We need the types so we can output struct field names
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
package org.apache.spark.sql.hive.execution package org.apache.spark.sql.hive.execution
import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive
/** /**
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution. * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
...@@ -159,4 +160,15 @@ class HiveQuerySuite extends HiveComparisonTest { ...@@ -159,4 +160,15 @@ class HiveQuerySuite extends HiveComparisonTest {
hql("SHOW TABLES").toString hql("SHOW TABLES").toString
hql("SELECT * FROM src").toString hql("SELECT * FROM src").toString
} }
test("SPARK-1704: Explain commands as a SchemaRDD") {
hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
val rdd = hql("explain select key, count(value) from src group by key")
assert(rdd.collect().size == 1)
assert(rdd.toString.contains("ExplainCommand"))
assert(rdd.filter(row => row.toString.contains("ExplainCommand")).collect().size == 0,
"actual contents of the result should be the plans of the query to be explained")
TestHive.reset()
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment