diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 2b8fbdcde9d37fc693ee50458ceb8b55f3c5bed0..4f641cd3a656bccabcb7aa6a3a1fb002ec4495b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.types.StructType +import org.apache.spark.sql.catalyst.types.{StringType, StructType} import org.apache.spark.sql.catalyst.trees abstract class LogicalPlan extends QueryPlan[LogicalPlan] { @@ -102,7 +102,7 @@ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { */ abstract class Command extends LeafNode { self: Product => - def output = Seq.empty + def output: Seq[Attribute] = Seq.empty } /** @@ -115,7 +115,9 @@ case class NativeCommand(cmd: String) extends Command * Returned by a parser when the users only wants to see what query plan would be executed, without * actually performing the execution. */ -case class ExplainCommand(plan: LogicalPlan) extends Command +case class ExplainCommand(plan: LogicalPlan) extends Command { + override def output = Seq(AttributeReference("plan", StringType, nullable = false)()) +} /** * A logical plan node with single child. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 5626f0da2230b8fed7eee5bef9e61b3f25d296b7..fde4c485b58a0a067954c541996d261609478d1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext) val sparkContext = self.sparkContext val strategies: Seq[Strategy] = + CommandStrategy(self) :: TakeOrdered :: PartialAggregation :: LeftSemiJoin :: @@ -256,6 +257,11 @@ class SQLContext(@transient val sparkContext: SparkContext) Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil } + // TODO: or should we make QueryExecution protected[sql]? + protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution { + val logical = plan + } + /** * The primary workflow for executing relational queries using Spark. Designed to allow easy * access to the intermediate phases of query execution for developers. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 6463f47510633983ce786af2039f8f4b02350d41..295c265b1673f7c8cd7e011c00f6d523e7b7b11e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -233,4 +233,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case _ => Nil } } + + // TODO: this should be merged with SPARK-1508's SetCommandStrategy + case class CommandStrategy(context: SQLContext) extends Strategy { + def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case logical.ExplainCommand(child) => + val qe = context.mkQueryExecution(child) + Seq(execution.ExplainCommandPhysical(qe.executedPlan, plan.output)(context)) + case _ => Nil + } + } + } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala new file mode 100644 index 0000000000000000000000000000000000000000..5371d2f479e73617fbf8689749481494411960e2 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{SQLContext, Row} +import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute} + +case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute]) + (@transient context: SQLContext) extends UnaryNode { + def execute(): RDD[Row] = { + val planString = new GenericRow(Array[Any](child.toString)) + context.sparkContext.parallelize(Seq(planString)) + } + + override def otherCopyArgs = context :: Nil +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index fbab2ac16b896178d6b768a63b0697666b506b58..4b97dc25acf89a3c9f3036dc5745356ac1a5b116 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { val hiveContext = self override val strategies: Seq[Strategy] = Seq( + CommandStrategy(self), TakeOrdered, ParquetOperations, HiveTableScans, @@ -304,7 +305,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { */ def stringResult(): Seq[String] = analyzed match { case NativeCommand(cmd) => runSqlHive(cmd) - case ExplainCommand(plan) => new QueryExecution { val logical = plan }.toString.split("\n") + case ExplainCommand(plan) => mkQueryExecution(plan).toString.split("\n") case query => val result: Seq[Seq[Any]] = toRdd.collect().toSeq // We need the types so we can output struct field names diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 125cc18bfb2b5ca0e828ed86655f56fff9f5e67f..c56eee258047f478e3f00a831597e7939bbde113 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive /** * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution. @@ -159,4 +160,15 @@ class HiveQuerySuite extends HiveComparisonTest { hql("SHOW TABLES").toString hql("SELECT * FROM src").toString } + + test("SPARK-1704: Explain commands as a SchemaRDD") { + hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)") + val rdd = hql("explain select key, count(value) from src group by key") + assert(rdd.collect().size == 1) + assert(rdd.toString.contains("ExplainCommand")) + assert(rdd.filter(row => row.toString.contains("ExplainCommand")).collect().size == 0, + "actual contents of the result should be the plans of the query to be explained") + TestHive.reset() + } + }