From 2b10ebe6ac1cdc2c723cb47e4b88cfbf39e0de08 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Wed, 10 Aug 2016 17:05:50 +0800
Subject: [PATCH] [SPARK-16185][SQL] Better Error Messages When Creating Table
 As Select Without Enabling Hive Support

#### What changes were proposed in this pull request?
When we do not turn on the Hive Support, the following query generates a confusing error message by Planner:
```Scala
sql("CREATE TABLE t2 SELECT a, b from t1")
```

```
assertion failed: No plan for CreateTable CatalogTable(
	Table: `t2`
	Created: Tue Aug 09 23:45:32 PDT 2016
	Last Access: Wed Dec 31 15:59:59 PST 1969
	Type: MANAGED
	Provider: hive
	Storage(InputFormat: org.apache.hadoop.mapred.TextInputFormat, OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat)), ErrorIfExists
+- Relation[a#19L,b#20L] parquet

java.lang.AssertionError: assertion failed: No plan for CreateTable CatalogTable(
	Table: `t2`
	Created: Tue Aug 09 23:45:32 PDT 2016
	Last Access: Wed Dec 31 15:59:59 PST 1969
	Type: MANAGED
	Provider: hive
	Storage(InputFormat: org.apache.hadoop.mapred.TextInputFormat, OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat)), ErrorIfExists
+- Relation[a#19L,b#20L] parquet
```

This PR is to issue a better error message:
```
Hive support is required to use CREATE Hive TABLE AS SELECT
```

#### How was this patch tested?
Added test cases in `DDLSuite.scala`

Author: gatorsmile <gatorsmile@gmail.com>

Closes #13886 from gatorsmile/createCatalogedTableAsSelect.
---
 .../sql/execution/datasources/rules.scala     | 15 ++++++++++
 .../spark/sql/internal/SessionState.scala     |  3 +-
 .../sql/execution/command/DDLSuite.scala      | 28 +++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index c133dda13e..fc8d8c3667 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -272,6 +272,21 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] {
   }
 }
 
+/**
+ * A rule to check whether the functions are supported only when Hive support is enabled
+ */
+object HiveOnlyCheck extends (LogicalPlan => Unit) {
+  def apply(plan: LogicalPlan): Unit = {
+    plan.foreach {
+      case CreateTable(tableDesc, _, Some(_))
+          if tableDesc.provider.get == "hive" =>
+        throw new AnalysisException("Hive support is required to use CREATE Hive TABLE AS SELECT")
+
+      case _ => // OK
+    }
+  }
+}
+
 /**
  * A rule to do various checks before inserting into or writing to a data source table.
  */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 052bce0923..ab27381c06 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -117,7 +117,8 @@ private[sql] class SessionState(sparkSession: SparkSession) {
         DataSourceAnalysis(conf) ::
         (if (conf.runSQLonFile) new ResolveDataSource(sparkSession) :: Nil else Nil)
 
-      override val extendedCheckRules = Seq(datasources.PreWriteCheck(conf, catalog))
+      override val extendedCheckRules =
+        Seq(PreWriteCheck(conf, catalog), HiveOnlyCheck)
     }
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 53376c56f1..0eb3f2002d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1578,6 +1578,34 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       "WITH SERDEPROPERTIES ('spark.sql.sources.me'='anything')")
   }
 
+  test("Create Hive Table As Select") {
+    import testImplicits._
+    withTable("t", "t1") {
+      var e = intercept[AnalysisException] {
+        sql("CREATE TABLE t SELECT 1 as a, 1 as b")
+      }.getMessage
+      assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT"))
+
+      spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1")
+      e = intercept[AnalysisException] {
+        sql("CREATE TABLE t SELECT a, b from t1")
+      }.getMessage
+      assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT"))
+    }
+  }
+
+  test("Create Data Source Table As Select") {
+    import testImplicits._
+    withTable("t", "t1", "t2") {
+      sql("CREATE TABLE t USING parquet SELECT 1 as a, 1 as b")
+      checkAnswer(spark.table("t"), Row(1, 1) :: Nil)
+
+      spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1")
+      sql("CREATE TABLE t2 USING parquet SELECT a, b from t1")
+      checkAnswer(spark.table("t2"), spark.table("t1"))
+    }
+  }
+
   test("drop current database") {
     sql("CREATE DATABASE temp")
     sql("USE temp")
-- 
GitLab