From 8881765ac7ac6ba6fe9ef0d0a669c08cca58ed93 Mon Sep 17 00:00:00 2001
From: Andrew Or <andrew@databricks.com>
Date: Wed, 11 May 2016 15:30:53 -0700
Subject: [PATCH] [SPARK-15257][SQL] Require CREATE EXTERNAL TABLE to specify
 LOCATION

## What changes were proposed in this pull request?

Before:
```sql
-- uses warehouse dir anyway
CREATE EXTERNAL TABLE my_tab
-- doesn't actually delete the data
DROP TABLE my_tab
```
After:
```sql
-- no location is provided, throws exception
CREATE EXTERNAL TABLE my_tab
-- creates an external table using that location
CREATE EXTERNAL TABLE my_tab LOCATION '/path/to/something'
-- doesn't delete the data, which is expected
DROP TABLE my_tab
```

## How was this patch tested?

New test in `DDLCommandSuite`

Author: Andrew Or <andrew@databricks.com>

Closes #13032 from andrewor14/create-external-table-location.
---
 .../spark/sql/execution/SparkSqlParser.scala  |  4 ++++
 .../execution/command/DDLCommandSuite.scala   | 20 +++++++++++++++++--
 .../execution/HiveCompatibilitySuite.scala    |  6 ++++--
 .../spark/sql/hive/HiveDDLCommandSuite.scala  |  9 ++-------
 .../sql/hive/execution/HiveCommandSuite.scala |  2 +-
 .../hive/execution/HiveTableScanSuite.scala   |  2 +-
 .../sql/hive/execution/HiveUDFSuite.scala     |  2 +-
 7 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 87e6f9094d..a51665f838 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -780,6 +780,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
       .getOrElse(EmptyStorageFormat)
     val rowStorage = Option(ctx.rowFormat).map(visitRowFormat).getOrElse(EmptyStorageFormat)
     val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    // If we are creating an EXTERNAL table, then the LOCATION field is required
+    if (external && location.isEmpty) {
+      throw operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx)
+    }
     val storage = CatalogStorageFormat(
       locationUri = location,
       inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index a728ac3c8a..fa8dabfe1a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.execution.command
 
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{FunctionResource, FunctionResourceType}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, FunctionResource}
+import org.apache.spark.sql.catalyst.catalog.FunctionResourceType
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical.Project
@@ -35,7 +36,7 @@ class DDLCommandSuite extends PlanTest {
       parser.parsePlan(sql)
     }
     assert(e.getMessage.toLowerCase.contains("operation not allowed"))
-    containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p)) }
+    containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p.toLowerCase)) }
   }
 
   test("create database") {
@@ -211,6 +212,21 @@ class DDLCommandSuite extends PlanTest {
     comparePlans(parsed4, expected4)
   }
 
+  test("create external table - location must be specified") {
+    assertUnsupported(
+      sql = "CREATE EXTERNAL TABLE my_tab",
+      containsThesePhrases = Seq("create external table", "location"))
+    val query = "CREATE EXTERNAL TABLE my_tab LOCATION '/something/anything'"
+    parser.parsePlan(query) match {
+      case ct: CreateTable =>
+        assert(ct.table.tableType == CatalogTableType.EXTERNAL)
+        assert(ct.table.storage.locationUri == Some("/something/anything"))
+      case other =>
+        fail(s"Expected to parse ${classOf[CreateTable].getClass.getName} from query," +
+          s"got ${other.getClass.getName}: $query")
+    }
+  }
+
   // ALTER TABLE table_name RENAME TO new_table_name;
   // ALTER VIEW view_name RENAME TO new_view_name;
   test("alter table/view: rename table/view") {
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index f89a8479f0..54fb440b33 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -508,7 +508,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     // These tests use EXPLAIN FORMATTED, which is not supported
     "input4",
     "join0",
-    "plan_json"
+    "plan_json",
+
+    // This test uses CREATE EXTERNAL TABLE without specifying LOCATION
+    "alter2"
   )
 
   /**
@@ -521,7 +524,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "add_partition_no_whitelist",
     "add_partition_with_whitelist",
     "alias_casted_column",
-    "alter2",
     "alter_partition_with_whitelist",
     "alter_rename_partition",
     "ambiguous_col",
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 3d74235dc5..538e218f7e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -352,9 +352,10 @@ class HiveDDLCommandSuite extends PlanTest {
   }
 
   test("create table - external") {
-    val query = "CREATE EXTERNAL TABLE tab1 (id int, name string)"
+    val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'"
     val (desc, _) = extractTableDesc(query)
     assert(desc.tableType == CatalogTableType.EXTERNAL)
+    assert(desc.storage.locationUri == Some("/path/to/nowhere"))
   }
 
   test("create table - if not exists") {
@@ -454,12 +455,6 @@ class HiveDDLCommandSuite extends PlanTest {
     assert(e2.getMessage.contains("Operation not allowed"))
   }
 
-  test("create table - location") {
-    val query = "CREATE TABLE my_table (id int, name string) LOCATION '/path/to/mars'"
-    val (desc, _) = extractTableDesc(query)
-    assert(desc.storage.locationUri == Some("/path/to/mars"))
-  }
-
   test("create table - properties") {
     val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')"
     val (desc, _) = extractTableDesc(query)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
index 1eed5b6a6a..8225bd69c1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
@@ -35,7 +35,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
 
     sql(
       """
-        |CREATE EXTERNAL TABLE parquet_tab2 (c1 INT, c2 STRING)
+        |CREATE TABLE parquet_tab2 (c1 INT, c2 STRING)
         |STORED AS PARQUET
         |TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val")
       """.stripMargin)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index b0c0dcbe5c..8c9c37fece 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -65,7 +65,7 @@ class HiveTableScanSuite extends HiveComparisonTest {
     TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
     TestHive.sql(
       """
-        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
+        CREATE TABLE timestamp_query_null (time TIMESTAMP,id INT)
         ROW FORMAT DELIMITED
         FIELDS TERMINATED BY ','
         LINES TERMINATED BY '\n'
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index dd4321d1b6..51d537d43a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -72,7 +72,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
   test("hive struct udf") {
     sql(
       """
-      |CREATE EXTERNAL TABLE hiveUDFTestTable (
+      |CREATE TABLE hiveUDFTestTable (
       |   pair STRUCT<id: INT, value: INT>
       |)
       |PARTITIONED BY (partition STRING)
-- 
GitLab