From f14c4ba001fbdbcc9faa46896f1f9d08a7d06609 Mon Sep 17 00:00:00 2001
From: Andrew Or <andrew@databricks.com>
Date: Wed, 11 May 2016 17:29:58 -0700
Subject: [PATCH] [SPARK-15276][SQL] CREATE TABLE with LOCATION should imply
 EXTERNAL

## What changes were proposed in this pull request?

Before:
```sql
-- uses that location but issues a warning
CREATE TABLE my_tab LOCATION /some/path
-- deletes any existing data in the specified location
DROP TABLE my_tab
```

After:
```sql
-- uses that location but creates an EXTERNAL table instead
CREATE TABLE my_tab LOCATION /some/path
-- does not delete the data at /some/path
DROP TABLE my_tab
```

This patch essentially makes the `EXTERNAL` field optional. This is related to #13032.

## How was this patch tested?

New test in `DDLCommandSuite`.

Author: Andrew Or <andrew@databricks.com>

Closes #13060 from andrewor14/location-implies-external.
---
 .../apache/spark/sql/execution/SparkSqlParser.scala  | 12 +++++++-----
 .../sql/execution/command/DDLCommandSuite.scala      | 12 ++++++++++++
 .../spark/sql/hive/execution/HiveDDLSuite.scala      |  8 +++-----
 .../spark/sql/hive/execution/SQLQuerySuite.scala     |  5 +----
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a51665f838..53aba1f206 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -745,11 +745,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
     if (ctx.bucketSpec != null) {
       throw operationNotAllowed("CREATE TABLE ... CLUSTERED BY", ctx)
     }
-    val tableType = if (external) {
-      CatalogTableType.EXTERNAL
-    } else {
-      CatalogTableType.MANAGED
-    }
     val comment = Option(ctx.STRING).map(string)
     val partitionCols = Option(ctx.partitionColumns).toSeq.flatMap(visitCatalogColumns)
     val cols = Option(ctx.columns).toSeq.flatMap(visitCatalogColumns)
@@ -791,6 +786,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
       serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde),
       compressed = false,
       serdeProperties = rowStorage.serdeProperties ++ fileStorage.serdeProperties)
+    // If location is defined, we'll assume this is an external table.
+    // Otherwise, we may accidentally delete existing data.
+    val tableType = if (external || location.isDefined) {
+      CatalogTableType.EXTERNAL
+    } else {
+      CatalogTableType.MANAGED
+    }
 
     // TODO support the sql text - have a proper location for this!
     val tableDesc = CatalogTable(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index fa8dabfe1a..aeb613acb5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -227,6 +227,18 @@ class DDLCommandSuite extends PlanTest {
     }
   }
 
+  test("create table - location implies external") {
+    val query = "CREATE TABLE my_tab LOCATION '/something/anything'"
+    parser.parsePlan(query) match {
+      case ct: CreateTable =>
+        assert(ct.table.tableType == CatalogTableType.EXTERNAL)
+        assert(ct.table.storage.locationUri == Some("/something/anything"))
+      case other =>
+        fail(s"Expected to parse ${classOf[CreateTable].getClass.getName} from query," +
+            s"got ${other.getClass.getName}: $query")
+    }
+  }
+
   // ALTER TABLE table_name RENAME TO new_table_name;
   // ALTER VIEW view_name RENAME TO new_view_name;
   test("alter table/view: rename table/view") {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 8b60802b91..ae61322844 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -72,7 +72,7 @@ class HiveDDLSuite
     }
   }
 
-  test("drop managed tables in default database") {
+  test("drop external tables in default database") {
     withTempDir { tmpDir =>
       val tabName = "tab1"
       withTable(tabName) {
@@ -88,13 +88,11 @@ class HiveDDLSuite
         val hiveTable =
           hiveContext.sessionState.catalog
             .getTableMetadata(TableIdentifier(tabName, Some("default")))
-        // It is a managed table, although it uses external in SQL
-        assert(hiveTable.tableType == CatalogTableType.MANAGED)
+        assert(hiveTable.tableType == CatalogTableType.EXTERNAL)
 
         assert(tmpDir.listFiles.nonEmpty)
         sql(s"DROP TABLE $tabName")
-        // The data are deleted since the table type is not EXTERNAL
-        assert(tmpDir.listFiles == null)
+        assert(tmpDir.listFiles.nonEmpty)
       }
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 6ce5051cbd..ac9a3930fd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1534,10 +1534,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     assert(fs.listStatus(new Path(path, "part=1")).nonEmpty)
 
     sql("drop table test_table")
-    assert(
-      !fs.exists(path),
-      "Once a managed table has been dropped, " +
-        "dirs of this table should also have been deleted.")
+    assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
   }
 
   test("SPARK-14981: DESC not supported for sorting columns") {
-- 
GitLab