From 8881765ac7ac6ba6fe9ef0d0a669c08cca58ed93 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 11 May 2016 15:30:53 -0700 Subject: [PATCH] [SPARK-15257][SQL] Require CREATE EXTERNAL TABLE to specify LOCATION ## What changes were proposed in this pull request? Before: ```sql -- uses warehouse dir anyway CREATE EXTERNAL TABLE my_tab -- doesn't actually delete the data DROP TABLE my_tab ``` After: ```sql -- no location is provided, throws exception CREATE EXTERNAL TABLE my_tab -- creates an external table using that location CREATE EXTERNAL TABLE my_tab LOCATION '/path/to/something' -- doesn't delete the data, which is expected DROP TABLE my_tab ``` ## How was this patch tested? New test in `DDLCommandSuite` Author: Andrew Or <andrew@databricks.com> Closes #13032 from andrewor14/create-external-table-location. --- .../spark/sql/execution/SparkSqlParser.scala | 4 ++++ .../execution/command/DDLCommandSuite.scala | 20 +++++++++++++++++-- .../execution/HiveCompatibilitySuite.scala | 6 ++++-- .../spark/sql/hive/HiveDDLCommandSuite.scala | 9 ++------- .../sql/hive/execution/HiveCommandSuite.scala | 2 +- .../hive/execution/HiveTableScanSuite.scala | 2 +- .../sql/hive/execution/HiveUDFSuite.scala | 2 +- 7 files changed, 31 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 87e6f9094d..a51665f838 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -780,6 +780,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { .getOrElse(EmptyStorageFormat) val rowStorage = Option(ctx.rowFormat).map(visitRowFormat).getOrElse(EmptyStorageFormat) val location = Option(ctx.locationSpec).map(visitLocationSpec) + // If we are creating an EXTERNAL table, then the LOCATION field is required + if (external && location.isEmpty) { + throw operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx) + } val storage = CatalogStorageFormat( locationUri = location, inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index a728ac3c8a..fa8dabfe1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{FunctionResource, FunctionResourceType} +import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, FunctionResource} +import org.apache.spark.sql.catalyst.catalog.FunctionResourceType import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.Project @@ -35,7 +36,7 @@ class DDLCommandSuite extends PlanTest { parser.parsePlan(sql) } assert(e.getMessage.toLowerCase.contains("operation not allowed")) - containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p)) } + containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p.toLowerCase)) } } test("create database") { @@ -211,6 +212,21 @@ class DDLCommandSuite extends PlanTest { comparePlans(parsed4, expected4) } + test("create external table - location must be specified") { + assertUnsupported( + sql = "CREATE EXTERNAL TABLE my_tab", + containsThesePhrases = Seq("create external table", "location")) + val query = "CREATE EXTERNAL TABLE my_tab LOCATION '/something/anything'" + parser.parsePlan(query) match { + case ct: CreateTable => + assert(ct.table.tableType == CatalogTableType.EXTERNAL) + assert(ct.table.storage.locationUri == Some("/something/anything")) + case other => + fail(s"Expected to parse ${classOf[CreateTable].getClass.getName} from query," + + s"got ${other.getClass.getName}: $query") + } + } + // ALTER TABLE table_name RENAME TO new_table_name; // ALTER VIEW view_name RENAME TO new_view_name; test("alter table/view: rename table/view") { diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index f89a8479f0..54fb440b33 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -508,7 +508,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // These tests use EXPLAIN FORMATTED, which is not supported "input4", "join0", - "plan_json" + "plan_json", + + // This test uses CREATE EXTERNAL TABLE without specifying LOCATION + "alter2" ) /** @@ -521,7 +524,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "add_partition_no_whitelist", "add_partition_with_whitelist", "alias_casted_column", - "alter2", "alter_partition_with_whitelist", "alter_rename_partition", "ambiguous_col", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 3d74235dc5..538e218f7e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -352,9 +352,10 @@ class HiveDDLCommandSuite extends PlanTest { } test("create table - external") { - val query = "CREATE EXTERNAL TABLE tab1 (id int, name string)" + val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'" val (desc, _) = extractTableDesc(query) assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some("/path/to/nowhere")) } test("create table - if not exists") { @@ -454,12 +455,6 @@ class HiveDDLCommandSuite extends PlanTest { assert(e2.getMessage.contains("Operation not allowed")) } - test("create table - location") { - val query = "CREATE TABLE my_table (id int, name string) LOCATION '/path/to/mars'" - val (desc, _) = extractTableDesc(query) - assert(desc.storage.locationUri == Some("/path/to/mars")) - } - test("create table - properties") { val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')" val (desc, _) = extractTableDesc(query) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index 1eed5b6a6a..8225bd69c1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -35,7 +35,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto sql( """ - |CREATE EXTERNAL TABLE parquet_tab2 (c1 INT, c2 STRING) + |CREATE TABLE parquet_tab2 (c1 INT, c2 STRING) |STORED AS PARQUET |TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val") """.stripMargin) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index b0c0dcbe5c..8c9c37fece 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -65,7 +65,7 @@ class HiveTableScanSuite extends HiveComparisonTest { TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null") TestHive.sql( """ - CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT) + CREATE TABLE timestamp_query_null (time TIMESTAMP,id INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index dd4321d1b6..51d537d43a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -72,7 +72,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { test("hive struct udf") { sql( """ - |CREATE EXTERNAL TABLE hiveUDFTestTable ( + |CREATE TABLE hiveUDFTestTable ( | pair STRUCT<id: INT, value: INT> |) |PARTITIONED BY (partition STRING) -- GitLab