From f14c4ba001fbdbcc9faa46896f1f9d08a7d06609 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 11 May 2016 17:29:58 -0700 Subject: [PATCH] [SPARK-15276][SQL] CREATE TABLE with LOCATION should imply EXTERNAL ## What changes were proposed in this pull request? Before: ```sql -- uses that location but issues a warning CREATE TABLE my_tab LOCATION /some/path -- deletes any existing data in the specified location DROP TABLE my_tab ``` After: ```sql -- uses that location but creates an EXTERNAL table instead CREATE TABLE my_tab LOCATION /some/path -- does not delete the data at /some/path DROP TABLE my_tab ``` This patch essentially makes the `EXTERNAL` field optional. This is related to #13032. ## How was this patch tested? New test in `DDLCommandSuite`. Author: Andrew Or <andrew@databricks.com> Closes #13060 from andrewor14/location-implies-external. --- .../apache/spark/sql/execution/SparkSqlParser.scala | 12 +++++++----- .../sql/execution/command/DDLCommandSuite.scala | 12 ++++++++++++ .../spark/sql/hive/execution/HiveDDLSuite.scala | 8 +++----- .../spark/sql/hive/execution/SQLQuerySuite.scala | 5 +---- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index a51665f838..53aba1f206 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -745,11 +745,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { if (ctx.bucketSpec != null) { throw operationNotAllowed("CREATE TABLE ... CLUSTERED BY", ctx) } - val tableType = if (external) { - CatalogTableType.EXTERNAL - } else { - CatalogTableType.MANAGED - } val comment = Option(ctx.STRING).map(string) val partitionCols = Option(ctx.partitionColumns).toSeq.flatMap(visitCatalogColumns) val cols = Option(ctx.columns).toSeq.flatMap(visitCatalogColumns) @@ -791,6 +786,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde), compressed = false, serdeProperties = rowStorage.serdeProperties ++ fileStorage.serdeProperties) + // If location is defined, we'll assume this is an external table. + // Otherwise, we may accidentally delete existing data. + val tableType = if (external || location.isDefined) { + CatalogTableType.EXTERNAL + } else { + CatalogTableType.MANAGED + } // TODO support the sql text - have a proper location for this! val tableDesc = CatalogTable( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index fa8dabfe1a..aeb613acb5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -227,6 +227,18 @@ class DDLCommandSuite extends PlanTest { } } + test("create table - location implies external") { + val query = "CREATE TABLE my_tab LOCATION '/something/anything'" + parser.parsePlan(query) match { + case ct: CreateTable => + assert(ct.table.tableType == CatalogTableType.EXTERNAL) + assert(ct.table.storage.locationUri == Some("/something/anything")) + case other => + fail(s"Expected to parse ${classOf[CreateTable].getClass.getName} from query," + + s"got ${other.getClass.getName}: $query") + } + } + // ALTER TABLE table_name RENAME TO new_table_name; // ALTER VIEW view_name RENAME TO new_view_name; test("alter table/view: rename table/view") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 8b60802b91..ae61322844 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -72,7 +72,7 @@ class HiveDDLSuite } } - test("drop managed tables in default database") { + test("drop external tables in default database") { withTempDir { tmpDir => val tabName = "tab1" withTable(tabName) { @@ -88,13 +88,11 @@ class HiveDDLSuite val hiveTable = hiveContext.sessionState.catalog .getTableMetadata(TableIdentifier(tabName, Some("default"))) - // It is a managed table, although it uses external in SQL - assert(hiveTable.tableType == CatalogTableType.MANAGED) + assert(hiveTable.tableType == CatalogTableType.EXTERNAL) assert(tmpDir.listFiles.nonEmpty) sql(s"DROP TABLE $tabName") - // The data are deleted since the table type is not EXTERNAL - assert(tmpDir.listFiles == null) + assert(tmpDir.listFiles.nonEmpty) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6ce5051cbd..ac9a3930fd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1534,10 +1534,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { assert(fs.listStatus(new Path(path, "part=1")).nonEmpty) sql("drop table test_table") - assert( - !fs.exists(path), - "Once a managed table has been dropped, " + - "dirs of this table should also have been deleted.") + assert(fs.exists(path), "This is an external table, so the data should not have been dropped") } test("SPARK-14981: DESC not supported for sorting columns") { -- GitLab