From f9bf15d9bde4df2178f7a8f932c883bb77c46149 Mon Sep 17 00:00:00 2001 From: Herman van Hovell <hvanhovell@databricks.com> Date: Thu, 16 Jun 2016 13:47:36 -0700 Subject: [PATCH] [SPARK-15977][SQL] Fix TRUNCATE TABLE for Spark specific datasource tables ## What changes were proposed in this pull request? `TRUNCATE TABLE` is currently broken for Spark-specific datasource tables (json, csv, ...). This PR sets the correct location for these datasources, which allows them to be truncated. ## How was this patch tested? Extended the datasource `TRUNCATE TABLE` tests in `DDLSuite`. Author: Herman van Hovell <hvanhovell@databricks.com> Closes #13697 from hvanhovell/SPARK-15977. --- .../spark/sql/execution/command/tables.scala | 4 ++- .../sql/execution/command/DDLSuite.scala | 28 ++++++++++++------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 58bb5cdca9..3eb93a2922 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -348,7 +348,9 @@ case class TruncateTableCommand( s"for tables that are not partitioned: '$tableName'") } val locations = - if (isDatasourceTable || table.partitionColumnNames.isEmpty) { + if (isDatasourceTable) { + Seq(table.storage.serdeProperties.get("path")) + } else if (table.partitionColumnNames.isEmpty) { Seq(table.storage.locationUri) } else { catalog.listPartitions(tableName, partitionSpec).map(_.storage.locationUri) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index e15fcf4326..7eb2fff91d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1280,17 +1280,25 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { test("truncate table - datasource table") { import testImplicits._ val data = (1 to 10).map { i => (i, i) }.toDF("width", "length") - data.write.saveAsTable("rectangles") - spark.catalog.cacheTable("rectangles") - assume(spark.table("rectangles").collect().nonEmpty, "bad test; table was empty to begin with") - assume(spark.catalog.isCached("rectangles"), "bad test; table was not cached to begin with") - sql("TRUNCATE TABLE rectangles") - assert(spark.table("rectangles").collect().isEmpty) - assert(!spark.catalog.isCached("rectangles")) + + // Test both a Hive compatible and incompatible code path. + Seq("json", "parquet").foreach { format => + withTable("rectangles") { + data.write.format(format).saveAsTable("rectangles") + assume(spark.table("rectangles").collect().nonEmpty, + "bad test; table was empty to begin with") + sql("TRUNCATE TABLE rectangles") + assert(spark.table("rectangles").collect().isEmpty) + } + } + // truncating partitioned data source tables is not supported - data.write.partitionBy("length").saveAsTable("rectangles2") - assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)") - assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)") + withTable("rectangles", "rectangles2") { + data.write.saveAsTable("rectangles") + data.write.partitionBy("length").saveAsTable("rectangles2") + assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)") + assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)") + } } test("truncate table - external table, temporary table, view (not allowed)") { -- GitLab