From acb7fed23700a524b6d92ee745ee9de5a6bb2f22 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Sat, 2 Sep 2017 14:53:41 -0700
Subject: [PATCH] [SPARK-21891][SQL] Add TBLPROPERTIES to DDL statement:
 CREATE TABLE USING

## What changes were proposed in this pull request?

Add `TBLPROPERTIES` to the DDL statement `CREATE TABLE USING`.

After this change, the DDL becomes

```
CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name
USING table_provider
[OPTIONS table_property_list]
[PARTITIONED BY (col_name, col_name, ...)]
[CLUSTERED BY (col_name, col_name, ...)
 [SORTED BY (col_name [ASC|DESC], ...)]
 INTO num_buckets BUCKETS
]
[LOCATION path]
[COMMENT table_comment]
[TBLPROPERTIES (property_name=property_value, ...)]
[[AS] select_statement];
```

## How was this patch tested?

Add a few tests

Author: gatorsmile <gatorsmile@gmail.com>

Closes #19100 from gatorsmile/addTablePropsToCreateTableUsing.
---
 .../spark/sql/catalyst/parser/SqlBase.g4      |  1 +
 .../spark/sql/execution/SparkSqlParser.scala  |  7 +++++--
 .../resources/sql-tests/inputs/describe.sql   |  3 ++-
 .../sql-tests/results/cross-join.sql.out      |  1 +
 .../sql-tests/results/describe.sql.out        |  7 +++++--
 .../OptimizeMetadataOnlyQuerySuite.scala      |  8 ++++++++
 .../execution/command/DDLParserSuite.scala    | 20 +++++++++++++++++++
 7 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 5d4363f945..f741dcfbf2 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -81,6 +81,7 @@ statement
         (PARTITIONED BY partitionColumnNames=identifierList)?
         bucketSpec? locationSpec?
         (COMMENT comment=STRING)?
+        (TBLPROPERTIES tableProps=tablePropertyList)?
         (AS? query)?                                                   #createTable
     | createTableHeader ('(' columns=colTypeList ')')?
         (COMMENT comment=STRING)?
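For context, a brief illustration of what the extended `#createTable` rule accepts: the optional `tableProps` capture slots `TBLPROPERTIES` in between `COMMENT` and the optional `AS` query. This is a minimal sketch against a plain SparkSession; the table and property names are illustrative, not from the patch.

```
import org.apache.spark.sql.SparkSession

object TblPropertiesSyntaxDemo {
  def main(args: Array[String]): Unit = {
    // Local session purely for the demo; any SparkSession would do.
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("tblproperties-syntax-demo")
      .getOrCreate()

    // One statement exercising OPTIONS, COMMENT and the new TBLPROPERTIES
    // clause together, in the order the grammar rule above expects.
    spark.sql(
      """CREATE TABLE demo_tab (id INT, name STRING)
        |USING parquet
        |OPTIONS (compression 'snappy')
        |COMMENT 'demo table'
        |TBLPROPERTIES ('created.by' = 'etl-job', 'quality' = 'bronze')""".stripMargin)

    spark.stop()
  }
}
```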
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 8379e740a0..d3f6ab5654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -385,7 +385,8 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    *   ]
    *   [LOCATION path]
    *   [COMMENT table_comment]
-   *   [AS select_statement];
+   *   [TBLPROPERTIES (property_name=property_value, ...)]
+   *   [[AS] select_statement];
    * }}}
    */
   override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) {
@@ -400,6 +401,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       Option(ctx.partitionColumnNames)
         .map(visitIdentifierList(_).toArray)
         .getOrElse(Array.empty[String])
+    val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
 
     val location = Option(ctx.locationSpec).map(visitLocationSpec)
@@ -410,7 +412,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
         "LOCATION and 'path' in OPTIONS are both used to indicate the custom table path, " +
         "you can only specify one of them.", ctx)
     }
-    val customLocation = storage.locationUri.orElse(location.map(CatalogUtils.stringToURI(_)))
+    val customLocation = storage.locationUri.orElse(location.map(CatalogUtils.stringToURI))
 
     val tableType = if (customLocation.isDefined) {
       CatalogTableType.EXTERNAL
@@ -426,6 +428,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       provider = Some(provider),
       partitionColumnNames = partitionColumnNames,
       bucketSpec = bucketSpec,
+      properties = properties,
      comment = Option(ctx.comment).map(string))
 
     // Determine the storage mode.
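The builder change above is the whole round trip: `visitCreateTable` copies `ctx.tableProps` through `visitPropertyKeyValues` into `CatalogTable.properties`, so the key/value pairs persist in the catalog entry. A small sketch of verifying that through the public `SHOW TBLPROPERTIES` command (table and key names are hypothetical):

```
import org.apache.spark.sql.SparkSession

object ReadBackTblProperties {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("read-back-tblproperties")
      .getOrCreate()

    spark.sql("CREATE TABLE my_tab (a INT) USING parquet TBLPROPERTIES ('k' = 'v')")

    // SHOW TBLPROPERTIES returns one (key, value) row per stored property,
    // which should now include the pair parsed from the DDL above.
    val props = spark.sql("SHOW TBLPROPERTIES my_tab")
      .collect()
      .map(row => row.getString(0) -> row.getString(1))
      .toMap
    assert(props.get("k").contains("v"))

    spark.stop()
  }
}
```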
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index a222e11916..f26d5efec0 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -1,7 +1,8 @@
 CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet
   OPTIONS (a '1', b '2')
   PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS
-  COMMENT 'table_comment';
+  COMMENT 'table_comment'
+  TBLPROPERTIES (t 'test');
 
 CREATE TEMPORARY VIEW temp_v AS SELECT * FROM t;
 

diff --git a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
index e75cc4448a..3833c42bdf 100644
--- a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
@@ -128,6 +128,7 @@ two	2	two	2	one	1	two	2
 two	2	two	2	three	3	two	2
 two	2	two	2	two	2	two	2
+
 
 -- !query 12
 SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k)
 -- !query 12 schema

diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index b91f2c09f3..8c908b7625 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -7,6 +7,7 @@ CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet
   OPTIONS (a '1', b '2')
   PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS
   COMMENT 'table_comment'
+  TBLPROPERTIES (t 'test')
 -- !query 0 schema
 struct<>
 -- !query 0 output
@@ -129,7 +130,7 @@ Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
 Comment	table_comment
-Table Properties	[e=3]
+Table Properties	[t=test, e=3]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
@@ -161,7 +162,7 @@ Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
 Comment	table_comment
-Table Properties	[e=3]
+Table Properties	[t=test, e=3]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
@@ -201,6 +202,7 @@ Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
 Comment	table_comment
+Table Properties	[t=test]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
@@ -239,6 +241,7 @@ Provider	parquet
 Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
+Table Properties	[t=test]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
index 223c3d7729..78c1e5dae5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
@@ -117,4 +117,12 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext {
     "select partcol1, max(partcol2) from srcpart where partcol1 = 0 group by rollup (partcol1)",
     "select partcol2 from (select partcol2 from srcpart where partcol1 = 0 union all " +
       "select partcol2 from srcpart where partcol1 = 1) t group by partcol2")
+
+  test("SPARK-21884 Fix StackOverflowError on MetadataOnlyQuery") {
+    withTable("t_1000") {
+      sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)")
+      (1 to 1000).foreach(p => sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)"))
+      sql("SELECT COUNT(DISTINCT p) FROM t_1000").collect()
+    }
+  }
 }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index 70df7607a7..4ee38215f5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -473,6 +473,26 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
     }
   }
 
+  test("create table - with table properties") {
+    val sql = "CREATE TABLE my_tab(a INT, b STRING) USING parquet TBLPROPERTIES('test' = 'test')"
+
+    val expectedTableDesc = CatalogTable(
+      identifier = TableIdentifier("my_tab"),
+      tableType = CatalogTableType.MANAGED,
+      storage = CatalogStorageFormat.empty,
+      schema = new StructType().add("a", IntegerType).add("b", StringType),
+      provider = Some("parquet"),
+      properties = Map("test" -> "test"))
+
+    parser.parsePlan(sql) match {
+      case CreateTable(tableDesc, _, None) =>
+        assert(tableDesc == expectedTableDesc.copy(createTime = tableDesc.createTime))
+      case other =>
+        fail(s"Expected to parse ${classOf[CreateTableCommand].getClass.getName} from query," +
+          s"got ${other.getClass.getName}: $sql")
+    }
+  }
+
   test("create table - with location") {
     val v1 = "CREATE TABLE my_tab(a INT, b STRING) USING parquet LOCATION '/tmp/file'"
 
-- 
GitLab
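A closing note on coverage: the new `DDLParserSuite` case pins down the plain `CREATE TABLE ... TBLPROPERTIES` path; per the updated doc comment, the clause should also compose with the `[[AS] select_statement]` tail. A hedged sketch of that CTAS combination (all names are illustrative, not from the patch):

```
import org.apache.spark.sql.SparkSession

object CtasWithTblProperties {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("ctas-with-tblproperties")
      .getOrCreate()

    spark.range(10).createOrReplaceTempView("src")

    // CTAS form: the TBLPROPERTIES clause precedes the AS SELECT tail.
    spark.sql(
      """CREATE TABLE ctas_tab
        |USING parquet
        |TBLPROPERTIES ('source.view' = 'src')
        |AS SELECT id FROM src""".stripMargin)

    spark.sql("SHOW TBLPROPERTIES ctas_tab").show(truncate = false)
    spark.stop()
  }
}
```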