diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index 6000d483db209d83073530aa811fb16be04a9400..68644f4d6b95b7dc0ecb13da976cdf08dd36b1f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -106,8 +106,10 @@ abstract class ExternalCatalog final def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = { val db = tableDefinition.database val name = tableDefinition.identifier.table + val tableDefinitionWithVersion = + tableDefinition.copy(createVersion = org.apache.spark.SPARK_VERSION) postToAll(CreateTablePreEvent(db, name)) - doCreateTable(tableDefinition, ignoreIfExists) + doCreateTable(tableDefinitionWithVersion, ignoreIfExists) postToAll(CreateTableEvent(db, name)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 9531456434a15eba7543ec57f9c3818b2aad07a4..f86510624aa7825169ca8ae25b6f1bdbd46aba9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -205,6 +205,9 @@ case class BucketSpec( * configured. * @param ignoredProperties is a list of table properties that are used by the underlying table * but ignored by Spark SQL yet. + * @param createVersion records the version of Spark that created this table metadata. The default + * is an empty string. We expect it will be read from the catalog or filled by + * ExternalCatalog.createTable. For temporary views, the value will be empty. */ case class CatalogTable( identifier: TableIdentifier, @@ -217,6 +220,7 @@ case class CatalogTable( owner: String = "", createTime: Long = System.currentTimeMillis, lastAccessTime: Long = -1, + createVersion: String = "", properties: Map[String, String] = Map.empty, stats: Option[CatalogStatistics] = None, viewText: Option[String] = None, @@ -302,8 +306,9 @@ case class CatalogTable( identifier.database.foreach(map.put("Database", _)) map.put("Table", identifier.table) if (owner.nonEmpty) map.put("Owner", owner) - map.put("Created", new Date(createTime).toString) + map.put("Created Time", new Date(createTime).toString) map.put("Last Access", new Date(lastAccessTime).toString) + map.put("Created By", "Spark " + createVersion) map.put("Type", tableType.name) provider.foreach(map.put("Provider", _)) bucketSpec.foreach(map ++= _.toLinkedHashMap) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 4fc947a88f6b8d90cc79244e5ea4607415b0ada3..f83c6379185c48c586188777b5e2566c51a43ba0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -480,7 +480,8 @@ class TreeNodeSuite extends SparkFunSuite { CatalogTableType.MANAGED, CatalogStorageFormat.empty, StructType(StructField("a", IntegerType, true) :: Nil), - createTime = 0L), + createTime = 0L, + createVersion = "2.x"), JObject( "product-class" -> classOf[CatalogTable].getName, @@ -509,6 +510,7 @@ class TreeNodeSuite extends SparkFunSuite { "owner" -> "", "createTime" -> 0, "lastAccessTime" -> -1, + "createVersion" -> "2.x", "tracksPartitionsInCatalog" -> false, "properties" -> JNull, "unsupportedFeatures" -> List.empty[String], diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out index 4bf4633491bd9b0b4f69299cc8ed252544db3a04..7873085da5069fc0b35897092bdcec11637b33c4 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out @@ -23,8 +23,9 @@ d string # Detailed Table Information Database default Table table_with_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Comment added @@ -52,8 +53,9 @@ d string # Detailed Table Information Database default Table table_with_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Comment modified comment @@ -88,8 +90,9 @@ b int # Detailed Table Information Database default Table table_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Location [not included in comparison]sql/core/spark-warehouse/table_comment @@ -114,8 +117,9 @@ b int # Detailed Table Information Database default Table table_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Comment added comment @@ -141,8 +145,9 @@ b int # Detailed Table Information Database default Table table_comment -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Location [not included in comparison]sql/core/spark-warehouse/table_comment diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index e2b79e8f7801dfa3e321f6f0689a3ed593d27930..b91f2c09f3cd4036c99da9470aa122e9b7b59138 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -120,8 +120,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -151,8 +152,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -190,8 +192,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -228,8 +231,9 @@ d string # Detailed Table Information Database default Table t -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type MANAGED Provider parquet Num Buckets 2 @@ -458,8 +462,9 @@ d string # Detailed Table Information Database default Table v -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Default Database default @@ -480,8 +485,9 @@ d string # Detailed Table Information Database default Table v -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Default Database default diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 8f2a54f7c24e2be46ad63e51523dca941f6137ae..da729cd757cfc9fabc22071796df05db34204213 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -119,8 +119,9 @@ SHOW TABLE EXTENDED LIKE 'show_t*' struct<database:string,tableName:string,isTemporary:boolean,information:string> -- !query 12 output show_t3 true Table: show_t3 -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type: VIEW Schema: root |-- e: integer (nullable = true) @@ -128,8 +129,9 @@ Schema: root showdb show_t1 false Database: showdb Table: show_t1 -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type: MANAGED Provider: parquet Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t1 @@ -144,8 +146,9 @@ Schema: root showdb show_t2 false Database: showdb Table: show_t2 -Created [not included in comparison] +Created Time [not included in comparison] Last Access [not included in comparison] +Created By [not included in comparison] Type: MANAGED Provider: parquet Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t2 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index d9130fdcfaea63c94aad61248ea78f0c019a346e..aa000bddf9c7eb93565ebddcc6a15b0697113a38 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -228,7 +228,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { // Get answer, but also get rid of the #1234 expression ids that show up in explain plans val answer = df.queryExecution.hiveResultString().map(_.replaceAll("#\\d+", "#x") .replaceAll("Location.*/sql/core/", s"Location ${notIncludedMsg}sql/core/") - .replaceAll("Created.*", s"Created $notIncludedMsg") + .replaceAll("Created By.*", s"Created By $notIncludedMsg") + .replaceAll("Created Time.*", s"Created Time $notIncludedMsg") .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")) // If the output is not pre-sorted, sort it. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 5c0a6aa724bf032a7c7330ad0f1f8c3cca7e1803..9332f773430e7b8a2b037390f550c23c72c5e171 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -68,6 +68,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo provider = Some("parquet"), partitionColumnNames = Seq("a", "b"), createTime = 0L, + createVersion = org.apache.spark.SPARK_VERSION, tracksPartitionsInCatalog = true) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 172317c34659db7f5ee71a66fe55af3c651590ce..19e5f789822e81c179608b402a5a2f43488ad97e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -390,6 +390,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val bucketSpec = table.bucketSpec val properties = new mutable.HashMap[String, String] + + properties.put(CREATED_SPARK_VERSION, table.createVersion) + // Serialized JSON schema string may be too long to be stored into a single metastore table // property. In this case, we split the JSON string and store each part as a separate table // property. @@ -594,7 +597,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Set the `schema`, `partitionColumnNames` and `bucketSpec` from the old table definition, // to retain the spark specific format if it is. val propsFromOldTable = oldTableDef.properties.filter { case (k, v) => - k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) + k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) || + k.startsWith(CREATED_SPARK_VERSION) } val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp val newDef = tableDefinition.copy( @@ -700,6 +704,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table = restoreDataSourceTable(table, provider) } + // Restore version info + val version: String = table.properties.getOrElse(CREATED_SPARK_VERSION, "2.2 or prior") + // Restore Spark's statistics from information in Metastore. val statsProps = table.properties.filterKeys(_.startsWith(STATISTICS_PREFIX)) @@ -735,6 +742,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Get the original table properties as defined by the user. table.copy( + createVersion = version, properties = table.properties.filterNot { case (key, _) => key.startsWith(SPARK_SQL_PREFIX) }) } @@ -1208,6 +1216,8 @@ object HiveExternalCatalog { val TABLE_PARTITION_PROVIDER_CATALOG = "catalog" val TABLE_PARTITION_PROVIDER_FILESYSTEM = "filesystem" + val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version" + /** * Returns the fully qualified name used in table properties for a particular column stat. * For example, for column "mycol", and "min" stat, this should return diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 5b62e37311d88b7574c0da05cba7ced4df9ba66d..0007d25614a290dd4de3f8fea27ac8eaf8aa2f95 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -90,6 +90,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA provider = if (isDataSource) Some("parquet") else Some("hive"), partitionColumnNames = Seq("a", "b"), createTime = 0L, + createVersion = org.apache.spark.SPARK_VERSION, tracksPartitionsInCatalog = true) }