Commit e862dc90 authored by Wenchen Fan, committed by gatorsmile

[SPARK-21150][SQL] Persistent view stored in Hive metastore should be case preserving

## What changes were proposed in this pull request?

This fixes a regression in Spark 2.2. Spark 2.2 introduced a new way to resolve persisted views (https://issues.apache.org/jira/browse/SPARK-18209), but it made persisted views non-case-preserving, because the view schema is stored directly in the Hive metastore, which lower-cases column names. We should follow data source tables and store the schema in table properties instead.
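For illustration, here is a minimal, self-contained sketch of that table-properties workaround. This is not the actual Spark internals: the property keys mirror the `spark.sql.sources.schema.*` convention used by data source tables (the real constants live in `HiveExternalCatalog`), and the chunking threshold is an assumption. The idea is that the case-sensitive schema is serialized to JSON, split into metastore-safe chunks, and reassembled on read.

```scala
import org.apache.spark.sql.types.{DataType, StructType}

// Hypothetical sketch: Hive's metastore lower-cases column names, so the
// case-preserving schema is stored as JSON in table properties instead.
object ViewSchemaProps {
  // Property keys follow the data-source-table convention.
  val SchemaNumParts = "spark.sql.sources.schema.numParts"
  val SchemaPartPrefix = "spark.sql.sources.schema.part."

  // Serialize the schema and split it into chunks, since metastore property
  // values have a length limit (the threshold here is an assumption).
  def toProps(schema: StructType, threshold: Int = 4000): Map[String, String] = {
    val parts = schema.json.grouped(threshold).toSeq
    val indexed = parts.zipWithIndex.map { case (p, i) => s"$SchemaPartPrefix$i" -> p }
    (indexed :+ (SchemaNumParts -> parts.size.toString)).toMap
  }

  // Reassemble the case-preserving schema when restoring the view's metadata.
  def fromProps(props: Map[String, String]): StructType = {
    val numParts = props(SchemaNumParts).toInt
    val json = (0 until numParts).map(i => props(s"$SchemaPartPrefix$i")).mkString
    DataType.fromJson(json).asInstanceOf[StructType]
  }
}
```

On the read path (see `restoreTableMetadata` in the diff below), the presence of the num-parts property is what distinguishes views written by Spark 2.2+ from older views, which fall back to the schema stored in the metastore.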

## How was this patch tested?

A new regression test in `SQLViewSuite`, verifying that both `CREATE VIEW` and `CREATE OR REPLACE VIEW` preserve mixed-case column names.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #18360 from cloud-fan/view.
parent ef162289
```diff
@@ -159,7 +159,9 @@ case class CreateViewCommand(
       checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
 
       // Handles `CREATE OR REPLACE VIEW v0 AS SELECT ...`
-      catalog.alterTable(prepareTable(sparkSession, analyzedPlan))
+      // Nothing we need to retain from the old view, so just drop and create a new one
+      catalog.dropTable(viewIdent, ignoreIfNotExists = false, purge = false)
+      catalog.createTable(prepareTable(sparkSession, analyzedPlan), ignoreIfExists = false)
     } else {
       // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already
       // exists.
```
```diff
@@ -669,4 +669,14 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
         "positive."))
     }
   }
+
+  test("permanent view should be case-preserving") {
+    withView("v") {
+      sql("CREATE VIEW v AS SELECT 1 as aBc")
+      assert(spark.table("v").schema.head.name == "aBc")
+
+      sql("CREATE OR REPLACE VIEW v AS SELECT 2 as cBa")
+      assert(spark.table("v").schema.head.name == "cBa")
+    }
+  }
 }
```
```diff
@@ -224,39 +224,36 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       throw new TableAlreadyExistsException(db = db, table = table)
     }
 
-    if (tableDefinition.tableType == VIEW) {
-      client.createTable(tableDefinition, ignoreIfExists)
-    } else {
-      // Ideally we should not create a managed table with location, but Hive serde table can
-      // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have
-      // to create the table directory and write out data before we create this table, to avoid
-      // exposing a partial written table.
-      val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
-        tableDefinition.storage.locationUri.isEmpty
-
-      val tableLocation = if (needDefaultTableLocation) {
-        Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
-      } else {
-        tableDefinition.storage.locationUri
-      }
+    // Ideally we should not create a managed table with location, but Hive serde table can
+    // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have
+    // to create the table directory and write out data before we create this table, to avoid
+    // exposing a partial written table.
+    val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
+      tableDefinition.storage.locationUri.isEmpty
+
+    val tableLocation = if (needDefaultTableLocation) {
+      Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
+    } else {
+      tableDefinition.storage.locationUri
+    }
 
-      if (DDLUtils.isHiveTable(tableDefinition)) {
-        val tableWithDataSourceProps = tableDefinition.copy(
-          // We can't leave `locationUri` empty and count on Hive metastore to set a default table
-          // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default
-          // table location for tables in default database, while we expect to use the location of
-          // default database.
-          storage = tableDefinition.storage.copy(locationUri = tableLocation),
-          // Here we follow data source tables and put table metadata like table schema, partition
-          // columns etc. in table properties, so that we can work around the Hive metastore issue
-          // about not case preserving and make Hive serde table support mixed-case column names.
-          properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
-        client.createTable(tableWithDataSourceProps, ignoreIfExists)
-      } else {
-        createDataSourceTable(
-          tableDefinition.withNewStorage(locationUri = tableLocation),
-          ignoreIfExists)
-      }
+    if (DDLUtils.isDatasourceTable(tableDefinition)) {
+      createDataSourceTable(
+        tableDefinition.withNewStorage(locationUri = tableLocation),
+        ignoreIfExists)
+    } else {
+      val tableWithDataSourceProps = tableDefinition.copy(
+        // We can't leave `locationUri` empty and count on Hive metastore to set a default table
+        // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default
+        // table location for tables in default database, while we expect to use the location of
+        // default database.
+        storage = tableDefinition.storage.copy(locationUri = tableLocation),
+        // Here we follow data source tables and put table metadata like table schema, partition
+        // columns etc. in table properties, so that we can work around the Hive metastore issue
+        // about not case preserving and make Hive serde table and view support mixed-case column
+        // names.
+        properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
+      client.createTable(tableWithDataSourceProps, ignoreIfExists)
     }
   }
```
```diff
@@ -679,16 +679,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     var table = inputTable
 
-    if (table.tableType != VIEW) {
-      table.properties.get(DATASOURCE_PROVIDER) match {
-        // No provider in table properties, which means this is a Hive serde table.
-        case None =>
-          table = restoreHiveSerdeTable(table)
-
-        // This is a regular data source table.
-        case Some(provider) =>
-          table = restoreDataSourceTable(table, provider)
-      }
+    table.properties.get(DATASOURCE_PROVIDER) match {
+      case None if table.tableType == VIEW =>
+        // If this is a view created by Spark 2.2 or higher versions, we should restore its schema
+        // from table properties.
+        if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) {
+          table = table.copy(schema = getSchemaFromTableProperties(table))
+        }
+
+      // No provider in table properties, which means this is a Hive serde table.
+      case None =>
+        table = restoreHiveSerdeTable(table)
+
+      // This is a regular data source table.
+      case Some(provider) =>
+        table = restoreDataSourceTable(table, provider)
     }
 
     // Restore Spark's statistics from information in Metastore.
```