diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3f351b07b37dfa2f80b12a3680c30e2a8d51e8cf
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+private[spark] trait CatalystConf {
+  def caseSensitiveAnalysis: Boolean
+}
+
+/**
+ * A trivial conf that is empty. Used for testing when all
+ * relations are already filled in and the analyser needs only to resolve attribute references.
+ */
+object EmptyConf extends CatalystConf {
+  override def caseSensitiveAnalysis: Boolean = {
+    throw new UnsupportedOperationException
+  }
+}
+
+/** A CatalystConf that can be used for local testing. */
+case class SimpleCatalystConf(caseSensitiveAnalysis: Boolean) extends CatalystConf
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ecbac57ea4d62d03b6fce30f5678766440974caf..a4c61149dd975e274c3e6e88b100dce1e50522ea 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -19,19 +19,21 @@ package org.apache.spark.sql.catalyst.analysis
 
 import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark.util.collection.OpenHashSet
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.{SimpleCatalystConf, CatalystConf}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.types._
+import org.apache.spark.util.collection.OpenHashSet
 
 /**
  * A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing
  * when all relations are already filled in and the analyzer needs only to resolve attribute
  * references.
  */
-object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true)
+object SimpleAnalyzer
+  extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, new SimpleCatalystConf(true))
 
 /**
  * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
@@ -41,11 +43,17 @@ object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true
 class Analyzer(
     catalog: Catalog,
     registry: FunctionRegistry,
-    caseSensitive: Boolean,
+    conf: CatalystConf,
     maxIterations: Int = 100)
   extends RuleExecutor[LogicalPlan] with HiveTypeCoercion with CheckAnalysis {
 
-  val resolver = if (caseSensitive) caseSensitiveResolution else caseInsensitiveResolution
+  def resolver: Resolver = {
+    if (conf.caseSensitiveAnalysis) {
+      caseSensitiveResolution
+    } else {
+      caseInsensitiveResolution
+    }
+  }
 
   val fixedPoint = FixedPoint(maxIterations)
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
index 18c24b651921a212af000f35266812bbd47d257f..208021c4213262c610b450884db0802e5bbf90b8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.analysis
 
 import scala.collection.mutable
 
+import org.apache.spark.sql.catalyst.CatalystConf
+import org.apache.spark.sql.catalyst.EmptyConf
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery}
 
 /**
@@ -34,7 +36,7 @@ class NoSuchDatabaseException extends Exception
  */
 trait Catalog {
 
-  def caseSensitive: Boolean
+  val conf: CatalystConf
 
   def tableExists(tableIdentifier: Seq[String]): Boolean
 
@@ -57,10 +59,10 @@ trait Catalog {
   def unregisterAllTables(): Unit
 
   protected def processTableIdentifier(tableIdentifier: Seq[String]): Seq[String] = {
-    if (!caseSensitive) {
-      tableIdentifier.map(_.toLowerCase)
-    } else {
+    if (conf.caseSensitiveAnalysis) {
       tableIdentifier
+    } else {
+      tableIdentifier.map(_.toLowerCase)
     }
   }
 
@@ -78,7 +80,7 @@ trait Catalog {
   }
 }
 
-class SimpleCatalog(val caseSensitive: Boolean) extends Catalog {
+class SimpleCatalog(val conf: CatalystConf) extends Catalog {
   val tables = new mutable.HashMap[String, LogicalPlan]()
 
   override def registerTable(
@@ -164,10 +166,10 @@ trait OverrideCatalog extends Catalog {
   }
 
   abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
-    val dbName = if (!caseSensitive) {
-      if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None
-    } else {
+    val dbName = if (conf.caseSensitiveAnalysis) {
       databaseName
+    } else {
+      if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None
     }
 
     val temporaryTables = overrides.filter {
@@ -207,7 +209,7 @@
  */
 object EmptyCatalog extends Catalog {
 
-  override val caseSensitive: Boolean = true
+  override val conf: CatalystConf = EmptyConf
 
   override def tableExists(tableIdentifier: Seq[String]): Boolean = {
     throw new UnsupportedOperationException
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 971e1ff5ec2b8f8b0f77a57a0336a2cfe8aa1bb5..6f2f35564d12ec1a8d86264c36f3538d70e87838 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -23,24 +23,26 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.types._
-
+import org.apache.spark.sql.catalyst.SimpleCatalystConf
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 
 class AnalysisSuite extends FunSuite with BeforeAndAfter {
-  val caseSensitiveCatalog = new SimpleCatalog(true)
-  val caseInsensitiveCatalog = new SimpleCatalog(false)
+  val caseSensitiveConf = new SimpleCatalystConf(true)
+  val caseInsensitiveConf = new SimpleCatalystConf(false)
+
+  val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf)
+  val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf)
 
   val caseSensitiveAnalyzer =
-    new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitive = true) {
+    new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) {
       override val extendedResolutionRules = EliminateSubQueries :: Nil
     }
 
   val caseInsensitiveAnalyzer =
-    new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseSensitive = false) {
+    new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) {
       override val extendedResolutionRules = EliminateSubQueries :: Nil
     }
-
   def caseSensitiveAnalyze(plan: LogicalPlan): Unit =
     caseSensitiveAnalyzer.checkAnalysis(caseSensitiveAnalyzer.execute(plan))
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
index 36b03d1c65e283a8ff5537665555332dc6017c10..565b1cfe019c7833dfa5d6280324336de73f99ff 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
@@ -17,14 +17,17 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
+import org.scalatest.{BeforeAndAfter, FunSuite}
+
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{Union, Project, LocalRelation}
 import org.apache.spark.sql.types._
-import org.scalatest.{BeforeAndAfter, FunSuite}
+import org.apache.spark.sql.catalyst.SimpleCatalystConf
 
 class DecimalPrecisionSuite extends FunSuite with BeforeAndAfter {
-  val catalog = new SimpleCatalog(false)
-  val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, caseSensitive = false)
+  val conf = new SimpleCatalystConf(true)
+  val catalog = new SimpleCatalog(conf)
+  val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf)
 
   val relation = LocalRelation(
     AttributeReference("i", IntegerType)(),
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index bfaddd0f2ce1b4b32173b0b37e3f261551579452..98a75bb4ed2dfbc9df4d756f8fea1b6efb907cb3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -17,10 +17,12 @@
 
 package org.apache.spark.sql
 
+import java.util.Properties
+
 import scala.collection.immutable
 import scala.collection.JavaConversions._
 
-import java.util.Properties
+import org.apache.spark.sql.catalyst.CatalystConf
 
 private[spark] object SQLConf {
   val COMPRESS_CACHED = "spark.sql.inMemoryColumnarStorage.compressed"
@@ -32,6 +34,7 @@ private[spark] object SQLConf {
   val CODEGEN_ENABLED = "spark.sql.codegen"
   val UNSAFE_ENABLED = "spark.sql.unsafe.enabled"
   val DIALECT = "spark.sql.dialect"
+  val CASE_SENSITIVE = "spark.sql.caseSensitive"
 
   val PARQUET_BINARY_AS_STRING = "spark.sql.parquet.binaryAsString"
   val PARQUET_INT96_AS_TIMESTAMP = "spark.sql.parquet.int96AsTimestamp"
@@ -89,7 +92,8 @@ private[spark] object SQLConf {
  *
 * SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads).
  */
-private[sql] class SQLConf extends Serializable {
+
+private[sql] class SQLConf extends Serializable with CatalystConf {
   import SQLConf._
 
   /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */
@@ -158,6 +162,11 @@ private[sql] class SQLConf extends Serializable {
    */
   private[spark] def codegenEnabled: Boolean = getConf(CODEGEN_ENABLED, "false").toBoolean
 
+  /**
+   * caseSensitive analysis true by default
+   */
+  def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, "true").toBoolean
+
   /**
    * When set to true, Spark SQL will use managed memory for certain operations. This option only
    * takes effect if codegen is enabled.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 0ac0936f0f592d50778d97f00d2474ce6c43d826..28fc9d04436f78d19283d9b14bac8abf0cc4d057 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -148,7 +148,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
 
   // TODO how to handle the temp table per user session?
   @transient
-  protected[sql] lazy val catalog: Catalog = new SimpleCatalog(true)
+  protected[sql] lazy val catalog: Catalog = new SimpleCatalog(conf)
 
   // TODO how to handle the temp function per user session?
   @transient
@@ -156,7 +156,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
 
   @transient
   protected[sql] lazy val analyzer: Analyzer =
-    new Analyzer(catalog, functionRegistry, caseSensitive = true) {
+    new Analyzer(catalog, functionRegistry, conf) {
       override val extendedResolutionRules =
         ExtractPythonUdfs ::
         sources.PreInsertCastAndRename ::
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 77be3b8b206c0d39821b5f9964b6409197b29e6b..b44eb223c80c5bf78e8b5384b05f743e6c3c27bf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -22,6 +22,7 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.spark.sql.catalyst.errors.DialectException
 import org.apache.spark.sql.execution.GeneratedAggregate
 import org.apache.spark.sql.functions._
+import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.TestData._
 import org.apache.spark.sql.test.TestSQLContext
 import org.apache.spark.sql.test.TestSQLContext.{udf => _, _}
@@ -1277,6 +1278,15 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
     checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), Row(2))
   }
 
+  test("SPARK-4699 case sensitivity SQL query") {
+    setConf(SQLConf.CASE_SENSITIVE, "false")
+    val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil
+    val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
+    rdd.toDF().registerTempTable("testTable1")
+    checkAnswer(sql("SELECT VALUE FROM TESTTABLE1 where KEY = 1"), Row("val_1"))
+    setConf(SQLConf.CASE_SENSITIVE, "true")
+  }
+
   test("SPARK-6145: ORDER BY test for nested fields") {
     jsonRDD(sparkContext.makeRDD("""{"a": {"b": 1, "a": {"a": 1}}, "c": [{"d": 1}]}""" :: Nil))
       .registerTempTable("nestedOrder")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
index 33c67355967ddc74602c961722284739c03292ed..9d3090c19b4e8938051eb09ce0482bcbcb0aff52 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala
@@ -18,25 +18,13 @@
 package org.apache.spark.sql.sources
 
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.analysis.Analyzer
+import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.test.TestSQLContext
 import org.scalatest.BeforeAndAfter
 
 abstract class DataSourceTest extends QueryTest with BeforeAndAfter {
-  // Case sensitivity is not configurable yet, but we want to test some edge cases.
-  // TODO: Remove when it is configurable
-  implicit val caseInsensisitiveContext = new SQLContext(TestSQLContext.sparkContext) {
-    @transient
-    override protected[sql] lazy val analyzer: Analyzer =
-      new Analyzer(catalog, functionRegistry, caseSensitive = false) {
-        override val extendedResolutionRules =
-          PreInsertCastAndRename ::
-          Nil
+  // We want to test some edge cases.
+  implicit val caseInsensisitiveContext = new SQLContext(TestSQLContext.sparkContext)
 
-        override val extendedCheckRules = Seq(
-          sources.PreWriteCheck(catalog)
-        )
-      }
-  }
+  caseInsensisitiveContext.setConf(SQLConf.CASE_SENSITIVE, "false")
 }
-
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 538c6c7f0a200c6adc5565cd4b0715563e8ae1be..3bab648e31d1f2550cdac28f6f77f061f8464a5d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -46,6 +46,7 @@ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, Query
 import org.apache.spark.sql.hive.client._
 import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand}
 import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy}
+import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -329,7 +330,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   /* An analyzer that uses the Hive metastore. */
   @transient
   override protected[sql] lazy val analyzer =
-    new Analyzer(catalog, functionRegistry, caseSensitive = false) {
+    new Analyzer(catalog, functionRegistry, conf) {
       override val extendedResolutionRules =
         catalog.ParquetConversions ::
         catalog.CreateTables ::
@@ -350,6 +351,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   protected[hive] class SQLSession extends super.SQLSession {
     protected[sql] override lazy val conf: SQLConf = new SQLConf {
       override def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
+      override def caseSensitiveAnalysis: Boolean =
+        getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean
     }
 
     /**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 8fcdf3d0ab119902bbfd7182c48ef2fc830cfe32..f5398605bc7eb3e5a6ed2a9e3d114af09ca5c72e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -17,26 +17,18 @@
 
 package org.apache.spark.sql.hive
 
-import java.io.IOException
-import java.util.{List => JList}
-
 import com.google.common.base.Objects
 import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
 
 import org.apache.hadoop.fs.Path
-import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable}
-import org.apache.hadoop.hive.metastore.{TableType, Warehouse}
+import org.apache.hadoop.hive.metastore.api.FieldSchema
+import org.apache.hadoop.hive.metastore.Warehouse
 import org.apache.hadoop.hive.ql.metadata._
-import org.apache.hadoop.hive.ql.plan.CreateTableDesc
-import org.apache.hadoop.hive.serde.serdeConstants
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
-import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException}
-import org.apache.hadoop.util.ReflectionUtils
+import org.apache.hadoop.hive.serde2.Deserializer
 
 import org.apache.spark.Logging
-import org.apache.spark.sql.hive.client.IsolatedClientLoader
 import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext}
-import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog}
+import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Catalog, OverrideCatalog}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical
@@ -44,7 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.hive.client._
 import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec}
-import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource}
+import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, ResolvedDataSource}
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -54,7 +46,7 @@ import scala.collection.JavaConversions._
 private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext)
   extends Catalog with Logging {
 
-  import org.apache.spark.sql.hive.HiveMetastoreTypes._
+  val conf = hive.conf
 
   /** Usages should lock on `this`. */
   protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf)
@@ -148,7 +140,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
     val tableProperties = new scala.collection.mutable.HashMap[String, String]
     tableProperties.put("spark.sql.sources.provider", provider)
     if (userSpecifiedSchema.isDefined) {
-      val threshold = hive.conf.schemaStringLengthThreshold
+      val threshold = conf.schemaStringLengthThreshold
       val schemaJsonString = userSpecifiedSchema.get.json
       // Split the JSON string.
       val parts = schemaJsonString.grouped(threshold).toSeq
@@ -355,7 +347,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
         // Inserting into partitioned table is not supported in Parquet data source (yet).
         if !relation.hiveQlTable.isPartitioned &&
           hive.convertMetastoreParquet &&
-          hive.conf.parquetUseDataSourceApi &&
+          conf.parquetUseDataSourceApi &&
           relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
         val parquetRelation = convertToParquetRelation(relation)
         val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -366,7 +358,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
         // Inserting into partitioned table is not supported in Parquet data source (yet).
         if !relation.hiveQlTable.isPartitioned &&
           hive.convertMetastoreParquet &&
-          hive.conf.parquetUseDataSourceApi &&
+          conf.parquetUseDataSourceApi &&
           relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
         val parquetRelation = convertToParquetRelation(relation)
         val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -375,7 +367,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
       // Read path
       case p @ PhysicalOperation(_, _, relation: MetastoreRelation)
           if hive.convertMetastoreParquet &&
-            hive.conf.parquetUseDataSourceApi &&
+            conf.parquetUseDataSourceApi &&
             relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
         val parquetRelation = convertToParquetRelation(relation)
         val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -435,7 +427,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
         val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
         CreateTableUsingAsSelect(
           desc.name,
-          hive.conf.defaultDataSourceName,
+          conf.defaultDataSourceName,
           temporary = false,
           mode,
           options = Map.empty[String, String],
@@ -464,7 +456,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
         val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
         CreateTableUsingAsSelect(
           tblName,
-          hive.conf.defaultDataSourceName,
+          conf.defaultDataSourceName,
           temporary = false,
           mode,
           options = Map.empty[String, String],
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 1f40a5340c2cefd0602114cd665cb07dbaedf2bd..1598d4bd47550e6a8e2702d52edb0cbf5ea6f047 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -26,15 +26,16 @@ import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContaine
 import org.apache.hadoop.hive.ql.metadata.Table
 import org.apache.hadoop.hive.ql.parse.VariableSubstitution
 import org.apache.hadoop.hive.ql.processors._
-import org.apache.hadoop.hive.serde2.RegexSerDe
 import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.apache.hadoop.hive.serde2.avro.AvroSerDe
-import org.apache.spark.sql.SQLConf
+
+import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.CacheTableCommand
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.execution.HiveNativeCommand
+import org.apache.spark.sql.SQLConf
 import org.apache.spark.util.Utils
 import org.apache.spark.{SparkConf, SparkContext}
 
@@ -103,10 +104,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
 
   /** Fewer partitions to speed up testing. */
   protected[sql] override lazy val conf: SQLConf = new SQLConf {
     override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt
-    // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared.
     // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql"
     override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql")
+    override def caseSensitiveAnalysis: Boolean =
+      getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean
   }
 }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 7d728fe87bda706689e5efc4371ed6a0fb44890e..2c9c08a9f389804ac24c9c32141aa8f761303839 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -29,7 +29,6 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.spark.{SparkFiles, SparkException}
 import org.apache.spark.sql.{AnalysisException, DataFrame, Row}
 import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index c605f1017547af5d8d1e2a19510d34751cd27e5b..1353802604402d155ded68cf65f1aa721bcf406f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -493,6 +493,12 @@ class SQLQuerySuite extends QueryTest {
     }
   }
 
+  test("SPARK-4699 HiveContext should be case insensitive by default") {
+    checkAnswer(
+      sql("SELECT KEY FROM Src ORDER BY value"),
+      sql("SELECT key FROM src ORDER BY value").collect().toSeq)
+  }
+
   test("SPARK-5284 Insert into Hive throws NPE when a inner complex type field has a null value") {
     val schema = StructType(
       StructField("s",
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index d5dd0bf58e7026161dc86d6af11425ceb1cffcbd..bf1121ddf027327e53c399b1c23ed18b5f49cec0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -21,14 +21,12 @@ import java.io.File
 
 import org.scalatest.BeforeAndAfterAll
 
-import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode}
+import org.apache.spark.sql.{QueryTest, SQLConf}
 import org.apache.spark.sql.catalyst.expressions.Row
 import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD}
 import org.apache.spark.sql.hive.execution.HiveTableScan
-import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
-import org.apache.spark.sql.json.JSONRelation
 import org.apache.spark.sql.sources.{InsertIntoDataSource, LogicalRelation}
 import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan}
 import org.apache.spark.sql.SaveMode
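
The notes below are not part of the patch; they are small Scala usage sketches added for review context, and any object names they introduce (AnalyzerWiringSketch and the like) are hypothetical. First, wiring a Catalyst Analyzer up for local testing with an explicit CatalystConf, mirroring the new AnalysisSuite and DecimalPrecisionSuite setup above:

    import org.apache.spark.sql.catalyst.SimpleCatalystConf
    import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry, SimpleCatalog}

    object AnalyzerWiringSketch {
      // Case-sensitive analysis: "col" and "COL" are treated as different attributes.
      val caseSensitiveConf = new SimpleCatalystConf(true)
      val caseSensitiveAnalyzer =
        new Analyzer(new SimpleCatalog(caseSensitiveConf), EmptyFunctionRegistry, caseSensitiveConf)

      // Case-insensitive analysis: "col" and "COL" resolve to the same attribute.
      val caseInsensitiveConf = new SimpleCatalystConf(false)
      val caseInsensitiveAnalyzer =
        new Analyzer(new SimpleCatalog(caseInsensitiveConf), EmptyFunctionRegistry, caseInsensitiveConf)
    }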
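
A sketch of what the resolver switch in Analyzer amounts to. It assumes Resolver is the (String, String) => Boolean name-matching function used by the analyzer; the two implementations here are illustrative stand-ins, not the patch's code:

    object ResolverSketch {
      type Resolver = (String, String) => Boolean

      // Assumed behaviour of the two resolutions selected by Analyzer.resolver.
      val caseSensitiveResolution: Resolver = (a, b) => a == b
      val caseInsensitiveResolution: Resolver = (a, b) => a.equalsIgnoreCase(b)

      def resolver(caseSensitiveAnalysis: Boolean): Resolver =
        if (caseSensitiveAnalysis) caseSensitiveResolution else caseInsensitiveResolution
    }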
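
The Catalog changes route identifier normalization through the conf. A standalone restatement of the processTableIdentifier branch from the hunk above, lifted out of the trait purely for illustration:

    object IdentifierNormalizationSketch {
      def processTableIdentifier(
          tableIdentifier: Seq[String],
          caseSensitiveAnalysis: Boolean): Seq[String] = {
        // Case-insensitive catalogs store and look up table names in lower case.
        if (caseSensitiveAnalysis) tableIdentifier else tableIdentifier.map(_.toLowerCase)
      }

      // processTableIdentifier(Seq("MyDb", "MyTable"), caseSensitiveAnalysis = false)
      // returns Seq("mydb", "mytable")
    }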
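
On the SQLContext side, the new spark.sql.caseSensitive key (default "true" in SQLConf, "false" for the Hive sessions above) can be toggled from user code. A minimal sketch, assuming a local master and a hypothetical temp table testTable1 modeled on the new SQLQuerySuite test:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    object CaseSensitivitySketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("case-sensitivity-sketch").setMaster("local"))
        val sqlContext = new SQLContext(sc)
        import sqlContext.implicits._

        sc.parallelize(Seq((1, "val_1"), (2, "val_2"))).toDF("key", "value")
          .registerTempTable("testTable1")

        // With case-sensitive analysis (the SQLContext default) the upper-cased
        // identifiers below would fail to resolve; relax the setting first.
        sqlContext.setConf("spark.sql.caseSensitive", "false")
        sqlContext.sql("SELECT VALUE FROM TESTTABLE1 WHERE KEY = 1").show()

        sc.stop()
      }
    }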
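
Finally, the HiveContext default: its SQLSession conf above reads spark.sql.caseSensitive with a "false" default, so mixed-case references to a Hive table should resolve without any extra configuration. A check mirroring the new hive SQLQuerySuite test, assuming a table named src exists (as it does in TestHive):

    import org.apache.spark.sql.hive.HiveContext

    object HiveCaseInsensitivitySketch {
      // Returns true when the mixed- and lower-cased spellings of the same query agree.
      def caseInsensitiveByDefault(hiveContext: HiveContext): Boolean = {
        val mixedCase = hiveContext.sql("SELECT KEY FROM Src ORDER BY value").collect().toSeq
        val lowerCase = hiveContext.sql("SELECT key FROM src ORDER BY value").collect().toSeq
        mixedCase == lowerCase
      }
    }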