Commit 91f6be87 authored by Yin Huai, committed by Reynold Xin

[SPARK-8020] Spark SQL settings in spark-defaults.conf cause metadataHive to be constructed too early

https://issues.apache.org/jira/browse/SPARK-8020

Author: Yin Huai <yhuai@databricks.com>

Closes #6563 from yhuai/SPARK-8020 and squashes the following commits:

4e5addc [Yin Huai] style
bf766c6 [Yin Huai] Failed test.
0398f5b [Yin Huai] First populate the SQLConf and then construct executionHive and metadataHive.
parent 4c868b99
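For context on the failure mode: when metastore-related Spark SQL settings arrive through spark-defaults.conf (and therefore through SparkConf), HiveContext could previously start constructing executionHive and metadataHive while only part of those settings had been copied into SQLConf. Below is a minimal sketch of the triggering setup, modeled on the regression test added in this commit; the two settings and the classes are real Spark APIs, while the master and app name are arbitrary illustration values.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Equivalent to putting these two lines in spark-defaults.conf:
//   spark.sql.hive.metastore.version  12
//   spark.sql.hive.metastore.jars     maven
val sparkConf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("spark-8020-repro") // arbitrary name, for illustration only
  .set("spark.sql.hive.metastore.version", "12")
  .set("spark.sql.hive.metastore.jars", "maven")

val sc = new SparkContext(sparkConf)

// Before this patch, constructing HiveContext could build metadataHive before both
// metastore settings were visible in SQLConf, so the isolated metastore client could be
// created with the wrong version/jars, or fail. With the patch, SQLConf is populated first.
val hiveContext = new HiveContext(sc)
hiveContext.tables() // force the metastore-backed lazy vals to be created, as the new test does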
sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -182,9 +182,28 @@ class SQLContext(@transient val sparkContext: SparkContext)
     conf.dialect
   }
 
-  sparkContext.getConf.getAll.foreach {
-    case (key, value) if key.startsWith("spark.sql") => setConf(key, value)
-    case _ =>
+  {
+    // We extract Spark SQL settings from the SparkContext's conf and put them into
+    // Spark SQL's conf.
+    // First we populate the SQLConf (conf), so that other values that use these settings
+    // during their construction can see the correct values.
+    // For example, metadataHive in HiveContext may need both spark.sql.hive.metastore.version
+    // and spark.sql.hive.metastore.jars to get correctly constructed.
+    val properties = new Properties
+    sparkContext.getConf.getAll.foreach {
+      case (key, value) if key.startsWith("spark.sql") => properties.setProperty(key, value)
+      case _ =>
+    }
+    // We put these settings into conf directly, to avoid calling setConf, which may have
+    // side effects. For example, in HiveContext, setConf may cause executionHive and
+    // metadataHive to be constructed. If we called setConf here, the constructed metadataHive
+    // could have the wrong settings, or its construction could fail.
+    conf.setConf(properties)
+    // After we have populated SQLConf, we call setConf to populate other confs in the subclass
+    // (e.g. hiveconf in HiveContext).
+    properties.foreach {
+      case (key, value) => setConf(key, value)
+    }
   }
 
   @transient
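To make the ordering described in the comments above concrete outside of Spark's own classes, here is a small, self-contained Scala sketch of the same two-phase idea; ConfStore, MetastoreClient, and TwoPhaseInit are hypothetical names used only for illustration, not Spark APIs.

import java.util.Properties

// Plain key-value storage, standing in for SQLConf; put() has no side effects.
class ConfStore {
  private val props = new Properties
  def put(key: String, value: String): Unit = props.setProperty(key, value)
  def get(key: String, default: String): String =
    Option(props.getProperty(key)).getOrElse(default)
}

// Stands in for metadataHive: a component that reads several settings when it is built.
class MetastoreClient(val version: String, val jars: String)

object TwoPhaseInit {
  def init(raw: Seq[(String, String)], store: ConfStore): MetastoreClient = {
    // Phase 1: copy every spark.sql.* setting into the plain store first, with no side effects.
    raw.filter { case (key, _) => key.startsWith("spark.sql") }
      .foreach { case (key, value) => store.put(key, value) }
    // Phase 2: only now build the component, so it sees the complete set of settings
    // instead of whatever subset happened to be copied before it was first touched.
    new MetastoreClient(
      store.get("spark.sql.hive.metastore.version", "<builtin>"),
      store.get("spark.sql.hive.metastore.jars", "builtin"))
  }
}

val client = TwoPhaseInit.init(
  Seq(
    "spark.sql.hive.metastore.version" -> "12",
    "spark.sql.hive.metastore.jars" -> "maven",
    "spark.app.name" -> "ignored"),
  new ConfStore)
// client.version == "12", client.jars == "maven"

The point of the split is that the component built in phase 2 reads several keys at once, so it must not be created as a side effect in the middle of phase 1.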
sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -17,7 +17,8 @@
 package org.apache.spark.sql.hive.client
 
-import org.apache.spark.{Logging, SparkFunSuite}
+import org.apache.spark.sql.hive.HiveContext
+import org.apache.spark.{Logging, SparkConf, SparkContext, SparkFunSuite}
 import org.apache.spark.sql.catalyst.util.quietly
 import org.apache.spark.util.Utils
@@ -37,6 +38,48 @@ class VersionsSuite extends SparkFunSuite with Logging {
       "hive.metastore.warehouse.dir" -> warehousePath.toString)
   }
 
+  test("SPARK-8020: successfully create a HiveContext with metastore settings in Spark conf.") {
+    val sparkConf =
+      new SparkConf() {
+        // We are not really cloning it. We need to keep the custom getAll.
+        override def clone: SparkConf = this
+
+        override def getAll: Array[(String, String)] = {
+          val allSettings = super.getAll
+          val metastoreVersion = get("spark.sql.hive.metastore.version")
+          val metastoreJars = get("spark.sql.hive.metastore.jars")
+
+          val others = allSettings.filterNot { case (key, _) =>
+            key == "spark.sql.hive.metastore.version" || key == "spark.sql.hive.metastore.jars"
+          }
+
+          // Put spark.sql.hive.metastore.version first. It is needed to trigger the exception
+          // caused by SPARK-8020. Other problems triggered by SPARK-8020
+          // (e.g. using Hive 0.13.1's metastore client to connect to a 0.12 metastore)
+          // are not easy to test.
+          Array(
+            ("spark.sql.hive.metastore.version" -> metastoreVersion),
+            ("spark.sql.hive.metastore.jars" -> metastoreJars)) ++ others
+        }
+      }
+
+    sparkConf
+      .set("spark.sql.hive.metastore.version", "12")
+      .set("spark.sql.hive.metastore.jars", "maven")
+
+    val hiveContext = new HiveContext(
+      new SparkContext(
+        "local[2]",
+        "TestSQLContextInVersionsSuite",
+        sparkConf)) {
+      protected override def configure(): Map[String, String] = buildConf
+    }
+
+    // Make sure all metastore-related lazy vals got created.
+    hiveContext.tables()
+  }
+
   test("success sanity check") {
     val badClient = IsolatedClientLoader.forVersion("13", buildConf()).client
     val db = new HiveDatabase("default", "")