From ec5f9ed5de2218938dba52152475daafd4dc4786 Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Thu, 10 Dec 2015 12:04:20 -0800 Subject: [PATCH] [SPARK-12228][SQL] Try to run execution hive's derby in memory. This PR tries to make execution hive's derby run in memory since it is a fake metastore and every time we create a HiveContext, we will switch to a new one. It is possible that it can reduce the flakiness of our tests that need to create HiveContext (e.g. HiveSparkSubmitSuite). I will test it more. https://issues.apache.org/jira/browse/SPARK-12228 Author: Yin Huai <yhuai@databricks.com> Closes #10204 from yhuai/derbyInMemory. --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2 +- .../scala/org/apache/spark/sql/hive/HiveContext.scala | 8 +++++--- .../scala/org/apache/spark/sql/hive/test/TestHive.scala | 2 +- .../org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala | 2 ++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 4b928e600b..03bb2c2225 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -83,7 +83,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { val cliConf = new HiveConf(classOf[SessionState]) // Override the location of the metastore since this is only used for local execution. 
- HiveContext.newTemporaryConfiguration().foreach { + HiveContext.newTemporaryConfiguration(useInMemoryDerby = false).foreach { case (key, value) => cliConf.set(key, value) } val sessionState = new CliSessionState(cliConf) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index e83941c2ec..5958777b0d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -212,7 +212,7 @@ class HiveContext private[hive]( val loader = new IsolatedClientLoader( version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion), execJars = Seq(), - config = newTemporaryConfiguration(), + config = newTemporaryConfiguration(useInMemoryDerby = true), isolationOn = false, baseClassLoader = Utils.getContextOrSparkClassLoader) loader.createClient().asInstanceOf[ClientWrapper] @@ -721,7 +721,9 @@ private[hive] object HiveContext { doc = "TODO") /** Constructs a configuration for hive, where the metastore is located in a temp directory. 
*/ - def newTemporaryConfiguration(): Map[String, String] = { + def newTemporaryConfiguration(useInMemoryDerby: Boolean): Map[String, String] = { + val withInMemoryMode = if (useInMemoryDerby) "memory:" else "" + val tempDir = Utils.createTempDir() val localMetastore = new File(tempDir, "metastore") val propMap: HashMap[String, String] = HashMap() @@ -735,7 +737,7 @@ private[hive] object HiveContext { } propMap.put(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, localMetastore.toURI.toString) propMap.put(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, - s"jdbc:derby:;databaseName=${localMetastore.getAbsolutePath};create=true") + s"jdbc:derby:${withInMemoryMode};databaseName=${localMetastore.getAbsolutePath};create=true") propMap.put("datanucleus.rdbms.datastoreAdapterClassName", "org.datanucleus.store.rdbms.adapter.DerbyAdapter") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 2e2d201bf2..97792549bb 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -87,7 +87,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { dir } - private lazy val temporaryConfig = newTemporaryConfiguration() + private lazy val temporaryConfig = newTemporaryConfiguration(useInMemoryDerby = false) /** Sets up the system initially or after a RESET command */ protected override def configure(): Map[String, String] = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 9296219331..53185fd775 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -80,6 +80,8 @@ class HiveSparkSubmitSuite "--master", 
"local-cluster[2,1,1024]", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", + "--conf", "spark.sql.hive.metastore.version=0.12", + "--conf", "spark.sql.hive.metastore.jars=maven", "--driver-java-options", "-Dderby.system.durability=test", unusedJar.toString) runSparkSubmit(args) -- GitLab