Commit 04f925ed authored by xin Wu, committed by Andrew Or

[SPARK-15236][SQL][SPARK SHELL] Add spark-defaults property to switch to use InMemoryCatalog

## What changes were proposed in this pull request?
This PR changes REPL/Main to check the property `spark.sql.catalogImplementation` when deciding whether `enableHiveSupport` should be called.

If `spark.sql.catalogImplementation` is set to `hive` and the Hive classes have been built, Spark will use Hive support.
Otherwise, Spark will create a SparkSession with the in-memory catalog.
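For illustration (this example is not part of the patch), a user could then launch the shell against the in-memory catalog, e.g. with `bin/spark-shell --conf spark.sql.catalogImplementation=in-memory` or by setting the same key in `conf/spark-defaults.conf`, and verify the effect from inside the REPL:

```scala
// Hypothetical spark-shell session, assuming the shell was launched with
//   --conf spark.sql.catalogImplementation=in-memory
// (or with the same key set in conf/spark-defaults.conf).

// The launch-time setting is visible on the SparkContext's configuration:
spark.sparkContext.getConf.get("spark.sql.catalogImplementation")
// res0: String = in-memory

// DDL now targets the in-memory catalog rather than a Hive metastore
// (t_demo is an arbitrary example table name):
spark.sql("CREATE TABLE t_demo(id INT) USING parquet")
spark.sql("SHOW TABLES").show()
```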

## How was this patch tested?
Ran the REPL component tests (`ReplSuite`).
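For reference, the suite can be run locally with, e.g., `build/sbt "repl/test-only *ReplSuite"` (assuming the standard Spark sbt build of that era; the exact sbt task syntax varies by version).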

Author: xin Wu <xinwu@us.ibm.com>
Author: Xin Wu <xinwu@us.ibm.com>

Closes #13088 from xwu0226/SPARK-15236.
parent 85d6b0db
@@ -22,6 +22,7 @@ import java.io.File
 import scala.tools.nsc.GenericRunnerSettings

 import org.apache.spark._
+import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.util.Utils
@@ -88,10 +89,23 @@ object Main extends Logging {
     }

     val builder = SparkSession.builder.config(conf)
-    if (SparkSession.hiveClassesArePresent) {
-      sparkSession = builder.enableHiveSupport().getOrCreate()
-      logInfo("Created Spark session with Hive support")
+    if (conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase == "hive") {
+      if (SparkSession.hiveClassesArePresent) {
+        // In the case that the property is not set at all, builder's config
+        // does not have this value set to 'hive' yet. The original default
+        // behavior is that when there are hive classes, we use hive catalog.
+        sparkSession = builder.enableHiveSupport().getOrCreate()
+        logInfo("Created Spark session with Hive support")
+      } else {
+        // Need to change it back to 'in-memory' if no hive classes are found
+        // in the case that the property is set to hive in spark-defaults.conf
+        builder.config(CATALOG_IMPLEMENTATION.key, "in-memory")
+        sparkSession = builder.getOrCreate()
+        logInfo("Created Spark session")
+      }
     } else {
+      // In the case that the property is set but not to 'hive', the internal
+      // default is 'in-memory'. So the sparkSession will use in-memory catalog.
       sparkSession = builder.getOrCreate()
       logInfo("Created Spark session")
     }
...
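To summarize the decision logic introduced above (an editor's paraphrase for readability, not code from the patch):

```scala
// Editor's paraphrase of the new branching in Main.scala (illustrative only).
// Inputs: the configured value of spark.sql.catalogImplementation (which
// defaults to "hive") and whether Hive classes are present on the classpath.
def resolveCatalog(configured: String, hiveClassesPresent: Boolean): String =
  if (configured.toLowerCase == "hive") {
    // Default or explicit "hive": use Hive only when the classes are built;
    // otherwise fall back to the in-memory catalog.
    if (hiveClassesPresent) "hive" else "in-memory"
  } else {
    // Any other explicit value resolves to the in-memory catalog.
    "in-memory"
  }
```

This preserves the pre-patch default (Hive when available) while making the fallback explicit when `spark-defaults.conf` requests `hive` on a build without Hive classes.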
@@ -21,9 +21,11 @@ import java.io._
 import java.net.URLClassLoader

 import scala.collection.mutable.ArrayBuffer

 import org.apache.commons.lang3.StringEscapeUtils
+import org.apache.log4j.{Level, LogManager}

 import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.internal.config._
+import org.apache.spark.sql.SparkSession
 import org.apache.spark.util.Utils

 class ReplSuite extends SparkFunSuite {
@@ -99,6 +101,52 @@ class ReplSuite extends SparkFunSuite {
     System.clearProperty("spark.driver.port")
   }

+  test("SPARK-15236: use Hive catalog") {
+    // Turn on the INFO log level so that the code can emit INFO
+    // entries when "HiveMetaStore" is used.
+    val rootLogger = LogManager.getRootLogger()
+    val logLevel = rootLogger.getLevel
+    rootLogger.setLevel(Level.INFO)
+    try {
+      Main.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
+      val output = runInterpreter("local",
+        """
+          |spark.sql("drop table if exists t_15236")
+        """.stripMargin)
+      assertDoesNotContain("error:", output)
+      assertDoesNotContain("Exception", output)
+      // Only when the config is set to hive and the hive classes are built
+      // will the hive catalog be used; the INFO log will then show entries
+      // mentioning HiveMetaStore.
+      if (SparkSession.hiveClassesArePresent) {
+        assertContains("HiveMetaStore", output)
+      } else {
+        // If hive classes are not built, the in-memory catalog will be used
+        assertDoesNotContain("HiveMetaStore", output)
+      }
+    } finally {
+      rootLogger.setLevel(logLevel)
+    }
+  }
+
+  test("SPARK-15236: use in-memory catalog") {
+    val rootLogger = LogManager.getRootLogger()
+    val logLevel = rootLogger.getLevel
+    rootLogger.setLevel(Level.INFO)
+    try {
+      Main.conf.set(CATALOG_IMPLEMENTATION.key, "in-memory")
+      val output = runInterpreter("local",
+        """
+          |spark.sql("drop table if exists t_16236")
+        """.stripMargin)
+      assertDoesNotContain("error:", output)
+      assertDoesNotContain("Exception", output)
+      assertDoesNotContain("HiveMetaStore", output)
+    } finally {
+      rootLogger.setLevel(logLevel)
+    }
+  }
+
   test("simple foreach with accumulator") {
     val output = runInterpreter("local",
       """
...