Commit ca1b2198 authored by Reynold Xin

[SPARK-15052][SQL] Use builder pattern to create SparkSession

## What changes were proposed in this pull request?
This patch adds a builder pattern for creating a SparkSession. The new code is unused and mostly dead code; I'm putting it up here for feedback.
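
For reference, a minimal sketch of the intended usage (the config key and app name are placeholders; `master`, `appName`, `config`, and `getOrCreate` are the builder methods added in the diff below):

```scala
import org.apache.spark.sql.SparkSession

// Options set on the builder are copied into the SparkConf before the
// SparkContext (and the session on top of it) is created or reused.
val spark = SparkSession.builder
  .master("local")
  .appName("builder-example")
  .config("spark.some.config.option", "some-value")
  .getOrCreate()
```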

There are a few TODOs that can be done as follow-up pull requests:
- [ ] Update tests to use this
- [ ] Update examples to use this (a migration sketch follows this list)
- [ ] Clean up SQLContext code w.r.t. this one (i.e. SparkSession shouldn't call into SQLContext.getOrCreate; it should be the other way around)
- [ ] Remove SparkSession.withHiveSupport
- [ ] Disable the old constructor (by making it private) so the only way to start a SparkSession is through this builder pattern
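
As an illustration of the first two checklist items, a hypothetical before/after migration (the "before" snippet is representative old-style code, not a specific file in this patch):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SparkSession}

// Before: wire up the SparkContext and SQLContext by hand.
val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("app"))
val sqlContext = new SQLContext(sc)

// After: one builder chain; getOrCreate reuses any existing SparkContext.
val spark = SparkSession.builder
  .master("local")
  .appName("app")
  .getOrCreate()
```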

## How was this patch tested?
Not directly; a follow-up pull request will clean this up and switch existing tests over to the builder.

Author: Reynold Xin <rxin@databricks.com>

Closes #12830 from rxin/sparksession-builder.
parent d5c79f56
@@ -18,9 +18,7 @@
package org.apache.spark.sql
import java.beans.Introspector
import java.util.Properties
import scala.collection.immutable
import scala.collection.JavaConverters._
import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag
@@ -30,7 +28,7 @@ import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.annotation.{DeveloperApi, Experimental}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalog.Catalog
import org.apache.spark.sql.catalyst._
@@ -49,7 +47,16 @@ import org.apache.spark.util.Utils
/**
* The entry point to programming Spark with the Dataset and DataFrame API.
*
* To create a SparkSession, use the following builder pattern:
*
* {{{
*   SparkSession.builder
*     .master("local")
*     .config("spark.some.config.option", "some-value")
*     .getOrCreate()
* }}}
*/
class SparkSession private(
@transient val sparkContext: SparkContext,
@@ -635,6 +642,122 @@ class SparkSession private(
object SparkSession {
/**
* Builder for [[SparkSession]].
*/
class Builder {
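// Accumulated options; getOrCreate() copies them into the SparkConf it builds.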
private[this] val options = new scala.collection.mutable.HashMap[String, String]
/**
* Sets a name for the application, which will be shown in the Spark web UI.
*
* @since 2.0.0
*/
def appName(name: String): Builder = config("spark.app.name", name)
/**
* Sets a config option. Options set using this method are automatically propagated to
* both [[SparkConf]] and SparkSession's own configuration.
*
* @since 2.0.0
*/
def config(key: String, value: String): Builder = synchronized {
options += key -> value
this
}
/**
* Sets a config option. Options set using this method are automatically propagated to
* both [[SparkConf]] and SparkSession's own configuration.
*
* @since 2.0.0
*/
def config(key: String, value: Long): Builder = synchronized {
options += key -> value.toString
this
}
/**
* Sets a config option. Options set using this method are automatically propagated to
* both [[SparkConf]] and SparkSession's own configuration.
*
* @since 2.0.0
*/
def config(key: String, value: Double): Builder = synchronized {
options += key -> value.toString
this
}
/**
* Sets a config option. Options set using this method are automatically propagated to
* both [[SparkConf]] and SparkSession's own configuration.
*
* @since 2.0.0
*/
def config(key: String, value: Boolean): Builder = synchronized {
options += key -> value.toString
this
}
/**
* Sets a list of config options based on the given [[SparkConf]].
*
* @since 2.0.0
*/
def config(conf: SparkConf): Builder = synchronized {
conf.getAll.foreach { case (k, v) => options += k -> v }
this
}
/**
* Sets the Spark master URL to connect to, such as "local" to run locally, "local[4]" to
* run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster.
*
* @since 2.0.0
*/
def master(master: String): Builder = config("spark.master", master)
/**
* Enables Hive support, including connectivity to a persistent Hive metastore, support for
* Hive serdes, and Hive user-defined functions.
*
* @since 2.0.0
*/
def enableHiveSupport(): Builder = synchronized {
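// hiveClassesArePresent probes the classpath for the Hive state classes named below.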
if (hiveClassesArePresent) {
config(CATALOG_IMPLEMENTATION.key, "hive")
} else {
throw new IllegalArgumentException(
"Unable to instantiate SparkSession with Hive support because " +
"Hive classes are not found.")
}
}
/**
* Gets an existing [[SparkSession]] or, if there is no existing one, creates a new one
* based on the options set in this builder.
*
* @since 2.0.0
*/
def getOrCreate(): SparkSession = synchronized {
// Step 1. Create a SparkConf from the options set on this builder.
val sparkConf = new SparkConf()
options.foreach { case (k, v) => sparkConf.set(k, v) }
// Step 2. Get or create a SparkContext with that conf.
val sparkContext = SparkContext.getOrCreate(sparkConf)
// Step 3. Get or create a SparkSession on top of it.
SQLContext.getOrCreate(sparkContext).sparkSession
}
}
/**
* Creates a [[SparkSession.Builder]] for constructing a [[SparkSession]].
*
* @since 2.0.0
*/
def builder: Builder = new Builder
private val HIVE_SHARED_STATE_CLASS_NAME = "org.apache.spark.sql.hive.HiveSharedState"
private val HIVE_SESSION_STATE_CLASS_NAME = "org.apache.spark.sql.hive.HiveSessionState"
......
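
To round out the builder API above, a hedged sketch of seeding the builder from a `SparkConf` and using the typed `config` overloads (`spark.some.flag` is a placeholder key; `enableHiveSupport()` is left out because it throws when the Hive classes are absent):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Seed the builder from an existing SparkConf, then layer typed overrides on top.
val conf = new SparkConf().setMaster("local[2]").setAppName("seeded-example")
val spark = SparkSession.builder
  .config(conf)                               // copies every (key, value) pair from conf
  .config("spark.sql.shuffle.partitions", 4L) // Long overload, stored as "4"
  .config("spark.some.flag", true)            // Boolean overload, stored as "true"
  .getOrCreate()
```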