Commit e34922a2 authored by Sandy Ryza, committed by Patrick Wendell

SPARK-2310. Support arbitrary Spark properties on the command line with spark-submit

The PR allows invocations like
  spark-submit --class org.MyClass --conf spark.shuffle.spill=false myjar.jar

Author: Sandy Ryza <sandy@cloudera.com>

Closes #1253 from sryza/sandy-spark-2310 and squashes the following commits:

1dc9855 [Sandy Ryza] More doc and cleanup
00edfb9 [Sandy Ryza] Review comments
91b244a [Sandy Ryza] Change format to --conf PROP=VALUE
8fabe77 [Sandy Ryza] SPARK-2310. Support arbitrary Spark properties on the command line with spark-submit
parent 78d18fdb
@@ -269,6 +269,9 @@ object SparkSubmit {
       sysProps.getOrElseUpdate(k, v)
     }

+    // Spark properties included on command line take precedence
+    sysProps ++= args.sparkProperties
+
     (childArgs, childClasspath, sysProps, childMainClass)
   }
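Worth noting why the new line lands after the `getOrElseUpdate` loop: defaults merged with `getOrElseUpdate` never clobber a key that is already set, while `++=` overwrites unconditionally, so command-line properties win. A self-contained sketch of the two merge semantics (the map contents are illustrative, not from the actual code):

    import scala.collection.mutable

    object PrecedenceSketch {
      def main(args: Array[String]): Unit = {
        val sysProps = new mutable.HashMap[String, String]()
        sysProps("spark.shuffle.spill") = "true"            // e.g. loaded from a properties file

        // getOrElseUpdate only fills keys that are not set yet ...
        sysProps.getOrElseUpdate("spark.shuffle.spill", "true")
        sysProps.getOrElseUpdate("spark.app.name", "default")

        // ... whereas ++= overwrites existing keys, so --conf values win.
        sysProps ++= Map("spark.shuffle.spill" -> "false")  // parsed from --conf

        println(sysProps("spark.shuffle.spill"))            // prints "false"
      }
    }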
@@ -55,6 +55,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
   var verbose: Boolean = false
   var isPython: Boolean = false
   var pyFiles: String = null
+  val sparkProperties: HashMap[String, String] = new HashMap[String, String]()

   parseOpts(args.toList)
   loadDefaults()
@@ -177,6 +178,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
     |  executorCores           $executorCores
     |  totalExecutorCores      $totalExecutorCores
     |  propertiesFile          $propertiesFile
+    |  extraSparkProperties    $sparkProperties
     |  driverMemory            $driverMemory
     |  driverCores             $driverCores
     |  driverExtraClassPath    $driverExtraClassPath
@@ -290,6 +292,13 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         jars = Utils.resolveURIs(value)
         parse(tail)

+      case ("--conf" | "-c") :: value :: tail =>
+        value.split("=", 2).toSeq match {
+          case Seq(k, v) => sparkProperties(k) = v
+          case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
+        }
+        parse(tail)
+
       case ("--help" | "-h") :: tail =>
         printUsageAndExit(0)
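The limit argument to `split` matters here: `split("=", 2)` breaks on the first `=` only, so values that themselves contain `=` (such as Java option strings) survive intact. A quick illustration, relying only on ordinary `String.split` semantics:

    object SplitSketch extends App {
      // Splits on the first '=' only; the rest of the value is untouched.
      println("spark.executor.extraJavaOptions=-Dkey=value".split("=", 2).toList)
      // => List(spark.executor.extraJavaOptions, -Dkey=value)

      // No '=' at all yields a single element, which matches the error branch above.
      println("spark.shuffle.spill".split("=", 2).toList)
      // => List(spark.shuffle.spill)
    }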
@@ -349,6 +358,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
        |                              on the PYTHONPATH for Python apps.
        |  --files FILES               Comma-separated list of files to be placed in the working
        |                              directory of each executor.
+       |
+       |  --conf PROP=VALUE           Arbitrary Spark configuration property.
        |  --properties-file FILE      Path to a file from which to load extra properties. If not
        |                              specified, this will look for conf/spark-defaults.conf.
        |
@@ -120,6 +120,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "beauty",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -139,6 +140,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     mainClass should be ("org.apache.spark.deploy.yarn.Client")
     classpath should have length (0)
     sysProps("spark.app.name") should be ("beauty")
+    sysProps("spark.shuffle.spill") should be ("false")
     sysProps("SPARK_SUBMIT") should be ("true")
   }
@@ -156,6 +158,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "trill",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -176,6 +179,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     sysProps("spark.yarn.dist.archives") should include regex (".*archive1.txt,.*archive2.txt")
     sysProps("spark.jars") should include regex (".*one.jar,.*two.jar,.*three.jar,.*thejar.jar")
     sysProps("SPARK_SUBMIT") should be ("true")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
test("handles standalone cluster mode") { test("handles standalone cluster mode") {
...@@ -186,6 +190,7 @@ class SparkSubmitSuite extends FunSuite with Matchers { ...@@ -186,6 +190,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
"--supervise", "--supervise",
"--driver-memory", "4g", "--driver-memory", "4g",
"--driver-cores", "5", "--driver-cores", "5",
"--conf", "spark.shuffle.spill=false",
"thejar.jar", "thejar.jar",
"arg1", "arg2") "arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs) val appArgs = new SparkSubmitArguments(clArgs)
@@ -195,9 +200,10 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     childArgsStr should include regex ("launch spark://h:p .*thejar.jar org.SomeClass arg1 arg2")
     mainClass should be ("org.apache.spark.deploy.Client")
     classpath should have size (0)
-    sysProps should have size (2)
+    sysProps should have size (3)
     sysProps.keys should contain ("spark.jars")
     sysProps.keys should contain ("SPARK_SUBMIT")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
test("handles standalone client mode") { test("handles standalone client mode") {
...@@ -208,6 +214,7 @@ class SparkSubmitSuite extends FunSuite with Matchers { ...@@ -208,6 +214,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
"--total-executor-cores", "5", "--total-executor-cores", "5",
"--class", "org.SomeClass", "--class", "org.SomeClass",
"--driver-memory", "4g", "--driver-memory", "4g",
"--conf", "spark.shuffle.spill=false",
"thejar.jar", "thejar.jar",
"arg1", "arg2") "arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs) val appArgs = new SparkSubmitArguments(clArgs)
@@ -218,6 +225,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     classpath(0) should endWith ("thejar.jar")
     sysProps("spark.executor.memory") should be ("5g")
     sysProps("spark.cores.max") should be ("5")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
test("handles mesos client mode") { test("handles mesos client mode") {
...@@ -228,6 +236,7 @@ class SparkSubmitSuite extends FunSuite with Matchers { ...@@ -228,6 +236,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
"--total-executor-cores", "5", "--total-executor-cores", "5",
"--class", "org.SomeClass", "--class", "org.SomeClass",
"--driver-memory", "4g", "--driver-memory", "4g",
"--conf", "spark.shuffle.spill=false",
"thejar.jar", "thejar.jar",
"arg1", "arg2") "arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs) val appArgs = new SparkSubmitArguments(clArgs)
@@ -238,6 +247,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     classpath(0) should endWith ("thejar.jar")
     sysProps("spark.executor.memory") should be ("5g")
     sysProps("spark.cores.max") should be ("5")
+    sysProps("spark.shuffle.spill") should be ("false")
   }

   test("launch simple application with spark-submit") {
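Every test above follows the same round trip: pass `--conf spark.shuffle.spill=false` on the command line and assert the property shows up in the parsed arguments or the final `sysProps`. A condensed sketch of that round trip; note that `SparkSubmitArguments` is `private[spark]`, so this would only compile inside the `org.apache.spark` package tree, and the argument values are illustrative:

    package org.apache.spark.deploy

    object RoundTripSketch {
      def main(args: Array[String]): Unit = {
        val clArgs = Seq(
          "--master", "local",
          "--class", "org.SomeClass",
          "--conf", "spark.shuffle.spill=false",  // handled by the new --conf case
          "thejar.jar")
        val appArgs = new SparkSubmitArguments(clArgs)
        // The property lands in the sparkProperties map added by this patch.
        println(appArgs.sparkProperties("spark.shuffle.spill"))  // "false"
      }
    }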
@@ -42,13 +42,15 @@ val sc = new SparkContext(new SparkConf())
 Then, you can supply configuration values at runtime:
 {% highlight bash %}
-./bin/spark-submit --name "My fancy app" --master local[4] myApp.jar
+./bin/spark-submit --name "My app" --master local[4] --conf spark.shuffle.spill=false
+  --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" myApp.jar
 {% endhighlight %}

 The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
 tool support two ways to load configurations dynamically. The first are command line options,
-such as `--master`, as shown above. Running `./bin/spark-submit --help` will show the entire list
-of options.
+such as `--master`, as shown above. `spark-submit` can accept any Spark property using the `--conf`
+flag, but uses special flags for properties that play a part in launching the Spark application.
+Running `./bin/spark-submit --help` will show the entire list of these options.

 `bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which
 each line consists of a key and a value separated by whitespace. For example:
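The hunk cuts off before the example rows, but the format the sentence describes is simply one property per line: key, whitespace, value. A hypothetical parser for such lines (the property names and values here are illustrative, not quoted from the doc):

    object DefaultsFormatSketch extends App {
      // Each spark-defaults.conf line: key, whitespace, value.
      val lines = Seq(
        "spark.master            spark://5.6.7.8:7077",  // illustrative values
        "spark.executor.memory   512m")
      for (line <- lines) {
        line.trim.split("\\s+", 2) match {
          case Array(k, v) => println(s"$k -> $v")
          case _           => // skip blank or malformed lines
        }
      }
    }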
@@ -33,6 +33,7 @@ dependencies, and can support different cluster managers and deploy modes that S
   --class <main-class>
   --master <master-url> \
   --deploy-mode <deploy-mode> \
+  --conf <key>=<value> \
   ... # other options
   <application-jar> \
   [application-arguments]
@@ -43,6 +44,7 @@ Some of the commonly used options are:
 * `--class`: The entry point for your application (e.g. `org.apache.spark.examples.SparkPi`)
 * `--master`: The [master URL](#master-urls) for the cluster (e.g. `spark://23.195.26.187:7077`)
 * `--deploy-mode`: Whether to deploy your driver on the worker nodes (`cluster`) or locally as an external client (`client`) (default: `client`)*
+* `--conf`: Arbitrary Spark configuration property in key=value format. For values that contain spaces, wrap "key=value" in quotes (as shown).
 * `application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
 * `application-arguments`: Arguments passed to the main method of your main class, if any
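Tying the pieces together: properties passed with `--conf` become JVM system properties in the launched application, and the zero-argument `SparkConf` constructor picks up any system property whose name starts with `spark.`. A sketch of what the application side sees (assumes the application was launched via spark-submit with the `spark.shuffle.spill` setting used in the tests above):

    import org.apache.spark.SparkConf

    object ConfPickupSketch {
      def main(args: Array[String]): Unit = {
        // new SparkConf() loads every "spark.*" system property, including
        // ones set by spark-submit from --conf and spark-defaults.conf.
        val conf = new SparkConf()
        println(conf.getOption("spark.shuffle.spill"))  // Some("false") when passed via --conf
      }
    }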