diff --git a/bin/run-example b/bin/run-example
index e1b0d5789bed62b6a8426d2c2106bebc07544c75..dd0e3c41202602ab11975ec5d3b781527e9a9da1 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -21,56 +21,5 @@ if [ -z "${SPARK_HOME}" ]; then
   export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
 fi
 
-EXAMPLES_DIR="${SPARK_HOME}"/examples
-
-. "${SPARK_HOME}"/bin/load-spark-env.sh
-
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
-  echo "  - set MASTER=XX to use a specific master" 1>&2
-  echo "  - can use abbreviated example class name relative to com.apache.spark.examples" 1>&2
-  echo "     (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)" 1>&2
-  exit 1
-fi
-
-if [ -f "${SPARK_HOME}/RELEASE" ]; then
-  JAR_PATH="${SPARK_HOME}/lib"
-else
-  JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
-fi
-
-JAR_COUNT=0
-
-for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
-  if [[ ! -e "$f" ]]; then
-    echo "Failed to find Spark examples assembly in ${SPARK_HOME}/lib or ${SPARK_HOME}/examples/target" 1>&2
-    echo "You need to build Spark before running this program" 1>&2
-    exit 1
-  fi
-  SPARK_EXAMPLES_JAR="$f"
-  JAR_COUNT=$((JAR_COUNT+1))
-done
-
-if [ "$JAR_COUNT" -gt "1" ]; then
-  echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
-  ls "${JAR_PATH}"/spark-examples-*hadoop*.jar 1>&2
-  echo "Please remove all but one jar." 1>&2
-  exit 1
-fi
-
-export SPARK_EXAMPLES_JAR
-
-EXAMPLE_MASTER=${MASTER:-"local[*]"}
-
-if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
-  EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
-fi
-
-exec "${SPARK_HOME}"/bin/spark-submit \
-  --master $EXAMPLE_MASTER \
-  --class $EXAMPLE_CLASS \
-  "$SPARK_EXAMPLES_JAR" \
-  "$@"
+export _SPARK_CMD_USAGE="Usage: ./bin/run-example [options] example-class [example args]"
+exec "${SPARK_HOME}"/bin/spark-submit run-example "$@"
diff --git a/bin/run-example.cmd b/bin/run-example.cmd
index 64f6bc3728d07833238e50ac83eee5503f1b73eb..f9b786e92b823f3a417a37bfa775a5133c8101e5 100644
--- a/bin/run-example.cmd
+++ b/bin/run-example.cmd
@@ -17,7 +17,6 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
-rem This is the entry point for running a Spark example. To avoid polluting
-rem the environment, it just launches a new cmd to do the real work.
-
-cmd /V /E /C "%~dp0run-example2.cmd" %*
+set SPARK_HOME=%~dp0..
+set _SPARK_CMD_USAGE=Usage: ./bin/run-example [options] example-class [example args]
+cmd /V /E /C "%~dp0spark-submit.cmd" run-example %*
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
deleted file mode 100644
index fada43581d1845b4e88617ee9ef2f5fed97a9e38..0000000000000000000000000000000000000000
--- a/bin/run-example2.cmd
+++ /dev/null
@@ -1,85 +0,0 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements.  See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License.  You may obtain a copy of the License at
-rem
-rem    http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-set SCALA_VERSION=2.10
-
-rem Figure out where the Spark framework is installed
-set SPARK_HOME=%~dp0..
-
-call "%SPARK_HOME%\bin\load-spark-env.cmd"
-
-rem Test that an argument was given
-if not "x%1"=="x" goto arg_given
-  echo Usage: run-example ^<example-class^> [example-args]
-  echo   - set MASTER=XX to use a specific master
-  echo   - can use abbreviated example class name relative to com.apache.spark.examples
-  echo      (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)
-  goto exit
-:arg_given
-
-set EXAMPLES_DIR=%SPARK_HOME%\examples
-
-rem Figure out the JAR file that our examples were packaged into.
-set SPARK_EXAMPLES_JAR=
-if exist "%SPARK_HOME%\RELEASE" (
-  for %%d in ("%SPARK_HOME%\lib\spark-examples*.jar") do (
-    set SPARK_EXAMPLES_JAR=%%d
-  )
-) else (
-  for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
-    set SPARK_EXAMPLES_JAR=%%d
-  )
-)
-if "x%SPARK_EXAMPLES_JAR%"=="x" (
-  echo Failed to find Spark examples assembly JAR.
-  echo You need to build Spark before running this program.
-  goto exit
-)
-
-rem Set master from MASTER environment variable if given
-if "x%MASTER%"=="x" (
-  set EXAMPLE_MASTER=local[*]
-) else (
-  set EXAMPLE_MASTER=%MASTER%
-)
-
-rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
-set EXAMPLE_CLASS=%1
-set PREFIX=%EXAMPLE_CLASS:~0,25%
-if not %PREFIX%==org.apache.spark.examples (
-  set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
-)
-
-rem Get the tail of the argument list, to skip the first one. This is surprisingly
-rem complicated on Windows.
-set "ARGS="
-:top
-shift
-if "%~1" neq "" (
-  set ARGS=%ARGS% "%~1"
-  goto :top
-)
-if defined ARGS set ARGS=%ARGS:~1%
-
-call "%SPARK_HOME%\bin\spark-submit.cmd" ^
-  --master %EXAMPLE_MASTER% ^
-  --class %EXAMPLE_CLASS% ^
-  "%SPARK_EXAMPLES_JAR%" %ARGS%
-
-:exit
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index ac4e9b90f01777a3573125389e5ddae7f806e9d9..dbdd42ff9e0872e357deff64b33af2edabea323a 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -166,11 +166,14 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 
 # Copy jars
 cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
-cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
 # This will fail if the -Pyarn profile is not provided
 # In this case, silence the error and ignore the return code of this command
 cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
 
+# Copy examples and dependencies
+mkdir -p "$DISTDIR/examples/jars"
+cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"
+
 # Copy example sources (needed for python and SQL)
 mkdir -p "$DISTDIR/examples/src/main"
 cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
diff --git a/examples/pom.xml b/examples/pom.xml
index 92bb373c7382d29c244c5d91b9522ec3f620ba7a..1aa730c0dcdac56f1c817190fff261a5d41dc744 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -322,36 +322,36 @@
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>prepare-test-jar</id>
+            <phase>none</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
         <configuration>
-          <shadedArtifactAttached>false</shadedArtifactAttached>
-          <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
-          <artifactSet>
-            <includes>
-              <include>*:*</include>
-            </includes>
-          </artifactSet>
-          <filters>
-            <filter>
-              <artifact>*:*</artifact>
-              <excludes>
-                <exclude>META-INF/*.SF</exclude>
-                <exclude>META-INF/*.DSA</exclude>
-                <exclude>META-INF/*.RSA</exclude>
-              </excludes>
-            </filter>
-          </filters>
-          <transformers>
-            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
-            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-              <resource>reference.conf</resource>
-            </transformer>
-            <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
-              <resource>log4j.properties</resource>
-            </transformer>
-          </transformers>
+          <outputDirectory>${jars.target.dir}</outputDirectory>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <includeScope>runtime</includeScope>
+              <outputDirectory>${jars.target.dir}</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
   <profiles>
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index b2dd6ac4c39823937e708617047a9e50468ba3ab..56e4107c5a0c75a5680c77cc1766edd099a547be 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -30,7 +30,8 @@ import static org.apache.spark.launcher.CommandBuilderUtils.*;
  * driver-side options and special parsing behavior needed for the special-casing certain internal
  * Spark applications.
  * <p>
- * This class has also some special features to aid launching pyspark.
+ * This class has also some special features to aid launching shells (pyspark and sparkR) and also
+ * examples.
  */
 class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
 
@@ -62,6 +63,17 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
    */
   static final String SPARKR_SHELL_RESOURCE = "sparkr-shell";
 
+  /**
+   * Name of app resource used to identify examples. When running examples, args[0] should be
+   * this name. The app resource will identify the example class to run.
+   */
+  static final String RUN_EXAMPLE = "run-example";
+
+  /**
+   * Prefix for example class names.
+   */
+  static final String EXAMPLE_CLASS_PREFIX = "org.apache.spark.examples.";
+
   /**
    * This map must match the class names for available special classes, since this modifies the way
    * command line parsing works. This maps the class name to the resource to use when calling
@@ -78,6 +90,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
 
   final List<String> sparkArgs;
   private final boolean printInfo;
+  private final boolean isExample;
 
   /**
    * Controls whether mixing spark-submit arguments with app arguments is allowed. This is needed
@@ -89,10 +102,13 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
   SparkSubmitCommandBuilder() {
     this.sparkArgs = new ArrayList<>();
     this.printInfo = false;
+    this.isExample = false;
   }
 
   SparkSubmitCommandBuilder(List<String> args) {
-    this.sparkArgs = new ArrayList<>();
+    this.allowsMixedArguments = false;
+
+    boolean isExample = false;
     List<String> submitArgs = args;
     if (args.size() > 0 && args.get(0).equals(PYSPARK_SHELL)) {
       this.allowsMixedArguments = true;
@@ -102,10 +118,14 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       this.allowsMixedArguments = true;
       appResource = SPARKR_SHELL_RESOURCE;
       submitArgs = args.subList(1, args.size());
-    } else {
-      this.allowsMixedArguments = false;
+    } else if (args.size() > 0 && args.get(0).equals(RUN_EXAMPLE)) {
+      isExample = true;
+      submitArgs = args.subList(1, args.size());
     }
 
+    this.sparkArgs = new ArrayList<>();
+    this.isExample = isExample;
+
     OptionParser parser = new OptionParser();
     parser.parse(submitArgs);
     this.printInfo = parser.infoRequested;
@@ -155,6 +175,10 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       args.add(propertiesFile);
     }
 
+    if (isExample) {
+      jars.addAll(findExamplesJars());
+    }
+
     if (!jars.isEmpty()) {
       args.add(parser.JARS);
       args.add(join(",", jars));
@@ -170,6 +194,9 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       args.add(join(",", pyFiles));
     }
 
+    if (!printInfo) {
+      checkArgument(!isExample || mainClass != null, "Missing example class name.");
+    }
     if (mainClass != null) {
       args.add(parser.CLASS);
       args.add(mainClass);
@@ -308,6 +335,25 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       mainClass.equals("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"));
   }
 
+  private List<String> findExamplesJars() {
+    List<String> examplesJars = new ArrayList<>();
+    String sparkHome = getSparkHome();
+
+    File jarsDir;
+    if (new File(sparkHome, "RELEASE").isFile()) {
+      jarsDir = new File(sparkHome, "examples/jars");
+    } else {
+      jarsDir = new File(sparkHome,
+        String.format("examples/target/scala-%s/jars", getScalaVersion()));
+    }
+    checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.",
+      jarsDir.getAbsolutePath());
+
+    for (File f: jarsDir.listFiles()) {
+      examplesJars.add(f.getAbsolutePath());
+    }
+    return examplesJars;
+  }
   private class OptionParser extends SparkSubmitOptionParser {
 
@@ -367,6 +413,14 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
       if (allowsMixedArguments) {
         appArgs.add(opt);
         return true;
+      } else if (isExample) {
+        String className = opt;
+        if (!className.startsWith(EXAMPLE_CLASS_PREFIX)) {
+          className = EXAMPLE_CLASS_PREFIX + className;
+        }
+        mainClass = className;
+        appResource = "spark-internal";
+        return false;
       } else {
         checkArgument(!opt.startsWith("-"), "Unrecognized option: %s", opt);
         sparkArgs.add(opt);
@@ -376,8 +430,10 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
 
     @Override
     protected void handleExtraArgs(List<String> extra) {
-      for (String arg : extra) {
-        sparkArgs.add(arg);
+      if (isExample) {
+        appArgs.addAll(extra);
+      } else {
+        sparkArgs.addAll(extra);
       }
     }
 
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index 00f967122bd703342d73b593536feb771b3d3e7b..b7f4f2efc5d84e3869adcc843d80efc43ddbfe87 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -151,6 +151,24 @@ public class SparkSubmitCommandBuilderSuite extends BaseSuite {
     assertEquals("arg1", cmd.get(cmd.size() - 1));
   }
 
+  @Test
+  public void testExamplesRunner() throws Exception {
+    List<String> sparkSubmitArgs = Arrays.asList(
+      SparkSubmitCommandBuilder.RUN_EXAMPLE,
+      parser.MASTER + "=foo",
+      parser.DEPLOY_MODE + "=bar",
+      "SparkPi",
+      "42");
+
+    Map<String, String> env = new HashMap<String, String>();
+    List<String> cmd = buildCommand(sparkSubmitArgs, env);
+    assertEquals("foo", findArgValue(cmd, parser.MASTER));
+    assertEquals("bar", findArgValue(cmd, parser.DEPLOY_MODE));
+    assertEquals(SparkSubmitCommandBuilder.EXAMPLE_CLASS_PREFIX + "SparkPi",
+      findArgValue(cmd, parser.CLASS));
+    assertEquals("42", cmd.get(cmd.size() - 1));
+  }
+
   private void testCmdBuilder(boolean isDriver, boolean useDefaultPropertyFile) throws Exception {
     String deployMode = isDriver ? "client" : "cluster";
 
diff --git a/pom.xml b/pom.xml
index 0faa691c5e78bf91226163fec77649accb8f7f10..92a32e7797bbc92044b192f4016d9940f15f8d42 100644
--- a/pom.xml
+++ b/pom.xml
@@ -178,6 +178,9 @@
     <test.java.home>${java.home}</test.java.home>
     <test.exclude.tags></test.exclude.tags>
 
+    <!-- Modules that copy jars to the build directory should do so under this location. -->
+    <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
+
     <!--
       Dependency scopes that can be overridden by enabling certain profiles. These profiles are
       declared in the projects that build assemblies.
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index d7519e82b8706e9261e4b141a9db2e05ccd49db9..f76cda08ec36ac72e22a75aed85bb2b54dbc4815 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -55,10 +55,12 @@ object BuildCommons {
     Seq("yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl",
       "docker-integration-tests").map(ProjectRef(buildLocation, _))
 
-  val assemblyProjects@Seq(assembly, examples, networkYarn, streamingKafkaAssembly, streamingKinesisAslAssembly) =
-    Seq("assembly", "examples", "network-yarn", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
+  val assemblyProjects@Seq(assembly, networkYarn, streamingKafkaAssembly, streamingKinesisAslAssembly) =
+    Seq("assembly", "network-yarn", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
       .map(ProjectRef(buildLocation, _))
 
+  val copyJarsProjects@Seq(examples) = Seq("examples").map(ProjectRef(buildLocation, _))
+
   val tools = ProjectRef(buildLocation, "tools")
   // Root project.
   val spark = ProjectRef(buildLocation, "spark")
@@ -142,6 +144,8 @@ object SparkBuild extends PomBuild {
     scalacOptions <+= target.map(t => "-P:genjavadoc:out=" + (t / "java")))
 
   lazy val sharedSettings = sparkGenjavadocSettings ++ Seq (
+    exportJars in Compile := true,
+    exportJars in Test := false,
     javaHome := sys.env.get("JAVA_HOME")
       .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() })
       .map(file),
@@ -236,7 +240,7 @@ object SparkBuild extends PomBuild {
 
   // Note ordering of these settings matter.
   /* Enable shared settings on all projects */
-  (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ Seq(spark, tools))
+  (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools))
     .foreach(enable(sharedSettings ++ DependencyOverrides.settings ++
       ExcludedDependencies.settings))
 
@@ -255,6 +259,9 @@ object SparkBuild extends PomBuild {
   /* Unsafe settings */
   enable(Unsafe.settings)(unsafe)
 
+  /* Set up tasks to copy dependencies during packaging. */
+  copyJarsProjects.foreach(enable(CopyDependencies.settings))
+
   /* Enable Assembly for all assembly projects */
   assemblyProjects.foreach(enable(Assembly.settings))
 
@@ -686,6 +693,34 @@ object Unidoc {
   )
 }
 
+object CopyDependencies {
+
+  val copyDeps = TaskKey[Unit]("copyDeps", "Copies needed dependencies to the build directory.")
+  val destPath = (crossTarget in Compile) / "jars"
+
+  lazy val settings = Seq(
+    copyDeps := {
+      val dest = destPath.value
+      if (!dest.isDirectory() && !dest.mkdirs()) {
+        throw new IOException("Failed to create jars directory.")
+      }
+
+      (dependencyClasspath in Compile).value.map(_.data)
+        .filter { jar => jar.isFile() }
+        .foreach { jar =>
+          val destJar = new File(dest, jar.getName())
+          if (destJar.isFile()) {
+            destJar.delete()
+          }
+          Files.copy(jar.toPath(), destJar.toPath())
+        }
+    },
+    crossTarget in (Compile, packageBin) := destPath.value,
+    packageBin in Compile <<= (packageBin in Compile).dependsOn(copyDeps)
+  )
+
+}
+
 object Java8TestSettings {
   import BuildCommons._