diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 93421efcbc30fce2fd3415d0f34c61379e42af47..c611db0af4ca0cf3ace5a1659ea449eb581777a6 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -42,7 +42,7 @@ This would be used to connect to the cluster, write to the dfs and submit jobs t The command to launch the YARN Client is as follows: - SPARK_JAR=<SPARK_YARN_JAR_FILE> ./spark-class org.apache.spark.deploy.yarn.Client \ + SPARK_JAR=<SPARK_ASSEMBLY_JAR_FILE> ./spark-class org.apache.spark.deploy.yarn.Client \ --jar <YOUR_APP_JAR_FILE> \ --class <APP_MAIN_CLASS> \ --args <APP_MAIN_ARGUMENTS> \ @@ -54,14 +54,27 @@ The command to launch the YARN Client is as follows: For example: - SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./spark-class org.apache.spark.deploy.yarn.Client \ - --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar \ - --class org.apache.spark.examples.SparkPi \ - --args yarn-standalone \ - --num-workers 3 \ - --master-memory 4g \ - --worker-memory 2g \ - --worker-cores 1 + # Build the Spark assembly JAR and the Spark examples JAR + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly + + # Configure logging + $ cp conf/log4j.properties.template conf/log4j.properties + + # Submit Spark's ApplicationMaster to YARN's ResourceManager, and instruct Spark to run the SparkPi example + $ SPARK_JAR=./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop2.0.5-alpha.jar \ + ./spark-class org.apache.spark.deploy.yarn.Client \ + --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples-assembly-{{site.SPARK_VERSION}}.jar \ + --class org.apache.spark.examples.SparkPi \ + --args yarn-standalone \ + --num-workers 3 \ + --master-memory 4g \ + --worker-memory 2g \ + --worker-cores 1 + + # Examine the output (replace $YARN_APP_ID in the following with the "application identifier" output by the previous command) + # (Note: YARN_APP_LOGS_DIR is usually /tmp/logs or $HADOOP_HOME/logs/userlogs depending on the Hadoop version.) + $ cat $YARN_APP_LOGS_DIR/$YARN_APP_ID/container*_000001/stdout + Pi is roughly 3.13794 The above starts a YARN Client programs which periodically polls the Application Master for status updates and displays them in the console. The client will exit once your application has finished running. diff --git a/examples/pom.xml b/examples/pom.xml index 224cf6c96c9cc91b80a03963f777c246fae403f7..e48f5b50abcc6f7f88bf6bdb40e42be822767539 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -127,20 +127,6 @@ </dependency> </dependencies> - <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn</artifactId> - <version>${project.version}</version> - <scope>provided</scope> - </dependency> - </dependencies> - </profile> - </profiles> - <build> <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> diff --git a/pom.xml b/pom.xml index c561b099ab76a8dd304a2f65bbb4382d5399d2c9..5c1f9f03246bc714e5e394c6bcc13a21eb9636e3 100644 --- a/pom.xml +++ b/pom.xml @@ -368,6 +368,99 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> <!-- Specify Avro version because Kafka also has it as a dependency --> <dependency> <groupId>org.apache.avro</groupId> @@ -620,131 +713,6 @@ <dependencyManagement> <dependencies> - <!-- TODO: check versions, bringover from yarn branch ! --> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> </dependencies> </dependencyManagement> </profile> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index d038a4f479682a10e316bc5493f77a278ca4afbb..51773258871aadc9b358e88da465bf140d498e54 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -38,10 +38,10 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) lazy val repl = Project("repl", file("repl"), settings = replSettings) - .dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*) + .dependsOn(core, bagel, mllib) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*) + .dependsOn(core, mllib, bagel, streaming) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index d61b36a61a6559a210a54270d1dee466bb77a5a4..3685561501d67c8c47f0109a1a66588ec8e3acf5 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -105,16 +105,6 @@ </build> <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn</artifactId> - <version>${project.version}</version> - </dependency> - </dependencies> - </profile> <profile> <id>deb</id> <build> diff --git a/repl/pom.xml b/repl/pom.xml index a1c87d7618249cc720a2ecb65aa75dc8ae9248e8..3123b37780dc509c8e302b7fbe41f79610127ad6 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -131,16 +131,4 @@ </plugin> </plugins> </build> - <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn</artifactId> - <version>${project.version}</version> - </dependency> - </dependencies> - </profile> - </profiles> </project> diff --git a/yarn/pom.xml b/yarn/pom.xml index 654b5bcd2dc99f5f60f9d76e389c36237fc08d45..27b2002095fa8c2fa394c6469ad330e5a4bd6a07 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -30,6 +30,34 @@ <name>Spark Project YARN Support</name> <url>http://spark.incubator.apache.org/</url> + <dependencies> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-ipc</artifactId> + </dependency> + </dependencies> + <build> <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> @@ -75,37 +103,4 @@ </plugin> </plugins> </build> - - <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - </dependency> - </dependencies> - </profile> - </profiles> </project>