Skip to content
Snippets Groups Projects
Commit aa43a8da authored by Andrew Or
Browse files

[SPARK-4281][Build] Package Yarn shuffle service into its own jar

This is another addendum to #3082, which added the Yarn shuffle service that runs inside the NodeManager (NM). This PR makes the feature much more usable by packaging enough dependencies into the jar to run the service inside an NM. After these changes, the user can run `./make-distribution.sh` and find a `spark-<version>-yarn-shuffle.jar` in their `lib` directory (the jar was given this name in commit fb7f398, listed below). The equivalent change is done in SBT by making the `network-yarn` module an assembly project.

Author: Andrew Or <andrew@databricks.com>

Closes #3147 from andrewor14/yarn-shuffle-build and squashes the following commits:

bda58d0 [Andrew Or] Fix line too long
81e9705 [Andrew Or] Merge branch 'master' of github.com:apache/spark into yarn-shuffle-build
fb7f398 [Andrew Or] Rename jar to spark-{VERSION}-yarn-shuffle.jar
65db822 [Andrew Or] Actually mark slf4j as provided
abcefd1 [Andrew Or] Do the same for SBT
c653028 [Andrew Or] Package network-yarn and its dependencies
parent 6e3c5a29
No related branches found
No related tags found
No related merge requests found
...@@ -181,6 +181,7 @@ echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DI ...@@ -181,6 +181,7 @@ echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DI
# Copy jars # Copy jars
cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/" cp "$FWDIR"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$FWDIR"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/" cp "$FWDIR"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
cp "$FWDIR"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/"
# Copy example sources (needed for python and SQL) # Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main" mkdir -p "$DISTDIR/examples/src/main"
......
...@@ -41,12 +41,13 @@ ...@@ -41,12 +41,13 @@
<groupId>io.netty</groupId> <groupId>io.netty</groupId>
<artifactId>netty-all</artifactId> <artifactId>netty-all</artifactId>
</dependency> </dependency>
<!-- Provided dependencies -->
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency> </dependency>
<!-- Provided dependencies -->
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
......
...@@ -42,12 +42,13 @@ ...@@ -42,12 +42,13 @@
<artifactId>spark-network-common_${scala.binary.version}</artifactId> <artifactId>spark-network-common_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<!-- Provided dependencies -->
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency> </dependency>
<!-- Provided dependencies -->
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
......
...@@ -54,5 +54,38 @@ ...@@ -54,5 +54,38 @@
<build> <build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
<!-- Runs the maven-shade-plugin "shade" goal in the package phase to build the Yarn shuffle service jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<!-- The shaded jar is not attached as an additional artifact. -->
<shadedArtifactAttached>false</shadedArtifactAttached>
<!-- Output location and name. make-distribution.sh copies spark-*-yarn-shuffle.jar from this directory, and the SBT build is expected to produce the same jar name. -->
<outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-${project.version}-yarn-shuffle.jar</outputFile>
<artifactSet>
<includes>
<!-- The pattern *:* matches every groupId:artifactId. -->
<include>*:*</include>
</includes>
</artifactSet>
<filters>
<filter>
<!-- Applies to all artifacts: drop the META-INF signature files (.SF, .DSA, .RSA). -->
<!-- NOTE(review): presumably because signatures of the original jars are not valid for the merged jar; confirm. -->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<!-- Bind the shade goal to the package phase. -->
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build> </build>
</project> </project>
...@@ -38,12 +38,12 @@ object BuildCommons { ...@@ -38,12 +38,12 @@ object BuildCommons {
"streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter", "streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter",
"streaming-zeromq").map(ProjectRef(buildLocation, _)) "streaming-zeromq").map(ProjectRef(buildLocation, _))
val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, networkYarn, java8Tests, val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests,
sparkGangliaLgpl, sparkKinesisAsl) = Seq("yarn", "yarn-stable", "yarn-alpha", "network-yarn", sparkGangliaLgpl, sparkKinesisAsl) = Seq("yarn", "yarn-stable", "yarn-alpha",
"java8-tests", "ganglia-lgpl", "kinesis-asl").map(ProjectRef(buildLocation, _)) "java8-tests", "ganglia-lgpl", "kinesis-asl").map(ProjectRef(buildLocation, _))
val assemblyProjects@Seq(assembly, examples) = Seq("assembly", "examples") val assemblyProjects@Seq(assembly, examples, networkYarn) =
.map(ProjectRef(buildLocation, _)) Seq("assembly", "examples", "network-yarn").map(ProjectRef(buildLocation, _))
val tools = ProjectRef(buildLocation, "tools") val tools = ProjectRef(buildLocation, "tools")
// Root project. // Root project.
...@@ -289,8 +289,15 @@ object Assembly { ...@@ -289,8 +289,15 @@ object Assembly {
lazy val settings = assemblySettings ++ Seq( lazy val settings = assemblySettings ++ Seq(
test in assembly := {}, test in assembly := {},
jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" + jarName in assembly <<= (version, moduleName) map { (v, mName) =>
Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" }, if (mName.contains("network-yarn")) {
// This must match the same name used in maven (see network/yarn/pom.xml)
"spark-" + v + "-yarn-shuffle.jar"
} else {
mName + "-" + v + "-hadoop" +
Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar"
}
},
mergeStrategy in assembly := { mergeStrategy in assembly := {
case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment