From f4d514810e6fd9f42868ebb9a89390c62c3b42e1 Mon Sep 17 00:00:00 2001
From: Konstantin Boudnik <cos@wandisco.com>
Date: Tue, 2 Jul 2013 17:03:24 -0700
Subject: [PATCH] Building spark assembly for further consumption of the Spark
 project with a deployed cluster

---
 assembly/README                         |  13 +++
 assembly/pom.xml                        | 104 ++++++++++++++++++++++++
 assembly/src/main/assembly/assembly.xml |  68 ++++++++++++++++
 pom.xml                                 |  21 ++++-
 spark-shell                             |   1 +
 5 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 assembly/README
 create mode 100644 assembly/pom.xml
 create mode 100644 assembly/src/main/assembly/assembly.xml

diff --git a/assembly/README b/assembly/README
new file mode 100644
index 0000000000..6ee2a536d7
--- /dev/null
+++ b/assembly/README
@@ -0,0 +1,13 @@
+This is an assembly module for the Spark project.
+
+It creates a single tar.gz file that includes all needed dependencies of the
+project except for org.apache.hadoop.* jars, which are supposed to be available
+from the deployed Hadoop cluster.
+
+This module is off by default to avoid spending extra time on top of the
+repl-bin module. To activate it, specify the profile on the command line:
+  -Passembly
+
+If you want to avoid building the time-expensive repl-bin module, which shades
+all the dependencies into a big flat jar, supplement the Maven command with:
+  -DnoExpensive
diff --git a/assembly/pom.xml b/assembly/pom.xml
new file mode 100644
index 0000000000..1382539f24
--- /dev/null
+++ b/assembly/pom.xml
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent> <!-- Parent POM, resolved from ../pom.xml. -->
+    <groupId>org.spark-project</groupId>
+    <artifactId>spark-parent</artifactId>
+    <version>0.8.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.spark-project</groupId>
+  <artifactId>spark-assembly</artifactId> <!-- No <version> element here: the version is inherited from the parent. -->
+  <name>Spark Project Assembly</name>
+  <url>http://spark-project.org/</url>
+
+  <build>
+    <plugins>
+      <plugin> <!-- Runs assembly:single in the package phase using the descriptor listed in the configuration. -->
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>dist</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <configuration>
+              <descriptors>
+                <descriptor>src/main/assembly/assembly.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles> <!-- Each Hadoop profile sets classifier.name, which the dependency classifiers in this POM reference. -->
+    <profile>
+      <id>hadoop1</id>
+      <properties>
+        <classifier.name>hadoop1</classifier.name>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop2</id>
+      <properties>
+        <classifier.name>hadoop2</classifier.name>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop2-yarn</id>
+      <properties>
+        <classifier.name>hadoop2-yarn</classifier.name>
+      </properties>
+    </profile>
+  </profiles> <!-- NOTE(review): none of these profiles declares an activation here; presumably one is activated via the parent POM or the command line. Confirm. -->
+  <dependencies> <!-- Sibling Spark modules bundled into the assembly; their versions track this module's own version. -->
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-core</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-bagel</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>javadoc</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>sources</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-repl</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-streaming</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+  </dependencies>
+</project>
\ No newline at end of file
diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml
new file mode 100644
index 0000000000..dd05f35f1f
--- /dev/null
+++ b/assembly/src/main/assembly/assembly.xml
@@ -0,0 +1,68 @@
+<assembly>
+  <id>dist</id>
+  <formats> <!-- Emit both a tar.gz archive and an exploded directory. -->
+    <format>tar.gz</format>
+    <format>dir</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory> <!-- Contents sit at the archive root, with no wrapping top-level directory. -->
+
+  <fileSets>
+    <fileSet> <!-- README with no <directory> given; presumably resolved against this module's base directory. Confirm. -->
+      <includes>
+        <include>README</include>
+      </includes>
+    </fileSet>
+    <fileSet> <!-- Static UI resources from the core module's source tree, copied under /ui-resources. -->
+      <directory>
+        ${project.parent.basedir}/core/src/main/resources/spark/ui/static/
+      </directory>
+      <outputDirectory>/ui-resources/spark/ui/static</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet> <!-- Everything under the parent project's bin/ directory. -->
+      <directory>
+        ${project.parent.basedir}/bin/
+      </directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet> <!-- Files in the parent project's base directory matching the patterns below, also placed under /bin. -->
+      <directory>
+        ${project.parent.basedir}
+      </directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>run*</include>
+        <include>spark-shell*</include>
+        <include>spark-executor*</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+
+  <dependencySets>
+    <dependencySet> <!-- Spark's own module jars; no outputDirectory is set for this set. -->
+      <includes>
+        <include>org.spark-project:*:jar</include>
+      </includes>
+      <excludes> <!-- Must name this module's artifactId (spark-assembly) so its own jar is kept out of the distribution. -->
+        <exclude>org.spark-project:spark-assembly:jar</exclude>
+      </excludes>
+    </dependencySet>
+    <dependencySet> <!-- Remaining runtime dependencies go to lib/; Hadoop jars are expected from the deployed cluster. -->
+      <outputDirectory>lib</outputDirectory>
+      <useTransitiveDependencies>true</useTransitiveDependencies>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+      <useProjectArtifact>false</useProjectArtifact>
+      <excludes>
+        <exclude>org.apache.hadoop:*:jar</exclude>
+        <exclude>org.spark-project:*:jar</exclude>
+      </excludes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
diff --git a/pom.xml b/pom.xml
index 4b48072c6e..6d9437b69f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -60,7 +60,6 @@
     <module>examples</module>
     <module>streaming</module>
     <module>repl</module>
-    <module>repl-bin</module>
   </modules>
 
   <properties>
@@ -618,5 +617,25 @@
         </dependencies>
       </dependencyManagement>
     </profile>
+    <profile> <!-- Opt-in: pass -Passembly to add the assembly module to the build. -->
+      <id>assembly</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <modules>
+        <module>assembly</module>
+      </modules>
+    </profile>
+    <profile> <!-- Active unless the noExpensive property is defined (e.g. -DnoExpensive); adds repl-bin to the build. -->
+      <id>expensive-modules</id>
+      <activation>
+        <property>
+          <name>!noExpensive</name>
+        </property>
+      </activation>
+      <modules>
+        <module>repl-bin</module>
+      </modules>
+    </profile>
   </profiles>
 </project>
diff --git a/spark-shell b/spark-shell
index 31a4138124..62fc18550d 100755
--- a/spark-shell
+++ b/spark-shell
@@ -79,6 +79,7 @@ if [[ ! $? ]]; then
   saved_stty=""
 fi
 
+export SPARK_LAUNCH_WITH_SCALA=${SPARK_LAUNCH_WITH_SCALA:-1}  # defaults to 1 when unset or empty; a value set by the caller is kept
 $FWDIR/run $OPTIONS spark.repl.Main "$@"
 
 # record the exit status lest it be overwritten:
-- 
GitLab