From f4d514810e6fd9f42868ebb9a89390c62c3b42e1 Mon Sep 17 00:00:00 2001
From: Konstantin Boudnik <cos@wandisco.com>
Date: Tue, 2 Jul 2013 17:03:24 -0700
Subject: [PATCH] Building spark assembly for further consumption of the Spark project with a deployed cluster

---
 assembly/README                         |  13 +++
 assembly/pom.xml                        | 104 ++++++++++++++++++++++++
 assembly/src/main/assembly/assembly.xml |  68 ++++++++++++++++
 pom.xml                                 |  21 ++++-
 spark-shell                             |   1 +
 5 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 assembly/README
 create mode 100644 assembly/pom.xml
 create mode 100644 assembly/src/main/assembly/assembly.xml

diff --git a/assembly/README b/assembly/README
new file mode 100644
index 0000000000..6ee2a536d7
--- /dev/null
+++ b/assembly/README
@@ -0,0 +1,13 @@
+This is an assembly module for the Spark project.
+
+It creates a single tar.gz file that includes all needed dependencies of the project
+except for org.apache.hadoop.* jars that are supposed to be available from the
+deployed Hadoop cluster.
+
+This module is off by default to avoid spending extra time on top of the repl-bin
+module. To activate it, specify the profile on the command line
+  -Passembly
+
+In case you want to avoid building the time-expensive repl-bin module, which shades
+all the dependencies into a big flat jar, supplement the maven command with
+  -DnoExpensive
diff --git a/assembly/pom.xml b/assembly/pom.xml
new file mode 100644
index 0000000000..1382539f24
--- /dev/null
+++ b/assembly/pom.xml
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.spark-project</groupId>
+    <artifactId>spark-parent</artifactId>
+    <version>0.8.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.spark-project</groupId>
+  <artifactId>spark-assembly</artifactId>
+  <name>Spark Project Assembly</name>
+  <url>http://spark-project.org/</url>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>dist</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <configuration>
+              <descriptors>
+                <descriptor>src/main/assembly/assembly.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles> <!-- each profile only selects the classifier of the Spark artifacts to bundle -->
+    <profile>
+      <id>hadoop1</id>
+      <properties>
+        <classifier.name>hadoop1</classifier.name>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop2</id>
+      <properties>
+        <classifier.name>hadoop2</classifier.name>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop2-yarn</id>
+      <properties>
+        <classifier.name>hadoop2-yarn</classifier.name>
+      </properties>
+    </profile>
+  </profiles>
+  <dependencies> <!-- sibling modules track this module's version via ${project.version} -->
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-core</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-bagel</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>javadoc</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>sources</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-repl</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-streaming</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml
new file mode 100644
index 0000000000..dd05f35f1f
--- /dev/null
+++ b/assembly/src/main/assembly/assembly.xml
@@ -0,0 +1,68 @@
+<assembly>
+  <id>dist</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>dir</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <fileSets>
+    <fileSet>
+      <includes>
+        <include>README</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>
+        ${project.parent.basedir}/core/src/main/resources/spark/ui/static/
+      </directory>
+      <outputDirectory>/ui-resources/spark/ui/static</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>
+        ${project.parent.basedir}/bin/
+      </directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet> <!-- launcher scripts from the top-level project directory -->
+      <directory>
+        ${project.parent.basedir}
+      </directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>run*</include>
+        <include>spark-shell*</include>
+        <include>spark-executor*</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+
+  <dependencySets>
+    <dependencySet> <!-- Spark's own module jars; no outputDirectory, so they land in the archive root -->
+      <includes>
+        <include>org.spark-project:*:jar</include>
+      </includes>
+      <excludes>
+        <exclude>org.spark-project:spark-assembly:jar</exclude> <!-- this module's own artifact -->
+      </excludes>
+    </dependencySet>
+    <dependencySet> <!-- remaining runtime dependencies, kept as jars under lib/ -->
+      <outputDirectory>lib</outputDirectory>
+      <useTransitiveDependencies>true</useTransitiveDependencies>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+      <useProjectArtifact>false</useProjectArtifact>
+      <excludes>
+        <exclude>org.apache.hadoop:*:jar</exclude> <!-- expected from the deployed Hadoop cluster -->
+        <exclude>org.spark-project:*:jar</exclude> <!-- already covered by the first dependencySet -->
+      </excludes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
diff --git a/pom.xml b/pom.xml
index 4b48072c6e..6d9437b69f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -60,7 +60,6 @@
     <module>examples</module>
     <module>streaming</module>
     <module>repl</module>
-    <module>repl-bin</module>
   </modules>
 
   <properties>
@@ -618,5 +617,25 @@
         </dependencies>
       </dependencyManagement>
     </profile>
+    <profile>
+      <id>assembly</id> <!-- opt-in: enable with -Passembly -->
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <modules>
+        <module>assembly</module>
+      </modules>
+    </profile>
+    <profile>
+      <id>expensive-modules</id>
+      <activation>
+        <property>
+          <name>!noExpensive</name> <!-- active unless -DnoExpensive is defined -->
+        </property>
+      </activation>
+      <modules>
+        <module>repl-bin</module>
+      </modules>
+    </profile>
   </profiles>
 </project>
diff --git a/spark-shell b/spark-shell
index 31a4138124..62fc18550d 100755
--- a/spark-shell
+++ b/spark-shell
@@ -79,6 +79,7 @@ if [[ ! $? ]]; then
   saved_stty=""
 fi
 
+export SPARK_LAUNCH_WITH_SCALA=${SPARK_LAUNCH_WITH_SCALA:-1}
 $FWDIR/run $OPTIONS spark.repl.Main "$@"
 
 # record the exit status lest it be overwritten:
-- 
GitLab