diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 3dbb35f7054a28a9b302418766bfcc46b876d9ba..af4f00054997ce621e649c494cc775868f583194 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -118,14 +118,14 @@ if [[ ! "$@" =~ --skip-publish ]]; then

   rm -rf $SPARK_REPO

-  build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-    -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+  build/mvn -DskipTests -Pyarn -Phive \
+    -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install

   ./dev/change-version-to-2.11.sh

-  build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-    -Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+  build/mvn -DskipTests -Pyarn -Phive \
+    -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install

   ./dev/change-version-to-2.10.sh
@@ -228,9 +228,9 @@ if [[ ! "$@" =~ --skip-package ]]; then

   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
+  make_binary_release "hadoop1" "-Phadoop-1 -Phive -Phive-thriftserver" "3030" &
+  make_binary_release "hadoop1-scala2.11" "-Phadoop-1 -Phive -Dscala-2.11" "3031" &
+  make_binary_release "cdh4" "-Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
   make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
   make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
   make_binary_release "mapr3" "-Pmapr3 -Phive -Phive-thriftserver" "3035" &
diff --git a/dev/run-tests b/dev/run-tests
index ef587a1a5988c549a1909a495fb388d1b9418171..44d802782c4a4d432f8c0c077cd42b4c1302ea16 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -40,11 +40,11 @@ function handle_error () {
 {
   if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
     if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
-      export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
+      export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=1.0.4"
     elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
-      export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
+      export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=2.0.0-mr1-cdh4.1.1"
     elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
-      export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0"
+      export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2"
     elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
       export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
     fi
diff --git a/dev/scalastyle b/dev/scalastyle
index 4e03f89ed5d5d9dfb689f5419f8b3d4764956607..7f014c82f14c63d942344efb840aeff39e93d448 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -20,8 +20,8 @@
 echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
 echo -e "q\n" | build/sbt -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt
 # Check style with YARN built too
-echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle >> scalastyle.txt
-echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 test:scalastyle >> scalastyle.txt
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt

 ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}')
 rm scalastyle.txt
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 287fcd3c4034f6b409e27e39f0ae4486b9fd1518..6e310ff42478480115d6d5517fa6a2dfa80006b6 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -59,14 +59,14 @@ You can fix this by setting the `MAVEN_OPTS` variable as discussed before.

 # Specifying the Hadoop Version

-Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. Note that certain build profiles are required for particular Hadoop versions:
+Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 2.2.0 by default. Note that certain build profiles are required for particular Hadoop versions:

 <table class="table">
   <thead>
     <tr><th>Hadoop version</th><th>Profile required</th></tr>
   </thead>
   <tbody>
-    <tr><td>1.x to 2.1.x</td><td>(none)</td></tr>
+    <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
     <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
     <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
     <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -77,10 +77,10 @@ For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hado

 {% highlight bash %}
 # Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -DskipTests clean package
+mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package

 # Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package
+mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
 {% endhighlight %}

 You can enable the "yarn" profile and optionally set the "yarn.version" property if it is different from "hadoop.version". Spark only supports YARN versions 2.2.0 and later.
@@ -88,8 +88,9 @@ You can enable the "yarn" profile and optionally set the "yarn.version" property
 Examples:

 {% highlight bash %}
+
 # Apache Hadoop 2.2.X
-mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package
+mvn -Pyarn -Phadoop-2.2 -DskipTests clean package

 # Apache Hadoop 2.3.X
 mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package
diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md
index 96bd69ca3b33be5541d688a9e05efe550b15c009..795dd82a6be06b8ec58e5286595af4c41f188375 100644
--- a/docs/hadoop-third-party-distributions.md
+++ b/docs/hadoop-third-party-distributions.md
@@ -14,7 +14,7 @@ property. For certain versions, you will need to specify additional profiles. Fo
 see the guide on [building with maven](building-spark.html#specifying-the-hadoop-version):

     mvn -Dhadoop.version=1.0.4 -DskipTests clean package
-    mvn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package
+    mvn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package

 The table below lists the corresponding `hadoop.version` code for each CDH/HDP release. Note that
 some Hadoop releases are binary compatible across client versions. This means the pre-built Spark
diff --git a/make-distribution.sh b/make-distribution.sh
index 1bfa9acb1fe6eadedccba33730322d970b5e384a..8d6e91d67593fd0da1e35139986fa735f25c307d 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -58,7 +58,7 @@ while (( "$#" )); do
     --hadoop)
       echo "Error: '--hadoop' is no longer supported:"
      echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
-      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
       exit_with_usage
       ;;
     --with-yarn)
diff --git a/pom.xml b/pom.xml
index 564a443466e5a1aea7bbdf260ed02b5db17a62c9..91d1d843c762ac2fb0d1cf682f777c95e3e96949 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,9 +122,9 @@
     <slf4j.version>1.7.10</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
     <hadoop.version>2.2.0</hadoop.version>
-    <protobuf.version>2.4.1</protobuf.version>
+    <protobuf.version>2.5.0</protobuf.version>
     <yarn.version>${hadoop.version}</yarn.version>
-    <hbase.version>0.98.7-hadoop1</hbase.version>
+    <hbase.version>0.98.7-hadoop2</hbase.version>
     <hbase.artifact>hbase</hbase.artifact>
     <flume.version>1.4.0</flume.version>
     <zookeeper.version>3.4.5</zookeeper.version>
@@ -143,7 +143,7 @@
     <oro.version>2.0.8</oro.version>
     <codahale.metrics.version>3.1.0</codahale.metrics.version>
     <avro.version>1.7.7</avro.version>
-    <avro.mapred.classifier></avro.mapred.classifier>
+    <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
     <jets3t.version>0.7.1</jets3t.version>
     <aws.java.sdk.version>1.8.3</aws.java.sdk.version>
     <aws.kinesis.client.version>1.1.0</aws.kinesis.client.version>
@@ -155,7 +155,7 @@
     <jline.version>${scala.version}</jline.version>
     <jline.groupid>org.scala-lang</jline.groupid>
     <jodd.version>3.6.3</jodd.version>
-    <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
+    <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
     <fasterxml.jackson.version>2.4.4</fasterxml.jackson.version>
     <snappy.version>1.1.1.7</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
@@ -1644,26 +1644,27 @@
     -->

     <profile>
-      <id>hadoop-2.2</id>
+      <id>hadoop-1</id>
       <properties>
-        <hadoop.version>2.2.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
-        <hbase.version>0.98.7-hadoop2</hbase.version>
-        <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
-        <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
+        <hadoop.version>1.0.4</hadoop.version>
+        <protobuf.version>2.4.1</protobuf.version>
+        <hbase.version>0.98.7-hadoop1</hbase.version>
+        <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
+        <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
       </properties>
     </profile>

+    <profile>
+      <id>hadoop-2.2</id>
+      <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
+    </profile>
+
     <profile>
       <id>hadoop-2.3</id>
       <properties>
         <hadoop.version>2.3.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
         <jets3t.version>0.9.3</jets3t.version>
-        <hbase.version>0.98.7-hadoop2</hbase.version>
         <commons.math3.version>3.1.1</commons.math3.version>
-        <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
-        <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
       </properties>
     </profile>

@@ -1671,12 +1672,8 @@
       <id>hadoop-2.4</id>
       <properties>
         <hadoop.version>2.4.0</hadoop.version>
-        <protobuf.version>2.5.0</protobuf.version>
         <jets3t.version>0.9.3</jets3t.version>
-        <hbase.version>0.98.7-hadoop2</hbase.version>
         <commons.math3.version>3.1.1</commons.math3.version>
-        <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
-        <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
       </properties>
     </profile>

diff --git a/yarn/pom.xml b/yarn/pom.xml
index 7c8c3613e7a05645ef921cadad870121b947a2aa..00d219f83670815904d2ac2219065ae9a46234df 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -30,6 +30,7 @@
   <name>Spark Project YARN</name>
   <properties>
     <sbt.project.name>yarn</sbt.project.name>
+    <jersey.version>1.9</jersey.version>
   </properties>

   <dependencies>
@@ -85,7 +86,12 @@
       <artifactId>jetty-servlet</artifactId>
     </dependency>
     <!-- End of shaded deps. -->
-
+
+    <!--
+     See SPARK-3710. hadoop-yarn-server-tests in Hadoop 2.2 fails to pull some needed
+     dependencies, so they need to be added manually for the tests to work.
+    -->
+
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-tests</artifactId>
@@ -97,59 +103,44 @@
       <artifactId>mockito-all</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <version>6.1.26</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+      </exclusions>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-core</artifactId>
+      <version>${jersey.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-json</artifactId>
+      <version>${jersey.version}</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>stax</groupId>
+          <artifactId>stax-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-server</artifactId>
+      <version>${jersey.version}</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
-
-  <!--
-    See SPARK-3710. hadoop-yarn-server-tests in Hadoop 2.2 fails to pull some needed
-    dependencies, so they need to be added manually for the tests to work.
-  -->
-  <profiles>
-    <profile>
-      <id>hadoop-2.2</id>
-      <properties>
-        <jersey.version>1.9</jersey.version>
-      </properties>
-      <dependencies>
-        <dependency>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>jetty</artifactId>
-          <version>6.1.26</version>
-          <exclusions>
-            <exclusion>
-              <groupId>org.mortbay.jetty</groupId>
-              <artifactId>servlet-api</artifactId>
-            </exclusion>
-          </exclusions>
-          <scope>test</scope>
-        </dependency>
-        <dependency>
-          <groupId>com.sun.jersey</groupId>
-          <artifactId>jersey-core</artifactId>
-          <version>${jersey.version}</version>
-          <scope>test</scope>
-        </dependency>
-        <dependency>
-          <groupId>com.sun.jersey</groupId>
-          <artifactId>jersey-json</artifactId>
-          <version>${jersey.version}</version>
-          <scope>test</scope>
-          <exclusions>
-            <exclusion>
-              <groupId>stax</groupId>
-              <artifactId>stax-api</artifactId>
-            </exclusion>
-          </exclusions>
-        </dependency>
-        <dependency>
-          <groupId>com.sun.jersey</groupId>
-          <artifactId>jersey-server</artifactId>
-          <version>${jersey.version}</version>
-          <scope>test</scope>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
+
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
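
Usage sketch (not part of the patch): how builds are expected to select the
reorganized profiles after this change, assembled from the docs/building-spark.md
hunks above; the exact flags for a given environment may differ.

    # Hadoop 2.2.x is now the default, so -Dhadoop.version is no longer needed:
    mvn -Pyarn -Phadoop-2.2 -DskipTests clean package

    # Hadoop 1.x and CDH4 "mr1" builds must now opt in via the new hadoop-1
    # profile, which restores protobuf 2.4.1, hbase 0.98.7-hadoop1, the hadoop1
    # avro classifier, and Jackson 1.8.8:
    mvn -Phadoop-1 -Dhadoop.version=1.2.1 -DskipTests clean package
    mvn -Phadoop-1 -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package

    # To check which value a profile resolves a property to, the standard
    # maven-help-plugin can be used; the resolved value is printed in the
    # build output:
    mvn -Phadoop-1 help:evaluate -Dexpression=protobuf.version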