diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 718efc4f3bd5e329c53132ee4af040896893a959..6e91d73b6e0fdc2e4ff7b7ccab1d8b15edd80dcf 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -663,16 +663,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   // For testing.
   private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
-    val hadoop1Class = "org.apache.hadoop.hdfs.protocol.FSConstants$SafeModeAction"
     val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
-    val actionClass: Class[_] =
-      try {
-        getClass().getClassLoader().loadClass(hadoop2Class)
-      } catch {
-        case _: ClassNotFoundException =>
-          getClass().getClassLoader().loadClass(hadoop1Class)
-      }
-
+    val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class)
     val action = actionClass.getField("SAFEMODE_GET").get(null)
     val method = dfs.getClass().getMethod("setSafeMode", action.getClass())
     method.invoke(dfs, action).asInstanceOf[Boolean]
diff --git a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
index 943ebcb7bd0a10e80a184803ff51902f8053d6c0..82d807fad89387d2a0b0b6ae568a8637b36fd0b7 100644
--- a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
+++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -26,17 +26,13 @@ import org.apache.spark.util.Utils
 private[spark]
 trait SparkHadoopMapReduceUtil {
   def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.JobContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.JobContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
     ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
   }
 
   def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.TaskAttemptContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
     ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
  }
@@ -69,13 +65,4 @@ trait SparkHadoopMapReduceUtil
       }
     }
   }
-
-  private def firstAvailableClass(first: String, second: String): Class[_] = {
-    try {
-      Utils.classForName(first)
-    } catch {
-      case e: ClassNotFoundException =>
-        Utils.classForName(second)
-    }
-  }
 }
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index cb79e9eba06e2351c3ce1ffff38959950d0a70cb..b1895b16b1b612a3d2c186e6a8ffe45c043eeb86 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -166,9 +166,6 @@ if [[ "$1" == "package" ]]; then
 
   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
  # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
   make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
   make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
   make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 7aecea25b20999815bbee3943b534b65ca94eb4b..42afca0e524482ad1ec193dce20385134dd0bd29 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -163,10 +163,6 @@ def main():
     if "test-maven" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
     # Switch the Hadoop profile based on the PR title:
-    if "test-hadoop1.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0"
-    if "test-hadoop2.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0"
     if "test-hadoop2.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2"
     if "test-hadoop2.3" in ghprb_pull_title:
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 2d4e04c4684de12fcec51773d9fa3cebfcb4e35d..17ceba052b8cd06edf91362a9d4aaec836eef90c 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -301,8 +301,6 @@ def get_hadoop_profiles(hadoop_version):
     """
 
     sbt_maven_hadoop_profiles = {
-        "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.2.1"],
-        "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
         "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
         "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
         "hadoop2.6": ["-Pyarn", "-Phadoop-2.6"],
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 3d38edbdad4bcd8bd107305192a80164f4b7cbb6..785988902da8e04c94cab0727ba7c076e0245e10 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -33,13 +33,13 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/
 
 # Building a Runnable Distribution
 
-To create a Spark distribution like those distributed by the 
-[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as 
-to be runnable, use `make-distribution.sh` in the project root directory. It can be configured 
+To create a Spark distribution like those distributed by the
+[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
+to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
 with Maven profile settings and so on like the direct Maven build.
 
 Example: ./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn
-    
+
 For more information on usage, run `./make-distribution.sh --help`
 
 # Setting up Maven's Memory Usage
@@ -74,7 +74,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
     <tr><th>Hadoop version</th><th>Profile required</th></tr>
   </thead>
   <tbody>
-    <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
     <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
     <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
     <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -82,15 +81,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
   </tbody>
 </table>
 
-For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hadoop versions without YARN, use:
-
-{% highlight bash %}
-# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
-
-# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
-{% endhighlight %}
 
 You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different
 from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
diff --git a/make-distribution.sh b/make-distribution.sh
index e64ceb802464c8a5bafffa07b6491a8a8de3fa7a..351b9e7d89a3210f086c94a0ada983de91e37cdf 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -58,7 +58,7 @@ while (( "$#" )); do
     --hadoop)
       echo "Error: '--hadoop' is no longer supported:"
       echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
-      echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
       exit_with_usage
       ;;
     --with-yarn)
diff --git a/pom.xml b/pom.xml
index 32918d6a74af9a999d8445377a6ac6ecfb1bc7af..284c219519bca6300eee53ef44dc8114de994208 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2442,19 +2442,6 @@
       http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
     -->
 
-    <profile>
-      <id>hadoop-1</id>
-      <properties>
-        <hadoop.version>1.2.1</hadoop.version>
-        <protobuf.version>2.4.1</protobuf.version>
-        <hbase.version>0.98.7-hadoop1</hbase.version>
-        <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
-        <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
-        <akka.group>org.spark-project.akka</akka.group>
-        <akka.version>2.3.4-spark</akka.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.2</id>
       <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
diff --git a/sql/README.md b/sql/README.md
index 63d4dac9829e0e7246d15285dfaa73ae9d7ef512..a13bdab6d457f8fc20eaf14aff9e201feb48c7a6 100644
--- a/sql/README.md
+++ b/sql/README.md
@@ -20,7 +20,7 @@ If you are working with Hive 0.12.0, you will need to set several environmental
 ```
 export HIVE_HOME="<path to>/hive/build/dist"
 export HIVE_DEV_HOME="<path to>/hive/"
-export HADOOP_HOME="<path to>/hadoop-1.0.4"
+export HADOOP_HOME="<path to>/hadoop"
 ```
 
 If you are working with Hive 0.13.1, the following steps are needed:
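
The reflection hunks above all reduce to the same pattern: load the Hadoop 2 class name directly and let a missing class fail fast, instead of probing for a Hadoop 1 fallback. Below is a minimal, self-contained sketch of that pattern; it is not part of the patch, the `Hadoop2Reflection` object and `newJobContext` helper are illustrative names only, and plain `Class.forName` stands in here for Spark's internal `Utils.classForName`.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.{JobContext, JobID}

// Illustrative sketch only (not part of the patch): the direct-load style used above.
object Hadoop2Reflection {
  def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
    // Load the Hadoop 2 implementation class directly; on a Hadoop 1.x classpath this
    // throws ClassNotFoundException immediately rather than falling back to the removed
    // org.apache.hadoop.mapreduce.JobContext class name.
    val klass = Class.forName("org.apache.hadoop.mapreduce.task.JobContextImpl")
    // Invoke the (Configuration, JobID) constructor reflectively and cast to the interface.
    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
    ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
  }
}
```

On a pre-2.2 classpath the `Class.forName` call fails at once, which is the intended behavior once the Hadoop 1 fallback paths are gone.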