From 0a38637d05d2338503ecceacfb911a6da6d49538 Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@databricks.com>
Date: Mon, 21 Dec 2015 22:15:52 -0800
Subject: [PATCH] [SPARK-11807] Remove support for Hadoop < 2.2 i.e. Hadoop 1
 and Hadoop 2.0

Author: Reynold Xin <rxin@databricks.com>

Closes #10404 from rxin/SPARK-11807.
---
 .../deploy/history/FsHistoryProvider.scala    | 10 +---------
 .../mapreduce/SparkHadoopMapReduceUtil.scala  | 17 ++---------------
 dev/create-release/release-build.sh           |  3 ---
 dev/run-tests-jenkins.py                      |  4 ----
 dev/run-tests.py                              |  2 --
 docs/building-spark.md                        | 18 ++++--------------
 make-distribution.sh                          |  2 +-
 pom.xml                                       | 13 -------------
 sql/README.md                                 |  2 +-
 9 files changed, 9 insertions(+), 62 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 718efc4f3b..6e91d73b6e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -663,16 +663,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   // For testing.
   private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
-    val hadoop1Class = "org.apache.hadoop.hdfs.protocol.FSConstants$SafeModeAction"
     val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
-    val actionClass: Class[_] =
-      try {
-        getClass().getClassLoader().loadClass(hadoop2Class)
-      } catch {
-        case _: ClassNotFoundException =>
-          getClass().getClassLoader().loadClass(hadoop1Class)
-      }
-
+    val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class)
     val action = actionClass.getField("SAFEMODE_GET").get(null)
     val method = dfs.getClass().getMethod("setSafeMode", action.getClass())
     method.invoke(dfs, action).asInstanceOf[Boolean]
diff --git a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
index 943ebcb7bd..82d807fad8 100644
--- a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
+++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -26,17 +26,13 @@ import org.apache.spark.util.Utils
 
 private[spark] trait SparkHadoopMapReduceUtil {
   def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.JobContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.JobContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
     ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
   }
 
   def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.TaskAttemptContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
     ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
   }
@@ -69,13 +65,4 @@ trait SparkHadoopMapReduceUtil {
       }
     }
   }
-
-  private def firstAvailableClass(first: String, second: String): Class[_] = {
-    try {
-      Utils.classForName(first)
-    } catch {
-      case e: ClassNotFoundException =>
-        Utils.classForName(second)
-    }
-  }
 }
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index cb79e9eba0..b1895b16b1 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -166,9 +166,6 @@ if [[ "$1" == "package" ]]; then
 
   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
   make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
   make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
   make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 7aecea25b2..42afca0e52 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -163,10 +163,6 @@ def main():
     if "test-maven" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
     # Switch the Hadoop profile based on the PR title:
-    if "test-hadoop1.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0"
-    if "test-hadoop2.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0"
     if "test-hadoop2.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2"
     if "test-hadoop2.3" in ghprb_pull_title:
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 2d4e04c468..17ceba052b 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -301,8 +301,6 @@ def get_hadoop_profiles(hadoop_version):
     """
 
     sbt_maven_hadoop_profiles = {
-        "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.2.1"],
-        "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
         "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
         "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
         "hadoop2.6": ["-Pyarn", "-Phadoop-2.6"],
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 3d38edbdad..785988902d 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -33,13 +33,13 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/
 
 # Building a Runnable Distribution
 
-To create a Spark distribution like those distributed by the 
-[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as 
-to be runnable, use `make-distribution.sh` in the project root directory. It can be configured 
+To create a Spark distribution like those distributed by the
+[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
+to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
 with Maven profile settings and so on like the direct Maven build.
 Example:
 
     ./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn
-
+
 For more information on usage, run `./make-distribution.sh --help`
 
@@ -74,7 +74,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
     <tr><th>Hadoop version</th><th>Profile required</th></tr>
   </thead>
   <tbody>
-    <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
     <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
     <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
     <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -82,15 +81,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
   </tbody>
 </table>
 
-For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hadoop versions without YARN, use:
-
-{% highlight bash %}
-# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
-
-# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
-{% endhighlight %}
 
 You can enable the `yarn` profile and optionally set the `yarn.version` property if it is
 different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
diff --git a/make-distribution.sh b/make-distribution.sh
index e64ceb8024..351b9e7d89 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -58,7 +58,7 @@ while (( "$#" )); do
     --hadoop)
       echo "Error: '--hadoop' is no longer supported:"
      echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
-      echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
       exit_with_usage
       ;;
     --with-yarn)
diff --git a/pom.xml b/pom.xml
index 32918d6a74..284c219519 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2442,19 +2442,6 @@
       http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
     -->
 
-    <profile>
-      <id>hadoop-1</id>
-      <properties>
-        <hadoop.version>1.2.1</hadoop.version>
-        <protobuf.version>2.4.1</protobuf.version>
-        <hbase.version>0.98.7-hadoop1</hbase.version>
-        <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
-        <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
-        <akka.group>org.spark-project.akka</akka.group>
-        <akka.version>2.3.4-spark</akka.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.2</id>
       <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
diff --git a/sql/README.md b/sql/README.md
index 63d4dac982..a13bdab6d4 100644
--- a/sql/README.md
+++ b/sql/README.md
@@ -20,7 +20,7 @@ If you are working with Hive 0.12.0, you will need to set several environmental
 ```
 export HIVE_HOME="<path to>/hive/build/dist"
 export HIVE_DEV_HOME="<path to>/hive/"
-export HADOOP_HOME="<path to>/hadoop-1.0.4"
+export HADOOP_HOME="<path to>/hadoop"
 ```
 
 If you are working with Hive 0.13.1, the following steps are needed:
-- 
GitLab
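
Note (illustrative addendum, not part of the commit): the `SparkHadoopMapReduceUtil` simplification works because every Hadoop 2.x release ships `org.apache.hadoop.mapreduce.task.JobContextImpl` and `TaskAttemptContextImpl`, so the Hadoop 1 fallback class names can simply be dropped. A minimal standalone sketch of the same reflection pattern, with the JDK's `Class.forName` standing in for Spark's internal `Utils.classForName` (an assumption made only to keep the sketch self-contained):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID}

object MapReduceContextSketch {
  // Load the Hadoop 2 implementation class reflectively and call its
  // (Configuration, JobID) constructor, mirroring the patched newJobContext.
  def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
    val klass = Class.forName("org.apache.hadoop.mapreduce.task.JobContextImpl")
    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
    ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
  }

  // Same pattern for task attempt contexts, mirroring the patched newTaskAttemptContext.
  def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
    val klass = Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
    ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
  }
}
```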