From 600e99728dc983e520e0a1b82b3f46e50f1dfac5 Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@eecs.berkeley.edu>
Date: Sat, 21 Jul 2012 21:58:28 -0700
Subject: [PATCH] Fix a bug where an input path was added to a Hadoop job
 configuration twice

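The no-argument newAPIHadoopFile overload created a NewHadoopJob,
called NewFileInputFormat.addInputPath on it, and then forwarded the
resulting configuration to the overload that takes an explicit conf,
which adds the path itself. Since addInputPath appends to the job's
input-path list rather than replacing it, the path ended up listed
twice and every input split was produced twice. Passing a fresh
Configuration lets the downstream overload add the path exactly once.

A minimal standalone sketch of the underlying Hadoop behavior (not
part of this patch; the config key name is version-dependent, e.g.
"mapred.input.dir" on older Hadoop releases):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path
    import org.apache.hadoop.mapreduce.Job
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

    object AddInputPathTwice {
      def main(args: Array[String]): Unit = {
        val job = new Job(new Configuration())
        // addInputPath appends a comma-separated entry rather than
        // replacing the existing value, so adding the same path twice
        // leaves it listed twice in the configuration.
        FileInputFormat.addInputPath(job, new Path("/data/input"))
        FileInputFormat.addInputPath(job, new Path("/data/input"))
        // Prints the path twice (qualified with the filesystem scheme),
        // e.g. "file:/data/input,file:/data/input".
        println(job.getConfiguration.get("mapred.input.dir"))
      }
    }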
---
 core/src/main/scala/spark/SparkContext.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala
index 3d3fda1e47..55d746e525 100644
--- a/core/src/main/scala/spark/SparkContext.scala
+++ b/core/src/main/scala/spark/SparkContext.scala
@@ -182,15 +182,12 @@ class SparkContext(
   /** Get an RDD for a Hadoop file with an arbitrary new API InputFormat. */
   def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]](path: String)
       (implicit km: ClassManifest[K], vm: ClassManifest[V], fm: ClassManifest[F]): RDD[(K, V)] = {
-    val job = new NewHadoopJob
-    NewFileInputFormat.addInputPath(job, new Path(path))
-    val conf = job.getConfiguration
     newAPIHadoopFile(
         path,
         fm.erasure.asInstanceOf[Class[F]],
         km.erasure.asInstanceOf[Class[K]],
         vm.erasure.asInstanceOf[Class[V]],
-        conf)
+        new Configuration)
   }
 
   /** 
-- 
GitLab