Skip to content
Snippets Groups Projects
Commit 7b877b27 authored by Neville Li, committed by Patrick Wendell
Browse files

SPARK-2056 Set RDD name to input path

Author: Neville Li <neville@spotify.com>

Closes #992 from nevillelyh/master and squashes the following commits:

3011739 [Neville Li] [SPARK-2056] Set RDD name to input path
parent 3ace10dc
No related branches found
No related tags found
No related merge requests found
......@@ -455,7 +455,7 @@ class SparkContext(config: SparkConf) extends Logging {
*/
def textFile(path: String, minPartitions: Int = defaultMinPartitions): RDD[String] = {
hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
- minPartitions).map(pair => pair._2.toString)
+ minPartitions).map(pair => pair._2.toString).setName(path)
}
/**
......@@ -496,7 +496,7 @@ class SparkContext(config: SparkConf) extends Logging {
classOf[String],
classOf[String],
updateConf,
- minPartitions)
+ minPartitions).setName(path)
}
/**
......@@ -551,7 +551,7 @@ class SparkContext(config: SparkConf) extends Logging {
inputFormatClass,
keyClass,
valueClass,
- minPartitions)
+ minPartitions).setName(path)
}
/**
......@@ -623,7 +623,7 @@ class SparkContext(config: SparkConf) extends Logging {
val job = new NewHadoopJob(conf)
NewFileInputFormat.addInputPath(job, new Path(path))
val updatedConf = job.getConfiguration
- new NewHadoopRDD(this, fClass, kClass, vClass, updatedConf)
+ new NewHadoopRDD(this, fClass, kClass, vClass, updatedConf).setName(path)
}
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment