Commit 34eccedb authored by root

Fixed a rather bad bug in HDFS files that has been in for a while: caching was not working because Split objects did not have a consistent toString value.
parent b6debf5d
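
Before the diffs, a minimal sketch of the failure mode (hypothetical class and names, not from this commit): a split class with no toString override inherits java.lang.Object.toString, whose output embeds the per-instance identity hash code, so two objects describing the same logical split stringify differently and every string-keyed cache lookup misses.

    // Hypothetical illustration of the bug: no toString override means
    // Object.toString, which includes the identity hash code.
    class UnkeyedSplit(val path: String, val offset: Long)

    object BugDemo extends App {
      val a = new UnkeyedSplit("/data/part-0", 0L)
      val b = new UnkeyedSplit("/data/part-0", 0L)
      // Prints something like "UnkeyedSplit@1b6d3586" vs "UnkeyedSplit@4554617c":
      // the same logical split yields two different cache keys.
      println(a)
      println(b)
      println(a.toString == b.toString)  // false
    }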
@@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.Reporter
 @serializable class HdfsSplit(@transient s: InputSplit)
 extends Split {
   val inputSplit = new SerializableWritable[InputSplit](s)
+  override def toString = inputSplit.toString
 }
 
 class HdfsTextFile(sc: SparkContext, path: String)
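
The fix delegates toString to the wrapped Hadoop InputSplit (through the SerializableWritable), relying on the underlying split stringifying consistently; Hadoop's FileSplit, for instance, renders as path:start+length. A sketch of the resulting behavior, with hypothetical names and an assumed FileSplit-style format:

    // Sketch (assumed FileSplit-style format): toString is a pure
    // function of the split's contents, so equal splits yield equal keys.
    class KeyedSplit(val path: String, val start: Long, val length: Long) {
      override def toString = path + ":" + start + "+" + length
    }

    object FixDemo extends App {
      val a = new KeyedSplit("/data/part-0", 0L, 64L)
      val b = new KeyedSplit("/data/part-0", 0L, 64L)
      println(a)                         // /data/part-0:0+64
      println(a.toString == b.toString)  // true: cache lookups now hit
    }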
@@ -198,6 +198,7 @@ extends RDD[T](prev.sparkContext) with Logging {
   override def iterator(split: Split): Iterator[T] = {
     val key = id + "::" + split.toString
+    logInfo("CachedRDD split key is " + key)
     val cache = CachedRDD.cache
     val loading = CachedRDD.loading
     val cachedVal = cache.get(key)
...
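
The hunk above keys the cache on the RDD id plus the split's string form, so a lookup hits only if the same split produces the same string every time it is computed. A self-contained sketch of that lookup pattern (hypothetical cache object; the real CachedRDD additionally coordinates in-flight computations via its loading set):

    import scala.collection.mutable

    // Hypothetical string-keyed split cache mirroring the pattern above.
    object SplitCacheDemo extends App {
      private val cache = mutable.Map[String, Seq[Int]]()

      def iterator(id: Int, split: AnyRef, compute: => Seq[Int]): Iterator[Int] = {
        // Same key format as the diff: only as stable as split.toString.
        val key = id.toString + "::" + split.toString
        cache.getOrElseUpdate(key, compute).iterator  // computes only on a miss
      }

      val split = "part-0:0+64"  // stands in for a split with a stable toString
      val first  = iterator(1, split, { println("computing"); Seq(1, 2, 3) })  // prints "computing"
      val second = iterator(1, split, { println("computing"); Seq(1, 2, 3) })  // silent: cache hit
      println(second.toList)  // List(1, 2, 3)
    }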