Commit 34eccedb authored by root

Fixed a rather bad bug in HDFS files that has been in for a while: caching was not working because Split objects did not have a consistent toString value.
parent b6debf5d
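
Before the diffs, a minimal sketch of the failure mode (hypothetical class and names, not from this commit): a split class with no toString override inherits java.lang.Object.toString, whose output embeds the per-instance identity hash code, so two objects describing the same logical split stringify differently and every string-keyed cache lookup misses.

    // Hypothetical illustration of the bug: no toString override means
    // Object.toString, which includes the identity hash code.
    class UnkeyedSplit(val path: String, val offset: Long)

    object BugDemo extends App {
      val a = new UnkeyedSplit("/data/part-0", 0L)
      val b = new UnkeyedSplit("/data/part-0", 0L)
      // Prints something like "UnkeyedSplit@1b6d3586" vs "UnkeyedSplit@4554617c":
      // the same logical split yields two different cache keys.
      println(a)
      println(b)
      println(a.toString == b.toString)  // false
    }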
@@ -14,6 +14,7 @@ import org.apache.hadoop.mapred.Reporter
 @serializable class HdfsSplit(@transient s: InputSplit)
 extends Split {
   val inputSplit = new SerializableWritable[InputSplit](s)
+  override def toString = inputSplit.toString
 }
 
 class HdfsTextFile(sc: SparkContext, path: String)
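
The fix delegates toString to the wrapped Hadoop InputSplit (through the SerializableWritable), relying on the underlying split stringifying consistently; Hadoop's FileSplit, for instance, renders as path:start+length. A sketch of the resulting behavior, with hypothetical names and an assumed FileSplit-style format:

    // Sketch (assumed FileSplit-style format): toString is a pure
    // function of the split's contents, so equal splits yield equal keys.
    class KeyedSplit(val path: String, val start: Long, val length: Long) {
      override def toString = path + ":" + start + "+" + length
    }

    object FixDemo extends App {
      val a = new KeyedSplit("/data/part-0", 0L, 64L)
      val b = new KeyedSplit("/data/part-0", 0L, 64L)
      println(a)                         // /data/part-0:0+64
      println(a.toString == b.toString)  // true: cache lookups now hit
    }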
@@ -198,6 +198,7 @@ extends RDD[T](prev.sparkContext) with Logging {
   override def iterator(split: Split): Iterator[T] = {
     val key = id + "::" + split.toString
+    logInfo("CachedRDD split key is " + key)
     val cache = CachedRDD.cache
     val loading = CachedRDD.loading
     val cachedVal = cache.get(key)
...
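
The hunk above keys the cache on the RDD id plus the split's string form, so a lookup hits only if the same split produces the same string every time it is computed. A self-contained sketch of that lookup pattern (hypothetical cache object; the real CachedRDD additionally coordinates in-flight computations via its loading set):

    import scala.collection.mutable

    // Hypothetical string-keyed split cache mirroring the pattern above.
    object SplitCacheDemo extends App {
      private val cache = mutable.Map[String, Seq[Int]]()

      def iterator(id: Int, split: AnyRef, compute: => Seq[Int]): Iterator[Int] = {
        // Same key format as the diff: only as stable as split.toString.
        val key = id.toString + "::" + split.toString
        cache.getOrElseUpdate(key, compute).iterator  // computes only on a miss
      }

      val split = "part-0:0+64"  // stands in for a split with a stable toString
      val first  = iterator(1, split, { println("computing"); Seq(1, 2, 3) })  // prints "computing"
      val second = iterator(1, split, { println("computing"); Seq(1, 2, 3) })  // silent: cache hit
      println(second.toList)  // List(1, 2, 3)
    }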