From d984b8ab23f0921c4dc145f3efe1f13b172292bb Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@eecs.berkeley.edu>
Date: Thu, 4 Nov 2010 14:39:55 -0700
Subject: [PATCH] Properly set the number of output splits in DFS shuffle

---
 src/scala/spark/DfsShuffle.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scala/spark/DfsShuffle.scala b/src/scala/spark/DfsShuffle.scala
index 10f77a824a..256bf4ea9c 100644
--- a/src/scala/spark/DfsShuffle.scala
+++ b/src/scala/spark/DfsShuffle.scala
@@ -61,7 +61,8 @@ extends Logging
     })
 
     // Return an RDD that does each of the merges for a given partition
-    return sc.parallelize(0 until numOutputSplits).flatMap((myIndex: Int) => {
+    val indexes = sc.parallelize(0 until numOutputSplits, numOutputSplits)
+    return indexes.flatMap((myIndex: Int) => {
       val combiners = new HashMap[K, C]
       val fs = DfsShuffle.getFileSystem()
       for (i <- Utils.shuffle(0 until numInputSplits)) {
-- 
GitLab