From 4cc0984b43095aaebcb565bcb0ac0a71b6cef7ca Mon Sep 17 00:00:00 2001
From: root <root@domU-12-31-39-15-18-A2.compute-1.internal>
Date: Thu, 4 Nov 2010 21:34:55 +0000
Subject: [PATCH] Fixed a small bug in DFS shuffle -- the number of reduce
 tasks was not being set based on numOutputSplits

---
 src/scala/spark/DfsShuffle.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

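Note for reviewers (placed below the ---, so git am ignores it): the second
argument to SparkContext.parallelize sets the number of slices the input
collection is cut into, and each slice becomes one task. A minimal sketch of
the difference, assuming the public Spark API (getNumPartitions is the modern
accessor name; the 2010 codebase exposed splits instead):

    val rdd1 = sc.parallelize(0 until numOutputSplits)
    // rdd1's partition count follows the context's default parallelism,
    // so the merge stage may run with fewer (or more) tasks than there
    // are output splits.

    val rdd2 = sc.parallelize(0 until numOutputSplits, numOutputSplits)
    // rdd2 has exactly numOutputSplits partitions, so the flatMap that
    // follows launches exactly one reduce task per output split.
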
diff --git a/src/scala/spark/DfsShuffle.scala b/src/scala/spark/DfsShuffle.scala
index 10f77a824a..256bf4ea9c 100644
--- a/src/scala/spark/DfsShuffle.scala
+++ b/src/scala/spark/DfsShuffle.scala
@@ -61,7 +61,8 @@ extends Logging
     })
 
     // Return an RDD that does each of the merges for a given partition
-    return sc.parallelize(0 until numOutputSplits).flatMap((myIndex: Int) => {
+    val indexes = sc.parallelize(0 until numOutputSplits, numOutputSplits)
+    return indexes.flatMap((myIndex: Int) => {
       val combiners = new HashMap[K, C]
       val fs = DfsShuffle.getFileSystem()
       for (i <- Utils.shuffle(0 until numInputSplits)) {
-- 
GitLab