From 4cc0984b43095aaebcb565bcb0ac0a71b6cef7ca Mon Sep 17 00:00:00 2001
From: root <root@domU-12-31-39-15-18-A2.compute-1.internal>
Date: Thu, 4 Nov 2010 21:34:55 +0000
Subject: [PATCH] Fixed a small bug in DFS shuffle -- the number of reduce
 tasks was not being set based on numOutputSplits

---
 src/scala/spark/DfsShuffle.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scala/spark/DfsShuffle.scala b/src/scala/spark/DfsShuffle.scala
index 10f77a824a..256bf4ea9c 100644
--- a/src/scala/spark/DfsShuffle.scala
+++ b/src/scala/spark/DfsShuffle.scala
@@ -61,7 +61,8 @@ extends Logging
     })
 
     // Return an RDD that does each of the merges for a given partition
-    return sc.parallelize(0 until numOutputSplits).flatMap((myIndex: Int) => {
+    val indexes = sc.parallelize(0 until numOutputSplits, numOutputSplits)
+    return indexes.flatMap((myIndex: Int) => {
      val combiners = new HashMap[K, C]
      val fs = DfsShuffle.getFileSystem()
      for (i <- Utils.shuffle(0 until numInputSplits)) {
-- 
GitLab
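
Context note (not part of the patch above): the bug arises because sc.parallelize without an explicit slice count falls back to the scheduler's default parallelism, so the merge stage could run with fewer reduce tasks than numOutputSplits. The sketch below illustrates that difference; it is written against the modern Apache Spark API rather than the 2010 codebase in this patch, and the object name, app name, and local[2] master are illustrative assumptions only.

import org.apache.spark.{SparkConf, SparkContext}

object ParallelizeSlicesSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical local context just for demonstration; local[2] gives a
    // defaultParallelism of 2.
    val sc = new SparkContext(
      new SparkConf().setAppName("slices-sketch").setMaster("local[2]"))
    val numOutputSplits = 8

    // Before the fix: the slice count comes from defaultParallelism (2 here),
    // so only 2 merge tasks would run regardless of numOutputSplits.
    val implicitSlices = sc.parallelize(0 until numOutputSplits)

    // After the fix: one slice, and hence one reduce task, per output split.
    val explicitSlices = sc.parallelize(0 until numOutputSplits, numOutputSplits)

    println(implicitSlices.getNumPartitions) // 2
    println(explicitSlices.getNumPartitions) // 8
    sc.stop()
  }
}

Matching slices to splits mirrors how the merge in DfsShuffle is structured: each index value drives exactly one partition's merge, so pinning the slice count to numOutputSplits yields one reduce task per output file.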