From d984b8ab23f0921c4dc145f3efe1f13b172292bb Mon Sep 17 00:00:00 2001 From: Matei Zaharia <matei@eecs.berkeley.edu> Date: Thu, 4 Nov 2010 14:39:55 -0700 Subject: [PATCH] Properly set the number of output splits in DFS shuffle --- src/scala/spark/DfsShuffle.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scala/spark/DfsShuffle.scala b/src/scala/spark/DfsShuffle.scala index 10f77a824a..256bf4ea9c 100644 --- a/src/scala/spark/DfsShuffle.scala +++ b/src/scala/spark/DfsShuffle.scala @@ -61,7 +61,8 @@ extends Logging }) // Return an RDD that does each of the merges for a given partition - return sc.parallelize(0 until numOutputSplits).flatMap((myIndex: Int) => { + val indexes = sc.parallelize(0 until numOutputSplits, numOutputSplits) + return indexes.flatMap((myIndex: Int) => { val combiners = new HashMap[K, C] val fs = DfsShuffle.getFileSystem() for (i <- Utils.shuffle(0 until numInputSplits)) { -- GitLab