From 1e9269c3eeeaa3a481b95521c703032ed84abd68 Mon Sep 17 00:00:00 2001
From: jerryshao <saisai.shao@intel.com>
Date: Thu, 13 Jun 2013 10:46:22 +0800
Subject: [PATCH] reduce ZippedPartitionsRDD's getPreferredLocations complexity

---
 .../main/scala/spark/rdd/ZippedPartitionsRDD.scala   | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala
index dd9f3c2680..b234428ab2 100644
--- a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala
+++ b/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala
@@ -53,14 +53,10 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest](
     val exactMatchLocations = exactMatchPreferredLocations.reduce((x, y) => x.intersect(y))
 
     // Remove exact match and then do host local match.
-    val otherNodePreferredLocations = rddSplitZip.map(x => {
-      x._1.preferredLocations(x._2).map(hostPort => {
-        val host = Utils.parseHostPort(hostPort)._1
-
-        if (exactMatchLocations.contains(host)) null else host
-      }).filter(_ != null)
-    })
-    val otherNodeLocalLocations = otherNodePreferredLocations.reduce((x, y) => x.intersect(y))
+    val exactMatchHosts = exactMatchLocations.map(Utils.parseHostPort(_)._1)
+    val matchPreferredHosts = exactMatchPreferredLocations.map(locs => locs.map(Utils.parseHostPort(_)._1))
+      .reduce((x, y) => x.intersect(y))
+    val otherNodeLocalLocations = matchPreferredHosts.filter { s => !exactMatchHosts.contains(s) }
 
     otherNodeLocalLocations ++ exactMatchLocations
   }
-- 
GitLab