From 414367850982c4f8fc5e63cc94caa422eb736db5 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@eecs.berkeley.edu>
Date: Mon, 27 Aug 2012 00:13:19 -0700
Subject: [PATCH] Fix minor bugs in Python API examples.

---
 pyspark/pyspark/examples/pi.py | 2 +-
 pyspark/pyspark/examples/tc.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/pyspark/examples/pi.py
index fe63d2c952..348bbc5dce 100644
--- a/pyspark/pyspark/examples/pi.py
+++ b/pyspark/pyspark/examples/pi.py
@@ -9,7 +9,7 @@ if __name__ == "__main__":
         print >> sys.stderr, \
             "Usage: PythonPi <host> [<slices>]"
         exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonKMeans")
+    sc = SparkContext(sys.argv[1], "PythonPi")
     slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
     n = 100000 * slices
     def f(_):
diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/pyspark/examples/tc.py
index 2796fdc6ad..9630e72b47 100644
--- a/pyspark/pyspark/examples/tc.py
+++ b/pyspark/pyspark/examples/tc.py
@@ -22,9 +22,9 @@ if __name__ == "__main__":
         print >> sys.stderr, \
             "Usage: PythonTC <host> [<slices>]"
         exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonKMeans")
+    sc = SparkContext(sys.argv[1], "PythonTC")
     slices = sys.argv[2] if len(sys.argv) > 2 else 2
-    tc = sc.parallelizePairs(generateGraph(), slices).cache()
+    tc = sc.parallelize(generateGraph(), slices).cache()
 
     # Linear transitive closure: each round grows paths by one edge,
     # by joining the graph's edges with the already-discovered paths.
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     # the graph to obtain the path (x, z).
 
     # Because join() joins on keys, the edges are stored in reversed order.
-    edges = tc.mapPairs(lambda (x, y): (y, x))
+    edges = tc.map(lambda (x, y): (y, x))
 
     oldCount = 0L
     nextCount = tc.count()
@@ -40,7 +40,7 @@ if __name__ == "__main__":
         oldCount = nextCount
         # Perform the join, obtaining an RDD of (y, (z, x)) pairs,
        # then project the result to obtain the new (x, z) paths.
-        new_edges = tc.join(edges).mapPairs(lambda (_, (a, b)): (b, a))
+        new_edges = tc.join(edges).map(lambda (_, (a, b)): (b, a))
         tc = tc.union(new_edges).distinct().cache()
         nextCount = tc.count()
         if nextCount == oldCount:
--
GitLab
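
The comments in tc.py describe a linear transitive-closure loop: each round joins the graph's key-reversed edges against the paths found so far, adding one edge to each path, and stops once no new paths appear. Below is a minimal, Spark-free Python sketch of that same idea for reference; the names generate_graph and transitive_closure are illustrative only and do not appear in the patch, and the edge set is hard-coded rather than randomly generated as in tc.py.

def generate_graph():
    # Tiny hard-coded edge set; tc.py builds a random graph instead.
    return {(1, 2), (2, 3), (3, 4)}

def transitive_closure(edges):
    tc = set(edges)
    # Key the graph's edges by their destination, mirroring the
    # edges = tc.map(lambda (x, y): (y, x)) step in tc.py: combining a
    # path (y, z) with a reversed edge (y, x) yields the new path (x, z).
    by_dest = {}
    for x, y in edges:
        by_dest.setdefault(y, []).append(x)
    while True:
        new_paths = {(x, z)
                     for (y, z) in tc
                     for x in by_dest.get(y, [])}
        if new_paths <= tc:   # fixed point; tc.py instead checks that count() stopped growing
            return tc
        tc |= new_paths

print(sorted(transitive_closure(generate_graph())))
# [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]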