diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/pyspark/examples/pi.py
index fe63d2c95248b024a6202e2dcf93eacfd267ad31..348bbc5dce19220d1b2a42b094bf565556b9078f 100644
--- a/pyspark/pyspark/examples/pi.py
+++ b/pyspark/pyspark/examples/pi.py
@@ -9,7 +9,7 @@ if __name__ == "__main__":
         print >> sys.stderr, \
             "Usage: PythonPi <host> [<slices>]"
         exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonKMeans")
+    sc = SparkContext(sys.argv[1], "PythonPi")
     slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
     n = 100000 * slices
     def f(_):
diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/pyspark/examples/tc.py
index 2796fdc6add38f2ec719d8e7b0adf275f07450e8..9630e72b47ec42081f6df6f5bff478404e578c96 100644
--- a/pyspark/pyspark/examples/tc.py
+++ b/pyspark/pyspark/examples/tc.py
@@ -22,9 +22,9 @@ if __name__ == "__main__":
         print >> sys.stderr, \
             "Usage: PythonTC <host> [<slices>]"
         exit(-1)
-    sc = SparkContext(sys.argv[1], "PythonKMeans")
+    sc = SparkContext(sys.argv[1], "PythonTC")
     slices = sys.argv[2] if len(sys.argv) > 2 else 2
-    tc = sc.parallelizePairs(generateGraph(), slices).cache()
+    tc = sc.parallelize(generateGraph(), slices).cache()
 
     # Linear transitive closure: each round grows paths by one edge,
     # by joining the graph's edges with the already-discovered paths.
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     # the graph to obtain the path (x, z).
 
     # Because join() joins on keys, the edges are stored in reversed order.
-    edges = tc.mapPairs(lambda (x, y): (y, x))
+    edges = tc.map(lambda (x, y): (y, x))
 
     oldCount = 0L
     nextCount = tc.count()
@@ -40,7 +40,7 @@ if __name__ == "__main__":
         oldCount = nextCount
         # Perform the join, obtaining an RDD of (y, (z, x)) pairs,
         # then project the result to obtain the new (x, z) paths.
-        new_edges = tc.join(edges).mapPairs(lambda (_, (a, b)): (b, a))
+        new_edges = tc.join(edges).map(lambda (_, (a, b)): (b, a))
         tc = tc.union(new_edges).distinct().cache()
         nextCount = tc.count()
         if nextCount == oldCount:
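
Context for the first hunk (not part of the patch): pi.py is the Monte Carlo pi estimator, which is why its application name is corrected from "PythonKMeans" to "PythonPi". A minimal standalone sketch of that estimate in plain Python, with no SparkContext; the name estimate_pi and its parameters are illustrative and not taken from the file:

```python
from random import Random

def estimate_pi(n, seed=None):
    """Monte Carlo estimate of pi: sample points uniformly in the square
    [-1, 1] x [-1, 1] and count how many land inside the unit circle.
    The fraction inside approximates pi/4."""
    rng = Random(seed)
    inside = 0
    for _ in range(n):
        x = rng.random() * 2 - 1
        y = rng.random() * 2 - 1
        if x ** 2 + y ** 2 < 1:
            inside += 1
    return 4.0 * inside / n

if __name__ == "__main__":
    print("Pi is roughly %f" % estimate_pi(200000))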
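
Likewise for tc.py: the patched loop computes transitive closure by repeatedly joining already-discovered paths with the key-reversed edge set until the path count stops growing, and the method-name changes (parallelizePairs to parallelize, mapPairs to map) only move the script to the generic RDD calls without touching that logic. Below is a standalone sketch of the same join idea in plain Python; the names generate_graph and transitive_closure are illustrative, not the file's own helpers:

```python
from random import Random

def generate_graph(num_edges=200, num_vertices=100, seed=42):
    """Build a small random directed edge set, in the spirit of the
    example's generateGraph()."""
    rng = Random(seed)
    edges = set()
    while len(edges) < num_edges:
        src = rng.randrange(0, num_vertices)
        dst = rng.randrange(0, num_vertices)
        if src != dst:
            edges.add((src, dst))
    return edges

def transitive_closure(graph):
    """Iterative join-based transitive closure, mirroring the tc.py loop."""
    tc = set(graph)
    # Index each edge (x, y) by its destination y, mirroring
    # edges = tc.map(lambda (x, y): (y, x)) in tc.py.
    edges_by_dst = {}
    for (x, y) in graph:
        edges_by_dst.setdefault(y, set()).add(x)
    old_count = 0
    next_count = len(tc)
    while next_count != old_count:
        old_count = next_count
        # "Join" each known path (y, z) with the edges (x, y) ending at y,
        # producing the new path (x, z); the stand-in for
        # tc.join(edges).map(lambda (_, (a, b)): (b, a)).
        new_paths = {(x, z) for (y, z) in tc for x in edges_by_dst.get(y, ())}
        tc |= new_paths
        next_count = len(tc)
    return tc

if __name__ == "__main__":
    print("TC has %i edges" % len(transitive_closure(generate_graph())))
```

Keying the edge (x, y) by its destination y is what lets the join match a path head (y, z) against incoming edges, which is the point of the "Because join() joins on keys" comment in tc.py.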