Commit a03e5b81 authored by Matthew Farrellee, committed by Josh Rosen

[SPARK-1701] [PySpark] remove slice terminology from python examples

Author: Matthew Farrellee <matt@redhat.com>

Closes #2304 from mattf/SPARK-1701-partition-over-slice-for-python-examples and squashes the following commits:

928a581 [Matthew Farrellee] [SPARK-1701] [PySpark] remove slice terminology from python examples
parent be0c7563
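The change is terminology only: the examples' variable names and usage strings now say "partitions", matching the vocabulary used throughout the Spark docs, while the calls themselves are untouched. As a minimal sketch of the API these examples revolve around (the second argument to sc.parallelize controls how many partitions the input is split into; the Python keyword for it has historically been numSlices, the older term this commit steers user-facing text away from; rdd.getNumPartitions() is assumed available in the Spark version at hand):

    from pyspark import SparkContext

    sc = SparkContext(appName="PartitionDemo")
    # Split the input into 4 partitions; Spark runs one task per partition.
    rdd = sc.parallelize(range(100), 4)
    print rdd.getNumPartitions()  # -> 4
    sc.stop()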
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -54,7 +54,7 @@ def update(i, vec, mat, ratings):
 
 if __name__ == "__main__":
     """
-    Usage: als [M] [U] [F] [iterations] [slices]"
+    Usage: als [M] [U] [F] [iterations] [partitions]"
     """
 
     print >> sys.stderr, """WARN: This is a naive implementation of ALS and is given as an
@@ -66,10 +66,10 @@ if __name__ == "__main__":
     U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
     F = int(sys.argv[3]) if len(sys.argv) > 3 else 10
     ITERATIONS = int(sys.argv[4]) if len(sys.argv) > 4 else 5
-    slices = int(sys.argv[5]) if len(sys.argv) > 5 else 2
+    partitions = int(sys.argv[5]) if len(sys.argv) > 5 else 2
 
-    print "Running ALS with M=%d, U=%d, F=%d, iters=%d, slices=%d\n" % \
-        (M, U, F, ITERATIONS, slices)
+    print "Running ALS with M=%d, U=%d, F=%d, iters=%d, partitions=%d\n" % \
+        (M, U, F, ITERATIONS, partitions)
 
     R = matrix(rand(M, F)) * matrix(rand(U, F).T)
     ms = matrix(rand(M, F))
@@ -80,7 +80,7 @@ if __name__ == "__main__":
     usb = sc.broadcast(us)
 
     for i in range(ITERATIONS):
-        ms = sc.parallelize(range(M), slices) \
+        ms = sc.parallelize(range(M), partitions) \
               .map(lambda x: update(x, msb.value[x, :], usb.value, Rb.value)) \
               .collect()
         # collect() returns a list, so array ends up being
@@ -88,7 +88,7 @@ if __name__ == "__main__":
         ms = matrix(np.array(ms)[:, :, 0])
         msb = sc.broadcast(ms)
 
-        us = sc.parallelize(range(U), slices) \
+        us = sc.parallelize(range(U), partitions) \
               .map(lambda x: update(x, usb.value[x, :], msb.value, Rb.value.T)) \
               .collect()
         us = matrix(np.array(us)[:, :, 0])
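For context, the hunk headers reference update(i, vec, mat, ratings), the per-row solve that each parallelized task runs. A rough sketch of a regularized least-squares update of that shape (assumed for illustration, not part of this diff; LAMBDA stands in for the regularization constant defined elsewhere in als.py):

    import numpy as np

    LAMBDA = 0.01  # assumed regularization constant

    def update(i, vec, mat, ratings):
        # Solve (mat' * mat + LAMBDA * n * I) x = mat' * ratings[i, :]' for x,
        # i.e. refit one row of this factor matrix against the other factor.
        n, f = mat.shape
        XtX = mat.T * mat                 # mat is an np.matrix, so * is matmul
        Xty = mat.T * ratings[i, :].T
        for j in range(f):
            XtX[j, j] += LAMBDA * n       # ridge term on the diagonal
        return np.linalg.solve(XtX, Xty)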
--- a/examples/src/main/python/pi.py
+++ b/examples/src/main/python/pi.py
@@ -24,18 +24,18 @@ from pyspark import SparkContext
 
 if __name__ == "__main__":
     """
-        Usage: pi [slices]
+        Usage: pi [partitions]
     """
     sc = SparkContext(appName="PythonPi")
-    slices = int(sys.argv[1]) if len(sys.argv) > 1 else 2
-    n = 100000 * slices
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
+    n = 100000 * partitions
 
     def f(_):
         x = random() * 2 - 1
         y = random() * 2 - 1
         return 1 if x ** 2 + y ** 2 < 1 else 0
 
-    count = sc.parallelize(xrange(1, n + 1), slices).map(f).reduce(add)
+    count = sc.parallelize(xrange(1, n + 1), partitions).map(f).reduce(add)
     print "Pi is roughly %f" % (4.0 * count / n)
     sc.stop()
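The math behind this example: a point drawn uniformly from the square [-1, 1] x [-1, 1] lands inside the unit circle with probability pi/4, so 4 * count / n converges to pi; tying n to the partition count keeps the work per partition constant. The same estimate without Spark, as a driver-only sketch (Python 2, like the examples):

    from random import random

    def estimate_pi(n):
        inside = 0
        for _ in xrange(n):
            x = random() * 2 - 1
            y = random() * 2 - 1
            if x ** 2 + y ** 2 < 1:
                inside += 1
        return 4.0 * inside / n

    print "Pi is roughly %f" % estimate_pi(200000)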
--- a/examples/src/main/python/transitive_closure.py
+++ b/examples/src/main/python/transitive_closure.py
@@ -37,11 +37,11 @@ def generateGraph():
 
 if __name__ == "__main__":
     """
-        Usage: transitive_closure [slices]
+        Usage: transitive_closure [partitions]
     """
     sc = SparkContext(appName="PythonTransitiveClosure")
-    slices = int(sys.argv[1]) if len(sys.argv) > 1 else 2
-    tc = sc.parallelize(generateGraph(), slices).cache()
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
+    tc = sc.parallelize(generateGraph(), partitions).cache()
 
     # Linear transitive closure: each round grows paths by one edge,
     # by joining the graph's edges with the already-discovered paths.
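The comment in this hunk describes the iteration: each round joins the edges against the paths discovered so far, extending every path by one edge, and stops at a fixed point. A single-process sketch of that loop over plain sets (a hypothetical helper, not the example's RDD code):

    def transitive_closure(edges):
        # edges: collection of (src, dst) pairs
        edges = set(edges)
        tc = set(edges)
        while True:
            # Extend each known path (a, b) by an edge (b, c) to get (a, c).
            grown = set((a, c) for (a, b) in tc for (b2, c) in edges if b2 == b)
            if grown <= tc:  # fixed point: no new paths found this round
                return tc
            tc |= grown

    print sorted(transitive_closure([(1, 2), (2, 3), (3, 4)]))
    # -> the original edges plus (1, 3), (1, 4), (2, 4)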