diff --git a/apache-spark/python/join.py b/apache-spark/python/join.py index f4571164a2622f642ce931094898e4b13bd7d6b2..220369d81c5c6113f4e915c483ae9a4fb501caa9 100755 --- a/apache-spark/python/join.py +++ b/apache-spark/python/join.py @@ -34,12 +34,12 @@ if __name__ == "__main__": lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0]) lines = lines.map(lambda a: a.encode('utf-8')) - one = lines.filter(lambda x: x if 'profileName' in x else '') - names = one.map(lambda y: y.split(':')[1]) - + names = lines.filter(lambda x: x.split(':')[1] if 'profileName' in x else '') profiles = names.map(lambda c: (c, random.randint(1, 10))) + #randos = randos.map(lambda d: (d, random.randint(1, 10)) #final = profiles.join(randos) + res = profiles.collect() for r in res: