diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 4aafe35d13491a23279b46ff95c8ca5f54eab4b3..8734cacb0baf425975f2100a74fe8c568277367d 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -160,7 +160,7 @@ class RDD(object): >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect()) [1, 2, 3] """ - return self.map(lambda x: (x, "")) \ + return self.map(lambda x: (x, None)) \ .reduceByKey(lambda x, _: x) \ .map(lambda (x, _): x)