diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 4aafe35d13491a23279b46ff95c8ca5f54eab4b3..8734cacb0baf425975f2100a74fe8c568277367d 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -160,7 +160,7 @@ class RDD(object):
         >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect())
         [1, 2, 3]
         """
-        return self.map(lambda x: (x, "")) \
+        return self.map(lambda x: (x, None)) \
                    .reduceByKey(lambda x, _: x) \
                    .map(lambda (x, _): x)