From c1b6fa9838f9d26d60fab3b05a96649882e3dd5b Mon Sep 17 00:00:00 2001
From: Davies Liu <davies@databricks.com>
Date: Wed, 18 Feb 2015 01:00:54 -0800
Subject: [PATCH] [SPARK-5878] fix DataFrame.repartition() in Python

Also add doctests for repartition() and distinct().

Author: Davies Liu <davies@databricks.com>

Closes #4667 from davies/repartition and squashes the following commits:

79059fd [Davies Liu] add test
cb4915e [Davies Liu] fix repartition
---
 python/pyspark/sql/dataframe.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 388033d385..52bd75bf8a 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -434,12 +434,18 @@ class DataFrame(object):
     def repartition(self, numPartitions):
         """ Return a new :class:`DataFrame` that has exactly `numPartitions`
         partitions.
+
+        >>> df.repartition(10).rdd.getNumPartitions()
+        10
         """
-        return DataFrame(self._jdf.repartition(numPartitions, None), self.sql_ctx)
+        return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx)
 
     def distinct(self):
         """
         Return a new :class:`DataFrame` containing the distinct rows in this DataFrame.
+
+        >>> df.distinct().count()
+        2L
         """
         return DataFrame(self._jdf.distinct(), self.sql_ctx)
 
-- 
GitLab