Commit 660c6cec authored by Reynold Xin


[SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.

Author: Reynold Xin <rxin@databricks.com>

Closes #7079 from rxin/SPARK-8698 and squashes the following commits:

8513e1c [Reynold Xin] [SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
parent ac2e17b0
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -270,12 +270,11 @@ class DataFrameWriter(object):
         """
         if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
             cols = cols[0]
-        if len(cols) > 0:
-            self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
+        self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
         return self

     @since(1.4)
-    def save(self, path=None, format=None, mode=None, partitionBy=(), **options):
+    def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
         """Saves the contents of the :class:`DataFrame` to a data source.

         The data source is specified by the ``format`` and a set of ``options``.
@@ -295,7 +294,9 @@ class DataFrameWriter(object):
         >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
         """
-        self.partitionBy(partitionBy).mode(mode).options(**options)
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         if format is not None:
             self.format(format)
         if path is None:
@@ -315,7 +316,7 @@ class DataFrameWriter(object):
         self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)

     @since(1.4)
-    def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options):
+    def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
         """Saves the content of the :class:`DataFrame` as the specified table.

         In the case the table already exists, behavior of this function depends on the
@@ -334,7 +335,9 @@ class DataFrameWriter(object):
         :param partitionBy: names of partitioning columns
         :param options: all other string options
         """
-        self.partitionBy(partitionBy).mode(mode).options(**options)
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         if format is not None:
             self.format(format)
         self._jwrite.saveAsTable(name)
@@ -356,7 +359,7 @@ class DataFrameWriter(object):
         self.mode(mode)._jwrite.json(path)

     @since(1.4)
-    def parquet(self, path, mode=None, partitionBy=()):
+    def parquet(self, path, mode=None, partitionBy=None):
         """Saves the content of the :class:`DataFrame` in Parquet format at the specified path.

         :param path: the path in any Hadoop supported file system
@@ -370,7 +373,9 @@ class DataFrameWriter(object):
         >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data'))
         """
-        self.partitionBy(partitionBy).mode(mode)
+        self.mode(mode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         self._jwrite.parquet(path)

     @since(1.4)
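For context, a minimal usage sketch of the Spark 1.4-era writer API this change affects (not part of the commit; the SparkContext/SQLContext setup, the example DataFrame, and the temp paths are assumptions for illustration). With partitionBy defaulting to None, a plain save no longer passes an empty tuple through partitionBy(); partitioning is applied only when the caller requests it.

import os
import tempfile

from pyspark import SparkContext
from pyspark.sql import SQLContext

# Assumed setup for the sketch; any existing SQLContext behaves the same way.
sc = SparkContext(appName="partitionBy-default-example")
sqlContext = SQLContext(sc)

df = sqlContext.createDataFrame(
    [(1, "a", 2015), (2, "b", 2016)],
    ["id", "name", "year"])

# With partitionBy=None as the default, a plain save no longer routes an
# empty column sequence through DataFrameWriter.partitionBy() on the JVM side.
df.write.parquet(os.path.join(tempfile.mkdtemp(), "unpartitioned"))

# Partitioning is applied only when asked for explicitly.
df.write.partitionBy("year").parquet(
    os.path.join(tempfile.mkdtemp(), "partitioned_by_year"))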