Skip to content
Snippets Groups Projects
Commit ac2e17b0 authored by Cheolsoo Park's avatar Cheolsoo Park Committed by Reynold Xin
Browse files

[SPARK-8355] [SQL] Python DataFrameReader/Writer should mirror Scala

I compared the PySpark DataFrameReader/Writer against their Scala counterparts. The `option` function is missing in both the reader and the writer, but everything else seems to match.

I added `option` to the reader and the writer and updated the `pyspark-sql` test.

Author: Cheolsoo Park <cheolsoop@netflix.com>

Closes #7078 from piaozhexiu/SPARK-8355 and squashes the following commits:

c63d419 [Cheolsoo Park] Fix version
524e0aa [Cheolsoo Park] Add option function to df reader and writer
parent 0b10662f
No related branches found
No related tags found
No related merge requests found
...@@ -73,6 +73,13 @@ class DataFrameReader(object): ...@@ -73,6 +73,13 @@ class DataFrameReader(object):
self._jreader = self._jreader.schema(jschema) self._jreader = self._jreader.schema(jschema)
return self return self
@since(1.5)
def option(self, key, value):
    """Set a single input option for the underlying data source.

    The ``key``/``value`` pair is passed to ``self._jreader.option`` and the
    result replaces ``self._jreader``. This reader is returned so that
    further calls can be chained onto it.
    """
    updated_reader = self._jreader.option(key, value)
    self._jreader = updated_reader
    return self
@since(1.4) @since(1.4)
def options(self, **options): def options(self, **options):
"""Adds input options for the underlying data source. """Adds input options for the underlying data source.
...@@ -235,6 +242,13 @@ class DataFrameWriter(object): ...@@ -235,6 +242,13 @@ class DataFrameWriter(object):
self._jwrite = self._jwrite.format(source) self._jwrite = self._jwrite.format(source)
return self return self
@since(1.5)
def option(self, key, value):
    """Set a single output option for the underlying data source.

    The ``key``/``value`` pair is passed to ``self._jwrite.option`` and the
    result replaces ``self._jwrite``. This writer is returned so that
    further calls can be chained onto it.
    """
    updated_writer = self._jwrite.option(key, value)
    self._jwrite = updated_writer
    return self
@since(1.4) @since(1.4)
def options(self, **options): def options(self, **options):
"""Adds output options for the underlying data source. """Adds output options for the underlying data source.
......
...@@ -564,6 +564,7 @@ class SQLTests(ReusedPySparkTestCase): ...@@ -564,6 +564,7 @@ class SQLTests(ReusedPySparkTestCase):
self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
df.write.mode("overwrite").options(noUse="this options will not be used in save.")\ df.write.mode("overwrite").options(noUse="this options will not be used in save.")\
.option("noUse", "this option will not be used in save.")\
.format("json").save(path=tmpPath) .format("json").save(path=tmpPath)
actual =\ actual =\
self.sqlCtx.read.format("json")\ self.sqlCtx.read.format("json")\
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment