Skip to content
Snippets Groups Projects
Commit 8c320e45 authored by Yijie Shen, committed by Davies Liu
Browse files

[SPARK-6591] [SQL] Python data source load options should auto convert common types into strings

JIRA: https://issues.apache.org/jira/browse/SPARK-6591

Author: Yijie Shen <henry.yijieshen@gmail.com>

Closes #7926 from yjshen/py_dsload_opt and squashes the following commits:

b207832 [Yijie Shen] fix style
efdf834 [Yijie Shen] resolve comment
7a8f6a2 [Yijie Shen] lowercase
822e769 [Yijie Shen] convert load opts to string
parent a018b857
No related branches found
No related tags found
No related merge requests found
...@@ -24,6 +24,16 @@ from pyspark.sql.types import * ...@@ -24,6 +24,16 @@ from pyspark.sql.types import *
__all__ = ["DataFrameReader", "DataFrameWriter"] __all__ = ["DataFrameReader", "DataFrameWriter"]
def to_str(value):
    """
    Convert a data source option value to a string.

    A wrapper over str() that renders bool values in lower case
    ('true' / 'false') instead of Python's 'True' / 'False'.

    :param value: the value to convert; any object accepted by str().
    :return: ``str(value)``, lower-cased when ``value`` is a bool.
    """
    # Only real bools are lower-cased; ints such as 0/1 and strings such as
    # 'True' go through str() unchanged.
    if isinstance(value, bool):
        return str(value).lower()
    return str(value)
class DataFrameReader(object): class DataFrameReader(object):
""" """
Interface used to load a :class:`DataFrame` from external storage systems Interface used to load a :class:`DataFrame` from external storage systems
...@@ -77,7 +87,7 @@ class DataFrameReader(object): ...@@ -77,7 +87,7 @@ class DataFrameReader(object):
def option(self, key, value): def option(self, key, value):
"""Adds an input option for the underlying data source. """Adds an input option for the underlying data source.
""" """
self._jreader = self._jreader.option(key, value) self._jreader = self._jreader.option(key, to_str(value))
return self return self
@since(1.4) @since(1.4)
...@@ -85,7 +95,7 @@ class DataFrameReader(object): ...@@ -85,7 +95,7 @@ class DataFrameReader(object):
"""Adds input options for the underlying data source. """Adds input options for the underlying data source.
""" """
for k in options: for k in options:
self._jreader = self._jreader.option(k, options[k]) self._jreader = self._jreader.option(k, to_str(options[k]))
return self return self
@since(1.4) @since(1.4)
...@@ -97,7 +107,8 @@ class DataFrameReader(object): ...@@ -97,7 +107,8 @@ class DataFrameReader(object):
:param schema: optional :class:`StructType` for the input schema. :param schema: optional :class:`StructType` for the input schema.
:param options: all other string options :param options: all other string options
>>> df = sqlContext.read.load('python/test_support/sql/parquet_partitioned') >>> df = sqlContext.read.load('python/test_support/sql/parquet_partitioned', opt1=True,
... opt2=1, opt3='str')
>>> df.dtypes >>> df.dtypes
[('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')] [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
""" """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment