diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index f7009fe5893e095621033c84f03e04542a553ee8..4085f165f465c1d3240c1c5d556f00458afcbd99 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -226,9 +226,8 @@ class SQLContext(object): from ``data``, which should be an RDD of :class:`Row`, or :class:`namedtuple`, or :class:`dict`. - When ``schema`` is :class:`pyspark.sql.types.DataType` or - :class:`pyspark.sql.types.StringType`, it must match the - real data, or an exception will be thrown at runtime. If the given schema is not + When ``schema`` is :class:`pyspark.sql.types.DataType` or a datatype string, it must match + the real data, or an exception will be thrown at runtime. If the given schema is not :class:`pyspark.sql.types.StructType`, it will be wrapped into a :class:`pyspark.sql.types.StructType` as its only field, and the field name will be "value", each record will also be wrapped into a tuple, which can be converted to row later. @@ -239,8 +238,7 @@ class SQLContext(object): :param data: an RDD of any kind of SQL data representation(e.g. :class:`Row`, :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or :class:`pandas.DataFrame`. - :param schema: a :class:`pyspark.sql.types.DataType` or a - :class:`pyspark.sql.types.StringType` or a list of + :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default is None. The data type string format equals to :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use @@ -251,7 +249,7 @@ class SQLContext(object): .. versionchanged:: 2.0 The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a - :class:`pyspark.sql.types.StringType` after 2.0. + datatype string after 2.0. 
If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple. diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 10bd89b03fe331d84adb41f6d321fc0c1af80c7a..2dacf483fc7e6f0ba78f63e82bfa001399f0e309 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -414,9 +414,8 @@ class SparkSession(object): from ``data``, which should be an RDD of :class:`Row`, or :class:`namedtuple`, or :class:`dict`. - When ``schema`` is :class:`pyspark.sql.types.DataType` or - :class:`pyspark.sql.types.StringType`, it must match the - real data, or an exception will be thrown at runtime. If the given schema is not + When ``schema`` is :class:`pyspark.sql.types.DataType` or a datatype string, it must match + the real data, or an exception will be thrown at runtime. If the given schema is not :class:`pyspark.sql.types.StructType`, it will be wrapped into a :class:`pyspark.sql.types.StructType` as its only field, and the field name will be "value", each record will also be wrapped into a tuple, which can be converted to row later. @@ -426,8 +425,7 @@ class SparkSession(object): :param data: an RDD of any kind of SQL data representation(e.g. row, tuple, int, boolean, etc.), or :class:`list`, or :class:`pandas.DataFrame`. - :param schema: a :class:`pyspark.sql.types.DataType` or a - :class:`pyspark.sql.types.StringType` or a list of + :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default is ``None``. The data type string format equals to :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use @@ -438,7 +436,7 @@ class SparkSession(object): .. 
versionchanged:: 2.0 The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a - :class:`pyspark.sql.types.StringType` after 2.0. If it's not a + datatype string after 2.0. If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple.