Skip to content
Snippets Groups Projects
Commit 5aa05219 authored by Reynold Xin, committed by Yin Huai
Browse files

[SPARK-11292] [SQL] Python API for text data source

Adds DataFrameReader.text and DataFrameWriter.text.

Author: Reynold Xin <rxin@databricks.com>

Closes #9259 from rxin/SPARK-11292.
parent 032748bb
No related branches found
No related tags found
No related merge requests found
......@@ -23,6 +23,7 @@ if sys.version >= '3':
from py4j.java_gateway import JavaClass
from pyspark import RDD, since
from pyspark.rdd import ignore_unicode_prefix
from pyspark.sql.column import _to_seq
from pyspark.sql.types import *
......@@ -193,10 +194,22 @@ class DataFrameReader(object):
"""
return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, paths)))
@ignore_unicode_prefix
@since(1.6)
def text(self, path):
"""Loads a text file and returns a :class:`DataFrame` with a single string column named "text".
Each line in the text file is a new row in the resulting DataFrame.
:param path: path of the text file to load.
>>> df = sqlContext.read.text('python/test_support/sql/text-test.txt')
>>> df.collect()
[Row(text=u'hello'), Row(text=u'this')]
"""
# Hand the path to the underlying reader's ``text`` method and pass the
# result through ``self._df`` before returning it.
return self._df(self._jreader.text(path))
@since(1.5)
def orc(self, path):
"""
Loads an ORC file, returning the result as a :class:`DataFrame`.
"""Loads an ORC file, returning the result as a :class:`DataFrame`.
::Note: Currently ORC support is only available together with
:class:`HiveContext`.
......@@ -432,6 +445,16 @@ class DataFrameWriter(object):
self.partitionBy(partitionBy)
self._jwrite.parquet(path)
@since(1.6)
def text(self, path):
"""Saves the content of the :class:`DataFrame` in a text file at the specified path.
The DataFrame must have only one column that is of string type.
Each row becomes a new line in the output file.
:param path: the path at which the text output is saved.
"""
# Pure delegation to the underlying writer's ``text`` method; nothing is returned.
self._jwrite.text(path)
@since(1.5)
def orc(self, path, mode=None, partitionBy=None):
"""Saves the content of the :class:`DataFrame` in ORC format at the specified path.
......
hello
this
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment