diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 9208a527d29c37b4452ea15f61fde20783b129b5..7d1f18611bd63c66889fe89dec3760bd5aa0644b 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -320,7 +320,8 @@ class DataFrameReader(object): it uses the default value, ``UTF-8``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\``. :param comment: sets the single character used for skipping lines beginning with this @@ -804,7 +805,8 @@ class DataFrameWriter(object): set, it uses the default value, ``,``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\`` :param escapeQuotes: A flag indicating whether values containing quotes should always diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b248583d79e6b1dc0c0ca50cec6c07937599dede..bb5fa2b99fc0c94fa7b750dfec3c14dd84e1f8cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding * type.</li> * <li>`quote` (default `"`): sets the single character used for escaping quoted values where - * the separator can be part of the value.</li> + * the separator can be part of the value. If you would like to turn off quotations, you need to + * set not `null` but an empty string. This behaviour is different form + * `com.databricks.spark.csv`.</li> * <li>`escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.</li> * <li>`comment` (default empty string): sets the single character used for skipping lines diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index bc95446387956e47aa17004341d943262307fd1d..f170065132acda2d7fb91f9d68c415ff463a47af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { assert(msg.contains("CSV data source does not support array<string> data type")) } } + + test("SPARK-15585 turn off quotations") { + val cars = spark.read + .format("csv") + .option("header", "true") + .option("quote", "") + .load(testFile(carsUnbalancedQuotesFile)) + + verifyCars(cars, withHeader = true, checkValues = false) + } }