diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParameters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParameters.scala
index ec16bdbd8bfb39b2e5e51244dc37d091033ac75a..127c9728da2d1a3d3b67af40c47c7b89da428d5f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParameters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParameters.scala
@@ -21,7 +21,7 @@ import java.nio.charset.Charset
 
 import org.apache.spark.Logging
 
-private[sql] case class CSVParameters(parameters: Map[String, String]) extends Logging {
+private[sql] case class CSVParameters(@transient parameters: Map[String, String]) extends Logging {
 
   private def getChar(paramName: String, default: Char): Char = {
     val paramValue = parameters.get(paramName)
@@ -44,9 +44,11 @@ private[sql] case class CSVParameters(parameters: Map[String, String]) extends Logging {
     }
   }
 
-  val delimiter = CSVTypeCast.toChar(parameters.getOrElse("delimiter", ","))
+  val delimiter = CSVTypeCast.toChar(
+    parameters.getOrElse("sep", parameters.getOrElse("delimiter", ",")))
   val parseMode = parameters.getOrElse("mode", "PERMISSIVE")
-  val charset = parameters.getOrElse("charset", Charset.forName("UTF-8").name())
+  val charset = parameters.getOrElse("encoding",
+    parameters.getOrElse("charset", Charset.forName("UTF-8").name()))
 
   val quote = getChar("quote", '\"')
   val escape = getChar("escape", '\\')
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index 9267479755e828a70881ed5fb7bba75c881ae355..53818853ffb3befaee405b05526850047fed400b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -58,9 +58,10 @@ private[csv] class CSVRelation(
     if (Charset.forName(params.charset) == Charset.forName("UTF-8")) {
       sqlContext.sparkContext.textFile(location)
     } else {
+      val charset = params.charset
       sqlContext.sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](location)
         .mapPartitions { _.map { pair =>
-          new String(pair._2.getBytes, 0, pair._2.getLength, params.charset)
+          new String(pair._2.getBytes, 0, pair._2.getLength, charset)
         }
       }
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 8fdd31aa4334f96f385d3bacebda25efb53f90a0..071b5ef56d58b29f7245844cc45a6a6857452e67 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -122,7 +122,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
     assert(exception.getMessage.contains("1-9588-osi"))
   }
 
-  ignore("test different encoding") {
+  test("test different encoding") {
     // scalastyle:off
     sqlContext.sql(
       s"""
@@ -135,6 +135,18 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
     verifyCars(sqlContext.table("carsTable"), withHeader = true)
   }
 
+  test("test aliases sep and encoding for delimiter and charset") {
+    val cars = sqlContext
+      .read
+      .format("csv")
+      .option("header", "true")
+      .option("encoding", "iso-8859-1")
+      .option("sep", "þ")
+      .load(testFile(carsFile8859))
+
+    verifyCars(cars, withHeader = true)
+  }
+
   test("DDL test with tab separated file") {
     sqlContext.sql(
       s"""
@@ -337,5 +349,4 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
     assert(results(0).toSeq === Array(2012, "Tesla", "S", "null", "null"))
     assert(results(2).toSeq === Array(null, "Chevy", "Volt", null, null))
   }
-
 }
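
For reference, a minimal standalone sketch of the alias resolution the CSVParameters hunk introduces: the new option name ("sep", "encoding") takes precedence when both are set, the old name ("delimiter", "charset") is kept as a fallback, and the hard-coded default applies only when neither is present. The options map below is hypothetical.

import java.nio.charset.Charset

object AliasResolutionSketch extends App {
  // Hypothetical options map; "sep"/"encoding" are the new aliases,
  // "delimiter"/"charset" the original names.
  val parameters = Map("sep" -> ";", "charset" -> "ISO-8859-1")

  val delimiter = parameters.getOrElse("sep", parameters.getOrElse("delimiter", ","))
  val charset = parameters.getOrElse("encoding",
    parameters.getOrElse("charset", Charset.forName("UTF-8").name()))

  assert(delimiter == ";")        // "sep" wins over the "delimiter" default
  assert(charset == "ISO-8859-1") // no "encoding" set, so "charset" is used
}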
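
Likewise, a cut-down sketch (class and method names hypothetical) of why the CSVRelation hunk copies params.charset into a local val before the mapPartitions closure: referencing a field of the enclosing relation inside an RDD closure captures the relation itself via its outer reference, dragging non-serializable state (and the now-@transient parameters map) into the task closure; copying the value into a local first means only a String is captured.

import org.apache.spark.rdd.RDD

// Hypothetical relation illustrating the closure-capture pattern: `decode`
// must not reference `this.charset` inside the closure, or the whole
// (potentially non-serializable) relation would be serialized with the task.
class TextRelationSketch(@transient parameters: Map[String, String]) {
  val charset: String = parameters.getOrElse("charset", "UTF-8")

  def decode(bytes: RDD[Array[Byte]]): RDD[String] = {
    val localCharset = charset // capture only the String, not `this`
    bytes.map(b => new String(b, localCharset))
  }
}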