diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 1535103321193132915a277f2a582384e59af80b..e3b9ce0f847e30753b866392259b403fae7f06c0 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -182,7 +182,7 @@ stax-api-1.0.1.jar
 stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
-univocity-parsers-2.2.1.jar
+univocity-parsers-2.5.4.jar
 validation-api-1.1.0.Final.jar
 xbean-asm5-shaded-4.4.jar
 xercesImpl-2.9.1.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index deaa288ef4efee0456d182e49030c5a620ac3237..a3f3f32444beb45a70eac88d923dc31433ecb5e8 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -183,7 +183,7 @@ stax-api-1.0.1.jar
 stream-2.7.0.jar
 stringtemplate-3.2.1.jar
 super-csv-2.2.0.jar
-univocity-parsers-2.2.1.jar
+univocity-parsers-2.5.4.jar
 validation-api-1.1.0.Final.jar
 xbean-asm5-shaded-4.4.jar
 xercesImpl-2.9.1.jar
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 9a3cacbe3825e1f92682676f328e22881673f1c1..7ee002e46575645708984ba3efd858f1ed59bb81 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -38,7 +38,7 @@
     <dependency>
       <groupId>com.univocity</groupId>
       <artifactId>univocity-parsers</artifactId>
-      <version>2.2.1</version>
+      <version>2.5.4</version>
       <type>jar</type>
     </dependency>
     <dependency>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 243a55cffd47f55d2855446c5604785f452ec902..be89141151098cf05248e85854085a4f62d05845 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -1195,4 +1195,12 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
       .csv(Seq("10u12").toDS())
     checkAnswer(results, Row(null))
   }
+
+  test("SPARK-20978: Fill the malformed column when the number of tokens is less than schema") {
+    val df = spark.read
+      .schema("a string, b string, unparsed string")
+      .option("columnNameOfCorruptRecord", "unparsed")
+      .csv(Seq("a").toDS())
+    checkAnswer(df, Row("a", null, "a"))
+  }
 }
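
A minimal sketch of the behavior the new test pins down, assuming an existing SparkSession named `spark` (as in spark-shell): with the upgraded univocity-parsers, an input line that has fewer tokens than the schema gets its missing columns filled with null, and the raw line is preserved in the column named by columnNameOfCorruptRecord, instead of the record being mishandled.

    import spark.implicits._

    // Schema declares two data columns plus a corrupt-record column.
    val df = spark.read
      .schema("a string, b string, unparsed string")
      .option("columnNameOfCorruptRecord", "unparsed")
      .csv(Seq("a").toDS())

    // The line "a" supplies only the first token, so per the test above
    // the result is Row("a", null, "a"): b is null and the original
    // malformed line is captured in the "unparsed" column.
    df.show()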