Skip to content
Snippets Groups Projects
Commit 86174ea8 authored by Burak Yavuz's avatar Burak Yavuz Committed by Wenchen Fan
Browse files

[SPARK-20549] java.io.CharConversionException: 'Invalid UTF-32' in JsonToStructs

## What changes were proposed in this pull request?

A fix for the same problem was made in #17693, but it did not cover `JsonToStructs`. This PR applies the same fix to `JsonToStructs`.

## How was this patch tested?

Added a regression test, `SPARK-20549: from_json bad UTF-8`, to `JsonExpressionsSuite`.

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #17826 from brkyvz/SPARK-20549.
parent afb21bf2
No related branches found
No related tags found
No related merge requests found
......@@ -151,8 +151,7 @@ case class GetJsonObject(json: Expression, path: Expression)
try {
/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson
detect character encoding which could fail for some malformed strings */
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
new ByteArrayInputStream(jsonStr.getBytes), "UTF-8"))) { parser =>
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, jsonStr)) { parser =>
val output = new ByteArrayOutputStream()
val matched = Utils.tryWithResource(
jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator =>
......@@ -398,9 +397,8 @@ case class JsonTuple(children: Seq[Expression])
try {
/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson
detect character encoding which could fail for some malformed strings */
Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader(
new ByteArrayInputStream(json.getBytes), "UTF-8"))) {
parser => parseRow(parser, input)
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser =>
parseRow(parser, input)
}
} catch {
case _: JsonProcessingException =>
......
......@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.json
import java.io.InputStream
import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
......@@ -33,7 +33,10 @@ private[sql] object CreateJacksonParser extends Serializable {
val bb = record.getByteBuffer
assert(bb.hasArray)
jsonFactory.createParser(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())
val bain = new ByteArrayInputStream(
bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())
jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
}
def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
......
......@@ -453,6 +453,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
)
}
test("SPARK-20549: from_json bad UTF-8") {
  // Regression test for SPARK-20549: evaluating `JsonToStructs` over `badJson`
  // is expected to produce null.
  val schema = StructType(Seq(StructField("a", IntegerType)))
  val fromJson = JsonToStructs(schema, Map.empty, Literal(badJson), gmtId)
  checkEvaluation(fromJson, null)
}
test("from_json with timestamp") {
val schema = StructType(StructField("t", TimestampType) :: Nil)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment