Commit 6b45d7e9 authored by Liang-Chi Hsieh's avatar Liang-Chi Hsieh Committed by hyukjinkwon

[SPARK-21954][SQL] JacksonUtils should verify MapType's value type instead of key type

## What changes were proposed in this pull request?

`JacksonUtils.verifySchema` verifies whether a data type can be converted to JSON. For `MapType`, it currently verifies the key type. However, when `JacksonGenerator` converts a map to JSON, only the values matter: it creates a writer for the values, while the keys are simply rendered as strings by calling `toString` on them.
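
As a toy illustration of that behavior (a sketch using jackson-core directly rather than the Spark generator; the `Duration` key is just an arbitrary non-string type), any key can be emitted because it only passes through `toString`, whereas the value needs a type-specific write call:

```scala
import java.io.StringWriter
import com.fasterxml.jackson.core.JsonFactory

val out = new StringWriter()
val gen = new JsonFactory().createGenerator(out)

// A map whose key type has no JSON representation of its own.
val m: Map[Any, Int] = Map(java.time.Duration.ofHours(7) -> 1)

gen.writeStartObject()
m.foreach { case (k, v) =>
  gen.writeFieldName(k.toString) // key: always stringified, so its type never matters
  gen.writeNumber(v)             // value: needs a writer that knows its JSON type
}
gen.writeEndObject()
gen.flush()

println(out.toString)            // {"PT7H":1}
```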

Thus, we should change `JacksonUtils.verifySchema` to verify the value type of `MapType`.
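
A rough sketch of the user-visible effect (assuming a `SparkSession` named `spark`; the column and field names are illustrative, and this mirrors the new tests below):

```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.functions.{lit, map, struct, to_json}
import org.apache.spark.sql.types.CalendarIntervalType
import spark.implicits._

val df = Seq("interval -3 month 7 hours").toDF("a")

// Interval as the map *key*: accepted, the key is rendered via toString.
df.select(to_json(struct(map($"a".cast(CalendarIntervalType), lit("x")).as("m"))))
  .collect() // produces the JSON string {"m":{"interval -3 months 7 hours":"x"}}

// Interval as the map *value*: rejected by verifySchema.
try {
  df.select(to_json(struct(map(lit("x"), $"a".cast(CalendarIntervalType)).as("m")))).collect()
} catch {
  case e: AnalysisException =>
    // e.g. "Unable to convert column m of type calendarinterval to JSON."
    println(e.getMessage)
}
```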

## How was this patch tested?

Added tests.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #19167 from viirya/test-jacksonutils.
parent 8a5eb506
@@ -44,7 +44,9 @@ object JacksonUtils {
       case at: ArrayType => verifyType(name, at.elementType)
-      case mt: MapType => verifyType(name, mt.keyType)
+      // For MapType, its keys are treated as a string (i.e. calling `toString`) basically when
+      // generating JSON, so we only care if the values are valid for JSON.
+      case mt: MapType => verifyType(name, mt.valueType)
       case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)
@@ -21,6 +21,7 @@ import java.util.Calendar
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -610,4 +611,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       """{"t":"2015-12-31T16:00:00"}"""
     )
   }
+
+  test("to_json: verify MapType's value type instead of key type") {
+    // Keys in map are treated as strings when converting to JSON. The type doesn't matter at all.
+    val mapType1 = MapType(CalendarIntervalType, IntegerType)
+    val schema1 = StructType(StructField("a", mapType1) :: Nil)
+    val struct1 = Literal.create(null, schema1)
+    checkEvaluation(
+      StructsToJson(Map.empty, struct1, gmtId),
+      null
+    )
+
+    // The value type must be valid for converting to JSON.
+    val mapType2 = MapType(IntegerType, CalendarIntervalType)
+    val schema2 = StructType(StructField("a", mapType2) :: Nil)
+    val struct2 = Literal.create(null, schema2)
+    intercept[TreeNodeException[_]] {
+      checkEvaluation(
+        StructsToJson(Map.empty, struct2, gmtId),
+        null
+      )
+    }
+  }
 }
@@ -17,7 +17,7 @@

 package org.apache.spark.sql

-import org.apache.spark.sql.functions.{from_json, struct, to_json}
+import org.apache.spark.sql.functions.{from_json, lit, map, struct, to_json}
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
@@ -195,15 +195,33 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       Row("""{"_1":"26/08/2015 18:00"}""") :: Nil)
   }

-  test("to_json unsupported type") {
+  test("to_json - key types of map don't matter") {
+    // interval type is invalid for converting to JSON. However, the keys of a map are treated
+    // as strings, so its type doesn't matter.
     val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
-      .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
+      .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c"))
+    checkAnswer(
+      df.select(to_json($"c")),
+      Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil)
+  }
+
+  test("to_json unsupported type") {
+    val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
+    val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
     val e = intercept[AnalysisException]{
       // Unsupported type throws an exception
       df.select(to_json($"c")).collect()
     }
     assert(e.getMessage.contains(
       "Unable to convert column a of type calendarinterval to JSON."))
+
+    // interval type is invalid for converting to JSON. We can't use it as value type of a map.
+    val df2 = baseDf
+      .select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c"))
+    val e2 = intercept[AnalysisException] {
+      df2.select(to_json($"c")).collect()
+    }
+    assert(e2.getMessage.contains("Unable to convert column col1 of type calendarinterval to JSON"))
   }

   test("roundtrip in to_json and from_json - struct") {