From 6b45d7e941eba8a36be26116787322d9e3ae25d0 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sat, 9 Sep 2017 19:10:52 +0900
Subject: [PATCH] [SPARK-21954][SQL] JacksonUtils should verify MapType's value type instead of key type

## What changes were proposed in this pull request?

`JacksonUtils.verifySchema` verifies whether a data type can be converted to JSON. For `MapType`, it currently verifies the key type. However, when `JacksonGenerator` converts a map to JSON, it only cares about the values and creates a writer for them; the keys are converted to strings by calling `toString`. Thus, `JacksonUtils.verifySchema` should verify the value type of `MapType` instead.

## How was this patch tested?

Added tests.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #19167 from viirya/test-jacksonutils.
---
 .../sql/catalyst/json/JacksonUtils.scala       |  4 +++-
 .../expressions/JsonExpressionsSuite.scala     | 23 ++++++++++++++++++
 .../apache/spark/sql/JsonFunctionsSuite.scala  | 24 ++++++++++++++++---
 3 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
index 3b23c6cd28..134d16e981 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala
@@ -44,7 +44,9 @@ object JacksonUtils {
 
       case at: ArrayType => verifyType(name, at.elementType)
 
-      case mt: MapType => verifyType(name, mt.keyType)
+      // For MapType, the keys are converted to strings (via `toString`) when generating JSON,
+      // so we only need to verify that the value type can be converted to JSON.
+      case mt: MapType => verifyType(name, mt.valueType)
 
       case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index 9991bda165..5de11433b0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -21,6 +21,7 @@ import java.util.Calendar
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -610,4 +611,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       """{"t":"2015-12-31T16:00:00"}"""
     )
   }
+
+  test("to_json: verify MapType's value type instead of key type") {
+    // Map keys are converted to strings when generating JSON, so their type doesn't matter.
+    val mapType1 = MapType(CalendarIntervalType, IntegerType)
+    val schema1 = StructType(StructField("a", mapType1) :: Nil)
+    val struct1 = Literal.create(null, schema1)
+    checkEvaluation(
+      StructsToJson(Map.empty, struct1, gmtId),
+      null
+    )
+
+    // The value type must be valid for converting to JSON.
+    val mapType2 = MapType(IntegerType, CalendarIntervalType)
+    val schema2 = StructType(StructField("a", mapType2) :: Nil)
+    val struct2 = Literal.create(null, schema2)
+    intercept[TreeNodeException[_]] {
+      checkEvaluation(
+        StructsToJson(Map.empty, struct2, gmtId),
+        null
+      )
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index cf2d00fc94..119af21304 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.sql.functions.{from_json, struct, to_json}
+import org.apache.spark.sql.functions.{from_json, lit, map, struct, to_json}
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 
@@ -195,15 +195,33 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       Row("""{"_1":"26/08/2015 18:00"}""") :: Nil)
   }
 
-  test("to_json unsupported type") {
+  test("to_json - key types of map don't matter") {
+    // The interval type cannot be converted to JSON. However, map keys are rendered as
+    // strings, so their type doesn't matter.
     val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
-      .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
+      .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c"))
+    checkAnswer(
+      df.select(to_json($"c")),
+      Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil)
+  }
+
+  test("to_json unsupported type") {
+    val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
+    val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
     val e = intercept[AnalysisException]{
       // Unsupported type throws an exception
       df.select(to_json($"c")).collect()
     }
     assert(e.getMessage.contains(
       "Unable to convert column a of type calendarinterval to JSON."))
+
+    // The interval type cannot be converted to JSON, so it can't be used as a map value type.
+    val df2 = baseDf
+      .select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c"))
+    val e2 = intercept[AnalysisException] {
+      df2.select(to_json($"c")).collect()
+    }
+    assert(e2.getMessage.contains("Unable to convert column col1 of type calendarinterval to JSON"))
  }
 
   test("roundtrip in to_json and from_json - struct") {
--
GitLab
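Illustration (not part of the patch): a minimal, self-contained sketch of the behaviour the change targets, assuming a local Spark build from roughly this era (2.2/2.3). The object name, app name, and the `m`/`c` column aliases are made up for the sketch, and the JSON output and error text shown in comments are only expected to resemble what the new tests assert.

```scala
// Illustration only, not part of the patch. Assumes a local Spark 2.2-era build;
// object name, app name, and column aliases are made up for this sketch.
import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.functions.{lit, map, struct, to_json}
import org.apache.spark.sql.types.CalendarIntervalType

object ToJsonMapTypeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("to_json-map-type").getOrCreate()
    import spark.implicits._

    val base = Seq("interval -3 month 7 hours").toDF("a")

    // Map keys are rendered via `toString`, so an interval-typed key is acceptable after this
    // change; the output should resemble {"m":{"interval -3 months 7 hours":"v"}}.
    val intervalKey = base
      .select(struct(map($"a".cast(CalendarIntervalType), lit("v")).as("m")).as("c"))
    intervalKey.select(to_json($"c")).show(false)

    // Map values still go through JacksonUtils.verifySchema, so an interval-typed value is
    // rejected with an AnalysisException ("Unable to convert column m of type calendarinterval
    // to JSON" or similar).
    val intervalValue = base
      .select(struct(map(lit("k"), $"a".cast(CalendarIntervalType)).as("m")).as("c"))
    try intervalValue.select(to_json($"c")).collect()
    catch { case e: AnalysisException => println(e.getMessage) }

    spark.stop()
  }
}
```

The asymmetry in the sketch mirrors `JacksonGenerator`: map keys are emitted with `toString`, while map values go through a per-type writer, which is why `verifySchema` only needs to check the value type.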