From 07a1788ee04597700f101183e67d237f9a866c46 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Mon, 30 Jan 2017 18:38:14 -0800
Subject: [PATCH] [SPARK-19406][SQL] Fix function to_json to respect user-provided options

### What changes were proposed in this pull request?
Currently, the function `to_json` allows users to provide options for generating JSON, but it does not pass them on to `JacksonGenerator`, so the user-provided options are ignored. This PR fixes that. Below is an example.
```Scala
val df = Seq(Tuple1(Tuple1(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0")))).toDF("a")
val options = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")
df.select(to_json($"a", options)).show(false)
```
The current output is:
```
+--------------------------------------+
|structtojson(a)                       |
+--------------------------------------+
|{"_1":"2015-08-26T18:00:00.000-07:00"}|
+--------------------------------------+
```
After the fix, the output is:
```
+-------------------------+
|structtojson(a)          |
+-------------------------+
|{"_1":"26/08/2015 18:00"}|
+-------------------------+
```

### How was this patch tested?
Added test cases for both `from_json` and `to_json`.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #16745 from gatorsmile/toJson.

(cherry picked from commit f9156d2956a8e751720bf63071c504a3e86f267d)
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
---
 .../expressions/jsonExpressions.scala         |  5 ++++-
 .../apache/spark/sql/JsonFunctionsSuite.scala | 21 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 667ff649d1..92d0888fc6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -517,7 +517,10 @@ case class StructToJson(options: Map[String, String], child: Expression)
 
   @transient
   lazy val gen =
-    new JacksonGenerator(child.dataType.asInstanceOf[StructType], writer)
+    new JacksonGenerator(
+      child.dataType.asInstanceOf[StructType],
+      writer,
+      new JSONOptions(options))
 
   override def dataType: DataType = StringType
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 890cc5b560..9c39b3c7f0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import org.apache.spark.sql.functions.{from_json, struct, to_json}
 import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{CalendarIntervalType, IntegerType, StructType}
+import org.apache.spark.sql.types.{CalendarIntervalType, IntegerType, StructType, TimestampType}
 
 class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
   import testImplicits._
@@ -105,6 +105,16 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       Row(Row(1)) :: Nil)
   }
 
+  test("from_json with option") {
+    val df = Seq("""{"time": "26/08/2015 18:00"}""").toDS()
+    val schema = new StructType().add("time", TimestampType)
+    val options = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")
+
+    checkAnswer(
+      df.select(from_json($"value", schema, options)),
+      Row(Row(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0"))))
+  }
+
   test("from_json missing columns") {
     val df = Seq("""{"a": 1}""").toDS()
     val schema = new StructType().add("b", IntegerType)
@@ -131,6 +141,15 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       Row("""{"_1":1}""") :: Nil)
   }
 
+  test("to_json with option") {
+    val df = Seq(Tuple1(Tuple1(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0")))).toDF("a")
+    val options = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")
+
+    checkAnswer(
+      df.select(to_json($"a", options)),
+      Row("""{"_1":"26/08/2015 18:00"}""") :: Nil)
+  }
+
   test("to_json unsupported type") {
     val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
       .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
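
For reference, the sketch below is not part of the patch: it is a minimal, self-contained Scala program that exercises the behaviour covered by the new tests, assuming a Spark 2.1.x build that includes this fix. The `ToJsonOptionsDemo` object name and the local `SparkSession` setup are illustrative; the `to_json`/`from_json` calls and the `timestampFormat` option mirror the tests above.

```Scala
// Illustrative sketch only (not part of the patch). Assumes a local Spark 2.1.x build
// that includes this fix.
import java.sql.Timestamp

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{from_json, to_json}
import org.apache.spark.sql.types.{StructType, TimestampType}

object ToJsonOptionsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("to_json options demo")
      .getOrCreate()
    import spark.implicits._

    // The option under test: a custom timestamp format for JSON read/write.
    val options = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")

    // to_json: with the fix, the options map reaches JacksonGenerator, so the timestamp
    // is written as "26/08/2015 18:00" rather than the default ISO-8601 form.
    val df = Seq(Tuple1(Tuple1(Timestamp.valueOf("2015-08-26 18:00:00.0")))).toDF("a")
    df.select(to_json($"a", options)).show(false)

    // from_json: the same option parses the custom format back into a TimestampType
    // column, matching the new "from_json with option" test above.
    val schema = new StructType().add("time", TimestampType)
    val jsonDF = Seq("""{"time": "26/08/2015 18:00"}""").toDS()
    jsonDF.select(from_json($"value", schema, options)).show(false)

    spark.stop()
  }
}
```

With the fix, the first `show` prints `{"_1":"26/08/2015 18:00"}`, as in the commit message; without it, `to_json` ignores the option and falls back to the default timestamp formatting.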