Skip to content
Snippets Groups Projects
Commit 07a1788e authored by gatorsmile's avatar gatorsmile
Browse files

[SPARK-19406][SQL] Fix function to_json to respect user-provided options


### What changes were proposed in this pull request?
Currently, the function `to_json` allows users to provide options for generating JSON. However, it does not pass them to `JacksonGenerator`. Thus, it ignores the user-provided options. This PR is to fix it. Below is an example.

```scala
val df = Seq(Tuple1(Tuple1(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0")))).toDF("a")
val options = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")
df.select(to_json($"a", options)).show(false)
```
The current output is like
```
+--------------------------------------+
|structtojson(a)                       |
+--------------------------------------+
|{"_1":"2015-08-26T18:00:00.000-07:00"}|
+--------------------------------------+
```

After the fix, the output is like
```
+-------------------------+
|structtojson(a)          |
+-------------------------+
|{"_1":"26/08/2015 18:00"}|
+-------------------------+
```
### How was this patch tested?
Added test cases for both `from_json` and `to_json`

Author: gatorsmile <gatorsmile@gmail.com>

Closes #16745 from gatorsmile/toJson.

(cherry picked from commit f9156d29)
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
parent 445438c9
No related branches found
No related tags found
No related merge requests found
......@@ -517,7 +517,10 @@ case class StructToJson(options: Map[String, String], child: Expression)
// NOTE(review): this span is a rendered unified diff from the commit page — the single-line
// `new JacksonGenerator(...)` call is the REMOVED version and the multi-line call below it is
// the ADDED replacement; the two would not coexist in the actual source file.
@transient
lazy val gen =
new JacksonGenerator(child.dataType.asInstanceOf[StructType], writer)
// SPARK-19406 fix: forward the user-provided `options` map (e.g. "timestampFormat") to the
// generator via JSONOptions instead of silently ignoring it.
new JacksonGenerator(
child.dataType.asInstanceOf[StructType],
writer,
new JSONOptions(options))
// to_json always renders its struct input as a JSON string.
override def dataType: DataType = StringType
......
......@@ -19,7 +19,7 @@ package org.apache.spark.sql
import org.apache.spark.sql.functions.{from_json, struct, to_json}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{CalendarIntervalType, IntegerType, StructType}
import org.apache.spark.sql.types.{CalendarIntervalType, IntegerType, StructType, TimestampType}
class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
import testImplicits._
......@@ -105,6 +105,16 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row(Row(1)) :: Nil)
}
test("from_json with option") {
  // from_json must honor a caller-supplied timestampFormat when parsing JSON timestamps.
  val customFormat = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")
  val timeSchema = new StructType().add("time", TimestampType)
  val input = Seq("""{"time": "26/08/2015 18:00"}""").toDS()
  val expected = Row(Row(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0")))
  checkAnswer(input.select(from_json($"value", timeSchema, customFormat)), expected)
}
test("from_json missing columns") {
val df = Seq("""{"a": 1}""").toDS()
val schema = new StructType().add("b", IntegerType)
......@@ -131,6 +141,15 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row("""{"_1":1}""") :: Nil)
}
test("to_json with option") {
  // to_json must honor a caller-supplied timestampFormat when rendering timestamps.
  val customFormat = Map("timestampFormat" -> "dd/MM/yyyy HH:mm")
  val input = Seq(Tuple1(Tuple1(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0")))).toDF("a")
  val expected = Row("""{"_1":"26/08/2015 18:00"}""") :: Nil
  checkAnswer(input.select(to_json($"a", customFormat)), expected)
}
test("to_json unsupported type") {
val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment