Skip to content
Snippets Groups Projects
Commit 1c8633f3 authored by tianyi's avatar tianyi Committed by Cheng Lian
Browse files

[SPARK-3365][SQL]Wrong schema generated for List type

This PR fix the issue SPARK-3365.
The reason is Spark generated wrong schema for the type `List` in `ScalaReflection.scala`
for example:

the generated schema for type `Seq[String]` is:
```
{"name":"x","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}`
```

the generated schema for type `List[String]` is:
```
{"name":"x","type":{"type":"struct","fields":[]},"nullable":true,"metadata":{}}`
```

Author: tianyi <tianyi.asiainfo@gmail.com>

Closes #4581 from tianyi/SPARK-3365 and squashes the following commits:

a097e86 [tianyi] change the order of resolution in ScalaReflection.scala
parent 2aea892e
No related branches found
No related tags found
No related merge requests found
......@@ -122,6 +122,21 @@ trait ScalaReflection {
case t if t <:< typeOf[Option[_]] =>
val TypeRef(_, _, Seq(optType)) = t
Schema(schemaFor(optType).dataType, nullable = true)
// Need to decide if we actually need a special type here.
case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true)
case t if t <:< typeOf[Array[_]] =>
val TypeRef(_, _, Seq(elementType)) = t
val Schema(dataType, nullable) = schemaFor(elementType)
Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
case t if t <:< typeOf[Seq[_]] =>
val TypeRef(_, _, Seq(elementType)) = t
val Schema(dataType, nullable) = schemaFor(elementType)
Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
case t if t <:< typeOf[Map[_, _]] =>
val TypeRef(_, _, Seq(keyType, valueType)) = t
val Schema(valueDataType, valueNullable) = schemaFor(valueType)
Schema(MapType(schemaFor(keyType).dataType,
valueDataType, valueContainsNull = valueNullable), nullable = true)
case t if t <:< typeOf[Product] =>
val formalTypeArgs = t.typeSymbol.asClass.typeParams
val TypeRef(_, _, actualTypeArgs) = t
......@@ -144,21 +159,6 @@ trait ScalaReflection {
schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs))
StructField(p.name.toString, dataType, nullable)
}), nullable = true)
// Need to decide if we actually need a special type here.
case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true)
case t if t <:< typeOf[Array[_]] =>
val TypeRef(_, _, Seq(elementType)) = t
val Schema(dataType, nullable) = schemaFor(elementType)
Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
case t if t <:< typeOf[Seq[_]] =>
val TypeRef(_, _, Seq(elementType)) = t
val Schema(dataType, nullable) = schemaFor(elementType)
Schema(ArrayType(dataType, containsNull = nullable), nullable = true)
case t if t <:< typeOf[Map[_, _]] =>
val TypeRef(_, _, Seq(keyType, valueType)) = t
val Schema(valueDataType, valueNullable) = schemaFor(valueType)
Schema(MapType(schemaFor(keyType).dataType,
valueDataType, valueContainsNull = valueNullable), nullable = true)
case t if t <:< typeOf[String] => Schema(StringType, nullable = true)
case t if t <:< typeOf[Timestamp] => Schema(TimestampType, nullable = true)
case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true)
......
......@@ -61,6 +61,7 @@ case class OptionalData(
case class ComplexData(
arrayField: Seq[Int],
arrayField1: Array[Int],
arrayField2: List[Int],
arrayFieldContainsNull: Seq[java.lang.Integer],
mapField: Map[Int, Long],
mapFieldValueContainsNull: Map[Int, java.lang.Long],
......@@ -137,6 +138,10 @@ class ScalaReflectionSuite extends FunSuite {
"arrayField1",
ArrayType(IntegerType, containsNull = false),
nullable = true),
StructField(
"arrayField2",
ArrayType(IntegerType, containsNull = false),
nullable = true),
StructField(
"arrayFieldContainsNull",
ArrayType(IntegerType, containsNull = true),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment