Skip to content
Snippets Groups Projects
Commit 851e247c authored by Weizhong Lin, committed by Cheng Lian
Browse files

[SPARK-8928] [SQL] Makes CatalystSchemaConverter sticking to 1.4.x- when...

[SPARK-8928] [SQL] Makes CatalystSchemaConverter stick to the behavior of 1.4.x and prior versions when handling Parquet LISTs in compatible mode

This PR is based on #7209 authored by Sephiroth-Lin.

Author: Weizhong Lin <linweizhong@huawei.com>

Closes #7314 from liancheng/spark-8928 and squashes the following commits:

75267fe [Cheng Lian] Makes CatalystSchemaConverter sticking to 1.4.x- when handling LISTs in compatible mode
parent c056484c
No related branches found
No related tags found
No related merge requests found
...@@ -461,7 +461,8 @@ private[parquet] class CatalystSchemaConverter( ...@@ -461,7 +461,8 @@ private[parquet] class CatalystSchemaConverter(
field.name, field.name,
Types Types
.buildGroup(REPEATED) .buildGroup(REPEATED)
.addField(convertField(StructField("element", elementType, nullable))) // "array_element" is the name chosen by parquet-hive (1.7.0 and prior version)
.addField(convertField(StructField("array_element", elementType, nullable)))
.named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME)) .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME))
// Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level
...@@ -474,7 +475,8 @@ private[parquet] class CatalystSchemaConverter( ...@@ -474,7 +475,8 @@ private[parquet] class CatalystSchemaConverter(
ConversionPatterns.listType( ConversionPatterns.listType(
repetition, repetition,
field.name, field.name,
convertField(StructField("element", elementType, nullable), REPEATED)) // "array" is the name chosen by parquet-avro (1.7.0 and prior version)
convertField(StructField("array", elementType, nullable), REPEATED))
// Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by
// MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`. // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`.
......
...@@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { ...@@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
""" """
|message root { |message root {
| optional group _1 (LIST) { | optional group _1 (LIST) {
| repeated int32 element; | repeated int32 array;
| } | }
|} |}
""".stripMargin) """.stripMargin)
...@@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { ...@@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
|message root { |message root {
| optional group _1 (LIST) { | optional group _1 (LIST) {
| repeated group bag { | repeated group bag {
| optional int32 element; | optional int32 array_element;
| } | }
| } | }
|} |}
...@@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { ...@@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
| optional binary _1 (UTF8); | optional binary _1 (UTF8);
| optional group _2 (LIST) { | optional group _2 (LIST) {
| repeated group bag { | repeated group bag {
| optional group element { | optional group array_element {
| required int32 _1; | required int32 _1;
| required double _2; | required double _2;
| } | }
...@@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { ...@@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
"""message root { """message root {
| optional group f1 (LIST) { | optional group f1 (LIST) {
| repeated group bag { | repeated group bag {
| optional int32 element; | optional int32 array_element;
| } | }
| } | }
|} |}
...@@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { ...@@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
nullable = true))), nullable = true))),
"""message root { """message root {
| optional group f1 (LIST) { | optional group f1 (LIST) {
| repeated int32 element; | repeated int32 array;
| } | }
|} |}
""".stripMargin) """.stripMargin)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment