Skip to content
Snippets Groups Projects
Commit e388b39d authored by hyukjinkwon's avatar hyukjinkwon Committed by Cheng Lian
Browse files

[SPARK-11692][SQL] Support for Parquet logical types, JSON and BSON (embedded types)

Parquet supports JSON and BSON as logical (embedded) types. Internally, both annotate the binary primitive type: JSON is stored as a UTF-8 string and BSON as raw binary.

I searched a bit and found that Apache Drill also supports both types in this way; see the [Drill Parquet format documentation](https://drill.apache.org/docs/parquet-format/).

Author: hyukjinkwon <gurwls223@gmail.com>
Author: Hyukjin Kwon <gurwls223@gmail.com>

Closes #9658 from HyukjinKwon/SPARK-11692.
parent 7f8eb3bf
No related branches found
No related tags found
No related merge requests found
......@@ -170,9 +170,10 @@ private[parquet] class CatalystSchemaConverter(
// Chooses the Catalyst type for a Parquet BINARY primitive from its original-type annotation.
case BINARY =>
originalType match {
// NOTE(review): this pattern is fully covered by the next one; it looks like the pre-change
// line of the diff hunk shown next to its replacement — confirm before treating it as live code.
case UTF8 | ENUM => StringType
// Text-like annotations, including JSON, are read as strings.
case UTF8 | ENUM | JSON => StringType
// No annotation: string when assumeBinaryIsString is set, otherwise raw binary.
case null if assumeBinaryIsString => StringType
case null => BinaryType
// BSON is read as raw binary.
case BSON => BinaryType
// DECIMAL is delegated to makeDecimalType() (defined outside this view).
case DECIMAL => makeDecimalType()
// Any other annotation on BINARY falls through to illegalType() (defined outside this view).
case _ => illegalType()
}
......
......@@ -259,6 +259,31 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
}
}
// Verifies that BINARY columns annotated as JSON and BSON are read back as StringType and
// BinaryType respectively.
test("SPARK-11692 Support for Parquet logical types, JSON and BSON (embedded types)") {
  // One required BINARY column per logical type under test.
  val parquetSchema = MessageTypeParser.parseMessageType(
    """message root {
      | required binary a(JSON);
      | required binary b(BSON);
      |}
      |""".stripMargin)

  withTempPath { location =>
    val path = new Path(location.getCanonicalPath)

    // Write a metadata file whose footer carries the schema above and an empty block list.
    val fileMetadata =
      new FileMetaData(parquetSchema, Map.empty[String, String].asJava, "Spark")
    val footer = List(
      new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))
    ).asJava
    ParquetFileWriter.writeMetadataFile(sparkContext.hadoopConfiguration, path, footer)

    // Resolve the schema once; both columns come from the same read, so there is no need
    // to load the path a second time for the second field.
    val schema = sqlContext.read.parquet(path.toString).schema

    val jsonDataType = schema(0).dataType
    assert(jsonDataType === StringType)

    val bsonDataType = schema(1).dataType
    assert(bsonDataType === BinaryType)
  }
}
test("compression codec") {
def compressionCodecFor(path: String, codecName: String): String = {
val codecs = for {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment