Commit ae226283 authored by Wenchen Fan, committed by Cheng Lian

[SQL][MINOR] use stricter type parameter to make it clear that parquet reader returns UnsafeRow

## What changes were proposed in this pull request?

A small code-style change: using the stricter type parameter `UnsafeRow` instead of the more general `InternalRow` makes the types more accurate and makes it explicit that the Parquet reader returns `UnsafeRow`s. A short illustration of why this is safe for callers follows.
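
Since `UnsafeRow` is a subclass of `InternalRow` in Spark, narrowing the type parameter stays compatible with callers that only expect `InternalRow`s. A minimal sketch of that subtyping argument, using hypothetical stand-in types rather than the real Spark classes:

```scala
// Hypothetical stand-ins: Row plays the role of InternalRow, BinaryRow the
// role of UnsafeRow (in Spark, UnsafeRow extends InternalRow).
trait Row { def numFields: Int }
final class BinaryRow extends Row { def numFields: Int = 0 }

// Stand-in for ParquetRecordReader[T]: a reader parameterized on its record type.
class Reader[T](produce: () => T) {
  def next(): T = produce()
}

object Demo extends App {
  // Declared with the stricter type parameter...
  val reader = new Reader[BinaryRow](() => new BinaryRow)
  // ...yet a record can still be consumed wherever the wider type is expected.
  val row: Row = reader.next()
  println(s"fields: ${row.numFields}")
}
```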

## How was this patch tested?

N/A

Author: Wenchen Fan <wenchen@databricks.com>

Closes #14458 from cloud-fan/parquet.
parent 38612737
ParquetFileFormat.scala:

```diff
@@ -370,11 +370,11 @@ private[sql] class ParquetFileFormat
       logDebug(s"Falling back to parquet-mr")
       val reader = pushed match {
         case Some(filter) =>
-          new ParquetRecordReader[InternalRow](
+          new ParquetRecordReader[UnsafeRow](
             new ParquetReadSupport,
             FilterCompat.get(filter, null))
         case _ =>
-          new ParquetRecordReader[InternalRow](new ParquetReadSupport)
+          new ParquetRecordReader[UnsafeRow](new ParquetReadSupport)
       }
       reader.initialize(split, hadoopAttemptContext)
       reader
```
ParquetReadSupport.scala:

```diff
@@ -29,12 +29,12 @@ import org.apache.parquet.schema._
 import org.apache.parquet.schema.Type.Repetition
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types._
 
 /**
  * A Parquet [[ReadSupport]] implementation for reading Parquet records as Catalyst
- * [[InternalRow]]s.
+ * [[UnsafeRow]]s.
  *
  * The API interface of [[ReadSupport]] is a little bit over complicated because of historical
  * reasons. In older versions of parquet-mr (say 1.6.0rc3 and prior), [[ReadSupport]] need to be
@@ -48,7 +48,7 @@ import org.apache.spark.sql.types._
  * Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]]
  * to [[prepareForRead()]], but use a private `var` for simplicity.
  */
-private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with Logging {
+private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Logging {
   private var catalystRequestedSchema: StructType = _
 
   /**
@@ -72,13 +72,13 @@ private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with
   /**
    * Called on executor side after [[init()]], before instantiating actual Parquet record readers.
    * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet
-   * records to Catalyst [[InternalRow]]s.
+   * records to Catalyst [[UnsafeRow]]s.
    */
   override def prepareForRead(
       conf: Configuration,
       keyValueMetaData: JMap[String, String],
       fileSchema: MessageType,
-      readContext: ReadContext): RecordMaterializer[InternalRow] = {
+      readContext: ReadContext): RecordMaterializer[UnsafeRow] = {
     log.debug(s"Preparing for read Parquet file with message type: $fileSchema")
 
     val parquetRequestedSchema = readContext.getRequestedSchema
```
ParquetRecordMaterializer.scala:

```diff
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
 import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
 import org.apache.parquet.schema.MessageType
 
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types.StructType
 
 /**
@@ -32,12 +32,12 @@ import org.apache.spark.sql.types.StructType
  */
 private[parquet] class ParquetRecordMaterializer(
     parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetSchemaConverter)
-  extends RecordMaterializer[InternalRow] {
+  extends RecordMaterializer[UnsafeRow] {
 
   private val rootConverter =
     new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, NoopUpdater)
 
-  override def getCurrentRecord: InternalRow = rootConverter.currentRecord
+  override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord
 
   override def getRootConverter: GroupConverter = rootConverter
 }
```
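
For orientation, here is a simplified sketch of how the type parameter threads through the three classes touched by this commit. The signatures below are reduced stand-ins for the real parquet-mr interfaces, which take extra arguments (configuration, schemas, etc.) omitted here:

```scala
// Reduced stand-ins for the parquet-mr reading contract: the ReadSupport
// produces the RecordMaterializer, and the reader's record type must match both.
trait RecordMaterializer[T] { def getCurrentRecord: T }
trait ReadSupport[T] { def prepareForRead(): RecordMaterializer[T] }

class RecordReader[T](support: ReadSupport[T]) {
  // The record type the reader yields is fixed by its ReadSupport.
  def nextRecord(): T = support.prepareForRead().getCurrentRecord
}

object ThreadingDemo extends App {
  // A ReadSupport fixed to String pins the reader to RecordReader[String],
  // mirroring how ReadSupport[UnsafeRow] forces ParquetRecordReader[UnsafeRow]
  // at the call sites in ParquetFileFormat: all three hunks are one
  // consistent change.
  val support = new ReadSupport[String] {
    def prepareForRead(): RecordMaterializer[String] =
      new RecordMaterializer[String] { def getCurrentRecord: String = "record" }
  }
  val reader = new RecordReader(support)
  println(reader.nextRecord())
}
```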