diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java
index 57cc28e9f4e056c949f5f2ff255f28c3ec89c0e2..ee9a7a221bbde7ed768e79c5a987b0fb33672561 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java
@@ -85,7 +85,7 @@ public class VectorizedPlainValuesReader extends ValuesReader implements Vectori
     for (int i = 0; i < total; i++) {
       // Bytes are stored as a 4-byte little endian int. Just read the first byte.
       // TODO: consider pushing this in ColumnVector by adding a readBytes with a stride.
-      c.putByte(rowId + i, buffer[offset]);
+      c.putByte(rowId + i, Platform.getByte(buffer, offset));
       offset += 4;
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
index 5ce58e898ebecc7ef3fa6574bcda0d5ff1e7dacc..f2501d7ce35900be0aecd4432660ead43ee5d1a9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
@@ -175,4 +175,37 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
       }
     }
   }
+
+  test(s"SPARK-13537: Fix readBytes in VectorizedPlainValuesReader") {
+    withTempPath { file =>
+      val path = file.getCanonicalPath
+
+      val schema = new StructType()
+        .add("index", IntegerType, nullable = false)
+        .add("col", ByteType, nullable = true)
+
+      val data = Seq(Row(1, -33.toByte), Row(2, 0.toByte), Row(3, -55.toByte), Row(4, 56.toByte),
+        Row(5, 127.toByte), Row(6, -44.toByte), Row(7, 23.toByte), Row(8, -95.toByte),
+        Row(9, 127.toByte), Row(10, 13.toByte))
+
+      val rdd = sqlContext.sparkContext.parallelize(data)
+      val df = sqlContext.createDataFrame(rdd, schema).orderBy("index").coalesce(1)
+
+      df.write
+        .mode("overwrite")
+        .format(dataSourceName)
+        .option("dataSchema", df.schema.json)
+        .save(path)
+
+      val loadedDF = sqlContext
+        .read
+        .format(dataSourceName)
+        .option("dataSchema", df.schema.json)
+        .schema(df.schema)
+        .load(path)
+        .orderBy("index")
+
+      checkAnswer(loadedDF, df)
+    }
+  }
 }
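
Reviewer note (not part of the patch): the reader's `offset` field is an Unsafe-style offset, not an array index — `initFromPage` seeds it with `Platform.BYTE_ARRAY_OFFSET` — so indexing the array directly with `buffer[offset]` reads from the wrong position, which is what this change fixes. The minimal sketch below illustrates the distinction. It assumes only `org.apache.spark.unsafe.Platform` from the spark-unsafe module on the classpath; the class name `PlatformOffsetDemo` is made up for the example.

import org.apache.spark.unsafe.Platform;

public class PlatformOffsetDemo {
  public static void main(String[] args) {
    byte[] buffer = {10, 20, 30, 40};

    // The reader's cursor starts at Platform.BYTE_ARRAY_OFFSET (the JVM's
    // byte-array header size, typically 16), not at array index 0.
    long offset = Platform.BYTE_ARRAY_OFFSET;

    // Correct: Platform.getByte treats `offset` as an offset from the object
    // base, so this reads the first element of the array.
    System.out.println(Platform.getByte(buffer, offset)); // prints 10

    // The pattern the patch removes: using the Unsafe offset as a plain array
    // index, e.g. buffer[(int) offset], would read 16 positions too far and
    // here would throw ArrayIndexOutOfBoundsException.
  }
}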