From 078f3fbda860e2f5de34153c55dfc3fecb4256e9 Mon Sep 17 00:00:00 2001 From: Chia-Yung Su <chiayung@appier.com> Date: Thu, 14 Aug 2014 10:43:08 -0700 Subject: [PATCH] [SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile Author: Chia-Yung Su <chiayung@appier.com> Closes #1924 from joesu/bugfix-spark3011 and squashes the following commits: c7e44f2 [Chia-Yung Su] match syntax f8fc32a [Chia-Yung Su] filter out tmp dir --- .../main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index 2867dc0a8b..37091bcf73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -375,7 +375,8 @@ private[parquet] object ParquetTypesConverter extends Logging { val children = fs.listStatus(path).filterNot { status => val name = status.getPath.getName - name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME + name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME || + name == FileOutputCommitter.TEMP_DIR_NAME } // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row -- GitLab