From bebe3f7b45f7b0a96f20d5af9b80633fd40cff06 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Thu, 23 Jul 2015 17:49:33 -0700 Subject: [PATCH] [SPARK-9207] [SQL] Enables Parquet filter push-down by default PARQUET-136 and PARQUET-173 have been fixed in parquet-mr 1.7.0. It's time to enable filter push-down by default now. Author: Cheng Lian <lian@databricks.com> Closes #7612 from liancheng/spark-9207 and squashes the following commits: 77e6b5e [Cheng Lian] Enables Parquet filter push-down by default --- docs/sql-programming-guide.md | 9 ++------- .../src/main/scala/org/apache/spark/sql/SQLConf.scala | 8 ++------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 5838bc172f..95945eb7fc 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1332,13 +1332,8 @@ Configuration of Parquet can be done using the `setConf` method on `SQLContext` </tr> <tr> <td><code>spark.sql.parquet.filterPushdown</code></td> - <td>false</td> - <td> - Turn on Parquet filter pushdown optimization. This feature is turned off by default because of a known - bug in Parquet 1.6.0rc3 (<a href="https://issues.apache.org/jira/browse/PARQUET-136">PARQUET-136</a>). - However, if your table doesn't contain any nullable string or binary columns, it's still safe to turn - this feature on. - </td> + <td>true</td> + <td>Enables Parquet filter push-down optimization when set to true.</td> </tr> <tr> <td><code>spark.sql.hive.convertMetastoreParquet</code></td> diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 1474b170ba..2a641b9d64 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -273,12 +273,8 @@ private[spark] object SQLConf { "uncompressed, snappy, gzip, lzo.") val PARQUET_FILTER_PUSHDOWN_ENABLED = booleanConf("spark.sql.parquet.filterPushdown", - defaultValue = Some(false), - doc = "Turn on Parquet filter pushdown optimization. This feature is turned off by default " + - "because of a known bug in Parquet 1.6.0rc3 " + - "(PARQUET-136, https://issues.apache.org/jira/browse/PARQUET-136). However, " + - "if your table doesn't contain any nullable string or binary columns, it's still safe to " + - "turn this feature on.") + defaultValue = Some(true), + doc = "Enables Parquet filter push-down optimization when set to true.") val PARQUET_USE_DATA_SOURCE_API = booleanConf("spark.sql.parquet.useDataSourceApi", defaultValue = Some(true), -- GitLab