From bebe3f7b45f7b0a96f20d5af9b80633fd40cff06 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian@databricks.com>
Date: Thu, 23 Jul 2015 17:49:33 -0700
Subject: [PATCH] [SPARK-9207] [SQL] Enables Parquet filter push-down by
 default

PARQUET-136 and PARQUET-173 have been fixed in parquet-mr 1.7.0. It's time to enable filter push-down by default now.
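
A minimal usage sketch of the option this patch flips (not part of the patch itself; the file path and column name below are illustrative). With the new default, `spark.sql.parquet.filterPushdown` no longer needs to be enabled explicitly; setting it is only needed to opt out or to re-enable it after disabling:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object FilterPushdownExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("filter-pushdown-example"))
    val sqlContext = new SQLContext(sc)

    // After this patch the optimization is on by default; this call just makes
    // the setting explicit (use "false" to opt out).
    sqlContext.setConf("spark.sql.parquet.filterPushdown", "true")

    // Filters on Parquet-backed data can now be pushed down to the Parquet
    // reader, so row groups that cannot match are skipped using column statistics.
    val df = sqlContext.read.parquet("/path/to/data.parquet")  // illustrative path
    df.filter("id > 100").show()
  }
}
```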

Author: Cheng Lian <lian@databricks.com>

Closes #7612 from liancheng/spark-9207 and squashes the following commits:

77e6b5e [Cheng Lian] Enables Parquet filter push-down by default
---
 docs/sql-programming-guide.md                            | 9 ++-------
 .../src/main/scala/org/apache/spark/sql/SQLConf.scala    | 8 ++------
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 5838bc172f..95945eb7fc 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1332,13 +1332,8 @@ Configuration of Parquet can be done using the `setConf` method on `SQLContext`
 </tr>
 <tr>
   <td><code>spark.sql.parquet.filterPushdown</code></td>
-  <td>false</td>
-  <td>
-    Turn on Parquet filter pushdown optimization. This feature is turned off by default because of a known
-    bug in Parquet 1.6.0rc3 (<a href="https://issues.apache.org/jira/browse/PARQUET-136">PARQUET-136</a>).
-    However, if your table doesn't contain any nullable string or binary columns, it's still safe to turn
-    this feature on.
-  </td>
+  <td>true</td>
+  <td>Enables Parquet filter push-down optimization when set to true.</td>
 </tr>
 <tr>
   <td><code>spark.sql.hive.convertMetastoreParquet</code></td>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index 1474b170ba..2a641b9d64 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -273,12 +273,8 @@ private[spark] object SQLConf {
       "uncompressed, snappy, gzip, lzo.")
 
   val PARQUET_FILTER_PUSHDOWN_ENABLED = booleanConf("spark.sql.parquet.filterPushdown",
-    defaultValue = Some(false),
-    doc = "Turn on Parquet filter pushdown optimization. This feature is turned off by default " +
-      "because of a known bug in Parquet 1.6.0rc3 " +
-      "(PARQUET-136, https://issues.apache.org/jira/browse/PARQUET-136). However, " +
-      "if your table doesn't contain any nullable string or binary columns, it's still safe to " +
-      "turn this feature on.")
+    defaultValue = Some(true),
+    doc = "Enables Parquet filter push-down optimization when set to true.")
 
   val PARQUET_USE_DATA_SOURCE_API = booleanConf("spark.sql.parquet.useDataSourceApi",
     defaultValue = Some(true),
-- 
GitLab