diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 0139b9e87ce8448adf408f0a2356673db361288c..d37f43888fd4f814d536d025480bcfa7b1ea458e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -74,10 +74,6 @@ object DefaultOptimizer extends Optimizer {
 object SamplePushDown extends Rule[LogicalPlan] {
 
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
-    // Push down filter into sample
-    case Filter(condition, s @ Sample(lb, up, replace, seed, child)) =>
-      Sample(lb, up, replace, seed,
-        Filter(condition, child))
     // Push down projection into sample
     case Project(projectList, s @ Sample(lb, up, replace, seed, child)) =>
       Sample(lb, up, replace, seed,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 298c32290697a0b3229728e4c5f304781a4db560..f5ae3ae49b460e01e904f6ca12c7af50f397c5da 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1860,4 +1860,14 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       Row(1))
     }
   }
+
+  test("SPARK-11303: filter should not be pushed down into sample") {
+    val df = sqlContext.range(100)
+    List(true, false).foreach { withReplacement =>
+      val sampled = df.sample(withReplacement, 0.1, 1)
+      val sampledOdd = sampled.filter("id % 2 != 0")
+      val sampledEven = sampled.filter("id % 2 = 0")
+      assert(sampled.count() == sampledOdd.count() + sampledEven.count())
+    }
+  }
 }
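
Not part of the patch: a minimal plain-Scala sketch of why the removed rewrite is unsound, assuming seed-based Bernoulli sampling that consumes one pseudo-random draw per input row (as Sample's per-row selection does for sampling without replacement). Pushing a Filter below a Sample changes the row stream the sampler sees, so the same seed keeps a different set of rows; two complementary filters then become two independent samples whose counts need not partition the original sample, which is exactly what the new test guards against. The object and helper names below (SampleThenFilterSketch, bernoulliSample) are hypothetical illustrations, not Spark APIs.

    import scala.util.Random

    object SampleThenFilterSketch extends App {

      // Seed-based Bernoulli sampling: one random draw per input row, so
      // which rows survive depends on the input stream, not just the seed.
      def bernoulliSample(rows: Seq[Long], fraction: Double, seed: Long): Seq[Long] = {
        val rng = new Random(seed)
        rows.filter(_ => rng.nextDouble() < fraction)
      }

      val rows = (0L until 100L).toSeq
      val sampled = bernoulliSample(rows, fraction = 0.1, seed = 1L)

      // Filtering *after* sampling: the two predicates partition `sampled`,
      // so the counts always add up. This is what the new test asserts.
      assert(sampled.count(_ % 2 != 0) + sampled.count(_ % 2 == 0) == sampled.size)

      // Filtering *before* sampling (what the removed rule rewrote plans
      // into): each branch samples a different row stream with the same
      // seed, yielding two independent samples whose sizes need not add
      // up to sampled.size.
      val oddFirst = bernoulliSample(rows.filter(_ % 2 != 0), fraction = 0.1, seed = 1L)
      val evenFirst = bernoulliSample(rows.filter(_ % 2 == 0), fraction = 0.1, seed = 1L)
      println(s"filter-after-sample: ${sampled.size} rows; " +
        s"filter-before-sample: ${oddFirst.size + evenFirst.size} rows")
    }

Projection pushdown through Sample remains safe and is kept by the patch: a Project changes only the columns of each row, not which rows flow into the sampler, so the random draws line up identically.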