From bd903ee89f1d1bc4daf63f1f07958cb86d667e1e Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Fri, 17 Jul 2015 16:28:24 -0700 Subject: [PATCH] [SPARK-9117] [SQL] fix BooleanSimplification in case-insensitive Author: Wenchen Fan <cloud0fan@outlook.com> Closes #7452 from cloud-fan/boolean-simplify and squashes the following commits: 2a6e692 [Wenchen Fan] fix style d3cfd26 [Wenchen Fan] fix BooleanSimplification in case-insensitive --- .../sql/catalyst/optimizer/Optimizer.scala | 28 +++++----- .../BooleanSimplificationSuite.scala | 55 +++++++++---------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d5beeec0ff..0f28a0d2c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -393,26 +393,26 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // (a || b) && (a || c) => a || (b && c) case _ => // 1. Split left and right to get the disjunctive predicates, - // i.e. lhsSet = (a, b), rhsSet = (a, c) + // i.e. lhs = (a, b), rhs = (a, c) // 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a) // 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c) // 4. Apply the formula, get the optimized predicate: common || (ldiff && rdiff) - val lhsSet = splitDisjunctivePredicates(left).toSet - val rhsSet = splitDisjunctivePredicates(right).toSet - val common = lhsSet.intersect(rhsSet) + val lhs = splitDisjunctivePredicates(left) + val rhs = splitDisjunctivePredicates(right) + val common = lhs.filter(e => rhs.exists(e.semanticEquals(_))) if (common.isEmpty) { // No common factors, return the original predicate and } else { - val ldiff = lhsSet.diff(common) - val rdiff = rhsSet.diff(common) + val ldiff = lhs.filterNot(e => common.exists(e.semanticEquals(_))) + val rdiff = rhs.filterNot(e => common.exists(e.semanticEquals(_))) if (ldiff.isEmpty || rdiff.isEmpty) { // (a || b || c || ...) && (a || b) => (a || b) common.reduce(Or) } else { // (a || b || c || ...) && (a || b || d || ...) => // ((c || ...) && (d || ...)) || a || b - (common + And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or) + (common :+ And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or) } } } // end of And(left, right) @@ -431,26 +431,26 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // (a && b) || (a && c) => a && (b || c) case _ => // 1. Split left and right to get the conjunctive predicates, - // i.e. lhsSet = (a, b), rhsSet = (a, c) + // i.e. lhs = (a, b), rhs = (a, c) // 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a) // 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c) // 4. Apply the formula, get the optimized predicate: common && (ldiff || rdiff) - val lhsSet = splitConjunctivePredicates(left).toSet - val rhsSet = splitConjunctivePredicates(right).toSet - val common = lhsSet.intersect(rhsSet) + val lhs = splitConjunctivePredicates(left) + val rhs = splitConjunctivePredicates(right) + val common = lhs.filter(e => rhs.exists(e.semanticEquals(_))) if (common.isEmpty) { // No common factors, return the original predicate or } else { - val ldiff = lhsSet.diff(common) - val rdiff = rhsSet.diff(common) + val ldiff = lhs.filterNot(e => common.exists(e.semanticEquals(_))) + val rdiff = rhs.filterNot(e => common.exists(e.semanticEquals(_))) if (ldiff.isEmpty || rdiff.isEmpty) { // (a && b) || (a && b && c && ...) => a && b common.reduce(And) } else { // (a && b && c && ...) || (a && b && d && ...) => // ((c && ...) || (d && ...)) && a && b - (common + Or(ldiff.reduce(And), rdiff.reduce(And))).reduce(And) + (common :+ Or(ldiff.reduce(And), rdiff.reduce(And))).reduce(And) } } } // end of Or(left, right) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 465a5e6914..d4916ea8d2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries +import org.apache.spark.sql.catalyst.analysis.{AnalysisSuite, EliminateSubQueries} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.PlanTest @@ -40,29 +40,11 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string) - // The `foldLeft` is required to handle cases like comparing `a && (b && c)` and `(a && b) && c` - def compareConditions(e1: Expression, e2: Expression): Boolean = (e1, e2) match { - case (lhs: And, rhs: And) => - val lhsSet = splitConjunctivePredicates(lhs).toSet - val rhsSet = splitConjunctivePredicates(rhs).toSet - lhsSet.foldLeft(rhsSet) { (set, e) => - set.find(compareConditions(_, e)).map(set - _).getOrElse(set) - }.isEmpty - - case (lhs: Or, rhs: Or) => - val lhsSet = splitDisjunctivePredicates(lhs).toSet - val rhsSet = splitDisjunctivePredicates(rhs).toSet - lhsSet.foldLeft(rhsSet) { (set, e) => - set.find(compareConditions(_, e)).map(set - _).getOrElse(set) - }.isEmpty - - case (l, r) => l == r - } - - def checkCondition(input: Expression, expected: Expression): Unit = { + private def checkCondition(input: Expression, expected: Expression): Unit = { val plan = testRelation.where(input).analyze - val actual = Optimize.execute(plan).expressions.head - compareConditions(actual, expected) + val actual = Optimize.execute(plan) + val correctAnswer = testRelation.where(expected).analyze + comparePlans(actual, correctAnswer) } test("a && a => a") { @@ -86,10 +68,8 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { ('a === 'b && 'c < 1 && 'a === 5) || ('a === 'b && 'b < 5 && 'a > 1) - val expected = - (((('b > 3) && ('c > 2)) || - (('c < 1) && ('a === 5))) || - (('b < 5) && ('a > 1))) && ('a === 'b) + val expected = 'a === 'b && ( + ('b > 3 && 'c > 2) || ('c < 1 && 'a === 5) || ('b < 5 && 'a > 1)) checkCondition(input, expected) } @@ -101,10 +81,27 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { checkCondition('a < 2 && ('a < 2 || 'a > 3 || 'b > 5) , 'a < 2) - checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2) + checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), 'a < 2 || ('b > 3 && 'c > 5)) checkCondition( ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5), - ('b > 3 && 'a > 3 && 'a < 5) || 'a === 'b) + ('a === 'b || 'b > 3 && 'a > 3 && 'a < 5)) + } + + private def caseInsensitiveAnalyse(plan: LogicalPlan) = + AnalysisSuite.caseInsensitiveAnalyzer.execute(plan) + + test("(a && b) || (a && c) => a && (b || c) when case insensitive") { + val plan = caseInsensitiveAnalyse(testRelation.where(('a > 2 && 'b > 3) || ('A > 2 && 'b < 5))) + val actual = Optimize.execute(plan) + val expected = caseInsensitiveAnalyse(testRelation.where('a > 2 && ('b > 3 || 'b < 5))) + comparePlans(actual, expected) + } + + test("(a || b) && (a || c) => a || (b && c) when case insensitive") { + val plan = caseInsensitiveAnalyse(testRelation.where(('a > 2 || 'b > 3) && ('A > 2 || 'b < 5))) + val actual = Optimize.execute(plan) + val expected = caseInsensitiveAnalyse(testRelation.where('a > 2 || ('b > 3 && 'b < 5))) + comparePlans(actual, expected) } } -- GitLab