diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 371d72ef5af086e24c56229d1efb74be42542fea..40c06ed6d458b3e4af545906e66594d80eb5de9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -56,6 +56,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT Set(IsNotNull(l), IsNotNull(r)) case LessThanOrEqual(l, r) => Set(IsNotNull(l), IsNotNull(r)) + case Not(EqualTo(l, r)) => + Set(IsNotNull(l), IsNotNull(r)) case _ => Set.empty[Expression] }.foldLeft(Set.empty[Expression])(_ union _.toSet) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullFilteringSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullFilteringSuite.scala index 7e52d5ef6749c34dae903a514918d36d397d116b..142e4ae6e439950dca34008cb63dbee50b7aecc3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullFilteringSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NullFilteringSuite.scala @@ -44,11 +44,28 @@ class NullFilteringSuite extends PlanTest { val x = testRelation.subquery('x) val y = testRelation.subquery('y) val originalQuery = x.join(y, - condition = Some("x.a".attr === "y.a".attr && "x.b".attr === 1 && "y.c".attr > 5)).analyze + condition = Some(("x.a".attr === "y.a".attr) && ("x.b".attr === 1) && ("y.c".attr > 5))) + .analyze val left = x.where(IsNotNull('a) && IsNotNull('b)) val right = y.where(IsNotNull('a) && IsNotNull('c)) val correctAnswer = left.join(right, - condition = Some("x.a".attr === "y.a".attr && "x.b".attr === 1 && "y.c".attr > 5)).analyze + condition = Some(("x.a".attr === "y.a".attr) && ("x.b".attr === 1) && ("y.c".attr > 5))) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, correctAnswer) + } + + test("single inner join: filter out nulls on either side on non equal keys") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + val originalQuery = x.join(y, + condition = Some(("x.a".attr =!= "y.a".attr) && ("x.b".attr === 1) && ("y.c".attr > 5))) + .analyze + val left = x.where(IsNotNull('a) && IsNotNull('b)) + val right = y.where(IsNotNull('a) && IsNotNull('c)) + val correctAnswer = left.join(right, + condition = Some(("x.a".attr =!= "y.a".attr) && ("x.b".attr === 1) && ("y.c".attr > 5))) + .analyze val optimized = Optimize.execute(originalQuery) comparePlans(optimized, correctAnswer) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala index 868ad934daf17ee4fbec7eb95880e84f6de21259..e70d3794abb2815dcff05690440c88e693d88a2a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala @@ -64,10 +64,10 @@ class ConstraintPropagationSuite extends SparkFunSuite { verifyConstraints(tr .where('a.attr > 10) .select('c.attr, 'a.attr) - .where('c.attr < 100) + .where('c.attr =!= 100) .analyze.constraints, ExpressionSet(Seq(resolveColumn(tr, "a") > 10, - resolveColumn(tr, "c") < 100, + resolveColumn(tr, "c") =!= 100, IsNotNull(resolveColumn(tr, "a")), IsNotNull(resolveColumn(tr, "c"))))) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala index 46c390c4369655e333925a3a04748e61702ff89a..d76d0c44f51a04f8e8dd68cda0a36f2b6762a76e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala @@ -212,8 +212,9 @@ class OrcFilterSuite extends QueryTest with OrcTest { ) checkFilterPredicate( '_1 =!= 1, - """leaf-0 = (EQUALS _1 1) - |expr = (not leaf-0)""".stripMargin.trim + """leaf-0 = (IS_NULL _1) + |leaf-1 = (EQUALS _1 1) + |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim ) checkFilterPredicate( !('_1 < 4),