Skip to content
Snippets Groups Projects
Commit c8c09064 authored by Liang-Chi Hsieh's avatar Liang-Chi Hsieh Committed by Wenchen Fan
Browse files

[SPARK-17821][SQL] Support And and Or in Expression Canonicalize

## What changes were proposed in this pull request?

Currently `Canonicalize` object doesn't support `And` and `Or`. So we can compare canonicalized form of predicates consistently. We should add the support.

## How was this patch tested?

Jenkins tests.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #15388 from viirya/canonicalize-and-or.
parent 3694ba48
No related branches found
No related tags found
No related merge requests found
......@@ -62,6 +62,13 @@ object Canonicalize extends {
case a: Add => orderCommutative(a, { case Add(l, r) => Seq(l, r) }).reduce(Add)
case m: Multiply => orderCommutative(m, { case Multiply(l, r) => Seq(l, r) }).reduce(Multiply)
case o: Or =>
orderCommutative(o, { case Or(l, r) if l.deterministic && r.deterministic => Seq(l, r) })
.reduce(Or)
case a: And =>
orderCommutative(a, { case And(l, r) if l.deterministic && r.deterministic => Seq(l, r)})
.reduce(And)
case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l)
case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l)
......
......@@ -80,6 +80,88 @@ class ExpressionSetSuite extends SparkFunSuite {
setTest(1, Not(aUpper >= 1), aUpper < 1, Not(Literal(1) <= aUpper), Literal(1) > aUpper)
setTest(1, Not(aUpper <= 1), aUpper > 1, Not(Literal(1) >= aUpper), Literal(1) < aUpper)
// Reordering AND/OR expressions
setTest(1, aUpper > bUpper && aUpper <= 10, aUpper <= 10 && aUpper > bUpper)
setTest(1,
aUpper > bUpper && bUpper > 100 && aUpper <= 10,
bUpper > 100 && aUpper <= 10 && aUpper > bUpper)
setTest(1, aUpper > bUpper || aUpper <= 10, aUpper <= 10 || aUpper > bUpper)
setTest(1,
aUpper > bUpper || bUpper > 100 || aUpper <= 10,
bUpper > 100 || aUpper <= 10 || aUpper > bUpper)
setTest(1,
(aUpper <= 10 && aUpper > bUpper) || bUpper > 100,
bUpper > 100 || (aUpper <= 10 && aUpper > bUpper))
setTest(1,
aUpper >= bUpper || (aUpper > 10 && bUpper < 10),
(bUpper < 10 && aUpper > 10) || aUpper >= bUpper)
// More complicated cases mixing AND/OR
// Three predicates in the following:
// (bUpper > 100)
// (aUpper < 100 && bUpper <= aUpper)
// (aUpper >= 10 && bUpper >= 50)
// They can be reordered and the sub-predicates contained in each of them can be reordered too.
setTest(1,
(bUpper > 100) || (aUpper < 100 && bUpper <= aUpper) || (aUpper >= 10 && bUpper >= 50),
(aUpper >= 10 && bUpper >= 50) || (bUpper > 100) || (aUpper < 100 && bUpper <= aUpper),
(bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100) || (bUpper > 100))
// Two predicates in the following:
// (bUpper > 100 && aUpper < 100 && bUpper <= aUpper)
// (aUpper >= 10 && bUpper >= 50)
setTest(1,
(bUpper > 100 && aUpper < 100 && bUpper <= aUpper) || (aUpper >= 10 && bUpper >= 50),
(aUpper >= 10 && bUpper >= 50) || (aUpper < 100 && bUpper > 100 && bUpper <= aUpper),
(bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100 && bUpper > 100))
// Three predicates in the following:
// (aUpper >= 10)
// (bUpper <= 10 && aUpper === bUpper && aUpper < 100)
// (bUpper >= 100)
setTest(1,
(aUpper >= 10) || (bUpper <= 10 && aUpper === bUpper && aUpper < 100) || (bUpper >= 100),
(aUpper === bUpper && aUpper < 100 && bUpper <= 10) || (bUpper >= 100) || (aUpper >= 10),
(aUpper < 100 && bUpper <= 10 && aUpper === bUpper) || (aUpper >= 10) || (bUpper >= 100),
((bUpper <= 10 && aUpper === bUpper) && aUpper < 100) || ((aUpper >= 10) || (bUpper >= 100)))
// Don't reorder non-deterministic expression in AND/OR.
setTest(2, Rand(1L) > aUpper && aUpper <= 10, aUpper <= 10 && Rand(1L) > aUpper)
setTest(2,
aUpper > bUpper && bUpper > 100 && Rand(1L) > aUpper,
bUpper > 100 && Rand(1L) > aUpper && aUpper > bUpper)
setTest(2, Rand(1L) > aUpper || aUpper <= 10, aUpper <= 10 || Rand(1L) > aUpper)
setTest(2,
aUpper > bUpper || aUpper <= Rand(1L) || aUpper <= 10,
aUpper <= Rand(1L) || aUpper <= 10 || aUpper > bUpper)
// Partial reorder case: we don't reorder non-deterministic expressions,
// but we can reorder sub-expressions in deterministic AND/OR expressions.
// There are two predicates:
// (aUpper > bUpper || bUpper > 100) => we can reorder sub-expressions in it.
// (aUpper === Rand(1L))
setTest(1,
(aUpper > bUpper || bUpper > 100) && aUpper === Rand(1L),
(bUpper > 100 || aUpper > bUpper) && aUpper === Rand(1L))
// There are three predicates:
// (Rand(1L) > aUpper)
// (aUpper <= Rand(1L) && aUpper > bUpper)
// (aUpper > 10 && bUpper > 10) => we can reorder sub-expressions in it.
setTest(1,
Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (aUpper > 10 && bUpper > 10),
Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (bUpper > 10 && aUpper > 10))
// Same predicates as above, but a negative case when we reorder non-deterministic
// expression in (aUpper <= Rand(1L) && aUpper > bUpper).
setTest(2,
Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (aUpper > 10 && bUpper > 10),
Rand(1L) > aUpper || (aUpper > bUpper && aUpper <= Rand(1L)) || (aUpper > 10 && bUpper > 10))
test("add to / remove from set") {
val initialSet = ExpressionSet(aUpper + 1 :: Nil)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment