Skip to content
Snippets Groups Projects
Commit 26b415e1 authored by Liang-Chi Hsieh's avatar Liang-Chi Hsieh Committed by Michael Armbrust
Browse files

[SPARK-6647][SQL] Make trait StringComparison as BinaryPredicate and fix unit...

[SPARK-6647][SQL] Make trait StringComparison as BinaryPredicate and fix unit tests of string data source Filter

Now trait `StringComparison` is a `BinaryExpression`. In fact, it should be a `BinaryPredicate`.

By making `StringComparison` as `BinaryPredicate`, we can throw error when a `expressions.Predicate` can't translate to a data source `Filter` in function `selectFilters`.

Without this modification, because we will wrap a `Filter` outside the scanned results in `pruneFilterProjectRaw`, we can't detect about something is wrong in translating predicates to filters in `selectFilters`.

The unit test of #5285 demonstrates such problem. In that pr, even `expressions.Contains` is not properly translated to `sources.StringContains`, the filtering is still performed by the `Filter` and so the test passes.

Of course, by doing this modification, all `expressions.Predicate` classes need to have its data source `Filter` correspondingly.

There is a small bug in `FilteredScanSuite` for doing `StringEndsWith` filter. This pr also fixes it.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #5309 from viirya/translate_predicate and squashes the following commits:

b176385 [Liang-Chi Hsieh] Address comment.
275a493 [Liang-Chi Hsieh] More properly test for StringStartsWith, StringEndsWith and StringContains.
caf2347 [Liang-Chi Hsieh] Make trait StringComparison as BinaryPredicate and throw error when Predicate can't translate to data source Filter.
parent 14632b79
No related branches found
No related tags found
No related merge requests found
......@@ -156,12 +156,11 @@ case class Lower(child: Expression) extends UnaryExpression with CaseConversionE
/** A base trait for functions that compare two strings, returning a boolean. */
trait StringComparison {
self: BinaryExpression =>
self: BinaryPredicate =>
type EvaluatedType = Any
override type EvaluatedType = Any
override def nullable: Boolean = left.nullable || right.nullable
override def dataType: DataType = BooleanType
def compare(l: String, r: String): Boolean
......@@ -184,7 +183,7 @@ trait StringComparison {
* A function that returns true if the string `left` contains the string `right`.
*/
case class Contains(left: Expression, right: Expression)
extends BinaryExpression with StringComparison {
extends BinaryPredicate with StringComparison {
override def compare(l: String, r: String): Boolean = l.contains(r)
}
......@@ -192,7 +191,7 @@ case class Contains(left: Expression, right: Expression)
* A function that returns true if the string `left` starts with the string `right`.
*/
case class StartsWith(left: Expression, right: Expression)
extends BinaryExpression with StringComparison {
extends BinaryPredicate with StringComparison {
override def compare(l: String, r: String): Boolean = l.startsWith(r)
}
......@@ -200,7 +199,7 @@ case class StartsWith(left: Expression, right: Expression)
* A function that returns true if the string `left` ends with the string `right`.
*/
case class EndsWith(left: Expression, right: Expression)
extends BinaryExpression with StringComparison {
extends BinaryPredicate with StringComparison {
override def compare(l: String, r: String): Boolean = l.endsWith(r)
}
......
......@@ -45,7 +45,9 @@ case class SimpleFilteredScan(from: Int, to: Int)(@transient val sqlContext: SQL
val rowBuilders = requiredColumns.map {
case "a" => (i: Int) => Seq(i)
case "b" => (i: Int) => Seq(i * 2)
case "c" => (i: Int) => Seq((i - 1 + 'a').toChar.toString * 10)
case "c" => (i: Int) =>
val c = (i - 1 + 'a').toChar.toString
Seq(c * 5 + c.toUpperCase() * 5)
}
FiltersPushed.list = filters
......@@ -77,7 +79,7 @@ case class SimpleFilteredScan(from: Int, to: Int)(@transient val sqlContext: SQL
}
def eval(a: Int) = {
val c = (a - 1 + 'a').toChar.toString * 10
val c = (a - 1 + 'a').toChar.toString * 5 + (a - 1 + 'a').toChar.toString.toUpperCase() * 5
!filters.map(translateFilterOnA(_)(a)).contains(false) &&
!filters.map(translateFilterOnC(_)(c)).contains(false)
}
......@@ -110,7 +112,8 @@ class FilteredScanSuite extends DataSourceTest {
sqlTest(
"SELECT * FROM oneToTenFiltered",
(1 to 10).map(i => Row(i, i * 2, (i - 1 + 'a').toChar.toString * 10)).toSeq)
(1 to 10).map(i => Row(i, i * 2, (i - 1 + 'a').toChar.toString * 5
+ (i - 1 + 'a').toChar.toString.toUpperCase() * 5)).toSeq)
sqlTest(
"SELECT a, b FROM oneToTenFiltered",
......@@ -182,15 +185,15 @@ class FilteredScanSuite extends DataSourceTest {
sqlTest(
"SELECT a, b, c FROM oneToTenFiltered WHERE c like 'c%'",
Seq(Row(3, 3 * 2, "c" * 10)))
Seq(Row(3, 3 * 2, "c" * 5 + "C" * 5)))
sqlTest(
"SELECT a, b, c FROM oneToTenFiltered WHERE c like 'd%'",
Seq(Row(4, 4 * 2, "d" * 10)))
"SELECT a, b, c FROM oneToTenFiltered WHERE c like '%D'",
Seq(Row(4, 4 * 2, "d" * 5 + "D" * 5)))
sqlTest(
"SELECT a, b, c FROM oneToTenFiltered WHERE c like '%e%'",
Seq(Row(5, 5 * 2, "e" * 10)))
"SELECT a, b, c FROM oneToTenFiltered WHERE c like '%eE%'",
Seq(Row(5, 5 * 2, "e" * 5 + "E" * 5)))
testPushDown("SELECT * FROM oneToTenFiltered WHERE A = 1", 1)
testPushDown("SELECT a FROM oneToTenFiltered WHERE A = 1", 1)
......@@ -222,6 +225,15 @@ class FilteredScanSuite extends DataSourceTest {
testPushDown("SELECT * FROM oneToTenFiltered WHERE a < 3 OR a > 8", 4)
testPushDown("SELECT * FROM oneToTenFiltered WHERE NOT (a < 6)", 5)
testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like 'c%'", 1)
testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like 'C%'", 0)
testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%D'", 1)
testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%d'", 0)
testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%eE%'", 1)
testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%Ee%'", 0)
def testPushDown(sqlString: String, expectedCount: Int): Unit = {
test(s"PushDown Returns $expectedCount: $sqlString") {
val queryExecution = sql(sqlString).queryExecution
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment