Skip to content
Snippets Groups Projects
Commit 3a9d66cf authored by Kousuke Saruta, committed by Michael Armbrust
Browse files

[SPARK-4061][SQL] We cannot use EOL character in the operand of LIKE predicate.

We cannot use an EOL character such as \n or \r in the operand of a LIKE predicate.
So the following condition is never true.

    -- someStr is 'hoge\nfuga'
    where someStr LIKE 'hoge_fuga'

Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp>

Closes #2908 from sarutak/spark-sql-like-match-modification and squashes the following commits:

d15798b [Kousuke Saruta] Remove test setting for thriftserver
f99a2f4 [Kousuke Saruta] Fixed LIKE predicate so that we can use an EOL character in an operand
parent ace41e8b
No related branches found
No related tags found
No related merge requests found
......@@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression)
// replace each _ with . so that it matches exactly one occurrence of any character
// replace each % with .* so that it matches zero or more occurrences of any character
override def escape(v: String) = {
val sb = new StringBuilder()
var i = 0;
while (i < v.length) {
// Make a special case for "\\_" and "\\%"
val n = v.charAt(i);
if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) {
sb.append(v.charAt(i + 1))
i += 1
} else {
if (n == '_') {
sb.append(".");
} else if (n == '%') {
sb.append(".*");
} else {
sb.append(Pattern.quote(Character.toString(n)));
}
}
i += 1
/**
 * Translates a SQL LIKE pattern into the source text of an equivalent Java regular expression.
 *
 * Translation rules, applied in a single left-to-right scan:
 *  - `_` becomes `.` (exactly one character) and `%` becomes `.*` (any run of characters);
 *  - `\_` and `\%` become the literal characters `_` and `%`;
 *  - `\\` becomes one literal backslash;
 *  - a backslash followed by any other character, or a backslash at the very end of the
 *    pattern, is kept as a literal backslash (followed by that character, if any);
 *  - every other character is quoted so that it only matches itself.
 *
 * A non-empty result is prefixed with `(?s)` (DOTALL) so that `.` also matches line
 * terminators such as `\n` and `\r`.
 *
 * @param v the LIKE pattern
 * @return the regular expression source; an empty pattern is returned unchanged
 */
override def escape(v: String) =
  if (v.isEmpty) {
    v
  } else {
    val out = new StringBuilder("(?s)")
    var i = 0
    while (i < v.length) {
      v.charAt(i) match {
        case '\\' if i + 1 < v.length =>
          // Consume the backslash together with the character it precedes, so that the
          // second character is never re-interpreted as a wildcard or as a new escape.
          v.charAt(i + 1) match {
            case '_' => out.append("_")
            case '%' => out.append("%")
            case '\\' => out.append(Pattern.quote("\\"))
            case c => out.append(Pattern.quote("\\" + c))
          }
          i += 1
        case '_' => out.append(".")
        case '%' => out.append(".*")
        // Also reached for a trailing backslash, which is then matched literally
        // instead of being dropped.
        case c => out.append(Pattern.quote(Character.toString(c)))
      }
      i += 1
    }
    out.toString
  }
sb.toString()
}
// Reports whether `str`, taken as a whole, is matched by the compiled pattern `regex`.
override def matches(regex: Pattern, str: String): Boolean = {
  val matcher = regex.matcher(str)
  matcher.matches()
}
}
......
......@@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like "a%", true)
checkEvaluation("abc" like "b%", false)
checkEvaluation("abc" like "bc%", false)
checkEvaluation("a\nb" like "a_b", true)
checkEvaluation("ab" like "a%b", true)
checkEvaluation("a\nb" like "a%b", true)
}
test("LIKE Non-literal Regular Expression") {
......@@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%")))
checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b")))
checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b")))
checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b")))
checkEvaluation(Literal(null, StringType) like regEx, null, new GenericRow(Array[Any]("bc%")))
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment