Skip to content
Snippets Groups Projects
Commit 047ccc8c authored by Tarek Auel, committed by Reynold Xin
Browse files

[SPARK-9178][SQL] Add an empty string constant to UTF8String

Jira: https://issues.apache.org/jira/browse/SPARK-9178

In order to avoid calls to `UTF8String.fromString("")`, this PR adds an empty-string constant to `UTF8String` (proposed as `EMPTY_STRING`, committed as `EMPTY_UTF8`). A `UTF8String` is immutable, so it should be safe to share a single constant, shouldn't it?

I searched for current usage of `UTF8String.fromString("")` with
`grep -R  "UTF8String.fromString(\"\")" .`

Author: Tarek Auel <tarek.auel@googlemail.com>

Closes #7509 from tarekauel/SPARK-9178 and squashes the following commits:

8d6c405 [Tarek Auel] [SPARK-9178] revert intellij indents
3627b80 [Tarek Auel] [SPARK-9178] revert concat tests changes
3f5fbf5 [Tarek Auel] [SPARK-9178] rebase and add final to UTF8String.EMPTY_UTF8
47cda68 [Tarek Auel] Merge branch 'master' into SPARK-9178
4a37344 [Tarek Auel] [SPARK-9178] changed name to EMPTY_UTF8, added tests
748b87a [Tarek Auel] [SPARK-9178] Add empty string constant to UTF8String
parent 66bb8003
No related branches found
No related tags found
No related merge requests found
......@@ -50,6 +50,8 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
5, 5, 5, 5,
6, 6, 6, 6};
/** Shared instance representing the empty string, built once via {@code fromString("")}. */
public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
/**
* Creates an UTF8String from byte array, which should be encoded in UTF-8.
*
......
......@@ -54,6 +54,14 @@ public class UTF8StringSuite {
checkBasic("大 千 世 界", 7);
}
@Test
public void emptyStringTest() {
  // The shared constant must compare equal to an empty string built from a Java String ...
  assertEquals(fromString(""), EMPTY_UTF8);

  // ... and to one built from a zero-length byte array.
  final byte[] noBytes = new byte[0];
  assertEquals(fromBytes(noBytes), EMPTY_UTF8);

  // It must also report a length of zero, measured in characters and in bytes.
  final int charCount = EMPTY_UTF8.numChars();
  assertEquals(0, charCount);
  final int byteCount = EMPTY_UTF8.numBytes();
  assertEquals(0, byteCount);
}
@Test
public void compareTo() {
assertTrue(fromString("abc").compareTo(fromString("ABC")) > 0);
......@@ -88,9 +96,9 @@ public class UTF8StringSuite {
@Test
public void concatTest() {
assertEquals(fromString(""), concat());
assertEquals(EMPTY_UTF8, concat());
assertEquals(null, concat((UTF8String) null));
assertEquals(fromString(""), concat(fromString("")));
assertEquals(EMPTY_UTF8, concat(EMPTY_UTF8));
assertEquals(fromString("ab"), concat(fromString("ab")));
assertEquals(fromString("ab"), concat(fromString("a"), fromString("b")));
assertEquals(fromString("abc"), concat(fromString("a"), fromString("b"), fromString("c")));
......@@ -109,8 +117,8 @@ public class UTF8StringSuite {
// If separator is null, concatWs should skip all null inputs and never return null.
UTF8String sep = fromString("哈哈");
assertEquals(
fromString(""),
concatWs(sep, fromString("")));
EMPTY_UTF8,
concatWs(sep, EMPTY_UTF8));
assertEquals(
fromString("ab"),
concatWs(sep, fromString("ab")));
......@@ -127,7 +135,7 @@ public class UTF8StringSuite {
fromString("a"),
concatWs(sep, fromString("a"), null, null));
assertEquals(
fromString(""),
EMPTY_UTF8,
concatWs(sep, null, null, null));
assertEquals(
fromString("数据哈哈砖头"),
......@@ -136,7 +144,7 @@ public class UTF8StringSuite {
@Test
public void contains() {
assertTrue(fromString("").contains(fromString("")));
assertTrue(EMPTY_UTF8.contains(EMPTY_UTF8));
assertTrue(fromString("hello").contains(fromString("ello")));
assertFalse(fromString("hello").contains(fromString("vello")));
assertFalse(fromString("hello").contains(fromString("hellooo")));
......@@ -147,7 +155,7 @@ public class UTF8StringSuite {
@Test
public void startsWith() {
assertTrue(fromString("").startsWith(fromString("")));
assertTrue(EMPTY_UTF8.startsWith(EMPTY_UTF8));
assertTrue(fromString("hello").startsWith(fromString("hell")));
assertFalse(fromString("hello").startsWith(fromString("ell")));
assertFalse(fromString("hello").startsWith(fromString("hellooo")));
......@@ -158,7 +166,7 @@ public class UTF8StringSuite {
@Test
public void endsWith() {
assertTrue(fromString("").endsWith(fromString("")));
assertTrue(EMPTY_UTF8.endsWith(EMPTY_UTF8));
assertTrue(fromString("hello").endsWith(fromString("ello")));
assertFalse(fromString("hello").endsWith(fromString("ellov")));
assertFalse(fromString("hello").endsWith(fromString("hhhello")));
......@@ -169,7 +177,7 @@ public class UTF8StringSuite {
@Test
public void substring() {
assertEquals(fromString(""), fromString("hello").substring(0, 0));
assertEquals(EMPTY_UTF8, fromString("hello").substring(0, 0));
assertEquals(fromString("el"), fromString("hello").substring(1, 3));
assertEquals(fromString("数"), fromString("数据砖头").substring(0, 1));
assertEquals(fromString("据砖"), fromString("数据砖头").substring(1, 3));
......@@ -183,9 +191,9 @@ public class UTF8StringSuite {
assertEquals(fromString("hello "), fromString(" hello ").trimLeft());
assertEquals(fromString(" hello"), fromString(" hello ").trimRight());
assertEquals(fromString(""), fromString(" ").trim());
assertEquals(fromString(""), fromString(" ").trimLeft());
assertEquals(fromString(""), fromString(" ").trimRight());
assertEquals(EMPTY_UTF8, fromString(" ").trim());
assertEquals(EMPTY_UTF8, fromString(" ").trimLeft());
assertEquals(EMPTY_UTF8, fromString(" ").trimRight());
assertEquals(fromString("数据砖头"), fromString(" 数据砖头 ").trim());
assertEquals(fromString("数据砖头 "), fromString(" 数据砖头 ").trimLeft());
......@@ -198,9 +206,9 @@ public class UTF8StringSuite {
@Test
public void indexOf() {
assertEquals(0, fromString("").indexOf(fromString(""), 0));
assertEquals(-1, fromString("").indexOf(fromString("l"), 0));
assertEquals(0, fromString("hello").indexOf(fromString(""), 0));
assertEquals(0, EMPTY_UTF8.indexOf(EMPTY_UTF8, 0));
assertEquals(-1, EMPTY_UTF8.indexOf(fromString("l"), 0));
assertEquals(0, fromString("hello").indexOf(EMPTY_UTF8, 0));
assertEquals(2, fromString("hello").indexOf(fromString("l"), 0));
assertEquals(3, fromString("hello").indexOf(fromString("l"), 3));
assertEquals(-1, fromString("hello").indexOf(fromString("a"), 0));
......@@ -215,7 +223,7 @@ public class UTF8StringSuite {
// Exercises reverse() on ASCII, empty, CJK, and mixed ASCII/CJK input.
@Test
public void reverse() {
assertEquals(fromString("olleh"), fromString("hello").reverse());
// NOTE(review): the next two lines look like the before/after forms of a single
// assertion (this listing carries no +/- markers) -- confirm which one is live.
assertEquals(fromString(""), fromString("").reverse());
assertEquals(EMPTY_UTF8, EMPTY_UTF8.reverse());
assertEquals(fromString("者行孙"), fromString("孙行者").reverse());
assertEquals(fromString("者行孙 olleh"), fromString("hello 孙行者").reverse());
}
......@@ -224,7 +232,7 @@ public class UTF8StringSuite {
public void repeat() {
// Checks repeat() with a count of 5, a count of 1, and a negative count, for which
// the expected result is the empty string.
assertEquals(fromString("数d数d数d数d数d"), fromString("数d").repeat(5));
assertEquals(fromString("数d"), fromString("数d").repeat(1));
// NOTE(review): the next two lines look like the before/after forms of a single
// assertion (this listing carries no +/- markers) -- confirm which one is live.
assertEquals(fromString(""), fromString("数d").repeat(-1));
assertEquals(EMPTY_UTF8, fromString("数d").repeat(-1));
}
@Test
......@@ -234,14 +242,14 @@ public class UTF8StringSuite {
assertEquals(fromString("?hello"), fromString("hello").lpad(6, fromString("????")));
assertEquals(fromString("???????hello"), fromString("hello").lpad(12, fromString("????")));
assertEquals(fromString("?????hello"), fromString("hello").lpad(10, fromString("?????")));
assertEquals(fromString("???????"), fromString("").lpad(7, fromString("?????")));
assertEquals(fromString("???????"), EMPTY_UTF8.lpad(7, fromString("?????")));
assertEquals(fromString("hel"), fromString("hello").rpad(3, fromString("????")));
assertEquals(fromString("hello"), fromString("hello").rpad(5, fromString("????")));
assertEquals(fromString("hello?"), fromString("hello").rpad(6, fromString("????")));
assertEquals(fromString("hello???????"), fromString("hello").rpad(12, fromString("????")));
assertEquals(fromString("hello?????"), fromString("hello").rpad(10, fromString("?????")));
assertEquals(fromString("???????"), fromString("").rpad(7, fromString("?????")));
assertEquals(fromString("???????"), EMPTY_UTF8.rpad(7, fromString("?????")));
assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, fromString("????")));
......@@ -265,26 +273,16 @@ public class UTF8StringSuite {
// Checks levenshteinDistance() against fixed expected distances for empty, ASCII,
// and CJK operands.
//
// NOTE(review): every call below passes the computed distance first and the literal
// second, whereas the other tests in this listing put the expected value first. If this
// is JUnit's assertEquals(expected, actual), a failure message would label the two
// values the wrong way round -- consider swapping the arguments.
@Test
public void levenshteinDistance() {
// NOTE(review): the assertions spelled with UTF8String.fromString(...) appear to be the
// pre-change lines of this hunk (the listing carries no +/- markers) -- confirm.
assertEquals(
UTF8String.fromString("").levenshteinDistance(UTF8String.fromString("")), 0);
assertEquals(
UTF8String.fromString("").levenshteinDistance(UTF8String.fromString("a")), 1);
assertEquals(
UTF8String.fromString("aaapppp").levenshteinDistance(UTF8String.fromString("")), 7);
assertEquals(
UTF8String.fromString("frog").levenshteinDistance(UTF8String.fromString("fog")), 1);
assertEquals(
UTF8String.fromString("fly").levenshteinDistance(UTF8String.fromString("ant")),3);
assertEquals(
UTF8String.fromString("elephant").levenshteinDistance(UTF8String.fromString("hippo")), 7);
assertEquals(
UTF8String.fromString("hippo").levenshteinDistance(UTF8String.fromString("elephant")), 7);
assertEquals(
UTF8String.fromString("hippo").levenshteinDistance(UTF8String.fromString("zzzzzzzz")), 8);
assertEquals(
UTF8String.fromString("hello").levenshteinDistance(UTF8String.fromString("hallo")),1);
assertEquals(
UTF8String.fromString("世界千世").levenshteinDistance(UTF8String.fromString("千a世b")),4);
// The same ten cases, written with the unqualified fromString and the EMPTY_UTF8
// constant; presumably the post-change replacements for the lines above.
assertEquals(EMPTY_UTF8.levenshteinDistance(EMPTY_UTF8), 0);
assertEquals(EMPTY_UTF8.levenshteinDistance(fromString("a")), 1);
assertEquals(fromString("aaapppp").levenshteinDistance(EMPTY_UTF8), 7);
assertEquals(fromString("frog").levenshteinDistance(fromString("fog")), 1);
assertEquals(fromString("fly").levenshteinDistance(fromString("ant")),3);
assertEquals(fromString("elephant").levenshteinDistance(fromString("hippo")), 7);
assertEquals(fromString("hippo").levenshteinDistance(fromString("elephant")), 7);
assertEquals(fromString("hippo").levenshteinDistance(fromString("zzzzzzzz")), 8);
assertEquals(fromString("hello").levenshteinDistance(fromString("hallo")),1);
assertEquals(fromString("世界千世").levenshteinDistance(fromString("千a世b")),4);
}
@Test
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment