From b526f70c16caa2062a77117808860e1d3662114c Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro <yamamuro@apache.org> Date: Fri, 12 May 2017 09:55:51 -0700 Subject: [PATCH] [SPARK-19951][SQL] Add string concatenate operator || to Spark SQL ## What changes were proposed in this pull request? This pr added code to support `||` for string concatenation. This string operation is supported in PostgreSQL and MySQL. ## How was this patch tested? Added tests in `SparkSqlParserSuite` Author: Takeshi Yamamuro <yamamuro@apache.org> Closes #17711 from maropu/SPARK-19951. --- .../spark/sql/catalyst/parser/SqlBase.g4 | 5 +- .../sql/catalyst/parser/AstBuilder.scala | 2 + .../resources/sql-tests/inputs/arithmetic.sql | 34 ---------- .../resources/sql-tests/inputs/operators.sql | 55 ++++++++++++++++ .../sql-tests/inputs/string-functions.sql | 3 + .../{arithmetic.sql.out => operators.sql.out} | 62 ++++++++++++++++++- .../results/string-functions.sql.out | 10 ++- .../sql/execution/SparkSqlParserSuite.scala | 15 ++++- 8 files changed, 146 insertions(+), 40 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/operators.sql rename sql/core/src/test/resources/sql-tests/results/{arithmetic.sql.out => operators.sql.out} (70%) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ed5450b494..2b7c290adb 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -549,7 +549,7 @@ valueExpression : primaryExpression #valueExpressionDefault | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary - | left=valueExpression operator=(PLUS | MINUS) 
right=valueExpression #arithmeticBinary + | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary @@ -590,7 +590,7 @@ comparisonOperator ; arithmeticOperator - : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | HAT + : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT ; predicateOperator @@ -869,6 +869,7 @@ DIV: 'DIV'; TILDE: '~'; AMPERSAND: '&'; PIPE: '|'; +CONCAT_PIPE: '||'; HAT: '^'; PERCENTLIT: 'PERCENT'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4b11b6f8d2..0e974a02e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1010,6 +1010,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Add(left, right) case SqlBaseParser.MINUS => Subtract(left, right) + case SqlBaseParser.CONCAT_PIPE => + Concat(left :: right :: Nil) case SqlBaseParser.AMPERSAND => BitwiseAnd(left, right) case SqlBaseParser.HAT => diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql deleted file mode 100644 index f62b10ca00..0000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ /dev/null @@ -1,34 +0,0 @@ - --- unary minus and plus -select -100; -select +230; -select -5.2; -select +6.8e0; -select -key, +key from testdata where key = 2; -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; -select -max(key), +max(key) from 
testdata; -select - (-10); -select + (-key) from testdata where key = 32; -select - (+max(key)) from testdata; -select - - 3; -select - + 20; -select + + 100; -select - - max(key) from testdata; -select + - key from testdata where key = 33; - --- div -select 5 / 2; -select 5 / 0; -select 5 / null; -select null / 5; -select 5 div 2; -select 5 div 0; -select 5 div null; -select null div 5; - --- other arithmetics -select 1 + 2; -select 1 - 2; -select 2 * 5; -select 5 % 3; -select pmod(-7, 3); diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql new file mode 100644 index 0000000000..6339d69ca6 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -0,0 +1,55 @@ + +-- unary minus and plus +select -100; +select +230; +select -5.2; +select +6.8e0; +select -key, +key from testdata where key = 2; +select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; +select -max(key), +max(key) from testdata; +select - (-10); +select + (-key) from testdata where key = 32; +select - (+max(key)) from testdata; +select - - 3; +select - + 20; +select + + 100; +select - - max(key) from testdata; +select + - key from testdata where key = 33; + +-- div +select 5 / 2; +select 5 / 0; +select 5 / null; +select null / 5; +select 5 div 2; +select 5 div 0; +select 5 div null; +select null div 5; + +-- other arithmetics +select 1 + 2; +select 1 - 2; +select 2 * 5; +select 5 % 3; +select pmod(-7, 3); + +-- check operator precedence. 
+-- We follow Oracle operator precedence in the table below that lists the levels of precedence +-- among SQL operators from high to low: +------------------------------------------------------------------------------------------ +-- Operator Operation +------------------------------------------------------------------------------------------ +-- +, - identity, negation +-- *, / multiplication, division +-- +, -, || addition, subtraction, concatenation +-- =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison +-- NOT logical negation +-- AND conjunction +-- OR disjunction +------------------------------------------------------------------------------------------ +explain select 'a' || 1 + 2; +explain select 1 - 2 || 'b'; +explain select 2 * 4 + 3 || 'b'; +explain select 3 + 1 || 'a' || 4 / 2; +explain select 1 == 1 OR 'a' || 'b' == 'ab'; +explain select 'a' || 'c' == 'ac' AND 2 == 3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index f21981ef7b..7005cafe35 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -1,3 +1,6 @@ -- Argument number exception select concat_ws(); select format_string(); + +-- A pipe operator for string concatenation +select 'a' || 'b' || 'c'; diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out similarity index 70% rename from sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out rename to sql/core/src/test/resources/sql-tests/results/operators.sql.out index ce42c016a7..e0236f4118 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 +-- 
Number of queries: 34 -- !query 0 @@ -224,3 +224,63 @@ select pmod(-7, 3) struct<pmod(-7, 3):int> -- !query 27 output 2 + + +-- !query 28 +explain select 'a' || 1 + 2 +-- !query 28 schema +struct<plan:string> +-- !query 28 output +== Physical Plan == +*Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x] ++- Scan OneRowRelation[] + + +-- !query 29 +explain select 1 - 2 || 'b' +-- !query 29 schema +struct<plan:string> +-- !query 29 output +== Physical Plan == +*Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x] ++- Scan OneRowRelation[] + + +-- !query 30 +explain select 2 * 4 + 3 || 'b' +-- !query 30 schema +struct<plan:string> +-- !query 30 output +== Physical Plan == +*Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x] ++- Scan OneRowRelation[] + + +-- !query 31 +explain select 3 + 1 || 'a' || 4 / 2 +-- !query 31 schema +struct<plan:string> +-- !query 31 output +== Physical Plan == +*Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x] ++- Scan OneRowRelation[] + + +-- !query 32 +explain select 1 == 1 OR 'a' || 'b' == 'ab' +-- !query 32 schema +struct<plan:string> +-- !query 32 output +== Physical Plan == +*Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x] ++- Scan OneRowRelation[] + + +-- !query 33 +explain select 'a' || 'c' == 'ac' AND 2 == 3 +-- !query 33 schema +struct<plan:string> +-- !query 33 output +== Physical Plan == +*Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x] ++- Scan OneRowRelation[] diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 6961e9b659..8ee075118e 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 2 +-- Number of 
queries: 3 -- !query 0 @@ -18,3 +18,11 @@ struct<> -- !query 1 output org.apache.spark.sql.AnalysisException requirement failed: format_string() should take at least 1 argument; line 1 pos 7 + + +-- !query 2 +select 'a' || 'b' || 'c' +-- !query 2 schema +struct<concat(concat(a, b), c):string> +-- !query 2 output +abc diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 908b955abb..b32fb90e10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} @@ -290,4 +290,15 @@ class SparkSqlParserSuite extends PlanTest { basePlan, numPartitions = newConf.numShufflePartitions))) } + + test("pipeline concatenation") { + val concat = Concat( + Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) :: + UnresolvedAttribute("c") :: + Nil + ) + assertEqual( + "SELECT a || b || c FROM t", + Project(UnresolvedAlias(concat) :: Nil, 
UnresolvedRelation(TableIdentifier("t")))) + } } -- GitLab