Skip to content
Snippets Groups Projects
Commit a0b92f73 authored by Herman van Hovell's avatar Herman van Hovell
Browse files

[SPARK-19850][SQL] Allow the use of aliases in SQL function calls

## What changes were proposed in this pull request?
We currently cannot use aliases in SQL function calls. This is inconvenient when you try to create a struct. This SQL query for example `select struct(1, 2) st`, will create a struct with column names `col1` and `col2`. This is even more problematic when we want to append a field to an existing struct. For example if we want to a field to struct `st` we would issue the following SQL query `select struct(st.*, 1) as st from src`, the result will be struct `st` with an a column with a non descriptive name `col3` (if `st` itself has 2 fields).

This PR proposes to change this by allowing the use of aliased expression in function parameters. For example `select struct(1 as a, 2 as b) st`, will create a struct with columns `a` & `b`.

## How was this patch tested?
Added a test to `ExpressionParserSuite` and added a test file for `SQLQueryTestSuite`.

Author: Herman van Hovell <hvanhovell@databricks.com>

Closes #17245 from hvanhovell/SPARK-19850.
parent 0ee38a39
No related branches found
No related tags found
No related merge requests found
......@@ -506,10 +506,10 @@ expression
booleanExpression
: NOT booleanExpression #logicalNot
| EXISTS '(' query ')' #exists
| predicated #booleanDefault
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
| EXISTS '(' query ')' #exists
;
// workaround for:
......@@ -546,9 +546,10 @@ primaryExpression
| constant #constantDefault
| ASTERISK #star
| qualifiedName '.' ASTERISK #star
| '(' expression (',' expression)+ ')' #rowConstructor
| '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
| '(' query ')' #subqueryExpression
| qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')' (OVER windowSpec)? #functionCall
| qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')'
(OVER windowSpec)? #functionCall
| value=primaryExpression '[' index=valueExpression ']' #subscript
| identifier #columnReference
| base=primaryExpression '.' fieldName=identifier #dereference
......
......@@ -1016,7 +1016,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
// Create the function call.
val name = ctx.qualifiedName.getText
val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
val arguments = ctx.expression().asScala.map(expression) match {
val arguments = ctx.namedExpression().asScala.map(expression) match {
case Seq(UnresolvedStar(None)) if name.toLowerCase == "count" && !isDistinct =>
// Transform COUNT(*) into COUNT(1).
Seq(Literal(1))
......@@ -1127,7 +1127,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* Create a [[CreateStruct]] expression.
*/
override def visitRowConstructor(ctx: RowConstructorContext): Expression = withOrigin(ctx) {
CreateStruct(ctx.expression.asScala.map(expression))
CreateStruct(ctx.namedExpression().asScala.map(expression))
}
/**
......
......@@ -209,6 +209,7 @@ class ExpressionParserSuite extends PlanTest {
assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b))
assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b))
assertEqual("`select`(all a, b)", 'select.function('a, 'b))
assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e))
}
test("window function expressions") {
......@@ -278,6 +279,7 @@ class ExpressionParserSuite extends PlanTest {
// Note that '(a)' will be interpreted as a nested expression.
assertEqual("(a, b)", CreateStruct(Seq('a, 'b)))
assertEqual("(a, b, c)", CreateStruct(Seq('a, 'b, 'c)))
assertEqual("(a as b, b as c)", CreateStruct(Seq('a as 'b, 'b as 'c)))
}
test("scalar sub-query") {
......
CREATE TEMPORARY VIEW tbl_x AS VALUES
(1, NAMED_STRUCT('C', 'gamma', 'D', 'delta')),
(2, NAMED_STRUCT('C', 'epsilon', 'D', 'eta')),
(3, NAMED_STRUCT('C', 'theta', 'D', 'iota'))
AS T(ID, ST);
-- Create a struct
SELECT STRUCT('alpha', 'beta') ST;
-- Create a struct with aliases
SELECT STRUCT('alpha' AS A, 'beta' AS B) ST;
-- Star expansion in a struct.
SELECT ID, STRUCT(ST.*) NST FROM tbl_x;
-- Append a column to a struct
SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x;
-- Prepend a column to a struct
SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x;
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 6
-- !query 0
CREATE TEMPORARY VIEW tbl_x AS VALUES
(1, NAMED_STRUCT('C', 'gamma', 'D', 'delta')),
(2, NAMED_STRUCT('C', 'epsilon', 'D', 'eta')),
(3, NAMED_STRUCT('C', 'theta', 'D', 'iota'))
AS T(ID, ST)
-- !query 0 schema
struct<>
-- !query 0 output
-- !query 1
SELECT STRUCT('alpha', 'beta') ST
-- !query 1 schema
struct<ST:struct<col1:string,col2:string>>
-- !query 1 output
{"col1":"alpha","col2":"beta"}
-- !query 2
SELECT STRUCT('alpha' AS A, 'beta' AS B) ST
-- !query 2 schema
struct<ST:struct<A:string,B:string>>
-- !query 2 output
{"A":"alpha","B":"beta"}
-- !query 3
SELECT ID, STRUCT(ST.*) NST FROM tbl_x
-- !query 3 schema
struct<ID:int,NST:struct<C:string,D:string>>
-- !query 3 output
1 {"C":"gamma","D":"delta"}
2 {"C":"epsilon","D":"eta"}
3 {"C":"theta","D":"iota"}
-- !query 4
SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x
-- !query 4 schema
struct<ID:int,NST:struct<C:string,D:string,E:string>>
-- !query 4 output
1 {"C":"gamma","D":"delta","E":"1"}
2 {"C":"epsilon","D":"eta","E":"2"}
3 {"C":"theta","D":"iota","E":"3"}
-- !query 5
SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x
-- !query 5 schema
struct<ID:int,NST:struct<AA:string,C:string,D:string>>
-- !query 5 output
1 {"AA":"1","C":"gamma","D":"delta"}
2 {"AA":"2","C":"epsilon","D":"eta"}
3 {"AA":"3","C":"theta","D":"iota"}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment