Commit 5acaf8c0 authored by hyukjinkwon, committed by Herman van Hovell

[SPARK-19518][SQL] IGNORE NULLS in first / last in SQL

## What changes were proposed in this pull request?

This PR proposes to add the `IGNORE NULLS` keywords to `first`/`last` in Spark's SQL parser, in line with Oracle's syntax (http://docs.oracle.com/cd/B19306_01/server.102/b14200/functions057.htm). The keywords are simply mapped to the existing `ignoreNullsExpr`.

**Before**

```scala
scala> sql("select first('a' IGNORE NULLS)").show()
```

```
org.apache.spark.sql.catalyst.parser.ParseException:
extraneous input 'NULLS' expecting {')', ','}(line 1, pos 24)

== SQL ==
select first('a' IGNORE NULLS)
------------------------^^^

  at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:210)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:112)
  at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:46)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:66)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:622)
  ... 48 elided
```

**After**

```scala
scala> sql("select first('a' IGNORE NULLS)").show()
```

```
+--------------+
|first(a, true)|
+--------------+
|             a|
+--------------+
```
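
For context, the snippet below is an editorial sketch (not part of the patch) that puts the new SQL syntax side by side with the pre-existing DataFrame API it maps to, `functions.first`/`functions.last` with `ignoreNulls = true`. The session setup, the temp view name `t`, and the column names are made up for illustration.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{first, last}

object IgnoreNullsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("ignore-nulls-sketch").master("local[*]").getOrCreate()
    import spark.implicits._

    // A tiny dataset with a null in column `a` (hypothetical data).
    val df = Seq((None: Option[Int], "x"), (Some(1), "y"), (Some(2), "z")).toDF("a", "b")
    df.createOrReplaceTempView("t")

    // New SQL syntax enabled by this patch: IGNORE NULLS inside first()/last().
    spark.sql("SELECT first(a IGNORE NULLS), last(a IGNORE NULLS) FROM t").show()

    // Pre-existing DataFrame API that the keywords map onto.
    df.select(first($"a", ignoreNulls = true), last($"a", ignoreNulls = true)).show()

    spark.stop()
  }
}
```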

## How was this patch tested?

Unit tests in `ExpressionParserSuite`.
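
For a quick manual check, the sketch below (again not from the patch; it assumes the Catalyst module is on the classpath) parses the new syntax with `CatalystSqlParser`, mirroring the assertions added to `ExpressionParserSuite`:

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

object ParseIgnoreNullsCheck {
  def main(args: Array[String]): Unit = {
    // With IGNORE NULLS the parsed aggregate carries ignoreNulls = true ...
    println(CatalystSqlParser.parseExpression("first(a ignore nulls)"))
    println(CatalystSqlParser.parseExpression("last(a ignore nulls)"))
    // ... and without it the existing default (false) is preserved.
    println(CatalystSqlParser.parseExpression("first(a)"))
    println(CatalystSqlParser.parseExpression("last(a)"))
  }
}
```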

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #17566 from HyukjinKwon/SPARK-19518.
Parent commit: 4f7d49b9

SqlBase.g4 (parser grammar):

```diff
@@ -552,6 +552,8 @@ primaryExpression
 | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
 | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
 | CAST '(' expression AS dataType ')' #cast
+| FIRST '(' expression (IGNORE NULLS)? ')' #first
+| LAST '(' expression (IGNORE NULLS)? ')' #last
 | constant #constantDefault
 | ASTERISK #star
 | qualifiedName '.' ASTERISK #star
@@ -710,7 +712,7 @@ nonReserved
 | VIEW | REPLACE
 | IF
 | NO | DATA
-| START | TRANSACTION | COMMIT | ROLLBACK
+| START | TRANSACTION | COMMIT | ROLLBACK | IGNORE
 | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
 | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE
 | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
@@ -836,6 +838,7 @@ TRANSACTION: 'TRANSACTION';
 COMMIT: 'COMMIT';
 ROLLBACK: 'ROLLBACK';
 MACRO: 'MACRO';
+IGNORE: 'IGNORE';
 IF: 'IF';
```

AstBuilder.scala:

```diff
@@ -31,6 +31,7 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
@@ -1022,6 +1023,22 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
     Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType))
   }
 
+  /**
+   * Create a [[First]] expression.
+   */
+  override def visitFirst(ctx: FirstContext): Expression = withOrigin(ctx) {
+    val ignoreNullsExpr = ctx.IGNORE != null
+    First(expression(ctx.expression), Literal(ignoreNullsExpr)).toAggregateExpression()
+  }
+
+  /**
+   * Create a [[Last]] expression.
+   */
+  override def visitLast(ctx: LastContext): Expression = withOrigin(ctx) {
+    val ignoreNullsExpr = ctx.IGNORE != null
+    Last(expression(ctx.expression), Literal(ignoreNullsExpr)).toAggregateExpression()
+  }
+
   /**
    * Create a (windowed) Function expression.
    */
```
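
As an illustration of what the new visitor callbacks return, here is a hedged sketch (not from the patch) that builds the same expressions by hand: the presence of the `IGNORE` token becomes the boolean `Literal` passed to `First`/`Last`, which is then wrapped by `toAggregateExpression()`. The attribute name `a` is arbitrary.

```scala
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}

object VisitorOutputSketch {
  def main(args: Array[String]): Unit = {
    val child = UnresolvedAttribute("a")

    // first(a IGNORE NULLS): the IGNORE token is present, so ignoreNulls is true.
    val firstIgnoringNulls = First(child, Literal(true)).toAggregateExpression()

    // last(a): no IGNORE token, so ignoreNulls stays false.
    val lastKeepingNulls = Last(child, Literal(false)).toAggregateExpression()

    println(firstIgnoringNulls)
    println(lastKeepingNulls)
  }
}
```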

ExpressionParserSuite.scala:

```diff
@@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp}
 import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
@@ -549,4 +550,11 @@ class ExpressionParserSuite extends PlanTest {
     val complexName2 = FunctionIdentifier("ba``r", Some("fo``o"))
     assertEqual(complexName2.quotedString, UnresolvedAttribute("fo``o.ba``r"))
   }
+
+  test("SPARK-19526 Support ignore nulls keywords for first and last") {
+    assertEqual("first(a ignore nulls)", First('a, Literal(true)).toAggregateExpression())
+    assertEqual("first(a)", First('a, Literal(false)).toAggregateExpression())
+    assertEqual("last(a ignore nulls)", Last('a, Literal(true)).toAggregateExpression())
+    assertEqual("last(a)", Last('a, Literal(false)).toAggregateExpression())
+  }
 }
```