Skip to content
Snippets Groups Projects
Commit d060da09 authored by Herman van Hovell's avatar Herman van Hovell Committed by Davies Liu
Browse files

[SPARK-14762] [SQL] TPCDS Q90 fails to parse

### What changes were proposed in this pull request?
TPCDS Q90 fails to parse because the parser treats one of its identifiers as a reserved keyword: `AT` is used as an alias for one of the subqueries. `AT` is not a reserved keyword and should have been registered in the `nonReserved` rule.

In order to prevent this from happening again I have added tests for all keywords that are non-reserved in Hive. See the `nonReserved`, `sql11ReservedKeywordsUsedAsCastFunctionName` & `sql11ReservedKeywordsUsedAsIdentifier` rules in https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g.

### How was this patch tested?

Added tests for all Hive non-reserved keywords to `TableIdentifierParserSuite`.

cc davies

Author: Herman van Hovell <hvanhovell@questtec.nl>

Closes #12537 from hvanhovell/SPARK-14762.
parent 1a7fc74c
No related branches found
No related tags found
No related merge requests found
......@@ -650,11 +650,14 @@ nonReserved
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
| EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
| AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
| INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
| DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
| STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER
| REVOKE | GRANT | LOCK | UNLOCK | MSCK | REPAIR | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE
| ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEX | INDEXES | LOCKS | OPTION | LOCAL | INPATH
| ASC | DESC | LIMIT | RENAME | SETS
| AT | NULLS | OVERWRITE | ALL | ALTER | AS | BETWEEN | BY | CREATE | DELETE
| DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS |LIKE
| NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE
;
SELECT: 'SELECT';
......@@ -850,8 +853,6 @@ SORTED: 'SORTED';
PURGE: 'PURGE';
INPUTFORMAT: 'INPUTFORMAT';
OUTPUTFORMAT: 'OUTPUTFORMAT';
INPUTDRIVER: 'INPUTDRIVER';
OUTPUTDRIVER: 'OUTPUTDRIVER';
DATABASE: 'DATABASE' | 'SCHEMA';
DATABASES: 'DATABASES' | 'SCHEMAS';
DFS: 'DFS';
......
......@@ -22,21 +22,65 @@ import org.apache.spark.sql.catalyst.TableIdentifier
/**
 * Checks how `CatalystSqlParser.parseTableIdentifier` handles plain names, malformed names and
 * SQL keywords used as table names.
 */
class TableIdentifierParserSuite extends SparkFunSuite {
  import CatalystSqlParser._

  // Add "$elem$", "$value$" & "$key$"
  /**
   * Keywords that must be accepted as unquoted table identifiers (Hive allows them; see the
   * "non reserved keywords" test below). Each keyword is listed once.
   */
  val hiveNonReservedKeyword: Array[String] = Array("add", "admin", "after", "analyze", "archive",
    "asc", "before",
    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data",
    "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
    "dependency", "desc", "directories", "directory", "disable", "distribute",
    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
    "view", "while", "year", "work", "transaction", "write", "isolation", "level",
    "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
    "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
    "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
    "at")

  /**
   * Keywords that are expected to be rejected as unquoted table identifiers and accepted only
   * when back-quoted (see the "keywords" test below).
   * NOTE(review): the name suggests these are non-reserved in Hive - confirm against Hive's
   * grammar.
   */
  val hiveNonReservedRegression: Seq[String] = Seq("left", "right", "full", "inner", "semi",
    "union", "except", "intersect", "schema", "database")

  test("table identifier") {
    // Regular names.
    assert(TableIdentifier("q") === parseTableIdentifier("q"))
    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))

    // Illegal names. The "${...}" entry is a plain (non-interpolated) string on purpose.
    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
      intercept[ParseException](parseTableIdentifier(identifier))
    }
  }

  test("table identifier - keywords") {
    // SQL Keywords: rejected when bare, accepted when back-quoted (with or without a database).
    val keywords = Seq("select", "from", "where") ++ hiveNonReservedRegression
    keywords.foreach { keyword =>
      intercept[ParseException](parseTableIdentifier(keyword))
      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
    }
  }

  test("table identifier - non reserved keywords") {
    // Hive keywords are allowed.
    hiveNonReservedKeyword.foreach { nonReserved =>
      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
    }
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment