Commit 78071736 authored by gatorsmile, committed by Herman van Hovell

[SPARK-14349][SQL] Issue Error Messages for Unsupported Operators/DML/DDL in SQL Context.

#### What changes were proposed in this pull request?

Currently, confusing error messages are issued when Hive-Context-only operations are used in a SQL Context. A minimal repro sketch follows the two examples below.

For example,
- When calling `Drop Table` in SQL Context, we got the following message:
```
Expected exception org.apache.spark.sql.catalyst.parser.ParseException to be thrown, but java.lang.ClassCastException was thrown.
```

- When calling `Script Transform` in SQL Context, we got the message:
```
assertion failed: No plan for ScriptTransformation [key#9,value#10], cat, [tKey#155,tValue#156], null
+- LogicalRDD [key#9,value#10], MapPartitionsRDD[3] at beforeAll at BeforeAndAfterAll.scala:187
```
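
For context, a minimal repro looks like the sketch below. This is an illustration only: the SparkContext setup and the table names `t1`/`t2` are assumptions, not part of this patch.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

// Hypothetical repro (setup and table names assumed): Hive-only commands sent
// through the plain SQLContext parser surface the internal errors quoted above
// instead of a clear "operation not supported" ParseException.
val sc = new SparkContext(new SparkConf().setAppName("repro").setMaster("local[2]"))
val sqlContext = new SQLContext(sc)

sqlContext.sql("DROP TABLE t1")                                               // ClassCastException
sqlContext.sql("SELECT TRANSFORM (key, value) USING 'cat' AS (k, v) FROM t2") // assertion failure
```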

Updates:
Based on the investigation by hvanhovell, the root cause is the default implementation of `visitChildren`: it always returns the result of the last visited child of a context, so a statement context with more than one child never yields the expected result. After merging the code changes from hvanhovell, it works. Thank you hvanhovell!
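
For reference, ANTLR 4's default `visitChildren` (from `AbstractParseTreeVisitor`) behaves roughly like the simplified sketch below; its default `aggregateResult` keeps only the newest value, so the last child always wins:

```scala
import org.antlr.v4.runtime.tree.{AbstractParseTreeVisitor, RuleNode}

// Simplified rendering of ANTLR's stock behavior: visit every child and fold
// the results with aggregateResult, whose default implementation returns just
// the most recent (i.e. last) child's result.
class DefaultVisitor[T] extends AbstractParseTreeVisitor[T] {
  override def visitChildren(node: RuleNode): T = {
    var result = defaultResult() // null unless overridden
    var i = 0
    while (i < node.getChildCount && shouldVisitNextChild(node, result)) {
      result = aggregateResult(result, node.getChild(i).accept(this))
      i += 1
    }
    result
  }
}
```

For a statement context with several children (e.g. `DROP TABLE`), the builder therefore returned whatever the last child produced instead of a `LogicalPlan`, and the later `asInstanceOf[LogicalPlan]` cast failed with the `ClassCastException` shown above. The override added in this patch returns the single child's result when there is exactly one child and `null` otherwise, so unhandled statements no longer masquerade as valid plans.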

#### How was this patch tested?
A few test cases are added.

Not sure if the same issue exists for the other operators/DDL/DML. hvanhovell

Author: gatorsmile <gatorsmile@gmail.com>
Author: xiaoli <lixiao1983@gmail.com>
Author: Herman van Hovell <hvanhovell@questtec.nl>
Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local>

Closes #12134 from gatorsmile/hiveParserCommand.
parent 2715bc68
@@ -468,15 +468,15 @@ booleanExpression
 // https://github.com/antlr/antlr4/issues/780
 // https://github.com/antlr/antlr4/issues/781
 predicated
-    : valueExpression predicate[$valueExpression.ctx]?
+    : valueExpression predicate?
     ;

-predicate[ParserRuleContext value]
-    : NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between
-    | NOT? IN '(' expression (',' expression)* ')' #inList
-    | NOT? IN '(' query ')' #inSubquery
-    | NOT? like=(RLIKE | LIKE) pattern=valueExpression #like
-    | IS NOT? NULL #nullPredicate
+predicate
+    : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
+    | NOT? kind=IN '(' expression (',' expression)* ')'
+    | NOT? kind=IN '(' query ')'
+    | NOT? kind=(RLIKE | LIKE) pattern=valueExpression
+    | IS NOT? kind=NULL
     ;

 valueExpression
@@ -22,7 +22,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer

 import org.antlr.v4.runtime.{ParserRuleContext, Token}
-import org.antlr.v4.runtime.tree.{ParseTree, TerminalNode}
+import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}

 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
@@ -46,6 +46,19 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
     ctx.accept(this).asInstanceOf[T]
   }

+  /**
+   * Override the default behavior for all visit methods. This will only return a non-null result
+   * when the context has only one child. This is done because there is no generic method to
+   * combine the results of the context children. In all other cases null is returned.
+   */
+  override def visitChildren(node: RuleNode): AnyRef = {
+    if (node.getChildCount == 1) {
+      node.getChild(0).accept(this)
+    } else {
+      null
+    }
+  }
+
   override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) {
     visit(ctx.statement).asInstanceOf[LogicalPlan]
   }
@@ -351,7 +364,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
           string(script),
           attributes,
           withFilter,
-          withScriptIOSchema(inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess))
+          withScriptIOSchema(
+            ctx, inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess))

       case SqlBaseParser.SELECT =>
         // Regular select
@@ -398,11 +412,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
    * Create a (Hive based) [[ScriptInputOutputSchema]].
    */
   protected def withScriptIOSchema(
+      ctx: QuerySpecificationContext,
       inRowFormat: RowFormatContext,
       recordWriter: Token,
       outRowFormat: RowFormatContext,
       recordReader: Token,
-      schemaLess: Boolean): ScriptInputOutputSchema = null
+      schemaLess: Boolean): ScriptInputOutputSchema = {
+    throw new ParseException("Script Transform is not supported", ctx)
+  }

   /**
    * Create a logical plan for a given 'FROM' clause. Note that we support multiple (comma
@@ -778,17 +795,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
     trees.asScala.map(expression)
   }

-  /**
-   * Invert a boolean expression if it has a valid NOT clause.
-   */
-  private def invertIfNotDefined(expression: Expression, not: TerminalNode): Expression = {
-    if (not != null) {
-      Not(expression)
-    } else {
-      expression
-    }
-  }
-
   /**
    * Create a star (i.e. all) expression; this selects all elements (in the specified object).
    * Both un-targeted (global) and targeted aliases are supported.
@@ -909,57 +915,55 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   }

   /**
-   * Create a BETWEEN expression. This tests if an expression lies with in the bounds set by two
-   * other expressions. The inverse can also be created.
-   */
-  override def visitBetween(ctx: BetweenContext): Expression = withOrigin(ctx) {
-    val value = expression(ctx.value)
-    val between = And(
-      GreaterThanOrEqual(value, expression(ctx.lower)),
-      LessThanOrEqual(value, expression(ctx.upper)))
-    invertIfNotDefined(between, ctx.NOT)
-  }
-
-  /**
-   * Create an IN expression. This tests if the value of the left hand side expression is
-   * contained by the sequence of expressions on the right hand side.
+   * Create a predicated expression. A predicated expression is a normal expression with a
+   * predicate attached to it, for example:
+   * {{{
+   *    a + 1 IS NULL
+   * }}}
    */
-  override def visitInList(ctx: InListContext): Expression = withOrigin(ctx) {
-    val in = In(expression(ctx.value), ctx.expression().asScala.map(expression))
-    invertIfNotDefined(in, ctx.NOT)
+  override def visitPredicated(ctx: PredicatedContext): Expression = withOrigin(ctx) {
+    val e = expression(ctx.valueExpression)
+    if (ctx.predicate != null) {
+      withPredicate(e, ctx.predicate)
+    } else {
+      e
+    }
   }

   /**
-   * Create an IN expression, where the the right hand side is a query. This is unsupported.
+   * Add a predicate to the given expression. Supported expressions are:
+   * - (NOT) BETWEEN
+   * - (NOT) IN
+   * - (NOT) LIKE
+   * - (NOT) RLIKE
+   * - IS (NOT) NULL.
    */
-  override def visitInSubquery(ctx: InSubqueryContext): Expression = {
-    throw new ParseException("IN with a Sub-query is currently not supported.", ctx)
-  }
+  private def withPredicate(e: Expression, ctx: PredicateContext): Expression = withOrigin(ctx) {
+    // Invert a predicate if it has a valid NOT clause.
+    def invertIfNotDefined(e: Expression): Expression = ctx.NOT match {
+      case null => e
+      case not => Not(e)
+    }

-  /**
-   * Create a (R)LIKE/REGEXP expression.
-   */
-  override def visitLike(ctx: LikeContext): Expression = {
-    val left = expression(ctx.value)
-    val right = expression(ctx.pattern)
-    val like = ctx.like.getType match {
+    // Create the predicate.
+    ctx.kind.getType match {
+      case SqlBaseParser.BETWEEN =>
+        // BETWEEN is translated to lower <= e && e <= upper
+        invertIfNotDefined(And(
+          GreaterThanOrEqual(e, expression(ctx.lower)),
+          LessThanOrEqual(e, expression(ctx.upper))))
+      case SqlBaseParser.IN if ctx.query != null =>
+        throw new ParseException("IN with a Sub-query is currently not supported.", ctx)
+      case SqlBaseParser.IN =>
+        invertIfNotDefined(In(e, ctx.expression.asScala.map(expression)))
       case SqlBaseParser.LIKE =>
-        Like(left, right)
+        invertIfNotDefined(Like(e, expression(ctx.pattern)))
       case SqlBaseParser.RLIKE =>
-        RLike(left, right)
-    }
-    invertIfNotDefined(like, ctx.NOT)
-  }
-
-  /**
-   * Create an IS (NOT) NULL expression.
-   */
-  override def visitNullPredicate(ctx: NullPredicateContext): Expression = withOrigin(ctx) {
-    val value = expression(ctx.value)
-    if (ctx.NOT != null) {
-      IsNotNull(value)
-    } else {
-      IsNull(value)
+        invertIfNotDefined(RLike(e, expression(ctx.pattern)))
+      case SqlBaseParser.NULL if ctx.NOT != null =>
+        IsNotNull(e)
+      case SqlBaseParser.NULL =>
+        IsNull(e)
     }
   }
@@ -122,16 +122,6 @@ class PlanParserSuite extends PlanTest {
       table("a").union(table("b")).as("c").select(star()))
   }

-  test("transform query spec") {
-    val p = ScriptTransformation(Seq('a, 'b), "func", Seq.empty, table("e"), null)
-    assertEqual("select transform(a, b) using 'func' from e where f < 10",
-      p.copy(child = p.child.where('f < 10), output = Seq('key.string, 'value.string)))
-    assertEqual("map a, b using 'func' as c, d from e",
-      p.copy(output = Seq('c.string, 'd.string)))
-    assertEqual("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e",
-      p.copy(output = Seq('c.int, 'd.decimal(10, 0))))
-  }
-
   test("multi select query") {
     assertEqual(
       "from a select * select * where s < 10",
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.command

 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.execution.SparkSqlParser
 import org.apache.spark.sql.execution.datasources.BucketSpec
@@ -781,4 +782,26 @@ class DDLCommandSuite extends PlanTest {
     comparePlans(parsed1, expected1)
     comparePlans(parsed2, expected2)
   }
+
+  test("commands only available in HiveContext") {
+    intercept[ParseException] {
+      parser.parsePlan("DROP TABLE D1.T1")
+    }
+    intercept[ParseException] {
+      parser.parsePlan("CREATE VIEW testView AS SELECT id FROM tab")
+    }
+    intercept[ParseException] {
+      parser.parsePlan("ALTER VIEW testView AS SELECT id FROM tab")
+    }
+    intercept[ParseException] {
+      parser.parsePlan(
+        """
+          |CREATE EXTERNAL TABLE parquet_tab2(c1 INT, c2 STRING)
+          |TBLPROPERTIES('prop1Key '= "prop1Val", ' `prop2Key` '= "prop2Val")
+        """.stripMargin)
+    }
+    intercept[ParseException] {
+      parser.parsePlan("SELECT TRANSFORM (key, value) USING 'cat' AS (tKey, tValue) FROM testData")
+    }
+  }
 }
@@ -133,6 +133,18 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder {
     }
   }

+  /**
+   * Create a [[CatalogStorageFormat]]. This is part of the [[CreateTableAsSelect]] command.
+   */
+  override def visitCreateFileFormat(
+      ctx: CreateFileFormatContext): CatalogStorageFormat = withOrigin(ctx) {
+    if (ctx.storageHandler == null) {
+      typedVisit[CatalogStorageFormat](ctx.fileFormat)
+    } else {
+      visitStorageHandler(ctx.storageHandler)
+    }
+  }
+
   /**
    * Create a [[CreateTableAsSelect]] command.
    */
@@ -282,6 +294,7 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder {
    * Create a [[HiveScriptIOSchema]].
    */
   override protected def withScriptIOSchema(
+      ctx: QuerySpecificationContext,
       inRowFormat: RowFormatContext,
       recordWriter: Token,
       outRowFormat: RowFormatContext,
@@ -391,7 +404,8 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder {
   /**
    * Storage Handlers are currently not supported in the statements we support (CTAS).
    */
-  override def visitStorageHandler(ctx: StorageHandlerContext): AnyRef = withOrigin(ctx) {
+  override def visitStorageHandler(
+      ctx: StorageHandlerContext): CatalogStorageFormat = withOrigin(ctx) {
     throw new ParseException("Storage Handlers are currently unsupported.", ctx)
   }
@@ -18,16 +18,19 @@
 package org.apache.spark.sql.hive

 import org.apache.hadoop.hive.serde.serdeConstants
-import org.scalatest.BeforeAndAfterAll

-import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
 import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans
+import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan
 import org.apache.spark.sql.catalyst.expressions.JsonTuple
-import org.apache.spark.sql.catalyst.plans.logical.Generate
+import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.plans.logical.{Generate, ScriptTransformation}
 import org.apache.spark.sql.hive.execution.HiveSqlParser

-class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
+class HiveQlSuite extends PlanTest {
   val parser = HiveSqlParser

   private def extractTableDesc(sql: String): (CatalogTable, Boolean) = {
@@ -201,6 +204,26 @@ class HiveQlSuite extends PlanTest {
     assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple])
   }

+  test("transform query spec") {
+    val plan1 = parser.parsePlan("select transform(a, b) using 'func' from e where f < 10")
+      .asInstanceOf[ScriptTransformation].copy(ioschema = null)
+    val plan2 = parser.parsePlan("map a, b using 'func' as c, d from e")
+      .asInstanceOf[ScriptTransformation].copy(ioschema = null)
+    val plan3 = parser.parsePlan("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e")
+      .asInstanceOf[ScriptTransformation].copy(ioschema = null)
+
+    val p = ScriptTransformation(
+      Seq(UnresolvedAttribute("a"), UnresolvedAttribute("b")),
+      "func", Seq.empty, plans.table("e"), null)
+
+    comparePlans(plan1,
+      p.copy(child = p.child.where('f < 10), output = Seq('key.string, 'value.string)))
+    comparePlans(plan2,
+      p.copy(output = Seq('c.string, 'd.string)))
+    comparePlans(plan3,
+      p.copy(output = Seq('c.int, 'd.decimal(10, 0))))
+  }
+
   test("use backticks in output of Script Transform") {
     val plan = parser.parsePlan(
       """SELECT `t`.`thing1`