Skip to content
Snippets Groups Projects
Commit 64d8f37c authored by Dongjoon Hyun's avatar Dongjoon Hyun Committed by Herman van Hovell
Browse files

[SPARK-16726][SQL] Improve `Union/Intersect/Except` error messages on incompatible types

## What changes were proposed in this pull request?

Currently, `UNION` queries on incompatible types show misleading error messages, i.e., `unresolved operator Union`. We had better show a more correct message. This will help users in the situation of [SPARK-16704](https://issues.apache.org/jira/browse/SPARK-16704).

**Before**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Union;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Intersect;
scala> sql("select 1,2,3 except (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Except;
```

**After**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Union can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the second column of the second table;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Intersect can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the second column of the second table;
scala> sql("select 1,2,3 except (select array(1),array(2),3)")
org.apache.spark.sql.AnalysisException: Except can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the first column of the second table;
```

## How was this patch tested?

Pass the Jenkins test with a new test case.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #14355 from dongjoon-hyun/SPARK-16726.
parent 579fbcf3
No related branches found
No related tags found
No related merge requests found
......@@ -253,19 +253,6 @@ trait CheckAnalysis extends PredicateHelper {
}
}
case s @ SetOperation(left, right) if left.output.length != right.output.length =>
failAnalysis(
s"${s.nodeName} can only be performed on tables with the same number of columns, " +
s"but the left table has ${left.output.length} columns and the right has " +
s"${right.output.length}")
case s: Union if s.children.exists(_.output.length != s.children.head.output.length) =>
val firstError = s.children.find(_.output.length != s.children.head.output.length).get
failAnalysis(
s"Unions can only be performed on tables with the same number of columns, " +
s"but one table has '${firstError.output.length}' columns and another table has " +
s"'${s.children.head.output.length}' columns")
case GlobalLimit(limitExpr, _) => checkLimitClause(limitExpr)
case LocalLimit(limitExpr, _) => checkLimitClause(limitExpr)
......@@ -280,6 +267,37 @@ trait CheckAnalysis extends PredicateHelper {
case p if p.expressions.exists(PredicateSubquery.hasPredicateSubquery) =>
failAnalysis(s"Predicate sub-queries can only be used in a Filter: $p")
case _: Union | _: SetOperation if operator.children.length > 1 =>
def dataTypes(plan: LogicalPlan): Seq[DataType] = plan.output.map(_.dataType)
def ordinalNumber(i: Int): String = i match {
case 0 => "first"
case 1 => "second"
case i => s"${i}th"
}
val ref = dataTypes(operator.children.head)
operator.children.tail.zipWithIndex.foreach { case (child, ti) =>
// Check the number of columns
if (child.output.length != ref.length) {
failAnalysis(
s"""
|${operator.nodeName} can only be performed on tables with the same number
|of columns, but the first table has ${ref.length} columns and
|the ${ordinalNumber(ti + 1)} table has ${child.output.length} columns
""".stripMargin.replace("\n", " ").trim())
}
// Check if the data types match.
dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) =>
if (dt1 != dt2) {
failAnalysis(
s"""
|${operator.nodeName} can only be performed on tables with the compatible
|column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} column of
|the ${ordinalNumber(ti + 1)} table
""".stripMargin.replace("\n", " ").trim())
}
}
}
case _ => // Fallbacks to the following checks
}
......
......@@ -277,6 +277,21 @@ class AnalysisErrorSuite extends AnalysisTest {
"except" :: "number of columns" :: testRelation2.output.length.toString ::
testRelation.output.length.toString :: Nil)
errorTest(
"union with incompatible column types",
testRelation.union(nestedRelation),
"union" :: "the compatible column types" :: Nil)
errorTest(
"intersect with incompatible column types",
testRelation.intersect(nestedRelation),
"intersect" :: "the compatible column types" :: Nil)
errorTest(
"except with incompatible column types",
testRelation.except(nestedRelation),
"except" :: "the compatible column types" :: Nil)
errorTest(
"SPARK-9955: correct error message for aggregate",
// When parse SQL string, we will wrap aggregate expressions with UnresolvedAlias.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment