Skip to content
Snippets Groups Projects
Commit c2b50d69 authored by Josh Rosen, committed by Michael Armbrust
Browse files

[SPARK-9292] Analysis should check that join conditions' data types are BooleanType

This patch adds an analysis check to ensure that join conditions' data types are BooleanType. This check is necessary in order to report proper errors for non-boolean DataFrame join conditions.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #7630 from JoshRosen/SPARK-9292 and squashes the following commits:

aec6c7b [Josh Rosen] Check condition type in resolved()
75a3ea6 [Josh Rosen] Fix SPARK-9292.
parent c8d71a41
No related branches found
No related tags found
No related merge requests found
......@@ -83,6 +83,11 @@ trait CheckAnalysis {
s"filter expression '${f.condition.prettyString}' " +
s"of type ${f.condition.dataType.simpleString} is not a boolean.")
+case j @ Join(_, _, _, Some(condition)) if condition.dataType != BooleanType =>
+failAnalysis(
+s"join condition '${condition.prettyString}' " +
+s"of type ${condition.dataType.simpleString} is not a boolean.")
case Aggregate(groupingExprs, aggregateExprs, child) =>
def checkValidAggregateExpression(expr: Expression): Unit = expr match {
case _: AggregateExpression => // OK
......
......@@ -128,7 +128,10 @@ case class Join(
// Joins are only resolved if they don't introduce ambiguous expression ids.
override lazy val resolved: Boolean = {
-childrenResolved && expressions.forall(_.resolved) && selfJoinResolved
+childrenResolved &&
+expressions.forall(_.resolved) &&
+selfJoinResolved &&
+condition.forall(_.dataType == BooleanType)
}
}
......
......@@ -118,6 +118,11 @@ class AnalysisErrorSuite extends SparkFunSuite with BeforeAndAfter {
testRelation.where(Literal(1)),
"filter" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil)
+errorTest(
+"non-boolean join conditions",
+testRelation.join(testRelation, condition = Some(Literal(1))),
+"condition" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil)
errorTest(
"missing group by",
testRelation2.groupBy('a)('b),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment