Skip to content
Snippets Groups Projects
Commit ee214077 authored by Davies Liu's avatar Davies Liu Committed by Reynold Xin
Browse files

[SPARK-11864][SQL] Improve performance of max/min

This PR has the following optimization:

1) The greatest/least already does the null-check, so the `If` and `IsNull` are not necessary.

2) In greatest/least, it should initialize the result using the first child (removing one block).

3) For primitive types, the generated greater expression is too complicated (`a > b ? 1 : (a < b) ? -1 : 0) > 0`), should be as simple as `a > b`

Combine these optimization, this could improve the performance of `ss_max` query by 30%.

Author: Davies Liu <davies@databricks.com>

Closes #9846 from davies/improve_max.
parent b2cecb80
No related branches found
No related tags found
No related merge requests found
......@@ -46,13 +46,12 @@ case class Max(child: Expression) extends DeclarativeAggregate {
)
override lazy val updateExpressions: Seq[Expression] = Seq(
/* max = */ If(IsNull(child), max, If(IsNull(max), child, Greatest(Seq(max, child))))
/* max = */ Greatest(Seq(max, child))
)
override lazy val mergeExpressions: Seq[Expression] = {
val greatest = Greatest(Seq(max.left, max.right))
Seq(
/* max = */ If(IsNull(max.right), max.left, If(IsNull(max.left), max.right, greatest))
/* max = */ Greatest(Seq(max.left, max.right))
)
}
......
......@@ -47,13 +47,12 @@ case class Min(child: Expression) extends DeclarativeAggregate {
)
override lazy val updateExpressions: Seq[Expression] = Seq(
/* min = */ If(IsNull(child), min, If(IsNull(min), child, Least(Seq(min, child))))
/* min = */ Least(Seq(min, child))
)
override lazy val mergeExpressions: Seq[Expression] = {
val least = Least(Seq(min.left, min.right))
Seq(
/* min = */ If(IsNull(min.right), min.left, If(IsNull(min.left), min.right, least))
/* min = */ Least(Seq(min.left, min.right))
)
}
......
......@@ -329,6 +329,18 @@ class CodeGenContext {
throw new IllegalArgumentException("cannot generate compare code for un-comparable type")
}
/**
* Generates code for greater of two expressions.
*
* @param dataType data type of the expressions
* @param c1 name of the variable of expression 1's output
* @param c2 name of the variable of expression 2's output
*/
def genGreater(dataType: DataType, c1: String, c2: String): String = javaType(dataType) match {
case JAVA_BYTE | JAVA_SHORT | JAVA_INT | JAVA_LONG => s"$c1 > $c2"
case _ => s"(${genComp(dataType, c1, c2)}) > 0"
}
/**
* List of java data types that have special accessors and setters in [[InternalRow]].
*/
......
......@@ -348,19 +348,22 @@ case class Least(children: Seq[Expression]) extends Expression {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val evalChildren = children.map(_.gen(ctx))
def updateEval(i: Int): String =
val first = evalChildren(0)
val rest = evalChildren.drop(1)
def updateEval(eval: GeneratedExpressionCode): String =
s"""
if (!${evalChildren(i).isNull} && (${ev.isNull} ||
${ctx.genComp(dataType, evalChildren(i).value, ev.value)} < 0)) {
${eval.code}
if (!${eval.isNull} && (${ev.isNull} ||
${ctx.genGreater(dataType, ev.value, eval.value)})) {
${ev.isNull} = false;
${ev.value} = ${evalChildren(i).value};
${ev.value} = ${eval.value};
}
"""
s"""
${evalChildren.map(_.code).mkString("\n")}
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
${children.indices.map(updateEval).mkString("\n")}
${first.code}
boolean ${ev.isNull} = ${first.isNull};
${ctx.javaType(dataType)} ${ev.value} = ${first.value};
${rest.map(updateEval).mkString("\n")}
"""
}
}
......@@ -403,19 +406,22 @@ case class Greatest(children: Seq[Expression]) extends Expression {
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val evalChildren = children.map(_.gen(ctx))
def updateEval(i: Int): String =
val first = evalChildren(0)
val rest = evalChildren.drop(1)
def updateEval(eval: GeneratedExpressionCode): String =
s"""
if (!${evalChildren(i).isNull} && (${ev.isNull} ||
${ctx.genComp(dataType, evalChildren(i).value, ev.value)} > 0)) {
${eval.code}
if (!${eval.isNull} && (${ev.isNull} ||
${ctx.genGreater(dataType, eval.value, ev.value)})) {
${ev.isNull} = false;
${ev.value} = ${evalChildren(i).value};
${ev.value} = ${eval.value};
}
"""
s"""
${evalChildren.map(_.code).mkString("\n")}
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
${children.indices.map(updateEval).mkString("\n")}
${first.code}
boolean ${ev.isNull} = ${first.isNull};
${ctx.javaType(dataType)} ${ev.value} = ${first.value};
${rest.map(updateEval).mkString("\n")}
"""
}
}
......
......@@ -62,11 +62,15 @@ case class Coalesce(children: Seq[Expression]) extends Expression {
}
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val first = children(0)
val rest = children.drop(1)
val firstEval = first.gen(ctx)
s"""
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
${firstEval.code}
boolean ${ev.isNull} = ${firstEval.isNull};
${ctx.javaType(dataType)} ${ev.value} = ${firstEval.value};
""" +
children.map { e =>
rest.map { e =>
val eval = e.gen(ctx)
s"""
if (${ev.isNull}) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment