Skip to content
Snippets Groups Projects
Commit 9693b0d5 authored by Liang-Chi Hsieh's avatar Liang-Chi Hsieh Committed by Michael Armbrust
Browse files

[SPARK-12018][SQL] Refactor common subexpression elimination code

JIRA: https://issues.apache.org/jira/browse/SPARK-12018

The code of common subexpression elimination can be factored and simplified. Some unnecessary variables can be removed.

Author: Liang-Chi Hsieh <viirya@appier.com>

Closes #10009 from viirya/refactor-subexpr-eliminate.
parent f73379be
No related branches found
No related tags found
No related merge requests found
......@@ -94,13 +94,9 @@ abstract class Expression extends TreeNode[Expression] {
def gen(ctx: CodeGenContext): GeneratedExpressionCode = {
ctx.subExprEliminationExprs.get(this).map { subExprState =>
// This expression is repeated meaning the code to evaluated has already been added
// as a function, `subExprState.fnName`. Just call that.
val code =
s"""
|/* $this */
|${subExprState.fnName}(${ctx.INPUT_ROW});
""".stripMargin.trim
GeneratedExpressionCode(code, subExprState.code.isNull, subExprState.code.value)
// as a function and called in advance. Just use it.
val code = s"/* $this */"
GeneratedExpressionCode(code, subExprState.isNull, subExprState.value)
}.getOrElse {
val isNull = ctx.freshName("isNull")
val primitive = ctx.freshName("primitive")
......
......@@ -104,16 +104,13 @@ class CodeGenContext {
val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions
// State used for subexpression elimination.
case class SubExprEliminationState(
isLoaded: String,
code: GeneratedExpressionCode,
fnName: String)
case class SubExprEliminationState(isNull: String, value: String)
// Foreach expression that is participating in subexpression elimination, the state to use.
val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
// The collection of isLoaded variables that need to be reset on each row.
val subExprIsLoadedVariables = mutable.ArrayBuffer.empty[String]
// The collection of sub-exression result resetting methods that need to be called on each row.
val subExprResetVariables = mutable.ArrayBuffer.empty[String]
final val JAVA_BOOLEAN = "boolean"
final val JAVA_BYTE = "byte"
......@@ -408,7 +405,6 @@ class CodeGenContext {
val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
commonExprs.foreach(e => {
val expr = e.head
val isLoaded = freshName("isLoaded")
val isNull = freshName("isNull")
val value = freshName("value")
val fnName = freshName("evalExpr")
......@@ -417,18 +413,12 @@ class CodeGenContext {
val code = expr.gen(this)
val fn =
s"""
|private void $fnName(InternalRow ${INPUT_ROW}) {
| if (!$isLoaded) {
| ${code.code.trim}
| $isLoaded = true;
| $isNull = ${code.isNull};
| $value = ${code.value};
| }
|private void $fnName(InternalRow $INPUT_ROW) {
| ${code.code.trim}
| $isNull = ${code.isNull};
| $value = ${code.value};
|}
""".stripMargin
code.code = fn
code.isNull = isNull
code.value = value
addNewFunction(fnName, fn)
......@@ -448,18 +438,12 @@ class CodeGenContext {
// 2. Less code.
// Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
// at least two nodes) as the cost of doing it is expected to be low.
// Maintain the loaded value and isNull as member variables. This is necessary if the codegen
// function is split across multiple functions.
// TODO: maintaining this as a local variable probably allows the compiler to do better
// optimizations.
addMutableState("boolean", isLoaded, s"$isLoaded = false;")
addMutableState("boolean", isNull, s"$isNull = false;")
addMutableState(javaType(expr.dataType), value,
s"$value = ${defaultValue(expr.dataType)};")
subExprIsLoadedVariables += isLoaded
val state = SubExprEliminationState(isLoaded, code, fnName)
subExprResetVariables += s"$fnName($INPUT_ROW);"
val state = SubExprEliminationState(isNull, value)
e.foreach(subExprEliminationExprs.put(_, state))
})
}
......
......@@ -287,8 +287,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val holderClass = classOf[BufferHolder].getName
ctx.addMutableState(holderClass, bufferHolder, s"this.$bufferHolder = new $holderClass();")
// Reset the isLoaded flag for each row.
val subexprReset = ctx.subExprIsLoadedVariables.map { v => s"${v} = false;" }.mkString("\n")
// Reset the subexpression values for each row.
val subexprReset = ctx.subExprResetVariables.mkString("\n")
val code =
s"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment