diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d7746ca7a052e2f3ed72664549b2db07de610727..f8f868b59b9671e4bedd978b7b0d56b879f3066a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -640,8 +640,24 @@ class CodegenContext { splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) } - private def splitExpressions( - expressions: Seq[String], funcName: String, arguments: Seq[(String, String)]): String = { + /** + * Splits the generated code of expressions into multiple functions, because function has + * 64kb code size limit in JVM + * + * @param expressions the codes to evaluate expressions. + * @param funcName the split function name base. + * @param arguments the list of (type, name) of the arguments of the split function. + * @param returnType the return type of the split function. + * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup. + * @param foldFunctions folds the split function calls. + */ + def splitExpressions( + expressions: Seq[String], + funcName: String, + arguments: Seq[(String, String)], + returnType: String = "void", + makeSplitFunction: String => String = identity, + foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { @@ -662,18 +678,19 @@ class CodegenContext { blocks.head } else { val func = freshName(funcName) + val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ") val functions = blocks.zipWithIndex.map { case (body, i) => val name = s"${func}_$i" val code = s""" - |private void $name(${arguments.map { case (t, name) => s"$t $name" }.mkString(", ")}) { - | $body + |private $returnType $name($argString) { + | ${makeSplitFunction(body)} |} """.stripMargin addNewFunction(name, code) name } - functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")});").mkString("\n") + foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1cef95654a17bc283c12832b102991c55bd35e25..b7335f12b64b1e9a2332ed7ec212ea6f7a43e9aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -117,8 +117,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } } """ - }.mkString("\n") - comparisons + } + + ctx.splitExpressions( + expressions = comparisons, + funcName = "compare", + arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")), + returnType = "int", + makeSplitFunction = { body => + s""" + InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + $body + return 0; + """ + }, + foldFunctions = { funCalls => + funCalls.zipWithIndex.map { case (funCall, i) => + val comp = ctx.freshName("comp") + s""" + int $comp = $funCall; + if ($comp != 0) { + return $comp; + } + """ + }.mkString + }) } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 8cc2ab46c0c857430f3a73af640d874a5c26632b..190fab5d249bb21480e6fba1da2ae0296bc31a80 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { + val sortOrder = Literal("abc").asc + + // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(40)(sortOrder)) + + // verify that we can support up to 5000 ordering comparisons, which should be sufficient + GenerateOrdering.generate(Array.fill(5000)(sortOrder)) + } }