Skip to content
Snippets Groups Projects
Commit 23c58653 authored by Eric Liang's avatar Eric Liang Committed by Reynold Xin
Browse files

[SPARK-16238] Metrics for generated method and class bytecode size

## What changes were proposed in this pull request?

This extends SPARK-15860 to include metrics for the actual bytecode size of janino-generated classes and methods. They can be accessed in the same way as any other codahale metric, e.g.

```
scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot().getValues()
res7: Array[Long] = Array(532, 532, 532, 542, 1479, 2670, 3585, 3585)

scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot().getValues()
res8: Array[Long] = Array(5, 5, 5, 5, 10, 10, 10, 10, 15, 15, 15, 38, 63, 79, 88, 94, 94, 94, 132, 132, 165, 165, 220, 220)
```

## How was this patch tested?

Added a small unit test; also verified manually that the performance impact is minimal (<10%). cc hvanhovell

Author: Eric Liang <ekl@databricks.com>

Closes #13934 from ericl/spark-16238.
parent 9b1b3ae7
No related branches found
No related tags found
No related merge requests found
......@@ -47,4 +47,16 @@ object CodegenMetrics extends Source {
* Histogram of the time it took to compile source code text (in milliseconds).
*/
val METRIC_COMPILATION_TIME = metricRegistry.histogram(MetricRegistry.name("compilationTime"))
/**
 * Histogram of the bytecode size (in bytes) of each class generated by CodeGenerator.
 * Registered in this source's metric registry under the name "generatedClassSize".
 */
val METRIC_GENERATED_CLASS_BYTECODE_SIZE =
metricRegistry.histogram(MetricRegistry.name("generatedClassSize"))
/**
 * Histogram of the bytecode size (in bytes) of each method in classes generated by
 * CodeGenerator. Registered in this source's metric registry under the name
 * "generatedMethodSize".
 */
val METRIC_GENERATED_METHOD_BYTECODE_SIZE =
metricRegistry.histogram(MetricRegistry.name("generatedMethodSize"))
}
......@@ -17,11 +17,16 @@
package org.apache.spark.sql.catalyst.expressions.codegen
import java.io.ByteArrayInputStream
import java.util.{Map => JavaMap}
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import com.google.common.cache.{CacheBuilder, CacheLoader}
import org.codehaus.janino.ClassBodyEvaluator
import org.codehaus.janino.{ByteArrayClassLoader, ClassBodyEvaluator, SimpleCompiler}
import org.codehaus.janino.util.ClassFile
import scala.language.existentials
import org.apache.spark.SparkEnv
......@@ -876,6 +881,7 @@ object CodeGenerator extends Logging {
try {
evaluator.cook("generated.java", code.body)
recordCompilationStats(evaluator)
} catch {
case e: Exception =>
val msg = s"failed to compile: $e\n$formatted"
......@@ -885,6 +891,38 @@ object CodeGenerator extends Logging {
evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]
}
/**
 * Records the bytecode size of every generated class, and of every method in those classes,
 * into [[CodegenMetrics]] by inspecting janino private fields through reflection.
 *
 * Collection is best-effort. This method is invoked right after `evaluator.cook(...)` inside
 * the compile `try` block, so an exception escaping from here would be reported as a
 * compilation failure even though the generated code compiled fine. Because the reflection
 * below depends on janino internals (private field names), any non-fatal error is logged and
 * swallowed instead of being propagated.
 *
 * @param evaluator the janino evaluator that has just cooked the generated source
 */
private def recordCompilationStats(evaluator: ClassBodyEvaluator): Unit = {
  try {
    // First retrieve the generated classes: SimpleCompiler keeps its class loader in the
    // private `result` field, and that loader keeps a class-name -> bytecode map in `classes`.
    val classes = {
      val resultField = classOf[SimpleCompiler].getDeclaredField("result")
      resultField.setAccessible(true)
      val loader = resultField.get(evaluator).asInstanceOf[ByteArrayClassLoader]
      val classesField = loader.getClass.getDeclaredField("classes")
      classesField.setAccessible(true)
      classesField.get(loader).asInstanceOf[JavaMap[String, Array[Byte]]].asScala
    }

    // Then walk the classes to get at the method bytecode, which lives in the private `code`
    // field of each method's CodeAttribute.
    val codeAttr = Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute")
    val codeAttrField = codeAttr.getDeclaredField("code")
    codeAttrField.setAccessible(true)

    classes.foreach { case (_, classBytes) =>
      CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classBytes.length)
      val cf = new ClassFile(new ByteArrayInputStream(classBytes))
      cf.methodInfos.asScala.foreach { method =>
        method.getAttributes().foreach { a =>
          // Attributes are matched by class name, as in the original code.
          // NOTE(review): presumably this avoids class-loader identity issues - confirm.
          if (a.getClass.getName == codeAttr.getName) {
            CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(
              codeAttrField.get(a).asInstanceOf[Array[Byte]].length)
          }
        }
      }
    }
  } catch {
    // Metrics are purely informational; never let them fail an otherwise successful compile.
    case scala.util.control.NonFatal(e) =>
      logWarning("Error calculating stats of compiled class.", e)
  }
}
/**
* A cache of generated classes.
*
......
......@@ -53,9 +53,13 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
test("metrics are recorded on compile") {
  // Snapshot every histogram's sample count before triggering a compilation, so the
  // assertions below only measure what this test adds.
  val startCount1 = CodegenMetrics.METRIC_COMPILATION_TIME.getCount()
  val startCount2 = CodegenMetrics.METRIC_SOURCE_CODE_SIZE.getCount()
  val startCount3 = CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getCount()
  val startCount4 = CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount()

  GenerateOrdering.generate(Add(Literal(123), Literal(1)).asc :: Nil)

  // Exactly one compilation happened, so exactly one sample for time and source size.
  assert(CodegenMetrics.METRIC_COMPILATION_TIME.getCount() == startCount1 + 1)
  assert(CodegenMetrics.METRIC_SOURCE_CODE_SIZE.getCount() == startCount2 + 1)
  // One compilation may record several class and method samples, hence the strict
  // "greater than". Each histogram is compared against its own baseline.
  assert(CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getCount() > startCount3)
  assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount() > startCount4)
}
test("SPARK-8443: split wide projections into blocks due to JVM code size limit") {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment