Skip to content
Snippets Groups Projects
Commit 6ae2746d authored by Matei Zaharia's avatar Matei Zaharia
Browse files

Handle arrays that contain the same element many times better in

SizeEstimator. Also added a test for SizeEstimator. Fixes #136.
parent 0a617958
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,8 @@ import java.util.Random
import scala.collection.mutable.ArrayBuffer
import it.unimi.dsi.fastutil.ints.IntOpenHashSet
/**
* Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in
* memory-aware caches.
......@@ -39,8 +41,7 @@ object SizeEstimator {
* IdentityHashMap of visited objects, and provides utility methods for enqueueing new objects
* to visit.
*/
private class SearchState {
val visited = new IdentityHashMap[AnyRef, AnyRef]
private class SearchState(val visited: IdentityHashMap[AnyRef, AnyRef]) {
val stack = new ArrayBuffer[AnyRef]
var size = 0L
......@@ -61,16 +62,18 @@ object SizeEstimator {
}
/**
* Cached information about each class. We remember two things: the
* "shell size" of the class (size of all non-static fields plus the
* java.lang.Object size), and any fields that are pointers to objects.
* Cached information about each class. We remember two things: the "shell size" of the class
* (size of all non-static fields plus the java.lang.Object size), and any fields that are
* pointers to objects.
*/
private class ClassInfo(
    // Bytes occupied by one instance on its own: all non-static fields plus the
    // java.lang.Object size, not counting anything the instance points to.
    val shellSize: Long,
    // Fields of the class that are pointers to other objects.
    val pointerFields: List[Field])
def estimate(obj: AnyRef): Long = {
val state = new SearchState
def estimate(obj: AnyRef): Long = estimate(obj, new IdentityHashMap[AnyRef, AnyRef])
private def estimate(obj: AnyRef, visited: IdentityHashMap[AnyRef, AnyRef]): Long = {
val state = new SearchState(visited)
state.enqueue(obj)
while (!state.isFinished) {
visitSingleObject(state.dequeue(), state)
......@@ -91,6 +94,10 @@ object SizeEstimator {
}
}
// Estimate the size of arrays larger than ARRAY_SIZE_FOR_SAMPLING by sampling.
// Arrays with at most this many elements have every element enqueued and visited instead.
private val ARRAY_SIZE_FOR_SAMPLING = 200
// Number of distinct indices drawn from a larger array when sampling. Sampling only happens
// for arrays longer than ARRAY_SIZE_FOR_SAMPLING, so keeping this value below that threshold
// guarantees the draw-without-replacement loop can always find an unused index.
private val ARRAY_SAMPLE_SIZE = 100 // should be lower than ARRAY_SIZE_FOR_SAMPLING
private def visitArray(array: AnyRef, cls: Class[_], state: SearchState) {
val length = JArray.getLength(array)
val elementClass = cls.getComponentType
......@@ -98,18 +105,23 @@ object SizeEstimator {
state.size += length * primitiveSize(elementClass)
} else {
state.size += length * POINTER_SIZE
if (length <= 100) {
if (length <= ARRAY_SIZE_FOR_SAMPLING) {
for (i <- 0 until length) {
state.enqueue(JArray.get(array, i))
}
} else {
// Estimate the size of a large array by sampling elements.
// TODO: Add a config setting for turning this off?
// Estimate the size of a large array by sampling elements without replacement.
var size = 0.0
val rand = new Random(42)
for (i <- 0 until 100) {
val elem = JArray.get(array, rand.nextInt(length))
size += SizeEstimator.estimate(elem)
val drawn = new IntOpenHashSet(ARRAY_SAMPLE_SIZE)
for (i <- 0 until ARRAY_SAMPLE_SIZE) {
var index = 0
do {
index = rand.nextInt(length)
} while (drawn.contains(index))
drawn.add(index)
val elem = JArray.get(array, index)
size += SizeEstimator.estimate(elem, state.visited)
}
state.size += ((length / 100.0) * size).toLong
}
......
package spark
import org.scalatest.FunSuite
/** Test fixture that declares no fields at all. */
class DummyClass1
/** Test fixture holding a single Int field. */
class DummyClass2 { val x: Int = 0 }
/** Test fixture holding one Int field and one Double field. */
class DummyClass3 {
  val x: Int = 0 // the Int field

  val y: Double = 0.0 // the Double field
}
/**
 * Test fixture holding a reference to a DummyClass3 (which may be null) next to an Int field,
 * so that an estimate has one pointer to follow.
 */
class DummyClass4(val d: DummyClass3) { val x: Int = 0 }
/**
 * Compares SizeEstimator.estimate with hand-computed byte counts for plain objects, strings,
 * primitive arrays and object arrays.
 *
 * The expected figures work out to 8 bytes for an object with no fields, 4 bytes per reference
 * and per Int, and 8 bytes per Double, with no extra header counted for arrays.
 * NOTE(review): presumably these values are specific to the memory layout SizeEstimator
 * assumes on the JVM running the tests -- confirm before running on another JVM.
 */
class SizeEstimatorSuite extends FunSuite {
  /** Shorthand for the estimator call that every check below makes. */
  private def sizeOf(obj: AnyRef): Long = SizeEstimator.estimate(obj)

  test("simple classes") {
    // No fields, one Int, then an Int plus a Double.
    expect(8)(sizeOf(new DummyClass1))
    expect(12)(sizeOf(new DummyClass2))
    expect(20)(sizeOf(new DummyClass3))
    // A null reference only costs the pointer; a non-null one adds the referenced object too.
    expect(16)(sizeOf(new DummyClass4(null)))
    expect(36)(sizeOf(new DummyClass4(new DummyClass3)))
  }

  test("strings") {
    // The empty string is expected to cost 24 bytes; each character adds two more.
    expect(24)(sizeOf(""))
    expect(26)(sizeOf("a"))
    expect(28)(sizeOf("ab"))
    expect(40)(sizeOf("abcdefgh"))
  }

  test("primitive arrays") {
    // Ten elements of every primitive width: length times element size.
    expect(10)(sizeOf(new Array[Byte](10)))
    expect(20)(sizeOf(new Array[Char](10)))
    expect(20)(sizeOf(new Array[Short](10)))
    expect(40)(sizeOf(new Array[Int](10)))
    expect(80)(sizeOf(new Array[Long](10)))
    expect(40)(sizeOf(new Array[Float](10)))
    expect(80)(sizeOf(new Array[Double](10)))

    // The same rule is expected to hold for much longer arrays.
    expect(4000)(sizeOf(new Array[Int](1000)))
    expect(8000)(sizeOf(new Array[Long](1000)))
  }

  test("object arrays") {
    // An array holding only nulls costs exactly one pointer per slot.
    expect(10 * 4)(sizeOf(new Array[String](10)))
    expect(10 * 4)(sizeOf(new Array[AnyRef](10)))

    // With non-null elements every slot costs its pointer plus the bytes of the object it
    // refers to. Array.fill re-evaluates its second argument for each slot, so all of these
    // elements are distinct objects.
    expect(10 * (4 + 8))(sizeOf(Array.fill(10)(new DummyClass1)))
    expect(10 * (4 + 12))(sizeOf(Array.fill(10)(new DummyClass2)))
    expect(10 * (4 + 20))(sizeOf(Array.fill(10)(new DummyClass3)))
    expect((4 + 8) + (4 + 12))(sizeOf(Array(new DummyClass1, new DummyClass2)))

    // An array this long is sized by sampling some of its elements rather than visiting all
    // of them, but with identically sized elements the result should still be exact.
    expect(1000 * (4 + 20))(sizeOf(Array.fill(1000)(new DummyClass3)))

    // When every slot refers to the *same* object, that object must be counted only once.
    val shared = new DummyClass1
    expect(10 * 4 + 8)(sizeOf(Array.fill(10)(shared))) // 10 pointers plus the 8-byte object
    expect(100 * 4 + 8)(sizeOf(Array.fill(100)(shared))) // 100 pointers plus the 8-byte object

    // For a huge array of one repeated object the estimator cannot know that *every* slot
    // equals the first element it sampled, so the answer will not be exactly 4008 -- it only
    // has to land close to it.
    val estimatedSize = sizeOf(Array.fill(1000)(shared))
    assert(estimatedSize >= 4000, "Estimated size " + estimatedSize + " should be more than 4000")
    assert(estimatedSize <= 4100, "Estimated size " + estimatedSize + " should be less than 4100")
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment