diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 44120e591f2fba22f966794d491dddc363d2449b..4bef21b6b4e4dd77ba62c5ea11f9d5d348f915c7 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -698,7 +698,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
       if (numKeys == MAX_CAPACITY
         // The map could be reused from last spill (because of no enough memory to grow),
         // then we don't try to grow again if hit the `growthThreshold`.
-        || !canGrowArray && numKeys > growthThreshold) {
+        || !canGrowArray && numKeys >= growthThreshold) {
         return false;
       }
 
@@ -742,7 +742,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
       longArray.set(pos * 2 + 1, keyHashcode);
       isDefined = true;
 
-      if (numKeys > growthThreshold && longArray.size() < MAX_CAPACITY) {
+      if (numKeys >= growthThreshold && longArray.size() < MAX_CAPACITY) {
         try {
           growAndRehash();
         } catch (OutOfMemoryError oom) {
@@ -911,6 +911,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
       freePage(dataPage);
     }
     allocate(initialCapacity);
+    canGrowArray = true;
     currentPage = null;
     pageCursor = 0;
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
index c1555114e8b3e1ce0e8ae4064c2220e7fc4d7f32..6cf18de0cc76846d96c9db3675f75ae967c8ae4c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala
@@ -342,4 +342,44 @@ class UnsafeFixedWidthAggregationMapSuite
     }
   }
 
+  testWithMemoryLeakDetection("convert to external sorter after fail to grow (SPARK-19500)") {
+    val pageSize = 4096000
+    val map = new UnsafeFixedWidthAggregationMap(
+      emptyAggregationBuffer,
+      aggBufferSchema,
+      groupKeySchema,
+      taskMemoryManager,
+      128, // initial capacity
+      pageSize,
+      false // disable perf metrics
+    )
+
+    val rand = new Random(42)
+    for (i <- 1 to 63) {
+      val str = rand.nextString(1024)
+      val buf = map.getAggregationBuffer(InternalRow(UTF8String.fromString(str)))
+      buf.setInt(0, str.length)
+    }
+    // Simulate running out of space
+    memoryManager.limit(0)
+    var str = rand.nextString(1024)
+    var buf = map.getAggregationBuffer(InternalRow(UTF8String.fromString(str)))
+    assert(buf != null)
+    str = rand.nextString(1024)
+    buf = map.getAggregationBuffer(InternalRow(UTF8String.fromString(str)))
+    assert(buf == null)
+
+    // Convert the map into a sorter. This used to fail before the fix for SPARK-10474
+    // because we would try to acquire space for the in-memory sorter pointer array before
+    // actually releasing the pages despite having spilled all of them.
+    var sorter: UnsafeKVExternalSorter = null
+    try {
+      sorter = map.destructAndCreateExternalSorter()
+      map.free()
+    } finally {
+      if (sorter != null) {
+        sorter.cleanupResources()
+      }
+    }
+  }
 }
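
Why the `>` vs `>=` guard matters: when the map is later converted to a sorter, the sorter borrows the map's pointer array for in-memory sorting, which needs roughly half of that array free as scratch space. Below is a minimal, self-contained sketch, not Spark code: the class name OffByOneSketch, the hard-coded 0.5 load factor, and the concrete capacity are illustrative assumptions mirroring BytesToBytesMap's defaults. It shows how the old `>` guard can admit one key too many once growth is disabled, violating the half-free invariant, while `>=` keeps it intact.

// A minimal sketch (assumed names and sizes, not Spark's actual classes).
// Model: each entry occupies two slots of a long array of length 2 * capacity,
// and growthThreshold = capacity * loadFactor with an assumed loadFactor of 0.5.
public class OffByOneSketch {
  public static void main(String[] args) {
    final int capacity = 64;                      // hash table capacity
    final int arraySlots = capacity * 2;          // pointer-array slots (2 per entry)
    final int growthThreshold = (int) (capacity * 0.5);

    // Old guard: inserts are rejected only once numKeys > growthThreshold, so
    // when growth is impossible (canGrowArray == false) the map can still end
    // up holding growthThreshold + 1 keys.
    final int maxKeysOldGuard = growthThreshold + 1;  // 33
    // New guard (>=): the map never exceeds growthThreshold keys.
    final int maxKeysNewGuard = growthThreshold;      // 32

    // Invariant assumed by the borrowing sorter: used slots <= half the array.
    System.out.println("old guard keeps invariant: "
        + (maxKeysOldGuard * 2 <= arraySlots / 2));   // false: 66 > 64
    System.out.println("new guard keeps invariant: "
        + (maxKeysNewGuard * 2 <= arraySlots / 2));   // true: 64 <= 64
  }
}

The `canGrowArray = true` added to reset() complements the guard fix: a map that is emptied for reuse after a spill can attempt to grow again, instead of permanently rejecting inserts once it sits at the threshold.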