Commit f16bc68d authored by zsxwing, committed by Reynold Xin

[SPARK-9824] [CORE] Fix the issue that InternalAccumulator leaks WeakReference

`InternalAccumulator.create` doesn't call `registerAccumulatorForCleanup` to register the accumulators it creates with `ContextCleaner`, so the `WeakReference` entries for these accumulators in `Accumulators.originals` are never removed.

This PR adds a `registerAccumulatorForCleanup` call for each internal accumulator to fix the memory leak.

Author: zsxwing <zsxwing@gmail.com>

Closes #8108 from zsxwing/internal-accumulators-leak.
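
For context on the leak: `Accumulators.originals` maps accumulator IDs to `WeakReference`s. When an accumulator is garbage-collected the reference is cleared, but the map entry itself stays until something explicitly removes it. A minimal, self-contained sketch of that pattern (hypothetical `Registry` name, not Spark's actual code):

import java.lang.ref.WeakReference
import scala.collection.mutable

// Hypothetical stand-in for the role Accumulators.originals plays.
object Registry {
  val originals = mutable.Map[Long, WeakReference[AnyRef]]()

  def register(id: Long, acc: AnyRef): Unit = {
    // The WeakReference lets acc be garbage-collected, but the map entry
    // (id -> cleared reference) lingers until someone calls remove(id).
    originals(id) = new WeakReference(acc)
  }

  def remove(id: Long): Unit = originals.remove(id)
}

Accumulators created through the public `SparkContext` API are registered with `ContextCleaner`, which eventually removes their entries; the internal accumulators from `InternalAccumulator.create` skipped that registration, so a stale entry piled up for every stage.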
parent 00c02728
@@ -382,14 +382,18 @@ private[spark] object InternalAccumulator {
    * add to the same set of accumulators. We do this to report the distribution of accumulator
    * values across all tasks within each stage.
    */
-  def create(): Seq[Accumulator[Long]] = {
-    Seq(
-      // Execution memory refers to the memory used by internal data structures created
-      // during shuffles, aggregations and joins. The value of this accumulator should be
-      // approximately the sum of the peak sizes across all such data structures created
-      // in this task. For SQL jobs, this only tracks all unsafe operators and ExternalSort.
-      new Accumulator(
-        0L, AccumulatorParam.LongAccumulatorParam, Some(PEAK_EXECUTION_MEMORY), internal = true)
-    ) ++ maybeTestAccumulator.toSeq
+  def create(sc: SparkContext): Seq[Accumulator[Long]] = {
+    val internalAccumulators = Seq(
+      // Execution memory refers to the memory used by internal data structures created
+      // during shuffles, aggregations and joins. The value of this accumulator should be
+      // approximately the sum of the peak sizes across all such data structures created
+      // in this task. For SQL jobs, this only tracks all unsafe operators and ExternalSort.
+      new Accumulator(
+        0L, AccumulatorParam.LongAccumulatorParam, Some(PEAK_EXECUTION_MEMORY), internal = true)
+    ) ++ maybeTestAccumulator.toSeq
+    internalAccumulators.foreach { accumulator =>
+      sc.cleaner.foreach(_.registerAccumulatorForCleanup(accumulator))
+    }
+    internalAccumulators
   }
 }
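
The fix routes internal accumulators through the same cleanup path the public API uses: each one is registered with the `ContextCleaner`, which drops its registry entry once the accumulator is garbage-collected. As a rough sketch of that mechanism (assumed names; not Spark's `ContextCleaner` implementation), a cleaner can pair each `WeakReference` with a `ReferenceQueue` and purge entries as the JVM enqueues cleared references:

import java.lang.ref.{Reference, ReferenceQueue, WeakReference}
import scala.collection.mutable

// Hypothetical cleaner: not Spark's ContextCleaner, just the underlying idea.
class CleanerSketch(registry: mutable.Map[Long, WeakReference[AnyRef]]) {
  private val refQueue = new ReferenceQueue[AnyRef]()
  // Remember which registry entry each weak reference is responsible for.
  private val pendingIds = mutable.Map[Reference[_], Long]()

  def registerForCleanup(id: Long, obj: AnyRef): Unit = {
    // Once obj is garbage-collected, the JVM enqueues this reference.
    pendingIds(new WeakReference(obj, refQueue)) = id
  }

  // Spark runs the equivalent of this loop in a daemon thread.
  def drainOnce(): Unit = {
    var ref = refQueue.poll()
    while (ref != null) {
      pendingIds.remove(ref).foreach(registry.remove)
      ref = refQueue.poll()
    }
  }
}

Tying cleanup to garbage collection this way means no caller has to remember to unregister an accumulator: once nothing else holds it, its bookkeeping entry goes away on the next drain.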
@@ -81,7 +81,7 @@ private[spark] abstract class Stage(
    * accumulators here again will override partial values from the finished tasks.
    */
   def resetInternalAccumulators(): Unit = {
-    _internalAccumulators = InternalAccumulator.create()
+    _internalAccumulators = InternalAccumulator.create(rdd.sparkContext)
   }
 
   /**
@@ -160,7 +160,8 @@ class AccumulatorSuite extends SparkFunSuite with Matchers with LocalSparkContex
   }
 
   test("internal accumulators in TaskContext") {
-    val accums = InternalAccumulator.create()
+    sc = new SparkContext("local", "test")
+    val accums = InternalAccumulator.create(sc)
     val taskContext = new TaskContextImpl(0, 0, 0, 0, null, null, accums)
     val internalMetricsToAccums = taskContext.internalMetricsToAccumulators
     val collectedInternalAccums = taskContext.collectInternalAccumulators()