From 39e4fda76f704f61924b10618a6f22bd93d8000f Mon Sep 17 00:00:00 2001 From: Christopher Nguyen <ctn@adatao.com> Date: Mon, 5 Aug 2013 02:09:54 -0700 Subject: [PATCH] [HOTFIX] Extend thread safety for SparkEnv.get() A ThreadLocal SparkEnv.env is facing various situations leading to NullPointerExceptions, where SparkEnv.env set in one thread is not gettable in another thread, but often assumed to be available. See, e.g., https://groups.google.com/forum/#!topic/spark-developers/GLx8yunSj0A This hotfixes SparkEnv.env to return either (a) the ThreadLocal value if non-null, or (b) the previously set value in any thread. This approach preserves SparkEnv.set() thread safety needed by RDD.compute() and possibly other places. A refactoring that parameterizes SparkEnv should be addressed subsequently. On branch adatao-global-SparkEnv Changes to be committed: modified: core/src/main/scala/spark/SparkEnv.scala --- core/src/main/scala/spark/SparkEnv.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala index 4a1d341f5d..9ac59918cc 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/spark/SparkEnv.scala @@ -97,13 +97,26 @@ class SparkEnv ( object SparkEnv extends Logging { private val env = new ThreadLocal[SparkEnv] + private var lastSetSparkEnv : SparkEnv = _ def set(e: SparkEnv) { + lastSetSparkEnv = e env.set(e) } + /** + * Returns the ThreadLocal SparkEnv, if non-null. Else returns the SparkEnv + * previously set in any thread. + */ def get: SparkEnv = { - env.get() + Option(env.get()).getOrElse(lastSetSparkEnv) + } + + /** + * Returns the ThreadLocal SparkEnv. + */ + def getThreadLocal : SparkEnv = { + env.get() } def createFromSystemProperties( -- GitLab