From 39e4fda76f704f61924b10618a6f22bd93d8000f Mon Sep 17 00:00:00 2001
From: Christopher Nguyen <ctn@adatao.com>
Date: Mon, 5 Aug 2013 02:09:54 -0700
Subject: [PATCH] [HOTFIX] Extend thread safety for SparkEnv.get()

A ThreadLocal SparkEnv.env is facing various situations leading to
NullPointerExceptions, where SparkEnv.env set in one thread is not
gettable in another thread, but often assumed to be available.

See, e.g., https://groups.google.com/forum/#!topic/spark-developers/GLx8yunSj0A

This hotfixes SparkEnv.env to return either (a) the ThreadLocal
value if non-null, or (b) the previously set value in any thread.

This approach preserves SparkEnv.set() thread safety needed by
RDD.compute() and possibly other places. A refactoring that
parameterizes SparkEnv should be addressed subsequently.

On branch adatao-global-SparkEnv
Changes to be committed:

	modified:   core/src/main/scala/spark/SparkEnv.scala
---
 core/src/main/scala/spark/SparkEnv.scala | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/spark/SparkEnv.scala
index 4a1d341f5d..9ac59918cc 100644
--- a/core/src/main/scala/spark/SparkEnv.scala
+++ b/core/src/main/scala/spark/SparkEnv.scala
@@ -97,13 +97,26 @@ class SparkEnv (
 
 object SparkEnv extends Logging {
   private val env = new ThreadLocal[SparkEnv]
+  private var lastSetSparkEnv : SparkEnv = _
 
   def set(e: SparkEnv) {
+	  lastSetSparkEnv = e
     env.set(e)
   }
 
+  /**
+   * Returns the ThreadLocal SparkEnv, if non-null. Else returns the SparkEnv
+   * previously set in any thread.
+   */
   def get: SparkEnv = {
-    env.get()
+    Option(env.get()).getOrElse(lastSetSparkEnv)
+  }
+
+  /**
+   * Returns the ThreadLocal SparkEnv.
+   */
+  def getThreadLocal : SparkEnv = {
+	  env.get()
   }
 
   def createFromSystemProperties(
-- 
GitLab