From f7e21dd1ec4541be54eb01d8b15cfcc6714feed0 Mon Sep 17 00:00:00 2001
From: Marcelo Vanzin <vanzin@cloudera.com>
Date: Wed, 8 Apr 2015 10:14:52 -0700
Subject: [PATCH] [SPARK-6506] [pyspark] Do not try to retrieve SPARK_HOME when
 not needed.

In particular, unconditionally reading SPARK_HOME makes pyspark in
yarn-cluster mode fail unless SPARK_HOME is set, even though it is not
really needed in that mode.
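
For illustration only, a minimal sketch of the intended control flow
after this change (simplified; launch_spark_submit_gateway is a
hypothetical stub standing in for the real spark-submit launch in
java_gateway.py):

    import os

    def launch_spark_submit_gateway(spark_home):
        # Hypothetical stub for the real spark-submit gateway launch.
        raise NotImplementedError(spark_home)

    def launch_gateway():
        if "PYSPARK_GATEWAY_PORT" in os.environ:
            # The gateway JVM was already started for us (e.g. in
            # yarn-cluster mode); SPARK_HOME is never read on this path.
            return int(os.environ["PYSPARK_GATEWAY_PORT"])
        # Only the local-launch path needs SPARK_HOME to find spark-submit.
        spark_home = os.environ["SPARK_HOME"]
        return launch_spark_submit_gateway(spark_home)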

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #5405 from vanzin/SPARK-6506 and squashes the following commits:

e184507 [Marcelo Vanzin] [SPARK-6506] [pyspark] Do not try to retrieve SPARK_HOME when not needed.
---
 python/pyspark/java_gateway.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 0a16cbd8bf..2a5e84a7df 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -29,11 +29,10 @@ from pyspark.serializers import read_int
 
 
 def launch_gateway():
-    SPARK_HOME = os.environ["SPARK_HOME"]
-
     if "PYSPARK_GATEWAY_PORT" in os.environ:
         gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
     else:
+        SPARK_HOME = os.environ["SPARK_HOME"]
         # Launch the Py4j gateway using Spark's run command so that we pick up the
         # proper classpath and settings from spark-env.sh
         on_windows = platform.system() == "Windows"
-- 
GitLab