From c76baff0cc4775c2191d075cc9a8176e4915fec8 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Sun, 11 Sep 2016 10:19:39 +0100
Subject: [PATCH] [SPARK-17336][PYSPARK] Fix appending multiple times to
 PYTHONPATH from spark-config.sh

## What changes were proposed in this pull request?
During startup of Spark standalone, the script file spark-config.sh appends to the PYTHONPATH and can be sourced many times, causing duplicates in the path. This change adds an env flag that is set when the PYTHONPATH is appended, so the append happens only once.

## How was this patch tested?
Manually started standalone master/worker and verified PYTHONPATH has no duplicate entries.
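
A minimal sketch of that manual check (not part of the patch itself; it assumes a Spark checkout at `SPARK_HOME` and standard POSIX tools):

```sh
# Hypothetical verification: source spark-config.sh twice, as the standalone
# start scripts can, then look for duplicated PYTHONPATH entries.
export SPARK_HOME="$(pwd)"            # assumes we are at the root of a Spark checkout
. "${SPARK_HOME}/sbin/spark-config.sh"
. "${SPARK_HOME}/sbin/spark-config.sh"

# Split PYTHONPATH on ':' and print any entry appearing more than once;
# with the PYSPARK_PYTHONPATH_SET guard in place this prints nothing.
echo "${PYTHONPATH}" | tr ':' '\n' | sort | uniq -d
```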

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #15028 from BryanCutler/fix-duplicate-pythonpath-SPARK-17336.
---
 sbin/spark-config.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index a7a44cdde6..b7284487c5 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -26,5 +26,8 @@ fi
 
 export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
-export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.3-src.zip:${PYTHONPATH}"
+if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
+  export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.3-src.zip:${PYTHONPATH}"
+  export PYSPARK_PYTHONPATH_SET=1
+fi
-- 
GitLab