From c76baff0cc4775c2191d075cc9a8176e4915fec8 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Sun, 11 Sep 2016 10:19:39 +0100
Subject: [PATCH] [SPARK-17336][PYSPARK] Fix appending multiple times to
 PYTHONPATH from spark-config.sh

## What changes were proposed in this pull request?

During startup of Spark standalone, the script file spark-config.sh appends to the PYTHONPATH and can be sourced many times, causing duplicates in the path. This change adds an env flag that is set when the PYTHONPATH is appended, so the append happens only once.

## How was this patch tested?

Manually started a standalone master/worker and verified PYTHONPATH has no duplicate entries.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #15028 from BryanCutler/fix-duplicate-pythonpath-SPARK-17336.
---
 sbin/spark-config.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index a7a44cdde6..b7284487c5 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -26,5 +26,8 @@ fi
 export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 
 # Add the PySpark classes to the PYTHONPATH:
-export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.3-src.zip:${PYTHONPATH}"
+if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
+  export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.3-src.zip:${PYTHONPATH}"
+  export PYSPARK_PYTHONPATH_SET=1
+fi
--
GitLab
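
A minimal sketch of the manual check described under "How was this patch tested?", assuming SPARK_HOME is already set to a patched Spark installation; the exact commands and the expected count are illustrative and not part of the patch:

    # Source spark-config.sh twice; with the guard, the PySpark entries
    # should be appended to PYTHONPATH only on the first sourcing.
    . "${SPARK_HOME}/sbin/spark-config.sh"
    . "${SPARK_HOME}/sbin/spark-config.sh"

    # Count occurrences of the PySpark source dir in PYTHONPATH; expect 1, not 2.
    echo "${PYTHONPATH}" | tr ':' '\n' | grep -c "^${SPARK_HOME}/python$"

Before this change, the second sourcing would prepend the same two entries again; the PYSPARK_PYTHONPATH_SET flag makes the block idempotent across repeated sourcing within the same environment.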