diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 47ff504316d38a8e1d6b144b4cb795dcddb37a14..6fb25f3b98722025c695c42afd175910efcc76f6 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -220,6 +220,8 @@ cp -r "$SPARK_HOME/data" "$DISTDIR" if [ "$MAKE_PIP" == "true" ]; then echo "Building python distribution package" pushd "$SPARK_HOME/python" > /dev/null + # Delete any existing egg info, which can cache older setup files (rm -rf succeeds if it is absent). + rm -rf pyspark.egg-info || echo "WARNING: failed to delete pyspark.egg-info; stale setup metadata may be packaged" >&2 python setup.py sdist popd > /dev/null else diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py index 430c2ab52766a1f0a5c268648aef7f708c8336ef..c491005f497195ee37acab20c958c5f7dd3b56fe 100644 --- a/dev/pip-sanity-check.py +++ b/dev/pip-sanity-check.py @@ -18,6 +18,8 @@ from __future__ import print_function from pyspark.sql import SparkSession +from pyspark.ml.param import Params +from pyspark.mllib.linalg import * import sys if __name__ == "__main__": diff --git a/dev/requirements.txt b/dev/requirements.txt index bf042d22a8b47af748b43ca898693004aa509077..79782279f8fbd067c080e5036dc689e270f81190 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,3 +1,4 @@ jira==1.0.3 PyGithub==1.26.0 Unidecode==0.04.19 +pypandoc==1.3.3 diff --git a/dev/run-pip-tests b/dev/run-pip-tests index e1da18e60bb3d45cf0fa348baed30f08ffa30d4f..af1b1feb70cd1fee526f4c69ad2f9928ff4c7ea4 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -78,11 +78,14 @@ for python in "${PYTHON_EXECS[@]}"; do mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" source "$VIRTUALENV_PATH"/bin/activate - # Upgrade pip - pip install --upgrade pip + # Upgrade pip & friends + pip install --upgrade pip pypandoc wheel + pip install numpy # Needed so we can verify mllib imports echo "Creating pip installable source dist" cd "$FWDIR"/python + # Delete any existing egg info, which can cache the setup file (rm -rf succeeds if it is absent).
+ rm -rf pyspark.egg-info || echo "WARNING: failed to delete pyspark.egg-info; stale setup metadata may be packaged" >&2 $python setup.py sdist diff --git a/python/setup.py b/python/setup.py index bc2eb4ce9dbd057706ef16664488e3a543200e44..47eab98e0f7b3cc1f7597042e32fc070b9368ebb 100644 --- a/python/setup.py +++ b/python/setup.py @@ -162,7 +162,12 @@ try: url='https://github.com/apache/spark/tree/master/python', packages=['pyspark', 'pyspark.mllib', + 'pyspark.mllib.linalg', + 'pyspark.mllib.stat', 'pyspark.ml', + 'pyspark.ml.linalg', + 'pyspark.ml.param', + 'pyspark.ml.stat', 'pyspark.sql', 'pyspark.streaming', 'pyspark.bin',