diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 47ff504316d38a8e1d6b144b4cb795dcddb37a14..6fb25f3b98722025c695c42afd175910efcc76f6 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -220,6 +220,8 @@ cp -r "$SPARK_HOME/data" "$DISTDIR"
 if [ "$MAKE_PIP" == "true" ]; then
   echo "Building python distribution package"
   pushd "$SPARK_HOME/python" > /dev/null
+  # Delete stale egg info, which can cache older setup files; rm -f ignores a missing path, so || runs only on a real failure.
+  rm -rf pyspark.egg-info || echo "Could not delete existing egg info, continuing" >&2
   python setup.py sdist
   popd > /dev/null
 else
diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py
index 430c2ab52766a1f0a5c268648aef7f708c8336ef..c491005f497195ee37acab20c958c5f7dd3b56fe 100644
--- a/dev/pip-sanity-check.py
+++ b/dev/pip-sanity-check.py
@@ -18,6 +18,8 @@
 from __future__ import print_function
 
 from pyspark.sql import SparkSession
+from pyspark.ml.param import Params
+from pyspark.mllib.linalg import *
 import sys
 
 if __name__ == "__main__":
diff --git a/dev/requirements.txt b/dev/requirements.txt
index bf042d22a8b47af748b43ca898693004aa509077..79782279f8fbd067c080e5036dc689e270f81190 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -1,3 +1,4 @@
 jira==1.0.3
 PyGithub==1.26.0
 Unidecode==0.04.19
+pypandoc==1.3.3
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index e1da18e60bb3d45cf0fa348baed30f08ffa30d4f..af1b1feb70cd1fee526f4c69ad2f9928ff4c7ea4 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -78,11 +78,14 @@ for python in "${PYTHON_EXECS[@]}"; do
     mkdir -p "$VIRTUALENV_PATH"
     virtualenv --python=$python "$VIRTUALENV_PATH"
     source "$VIRTUALENV_PATH"/bin/activate
-    # Upgrade pip
-    pip install --upgrade pip
+    # Upgrade pip & friends
+    pip install --upgrade pip pypandoc wheel
+    pip install numpy # Needed so we can verify mllib imports
 
     echo "Creating pip installable source dist"
     cd "$FWDIR"/python
+    # Delete stale egg info, which can cache the setup file; rm -f ignores a missing path, so || runs only on a real failure.
+    rm -rf pyspark.egg-info || echo "Could not delete existing egg info, continuing" >&2
     $python setup.py sdist
 
 
diff --git a/python/setup.py b/python/setup.py
index bc2eb4ce9dbd057706ef16664488e3a543200e44..47eab98e0f7b3cc1f7597042e32fc070b9368ebb 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -162,7 +162,12 @@ try:
         url='https://github.com/apache/spark/tree/master/python',
         packages=['pyspark',
                   'pyspark.mllib',
+                  'pyspark.mllib.linalg',
+                  'pyspark.mllib.stat',
                   'pyspark.ml',
+                  'pyspark.ml.linalg',
+                  'pyspark.ml.param',
+                  'pyspark.ml.stat',
                   'pyspark.sql',
                   'pyspark.streaming',
                   'pyspark.bin',