diff --git a/python/epydoc.conf b/python/epydoc.conf
index 0b42e729f8dcc756c711584de2b2a4f071b480c5..95a6af09748065a3e1939e8662a818dcc9211d44 100644
--- a/python/epydoc.conf
+++ b/python/epydoc.conf
@@ -34,4 +34,4 @@ private: no
 
 exclude: pyspark.cloudpickle pyspark.worker pyspark.join
          pyspark.java_gateway pyspark.examples pyspark.shell pyspark.test
-         pyspark.rddsampler pyspark.daemon
+         pyspark.rddsampler pyspark.daemon pyspark.mllib._common
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index f1b95acf097810a9357f661f6bc4f9ca8578e106..2b2c3a061a71d49b0bc5f9815be1bfa2d90cb191 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -20,21 +20,24 @@ PySpark is the Python API for Spark.
 
 Public classes:
 
-    - L{SparkContext<pyspark.context.SparkContext>}
-        Main entry point for Spark functionality.
-    - L{RDD<pyspark.rdd.RDD>}
-        A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
-    - L{Broadcast<pyspark.broadcast.Broadcast>}
-        A broadcast variable that gets reused across tasks.
-    - L{Accumulator<pyspark.accumulators.Accumulator>}
-        An "add-only" shared variable that tasks can only add values to.
-    - L{SparkConf<pyspark.conf.SparkConf}
-        Configuration for a Spark application.
-    - L{SparkFiles<pyspark.files.SparkFiles>}
-        Access files shipped with jobs.
-    - L{StorageLevel<pyspark.storagelevel.StorageLevel>}
-        Finer-grained cache persistence levels.
+  - L{SparkContext<pyspark.context.SparkContext>}
+      Main entry point for Spark functionality.
+  - L{RDD<pyspark.rdd.RDD>}
+      A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
+  - L{Broadcast<pyspark.broadcast.Broadcast>}
+      A broadcast variable that gets reused across tasks.
+  - L{Accumulator<pyspark.accumulators.Accumulator>}
+      An "add-only" shared variable that tasks can only add values to.
+  - L{SparkConf<pyspark.conf.SparkConf>}
+      For configuring Spark.
+  - L{SparkFiles<pyspark.files.SparkFiles>}
+      Access files shipped with jobs.
+  - L{StorageLevel<pyspark.storagelevel.StorageLevel>}
+      Finer-grained cache persistence levels.
 """
+
+
+
 import sys
 import os
 sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))
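
The docstring above lists PySpark's public entry points; a minimal sketch of how they fit together (the app name, master URL, and sample data below are illustrative placeholders, not part of this change):

    from pyspark import SparkConf, SparkContext, StorageLevel

    # Configure and start a local application (names and master URL are made up).
    conf = SparkConf().setAppName("docstring-demo").setMaster("local[2]")
    sc = SparkContext(conf=conf)

    # RDD: the basic abstraction; transformations are lazy, actions trigger work.
    rdd = sc.parallelize(range(10)).persist(StorageLevel.MEMORY_ONLY)
    print(rdd.map(lambda x: x * x).sum())  # 285

    # Accumulator: an "add-only" shared variable that tasks add to.
    counter = sc.accumulator(0)
    rdd.foreach(lambda x: counter.add(1))
    print(counter.value)  # 10

    sc.stop()
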
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index dfdaba274f43a86f97c66f17fdb6553f1a24676a..43f40f8783bfd426cb94f031527cc11ee43059c7 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -45,7 +45,18 @@ def _from_id(bid):
 
 
 class Broadcast(object):
+    """
+    A broadcast variable created with
+    L{SparkContext.broadcast()<pyspark.context.SparkContext.broadcast>}.
+    Access its value through C{.value}.
+    """
+
     def __init__(self, bid, value, java_broadcast=None, pickle_registry=None):
+        """
+        Should not be called directly by users -- use
+        L{SparkContext.broadcast()<pyspark.context.SparkContext.broadcast>}
+        instead.
+        """
         self.value = value
         self.bid = bid
         self._jbroadcast = java_broadcast
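
A short sketch of the pattern these new docstrings describe: create the variable on the driver with SparkContext.broadcast() and read it in tasks through .value (the lookup table, master, and app name are illustrative):

    from pyspark import SparkContext

    sc = SparkContext("local", "broadcast-demo")  # placeholder master and app name

    # Created once on the driver, shipped to the workers by Spark.
    lookup = sc.broadcast({"a": 1, "b": 2, "c": 3})

    # Tasks read the shared data through .value.
    rdd = sc.parallelize(["a", "b", "c", "a"])
    print(rdd.map(lambda k: lookup.value[k]).collect())  # [1, 2, 3, 1]

    sc.stop()
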
diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index a79f348b526bd78299a88756d575964acd492ba0..cf98b0e071e8dbcb602671470783866a2f518dca 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -55,11 +55,11 @@ class SparkConf(object):
     parameters as key-value pairs.
 
     Most of the time, you would create a SparkConf object with
-    C{SparkConf()}, which will load values from `spark.*` Java system
-    properties and any `spark.conf` on your application's classpath.
-    In this case, system properties take priority over `spark.conf`,
-    and any parameters you set directly on the `SparkConf` object take
-    priority over both of those.
+    C{SparkConf()}, which will load values from C{spark.*} Java system
+    properties and any C{spark.conf} on your Spark classpath. In this
+    case, system properties take priority over C{spark.conf}, and any
+    parameters you set directly on the C{SparkConf} object take priority
+    over both of those.
 
     For unit tests, you can also call C{SparkConf(false)} to skip
     loading external settings and get the same configuration no matter
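
A brief sketch of the precedence described above (values set directly on the object win over spark.* system properties and spark.conf entries) and of the no-defaults constructor for tests; the property values here are placeholders, and note that in Python the flag is spelled False:

    from pyspark.conf import SparkConf

    # Settings made directly on the object take priority over spark.* system
    # properties and any spark.conf found on the classpath.
    conf = SparkConf().setMaster("local").setAppName("conf-demo")
    conf.set("spark.executor.memory", "1g")
    print(conf.get("spark.executor.memory"))  # "1g"

    # For unit tests: skip loading external settings entirely.
    test_conf = SparkConf(False).set("spark.app.name", "some-test")
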
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 1244a1495f743e21bf3ab683f87cdb66c402af32..8b028027ebfec327c7ebf5bb17d3461edd4c20b1 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -267,7 +267,8 @@ class SparkContext(object):
 
     def broadcast(self, value):
         """
-        Broadcast a read-only variable to the cluster, returning a C{Broadcast}
+        Broadcast a read-only variable to the cluster, returning a
+        L{Broadcast<pyspark.broadcast.Broadcast>}
         object for reading it in distributed functions. The variable will be
         sent to each cluster only once.
         """
diff --git a/python/run-tests b/python/run-tests
index a0898b3c210fe4f6c95538f6f331ce22698e9238..4b71fff7c12711c856abc95dd322561f6604ecb4 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -29,7 +29,7 @@ FAILED=0
 rm -f unit-tests.log
 
 function run_test() {
-    $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log
+    SPARK_TESTING=1 $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log
     FAILED=$((PIPESTATUS[0]||$FAILED))
 }