Commit 41afa165 authored by Josh Rosen, committed by Davies Liu

[SPARK-8652] [PYSPARK] Check return value for all uses of doctest.testmod()

This patch addresses a critical issue in the PySpark tests:

Several of our Python modules' `__main__` blocks call `doctest.testmod()` to run doctests but never check its return value. As a result, doctest failures in those modules can go unnoticed because they do not fail the build.

Fortunately, there was only one test failure which was masked by this bug: a `pyspark.profiler` doctest was failing due to changes in RDD pipelining.
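For reference, the corrected idiom is the same few lines in each affected module. `doctest.testmod()` returns a `TestResults(failed, attempted)` named tuple, so the `__main__` block has to inspect the failure count and exit non-zero itself; a minimal sketch of the pattern, not tied to any one file:

    if __name__ == "__main__":
        import doctest
        # testmod() returns TestResults(failed, attempted); a failed run
        # must be turned into a non-zero exit status by hand.
        (failure_count, test_count) = doctest.testmod()
        if failure_count:
            exit(-1)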

Author: Josh Rosen <joshrosen@databricks.com>

Closes #7032 from JoshRosen/testmod-fix and squashes the following commits:

60dbdc0 [Josh Rosen] Account for int vs. long formatting change in Python 3
8b8d80a [Josh Rosen] Fix failing test.
e6423f9 [Josh Rosen] Check return code for all uses of doctest.testmod().
parent 37bf76a2
@@ -431,6 +431,8 @@ def main():
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
     main()
@@ -264,4 +264,6 @@ def _start_update_server():
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
@@ -115,4 +115,6 @@ class Broadcast(object):
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
@@ -883,6 +883,7 @@ except ImportError:
 if __name__ == "__main__":
     import doctest
-    print(doctest.testmod())
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
@@ -90,9 +90,11 @@ class Profiler(object):
     >>> sc = SparkContext('local', 'test', conf=conf, profiler_cls=MyCustomProfiler)
     >>> sc.parallelize(range(1000)).map(lambda x: 2 * x).take(10)
     [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
+    >>> sc.parallelize(range(1000)).count()
+    1000
     >>> sc.show_profiles()
     My custom profiles for RDD:1
-    My custom profiles for RDD:2
+    My custom profiles for RDD:3
     >>> sc.stop()
     """
@@ -169,4 +171,6 @@ class BasicProfiler(Profiler):
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
@@ -44,8 +44,8 @@ which contains two batches of two objects:
 >>> rdd.glom().collect()
 [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]
->>> rdd._jrdd.count()
-8L
+>>> int(rdd._jrdd.count())
+8
 >>> sc.stop()
 """
@@ -556,4 +556,6 @@ def write_with_length(obj, stream):
 if __name__ == '__main__':
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
@@ -838,4 +838,6 @@ class ExternalGroupBy(ExternalMerger):
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
@@ -125,4 +125,6 @@ def rddToFileName(prefix, suffix, timestamp):
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)