diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index b0c51b1e9992e63ffe0b8e231351054a1876c00f..d31f3fb8f6046fb19a8a9842db0605150229e202 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -399,7 +399,8 @@ class DataFrameReader(OptionUtils):
         accessible via JDBC URL ``url`` and connection ``properties``.
 
         Partitions of the table will be retrieved in parallel if either ``column`` or
-        ``predicates`` is specified.
+        ``predicates`` is specified. ``lowerBound``, ``upperBound`` and ``numPartitions``
+        are needed when ``column`` is specified.
 
         If both ``column`` and ``predicates`` are specified, ``column`` will be used.
 
@@ -429,8 +430,10 @@ class DataFrameReader(OptionUtils):
             for k in properties:
                 jprop.setProperty(k, properties[k])
             if column is not None:
-                if numPartitions is None:
-                    numPartitions = self._spark._sc.defaultParallelism
+                assert lowerBound is not None, "lowerBound cannot be None when ``column`` is specified"
+                assert upperBound is not None, "upperBound cannot be None when ``column`` is specified"
+                assert numPartitions is not None, \
+                    "numPartitions cannot be None when ``column`` is specified"
                 return self._df(self._jreader.jdbc(url, table, column, int(lowerBound),
                                                    int(upperBound), int(numPartitions), jprop))
             if predicates is not None:
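
To illustrate the contract this patch enforces, here is a minimal sketch of a partitioned JDBC read (not part of the patch itself). The JDBC URL, the `employees` table, the integer `id` column, and the credentials are all hypothetical placeholders; only `DataFrameReader.jdbc` and its parameters come from the code above.

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("jdbc-partitioned-read").getOrCreate()

# Hypothetical connection details; substitute your own JDBC URL, table and driver.
url = "jdbc:postgresql://localhost:5432/testdb"
properties = {"user": "user", "password": "secret", "driver": "org.postgresql.Driver"}

# After this change, passing ``column`` without all three bounds raises an
# AssertionError instead of silently defaulting numPartitions:
#   spark.read.jdbc(url, "employees", column="id", properties=properties)

# Valid partitioned read: lowerBound, upperBound and numPartitions are all supplied,
# so the table is fetched over 8 parallel JDBC connections, each scanning one
# stride of the ``id`` range.
df = spark.read.jdbc(
    url,
    "employees",
    column="id",          # numeric column used to compute the partition strides
    lowerBound=1,         # smallest ``id`` value expected in the table
    upperBound=100000,    # largest ``id`` value expected in the table
    numPartitions=8,      # number of partitions (and concurrent connections)
    properties=properties,
)
```

Requiring the bounds explicitly makes the partitioning deterministic: the previous fallback to `defaultParallelism` could split a `NULL` lower/upper bound into nonsensical ranges, whereas a loud `AssertionError` surfaces the misuse immediately.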