From af8b6cc82336437a55ff7578c6505d251dfa30a9 Mon Sep 17 00:00:00 2001 From: Felix Cheung <felixcheung_m@hotmail.com> Date: Wed, 10 May 2017 09:33:49 -0700 Subject: [PATCH] [SPARK-20689][PYSPARK] python doctest leaking bucketed table ## What changes were proposed in this pull request? It turns out the PySpark doctests in DataFrameWriter call saveAsTable without ever dropping the tables they create, so the bucketed table is leaked after the doctests run. Since we have separate Python tests for bucketed tables, and the doctests do not check any results, there is really no need to run them. This patch marks them with `# doctest: +SKIP` so that they remain only as examples in the generated doc. ## How was this patch tested? Jenkins Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #17932 from felixcheung/pytablecleanup. --- python/pyspark/sql/readwriter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 90ce8f81eb..61a6b76a79 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -575,7 +575,7 @@ class DataFrameWriter(OptionUtils): .. note:: Applicable for file-based data sources in combination with :py:meth:`DataFrameWriter.saveAsTable`. - >>> (df.write.format('parquet') + >>> (df.write.format('parquet') # doctest: +SKIP ... .bucketBy(100, 'year', 'month') ... .mode("overwrite") ... .saveAsTable('bucketed_table')) @@ -602,7 +602,7 @@ class DataFrameWriter(OptionUtils): :param col: a name of a column, or a list of names. :param cols: additional names (optional). If `col` is a list it should be empty. - >>> (df.write.format('parquet') + >>> (df.write.format('parquet') # doctest: +SKIP ... .bucketBy(100, 'year', 'month') ... .sortBy('day') ... .mode("overwrite") -- GitLab