From 64515e5fbfd694d06fdbc28040fce7baf90a32aa Mon Sep 17 00:00:00 2001
From: Holden Karau <holden@us.ibm.com>
Date: Fri, 12 Feb 2016 02:13:06 -0800
Subject: [PATCH] [SPARK-13154][PYTHON] Add linting for pydocs

We should have lint rules using sphinx to automatically catch the pydoc issues that are sometimes introduced.

Right now ./dev/lint-python skips building the docs if sphinx isn't present, but it might make sense to fail hard instead; it is just a matter of whether we want to insist that all PySpark developers have sphinx installed.

Author: Holden Karau <holden@us.ibm.com>

Closes #11109 from holdenk/SPARK-13154-add-pydoc-lint-for-docs.
---
 dev/lint-python     | 24 ++++++++++++++++++++++++
 python/docs/conf.py |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/dev/lint-python b/dev/lint-python
index 1765a07d2f..068337d273 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -24,6 +24,8 @@ PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/run-tests.py ./dev/r
 PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
 PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
 PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt"
+SPHINXBUILD=${SPHINXBUILD:=sphinx-build}
+SPHINX_REPORT_PATH="$SPARK_ROOT_DIR/dev/sphinx-report.txt"
 
 cd "$SPARK_ROOT_DIR"
 
@@ -96,6 +98,28 @@ fi
 
 rm "$PEP8_REPORT_PATH"
 
+# Check that the documentation builds acceptably; skip the check if sphinx is not installed.
+if hash "$SPHINXBUILD" 2> /dev/null; then
+  cd python/docs
+  make clean
+  # Treat warnings as errors (-W). Branch on this build's own exit status so that a lint_status already set by an earlier check is not reported as a pydoc failure.
+  if ! SPHINXOPTS="-a -W" make html &> "$SPHINX_REPORT_PATH"; then
+    lint_status=1
+    echo "pydoc checks failed."
+    cat "$SPHINX_REPORT_PATH"
+    echo "re-running make html to print full warning list"
+    make clean
+    SPHINXOPTS="-a" make html
+  else
+    echo "pydoc checks passed."
+  fi
+  rm "$SPHINX_REPORT_PATH"
+  cd ../..
+else
+  echo >&2 "The $SPHINXBUILD command was not found. Skipping pydoc checks for now"
+fi
+
+
 # for to_be_checked in "$PATHS_TO_CHECK"
 # do
 #     pylint --rcfile="$SPARK_ROOT_DIR/pylintrc" $to_be_checked >> "$PYLINT_REPORT_PATH"
diff --git a/python/docs/conf.py b/python/docs/conf.py
index 365d6af514..d35bf73c30 100644
--- a/python/docs/conf.py
+++ b/python/docs/conf.py
@@ -334,3 +334,6 @@ epub_exclude_files = ['search.html']
 
 # If false, no index is generated.
 #epub_use_index = True
+
+# Have the Sphinx linkcheck builder skip the sample Kinesis endpoint URL (it is not expected to resolve)
+linkcheck_ignore = [r'https://kinesis.us-east-1.amazonaws.com']
-- 
GitLab