From 8cb23a1f9a3ed08e57865bcb6cc1cc7902881073 Mon Sep 17 00:00:00 2001
From: Brennon York <brennon.york@capitalone.com>
Date: Thu, 19 Mar 2015 11:18:24 -0400
Subject: [PATCH] [SPARK-5313][Project Infra]: Create simple framework for
 highlighting changes introduced in a PR

Built a simple framework with a `dev/tests` directory to house all pull-request-related tests. I've moved the two original tests (`pr_merge_ability` and `pr_public_classes`) into the new `dev/tests` directory and tested them to the best of my ability. At this point I need to test against Jenkins actually running the new `run-tests-jenkins` script to ensure nothing is broken further down the line.

Author: Brennon York <brennon.york@capitalone.com>

Closes #5072 from brennonyork/SPARK-5313 and squashes the following commits:

8ae990c [Brennon York] added dev/run-tests back, removed echo
5db4ed4 [Brennon York] removed the git checkout
1b50050 [Brennon York] adding echos to see what jenkins is seeing
b823959 [Brennon York] removed run-tests to further test the public_classes pr test
2b9ce12 [Brennon York] added the dev/run-tests call back in
ffd49c0 [Brennon York] remove -c from bash as that was removing the trailing args
735d615 [Brennon York] removed the actual dev/run-tests command to further test jenkins
d579662 [Brennon York] Merge remote-tracking branch 'upstream/master' into SPARK-5313
aa48029 [Brennon York] removed echo lines for testing jenkins
24cd965 [Brennon York] added test output to check within jenkins to verify
3a38e73 [Brennon York] removed the temporary read
9c881ff [Brennon York] updated test suite
183b7ee [Brennon York] added documentation on how to create tests
0bc2efe [Brennon York] ensure each test starts on the current pr branch
1743378 [Brennon York] added tests in test suite
abd7430 [Brennon York] updated to include test suite
---
 dev/run-tests-jenkins          | 75 ++++++++++++++--------------------
 dev/tests/pr_merge_ability.sh  | 39 ++++++++++++++++++
 dev/tests/pr_public_classes.sh | 65 +++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+), 44 deletions(-)
 create mode 100755 dev/tests/pr_merge_ability.sh
 create mode 100755 dev/tests/pr_public_classes.sh

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index 6a849e4f77..5f4000e839 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -49,6 +49,21 @@ SHORT_COMMIT_HASH="${ghprbActualCommit:0:7}"
 
 TESTS_TIMEOUT="120m" # format: http://linux.die.net/man/1/timeout
 
+# Array to capture all tests to run on the pull request. These tests are held under the
+#+ dev/tests/ directory.
+#
+# To write a PR test:
+#+  * the file must reside within the dev/tests directory
+#+  * be an executable bash script
+#+  * accept two arguments on the command line, the first being the Github PR long commit
+#+    hash and the second the Github SHA1 hash
+#+  * and, lastly, return string output to be included in the pr message output that will
+#+    be posted to Github
+PR_TESTS=(  # bare names only: the runner loop resolves each one to ${FWDIR}/dev/tests/<name>.sh
+  "pr_merge_ability"  # its output is also reused as the merge note in the start message
+  "pr_public_classes"
+)
+
 function post_message () {
   local message=$1
   local data="{\"body\": \"$message\"}"
@@ -131,48 +146,22 @@ function send_archived_logs () {
   fi
 }
 
-
-# We diff master...$ghprbActualCommit because that gets us changes introduced in the PR
-#+ and not anything else added to master since the PR was branched.
-
-# check PR merge-ability and check for new public classes
-{
-  if [ "$sha1" == "$ghprbActualCommit" ]; then
-    merge_note=" * This patch **does not merge cleanly**."
-  else
-    merge_note=" * This patch merges cleanly."
+# Shell variable accumulating the output of every PR test; appended to the result message
+pr_message=""
+
+# Run pull request tests (each entry of PR_TESTS maps to dev/tests/<name>.sh)
+for t in "${PR_TESTS[@]}"; do
+  this_test="${FWDIR}/dev/tests/${t}.sh"
+  # Ensure the test is a regular file and is executable (-x alone also accepts directories)
+  if [ -f "$this_test" ] && [ -x "$this_test" ]; then
+    echo "ghprb: $ghprbActualCommit sha1: $sha1"  # trace of the two hashes handed to the test
+    this_mssg="$(bash "${this_test}" "${ghprbActualCommit}" "${sha1}" 2>/dev/null)"
+    # Check if this is the merge test as we submit that note *before* and *after*
+    # the tests run
+    [ "$t" == "pr_merge_ability" ] && merge_note="${this_mssg}"
+    if [ -n "${this_mssg}" ]; then pr_message="${pr_message}\n${this_mssg}"; fi  # literal \n: JSON body
+  fi
-
-  source_files=$(
-      git diff master...$ghprbActualCommit --name-only  `# diff patch against master from branch point` \
-    | grep -v -e "\/test"                               `# ignore files in test directories` \
-    | grep -e "\.py$" -e "\.java$" -e "\.scala$"        `# include only code files` \
-    | tr "\n" " "
-  )
-  new_public_classes=$(
-      git diff master...$ghprbActualCommit ${source_files}      `# diff patch against master from branch point` \
-    | grep "^\+"                              `# filter in only added lines` \
-    | sed -r -e "s/^\+//g"                    `# remove the leading +` \
-    | grep -e "trait " -e "class "            `# filter in lines with these key words` \
-    | grep -e "{" -e "("                      `# filter in lines with these key words, too` \
-    | grep -v -e "\@\@" -e "private"          `# exclude lines with these words` \
-    | grep -v -e "^// " -e "^/\*" -e "^ \* "  `# exclude comment lines` \
-    | sed -r -e "s/\{.*//g"                   `# remove from the { onwards` \
-    | sed -r -e "s/\}//g"                     `# just in case, remove }; they mess the JSON` \
-    | sed -r -e "s/\"/\\\\\"/g"               `# escape double quotes; they mess the JSON` \
-    | sed -r -e "s/^(.*)$/\`\1\`/g"           `# surround with backticks for style` \
-    | sed -r -e "s/^/  \* /g"                 `# prepend '  *' to start of line` \
-    | sed -r -e "s/$/\\\n/g"                  `# append newline to end of line` \
-    | tr -d "\n"                              `# remove actual LF characters`
-  )
-
-  if [ -z "$new_public_classes" ]; then
-    public_classes_note=" * This patch adds no public classes."
-  else
-    public_classes_note=" * This patch adds the following public classes _(experimental)_:"
-    public_classes_note="${public_classes_note}\n${new_public_classes}"
-  fi
-}
+done
 
 # post start message
 {
@@ -181,7 +170,6 @@ function send_archived_logs () {
   PR $ghprbPullId at commit [\`${SHORT_COMMIT_HASH}\`](${COMMIT_URL})."
 
   start_message="${start_message}\n${merge_note}"
-  # start_message="${start_message}\n${public_classes_note}"
 
   post_message "$start_message"
 }
@@ -234,8 +222,7 @@ function send_archived_logs () {
   PR $ghprbPullId at commit [\`${SHORT_COMMIT_HASH}\`](${COMMIT_URL})."
 
   result_message="${result_message}\n${test_result_note}"
-  result_message="${result_message}\n${merge_note}"
-  result_message="${result_message}\n${public_classes_note}"
+  result_message="${result_message}\n${pr_message}"
 
   post_message "$result_message"
 }
diff --git a/dev/tests/pr_merge_ability.sh b/dev/tests/pr_merge_ability.sh
new file mode 100755
index 0000000000..d9a347fe24
--- /dev/null
+++ b/dev/tests/pr_merge_ability.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# This script follows the base format for testing pull requests against
+# another branch and returning results to be published. More details can be
+# found at dev/run-tests-jenkins.
+#
+# Arg1: The Github Pull Request Actual Commit
+#+ known as `ghprbActualCommit` in `run-tests-jenkins`
+# Arg2: The SHA1 hash
+#+ known as `sha1` in `run-tests-jenkins`
+#
+
+pr_commit="$1"
+merge_ref="$2"
+
+# Identical hashes are reported as a failed merge -- presumably sha1 is a distinct merge ref otherwise (TODO confirm)
+merge_note=" * This patch merges cleanly."
+if [[ "${merge_ref}" == "${pr_commit}" ]]; then
+  merge_note=" * This patch **does not merge cleanly**."
+fi
+echo "${merge_note}"
diff --git a/dev/tests/pr_public_classes.sh b/dev/tests/pr_public_classes.sh
new file mode 100755
index 0000000000..927295b88c
--- /dev/null
+++ b/dev/tests/pr_public_classes.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# This script follows the base format for testing pull requests against
+# another branch and returning results to be published. More details can be
+# found at dev/run-tests-jenkins.
+#
+# Arg1: The Github Pull Request Actual Commit
+#+ known as `ghprbActualCommit` in `run-tests-jenkins`
+# Arg2: The SHA1 hash
+#+ known as `sha1` in `run-tests-jenkins`
+#
+
+# Prints the PR-message note about public classes added by this patch. We diff master...$ghprbActualCommit
+#+ because that gets us changes introduced in the PR and not anything else added to master since the PR
+#+ was branched. Paths are passed after "--" so files deleted by the PR are not mistaken for revisions.
+ghprbActualCommit="$1"
+sha1="$2"  # not used by this test; accepted so all PR tests share the same two-argument interface
+
+source_files=$(
+  git diff "master...${ghprbActualCommit}" --name-only  `# diff patch against master from branch point` \
+    | grep -v -e "\/test"                               `# ignore files in test directories` \
+    | grep -e "\.py$" -e "\.java$" -e "\.scala$"        `# include only code files` \
+    | tr "\n" " "
+)
+new_public_classes=$(
+  [ -n "${source_files}" ] && git diff "master...${ghprbActualCommit}" -- ${source_files} `# skip when no code files changed; list is split into paths on purpose` \
+    | grep "^\+"                              `# filter in only added lines` \
+    | sed -r -e "s/^\+//g"                    `# remove the leading +` \
+    | grep -e "trait " -e "class "            `# filter in lines with these key words` \
+    | grep -e "{" -e "("                      `# filter in lines with these key words, too` \
+    | grep -v -e "\@\@" -e "private"          `# exclude lines with these words` \
+    | grep -v -e "^// " -e "^/\*" -e "^ \* "  `# exclude comment lines` \
+    | sed -r -e "s/\{.*//g"                   `# remove from the { onwards` \
+    | sed -r -e "s/\}//g"                     `# just in case, remove }; they mess the JSON` \
+    | sed -r -e "s/\"/\\\\\"/g"               `# escape double quotes; they mess the JSON` \
+    | sed -r -e "s/^(.*)$/\`\1\`/g"           `# surround with backticks for style` \
+    | sed -r -e "s/^/  \* /g"                 `# prepend '  *' to start of line` \
+    | sed -r -e "s/$/\\\n/g"                  `# append newline to end of line` \
+    | tr -d "\n"                              `# remove actual LF characters`
+)
+
+if [ -z "$new_public_classes" ]; then
+  echo " * This patch adds no public classes."
+else
+  public_classes_note=" * This patch adds the following public classes _(experimental)_:"
+  printf '%s\n' "${public_classes_note}\n${new_public_classes}"  # the \n stays a literal two-character escape
+fi
-- 
GitLab