diff --git a/.gitignore b/.gitignore
index c67cffa1c4375c4da852ca8b71f9b2690ad2580b..3b9086c7187dc78f9ce2dc1b137bf55ce5e963cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 *.ipr
 *.iml
 *.iws
+*.pyc
 .idea/
 .idea_modules/
 sbt/*.jar
@@ -49,6 +50,8 @@ dependency-reduced-pom.xml
 checkpoint
 derby.log
 dist/
+dev/create-release/*txt
+dev/create-release/*new
 spark-*-bin-*.tgz
 unit-tests.log
 /lib/
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index 99c29ef9ff8b6ba116b06fe2f9ff37cda37cac79..a3b78a3eac6d098afe9714ca6eb73a58f619bef4 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -26,8 +26,6 @@ from releaseutils import *
 
 # You must set the following before use!
 JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
-JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
-JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
 START_COMMIT = os.environ.get("START_COMMIT", "37b100")
 END_COMMIT = os.environ.get("END_COMMIT", "3693ae")
 
@@ -40,8 +38,6 @@ if not START_COMMIT or not END_COMMIT:
         END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ")
 
 # Verify provided arguments
-if not JIRA_USERNAME: sys.exit("JIRA_USERNAME must be provided")
-if not JIRA_PASSWORD: sys.exit("JIRA_PASSWORD must be provided")
 start_commit_line = get_one_line(START_COMMIT)
 end_commit_line = get_one_line(END_COMMIT)
 num_commits = num_commits_in_range(START_COMMIT, END_COMMIT)
@@ -60,14 +56,6 @@ if response.lower() != "y" and response:
     sys.exit("Ok, exiting")
 print "==================================================================================\n"
 
-# Setup JIRA and github clients. We use two JIRA clients, one with authentication
-# and one without, because authentication is slow and required only when we query
-# JIRA user details but not Spark issues
-jira_options = { "server": JIRA_API_BASE }
-jira_client = JIRA(options = jira_options)
-jira_client_auth = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
-github_client = Github()
-
 # Find all commits within this range
 print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT)
 commits = get_one_line_commits(START_COMMIT, END_COMMIT)
@@ -105,13 +93,17 @@ if releases or reverts or nojiras:
     if reverts: print "Reverts (%d)" % len(reverts); print_indented(reverts)
     if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
     print "==================== Warning: the above commits will be ignored ==================\n"
-response = raw_input("%d commits left to process. Ok to proceed? [y/N] " % len(filtered_commits))
-if response.lower() != "y":
+response = raw_input("%d commits left to process. Ok to proceed? [Y/n] " % len(filtered_commits))
+if response.lower() != "y" and response:
     sys.exit("Ok, exiting.")
 
 # Keep track of warnings to tell the user at the end
 warnings = []
 
+# Mapping from the invalid author name to its associated JIRA issues
+# E.g. andrewor14 -> set("SPARK-2413", "SPARK-3551", "SPARK-3471")
+invalid_authors = {}
+
 # Populate a map that groups issues and components by author
 # It takes the form: Author name -> { Contribution type -> Spark components }
 # For instance,
@@ -127,16 +119,23 @@ warnings = []
 # }
 #
 author_info = {}
+jira_options = { "server": JIRA_API_BASE }
+jira_client = JIRA(options = jira_options)
 print "\n=========================== Compiling contributor list ==========================="
 for commit in filtered_commits:
     commit_hash = re.findall("^[a-z0-9]+", commit)[0]
     issues = re.findall("SPARK-[0-9]+", commit.upper())
-    # Translate the author in case the github username is not an actual name
-    # Also guard against any special characters used in the name
-    # Note the JIRA client we use here must have authentication enabled
     author = get_author(commit_hash)
-    author = unidecode.unidecode(unicode(author, "UTF-8"))
-    author = translate_author(author, github_client, jira_client_auth, warnings)
+    author = unidecode.unidecode(unicode(author, "UTF-8")).strip() # guard against special characters
+    # If the author name is invalid, keep track of it along
+    # with all associated issues so we can translate it later
+    if is_valid_author(author):
+        author = capitalize_author(author)
+    else:
+        if author not in invalid_authors:
+            invalid_authors[author] = set()
+        for issue in issues:
+            invalid_authors[author].add(issue)
     date = get_date(commit_hash)
     # Parse components from the commit message, if any
     commit_components = find_components(commit, commit_hash)
@@ -147,7 +146,7 @@ for commit in filtered_commits:
             author_info[author] = {}
         if issue_type not in author_info[author]:
             author_info[author][issue_type] = set()
-        for component in all_components:
+        for component in components:
             author_info[author][issue_type].add(component)
     # Find issues and components associated with this commit
     for issue in issues:
@@ -168,7 +167,6 @@ print "=========================================================================
 # Each line takes the format "Author name - semi-colon delimited contributions"
 # e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core
 # e.g. Tathagata Das - Bug fixes and new features in Streaming
-contributors_file_name = "contributors.txt"
 contributors_file = open(contributors_file_name, "w")
 authors = author_info.keys()
 authors.sort()
@@ -192,11 +190,23 @@ for author in authors:
     # Do not use python's capitalize() on the whole string to preserve case
     assert contribution
     contribution = contribution[0].capitalize() + contribution[1:]
+    # If the author name is invalid, use an intermediate format that
+    # can be translated through translate-contributors.py later
+    # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
+    if author in invalid_authors and invalid_authors[author]:
+        author = author + "/" + "/".join(invalid_authors[author])
     line = "%s - %s" % (author, contribution)
     contributors_file.write(line + "\n")
 contributors_file.close()
 print "Contributors list is successfully written to %s!" % contributors_file_name
 
+# Prompt the user to translate author names if necessary
+if invalid_authors:
+    warnings.append("Found the following invalid authors:")
+    for a in invalid_authors:
+        warnings.append("\t%s" % a)
+    warnings.append("Please run './translate-contributors.py' to translate them.")
+
 # Log any warnings encountered in the process
 if warnings:
     print "\n============ Warnings encountered while creating the contributor list ============"
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index 0d6830b11dc7302e78a958fcb3762daeb9e86a37..76a10c32886d4f29a1fce9a813c200f447a9bb5f 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -44,6 +44,9 @@ except ImportError:
     print "Install using 'sudo pip install unidecode'"
     sys.exit(-1)
 
+# Contributors list file name
+contributors_file_name = "contributors.txt"
+
 # Utility functions run git commands (written with Git 1.8.5)
 def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
 def get_author(commit_hash):
@@ -69,7 +72,8 @@ known_issue_types = {
     "build": "build fixes",
     "improvement": "improvements",
     "new feature": "new features",
-    "documentation": "documentation"
+    "documentation": "documentation",
+    "test": "test"
 }
 
 # Maintain a mapping for translating component names when creating the release notes
@@ -182,36 +186,3 @@ def capitalize_author(author):
     words = [w[0].capitalize() + w[1:] for w in words if w]
     return " ".join(words)
 
-# Maintain a mapping of translated author names as a cache
-translated_authors = {}
-
-# Format the given author in a format appropriate for the contributors list.
-# If the author is not an actual name, search github and JIRA for potential
-# replacements and log all candidates as a warning.
-def translate_author(github_author, github_client, jira_client, warnings):
-    if is_valid_author(github_author):
-        return capitalize_author(github_author)
-    # If the translated author is already cached, just return it
-    if github_author in translated_authors:
-        return translated_authors[github_author]
-    # Otherwise, author name is not found, so we need to search for an alternative name
-    candidates = set()
-    github_name = get_github_name(github_author, github_client)
-    jira_name = get_jira_name(github_author, jira_client)
-    if is_valid_author(github_name): github_name = capitalize_author(github_name)
-    if is_valid_author(jira_name): jira_name = capitalize_author(jira_name)
-    if github_name: candidates.add(github_name)
-    if jira_name: candidates.add(jira_name)
-    # Only use the github name as a replacement automatically
-    # The JIRA name may not make sense because it can belong to someone else
-    if is_valid_author(github_name):
-        candidates_message = " (another candidate is %s)" % jira_name if jira_name else ""
-        warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message))
-        translated_authors[github_name] = github_name
-        return translated_authors[github_name]
-    # No direct replacement, so return the original author and list any candidates found
-    candidates_message = " (candidates: %s)" % nice_join(candidates) if candidates else ""
-    warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message))
-    translated_authors[github_author] = github_author
-    return translated_authors[github_author]
-
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
new file mode 100755
index 0000000000000000000000000000000000000000..ef4625b003cb6b1081e460b95fe803c1bb70ddb7
--- /dev/null
+++ b/dev/create-release/translate-contributors.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script translates invalid authors in the contributors list generated
+# by generate-contributors.py. When the script encounters an author name that
+# is considered invalid, it searches Github and JIRA in an attempt to search
+# for replacements. This tool runs in two modes:
+#
+# (1) Interactive mode: For each invalid author name, this script presents
+# all candidate replacements to the user and awaits user response. In this
+# mode, the user may also input a custom name. This is the default.
+#
+# (2) Non-interactive mode: For each invalid author name, this script replaces
+# the name with the first valid candidate it can find. If there is none, it
+# uses the original name. This can be enabled through the --non-interactive flag.
+
+import os
+import sys
+
+from releaseutils import *
+
+# You must set the following before use!
+JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
+JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
+JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
+if not JIRA_USERNAME or not JIRA_PASSWORD:
+    sys.exit("Both JIRA_USERNAME and JIRA_PASSWORD must be set")
+
+# Write new contributors list to <old_file_name>.new
+if not os.path.isfile(contributors_file_name):
+    print "Contributors file %s does not exist!" % contributors_file_name
+    print "Have you run ./generate-contributors.py yet?"
+    sys.exit(1)
+contributors_file = open(contributors_file_name, "r")
+new_contributors_file_name = contributors_file_name + ".new"
+new_contributors_file = open(new_contributors_file_name, "w")
+warnings = []
+
+# In non-interactive mode, this script will choose the first replacement that is valid
+INTERACTIVE_MODE = True
+if len(sys.argv) > 1:
+    options = set(sys.argv[1:])
+    if "--non-interactive" in options:
+        INTERACTIVE_MODE = False
+if INTERACTIVE_MODE:
+    print "Running in interactive mode. To disable this, provide the --non-interactive flag."
+
+# Setup Github and JIRA clients
+jira_options = { "server": JIRA_API_BASE }
+jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
+github_client = Github()
+
+# Generate candidates for the given author. This should only be called if the given author
+# name does not represent a full name as this operation is somewhat expensive. Under the
+# hood, it makes several calls to the Github and JIRA API servers to find the candidates.
+#
+# This returns a list of (candidate name, source) 2-tuples. E.g.
+# [
+#   (NOT_FOUND, "No full name found for Github user andrewor14"),
+#   ("Andrew Or", "Full name of JIRA user andrewor14"),
+#   ("Andrew Orso", "Full name of SPARK-1444 assignee andrewor14"),
+#   ("Andrew Ordall", "Full name of SPARK-1663 assignee andrewor14"),
+#   (NOT_FOUND, "No assignee found for SPARK-1763")
+# ]
+NOT_FOUND = "Not found"
+def generate_candidates(author, issues):
+    candidates = []
+    # First check for full name of Github user
+    github_name = get_github_name(new_author, github_client)
+    if github_name:
+        candidates.append((github_name, "Full name of Github user %s" % new_author))
+    else:
+        candidates.append((NOT_FOUND, "No full name found for Github user %s" % new_author))
+    # Then do the same for JIRA user
+    jira_name = get_jira_name(new_author, jira_client)
+    if jira_name:
+        candidates.append((jira_name, "Full name of JIRA user %s" % new_author))
+    else:
+        candidates.append((NOT_FOUND, "No full name found for JIRA user %s" % new_author))
+    # Then do the same for the assignee of each of the associated JIRAs
+    # Note that a given issue may not have an assignee, or the assignee may not have a full name
+    for issue in issues:
+        jira_issue = jira_client.issue(issue)
+        jira_assignee = jira_issue.fields.assignee
+        if jira_assignee:
+            user_name = jira_assignee.name
+            display_name = jira_assignee.displayName
+            if display_name:
+                candidates.append((display_name, "Full name of %s assignee %s" % (issue, user_name)))
+            else:
+                candidates.append((NOT_FOUND, "No full name found for %s assignee %" % (issue, user_name)))
+        else:
+            candidates.append((NOT_FOUND, "No assignee found for %s" % issue))
+    # Guard against special characters in candidate names
+    # Note that the candidate name may already be in unicode (JIRA returns this)
+    for i, (candidate, source) in enumerate(candidates):
+        try:
+            candidate = unicode(candidate, "UTF-8")
+        except TypeError:
+            # already in unicode
+            pass
+        candidate = unidecode.unidecode(candidate).strip()
+        candidates[i] = (candidate, source)
+    return candidates
+
+# Translate each invalid author by searching for possible candidates from Github and JIRA
+# In interactive mode, this script presents the user with a list of choices and have the user
+# select from this list. Additionally, the user may also choose to enter a custom name.
+# In non-interactive mode, this script picks the first valid author name from the candidates
+# If no such name exists, the original name is used (without the JIRA numbers).
+print "\n========================== Translating contributor list =========================="
+for line in contributors_file:
+    author = line.split(" - ")[0]
+    print "Processing author %s" % author
+    if not author:
+        print "    ERROR: Expected the following format <author> - <contributions>"
+        print "    ERROR: Actual = %s" % line
+    if not is_valid_author(author):
+        new_author = author.split("/")[0]
+        issues = author.split("/")[1:]
+        candidates = generate_candidates(new_author, issues)
+        # Print out potential replacement candidates along with the sources, e.g.
+        #   [X] No full name found for Github user andrewor14
+        #   [0] Andrew Or - Full name of JIRA user andrewor14
+        #   [1] Andrew Orso - Full name of SPARK-1444 assignee andrewor14
+        #   [2] Andrew Ordall - Full name of SPARK-1663 assignee andrewor14
+        #   [X] No assignee found for SPARK-1763
+        #   [3] Custom
+        candidate_names = []
+        for candidate, source in candidates:
+            if candidate == NOT_FOUND:
+                print "    [X] %s" % source
+            else:
+                index = len(candidate_names)
+                candidate_names.append(candidate)
+                print "    [%d] %s - %s" % (index, candidate, source)
+        custom_index = len(candidate_names)
+        # In interactive mode, additionally provide "custom" option and await user response
+        if INTERACTIVE_MODE:
+            print "    [%d] Custom" % custom_index
+            response = raw_input("    Your choice: ")
+            while not response.isdigit() or int(response) > custom_index:
+                response = raw_input("    Please enter an integer between 0 and %d: " % custom_index)
+            response = int(response)
+            if response == custom_index:
+                new_author = raw_input("    Please type a custom name for this author: ")
+            else:
+                new_author = candidate_names[response]
+        # In non-interactive mode, just pick the first candidate
+        else:
+            valid_candidate_names = [name for name, _ in candidates\
+                if is_valid_author(name) and name != NOT_FOUND]
+            if valid_candidate_names:
+                new_author = valid_candidate_names[0]
+        # Finally, capitalize the author and replace the original one with it
+        # If the final replacement is still invalid, log a warning
+        if is_valid_author(new_author):
+            new_author = capitalize_author(new_author)
+        else:
+            warnings.append("Unable to find a valid name %s for author %s" % (new_author, author))
+        print "    * Replacing %s with %s" % (author, new_author)
+        line = line.replace(author, new_author)
+    new_contributors_file.write(line)
+print "==================================================================================\n"
+contributors_file.close()
+new_contributors_file.close()
+
+print "Translated contributors list successfully written to %s!" % new_contributors_file_name
+
+# Log any warnings encountered in the process
+if warnings:
+    print "\n========== Warnings encountered while translating the contributor list ==========="
+    for w in warnings: print w
+    print "Please manually correct these in the final contributors list at %s." % new_contributors_file_name
+    print "==================================================================================\n"
+