Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
spark
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
cs525-sp18-g07
spark
Commits
5da21f07
Commit
5da21f07
authored
10 years ago
by
Andrew Or
Browse files
Options
Downloads
Patches
Plain Diff
[Release] Translate unknown author names automatically
parent
2d4f6e70
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
dev/create-release/generate-contributors.py
+18
-18
18 additions, 18 deletions
dev/create-release/generate-contributors.py
dev/create-release/releaseutils.py
+93
-0
93 additions, 0 deletions
dev/create-release/releaseutils.py
with
111 additions
and
18 deletions
dev/create-release/generate-contributors.py
+
18
−
18
View file @
5da21f07
...
...
@@ -26,23 +26,11 @@ from releaseutils import *
# You must set the following before use!
JIRA_API_BASE
=
os
.
environ
.
get
(
"
JIRA_API_BASE
"
,
"
https://issues.apache.org/jira
"
)
JIRA_USERNAME
=
os
.
environ
.
get
(
"
JIRA_USERNAME
"
,
None
)
JIRA_PASSWORD
=
os
.
environ
.
get
(
"
JIRA_PASSWORD
"
,
None
)
START_COMMIT
=
os
.
environ
.
get
(
"
START_COMMIT
"
,
"
37b100
"
)
END_COMMIT
=
os
.
environ
.
get
(
"
END_COMMIT
"
,
"
3693ae
"
)
try
:
from
jira.client
import
JIRA
except
ImportError
:
print
"
This tool requires the jira-python library
"
print
"
Install using
'
sudo pip install jira-python
'"
sys
.
exit
(
-
1
)
try
:
import
unidecode
except
ImportError
:
print
"
This tool requires the unidecode library to decode obscure github usernames
"
print
"
Install using
'
sudo pip install unidecode
'"
sys
.
exit
(
-
1
)
# If commit range is not specified, prompt the user to provide it
if
not
START_COMMIT
or
not
END_COMMIT
:
print
"
A commit range is required to proceed.
"
...
...
@@ -52,6 +40,8 @@ if not START_COMMIT or not END_COMMIT:
END_COMMIT
=
raw_input
(
"
Please specify ending commit hash (non-inclusive):
"
)
# Verify provided arguments
if
not
JIRA_USERNAME
:
sys
.
exit
(
"
JIRA_USERNAME must be provided
"
)
if
not
JIRA_PASSWORD
:
sys
.
exit
(
"
JIRA_PASSWORD must be provided
"
)
start_commit_line
=
get_one_line
(
START_COMMIT
)
end_commit_line
=
get_one_line
(
END_COMMIT
)
num_commits
=
num_commits_in_range
(
START_COMMIT
,
END_COMMIT
)
...
...
@@ -70,6 +60,14 @@ if response.lower() != "y" and response:
sys
.
exit
(
"
Ok, exiting
"
)
print
"
==================================================================================
\n
"
# Setup JIRA and github clients. We use two JIRA clients, one with authentication
# and one without, because authentication is slow and required only when we query
# JIRA user details but not Spark issues
jira_options
=
{
"
server
"
:
JIRA_API_BASE
}
jira_client
=
JIRA
(
options
=
jira_options
)
jira_client_auth
=
JIRA
(
options
=
jira_options
,
basic_auth
=
(
JIRA_USERNAME
,
JIRA_PASSWORD
))
github_client
=
Github
()
# Find all commits within this range
print
"
Gathering commits within range [%s..%s)
"
%
(
START_COMMIT
,
END_COMMIT
)
commits
=
get_one_line_commits
(
START_COMMIT
,
END_COMMIT
)
...
...
@@ -129,14 +127,16 @@ warnings = []
# }
#
author_info
=
{}
jira_options
=
{
"
server
"
:
JIRA_API_BASE
}
jira
=
JIRA
(
jira_options
)
print
"
\n
=========================== Compiling contributor list ===========================
"
for
commit
in
filtered_commits
:
commit_hash
=
re
.
findall
(
"
^[a-z0-9]+
"
,
commit
)[
0
]
issues
=
re
.
findall
(
"
SPARK-[0-9]+
"
,
commit
.
upper
())
# Translate the author in case the github username is not an actual name
# Also guard against any special characters used in the name
# Note the JIRA client we use here must have authentication enabled
author
=
get_author
(
commit_hash
)
author
=
unidecode
.
unidecode
(
unicode
(
author
,
"
UTF-8
"
))
# guard against special characters
author
=
unidecode
.
unidecode
(
unicode
(
author
,
"
UTF-8
"
))
author
=
translate_author
(
author
,
github_client
,
jira_client_auth
,
warnings
)
date
=
get_date
(
commit_hash
)
# Parse components from the commit message, if any
commit_components
=
find_components
(
commit
,
commit_hash
)
...
...
@@ -151,7 +151,7 @@ for commit in filtered_commits:
author_info
[
author
][
issue_type
].
add
(
component
)
# Find issues and components associated with this commit
for
issue
in
issues
:
jira_issue
=
jira
.
issue
(
issue
)
jira_issue
=
jira
_client
.
issue
(
issue
)
jira_type
=
jira_issue
.
fields
.
issuetype
.
name
jira_type
=
translate_issue_type
(
jira_type
,
issue
,
warnings
)
jira_components
=
[
translate_component
(
c
.
name
,
commit_hash
,
warnings
)
\
...
...
This diff is collapsed.
Click to expand it.
dev/create-release/releaseutils.py
+
93
−
0
View file @
5da21f07
...
...
@@ -21,6 +21,29 @@
import
re
from
subprocess
import
Popen
,
PIPE
try
:
from
jira.client
import
JIRA
from
jira.exceptions
import
JIRAError
except
ImportError
:
print
"
This tool requires the jira-python library
"
print
"
Install using
'
sudo pip install jira-python
'"
sys
.
exit
(
-
1
)
try
:
from
github
import
Github
from
github
import
GithubException
except
ImportError
:
print
"
This tool requires the PyGithub library
"
print
"
Install using
'
sudo pip install PyGithub
'"
sys
.
exit
(
-
1
)
try
:
import
unidecode
except
ImportError
:
print
"
This tool requires the unidecode library to decode obscure github usernames
"
print
"
Install using
'
sudo pip install unidecode
'"
sys
.
exit
(
-
1
)
# Utility functions that run git commands (written with Git 1.8.5)
def run_cmd(cmd):
    """Run `cmd` as a subprocess and return whatever it wrote to stdout.

    `cmd` is handed to Popen unchanged. Only stdout is piped back; the
    process exit status is not inspected.
    """
    process = Popen(cmd, stdout=PIPE)
    captured_stdout, _ = process.communicate()
    return captured_stdout
def
get_author
(
commit_hash
):
...
...
@@ -122,3 +145,73 @@ def nice_join(str_list):
else
:
return
"
,
"
.
join
(
str_list
[:
-
1
])
+
"
, and
"
+
str_list
[
-
1
]
# Return the full name of the specified user on Github
# If the user doesn't exist, return None
def get_github_name(author, github_client):
    """Look up the full name of the Github user `author`.

    Returns None when no client is supplied or when Github answers with a
    404 for the user. Any other GithubException is re-raised.
    """
    if not github_client:
        return None
    try:
        user = github_client.get_user(author)
        return user.name
    except GithubException as err:
        # A 404 simply means the user does not exist
        if err.status == 404:
            return None
        raise err
# Return the full name of the specified user on JIRA
# If the user doesn't exist, return None
def get_jira_name(author, jira_client):
    """Look up the display name of the JIRA user `author`.

    Returns None when no client is supplied or when JIRA answers with a
    404 for the user. Any other JIRAError is re-raised.
    """
    if not jira_client:
        return None
    try:
        user = jira_client.user(author)
        return user.displayName
    except JIRAError as err:
        # A 404 simply means the user does not exist
        if err.status_code == 404:
            return None
        raise err
# Return whether the given name is in the form <First Name><space><Last Name>
def is_valid_author(author):
    """Return True if `author` splits on single spaces into two or three words.

    Empty or None input is never valid.
    """
    if not author:
        return False
    return len(author.split(" ")) in (2, 3)
# Capitalize the first letter of each word in the given author name
def capitalize_author(author):
    """Capitalize the first letter of every word in `author`.

    Words are separated by single spaces; empty words (from repeated
    spaces) are dropped. The rest of each word is left untouched.
    Returns None for empty or None input.
    """
    if not author:
        return None
    capitalized = []
    for word in author.split(" "):
        if not word:
            continue
        capitalized.append(word[0].capitalize() + word[1:])
    return " ".join(capitalized)
# Cache of translated author names, keyed by the original github author string
translated_authors = {}


# Format the given author in a format appropriate for the contributors list.
# If the author is not an actual name, search github and JIRA for potential
# replacements and log all candidates as a warning.
def translate_author(github_author, github_client, jira_client, warnings):
    """Translate `github_author` into a name suitable for the contributors list.

    github_author: author string taken from the commit.
    github_client: client passed to get_github_name (may be None).
    jira_client:   client passed to get_jira_name (may be None).
    warnings:      list that receives a message describing each lookup result.

    Returns the capitalized author if it already looks like a real name,
    otherwise the github full name if that looks like a real name, otherwise
    the original author unchanged. Lookup results are cached per author so
    each unknown author is searched for (and warned about) only once.
    """
    if is_valid_author(github_author):
        return capitalize_author(github_author)
    # If the translated author is already cached, just return it
    if github_author in translated_authors:
        return translated_authors[github_author]
    # Otherwise, author name is not found, so we need to search for an alternative name
    candidates = set()
    github_name = get_github_name(github_author, github_client)
    jira_name = get_jira_name(github_author, jira_client)
    if is_valid_author(github_name):
        github_name = capitalize_author(github_name)
    if is_valid_author(jira_name):
        jira_name = capitalize_author(jira_name)
    if github_name:
        candidates.add(github_name)
    if jira_name:
        candidates.add(jira_name)
    # Only use the github name as a replacement automatically
    # The JIRA name may not make sense because it can belong to someone else
    if is_valid_author(github_name):
        candidates_message = " (another candidate is %s)" % jira_name if jira_name else ""
        warnings.append("Replacing github user %s with %s%s" %
                        (github_author, github_name, candidates_message))
        # Key the cache by the ORIGINAL author: that is what the lookup above
        # uses, so keying by the translated name would never produce a hit
        translated_authors[github_author] = github_name
        return translated_authors[github_author]
    # No direct replacement, so return the original author and list any candidates found
    # nice_join slices and indexes its argument, so hand it a list rather than
    # the set; sorting also keeps the warning text deterministic
    candidates_message = " (candidates: %s)" % nice_join(sorted(candidates)) if candidates else ""
    warnings.append("Unable to find a replacement for github user %s%s" %
                    (github_author, candidates_message))
    translated_authors[github_author] = github_author
    return translated_authors[github_author]
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment