From 4be07d08a70b00de554cd4bfe4b28d7166826464 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Sat, 14 Oct 2023 08:41:03 +0200 Subject: [PATCH] DEV: Attribute correct authors in the release notes (#2246) Retrieve the user logins for all commits from the GitHub API to correctly attribute each user, while trying to keep the number of API requests low to not hit the rate limits Fixes #2245 --- make_release.py | 53 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/make_release.py b/make_release.py index d87044a00..9b106c7d8 100644 --- a/make_release.py +++ b/make_release.py @@ -1,9 +1,11 @@ """Internal tool to update the changelog.""" +import json import subprocess +import urllib.request from dataclasses import dataclass from datetime import datetime, timezone -from typing import List, Tuple +from typing import Dict, List, Tuple from rich.prompt import Prompt @@ -16,6 +18,7 @@ class Change: prefix: str message: str author: str + author_login: str def main(changelog_path: str) -> None: @@ -194,7 +197,7 @@ def get_formatted_changes(git_tag: str) -> Tuple[str, str]: for commit in commits: if commit.prefix not in grouped: grouped[commit.prefix] = [] - grouped[commit.prefix].append({"msg": commit.message, "author": commit.author}) + grouped[commit.prefix].append({"msg": commit.message, "author": commit.author_login}) # Order prefixes order = [ @@ -238,7 +241,7 @@ def get_formatted_changes(git_tag: str) -> Tuple[str, str]: for commit in grouped[prefix]: output += f"- {commit['msg']}\n" output_with_user += ( - f"- {commit['msg']} by @{author2github(commit['author'])}\n" + f"- {commit['msg']} by @{commit['author']}\n" ) del grouped[prefix] @@ -265,6 +268,31 @@ def get_most_recent_git_tag() -> str: return git_tag +def get_author_mapping(line_count: int) -> Dict[str, str]: + """ + Get the authors for each commit. + + Args: + line_count: Number of lines from Git log output. Used for determining how + many commits to fetch. + + Returns: + A mapping of long commit hashes to author login handles. + """ + per_page = min(line_count, 100) + page = 1 + mapping: Dict[str, str] = {} + for _ in range(0, line_count, per_page): + with urllib.request.urlopen( # noqa: S310 + f"https://api.github.com/repos/py-pdf/pypdf/commits?per_page={per_page}&page={page}" + ) as response: + commits = json.loads(response.read()) + page += 1 + for commit in commits: + mapping[commit["sha"]] = commit["author"]["login"] + return mapping + + def get_git_commits_since_tag(git_tag: str) -> List[Change]: """ Get all commits since the last tag. @@ -283,15 +311,17 @@ def get_git_commits_since_tag(git_tag: str) -> List[Change]: "--no-pager", "log", f"{git_tag}..HEAD", - '--pretty=format:"%h:::%s:::%aN"', + '--pretty=format:"%H:::%s:::%aN"', ], stderr=subprocess.STDOUT, ) ).strip("'b\\n") - return [parse_commit_line(line) for line in commits.split("\\n") if line != ""] + lines = commits.split("\\n") + authors = get_author_mapping(len(lines)) + return [parse_commit_line(line, authors) for line in lines if line != ""] -def parse_commit_line(line: str) -> Change: +def parse_commit_line(line: str, authors: Dict[str, str]) -> Change: """ Parse the first line of a git commit message. @@ -316,25 +346,20 @@ def parse_commit_line(line: str) -> Change: # Standardize message.strip() + commit_hash = commit_hash.strip('"') if author.endswith('"'): author = author[:-1] + author_login = authors[commit_hash] prefix = prefix.strip() if prefix == "DOCS": prefix = "DOC" return Change( - commit_hash=commit_hash, prefix=prefix, message=message, author=author + commit_hash=commit_hash, prefix=prefix, message=message, author=author, author_login=author_login ) -def author2github(author: str) -> str: - """Convert the author name to the github name""" - # I'm very uncertain if that always works, but for the current release it - # seems to be the case - return author.replace(" ", "") - - if __name__ == "__main__": main("CHANGELOG.md")