diff --git a/check-future-commits-prs.py b/check-future-commits-prs.py index c4ff2af4197f..fb4644112afe 100755 --- a/check-future-commits-prs.py +++ b/check-future-commits-prs.py @@ -36,7 +36,7 @@ if exists(join(repo_dir, "repo_config.py")): sys.path.insert(0, repo_dir) import repo_config -from process_pr import get_last_commit +from github_utils import get_last_commit gh = Github(login_or_token=open(expanduser(repo_config.GH_TOKEN)).read().strip()) api_rate_limits(gh) diff --git a/cms_static.py b/cms_static.py index 63036c44467c..dea5228595dd 100644 --- a/cms_static.py +++ b/cms_static.py @@ -11,6 +11,7 @@ CMSBUILD_GH_USER = "cmsbuild" CMSBOT_IGNORE_MSG = "\s*" CMSBOT_NO_NOTIFY_MSG = "\s*" +CMSBOT_TECHNICAL_MSG = "cms-bot internal usage" VALID_CMS_SW_REPOS_FOR_TESTS = [ "cmssw", "cmsdist", diff --git a/github_utils.py b/github_utils.py index 3cd942b341eb..31df1f058b6a 100644 --- a/github_utils.py +++ b/github_utils.py @@ -522,6 +522,29 @@ def get_rate_limits(): return github_api(uri="/rate_limit", method="GET") +def merge_dicts(old, new): + for k, v in new.items(): + if k not in old: + old[k] = new[k] + continue + + if isinstance(v, dict): + old[k] = merge_dicts(old[k], new[k]) + continue + + if isinstance(v, list): + old[k].extend(v) + continue + + if old[k] != new[k]: + raise RuntimeError( + f"Unable to merge dictionaries: value for key {k} differs. " + "Old {old[k]} {type(old[k])}, new {new[k]}, {type(new[k])}" + ) + + return old + + def github_api( uri, params=None, @@ -533,10 +556,15 @@ def github_api( last_page=False, all_pages=True, max_pages=-1, - status=[], + status=None, ): + if status is None: + status = [] + + check_rate_limits(msg=False) + global GH_RATE_LIMIT, GH_PAGE_RANGE - if max_pages > 0 and page > max_pages: + if max_pages > 0 and page > max_pages: # noqa for readability return "[]" if raw else [] if not params: params = {} @@ -604,11 +632,15 @@ def github_api( all_pages=False, ) for page in GH_PAGE_RANGE: - if max_pages > 0 and page > max_pages: + if max_pages > 0 and page > max_pages: # noqa for readability break - data += github_api( + new_data = github_api( uri, params, method, headers, page, raw=raw, per_page=per_page, all_pages=False ) + if isinstance(data, dict): + data = merge_dicts(data, new_data) + else: + data += new_data return data @@ -647,6 +679,10 @@ def pr_get_changed_files(pr): return rez +def get_commit(repository, commit_sha): + return github_api(f"/repos/{repository}/commits/{commit_sha}", method="GET") + + def get_unix_time(data_obj): return data_obj.strftime("%s") @@ -901,3 +937,29 @@ def get_pr(repository, pr_id): data = github_api("/repos/%s/pulls/%s" % (repository, pr_id), method="GET") return data + + +def get_last_commit(pr): + commits_ = get_pr_commits_reversed(pr) + if commits_: + return commits_[-1] + else: + return None + + +def get_pr_commits_reversed(pr): + """ + :param pr: + :return: PaginatedList[Commit] | List[Commit] + """ + try: + # This requires at least PyGithub 1.23.0. Making it optional for the moment. + return pr.get_commits().reversed + except: # noqa + # This seems to fail for more than 250 commits. Not sure if the + # problem is github itself or the bindings. + try: + return reversed(list(pr.get_commits())) + except IndexError: + print("Index error: May be PR with no commits") + return [] diff --git a/process-pull-request.py b/process-pull-request.py index 74c18e757d6d..e4e92a7aa007 100755 --- a/process-pull-request.py +++ b/process-pull-request.py @@ -81,3 +81,4 @@ from process_pr import process_pr process_pr(repo_config, gh, repo, repo.get_issue(prId), opts.dryRun, force=opts.force) + api_rate_limits(gh) diff --git a/process_pr.py b/process_pr.py index 9a3867ceecb9..a5a31d1e6bbb 100644 --- a/process_pr.py +++ b/process_pr.py @@ -20,19 +20,27 @@ CMSBOT_IGNORE_MSG, VALID_CMS_SW_REPOS_FOR_TESTS, CREATE_REPO, + CMSBOT_TECHNICAL_MSG, ) from cms_static import BACKPORT_STR, GH_CMSSW_ORGANIZATION, CMSBOT_NO_NOTIFY_MSG from githublabels import TYPE_COMMANDS, TEST_IGNORE_REASON from repo_config import GH_REPO_ORGANIZATION import re, time +import zlib, base64 from datetime import datetime from os.path import join, exists, dirname from os import environ -from github_utils import edit_pr, api_rate_limits +from github_utils import ( + edit_pr, + api_rate_limits, + get_pr_commits_reversed, + get_commit, +) from github_utils import set_comment_emoji, get_comment_emojis, delete_comment_emoji, set_gh_user from socket import setdefaulttimeout from _py2with3compatibility import run_cmd -from json import dumps, load +from json import dumps, load, loads + try: from categories import CMSSW_LABELS @@ -125,6 +133,8 @@ def format(s, **kwds): r"^\s*(?:(?:@|)cmsbuild\s*[,]*\s+|)(?:please\s*[,]*\s+|)ignore\s+tests-rejected\s+(?:with|)([a-z -]+)$", re.I, ) +REGEX_COMMITS_CACHE = re.compile(r"") +REGEX_IGNORE_COMMIT_COUNT = "\+commit-count" TEST_WAIT_GAP = 720 ALL_CHECK_FUNCTIONS = None EXTRA_RELVALS_TESTS = ["threading", "gpu", "high-stats", "nano"] @@ -175,6 +185,7 @@ def format(s, **kwds): + "|_input|)": [RELVAL_OPTS, "EXTRA_MATRIX_COMMAND_ARGS", True], } +MAX_INITIAL_COMMITS_IN_PR = 200 L2_DATA = {} @@ -203,21 +214,6 @@ def get_commenter_categories(commenter, comment_date): return [] -def get_last_commit(pr): - last_commit = None - try: - # This requires at least PyGithub 1.23.0. Making it optional for the moment. - last_commit = pr.get_commits().reversed[0] - except: - # This seems to fail for more than 250 commits. Not sure if the - # problem is github itself or the bindings. - try: - last_commit = pr.get_commits()[pr.commits - 1] - except IndexError: - print("Index error: May be PR with no commits") - return last_commit - - def get_package_categories(package): cats = [] for cat, packages in list(CMSSW_CATEGORIES.items()): @@ -666,6 +662,21 @@ def get_status_state(context, statuses): return "" +def dumps_maybe_compress(value): + json_ = dumps(value, separators=(",", ":"), sort_keys=True) + if len(json_) > 32000: + return "b64:" + base64.encodebytes(zlib.compress(json_.encode())).decode("ascii", "ignore") + else: + return json_ + + +def loads_maybe_decompress(data): + if data.startswith("b64:"): + data = zlib.decompress(base64.decodebytes(data[4:].encode())).decode() + + return loads(data) + + def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=False): global L2_DATA if (not force) and ignore_issue(repo_config, repo, issue): @@ -740,6 +751,13 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F # For future pre_checks # if prId>=somePRNumber: default_pre_checks+=["some","new","checks"] pre_checks_url = {} + events = {} + + commit_cache = {} + all_commits = [] + ok_too_many_commits = False + warned_too_many_commits = False + if issue.pull_request: pr = repo.get_pull(prId) if pr.changed_files == 0: @@ -870,9 +888,14 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F watchers.update(set(watchingGroups[watcher])) watchers = set([gh_user_char + u for u in watchers]) print("Watchers " + ", ".join(watchers)) - last_commit_obj = get_last_commit(pr) - if last_commit_obj is None: + + all_commits = get_pr_commits_reversed(pr) + + if all_commits: + last_commit_obj = all_commits[0] + else: return + last_commit = last_commit_obj.commit commit_statuses = last_commit_obj.get_combined_status().statuses bot_status = get_status(bot_status_name, commit_statuses) @@ -944,6 +967,7 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F ) print("Pre check status:", pre_checks_state) already_seen = None + technical_comment = None pull_request_updated = False comparison_done = False comparison_notrun = False @@ -1003,6 +1027,24 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F pull_request_updated = True continue + if ( + (commenter == cmsbuild_user) + and re.match(CMSBOT_TECHNICAL_MSG, first_line) + and not technical_comment + ): + technical_comment = comment + continue + + if (commenter == cmsbuild_user) and "This PR contains too many commits" in first_line: + warned_too_many_commits = True + continue + + if commenter in CMSSW_ISSUES_TRACKERS and re.match( + r"^\s*" + REGEX_IGNORE_COMMIT_COUNT + r"\s*$", first_line + ): + ok_too_many_commits = True + continue + assign_type, new_cats = get_assign_categories(first_line) if new_cats: if (assign_type == "new categories assigned:") and (commenter == cmsbuild_user): @@ -1115,6 +1157,40 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F if m.group(2): code_check_apply_patch = True + selected_cats = [] + + if re.match("^([+]1|approve[d]?|sign|signed)$", first_line, re.I): + ctype = "+1" + selected_cats = commenter_categories[:] + elif re.match("^([-]1|reject|rejected)$", first_line, re.I): + ctype = "-1" + selected_cats = commenter_categories[:] + elif re.match("^[+-][a-z][a-z0-9-]+$", first_line, re.I): + category_name = first_line[1:].lower() + if category_name in commenter_categories: + ctype = first_line[0] + "1" + selected_cats = [category_name] + + if "orp" in selected_cats: + # Lines 1281, 1286 of original code + mustClose = False + + if "tests" in selected_cats: + selected_cats.remove("tests") + + if "code-checks" in selected_cats: + selected_cats.remove("code-checks") + + if selected_cats: + events[comment.created_at] = { + "type": "sign", + "value": { + "ctype": ctype, + "selected_cats": selected_cats, + "comment": comment, + }, + } + # Ignore all other messages which are before last commit. if issue.pull_request and (comment.created_at < last_commit_date): continue @@ -1242,6 +1318,7 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F abort_test = comment test_comment = None signatures["tests"] = "pending" + continue elif REGEX_TEST_IGNORE.match(first_line): reason = REGEX_TEST_IGNORE.match(first_line)[1].strip() if reason not in TEST_IGNORE_REASON: @@ -1249,43 +1326,126 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F if not dryRun: set_comment_emoji(comment.id, repository, "-1") reason = "" - if reason: override_tests_failure = reason if not dryRun: set_comment_emoji(comment.id, repository) + # end of parsing comments section - # Check L2 signoff for users in this PR signing categories - if [x for x in commenter_categories if x in signing_categories]: - ctype = "" - selected_cats = [] - if re.match("^([+]1|approve[d]?|sign|signed)$", first_line, re.I): - ctype = "+1" - selected_cats = commenter_categories - elif re.match("^([-]1|reject|rejected)$", first_line, re.I): - ctype = "-1" - selected_cats = commenter_categories - elif re.match("^[+-][a-z][a-z0-9-]+$", first_line, re.I): - category_name = first_line[1:].lower() - if category_name in commenter_categories: - ctype = first_line[0] + "1" - selected_cats = [category_name] - if ctype == "+1": - for sign in selected_cats: - signatures[sign] = "approved" - if (test_comment is None) and ( - (repository in auto_test_repo) or ("*" in auto_test_repo) - ): - test_comment = comment - if sign == "orp": - mustClose = False - elif ctype == "-1": - for sign in selected_cats: - signatures[sign] = "rejected" - if sign == "orp": - mustClose = False + if issue.pull_request: + if ( + (not warned_too_many_commits) + and not ok_too_many_commits + and pr.commits >= MAX_INITIAL_COMMITS_IN_PR + ): + if not dryRun: + issue.create_comment( + f"This PR contains too many commits ({pr.commits} > {MAX_INITIAL_COMMITS_IN_PR}). " + "Make sure you chose the right target branch.\n" + f"{', '.join([gh_user_char + name for name in CMSSW_ISSUES_TRACKERS])}, " + "you can override this check with `+commit-count`." + ) + return - # end of parsing comments section + # Get the commit cache from `already_seen` commit or technical commit + print("Checking for commit cache") + cache_comment = None + + if technical_comment: + cache_comment = technical_comment + else: + if already_seen: + cache_comment = already_seen + + if cache_comment: + seen_commits_match = REGEX_COMMITS_CACHE.search(cache_comment.body) + if seen_commits_match: + print("Loading commit cache") + commit_cache = loads_maybe_decompress(seen_commits_match[1]) + + if pr.commits < MAX_INITIAL_COMMITS_IN_PR or ok_too_many_commits: + for commit in all_commits: + if commit.sha not in commit_cache: + commit_cache[commit.sha] = { + "time": int(commit.commit.committer.date.timestamp()), + "files": sorted( + x["filename"] for x in get_commit(repo.full_name, commit.sha)["files"] + ), + } + + cache_entry = commit_cache[commit.sha] + events[datetime.fromtimestamp(cache_entry["time"])] = { + "type": "commit", + "value": cache_entry["files"], + } + + print("Saving commit cache") + old_body = cache_comment.body if cache_comment else CMSBOT_TECHNICAL_MSG + new_body = ( + REGEX_COMMITS_CACHE.sub("", old_body) + + "" + ) + if len(new_body) <= 65535: + if not dryRun: + if cache_comment: + if old_body != new_body: + cache_comment.edit(new_body) + else: + issue.create_comment(new_body) + else: + if cache_comment: + print("DRY RUN: Updating existing comment with text") + print(new_body.encode("ascii", "ignore").decode()) + else: + print("DRY RUN: Creating technical comment with text") + print(new_body.encode("ascii", "ignore").decode()) + else: + raise RuntimeError(f"Updated comment body too long: {len(new_body)} > 65535") + + events = sorted(events.items()) + import pprint + + print("Events:", pprint.pformat(events).encode("ascii", "ignore").decode()) + print("Recalculating signatures", signatures) + + # Process commits and signatures + for _, event in events: + if event["type"] == "sign": + selected_cats = event["value"]["selected_cats"] + ctype = event["value"]["ctype"] + if any(x in signing_categories for x in selected_cats): + if ctype == "+1": + for sign in selected_cats: + signatures[sign] = "approved" + if ( + (test_comment is None) + and ((repository in auto_test_repo) or ("*" in auto_test_repo)) + and sign not in ("code-checks", "tests", "orp") + ): + test_comment = event["value"]["comment"] + elif ctype == "-1": + for sign in selected_cats: + signatures[sign] = "rejected" + else: + print(f"Ignoring event: {selected_cats} includes none of {signing_categories}") + elif event["type"] == "commit": + test_comment = None + if cmssw_repo: + chg_categories = set() + for fn in event["value"]: + chg_categories.update( + get_package_categories(cmssw_file2Package(repo_config, fn)) + ) + chg_categories.update(assign_cats.keys()) + + for cat in chg_categories: + if cat in signing_categories: + signatures[cat] = "pending" + else: + for cat in signing_categories: + signatures[cat] = "pending" if push_test_issue: auto_close_push_test_issue = True @@ -1638,7 +1798,7 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F ) print("Blockers:", blockers) - print("Changed Labels:", labels - old_labels, old_labels - labels) + print("Changed Labels: added", labels - old_labels, "removed", old_labels - labels) if old_labels == labels: print("Labels unchanged.") elif not dryRunOrig: @@ -1934,7 +2094,7 @@ def process_pr(repo_config, gh, repo, issue, dryRun, cmsbuild_user=None, force=F print("Pull request is already fully signed. Not sending message.") else: print("Already notified L2 about " + str(pr.number)) - if commentMsg and not dryRun: + if commentMsg and dryRun: print("The following comment will be made:") try: print(commentMsg.decode("ascii", "replace"))