diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py
index 0669be621eef6e..0dc82bd809c4cb 100755
--- a/scripts/tools/memory/gh_report.py
+++ b/scripts/tools/memory/gh_report.py
@@ -170,7 +170,9 @@ class SizeDatabase(memdf.util.sqlite.Database):
             pr        INTEGER DEFAULT 0,  -- Github PR number
             time      INTEGER NOT NULL,   -- Unix-epoch timestamp
             artifact  INTEGER DEFAULT 0,  -- Github artifact ID
-            commented INTEGER DEFAULT 0,
+            commented INTEGER DEFAULT 0,  -- 1 if recorded in a GH comment
+            ref       TEXT,               -- Target git ref
+            event     TEXT,               -- Github build trigger event
             UNIQUE(thing_id, hash, parent, pr, time, artifact)
         )
         """, """
@@ -194,13 +196,15 @@ def add_sizes(self, **kwargs):
        """
        Add a size report to the database.

-        The incoming arguments must contain the non-ID column names from
-        ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a sequence
-        of mappings containing 'name' and 'size'.
+        The incoming arguments must contain the required non-ID column names
+        from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a
+        sequence of mappings containing 'name' and 'size'.
        """
        td = {k: kwargs[k] for k in ('platform', 'config', 'target')}
        thing = self.store_and_return_id('thing', **td)
-        bd = {k: kwargs[k] for k in ('hash', 'parent', 'time')}
+        bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')}
+        if 'ref' in kwargs:
+            bd['ref'] = kwargs['ref']
        cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
        build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
        if build is None:
@@ -213,11 +217,11 @@ def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
        """Add sizes from a JSON size report."""
        r = origin.copy()
        r.update(json.loads(s))
-        by = r.get('by', 'section')
-        r['sizes'] = [{
-            'name': i[by],
-            'size': i['size']
-        } for i in r['frames'][by]]
+        r['sizes'] = []
+        # Add section sizes.
+        for i in r['frames'].get('section', []):
+            r['sizes'].append({'name': i['section'], 'size': i['size']})
+        # Add segment sizes.
        for i in r['frames'].get('wr', []):
            r['sizes'].append({
                'name': ('(read only)', '(read/write)')[int(i['wr'])],
@@ -257,7 +261,7 @@
        artifact_pages = self.config['github.limit-artifact-pages']

        # Size artifacts have names of the form:
-        #   Size,{group},{pr},{commit_hash},{parent_hash}
+        #   Size,{group},{pr},{commit_hash},{parent_hash}[,{event}]
        # Record them keyed by group and commit_hash to match them up
        # after we have the entire list.
        page = 0
@@ -266,12 +270,17 @@
            if not i.artifacts:
                break
            for a in i.artifacts:
-                if a.name.startswith('Size,'):
-                    _, group, pr, commit, parent, *_ = (a.name + ',').split(
-                        ',', 5)
+                if a.name.startswith('Size,') and a.name.count(',') >= 4:
+                    _, group, pr, commit, parent, *etc = a.name.split(',')
                    a.parent = parent
                    a.pr = pr
                    a.created_at = dateutil.parser.isoparse(a.created_at)
+                    # Old artifact names don't include the event.
+                    if etc:
+                        event = etc[0]
+                    else:
+                        event = 'push' if pr == '0' else 'pull_request'
+                    a.event = event
                    if group not in size_artifacts:
                        size_artifacts[group] = {}
                    size_artifacts[group][commit] = a
@@ -286,7 +295,7 @@
        for group, group_reports in size_artifacts.items():
            logging.debug('ASG: group %s', group)
            for report in group_reports.values():
-                if self.config['report.pr' if int(report.pr) else 'report.push']:
+                if self.should_report(report.event):
                    if report.parent not in group_reports:
                        logging.debug('ASN: No match for %s', report.name)
                        continue
@@ -308,8 +317,10 @@
            try:
                blob = self.gh.actions.download_artifact(i, 'zip')
            except Exception as e:
+                blob = None
                logging.error('Failed to download artifact %d: %s', i, e)
-            self.add_sizes_from_zipfile(io.BytesIO(blob), {'artifact': i})
+            if blob:
+                self.add_sizes_from_zipfile(io.BytesIO(blob), {'artifact': i})

    def read_inputs(self):
        """Read size report from github and/or local files."""
@@ -321,11 +332,15 @@
    def select_matching_commits(self):
        """Find matching builds, where one's commit is the other's parent."""
        return self.execute('''
-            SELECT DISTINCT c.pr AS pr, c.hash AS hash, p.hash AS parent
-            FROM build c
-            INNER JOIN build p ON p.hash = c.parent
-            WHERE c.commented = 0
-            ORDER BY c.time DESC, c.pr, c.hash, p.hash
+            SELECT DISTINCT
+                c.event AS event,
+                c.pr AS pr,
+                c.hash AS hash,
+                p.hash AS parent
+            FROM build c
+            INNER JOIN build p ON p.hash = c.parent
+            WHERE c.commented = 0
+            ORDER BY c.time DESC, c.pr, c.hash, p.hash
            ''')

    def set_commented(self, build_ids: Iterable[int]):
@@ -363,6 +378,14 @@ def delete_stale_artifacts(self, stale_artifacts: Iterable[int]):
            logging.info('DSA: deleting obsolete artifact %d', artifact_id)
            self.delete_artifact(artifact_id)

+    def should_report(self, event: Optional[str] = None) -> bool:
+        """Return true if reporting is enabled for the event."""
+        if event is None:
+            return self.config['report.pr'] or self.config['report.push']
+        if event == 'pull_request':
+            return self.config['report.pr']
+        return self.config['report.push']
+

def gh_open(config: Config) -> Optional[ghapi.core.GhApi]:
    """Return a GhApi, if so configured."""
@@ -647,7 +670,7 @@ def v1_comment_summary(df: pd.DataFrame) -> str:

def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
    """Report on all new comparable commits."""
-    if not (db.config['report.pr'] or db.config['report.push']):
+    if not db.should_report():
        return {}

    comment_count = 0
@@ -655,13 +678,11 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
    comment_enabled = (db.config['github.comment']
                       or db.config['github.dryrun-comment'])

-    report_push = db.config['report.push']
-    report_pr = db.config['report.pr']
    only_pr = db.config['github.limit-pr']

    dfs = {}
-    for pr, commit, parent in db.select_matching_commits().fetchall():
-        if not (report_pr if pr else report_push):
+    for event, pr, commit, parent in db.select_matching_commits().fetchall():
+        if not db.should_report(event):
            continue

        # Github doesn't have a way to fetch artifacts associated with a
@@ -672,7 +693,7 @@
        df = changes_for_commit(db, pr, commit, parent)
        dfs[df.attrs['name']] = df

-        if (pr and comment_enabled
+        if (event == 'pull_request' and comment_enabled
                and (comment_limit == 0 or comment_limit > comment_count)):
            if gh_send_change_report(db, df):
                # Mark the originating builds, and remove the originating
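Aside for reviewers (not part of the patch): the backward-compatible event
fallback added to add_sizes_from_github() above can be exercised in isolation.
A minimal Python sketch of the same parsing logic follows; the artifact names
and the "ESP32" group are made up for illustration:

    def artifact_event(name: str) -> str:
        """Mirror the patched parsing: new artifact names carry a trailing
        event field; old names fall back to inferring it from the PR number."""
        _, group, pr, commit, parent, *etc = name.split(',')
        if etc:
            return etc[0]
        # Old artifact names don't include the event.
        return 'push' if pr == '0' else 'pull_request'

    assert artifact_event('Size,ESP32,0,1111,2222') == 'push'
    assert artifact_event('Size,ESP32,123,1111,2222') == 'pull_request'
    assert artifact_event('Size,ESP32,123,1111,2222,pull_request') == 'pull_request'
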
diff --git a/scripts/tools/memory/gh_sizes.py b/scripts/tools/memory/gh_sizes.py
index d6648481e86910..0468d0bc82dc28 100755
--- a/scripts/tools/memory/gh_sizes.py
+++ b/scripts/tools/memory/gh_sizes.py
@@ -31,11 +31,11 @@
 This script also expects certain environment variables, which can be set
 in a github workflow as follows:

-  env:
-    BUILD_TYPE: nrfconnect
-    GH_EVENT_PR: ${{ github.event_name == 'pull_request' && github.event.number || 0 }}
-    GH_EVENT_HASH: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-    GH_EVENT_PARENT: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
+  - name: Set up environment for size reports
+    if: ${{ !env.ACT }}
+    env:
+      GH_CONTEXT: ${{ toJson(github) }}
+    run: gh_sizes_environment.py "${GH_CONTEXT}"

 Default output file is {platform}-{configname}-{buildname}-sizes.json in the
 binary's directory. This file has the form:
@@ -51,11 +51,16 @@
     "parent": "20796f752061726520746f6f20636c6f73652e0a",
     "pr": 12345,
     "by": "section",
+    "ref": "refs/pull/12345/merge",
     "frames": {
       "section": [
         {"section": ".bss", "size": 260496},
         {"section": ".data", "size": 1648},
         {"section": ".text", "size": 740236}
+      ],
+      "wr": [
+        {"wr": 0, "size": 262144},
+        {"wr": 1, "size": 74023}
       ]
     }
   }
@@ -100,10 +105,17 @@
        'metavar': 'HASH',
        'default': os.environ.get('GH_EVENT_PARENT'),
    },
+    'ref': {
+        'help': 'Target ref',
+        'metavar': 'REF',
+        'default': os.environ.get('GH_EVENT_REF'),
+    },
    'timestamp': {
        'help': 'Build timestamp',
        'metavar': 'TIME',
-        'default': int(datetime.datetime.now().timestamp()),
+        'default': int(
+            os.environ.get('GH_EVENT_TIMESTAMP')
+            or datetime.datetime.now().timestamp()),
    },
}
@@ -165,7 +177,7 @@ def main(argv):
    config.put('output.metadata.time', config['timestamp'])
    config.put('output.metadata.input', binary)
    config.put('output.metadata.by', 'section')
-    for key in ['event', 'hash', 'parent', 'pr']:
+    for key in ['event', 'hash', 'parent', 'pr', 'ref']:
        if value := config[key]:
            config.putl(['output', 'metadata', key], value)
diff --git a/scripts/tools/memory/gh_sizes_environment.py b/scripts/tools/memory/gh_sizes_environment.py
index 4b38a53e12c2db..89950f387bdab3 100755
--- a/scripts/tools/memory/gh_sizes_environment.py
+++ b/scripts/tools/memory/gh_sizes_environment.py
@@ -16,10 +16,12 @@

 Sets the following environment variables:

-- `GH_EVENT_PR`      For a pull request, the PR number; otherwise 0.
-- `GH_EVENT_HASH`    SHA of the commit under test.
-- `GH_EVENT_PARENT`  SHA of the parent commit to which the commit under
-                     test is applied.
+- `GH_EVENT_PR`         For a pull request, the PR number; otherwise 0.
+- `GH_EVENT_HASH`       SHA of the commit under test.
+- `GH_EVENT_PARENT`     SHA of the parent commit to which the commit under
+                        test is applied.
+- `GH_EVENT_REF`        The branch or tag ref that triggered the workflow run.
+- `GH_EVENT_TIMESTAMP`  For `push` events only, the timestamp of the commit.
 """

 import json
@@ -28,11 +30,21 @@
 import subprocess
 import sys

+import dateutil.parser
+
github = json.loads(sys.argv[1])

+commit = None
+timestamp = None
+ref = github['ref']
+
if github['event_name'] == 'pull_request':
+    pr = github['event']['number']
    commit = github['event']['pull_request']['head']['sha']
+
+    # Try to find the actual commit against which the current PR compares
+    # by scraping the HEAD commit message.
    r = subprocess.run(['git', 'show', '--no-patch', '--format=%s', 'HEAD'],
                       capture_output=True, text=True, check=True)
    m = re.fullmatch('Merge [0-9a-f]+ into ([0-9a-f]+)', r.stdout)
@@ -40,17 +52,25 @@
        parent = m.group(1)
    else:
        parent = github['event']['pull_request']['base']['sha']
-else:
-    pr = 0
+
+elif github['event_name'] == 'push':
+    commit = github['sha']
    parent = github['event']['before']
+    timestamp = int(dateutil.parser.isoparse(
+        github['event']['head_commit']['timestamp']).timestamp())
+    pr = 0

# Environment variables for subsequent workflow steps are set by
# writing to the file named by `$GITHUB_ENV`.
-env = os.environ.get('GITHUB_ENV')
-assert env
-with open(env, 'at') as out:
-    print(f'GH_EVENT_PR={pr}', file=out)
-    print(f'GH_EVENT_HASH={commit}', file=out)
-    print(f'GH_EVENT_PARENT={parent}', file=out)
+if commit is not None:
+    env = os.environ.get('GITHUB_ENV')
+    assert env
+    with open(env, 'at') as out:
+        print(f'GH_EVENT_PR={pr}', file=out)
+        print(f'GH_EVENT_HASH={commit}', file=out)
+        print(f'GH_EVENT_PARENT={parent}', file=out)
+        print(f'GH_EVENT_REF={ref}', file=out)
+        if timestamp:
+            print(f'GH_EVENT_TIMESTAMP={timestamp}', file=out)
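
Aside for reviewers (not part of the patch): a quick way to exercise
gh_sizes_environment.py locally for the new `push` path. This sketch assumes
it is run from the repository root with python-dateutil installed; every
value in the context below is fabricated, and only the fields the script
reads for a `push` event are included:

    import json
    import os
    import subprocess
    import tempfile

    # Minimal stand-in for the `github` context a workflow would pass in.
    context = {
        'event_name': 'push',
        'sha': '1111111111111111111111111111111111111111',
        'ref': 'refs/heads/master',
        'event': {
            'before': '2222222222222222222222222222222222222222',
            'head_commit': {'timestamp': '2021-11-15T12:34:56Z'},
        },
    }

    with tempfile.NamedTemporaryFile(mode='r') as env_file:
        # The script appends its exports to the file named by $GITHUB_ENV.
        subprocess.run(
            ['scripts/tools/memory/gh_sizes_environment.py', json.dumps(context)],
            env={**os.environ, 'GITHUB_ENV': env_file.name},
            check=True)
        print(env_file.read())
        # Expect GH_EVENT_PR=0, GH_EVENT_HASH=..., GH_EVENT_PARENT=...,
        # GH_EVENT_REF=refs/heads/master, GH_EVENT_TIMESTAMP=1636979696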