Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Size reports] Script improvements (1/3) #12886

Merged
merged 1 commit on Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 48 additions & 27 deletions scripts/tools/memory/gh_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,9 @@ class SizeDatabase(memdf.util.sqlite.Database):
pr INTEGER DEFAULT 0, -- Github PR number
time INTEGER NOT NULL, -- Unix-epoch timestamp
artifact INTEGER DEFAULT 0, -- Github artifact ID
commented INTEGER DEFAULT 0,
commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment
ref TEXT, -- Target git ref
event TEXT, -- Github build trigger event
UNIQUE(thing_id, hash, parent, pr, time, artifact)
)
""", """
Expand All @@ -194,13 +196,15 @@ def add_sizes(self, **kwargs):
"""
Add a size report to the database.

The incoming arguments must contain the non-ID column names from
‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a sequence
of mappings containing 'name' and 'size'.
The incoming arguments must contain the required non-ID column names
from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a
sequence of mappings containing 'name' and 'size'.
"""
td = {k: kwargs[k] for k in ('platform', 'config', 'target')}
thing = self.store_and_return_id('thing', **td)
bd = {k: kwargs[k] for k in ('hash', 'parent', 'time')}
bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')}
if 'ref' in kwargs:
bd['ref'] = kwargs['ref']
cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
if build is None:
Expand All @@ -213,11 +217,11 @@ def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
"""Add sizes from a JSON size report."""
r = origin.copy()
r.update(json.loads(s))
by = r.get('by', 'section')
r['sizes'] = [{
'name': i[by],
'size': i['size']
} for i in r['frames'][by]]
r['sizes'] = []
# Add section sizes.
for i in r['frames'].get('section', []):
r['sizes'].append({'name': i['section'], 'size': i['size']})
# Add segment sizes.
for i in r['frames'].get('wr', []):
r['sizes'].append({
'name': ('(read only)', '(read/write)')[int(i['wr'])],
Expand Down Expand Up @@ -257,7 +261,7 @@ def add_sizes_from_github(self):
artifact_pages = self.config['github.limit-artifact-pages']

# Size artifacts have names of the form:
# Size,{group},{pr},{commit_hash},{parent_hash}
# Size,{group},{pr},{commit_hash},{parent_hash}[,{event}]
# Record them keyed by group and commit_hash to match them up
# after we have the entire list.
page = 0
Expand All @@ -266,12 +270,17 @@ def add_sizes_from_github(self):
if not i.artifacts:
break
for a in i.artifacts:
if a.name.startswith('Size,'):
_, group, pr, commit, parent, *_ = (a.name + ',').split(
',', 5)
if a.name.startswith('Size,') and a.name.count(',') >= 4:
_, group, pr, commit, parent, *etc = a.name.split(',')
a.parent = parent
a.pr = pr
a.created_at = dateutil.parser.isoparse(a.created_at)
# Old artifact names don't include the event.
if etc:
event = etc[0]
else:
event = 'push' if pr == '0' else 'pull_request'
a.event = event
if group not in size_artifacts:
size_artifacts[group] = {}
size_artifacts[group][commit] = a
Expand All @@ -286,7 +295,7 @@ def add_sizes_from_github(self):
for group, group_reports in size_artifacts.items():
logging.debug('ASG: group %s', group)
for report in group_reports.values():
if self.config['report.pr' if int(report.pr) else 'report.push']:
if self.should_report(report.event):
if report.parent not in group_reports:
logging.debug('ASN: No match for %s', report.name)
continue
Expand All @@ -308,8 +317,10 @@ def add_sizes_from_github(self):
try:
blob = self.gh.actions.download_artifact(i, 'zip')
except Exception as e:
blob = None
logging.error('Failed to download artifact %d: %s', i, e)
self.add_sizes_from_zipfile(io.BytesIO(blob), {'artifact': i})
if blob:
self.add_sizes_from_zipfile(io.BytesIO(blob), {'artifact': i})

def read_inputs(self):
"""Read size report from github and/or local files."""
Expand All @@ -321,11 +332,15 @@ def read_inputs(self):
def select_matching_commits(self):
"""Find matching builds, where one's commit is the other's parent."""
return self.execute('''
SELECT DISTINCT c.pr AS pr, c.hash AS hash, p.hash AS parent
FROM build c
INNER JOIN build p ON p.hash = c.parent
WHERE c.commented = 0
ORDER BY c.time DESC, c.pr, c.hash, p.hash
SELECT DISTINCT
c.event as event,
c.pr AS pr,
c.hash AS hash,
p.hash AS parent
FROM build c
INNER JOIN build p ON p.hash = c.parent
WHERE c.commented = 0
ORDER BY c.time DESC, c.pr, c.hash, p.hash
''')

def set_commented(self, build_ids: Iterable[int]):
Expand Down Expand Up @@ -363,6 +378,14 @@ def delete_stale_artifacts(self, stale_artifacts: Iterable[int]):
logging.info('DSA: deleting obsolete artifact %d', artifact_id)
self.delete_artifact(artifact_id)

def should_report(self, event: Optional[str] = None) -> bool:
"""Return true if reporting is enabled for the event."""
if event is None:
return self.config['report.pr'] or self.config['report.push']
if event == 'pull_request':
return self.config['report.pr']
return self.config['report.push']


def gh_open(config: Config) -> Optional[ghapi.core.GhApi]:
"""Return a GhApi, if so configured."""
Expand Down Expand Up @@ -647,21 +670,19 @@ def v1_comment_summary(df: pd.DataFrame) -> str:

def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
"""Report on all new comparable commits."""
if not (db.config['report.pr'] or db.config['report.push']):
if not db.should_report():
return {}

comment_count = 0
comment_limit = db.config['github.limit-comments']
comment_enabled = (db.config['github.comment']
or db.config['github.dryrun-comment'])

report_push = db.config['report.push']
report_pr = db.config['report.pr']
only_pr = db.config['github.limit-pr']

dfs = {}
for pr, commit, parent in db.select_matching_commits().fetchall():
if not (report_pr if pr else report_push):
for event, pr, commit, parent in db.select_matching_commits().fetchall():
if not db.should_report(event):
continue

# Github doesn't have a way to fetch artifacts associated with a
Expand All @@ -672,7 +693,7 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
df = changes_for_commit(db, pr, commit, parent)
dfs[df.attrs['name']] = df

if (pr and comment_enabled
if (event == 'pull_request' and comment_enabled
and (comment_limit == 0 or comment_limit > comment_count)):
if gh_send_change_report(db, df):
# Mark the originating builds, and remove the originating
Expand Down
26 changes: 19 additions & 7 deletions scripts/tools/memory/gh_sizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
This script also expects certain environment variables, which can be set in a
github workflow as follows:

env:
BUILD_TYPE: nrfconnect
GH_EVENT_PR: ${{ github.event_name == 'pull_request' && github.event.number || 0 }}
GH_EVENT_HASH: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
GH_EVENT_PARENT: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }}
- name: Set up environment for size reports
if: ${{ !env.ACT }}
env:
GH_CONTEXT: ${{ toJson(github) }}
run: gh_sizes_environment.py "${GH_CONTEXT}"

Default output file is {platform}-{configname}-{buildname}-sizes.json in the
binary's directory. This file has the form:
Expand All @@ -51,11 +51,16 @@
"parent": "20796f752061726520746f6f20636c6f73652e0a",
"pr": 12345,
"by": "section",
"ref": "refs/pull/12345/merge"
"frames": {
"section": [
{"section": ".bss", "size": 260496},
{"section": ".data", "size": 1648},
{"section": ".text", "size": 740236}
],
"wr": [
{"wr": 0, "size": 262144},
{"wr": 1, "size": 74023}
]
}
}
Expand Down Expand Up @@ -100,10 +105,17 @@
'metavar': 'HASH',
'default': os.environ.get('GH_EVENT_PARENT'),
},
'ref': {
'help': 'Target ref',
'metavar': 'REF',
'default': os.environ.get('GH_EVENT_REF'),
},
'timestamp': {
'help': 'Build timestamp',
'metavar': 'TIME',
'default': int(datetime.datetime.now().timestamp()),
'default': int(
os.environ.get('GH_EVENT_TIMESTAMP')
or datetime.datetime.now().timestamp()),
},
}

Expand Down Expand Up @@ -165,7 +177,7 @@ def main(argv):
config.put('output.metadata.time', config['timestamp'])
config.put('output.metadata.input', binary)
config.put('output.metadata.by', 'section')
for key in ['event', 'hash', 'parent', 'pr']:
for key in ['event', 'hash', 'parent', 'pr', 'ref']:
if value := config[key]:
config.putl(['output', 'metadata', key], value)

Expand Down
44 changes: 32 additions & 12 deletions scripts/tools/memory/gh_sizes_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@

Sets the following environment variables:

- `GH_EVENT_PR` For a pull request, the PR number; otherwise 0.
- `GH_EVENT_HASH` SHA of the commit under test.
- `GH_EVENT_PARENT` SHA of the parent commit to which the commit under
test is applied.
- `GH_EVENT_PR` For a pull request, the PR number; otherwise 0.
- `GH_EVENT_HASH` SHA of the commit under test.
- `GH_EVENT_PARENT` SHA of the parent commit to which the commit under
test is applied.
- `GH_EVENT_REF` The branch or tag ref that triggered the workflow run.
- `GH_EVENT_TIMESTAMP` For `push` events only, the timestamp of the commit.
"""

import json
Expand All @@ -28,29 +30,47 @@
import subprocess
import sys

import dateutil.parser

# The workflow passes the full GitHub context object as JSON in argv[1].
github = json.loads(sys.argv[1])

commit = None
timestamp = None
ref = github['ref']

if github['event_name'] == 'pull_request':

    pr = github['event']['number']
    commit = github['event']['pull_request']['head']['sha']

    # Try to find the actual commit against which the current PR compares
    # by scraping the HEAD commit message.
    r = subprocess.run(['git', 'show', '--no-patch', '--format=%s', 'HEAD'],
                       capture_output=True, text=True, check=True)
    m = re.fullmatch('Merge [0-9a-f]+ into ([0-9a-f]+)', r.stdout)
    if m:
        parent = m.group(1)
    else:
        parent = github['event']['pull_request']['base']['sha']

elif github['event_name'] == 'push':

    commit = github['sha']
    parent = github['event']['before']
    # Push events carry the commit timestamp in the event payload.
    timestamp = dateutil.parser.isoparse(
        github['event']['head_commit']['timestamp']).timestamp()
    pr = 0

# Environment variables for subsequent workflow steps are set by
# writing to the file named by `$GITHUB_ENV`. Skip entirely for event
# types we don't handle (commit stays None).

if commit is not None:
    env = os.environ.get('GITHUB_ENV')
    assert env
    with open(env, 'at') as out:
        print(f'GH_EVENT_PR={pr}', file=out)
        print(f'GH_EVENT_HASH={commit}', file=out)
        print(f'GH_EVENT_PARENT={parent}', file=out)
        print(f'GH_EVENT_REF={ref}', file=out)
        # Timestamp is only available for push events.
        if timestamp:
            print(f'GH_EVENT_TIMESTAMP={timestamp}', file=out)