From b10b809758ae94ab37c37bcb90c585a911f0ebc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Tue, 30 Jun 2020 12:31:04 +0200 Subject: [PATCH 01/29] Add scripts for PR metrics from github API (Previously a personal project.) --- pr-metrics/.gitignore | 3 ++ pr-metrics/Readme.md | 15 +++++++ pr-metrics/do.sh | 9 ++++ pr-metrics/get-pr-data.py | 26 ++++++++++++ pr-metrics/pending-mergeability.py | 39 +++++++++++++++++ pr-metrics/pr-closed.py | 44 ++++++++++++++++++++ pr-metrics/pr-created.py | 42 +++++++++++++++++++ pr-metrics/pr-lifetime.py | 61 +++++++++++++++++++++++++++ pr-metrics/pr-pending.py | 52 +++++++++++++++++++++++ pr-metrics/prs.py | 67 ++++++++++++++++++++++++++++++ 10 files changed, 358 insertions(+) create mode 100644 pr-metrics/.gitignore create mode 100644 pr-metrics/Readme.md create mode 100755 pr-metrics/do.sh create mode 100755 pr-metrics/get-pr-data.py create mode 100755 pr-metrics/pending-mergeability.py create mode 100755 pr-metrics/pr-closed.py create mode 100755 pr-metrics/pr-created.py create mode 100755 pr-metrics/pr-lifetime.py create mode 100755 pr-metrics/pr-pending.py create mode 100644 pr-metrics/prs.py diff --git a/pr-metrics/.gitignore b/pr-metrics/.gitignore new file mode 100644 index 000000000..71c733cda --- /dev/null +++ b/pr-metrics/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +pr-data.p +*.png diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md new file mode 100644 index 000000000..565b91c9f --- /dev/null +++ b/pr-metrics/Readme.md @@ -0,0 +1,15 @@ +These scripts collect some metrics about mbed TLS PRs over time. + +Usage: + +1. `./get-pr-data.py` - this takes a long time and requires the environment + variable `GITHUB_API_TOKEN` to be set to a valid [github API +token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token) (unauthenticated access to the API has a limit on the number or requests that is too low for our number of PRs). It generates `pr-data.p` with pickled data. +2. `./do.sh` - this works offline from the data in `pr-data.p` and generates a + bunch of png and csv files. + +These scripts work with matplotlib 3.1.2 (python 3.8.2, ubuntu 20.04), but +appear to be broken with matplotlib 1.5.1 (python 3.5, ubuntu 16.04). + +They require pygithub, which can easily be installed with pip (any version +should do). diff --git a/pr-metrics/do.sh b/pr-metrics/do.sh new file mode 100755 index 000000000..3a3180a33 --- /dev/null +++ b/pr-metrics/do.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +set -eu + +for topic in created closed pending lifetime; do + echo "PRs $topic..." 
+ rm -f prs-${topic}.png prs-${topic}.csv + ./pr-${topic}.py > prs-${topic}.csv +done diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py new file mode 100755 index 000000000..cb918f1bf --- /dev/null +++ b/pr-metrics/get-pr-data.py @@ -0,0 +1,26 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""Get PR data from github and pickle it.""" + +import pickle +import os + +from github import Github + +if "GITHUB_API_TOKEN" in os.environ: + token = os.environ["GITHUB_API_TOKEN"] +else: + print("You need to provide a GitHub API token") + +g = Github(token) +r = g.get_repo("ARMMbed/mbedtls") + +prs = list() +for p in r.get_pulls(state="all"): + # accessing p.mergeable forces completion of PR data + print(p.number, p.mergeable) + prs.append(p) + +with open("pr-data.p", "wb") as f: + pickle.dump(prs, f) diff --git a/pr-metrics/pending-mergeability.py b/pr-metrics/pending-mergeability.py new file mode 100755 index 000000000..682f8b610 --- /dev/null +++ b/pr-metrics/pending-mergeability.py @@ -0,0 +1,39 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""Produce summary or PRs pending per branch and their mergeability status.""" + +import pickle +from datetime import datetime +from collections import Counter + +with open("pr-data.p", "rb") as f: + prs = pickle.load(f) + +c_open = Counter() +c_mergeable = Counter() +c_nowork = Counter() +c_recent = Counter() + +for p in prs: + if p.state != "open": + continue + + branch = p.base.ref + c_open[branch] += 1 + if p.mergeable: + c_mergeable[branch] += 1 + if "needs: work" not in [l.name for l in p.labels]: + c_nowork[branch] += 1 + days = (datetime.now() - p.updated_at).days + if days < 31: + c_recent[branch] += 1 + + +print("branch: open, no conflicts, minus need work, minus month-old") +for b in sorted(c_open, key=lambda b: c_open[b], reverse=True): + print( + "{:>15}: {: 4}, {: 3}, {: 3}, {: 3}".format( + b, c_open[b], c_mergeable[b], c_nowork[b], c_recent[b] + ) + ) diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py new file mode 100755 index 000000000..7951cfa7b --- /dev/null +++ b/pr-metrics/pr-closed.py @@ -0,0 +1,44 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""Produce graph of PRs closed by time period.""" + +from prs import pr_dates, quarter + +from collections import Counter + +import matplotlib.pyplot as plt + +cutoff = "15q1" + +cnt_all = Counter() +cnt_com = Counter() + +for beg, end, com, cur in pr_dates(): + if cur: + continue + q = quarter(end) + cnt_all[q] += 1 + if com: + cnt_com[q] += 1 + +quarters = tuple(sorted(q for q in cnt_all if q >= cutoff)) + +prs_com = tuple(cnt_com[q] for q in quarters) +prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters) + +width = 0.9 +fig, ax = plt.subplots() +ax.bar(quarters, prs_com, width, label="community") +ax.bar(quarters, prs_team, width, label="core team", bottom=prs_com) +ax.legend(loc="upper left") +ax.grid(True) +ax.set_xlabel("quarter") +ax.set_ylabel("Number or PRs closed") +fig.suptitle("Number of PRs closed per quarter") +fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p +fig.savefig("prs-closed.png") + +print("Quarter,community closed,total closed") +for q in quarters: + print("{},{},{}".format(q, cnt_com[q], cnt_all[q])) diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py new file mode 100755 index 000000000..e092e4837 --- /dev/null +++ b/pr-metrics/pr-created.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""Produce graph of PRs created by time period.""" + +from prs import pr_dates, quarter + +from collections import Counter + +import 
matplotlib.pyplot as plt + +cutoff = "15q1" + +cnt_all = Counter() +cnt_com = Counter() + +for beg, end, com, cur in pr_dates(): + q = quarter(beg) + cnt_all[q] += 1 + if com: + cnt_com[q] += 1 + +quarters = tuple(sorted(q for q in cnt_all if q >= cutoff)) + +prs_com = tuple(cnt_com[q] for q in quarters) +prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters) + +width = 0.9 +fig, ax = plt.subplots() +ax.bar(quarters, prs_com, width, label="community") +ax.bar(quarters, prs_team, width, label="core team", bottom=prs_com) +ax.legend(loc="upper left") +ax.grid(True) +ax.set_xlabel("quarter") +ax.set_ylabel("Number or PRs created") +fig.suptitle("Number of PRs created per quarter") +fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p +fig.savefig("prs-created.png") + +print("Quarter,community created,total created") +for q in quarters: + print("{},{},{}".format(q, cnt_com[q], cnt_all[q])) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py new file mode 100755 index 000000000..a24fd4e9c --- /dev/null +++ b/pr-metrics/pr-lifetime.py @@ -0,0 +1,61 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""Produce graph of lifetime of PRs over time.""" + +from prs import pr_dates, quarter + +from collections import defaultdict + +import matplotlib.pyplot as plt + +cutoff = "15q1" + +lifetimes_all = defaultdict(list) +lifetimes_com = defaultdict(list) + +for beg, end, com, cur in pr_dates(): + lt = (end - beg).days + q = quarter(beg) + lifetimes_all[q].append(lt) + if com: + lifetimes_com[q].append(lt) + +quarters = tuple(sorted(q for q in lifetimes_all if q >= cutoff)) + +for q in quarters: + lifetimes_all[q].sort() + lifetimes_com[q].sort() + + +def median(sl): + """Return the median value of a sorted list of numbers (0 if empty).""" + index = (len(sl) - 1) / 2 + if index < 0: + return 0 + if int(index) == index: + return sl[int(index)] + + i, j = int(index - 0.5), int(index + 0.5) + return (sl[i] + sl[j]) / 2 + + +med_all = tuple(median(lifetimes_all[q]) for q in quarters) +med_com = tuple(median(lifetimes_com[q]) for q in quarters) + +fig, ax = plt.subplots() +ax.plot(quarters, med_all, "b-", label="median overall") +ax.plot(quarters, med_com, "r-", label="median community") +ax.legend(loc="upper right") +ax.grid(True) +ax.set_xlabel("quarter") +ax.set_ylabel("median lifetime in days of PRs created that quarter") +bot, top = ax.set_ylim() +ax.set_ylim(0, min(365, top)) # we don't care about values over 1 year +fig.suptitle("Median lifetime of PRs per quarter (less is better)") +fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p +fig.savefig("prs-lifetime.png") + +print("Quarter,median overall,median community") +for q, a, c in zip(quarters, med_all, med_com): + print("{},{},{}".format(q, int(a), int(c))) diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py new file mode 100755 index 000000000..979641edb --- /dev/null +++ b/pr-metrics/pr-pending.py @@ -0,0 +1,52 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""Produce graph of PRs pending over time.""" + +from prs import pr_dates + +from datetime import date, timedelta +from collections import Counter + +import matplotlib.pyplot as plt + +cutoff = date(2015, 1, 1) + +cnt_tot = Counter() +cnt_com = Counter() + +for beg, end, com, cur in pr_dates(): + n_days = (end - beg).days + dates = Counter(beg + timedelta(days=i) for i in range(n_days)) + cnt_tot.update(dates) + if com: + cnt_com.update(dates) + +dates = tuple(sorted(d for d in cnt_tot.keys() if d >= cutoff)) + + +def avg(cnt, date): + """Average number of open PRs 
over a week.""" + return sum(cnt[date - timedelta(days=i)] for i in range(7)) / 7 + + +nb_tot = tuple(avg(cnt_tot, d) for d in dates) +nb_com = tuple(avg(cnt_com, d) for d in dates) +nb_team = tuple(tot - com for tot, com in zip(nb_tot, nb_com)) + +fig, ax = plt.subplots() +ax.plot(dates, nb_tot, "b-", label="total") +ax.plot(dates, nb_team, "c-", label="core team") +ax.plot(dates, nb_com, "r-", label="community") +ax.legend(loc="upper left") +ax.grid(True) +ax.set_xlabel("date") +ax.set_ylabel("number of open PRs (sliding average over a week)") +fig.suptitle("Number of PRs pending over time (less is better)") +fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p +fig.savefig("prs-pending.png") + +print("date,pending total, pending community") +for d in dates: + tot, com = cnt_tot[d], cnt_com[d] + print("{},{},{}".format(d, tot, com)) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py new file mode 100644 index 000000000..5c30e46c7 --- /dev/null +++ b/pr-metrics/prs.py @@ -0,0 +1,67 @@ +#!/usr/bin/python3 +# coding: utf-8 + +"""PR data an misc common functions.""" + +from datetime import datetime, timedelta +import pickle + +with open("pr-data.p", "rb") as f: + prs = pickle.load(f) + + +_team_logins = ( + "gilles-peskine-arm", + "hanno-arm", + "RonEld", + "andresag01", + "mpg", + "sbutcher-arm", + "Patater", + "k-stachowiak", + "AndrzejKurek", + "yanesca", + "mazimkhan", + "dgreen-arm", + "artokin", + "jarlamsa", + "piotr-now", + "pjbakker", + "jarvte", + "danh-arm", + "ronald-cron-arm", + "paul-elliott-arm", + "gabor-mezei-arm", + "bensze01", +) + + +def is_community(pr): + """Return False if the PR is from a team member or from inside Arm.""" + labels = tuple(l.name for l in pr.labels) + if "mbed TLS team" in labels or "Arm Contribution" in labels: + return False + if pr.user.login in _team_logins: + return False + return True + + +def quarter(date): + """Return a string decribing this date's quarter, for example 19q3.""" + q = str(date.year % 100) + q += "q" + q += str((date.month + 2) // 3) + return q + + +_tomorrow = datetime.now().date() + timedelta(days=1) + + +def pr_dates(): + """Iterate over PRs with open/close dates and community status.""" + for pr in prs: + beg = pr.created_at.date() + end = pr.closed_at.date() if pr.closed_at else _tomorrow + com = is_community(pr) + cur = not pr.closed_at + yield (beg, end, com, cur) From c531cae73d2d5caa1953dea4af9b33d793e332af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 30 Sep 2020 11:13:53 +0200 Subject: [PATCH 02/29] Update requirements & allow use from venv Hardcoding the path to python doesn't play nice with venv. 
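(For illustration only, not part of any patch in this series: a minimal sketch of how the
helpers introduced in `prs.py` above are meant to be combined, in the same spirit as the
`pr-*.py` scripts. It assumes `pr-data.p` has already been produced by `get-pr-data.py`,
and uses the 4-tuple that `pr_dates()` yields at this point in the series.)

```
from collections import Counter

from prs import pr_dates, quarter  # helpers from prs.py above

open_total = Counter()
open_community = Counter()

for beg, end, com, cur in pr_dates():
    if not cur:          # only count PRs that are still open
        continue
    q = quarter(beg)     # e.g. "20q3" for a PR created in September 2020
    open_total[q] += 1
    if com:
        open_community[q] += 1

for q in sorted(open_total):
    print("{}: {} open ({} from the community)".format(
        q, open_total[q], open_community[q]))
```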
--- pr-metrics/.gitignore | 1 + pr-metrics/Readme.md | 29 ++++++++++++++++++++++++----- pr-metrics/get-pr-data.py | 2 +- pr-metrics/pending-mergeability.py | 2 +- pr-metrics/pr-closed.py | 2 +- pr-metrics/pr-created.py | 2 +- pr-metrics/pr-lifetime.py | 2 +- pr-metrics/pr-pending.py | 2 +- pr-metrics/prs.py | 2 +- pr-metrics/requirements.txt | 17 +++++++++++++++++ 10 files changed, 49 insertions(+), 12 deletions(-) create mode 100644 pr-metrics/requirements.txt diff --git a/pr-metrics/.gitignore b/pr-metrics/.gitignore index 71c733cda..a8188de82 100644 --- a/pr-metrics/.gitignore +++ b/pr-metrics/.gitignore @@ -1,3 +1,4 @@ __pycache__ pr-data.p *.png +*.csv diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md index 565b91c9f..dc4aed36d 100644 --- a/pr-metrics/Readme.md +++ b/pr-metrics/Readme.md @@ -1,6 +1,7 @@ These scripts collect some metrics about mbed TLS PRs over time. -Usage: +Usage +----- 1. `./get-pr-data.py` - this takes a long time and requires the environment variable `GITHUB_API_TOKEN` to be set to a valid [github API @@ -8,8 +9,26 @@ token](https://help.github.com/en/github/authenticating-to-github/creating-a-per 2. `./do.sh` - this works offline from the data in `pr-data.p` and generates a bunch of png and csv files. -These scripts work with matplotlib 3.1.2 (python 3.8.2, ubuntu 20.04), but -appear to be broken with matplotlib 1.5.1 (python 3.5, ubuntu 16.04). +Requirements +------------ -They require pygithub, which can easily be installed with pip (any version -should do). +These scripts require: + +- Python >= 3.6 (required by recent enough matplotlib) +- matplotlib >= 3.1 (3.0 doesn't work) +- PyGithub >= 1.43 (any version should work, that was just the oldest tested) + +On Ubuntu 16.04, by default only Python 3.5 is available, which doesn't +support a recent enough matplotlib to support those scripts, so the following +was used to run those scripts on 16.04: + + sudo add-apt-repository ppa:deadsnakes/ppa + sudo apt update + sudo apt install python3.6 python3.6-venv + python3.6 -m venv 36env + source 36env/bin/activate + pip install --upgrade pip + pip install matlplotlib + pip install pygithub + +See `requirements.txt` for an example of a set of working versions. 
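(Aside, not part of the patch: since `get-pr-data.py` issues one API request per PR and
takes a long time, it can be worth checking the token and the remaining request quota
before starting. A minimal sketch, assuming PyGithub >= 1.43 as listed above, where
`Github.get_rate_limit()` exposes the REST ("core") quota.)

```
import os
import sys

from github import Github

token = os.environ.get("GITHUB_API_TOKEN")
if not token:
    sys.exit("GITHUB_API_TOKEN is not set")

limits = Github(token).get_rate_limit()
# The "core" category covers the REST calls made by get-pr-data.py.
print("remaining requests:", limits.core.remaining)
print("quota resets at:", limits.core.reset)
```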
diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py index cb918f1bf..b8e3d0320 100755 --- a/pr-metrics/get-pr-data.py +++ b/pr-metrics/get-pr-data.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """Get PR data from github and pickle it.""" diff --git a/pr-metrics/pending-mergeability.py b/pr-metrics/pending-mergeability.py index 682f8b610..3d0609b9b 100755 --- a/pr-metrics/pending-mergeability.py +++ b/pr-metrics/pending-mergeability.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """Produce summary or PRs pending per branch and their mergeability status.""" diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py index 7951cfa7b..5b06a916e 100755 --- a/pr-metrics/pr-closed.py +++ b/pr-metrics/pr-closed.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """Produce graph of PRs closed by time period.""" diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py index e092e4837..fc6be84a7 100755 --- a/pr-metrics/pr-created.py +++ b/pr-metrics/pr-created.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """Produce graph of PRs created by time period.""" diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index a24fd4e9c..bb21b53ae 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """Produce graph of lifetime of PRs over time.""" diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py index 979641edb..2468dd142 100755 --- a/pr-metrics/pr-pending.py +++ b/pr-metrics/pr-pending.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """Produce graph of PRs pending over time.""" diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index 5c30e46c7..c172d05cc 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding: utf-8 """PR data an misc common functions.""" diff --git a/pr-metrics/requirements.txt b/pr-metrics/requirements.txt new file mode 100644 index 000000000..dd1e61571 --- /dev/null +++ b/pr-metrics/requirements.txt @@ -0,0 +1,17 @@ +certifi==2020.6.20 +chardet==3.0.4 +cycler==0.10.0 +Deprecated==1.2.10 +idna==2.10 +kiwisolver==1.2.0 +matplotlib==3.3.2 +numpy==1.19.2 +Pillow==7.2.0 +PyGithub==1.53 +PyJWT==1.7.1 +pyparsing==2.4.7 +python-dateutil==2.8.1 +requests==2.24.0 +six==1.15.0 +urllib3==1.25.10 +wrapt==1.12.1 From a7150535f852e696564916b3c74f774066fc75e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 24 Dec 2020 13:27:59 +0100 Subject: [PATCH 03/29] Add comments about Ubuntu 20.04 --- pr-metrics/Readme.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md index dc4aed36d..461e8a0de 100644 --- a/pr-metrics/Readme.md +++ b/pr-metrics/Readme.md @@ -18,6 +18,12 @@ These scripts require: - matplotlib >= 3.1 (3.0 doesn't work) - PyGithub >= 1.43 (any version should work, that was just the oldest tested) +### Ubuntu 20.04 (and probaly 18.04) + +A simple `apt install python3-github python3-matplotlib` is enough. 
+ +### Ubuntu 16.04 + On Ubuntu 16.04, by default only Python 3.5 is available, which doesn't support a recent enough matplotlib to support those scripts, so the following was used to run those scripts on 16.04: From bae9af302fa557f93280322ceda637b74231c9b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 24 Dec 2020 13:28:28 +0100 Subject: [PATCH 04/29] Make get-pr-data 10x faster --- pr-metrics/get-pr-data.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py index b8e3d0320..02ade7a85 100755 --- a/pr-metrics/get-pr-data.py +++ b/pr-metrics/get-pr-data.py @@ -18,8 +18,13 @@ prs = list() for p in r.get_pulls(state="all"): - # accessing p.mergeable forces completion of PR data - print(p.number, p.mergeable) + print(p.number) + # Accessing p.mergeable forces completion of PR data (by default, only + # basic info such as status and dates is available) but makes the script + # slower (about 10x). + # Leave commented as we only need the basic info for do.sh. + # (Uncomment if you want to use extended PR data with other scripts.) + #dummy = p.mergeable prs.append(p) with open("pr-data.p", "wb") as f: From 00d84994bff156943ba4e723979c58f1d8027cf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 30 Dec 2020 14:59:59 +0100 Subject: [PATCH 05/29] Avoid potential better-than-reality lifetime figures --- pr-metrics/pr-lifetime.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index bb21b53ae..00714a6d4 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -16,6 +16,14 @@ for beg, end, com, cur in pr_dates(): lt = (end - beg).days + # If the PR is still open and it's recent (< 1 quarter), assign an + # arbitrary large lifetime. (The exact value doesn't matter for computing + # the median, as long as it's greater than the median.) Otherwise, if + # a large enough number of PRs were created shortly enough before this + # script is run, they could make the median look artifically low, because + # pr_dates() returns tomorrow as the 'end' date for still-open PRs. + if cur and lt < 90: + lt = 365 q = quarter(beg) lifetimes_all[q].append(lt) if com: From 28dffa79d9cdc6e62907da5433e048e0aef6e57a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 30 Dec 2020 15:21:34 +0100 Subject: [PATCH 06/29] Adjust pr_dates() to reduce risk of misuse Force users to check whether the PR's still open, and make sure an exception is raised if end date is used when it shouldn't. 
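(Illustration only, not part of the patch: the calling convention after this change. Each
consumer now decides explicitly what a still-open PR means for its metric, and forgetting
the check fails loudly because `None - date` raises `TypeError` instead of silently using
a made-up end date.)

```
from datetime import datetime

from prs import pr_dates

today = datetime.now().date()

for beg, end, com in pr_dates():
    if end is None:
        # Still open: only a lower bound on the lifetime is known so far.
        lifetime_at_least = (today - beg).days
    else:
        lifetime = (end - beg).days
```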
--- pr-metrics/pr-closed.py | 4 ++-- pr-metrics/pr-created.py | 2 +- pr-metrics/pr-lifetime.py | 25 +++++++++++++++---------- pr-metrics/pr-pending.py | 10 +++++++--- pr-metrics/prs.py | 9 ++------- 5 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py index 5b06a916e..664e39987 100755 --- a/pr-metrics/pr-closed.py +++ b/pr-metrics/pr-closed.py @@ -14,8 +14,8 @@ cnt_all = Counter() cnt_com = Counter() -for beg, end, com, cur in pr_dates(): - if cur: +for beg, end, com in pr_dates(): + if end is None: continue q = quarter(end) cnt_all[q] += 1 diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py index fc6be84a7..28c6ad200 100755 --- a/pr-metrics/pr-created.py +++ b/pr-metrics/pr-created.py @@ -14,7 +14,7 @@ cnt_all = Counter() cnt_com = Counter() -for beg, end, com, cur in pr_dates(): +for beg, end, com in pr_dates(): q = quarter(beg) cnt_all[q] += 1 if com: diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 00714a6d4..3cadc8aca 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -8,22 +8,27 @@ from collections import defaultdict import matplotlib.pyplot as plt +from datetime import datetime cutoff = "15q1" lifetimes_all = defaultdict(list) lifetimes_com = defaultdict(list) -for beg, end, com, cur in pr_dates(): - lt = (end - beg).days - # If the PR is still open and it's recent (< 1 quarter), assign an - # arbitrary large lifetime. (The exact value doesn't matter for computing - # the median, as long as it's greater than the median.) Otherwise, if - # a large enough number of PRs were created shortly enough before this - # script is run, they could make the median look artifically low, because - # pr_dates() returns tomorrow as the 'end' date for still-open PRs. - if cur and lt < 90: - lt = 365 +for beg, end, com in pr_dates(): + # If the PR is still open and it's recent, assign an arbitrary large + # lifetime. (The exact value doesn't matter for computing the median, as + # long as it's greater than the median.) Otherwise, if a large enough + # number of PRs were created shortly enough before this script is run, + # they could make the median look artifically low, because pr_dates() + # returns tomorrow as the 'end' date for still-open PRs. 
+ if end is None: + today = datetime.now().date() + lt_so_far = (today - beg).days + lt = max(365, lt_so_far) + else: + lt = (end - beg).days + q = quarter(beg) lifetimes_all[q].append(lt) if com: diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py index 2468dd142..7cab0ea8c 100755 --- a/pr-metrics/pr-pending.py +++ b/pr-metrics/pr-pending.py @@ -5,7 +5,7 @@ from prs import pr_dates -from datetime import date, timedelta +from datetime import date, datetime, timedelta from collections import Counter import matplotlib.pyplot as plt @@ -15,8 +15,12 @@ cnt_tot = Counter() cnt_com = Counter() -for beg, end, com, cur in pr_dates(): - n_days = (end - beg).days +for beg, end, com in pr_dates(): + if end is None: + tomorrow = datetime.now().date() + timedelta(days=1) + n_days = (tomorrow - beg).days + else: + n_days = (end - beg).days dates = Counter(beg + timedelta(days=i) for i in range(n_days)) cnt_tot.update(dates) if com: diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index c172d05cc..c56b06159 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -3,7 +3,6 @@ """PR data an misc common functions.""" -from datetime import datetime, timedelta import pickle with open("pr-data.p", "rb") as f: @@ -54,14 +53,10 @@ def quarter(date): return q -_tomorrow = datetime.now().date() + timedelta(days=1) - - def pr_dates(): """Iterate over PRs with open/close dates and community status.""" for pr in prs: beg = pr.created_at.date() - end = pr.closed_at.date() if pr.closed_at else _tomorrow + end = pr.closed_at.date() if pr.closed_at else None com = is_community(pr) - cur = not pr.closed_at - yield (beg, end, com, cur) + yield (beg, end, com) From 3d7880cc9366e06341c3435536f57afef61bb91c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 12:42:54 +0200 Subject: [PATCH 07/29] Adapt detection of community PRs --- pr-metrics/prs.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index c56b06159..e2976395b 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -38,6 +38,12 @@ def is_community(pr): """Return False if the PR is from a team member or from inside Arm.""" labels = tuple(l.name for l in pr.labels) + # starting from 2021 we consistently label community PRs + if pr.created_at.date().year >= 2021: + return "Community" in labels + + # before that we used to inconsistently labeled PRs from the team or ARM, + # so complement that with a list of team members if "mbed TLS team" in labels or "Arm Contribution" in labels: return False if pr.user.login in _team_logins: From 37844d4a483f157037bbf44195c47a4b4059e19c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 12:54:40 +0200 Subject: [PATCH 08/29] Add warning about making this work on 16.04 --- pr-metrics/Readme.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md index 461e8a0de..f73c73d70 100644 --- a/pr-metrics/Readme.md +++ b/pr-metrics/Readme.md @@ -38,3 +38,10 @@ was used to run those scripts on 16.04: pip install pygithub See `requirements.txt` for an example of a set of working versions. + +Note: if you do this, I strongly recommend uninstalling python3.6, +python3.6-venv and all their dependencies, then removing the deadsnakes PPA +before any upgrade to 18.04. 
Failing to do so will result in +dependency-related headaches as some packages in 18.04 depend on a specific +version of python3.6 but the version from deadsnakes is higher, so apt won't +downgrade it and manual intervention will be required. From cf9e41da9a24fe31f95f1d386b88e8c44f65734a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 13:10:08 +0200 Subject: [PATCH 09/29] Avoid repeating the start date in many places --- pr-metrics/pr-closed.py | 6 +++--- pr-metrics/pr-created.py | 6 +++--- pr-metrics/pr-lifetime.py | 6 +++--- pr-metrics/pr-pending.py | 6 ++---- pr-metrics/prs.py | 4 ++++ 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py index 664e39987..9009eb605 100755 --- a/pr-metrics/pr-closed.py +++ b/pr-metrics/pr-closed.py @@ -3,13 +3,13 @@ """Produce graph of PRs closed by time period.""" -from prs import pr_dates, quarter +from prs import pr_dates, quarter, first from collections import Counter import matplotlib.pyplot as plt -cutoff = "15q1" +first_q = quarter(first) cnt_all = Counter() cnt_com = Counter() @@ -22,7 +22,7 @@ if com: cnt_com[q] += 1 -quarters = tuple(sorted(q for q in cnt_all if q >= cutoff)) +quarters = tuple(sorted(q for q in cnt_all if q >= first_q)) prs_com = tuple(cnt_com[q] for q in quarters) prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters) diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py index 28c6ad200..b37420790 100755 --- a/pr-metrics/pr-created.py +++ b/pr-metrics/pr-created.py @@ -3,13 +3,13 @@ """Produce graph of PRs created by time period.""" -from prs import pr_dates, quarter +from prs import pr_dates, quarter, first from collections import Counter import matplotlib.pyplot as plt -cutoff = "15q1" +first_q = quarter(first) cnt_all = Counter() cnt_com = Counter() @@ -20,7 +20,7 @@ if com: cnt_com[q] += 1 -quarters = tuple(sorted(q for q in cnt_all if q >= cutoff)) +quarters = tuple(sorted(q for q in cnt_all if q >= first_q)) prs_com = tuple(cnt_com[q] for q in quarters) prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 3cadc8aca..8819e67f1 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -3,14 +3,14 @@ """Produce graph of lifetime of PRs over time.""" -from prs import pr_dates, quarter +from prs import pr_dates, quarter, first from collections import defaultdict import matplotlib.pyplot as plt from datetime import datetime -cutoff = "15q1" +first_q = quarter(first) lifetimes_all = defaultdict(list) lifetimes_com = defaultdict(list) @@ -34,7 +34,7 @@ if com: lifetimes_com[q].append(lt) -quarters = tuple(sorted(q for q in lifetimes_all if q >= cutoff)) +quarters = tuple(sorted(q for q in lifetimes_all if q >= first_q)) for q in quarters: lifetimes_all[q].sort() diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py index 7cab0ea8c..a9fb4cfce 100755 --- a/pr-metrics/pr-pending.py +++ b/pr-metrics/pr-pending.py @@ -3,15 +3,13 @@ """Produce graph of PRs pending over time.""" -from prs import pr_dates +from prs import pr_dates, first from datetime import date, datetime, timedelta from collections import Counter import matplotlib.pyplot as plt -cutoff = date(2015, 1, 1) - cnt_tot = Counter() cnt_com = Counter() @@ -26,7 +24,7 @@ if com: cnt_com.update(dates) -dates = tuple(sorted(d for d in cnt_tot.keys() if d >= cutoff)) +dates = tuple(sorted(d for d in cnt_tot.keys() if d >= first)) def avg(cnt, 
date): diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index e2976395b..8e7187c31 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -4,6 +4,7 @@ """PR data an misc common functions.""" import pickle +import datetime with open("pr-data.p", "rb") as f: prs = pickle.load(f) @@ -66,3 +67,6 @@ def pr_dates(): end = pr.closed_at.date() if pr.closed_at else None com = is_community(pr) yield (beg, end, com) + + +first = datetime.date(2015, 1, 1) From f06becf8749cba3e5dd60e5c2bd049681b9cb4a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 13:13:36 +0200 Subject: [PATCH 10/29] Update outdated comment --- pr-metrics/pr-lifetime.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 8819e67f1..56fda0df6 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -18,10 +18,9 @@ for beg, end, com in pr_dates(): # If the PR is still open and it's recent, assign an arbitrary large # lifetime. (The exact value doesn't matter for computing the median, as - # long as it's greater than the median.) Otherwise, if a large enough - # number of PRs were created shortly enough before this script is run, - # they could make the median look artifically low, because pr_dates() - # returns tomorrow as the 'end' date for still-open PRs. + # long as it's greater than the median - that is, as long as we've closed + # at least half the PRs created that quarter. Otherwise the large value + # will make that pretty visible.) if end is None: today = datetime.now().date() lt_so_far = (today - beg).days From cc05d6a61a9439eab56166a1dd25cab409ce5617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 13:26:11 +0200 Subject: [PATCH 11/29] Make first and last date configurable --- pr-metrics/pr-closed.py | 5 +++-- pr-metrics/pr-created.py | 5 +++-- pr-metrics/pr-lifetime.py | 5 +++-- pr-metrics/pr-pending.py | 4 ++-- pr-metrics/prs.py | 8 ++++++++ 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py index 9009eb605..8589bb9e2 100755 --- a/pr-metrics/pr-closed.py +++ b/pr-metrics/pr-closed.py @@ -3,13 +3,14 @@ """Produce graph of PRs closed by time period.""" -from prs import pr_dates, quarter, first +from prs import pr_dates, quarter, first, last from collections import Counter import matplotlib.pyplot as plt first_q = quarter(first) +last_q = quarter(last) cnt_all = Counter() cnt_com = Counter() @@ -22,7 +23,7 @@ if com: cnt_com[q] += 1 -quarters = tuple(sorted(q for q in cnt_all if q >= first_q)) +quarters = tuple(sorted(q for q in cnt_all if first_q <= q <= last_q)) prs_com = tuple(cnt_com[q] for q in quarters) prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters) diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py index b37420790..6e09c9267 100755 --- a/pr-metrics/pr-created.py +++ b/pr-metrics/pr-created.py @@ -3,13 +3,14 @@ """Produce graph of PRs created by time period.""" -from prs import pr_dates, quarter, first +from prs import pr_dates, quarter, first, last from collections import Counter import matplotlib.pyplot as plt first_q = quarter(first) +last_q = quarter(last) cnt_all = Counter() cnt_com = Counter() @@ -20,7 +21,7 @@ if com: cnt_com[q] += 1 -quarters = tuple(sorted(q for q in cnt_all if q >= first_q)) +quarters = tuple(sorted(q for q in cnt_all if first_q <= q <= last_q)) prs_com = tuple(cnt_com[q] for q in quarters) prs_team = 
tuple(cnt_all[q] - cnt_com[q] for q in quarters) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 56fda0df6..9fdf544ec 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -3,7 +3,7 @@ """Produce graph of lifetime of PRs over time.""" -from prs import pr_dates, quarter, first +from prs import pr_dates, quarter, first, last from collections import defaultdict @@ -11,6 +11,7 @@ from datetime import datetime first_q = quarter(first) +last_q = quarter(last) lifetimes_all = defaultdict(list) lifetimes_com = defaultdict(list) @@ -33,7 +34,7 @@ if com: lifetimes_com[q].append(lt) -quarters = tuple(sorted(q for q in lifetimes_all if q >= first_q)) +quarters = tuple(sorted(q for q in lifetimes_all if first_q <= q <= last_q)) for q in quarters: lifetimes_all[q].sort() diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py index a9fb4cfce..8768f6aa9 100755 --- a/pr-metrics/pr-pending.py +++ b/pr-metrics/pr-pending.py @@ -3,7 +3,7 @@ """Produce graph of PRs pending over time.""" -from prs import pr_dates, first +from prs import pr_dates, first, last from datetime import date, datetime, timedelta from collections import Counter @@ -24,7 +24,7 @@ if com: cnt_com.update(dates) -dates = tuple(sorted(d for d in cnt_tot.keys() if d >= first)) +dates = tuple(sorted(d for d in cnt_tot.keys() if first <= d <= last)) def avg(cnt, date): diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index 8e7187c31..6a7e3dcab 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -5,6 +5,7 @@ import pickle import datetime +import os with open("pr-data.p", "rb") as f: prs = pickle.load(f) @@ -70,3 +71,10 @@ def pr_dates(): first = datetime.date(2015, 1, 1) +last = datetime.date(2099, 12, 31) +if "PR_LAST_DATE" in os.environ: + last_str = os.environ["PR_LAST_DATE"] + last = datetime.datetime.strptime(last_str, "%Y-%m-%d").date() +if "PR_FIRST_DATE" in os.environ: + first_str = os.environ["PR_FIRST_DATE"] + first = datetime.datetime.strptime(first_str, "%Y-%m-%d").date() From ed1adeace8b8c659279b646543b4df11e8f37e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 13:29:35 +0200 Subject: [PATCH 12/29] Fix flake8 warnings --- pr-metrics/get-pr-data.py | 2 +- pr-metrics/pr-pending.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py index 02ade7a85..43d1bd9a1 100755 --- a/pr-metrics/get-pr-data.py +++ b/pr-metrics/get-pr-data.py @@ -24,7 +24,7 @@ # slower (about 10x). # Leave commented as we only need the basic info for do.sh. # (Uncomment if you want to use extended PR data with other scripts.) 
- #dummy = p.mergeable + # dummy = p.mergeable prs.append(p) with open("pr-data.p", "wb") as f: diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py index 8768f6aa9..50aeac53c 100755 --- a/pr-metrics/pr-pending.py +++ b/pr-metrics/pr-pending.py @@ -5,7 +5,7 @@ from prs import pr_dates, first, last -from datetime import date, datetime, timedelta +from datetime import datetime, timedelta from collections import Counter import matplotlib.pyplot as plt From 08c0b7c48a5153ab307215015706870d6117ddad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 2 Apr 2021 13:36:44 +0200 Subject: [PATCH 13/29] Rotate labels for quarters --- pr-metrics/pr-closed.py | 1 + pr-metrics/pr-created.py | 1 + pr-metrics/pr-lifetime.py | 1 + 3 files changed, 3 insertions(+) diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py index 8589bb9e2..c12740d79 100755 --- a/pr-metrics/pr-closed.py +++ b/pr-metrics/pr-closed.py @@ -36,6 +36,7 @@ ax.grid(True) ax.set_xlabel("quarter") ax.set_ylabel("Number or PRs closed") +ax.tick_params(axis="x", labelrotation=90) fig.suptitle("Number of PRs closed per quarter") fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p fig.savefig("prs-closed.png") diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py index 6e09c9267..e2a187714 100755 --- a/pr-metrics/pr-created.py +++ b/pr-metrics/pr-created.py @@ -34,6 +34,7 @@ ax.grid(True) ax.set_xlabel("quarter") ax.set_ylabel("Number or PRs created") +ax.tick_params(axis="x", labelrotation=90) fig.suptitle("Number of PRs created per quarter") fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p fig.savefig("prs-created.png") diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 9fdf544ec..48c58fd79 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -63,6 +63,7 @@ def median(sl): ax.grid(True) ax.set_xlabel("quarter") ax.set_ylabel("median lifetime in days of PRs created that quarter") +ax.tick_params(axis="x", labelrotation=90) bot, top = ax.set_ylim() ax.set_ylim(0, min(365, top)) # we don't care about values over 1 year fig.suptitle("Median lifetime of PRs per quarter (less is better)") From b2ee77573aff8344c52f3840ce5778e05c863c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 19 May 2021 12:32:27 +0200 Subject: [PATCH 14/29] Clarify community detection --- pr-metrics/prs.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index 6a7e3dcab..85b9e7460 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -36,16 +36,10 @@ "bensze01", ) - -def is_community(pr): - """Return False if the PR is from a team member or from inside Arm.""" - labels = tuple(l.name for l in pr.labels) - # starting from 2021 we consistently label community PRs - if pr.created_at.date().year >= 2021: - return "Community" in labels - - # before that we used to inconsistently labeled PRs from the team or ARM, +def old_is_community(pr): + # in the past we used to inconsistently labeled PRs from the team or ARM, # so complement that with a list of team members + labels = tuple(l.name for l in pr.labels) if "mbed TLS team" in labels or "Arm Contribution" in labels: return False if pr.user.login in _team_logins: @@ -53,6 +47,20 @@ def is_community(pr): return True +def new_is_community(pr): + labels = tuple(l.name for l in pr.labels) + return "Community" in labels + + +def is_community(pr): + """Return False if the PR is from a 
team member or from inside Arm.""" + # starting from 2021 we consistently label community PRs + if pr.created_at.date().year >= 2021: + return new_is_community(pr) + + return old_is_community(pr) + + def quarter(date): """Return a string decribing this date's quarter, for example 19q3.""" q = str(date.year % 100) From 3feb297f1a8e4e584f85bf297e7e30ac8ef713fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 20 May 2021 12:39:34 +0200 Subject: [PATCH 15/29] Smarter handling of p.mergeable in get-pr-data --- pr-metrics/get-pr-data.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py index 43d1bd9a1..717ec4b31 100755 --- a/pr-metrics/get-pr-data.py +++ b/pr-metrics/get-pr-data.py @@ -20,12 +20,22 @@ for p in r.get_pulls(state="all"): print(p.number) # Accessing p.mergeable forces completion of PR data (by default, only - # basic info such as status and dates is available) but makes the script - # slower (about 10x). - # Leave commented as we only need the basic info for do.sh. - # (Uncomment if you want to use extended PR data with other scripts.) - # dummy = p.mergeable + # basic info such as status and dates is available) but makes things + # slower (about 10x). Only do that for open PRs; we don't need the extra + # info for old PRs (only the dates which are part of the basic info). + if p.state == 'open': + dummy = p.mergeable prs.append(p) +# After a branch has been updated, github doesn't immediately go and recompute +# potential conflicts for all open PRs against this branch; instead it does +# that when the info is requested and even then it's done asynchronously: the +# first request might return no data, but if we come back after we've done all +# the other PRs, the info should have become available in the meantime. 
+for p in prs: + if p.state == 'open' and p.mergeable is None: + print(p.number, 'update') + p.update() + with open("pr-data.p", "wb") as f: pickle.dump(prs, f) From 1d58093a50df192b3c941538e0bfc0e1f5657f90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 20 May 2021 12:39:58 +0200 Subject: [PATCH 16/29] Update pending-mergeability --- pr-metrics/pending-mergeability.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pr-metrics/pending-mergeability.py b/pr-metrics/pending-mergeability.py index 3d0609b9b..f6f8e34b1 100755 --- a/pr-metrics/pending-mergeability.py +++ b/pr-metrics/pending-mergeability.py @@ -12,8 +12,8 @@ c_open = Counter() c_mergeable = Counter() -c_nowork = Counter() c_recent = Counter() +c_recent2 = Counter() for p in prs: if p.state != "open": @@ -23,17 +23,14 @@ c_open[branch] += 1 if p.mergeable: c_mergeable[branch] += 1 - if "needs: work" not in [l.name for l in p.labels]: - c_nowork[branch] += 1 - days = (datetime.now() - p.updated_at).days - if days < 31: - c_recent[branch] += 1 + days = (datetime.now() - p.updated_at).days + if days < 31: + c_recent[branch] += 1 + if days < 8: + c_recent2[branch] += 1 -print("branch: open, no conflicts, minus need work, minus month-old") +print(" branch: open, mergeable, <31d, <8d") for b in sorted(c_open, key=lambda b: c_open[b], reverse=True): - print( - "{:>15}: {: 4}, {: 3}, {: 3}, {: 3}".format( - b, c_open[b], c_mergeable[b], c_nowork[b], c_recent[b] - ) - ) + print("{:>20}: {: 10}, {: 10}, {: 10}, {:10}".format( + b, c_open[b], c_mergeable[b], c_recent[b], c_recent2[b])) From 5f6d268d311ce02de4de196e8a395b97b9331b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Fri, 30 Sep 2022 10:33:37 +0200 Subject: [PATCH 17/29] We no longer use labels for community PRs --- pr-metrics/prs.py | 74 +++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index 85b9e7460..88dcc3cc5 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -11,56 +11,60 @@ prs = pickle.load(f) +# current and past team members, alphabetical order (sort -f) _team_logins = ( - "gilles-peskine-arm", - "hanno-arm", - "RonEld", + "aditya-deshpande-arm", "andresag01", - "mpg", - "sbutcher-arm", - "Patater", - "k-stachowiak", "AndrzejKurek", - "yanesca", - "mazimkhan", - "dgreen-arm", "artokin", + "bensze01", + "brett-warren-arm", + "d3zd3z", + "danh-arm", + "daverodgman", + "davidhorstmann-arm", + "dgreen-arm", + "gabor-mezei-arm", + "gilles-peskine-arm", + "hanno-arm", + "jackbondpreston-arm", "jarlamsa", + "jarvte", + "JoeSubbiani", + "k-stachowiak", + "lukgni", + "mazimkhan", + "minosgalanakis", + "mpg", + "mprse", + "mstarzyk-mobica", + "Patater", + "paul-elliott-arm", "piotr-now", "pjbakker", - "jarvte", - "danh-arm", + "RcColes", "ronald-cron-arm", - "paul-elliott-arm", - "gabor-mezei-arm", - "bensze01", + "RonEld", + "sbutcher-arm", + "tom-cosgrove-arm", + "tom-daubney-arm", + "tuvshinzayaArm", + "wernerlewis", + "xkqian", + "yanesca", + "yuhaoth", + "yutotakano", + "zhangsenWang", ) -def old_is_community(pr): - # in the past we used to inconsistently labeled PRs from the team or ARM, - # so complement that with a list of team members - labels = tuple(l.name for l in pr.labels) - if "mbed TLS team" in labels or "Arm Contribution" in labels: - return False + +def is_community(pr): + """Return False if the PR is from a team member.""" if pr.user.login 
in _team_logins: return False return True -def new_is_community(pr): - labels = tuple(l.name for l in pr.labels) - return "Community" in labels - - -def is_community(pr): - """Return False if the PR is from a team member or from inside Arm.""" - # starting from 2021 we consistently label community PRs - if pr.created_at.date().year >= 2021: - return new_is_community(pr) - - return old_is_community(pr) - - def quarter(date): """Return a string decribing this date's quarter, for example 19q3.""" q = str(date.year % 100) From 94533e109a41144c0a17bf7e6d6a9ba2877d396d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 12 Oct 2022 12:50:58 +0200 Subject: [PATCH 18/29] Update list of core contributors --- pr-metrics/prs.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index 88dcc3cc5..48ce2197b 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -11,14 +11,21 @@ prs = pickle.load(f) -# current and past team members, alphabetical order (sort -f) +# Current and past core contributors, alphabetical order (sort -f). +# +# That is, people who are or have been in one of: +# - https://github.com/orgs/Mbed-TLS/teams/mbed-tls-reviewers/members +# - https://github.com/orgs/Mbed-TLS/teams/mbed-tls-developers/members +# The list is maintained manually in order to retain past members. _team_logins = ( + "adeaarm", "aditya-deshpande-arm", "andresag01", "AndrzejKurek", "artokin", "bensze01", "brett-warren-arm", + "chris-jones-arm", "d3zd3z", "danh-arm", "daverodgman", @@ -32,6 +39,7 @@ "jarvte", "JoeSubbiani", "k-stachowiak", + "lpy4105", "lukgni", "mazimkhan", "minosgalanakis", @@ -46,14 +54,18 @@ "ronald-cron-arm", "RonEld", "sbutcher-arm", + "shanechko", + "superna9999", "tom-cosgrove-arm", "tom-daubney-arm", "tuvshinzayaArm", "wernerlewis", "xkqian", "yanesca", + "yanrayw", "yuhaoth", "yutotakano", + "Zaya-dyno", "zhangsenWang", ) From b7f7f766bb0864f2679ad4a89acd3ac0c3e70279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 12 Oct 2022 13:08:35 +0200 Subject: [PATCH 19/29] Update Readme (PR last date) --- pr-metrics/Readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md index f73c73d70..2d14a78f5 100644 --- a/pr-metrics/Readme.md +++ b/pr-metrics/Readme.md @@ -6,8 +6,8 @@ Usage 1. `./get-pr-data.py` - this takes a long time and requires the environment variable `GITHUB_API_TOKEN` to be set to a valid [github API token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token) (unauthenticated access to the API has a limit on the number or requests that is too low for our number of PRs). It generates `pr-data.p` with pickled data. -2. `./do.sh` - this works offline from the data in `pr-data.p` and generates a - bunch of png and csv files. +2. `PR_LAST_DATE=20yy-mm-dd ./do.sh` - this works offline from the data in + `pr-data.p` and generates a bunch of png and csv files. Requirements ------------ From e69fb3a7e7ae0e33f8ea16599c59b722dd36bcf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 11 Jan 2023 10:34:09 +0100 Subject: [PATCH 20/29] Shift one month for quarterly PR lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we end up including too recent PRs in the quarterly reports, which introduces unwanted variability. 
Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/pr-lifetime.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 48c58fd79..829b818b5 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -8,7 +8,7 @@ from collections import defaultdict import matplotlib.pyplot as plt -from datetime import datetime +from datetime import datetime, timedelta first_q = quarter(first) last_q = quarter(last) @@ -29,7 +29,12 @@ else: lt = (end - beg).days - q = quarter(beg) + # Shit one month (that is, for q2 count March to May, not April to July). + # This is because we want to measure this at the end of each quarter, but + # including PRs raised too recently skew the results. Shifting one month + # means we had time to look at the PR by the time we generate quaterly + # metrics. + q = quarter(beg - timedelta(days=30)) lifetimes_all[q].append(lt) if com: lifetimes_com[q].append(lt) @@ -62,11 +67,11 @@ def median(sl): ax.legend(loc="upper right") ax.grid(True) ax.set_xlabel("quarter") -ax.set_ylabel("median lifetime in days of PRs created that quarter") +ax.set_ylabel("median lifetime in days of PRs created that quarter (shifted 1 month)") ax.tick_params(axis="x", labelrotation=90) bot, top = ax.set_ylim() ax.set_ylim(0, min(365, top)) # we don't care about values over 1 year -fig.suptitle("Median lifetime of PRs per quarter (less is better)") +fig.suptitle("Median lifetime of PRs per quarter (shifted 1 month) (less is better)") fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p fig.savefig("prs-lifetime.png") From cd9c1f6197361ace1545453bff4b27c3058d414b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 11 Jan 2023 10:44:09 +0100 Subject: [PATCH 21/29] Update list of team member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge today's members of the mbed-tls-reviewers into the list. Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/prs.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index 48ce2197b..a31182563 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -34,6 +34,7 @@ "gabor-mezei-arm", "gilles-peskine-arm", "hanno-arm", + "hanno-becker", "jackbondpreston-arm", "jarlamsa", "jarvte", @@ -55,10 +56,14 @@ "RonEld", "sbutcher-arm", "shanechko", + "silabs-hannes", + "silabs-Kusumit", + "silabs-Saketh", "superna9999", "tom-cosgrove-arm", "tom-daubney-arm", "tuvshinzayaArm", + "valeriosetti", "wernerlewis", "xkqian", "yanesca", From 4d58ba094a06a8a9d5e0b7100f6a912263f5878b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 11 Jan 2023 11:04:46 +0100 Subject: [PATCH 22/29] Revert "Shift one month for quarterly PR lifetime" This reverts commit e69fb3a7e7ae0e33f8ea16599c59b722dd36bcf3. This was misguided attempt: it makes the result harder to read without actually solving the underlying problem that we sometimes ask for the median of a series that's still not complete enough. 
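(Illustration of the underlying problem, not part of the patch: for a still-open PR only a
lower bound on its lifetime is known, so a quarter's median is itself only known as an
interval until at least half of that quarter's PRs are closed. This is the interval
approach that a later patch in this series adopts.)

```
from statistics import median

closed_lifetimes = [3, 10, 40]   # days, for PRs already closed
open_lower_bounds = [5, 120]     # days so far, for PRs still open

# If the open PRs all closed today, versus if they never close:
lo = median(closed_lifetimes + open_lower_bounds)
hi = median(closed_lifetimes + [float("inf")] * len(open_lower_bounds))
print(lo, hi)  # 10 40: the true median is only known to lie in [10, 40]
```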
--- pr-metrics/pr-lifetime.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 829b818b5..48c58fd79 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -8,7 +8,7 @@ from collections import defaultdict import matplotlib.pyplot as plt -from datetime import datetime, timedelta +from datetime import datetime first_q = quarter(first) last_q = quarter(last) @@ -29,12 +29,7 @@ else: lt = (end - beg).days - # Shit one month (that is, for q2 count March to May, not April to July). - # This is because we want to measure this at the end of each quarter, but - # including PRs raised too recently skew the results. Shifting one month - # means we had time to look at the PR by the time we generate quaterly - # metrics. - q = quarter(beg - timedelta(days=30)) + q = quarter(beg) lifetimes_all[q].append(lt) if com: lifetimes_com[q].append(lt) @@ -67,11 +62,11 @@ def median(sl): ax.legend(loc="upper right") ax.grid(True) ax.set_xlabel("quarter") -ax.set_ylabel("median lifetime in days of PRs created that quarter (shifted 1 month)") +ax.set_ylabel("median lifetime in days of PRs created that quarter") ax.tick_params(axis="x", labelrotation=90) bot, top = ax.set_ylim() ax.set_ylim(0, min(365, top)) # we don't care about values over 1 year -fig.suptitle("Median lifetime of PRs per quarter (shifted 1 month) (less is better)") +fig.suptitle("Median lifetime of PRs per quarter (less is better)") fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p fig.savefig("prs-lifetime.png") From 45fa6ce127f5c8b00a7a520ed6e810fc5f88b631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 11 Jan 2023 11:39:50 +0100 Subject: [PATCH 23/29] Use statistics.median MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The statistics module entered the standard library in 3.4, so it should be safe to use by now. 
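(Quick comparison, illustration only: `statistics.median` agrees with the removed helper
on non-empty input; the one visible difference is that it raises on empty input where the
helper returned 0, which may or may not matter for a quarter with no community PRs.)

```
from statistics import median, StatisticsError

assert median([3, 10, 40]) == 10
assert median([3, 10, 40, 120]) == 25.0   # mean of the two middle values

try:
    median([])
except StatisticsError:
    pass  # the removed helper returned 0 here
```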
Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/pr-lifetime.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 48c58fd79..373f87770 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -9,6 +9,7 @@ import matplotlib.pyplot as plt from datetime import datetime +from statistics import median first_q = quarter(first) last_q = quarter(last) @@ -36,23 +37,6 @@ quarters = tuple(sorted(q for q in lifetimes_all if first_q <= q <= last_q)) -for q in quarters: - lifetimes_all[q].sort() - lifetimes_com[q].sort() - - -def median(sl): - """Return the median value of a sorted list of numbers (0 if empty).""" - index = (len(sl) - 1) / 2 - if index < 0: - return 0 - if int(index) == index: - return sl[int(index)] - - i, j = int(index - 0.5), int(index + 0.5) - return (sl[i] + sl[j]) / 2 - - med_all = tuple(median(lifetimes_all[q]) for q in quarters) med_com = tuple(median(lifetimes_com[q]) for q in quarters) From f1b54e1e4c4dbef933abd0ef1976399cfd94baed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Wed, 11 Jan 2023 13:58:44 +0100 Subject: [PATCH 24/29] Handle uncertainty about lifetimes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/pr-lifetime.py | 63 ++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 373f87770..8f40573c6 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -10,39 +10,46 @@ import matplotlib.pyplot as plt from datetime import datetime from statistics import median +import math first_q = quarter(first) last_q = quarter(last) -lifetimes_all = defaultdict(list) -lifetimes_com = defaultdict(list) +lifetimes_all_hi = defaultdict(list) +lifetimes_all_lo = defaultdict(list) +lifetimes_com_hi = defaultdict(list) +lifetimes_com_lo = defaultdict(list) +today = datetime.now().date() for beg, end, com in pr_dates(): - # If the PR is still open and it's recent, assign an arbitrary large - # lifetime. (The exact value doesn't matter for computing the median, as - # long as it's greater than the median - that is, as long as we've closed - # at least half the PRs created that quarter. Otherwise the large value - # will make that pretty visible.) 
if end is None: - today = datetime.now().date() - lt_so_far = (today - beg).days - lt = max(365, lt_so_far) + lo = (today - beg).days + hi = math.inf else: - lt = (end - beg).days + hi = lo = (end - beg).days q = quarter(beg) - lifetimes_all[q].append(lt) + lifetimes_all_hi[q].append(hi) + lifetimes_all_lo[q].append(lo) if com: - lifetimes_com[q].append(lt) + lifetimes_com_hi[q].append(hi) + lifetimes_com_lo[q].append(lo) -quarters = tuple(sorted(q for q in lifetimes_all if first_q <= q <= last_q)) +quarters = tuple(sorted(q for q in lifetimes_all_hi if first_q <= q <= last_q)) -med_all = tuple(median(lifetimes_all[q]) for q in quarters) -med_com = tuple(median(lifetimes_com[q]) for q in quarters) +med_all_hi = tuple(median(lifetimes_all_hi[q]) for q in quarters) +med_all_lo = tuple(median(lifetimes_all_lo[q]) for q in quarters) +med_com_hi = tuple(median(lifetimes_com_hi[q]) for q in quarters) +med_com_lo = tuple(median(lifetimes_com_lo[q]) for q in quarters) + +# skip uncertain quarters in the graph +i = len(quarters) +while med_all_hi[i - 1] != med_all_lo[i - 1] or med_com_hi[i - 1] != med_com_lo[i - 1]: + i -= 1 fig, ax = plt.subplots() -ax.plot(quarters, med_all, "b-", label="median overall") -ax.plot(quarters, med_com, "r-", label="median community") +ax.plot(quarters[:i], med_all_hi[:i], "b-", label="median overall") +ax.plot(quarters[:i], med_com_hi[:i], "r-", label="median community") ax.legend(loc="upper right") ax.grid(True) ax.set_xlabel("quarter") @@ -54,6 +61,22 @@ fig.set_size_inches(12.8, 7.2) # default 100 dpi -> 720p fig.savefig("prs-lifetime.png") + +def interval(lo, hi): + if hi == lo: + return str(int(hi)) + if math.isinf(hi): + return "> " + str(int(lo)) + + return str(int(lo)) + "-" + str(int(hi)) + + print("Quarter,median overall,median community") -for q, a, c in zip(quarters, med_all, med_com): - print("{},{},{}".format(q, int(a), int(c))) +for i in range(len(quarters)): + print( + "{},{},{}".format( + quarters[i], + interval(med_all_lo[i], med_all_hi[i]), + interval(med_com_lo[i], med_com_hi[i]), + ) + ) From ac21a51e6be46efb981fa88f6a8b0badb74fbd4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 12 Jan 2023 10:12:09 +0100 Subject: [PATCH 25/29] Update Readme about incomplete results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/Readme.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md index 2d14a78f5..badcc8505 100644 --- a/pr-metrics/Readme.md +++ b/pr-metrics/Readme.md @@ -9,6 +9,20 @@ token](https://help.github.com/en/github/authenticating-to-github/creating-a-per 2. `PR_LAST_DATE=20yy-mm-dd ./do.sh` - this works offline from the data in `pr-data.p` and generates a bunch of png and csv files. +For example, the report for 22Q4 can be generated with: +``` +./get-pr-data.py # assuming GITHUB_API_TOKEN is set in the environement +PR_LAST_DATE=2022-12-31 ./do.sh +``` +The use of `PR_LAST_DATE` is mostly cosmectic in order to avoid including +incomplete data about 23Q1 in the outputs. + +Note that the usage the metric "median lifetime" is special in that it can't +always be computed right after the quarter is over, it sometimes need more +time to pass and/or more PRs from that quarter to be closed. 
In that case, the +uncertain quarter(s) will be excluded from the png graph, and in the csv file +an interval will be reported for the value(s) that can't be determined yet. + Requirements ------------ From e095fc33182ccdce2ee93c55f3c1b77df68243aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 6 Apr 2023 08:48:40 +0200 Subject: [PATCH 26/29] Update team members with current reviewers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/prs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py index a31182563..ca640b0f4 100644 --- a/pr-metrics/prs.py +++ b/pr-metrics/prs.py @@ -40,6 +40,7 @@ "jarvte", "JoeSubbiani", "k-stachowiak", + "laumor01", "lpy4105", "lukgni", "mazimkhan", From ce0804961ccd7d6382914dc7f25a018f0d065c66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 6 Apr 2023 09:16:22 +0200 Subject: [PATCH 27/29] Draw error bars, don't skip uncertain quarters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/pr-lifetime.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py index 8f40573c6..47db7d075 100755 --- a/pr-metrics/pr-lifetime.py +++ b/pr-metrics/pr-lifetime.py @@ -42,14 +42,15 @@ med_com_hi = tuple(median(lifetimes_com_hi[q]) for q in quarters) med_com_lo = tuple(median(lifetimes_com_lo[q]) for q in quarters) -# skip uncertain quarters in the graph -i = len(quarters) -while med_all_hi[i - 1] != med_all_lo[i - 1] or med_com_hi[i - 1] != med_com_lo[i - 1]: - i -= 1 +l = len(quarters) +med_all = tuple((med_all_hi[i] + med_all_lo[i]) / 2 for i in range(l)) +med_com = tuple((med_com_hi[i] + med_com_lo[i]) / 2 for i in range(l)) +err_all = tuple((med_all_hi[i] - med_all_lo[i]) / 2 for i in range(l)) +err_com = tuple((med_com_hi[i] - med_com_lo[i]) / 2 for i in range(l)) fig, ax = plt.subplots() -ax.plot(quarters[:i], med_all_hi[:i], "b-", label="median overall") -ax.plot(quarters[:i], med_com_hi[:i], "r-", label="median community") +ax.errorbar(quarters, med_all, yerr=err_all, fmt="b-", ecolor="r", label="median overall") +ax.errorbar(quarters, med_com, yerr=err_com, fmt="g-", ecolor="r", label="median community") ax.legend(loc="upper right") ax.grid(True) ax.set_xlabel("quarter") From c86237c304a410cf17ef032b29bb24511b0f67e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Thu, 6 Apr 2023 11:12:22 +0200 Subject: [PATCH 28/29] New script pr-backlog.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manuel Pégourié-Gonnard --- pr-metrics/do.sh | 2 +- pr-metrics/pr-backlog.py | 72 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100755 pr-metrics/pr-backlog.py diff --git a/pr-metrics/do.sh b/pr-metrics/do.sh index 3a3180a33..5c64a809f 100755 --- a/pr-metrics/do.sh +++ b/pr-metrics/do.sh @@ -2,7 +2,7 @@ set -eu -for topic in created closed pending lifetime; do +for topic in created closed pending lifetime backlog; do echo "PRs $topic..." 
     rm -f prs-${topic}.png prs-${topic}.csv
     ./pr-${topic}.py > prs-${topic}.csv
diff --git a/pr-metrics/pr-backlog.py b/pr-metrics/pr-backlog.py
new file mode 100755
index 000000000..f3634221f
--- /dev/null
+++ b/pr-metrics/pr-backlog.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce analysis of PR backlog over time."""
+
+from prs import pr_dates, first, last, quarter
+
+from datetime import datetime, timedelta
+from collections import Counter
+from itertools import chain
+
+import matplotlib.pyplot as plt
+
+new_days = 90
+old_days = 365
+
+new = Counter()
+med = Counter()
+old = Counter()
+
+for beg, end, com in pr_dates():
+    if end is None:
+        tomorrow = datetime.now().date() + timedelta(days=1)
+        n_days = (tomorrow - beg).days
+    else:
+        n_days = (end - beg).days
+    for i in range(n_days):
+        q = quarter(beg + timedelta(days=i))
+        q1 = quarter(beg + timedelta(days=i+1))
+        # Only count on each quarter's last day
+        if q == q1:
+            continue
+        if i <= new_days:
+            new[q] += 1
+        elif i <= old_days:
+            med[q] += 1
+        else:
+            old[q] += 1
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+quarters = (q for q in chain(new, med, old) if first_q <= q <= last_q)
+quarters = tuple(sorted(set(quarters)))
+
+new_y = tuple(new[q] for q in quarters)
+med_y = tuple(med[q] for q in quarters)
+old_y = tuple(old[q] for q in quarters)
+sum_y = tuple(old[q] + med[q] for q in quarters)
+
+old_name = "older than {} days".format(old_days)
+med_name = "medium"
+new_name = "recent (less than {} days old)".format(new_days)
+
+width = 0.9
+fig, ax = plt.subplots()
+ax.bar(quarters, old_y, width, label=old_name)
+ax.bar(quarters, med_y, width, label=med_name, bottom=old_y)
+ax.bar(quarters, new_y, width, label=new_name, bottom=sum_y)
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("Number of PRs pending")
+ax.tick_params(axis="x", labelrotation=90)
+fig.suptitle("State of the PR backlog at the end of each quarter")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-backlog.png")
+
+print("Quarter,recent,medium,old,total")
+for q in quarters:
+    print("{},{},{},{},{}".format(q, new[q], med[q], old[q],
+                                  new[q] + med[q] + old[q]))

From b7a02f6af4659202837d7e0eaddf4a67d456f862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3%A9-Gonnard?=
Date: Thu, 6 Apr 2023 11:26:27 +0200
Subject: [PATCH 29/29] Cosmetic adjustments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- default dates - defaults should correspond to typical use
- one legend was not in the same place as the others

Signed-off-by: Manuel Pégourié-Gonnard
---
 pr-metrics/Readme.md      | 26 +++++++++++++++-----------
 pr-metrics/pr-lifetime.py |  2 +-
 pr-metrics/prs.py         | 10 ++++++++--
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md
index badcc8505..febedd976 100644
--- a/pr-metrics/Readme.md
+++ b/pr-metrics/Readme.md
@@ -6,22 +6,26 @@ Usage
 
 1. `./get-pr-data.py` - this takes a long time and requires the environment
    variable `GITHUB_API_TOKEN` to be set to a valid [github API
token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token) (unauthenticated access to the API has a limit on the number or requests that is too low for our number of PRs). It generates `pr-data.p` with pickled data.
-2. `PR_LAST_DATE=20yy-mm-dd ./do.sh` - this works offline from the data in
+2. `./do.sh` - this works offline from the data in
    `pr-data.p` and generates a bunch of png and csv files.
 
-For example, the report for 22Q4 can be generated with:
+For example, the report for the last quarter can be generated with:
 ```
 ./get-pr-data.py # assuming GITHUB_API_TOKEN is set in the environment
-PR_LAST_DATE=2022-12-31 ./do.sh
+./do.sh
 ```
-The use of `PR_LAST_DATE` is mostly cosmetic in order to avoid including
-incomplete data about 23Q1 in the outputs.
-
-Note that the usage of the metric "median lifetime" is special in that it can't
-always be computed right after the quarter is over, it sometimes needs more
-time to pass and/or more PRs from that quarter to be closed. In that case, the
-uncertain quarter(s) will be excluded from the png graph, and in the csv file
-an interval will be reported for the value(s) that can't be determined yet.
+Note that the metric "median lifetime" is special in that it can't always be
+computed right after the quarter is over; it sometimes needs more time to pass
+and/or more PRs from that quarter to be closed. In that case, the uncertain
+quarter(s) will be shown with an error bar in the png graph, and in the csv file an
+interval will be reported for the value(s) that can't be determined yet.
+
+By default, data extends from start of 2020 to end of the previous quarter. It
+is possible to change that range using environment variables, for example:
+```
+PR_FIRST_DATE=2016-01-01 PR_LAST_DATE=2022-12-31 ./do.sh
+```
+gives data from 2016 to 2022 inclusive.
 
 Requirements
 ------------
diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py
index 47db7d075..fa01d44cc 100755
--- a/pr-metrics/pr-lifetime.py
+++ b/pr-metrics/pr-lifetime.py
@@ -51,7 +51,7 @@ fig, ax = plt.subplots()
 
 ax.errorbar(quarters, med_all, yerr=err_all, fmt="b-", ecolor="r", label="median overall")
 ax.errorbar(quarters, med_com, yerr=err_com, fmt="g-", ecolor="r", label="median community")
-ax.legend(loc="upper right")
+ax.legend(loc="upper left")
 ax.grid(True)
 ax.set_xlabel("quarter")
 ax.set_ylabel("median lifetime in days of PRs created that quarter")
diff --git a/pr-metrics/prs.py b/pr-metrics/prs.py
index ca640b0f4..a436d7e27 100644
--- a/pr-metrics/prs.py
+++ b/pr-metrics/prs.py
@@ -100,8 +100,14 @@ def pr_dates():
     yield (beg, end, com)
 
-first = datetime.date(2015, 1, 1)
-last = datetime.date(2099, 12, 31)
+# default start date: 2020-01-01 (when we moved to tf.org)
+first = datetime.date(2020, 1, 1)
+# default end date: end of the previous quarter
+last = datetime.datetime.now().date()
+current_q = quarter(last)
+while quarter(last) == current_q:
+    last -= datetime.timedelta(days=1)
+# default start/end dates can be overridden from the environment
 if "PR_LAST_DATE" in os.environ:
     last_str = os.environ["PR_LAST_DATE"]
     last = datetime.datetime.strptime(last_str, "%Y-%m-%d").date()
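As a quick illustration of the bound-tracking idea in the pr-lifetime.py changes above (the lo/hi lists from patch 24 and the error bars from patch 27), here is a small self-contained sketch. It is not code from the patches, and all the PR ages in it are invented; it only shows why the median can already be exact while some PRs are still open, and how an interval becomes a midpoint plus an error bar.
```
from statistics import median
import math

# Lifetimes in days for PRs opened in one quarter: closed PRs contribute an
# exact value; an open PR is at least as old as it is today (lower bound) and
# may stay open forever (upper bound = infinity).
closed = [3, 10, 40]
open_ages = [70, 200]  # days open so far, close date unknown

lo = closed + open_ages                    # optimistic: every open PR closes today
hi = closed + [math.inf] * len(open_ages)  # pessimistic: no open PR ever closes

print(median(lo), median(hi))  # 40 40 -> enough PRs closed, the median is certain

# With more open than closed PRs the upper bound is still unknown:
lo2 = [3, 10] + [70, 200, 300]
hi2 = [3, 10] + [math.inf] * 3
print(median(lo2), median(hi2))  # 70 inf -> the csv would report "> 70"

# Patch 27 plots the midpoint and half the interval width as an error bar:
mid = (median(lo2) + median(hi2)) / 2  # still inf while the upper bound is unknown
err = (median(hi2) - median(lo2)) / 2
```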
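The new default for `last` in prs.py (walking back one day at a time until the quarter label changes) can also be checked in isolation. This is a minimal sketch, assuming a quarter() labelling along the lines of the "15q1"-style strings used elsewhere in these scripts; the helper below is a stand-in for illustration, not the actual prs.quarter().
```
import datetime

def quarter(d):
    # assumed label format, e.g. 2023-04-06 -> "23q2" (stand-in for prs.quarter)
    return "{:02d}q{}".format(d.year % 100, (d.month - 1) // 3 + 1)

last = datetime.date(2023, 4, 6)  # pretend "today"
current_q = quarter(last)
while quarter(last) == current_q:
    last -= datetime.timedelta(days=1)

print(last)  # 2023-03-31, the last day of the previous quarter
```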