Mbed-TLS · mpg · Jun 30, 2020 · Sep 30, 2020 · Dec 24, 2020 · Dec 24, 2020
diff --git a/pr-metrics/.gitignore b/pr-metrics/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+pr-data.p
+*.png
+*.csv
diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md
@@ -0,0 +1,65 @@
+These scripts collect some metrics about mbed TLS PRs over time.
+
+Usage
+-----
+
+1. `./get-pr-data.py` - this takes a long time and requires the environment
+   variable `GITHUB_API_TOKEN` to be set to a valid [github API
+token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token) (unauthenticated access to the API has a limit on the number of requests that is too low for our number of PRs). It generates `pr-data.p` with pickled data.
+2. `./do.sh` - this works offline from the data in
+   `pr-data.p` and generates a bunch of png and csv files.
+
+For example, the report for the last quarter can be generated with:
+```
+./get-pr-data.py # assuming GITHUB_API_TOKEN is set in the environment
+./do.sh
+```
+Note that the metric "median lifetime" is special in that it can't always be
+computed right after the quarter is over; it sometimes needs more time to pass
+and/or more PRs from that quarter to be closed. In that case, the uncertain
+quarter(s) will be shown with an error bar in the png graph, and in the csv
+file an interval will be reported for the value(s) that can't be determined yet.
+
+By default, data extends from start of 2020 to end of the previous quarter. It
+is possible to change that range using environment variables, for example:
+```
+PR_FIRST_DATE=2016-01-01 PR_LAST_DATE=2022-12-31 ./do.sh
+```
+gives data from 2016 to 2022 inclusive.
+
+Requirements
+------------
+
+These scripts require:
+
+- Python >= 3.6 (required by recent enough matplotlib)
+- matplotlib >= 3.1 (3.0 doesn't work)
+- PyGithub >= 1.43 (any version should work, that was just the oldest tested)
+
+### Ubuntu 20.04 (and probably 18.04)
+
+A simple `apt install python3-github python3-matplotlib` is enough.
+
+### Ubuntu 16.04
+
+On Ubuntu 16.04, by default only Python 3.5 is available, which doesn't
+support a recent enough matplotlib to support those scripts, so the following
+was used to run those scripts on 16.04:
+
+    sudo add-apt-repository ppa:deadsnakes/ppa
+    sudo apt update
+    sudo apt install python3.6 python3.6-venv
+    python3.6 -m venv 36env
+    source 36env/bin/activate
+    pip install --upgrade pip
+    pip install matplotlib
+    pip install pygithub
+
+See `requirements.txt` for an example of a set of working versions.
+
+Note: if you do this, I strongly recommend uninstalling python3.6,
+python3.6-venv and all their dependencies, then removing the deadsnakes PPA
+before any upgrade to 18.04. Failing to do so will result in
+dependency-related headaches as some packages in 18.04 depend on a specific
+version of python3.6 but the version from deadsnakes is higher, so apt won't
+downgrade it and manual intervention will be required.
diff --git a/pr-metrics/do.sh b/pr-metrics/do.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Refresh the outputs of every PR metric: drop the stale prs-<metric>.png and
+# prs-<metric>.csv, then run ./pr-<metric>.py with its stdout saved as the csv.
+
+set -eu
+
+for metric in created closed pending lifetime backlog; do
+    echo "PRs $metric..."
+    rm -f "prs-$metric.png" "prs-$metric.csv"
+    "./pr-$metric.py" > "prs-$metric.csv"
+done
diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Get PR data from github and pickle it."""
+
+import pickle
+import os
+
+from github import Github
+
+# Fail fast with a clear message when no token is configured; carrying on
+# would only crash below with a NameError on the undefined token.
+token = os.environ.get("GITHUB_API_TOKEN")
+if not token:
+    raise SystemExit("You need to provide a GitHub API token")
+
+g = Github(token)
+r = g.get_repo("ARMMbed/mbedtls")
+
+prs = []
+for p in r.get_pulls(state="all"):
+    print(p.number)
+    # Accessing p.mergeable forces completion of PR data (by default, only
+    # basic info such as status and dates is available) but makes things
+    # slower (about 10x). Only do that for open PRs; we don't need the extra
+    # info for old PRs (only the dates which are part of the basic info).
+    if p.state == 'open':
+        _ = p.mergeable
+    prs.append(p)
+
+# After a branch has been updated, github doesn't immediately go and recompute
+# potential conflicts for all open PRs against this branch; instead it does
+# that when the info is requested and even then it's done asynchronously: the
+# first request might return no data, but if we come back after we've done all
+# the other PRs, the info should have become available in the meantime.
+for p in prs:
+    if p.state == 'open' and p.mergeable is None:
+        print(p.number, 'update')
+        p.update()
+
+with open("pr-data.p", "wb") as f:
+    pickle.dump(prs, f)
diff --git a/pr-metrics/pending-mergeability.py b/pr-metrics/pending-mergeability.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce summary of PRs pending per branch and their mergeability status."""
+
+import pickle
+from datetime import datetime, timezone
+from collections import Counter
+
+# NOTE: unpickling can execute arbitrary code; only load a pr-data.p file that
+# you generated yourself.
+with open("pr-data.p", "rb") as f:
+    prs = pickle.load(f)
+
+# Per-base-branch counters: all open PRs, the mergeable ones, and the
+# mergeable ones updated less than 31 / less than 8 days ago.
+c_open = Counter()
+c_mergeable = Counter()
+c_recent = Counter()
+c_recent2 = Counter()
+
+# GitHub timestamps are in UTC, so ages are measured against the current UTC
+# time (taken once, so every PR is compared with the same instant).
+now = datetime.now(timezone.utc)
+
+for p in prs:
+    if p.state != "open":
+        continue
+
+    branch = p.base.ref
+    c_open[branch] += 1
+    if p.mergeable:
+        c_mergeable[branch] += 1
+        updated = p.updated_at
+        if updated.tzinfo is None:
+            # Naive timestamps (as returned by older PyGithub releases) are
+            # presumed to be UTC -- confirm against the PyGithub version used.
+            updated = updated.replace(tzinfo=timezone.utc)
+        days = (now - updated).days
+        if days < 31:
+            c_recent[branch] += 1
+        if days < 8:
+            c_recent2[branch] += 1
+
+
+print("              branch:       open,  mergeable,       <31d,        <8d")
+for b in sorted(c_open, key=lambda b: c_open[b], reverse=True):
+    print("{:>20}: {: 10}, {: 10}, {: 10}, {:10}".format(
+            b, c_open[b], c_mergeable[b], c_recent[b], c_recent2[b]))
diff --git a/pr-metrics/pr-backlog.py b/pr-metrics/pr-backlog.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce analysis of PR backlog over time"""
+
+from prs import pr_dates, first, last, quarter
+
+from datetime import datetime, timedelta
+from collections import Counter
+from itertools import chain
+
+import matplotlib.pyplot as plt
+
+# Age thresholds (in days) separating recent, medium and old pending PRs.
+new_days = 90
+old_days = 365
+
+# Number of PRs pending at the end of each quarter, split by age bucket.
+new = Counter()
+med = Counter()
+old = Counter()
+
+# PRs without a closing date are treated as pending up to and including
+# today, hence the exclusive upper bound of tomorrow.
+tomorrow = datetime.now().date() + timedelta(days=1)
+
+for beg, end, _com in pr_dates():
+    n_days = ((tomorrow if end is None else end) - beg).days
+    for i in range(n_days):
+        q = quarter(beg + timedelta(days=i))
+        q1 = quarter(beg + timedelta(days=i+1))
+        # Only count on each quarter's last day
+        if q == q1:
+            continue
+        # i is the age of the PR, in days, on that last day of the quarter.
+        if i <= new_days:
+            new[q] += 1
+        elif i <= old_days:
+            med[q] += 1
+        else:
+            old[q] += 1
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+# Keep only the quarters between those of `first` and `last`, in order.
+quarters = (q for q in chain(new, med, old) if first_q <= q <= last_q)
+quarters = tuple(sorted(set(quarters)))
+
+new_y = tuple(new[q] for q in quarters)
+med_y = tuple(med[q] for q in quarters)
+old_y = tuple(old[q] for q in quarters)
+# Height of the old + medium stack, on top of which the recent bars sit.
+sum_y = tuple(old[q] + med[q] for q in quarters)
+
+old_name = "older than {} days".format(old_days)
+med_name = "medium"
+new_name = "recent (up to {} days old)".format(new_days)
+
+width = 0.9
+fig, ax = plt.subplots()
+ax.bar(quarters, old_y, width, label=old_name)
+ax.bar(quarters, med_y, width, label=med_name, bottom=old_y)
+ax.bar(quarters, new_y, width, label=new_name, bottom=sum_y)
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("Number of PRs pending")
+ax.tick_params(axis="x", labelrotation=90)
+fig.suptitle("State of the PR backlog at the end of each quarter")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-backlog.png")
+
+print("Quarter,recent,medium,old,total")
+for q in quarters:
+    print("{},{},{},{},{}".format(q, new[q], med[q], old[q],
+            new[q] + med[q] + old[q]))
diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce graph of PRs closed by time period."""
+
+from prs import pr_dates, quarter, first, last
+
+from collections import Counter
+
+import matplotlib.pyplot as plt
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+# PRs closed per quarter: all of them, and those flagged `com` by pr_dates().
+cnt_all = Counter()
+cnt_com = Counter()
+
+for _beg, end, com in pr_dates():
+    # No closing date means there is nothing to count for this PR.
+    if end is None:
+        continue
+    q = quarter(end)
+    cnt_all[q] += 1
+    if com:
+        cnt_com[q] += 1
+
+quarters = tuple(sorted(q for q in cnt_all if first_q <= q <= last_q))
+
+# Whatever is not flagged `com` is plotted as "core team".
+prs_com = tuple(cnt_com[q] for q in quarters)
+prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters)
+
+width = 0.9
+fig, ax = plt.subplots()
+ax.bar(quarters, prs_com, width, label="community")
+ax.bar(quarters, prs_team, width, label="core team", bottom=prs_com)
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("Number of PRs closed")
+ax.tick_params(axis="x", labelrotation=90)
+fig.suptitle("Number of PRs closed per quarter")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-closed.png")
+
+print("Quarter,community closed,total closed")
+for q in quarters:
+    print("{},{},{}".format(q, cnt_com[q], cnt_all[q]))
diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce graph of PRs created by time period."""
+
+from prs import pr_dates, quarter, first, last
+
+from collections import Counter
+
+import matplotlib.pyplot as plt
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+# PRs created per quarter: all of them, and those flagged `com` by pr_dates().
+cnt_all = Counter()
+cnt_com = Counter()
+
+for beg, _end, com in pr_dates():
+    q = quarter(beg)
+    cnt_all[q] += 1
+    if com:
+        cnt_com[q] += 1
+
+quarters = tuple(sorted(q for q in cnt_all if first_q <= q <= last_q))
+
+# Whatever is not flagged `com` is plotted as "core team".
+prs_com = tuple(cnt_com[q] for q in quarters)
+prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters)
+
+width = 0.9
+fig, ax = plt.subplots()
+ax.bar(quarters, prs_com, width, label="community")
+ax.bar(quarters, prs_team, width, label="core team", bottom=prs_com)
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("Number of PRs created")
+ax.tick_params(axis="x", labelrotation=90)
+fig.suptitle("Number of PRs created per quarter")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-created.png")
+
+print("Quarter,community created,total created")
+for q in quarters:
+    print("{},{},{}".format(q, cnt_com[q], cnt_all[q]))