diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml
new file mode 100644
index 000000000000..69d8d44f504f
--- /dev/null
+++ b/.github/workflows/bench-upload.yml
@@ -0,0 +1,46 @@
+# This file is manually managed. It is used to upload benchmark results to the
+# https://github.com/enso-org/engine-benchmark-results repository.
+
+name: Benchmarks upload
+on:
+ workflow_run:
+ workflows: ["Benchmark Engine", "Benchmark Standard Libraries"]
+ types:
+ - completed
+jobs:
+ upload-benchmarks:
+ name: Upload benchmarks
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout enso repository
+ uses: actions/checkout@v4
+ with:
+ repository: enso-org/enso
+ path: enso
+ - name: Checkout engine-benchmark-results repository
+ uses: actions/checkout@v4
+ with:
+ repository: enso-org/engine-benchmark-results
+ path: engine-benchmark-results
+ token: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y \
+ python3 \
+ python3-jinja2 \
+ python3-numpy \
+ python3-pandas
+ sudo apt-get install -y gh
+ - name: Set up git
+ run: |
+ git config --global user.email "ci@enso.org"
+ git config --global user.name "Enso CI Bot"
+ - name: Upload benchmarks
+ run: |
+ cd enso/tools/performance/engine-benchmarks
+ python3 website_regen.py \
+ -v \
+ --local-repo ${{ github.workspace }}/engine-benchmark-results
+ env:
+ GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}
diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md
index c37cfbe060af..3b7bba729079 100644
--- a/tools/performance/engine-benchmarks/README.md
+++ b/tools/performance/engine-benchmarks/README.md
@@ -4,6 +4,12 @@ This directory contains a python script `bench_download.py` for downloading
Engine and stdlib benchmark results from GitHub, and `Engine_Benchs` Enso
project for analysing the downloaded data.
+Note that, for convenience, there is a `bench_tool` directory that is a Python
+package. The `bench_download.py` script uses this package.
+
+To run all the Python tests for that package, run `python -m unittest` in this
+directory.
+
Dependencies for `bench_download.py`:
- python >= 3.7
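
For reference, `python -m unittest` relies on standard test discovery; below is a minimal sketch of the same discovery done programmatically (the `test*.py` pattern is unittest's default and an assumption here, not something the README specifies):

```python
# Roughly what `python -m unittest` does when run from this directory:
# discover all test*.py modules (e.g. bench_tool/test_bench_results.py) and run them.
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover(start_dir=".", pattern="test*.py")
    unittest.TextTestRunner(verbosity=2).run(suite)
```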
diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py
index 7743adb5627a..9aa6b3c081d4 100755
--- a/tools/performance/engine-benchmarks/bench_download.py
+++ b/tools/performance/engine-benchmarks/bench_download.py
@@ -45,451 +45,44 @@
- Used as a template engine for the HTML.
"""
+import sys
+
+from bench_tool.bench_results import get_bench_runs, fetch_job_reports
+from bench_tool.remote_cache import ReadonlyRemoteCache
+from bench_tool.utils import gather_all_bench_labels, sort_job_reports
+
+if sys.version_info < (3, 7):
+ print("ERROR: python version lower than 3.7")
+ exit(1)
+
import asyncio
-import json
import logging
import logging.config
-import math
import os
-import re
import shutil
-import subprocess
-import sys
import tempfile
-import zipfile
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from csv import DictWriter
from datetime import datetime, timedelta
-from enum import Enum
from os import path
-from typing import List, Dict, Optional, Any, Union, Set
-from dataclasses import dataclass
-import xml.etree.ElementTree as ET
-from urllib.parse import urlencode
+from typing import List, Dict, Optional, Set
+from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \
+ JINJA_TEMPLATE, JobRun, JobReport, \
+ TemplateBenchData, JinjaData, Source
+from bench_tool.gh import ensure_gh_installed
+from bench_tool.template_render import create_template_data, render_html
-if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7):
- print("ERROR: python version lower than 3.7")
- exit(1)
try:
import pandas as pd
import numpy as np
import jinja2
except ModuleNotFoundError as err:
print("ERROR: One of pandas, numpy, or jinja2 packages not installed", file=sys.stderr)
+ print("Install either with `pip install pandas numpy jinja2` or "
+ "with `apt-get install python3-pandas python3-numpy python3-jinja2`", file=sys.stderr)
exit(1)
-DATE_FORMAT = "%Y-%m-%d"
-ENGINE_BENCH_WORKFLOW_ID = 29450898
-"""
-Workflow ID of engine benchmarks, got via `gh api
-'/repos/enso-org/enso/actions/workflows'`.
-The name of the workflow is 'Benchmark Engine'
-"""
-NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764
-"""
-Workflow ID for 'Benchmark Engine' workflow, which is the new workflow
-since 2023-08-22.
-"""
-STDLIBS_BENCH_WORKFLOW_ID = 66661001
-"""
-Workflow ID of stdlibs benchmarks, got via `gh api
-'/repos/enso-org/enso/actions/workflows'`.
-The name is 'Benchmark Standard Libraries'
-"""
-GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
-""" Date format as returned from responses in GH API"""
-ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/"
-JINJA_TEMPLATE = "templates/template_jinja.html"
-""" Path to the Jinja HTML template """
-TEMPLATES_DIR = "templates"
-GENERATED_SITE_DIR = "generated_site"
-GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90)
-
-
-class Source(Enum):
- ENGINE = "engine"
- STDLIB = "stdlib"
-
- def workflow_ids(self) -> List[int]:
- if self == Source.ENGINE:
- return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID]
- elif self == Source.STDLIB:
- return [STDLIBS_BENCH_WORKFLOW_ID]
- else:
- raise ValueError(f"Unknown source {self}")
-
-
-@dataclass
-class Author:
- name: str
-
-
-@dataclass
-class Commit:
- """ Corresponds to the commit from GH API """
- id: str
- author: Author
- timestamp: str
- message: str
-
-
-@dataclass
-class JobRun:
- """
- Gathered via the GH API. Defines a single run of an Engine benchmark job.
- """
- id: str
- display_title: str
- html_url: str
- run_attempt: int
- """ An event as defined by the GitHub API, for example 'push' or 'schedule' """
- event: str
- head_commit: Commit
-
-
-@dataclass
-class JobReport:
- """
- Gathered via the GH API - a report that is pushed as an aritfact to the job.
- Contains a XML file with scores for all the benchmarks.
- """
- label_score_dict: Dict[str, float]
- """ A mapping of benchmark labels to their scores """
- bench_run: JobRun
-
-
-@dataclass
-class BenchmarkData:
- """
- Data for a single benchmark compiled from all the job reports.
- """
-
- @dataclass
- class Entry:
- score: float
- commit: Commit
- bench_run_url: str
- bench_run_event: str
-
- label: str
- """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """
- entries: List[Entry]
- """ Entries sorted by timestamps """
-
-
-@dataclass
-class BenchDatapoint:
- """
- A single datapoint that will be on the chart. `timestamp` is on X axis,
- `score` on Y axis, and the rest of the fields is used either for the tooltip,
- or for the selection info.
- """
- timestamp: datetime
- score: float
- score_diff: str
- """ Difference of the score with previous datapoint, or NaN """
- score_diff_perc: str
- tooltip: str
- bench_run_url: str
- commit_id: str
- commit_msg: str
- commit_author: str
- commit_url: str
-
-
-@dataclass
-class TemplateBenchData:
- """ Data for one benchmark label (with a unique name and ID) """
- id: str
- """ ID of the benchmark, must not contain dots """
- name: str
- """ Human readable name of the benchmark """
- branches_datapoints: Dict[str, List[BenchDatapoint]]
- """ Mapping of branches to datapoints for that branch """
-
-
-@dataclass
-class JinjaData:
- bench_source: Source
- bench_datas: List[TemplateBenchData]
- branches: List[str]
- since: datetime
- until: datetime
- display_since: datetime
- """ The date from which all the datapoints are first displayed """
-
-
-def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun:
- return JobRun(
- id=str(obj["id"]),
- html_url=obj["html_url"],
- run_attempt=int(obj["run_attempt"]),
- event=obj["event"],
- display_title=obj["display_title"],
- head_commit=Commit(
- id=obj["head_commit"]["id"],
- message=obj["head_commit"]["message"],
- timestamp=obj["head_commit"]["timestamp"],
- author=Author(
- name=obj["head_commit"]["author"]["name"]
- )
- )
- )
-
-
-def _parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport:
- return JobReport(
- bench_run=_parse_bench_run_from_json(obj["bench_run"]),
- label_score_dict=obj["label_score_dict"]
- )
-
-
-def _bench_report_to_json(bench_report: JobReport) -> Dict[Any, Any]:
- return {
- "bench_run": {
- "id": bench_report.bench_run.id,
- "html_url": bench_report.bench_run.html_url,
- "run_attempt": bench_report.bench_run.run_attempt,
- "event": bench_report.bench_run.event,
- "display_title": bench_report.bench_run.display_title,
- "head_commit": {
- "id": bench_report.bench_run.head_commit.id,
- "message": bench_report.bench_run.head_commit.message,
- "timestamp": bench_report.bench_run.head_commit.timestamp,
- "author": {
- "name": bench_report.bench_run.head_commit.author.name
- }
- }
- },
- "label_score_dict": bench_report.label_score_dict
- }
-
-
-def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport":
- logging.debug(f"Parsing BenchReport from {bench_report_xml_path}")
- tree = ET.parse(bench_report_xml_path)
- root = tree.getroot()
- label_score_dict: Dict[str, float] = dict()
- for cases in root:
- assert cases.tag == "cases"
- for case in cases:
- assert case.tag == "case"
- label = case.findtext("label").strip()
- scores = case.find("scores")
- scores_float = [float(score.text.strip()) for score in scores]
- if len(scores_float) > 1:
- logging.warning(f"More than one score for benchmark {label}, "
- f"using the last one (the newest one).")
- label_score_dict[label] = scores_float[len(scores_float) - 1]
- return JobReport(
- label_score_dict=label_score_dict,
- bench_run=bench_run
- )
-
-
-def _is_benchrun_id(name: str) -> bool:
- return re.match("\d{9}", name) is not None
-
-
-def _read_json(json_file: str) -> Dict[Any, Any]:
- assert path.exists(json_file) and path.isfile(json_file)
- with open(json_file, "r") as f:
- return json.load(f)
-
-
-async def _invoke_gh_api(endpoint: str,
- query_params: Dict[str, str] = {},
- result_as_text: bool = True) -> Union[Dict[str, Any], bytes]:
- urlencode(query_params)
- cmd = [
- "gh",
- "api",
- f"/repos/enso-org/enso{endpoint}" + "?" + urlencode(query_params)
- ]
- logging.info(f"Starting subprocess `{' '.join(cmd)}`")
- proc = await asyncio.create_subprocess_exec("gh", *cmd[1:],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- out, err = await proc.communicate()
- logging.info(f"Finished subprocess `{' '.join(cmd)}`")
- if proc.returncode != 0:
- print("Command `" + " ".join(cmd) + "` FAILED with errcode " + str(
- proc.returncode))
- print(err.decode())
- exit(proc.returncode)
- if result_as_text:
- return json.loads(out.decode())
- else:
- return out
-
-
-class Cache:
- """
- Cache is a directory filled with json files that have name of format .json, and
- in every json, there is `BenchReport` dataclass serialized.
- """
-
- def __init__(self, dirname: str):
- assert path.exists(dirname) and path.isdir(dirname)
- self._dir = dirname
- # Keys are BenchRun ids
- self._items: Dict[str, JobReport] = {}
- for fname in os.listdir(dirname):
- fname_without_ext, ext = path.splitext(fname)
- if _is_benchrun_id(fname_without_ext) and ext == ".json":
- logging.debug(f"Loading into cache from {fname}")
- bench_report = _parse_bench_report_from_json(
- _read_json(path.join(dirname, fname))
- )
- self._items[fname_without_ext] = bench_report
-
- def __len__(self) -> int:
- return len(self._items)
-
- def __contains__(self, key: str) -> bool:
- assert _is_benchrun_id(key)
- return key in self._items
-
- def __getitem__(self, item: str) -> Optional[JobReport]:
- if not _is_benchrun_id(item):
- return None
- else:
- return self._items[item]
-
- def __setitem__(self, bench_run_id: str, bench_report: JobReport) -> None:
- assert isinstance(bench_report, JobReport)
- assert isinstance(bench_run_id, str)
- assert _is_benchrun_id(bench_run_id)
- self._items[bench_run_id] = bench_report
- json_fname = path.join(self._dir, bench_run_id + ".json")
- logging.debug(f"Putting {bench_run_id} into cache {json_fname}")
- with open(json_fname, "w") as json_file:
- json.dump(
- _bench_report_to_json(bench_report),
- json_file,
- indent=2,
- ensure_ascii=False
- )
-
- def __str__(self) -> str:
- return str(self._items)
-
- def contains(self, bench_run_id: str) -> bool:
- return bench_run_id in self._items
-
-
-class FakeCache:
- def __getitem__(self, item):
- return None
-
- def __setitem__(self, key, value):
- pass
-
- def __contains__(self, item):
- return False
-
- def __len__(self):
- return 0
-
-
-async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]:
- """
- Fetches the list of all the job runs from the GH API for the specified `branch`.
- """
- logging.info(f"Looking for all successful Engine benchmark workflow run "
- f"actions from {since} to {until} for branch {branch} "
- f"and workflow ID {workflow_id}")
- query_fields = {
- "branch": branch,
- "status": "success",
- "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT),
- # Start with 1, just to determine the total count
- "per_page": "1"
- }
- res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", query_fields)
- total_count = int(res["total_count"])
- per_page = 3
- logging.debug(f"Total count of all runs: {total_count} for workflow ID "
- f"{workflow_id}. Will process {per_page} runs per page")
-
- async def get_and_parse_run(page: int, parsed_bench_runs) -> None:
- _query_fields = query_fields.copy()
- _query_fields["page"] = str(page)
- res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", _query_fields)
- bench_runs_json = res["workflow_runs"]
- _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json)
- for bench_run_json in bench_runs_json]
- parsed_bench_runs.extend(_parsed_bench_runs)
-
- # Now we know the total count, so we can fetch all the runs
- query_fields["per_page"] = str(per_page)
- num_queries = math.ceil(total_count / per_page)
- parsed_bench_runs = []
-
- tasks = []
- # Page is indexed from 1
- for page in range(1, num_queries + 1):
- tasks.append(get_and_parse_run(page, parsed_bench_runs))
- await asyncio.gather(*tasks)
-
- return parsed_bench_runs
-
-
-async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Optional[JobReport]:
- """
- Extracts some data from the given bench_run, which was fetched via the GH API,
- optionally getting it from the cache.
- An artifact in GH can expire, in such case, returns None.
- :param bench_run:
- :param cache:
- :param temp_dir: Used for downloading and unzipping artifacts.
- :return: None if the corresponding artifact expired.
- """
- if bench_run.id in cache:
- logging.info(f"Getting bench run with ID {bench_run.id} from cache")
- return cache[bench_run.id]
-
- # There might be multiple artifacts in the artifact list for a benchmark run
- # We are looking for the one named 'Runtime Benchmark Report', which will
- # be downloaded as a ZIP file.
- obj: Dict[str, Any] = await _invoke_gh_api(f"/actions/runs/{bench_run.id}/artifacts")
- artifacts = obj["artifacts"]
- assert len(artifacts) == 1, "There should be exactly one artifact for a benchmark run"
- bench_report_artifact = artifacts[0]
- assert bench_report_artifact, "Benchmark Report artifact not found"
- artifact_id = str(bench_report_artifact["id"])
- if bench_report_artifact["expired"]:
- created_at = bench_report_artifact["created_at"]
- updated_at = bench_report_artifact["updated_at"]
- expires_at = bench_report_artifact["expires_at"]
- logging.warning(f"Artifact with ID {artifact_id} from bench report {bench_run.id} has expired. "
- f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}")
- return None
-
- # Get contents of the ZIP artifact file
- artifact_ret = await _invoke_gh_api(f"/actions/artifacts/{artifact_id}/zip", result_as_text=False)
- zip_file_name = os.path.join(temp_dir, artifact_id + ".zip")
- logging.debug(f"Writing artifact ZIP content into {zip_file_name}")
- with open(zip_file_name, "wb") as zip_file:
- zip_file.write(artifact_ret)
-
- extracted_dirname = os.path.join(temp_dir, artifact_id)
- if os.path.exists(extracted_dirname):
- shutil.rmtree(extracted_dirname)
- os.mkdir(extracted_dirname)
-
- logging.debug(f"Extracting {zip_file_name} into {extracted_dirname}")
- zip_file = zipfile.ZipFile(zip_file_name, "r")
- zip_file.extractall(extracted_dirname)
- bench_report_xml = path.join(extracted_dirname, "bench-report.xml")
- assert path.exists(bench_report_xml)
-
- bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run)
- cache[bench_run.id] = bench_report_parsed
- return bench_report_parsed
-
-
CSV_FIELDNAMES = [
"label",
"score",
@@ -524,171 +117,9 @@ def write_bench_reports_to_csv(bench_reports: List[JobReport], csv_fname: str) -
})
-def populate_cache(cache_dir: str) -> Cache:
- """
- Initializes cache from `cache_dir`, if there are any items.
- See docs of `Cache`.
-
- :param cache_dir: Path to the cache directory. Does not have to exist
- :return: Populated cache. Might be empty.
- """
- if not path.exists(cache_dir):
- logging.info(f"No cache at {cache_dir}, creating the cache directory")
- os.mkdir(cache_dir)
- logging.debug(f"Initializing cache from {cache_dir}")
- cache = Cache(cache_dir)
- logging.debug(f"Cache populated with {len(cache)} items")
- return cache
-
-
-def create_template_data(
- job_reports_per_branch: Dict[str, List[JobReport]],
- bench_labels: Set[str]) -> List[TemplateBenchData]:
- """
- Creates all the necessary data for the Jinja template from all collected
- benchmark job reports.
- :param job_reports_per_branch: Mapping of branch name to list of job reports.
- job reports should be sorted by the commit date, otherwise the difference
- between scores might be wrongly computed.
- :param bench_labels:
- :return:
- """
-
- def pct_to_str(score_diff_perc: float) -> str:
- if not np.isnan(score_diff_perc):
- buff = "+" if score_diff_perc > 0 else ""
- buff += "{:.5f}".format(score_diff_perc * 100)
- buff += "%"
- return buff
- else:
- return "NaN"
-
- def diff_str(score_diff: float, score_diff_perc: float) -> str:
- if not np.isnan(score_diff):
- diff_str = "+" if score_diff > 0 else ""
- diff_str += "{:.5f}".format(score_diff)
- diff_str += " ("
- diff_str += pct_to_str(score_diff_perc)
- diff_str += ")"
- return diff_str
- else:
- return "NA"
-
- template_bench_datas: List[TemplateBenchData] = []
- for bench_label in bench_labels:
- logging.debug(f"Creating template data for benchmark {bench_label}")
- branch_datapoints: Dict[str, List[BenchDatapoint]] = {}
- for branch, job_reports in job_reports_per_branch.items():
- logging.debug(f"Creating datapoints for branch {branch} from {len(job_reports)} job reports")
- datapoints: List[BenchDatapoint] = []
- for job_report in job_reports:
- prev_datapoint: Optional[BenchDatapoint] = \
- datapoints[-1] if len(datapoints) > 0 else None
- if bench_label in job_report.label_score_dict:
- score = job_report.label_score_dict[bench_label]
- commit = job_report.bench_run.head_commit
- timestamp = datetime.strptime(
- commit.timestamp,
- GH_DATE_FORMAT
- )
- commit_msg_header = \
- commit.message.splitlines()[0].replace('"', "'")
- series = pd.Series([
- prev_datapoint.score if prev_datapoint else None,
- score
- ])
- score_diff = series.diff()[1]
- score_diff_perc = series.pct_change()[1]
- tooltip = "score = " + str(score) + "\\n"
- tooltip += "date = " + str(timestamp) + "\\n"
- tooltip += "branch = " + branch + "\\n"
- tooltip += "diff = " + diff_str(score_diff, score_diff_perc)
- author_name = commit.author.name\
- .replace('"', '\\"')\
- .replace("'", "\\'")
- datapoints.append(BenchDatapoint(
- timestamp=timestamp,
- score=score,
- score_diff=str(score_diff),
- score_diff_perc=pct_to_str(score_diff_perc),
- tooltip=tooltip,
- bench_run_url=job_report.bench_run.html_url,
- commit_id=commit.id,
- commit_msg=commit_msg_header,
- commit_author=author_name,
- commit_url=ENSO_COMMIT_BASE_URL + commit.id,
- ))
- logging.debug(f"{len(datapoints)} datapoints created for branch {branch}")
- branch_datapoints[branch] = datapoints.copy()
- logging.debug(f"Template data for benchmark {bench_label} created")
- template_bench_datas.append(TemplateBenchData(
- id=_label_to_id(bench_label),
- name=_label_to_name(bench_label),
- branches_datapoints=branch_datapoints,
- ))
- return template_bench_datas
-
-
-def _label_to_id(label: str) -> str:
- return label.replace(".", "_")
-
-
-def _label_to_name(label: str) -> str:
- items = label.split(".")
- assert len(items) >= 2
- filtered_items = \
- [item for item in items if item not in (
- "org",
- "enso",
- "benchmark",
- "benchmarks",
- "semantic",
- "interpreter",
- "bench"
- )]
- return "_".join(filtered_items)
-
-
-def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
- """
- Iterates through all the job reports and gathers all the benchmark labels
- found. Note that every job report can have a different set of benchmark labels.
- :return: List of benchmark labels.
- """
- all_labels = set()
- for job_report in job_reports:
- for labels in job_report.label_score_dict.keys():
- all_labels.add(labels)
- return all_labels
-
-
-def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) -> None:
- jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader("."))
- jinja_template = jinja_env.get_template(template_file)
- generated_html = jinja_template.render(jinja_data.__dict__)
- if path.exists(html_out_fname):
- logging.info(f"{html_out_fname} already exist, rewritting")
- with open(html_out_fname, "w") as html_file:
- html_file.write(generated_html)
-
-
-def ensure_gh_installed() -> None:
- try:
- out = subprocess.run(["gh", "--version"], check=True, capture_output=True)
- if out.returncode != 0:
- print("`gh` command not found - GH CLI utility is not installed. "
- "See https://cli.github.com/", file=sys.stderr)
- exit(1)
- except subprocess.CalledProcessError:
- print("`gh` command not found - GH CLI utility is not installed. "
- "See https://cli.github.com/", file=sys.stderr)
- exit(1)
-
-
async def main():
default_since: datetime = (datetime.now() - timedelta(days=14))
default_until: datetime = datetime.now()
- default_cache_dir = path.expanduser("~/.cache/enso_bench_download")
default_csv_out = "Engine_Benchs/data/benchs.csv"
date_format_help = DATE_FORMAT.replace("%", "%%")
@@ -724,17 +155,6 @@ def _parse_bench_source(_bench_source: str) -> Source:
help=f"The date until which the benchmark results will be gathered. "
f"Format is {date_format_help}. "
f"The default is today")
- arg_parser.add_argument("--use-cache",
- default=False,
- metavar="(true|false)",
- type=lambda input: True if input in ("true", "True") else False,
- help="Whether the cache directory should be used. The default is False.")
- arg_parser.add_argument("-c", "--cache", action="store",
- default=default_cache_dir,
- metavar="CACHE_DIR",
- help=f"Cache directory. Makes sense only iff specified with --use-cache argument. "
- f"The default is {default_cache_dir}. If there are any troubles with the "
- f"cache, just do `rm -rf {default_cache_dir}`.")
arg_parser.add_argument("-b", "--branches", action="store",
nargs="+",
default=["develop"],
@@ -766,20 +186,17 @@ def _parse_bench_source(_bench_source: str) -> Source:
since: datetime = args.since
until: datetime = args.until
- cache_dir: str = args.cache
if not args.tmp_dir:
temp_dir: str = tempfile.mkdtemp()
else:
temp_dir: str = args.tmp_dir
- use_cache: bool = args.use_cache
- assert cache_dir and temp_dir
bench_source: Source = args.source
csv_output: str = args.csv_output
create_csv: bool = args.create_csv
branches: List[str] = args.branches
labels_override: Set[str] = args.labels
- logging.debug(f"parsed args: since={since}, until={until}, cache_dir={cache_dir}, "
- f"temp_dir={temp_dir}, use_cache={use_cache}, bench_source={bench_source}, "
+ logging.debug(f"parsed args: since={since}, until={until}, "
+ f"temp_dir={temp_dir}, bench_source={bench_source}, "
f"csv_output={csv_output}, "
f"create_csv={create_csv}, branches={branches}, "
f"labels_override={labels_override}")
@@ -789,22 +206,15 @@ def _parse_bench_source(_bench_source: str) -> Source:
# If the user requires benchmarks for which artifacts are not retained
# anymore, then cache should be used.
min_since_without_cache = datetime.today() - GH_ARTIFACT_RETENTION_PERIOD
- if not use_cache and since < min_since_without_cache:
- logging.warning(f"The default GH artifact retention period is "
+ if since < min_since_without_cache:
+ logging.info(f"The default GH artifact retention period is "
f"{GH_ARTIFACT_RETENTION_PERIOD.days} days. "
f"This means that all the artifacts older than "
f"{min_since_without_cache.date()} are expired."
- f"The use_cache parameter is set to False, so no "
- f"expired artifacts will be fetched.")
- logging.warning(f"The `since` parameter is reset to "
- f"{min_since_without_cache.date()} to prevent "
- f"unnecessary GH API queries.")
- since = min_since_without_cache
-
- if use_cache:
- cache = populate_cache(cache_dir)
- else:
- cache = FakeCache()
+ f"The since date was set to {since}, so the remote cache is enabled, "
+ f"and the older artifacts will be fetched from the cache.")
+
+ remote_cache = ReadonlyRemoteCache()
bench_labels: Optional[Set[str]] = None
""" Set of all gathered benchmark labels from all the job reports """
@@ -821,18 +231,7 @@ def _parse_bench_source(_bench_source: str) -> Source:
f" until {until} for branch {branch}")
exit(1)
- job_reports: List[JobReport] = []
-
- async def _process_report(_bench_run):
- _job_report = await get_bench_report(_bench_run, cache, temp_dir)
- if _job_report:
- job_reports.append(_job_report)
-
- tasks = []
- for bench_run in bench_runs:
- tasks.append(_process_report(bench_run))
- await asyncio.gather(*tasks)
-
+ job_reports = await fetch_job_reports(bench_runs, remote_cache)
logging.debug(f"Got {len(job_reports)} job reports for branch {branch}")
if len(job_reports) == 0:
print(f"There were 0 job_reports in the specified time interval, "
@@ -841,14 +240,7 @@ async def _process_report(_bench_run):
exit(1)
logging.debug("Sorting job_reports by commit date")
-
- def _get_timestamp(job_report: JobReport) -> datetime:
- return datetime.strptime(
- job_report.bench_run.head_commit.timestamp,
- GH_DATE_FORMAT
- )
-
- job_reports.sort(key=lambda report: _get_timestamp(report))
+ sort_job_reports(job_reports)
if create_csv:
write_bench_reports_to_csv(job_reports, csv_output)
@@ -858,7 +250,7 @@ def _get_timestamp(job_report: JobReport) -> datetime:
# Gather all the benchmark labels from all the job reports
if bench_labels is None:
- all_bench_labels = _gather_all_bench_labels(job_reports)
+ all_bench_labels = gather_all_bench_labels(job_reports)
if len(labels_override) > 0:
logging.info(f"Subset of labels specified: {labels_override}")
if not set(labels_override).issubset(all_bench_labels):
@@ -883,6 +275,7 @@ def _get_timestamp(job_report: JobReport) -> datetime:
bench_datas=template_bench_datas,
bench_source=bench_source,
branches=branches,
+ timestamp=datetime.now()
)
# Render Jinja template with jinja_data
@@ -890,10 +283,9 @@ def _get_timestamp(job_report: JobReport) -> datetime:
os.mkdir(GENERATED_SITE_DIR)
logging.debug(f"Rendering HTML from {JINJA_TEMPLATE} to {GENERATED_SITE_DIR}")
- site_path = path.join(GENERATED_SITE_DIR, bench_source.value + "-benchs.html")
+ site_path = GENERATED_SITE_DIR.joinpath(bench_source.value + "-benchs.html")
render_html(
jinja_data,
- JINJA_TEMPLATE,
site_path
)
logging.debug(f"Copying static site content from {TEMPLATES_DIR} to {GENERATED_SITE_DIR}")
diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py
new file mode 100644
index 000000000000..f3318a98c0c7
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py
@@ -0,0 +1,220 @@
+import os
+from dataclasses import dataclass
+from datetime import timedelta, datetime
+from enum import Enum
+from pathlib import Path
+from typing import List, Dict, Any
+
+
+def pkg_dir() -> Path:
+ """ Directory of this package """
+ return Path(os.path.dirname(os.path.realpath(__file__)))
+
+
+ENSO_REPO = "enso-org/enso"
+BENCH_REPO = "enso-org/engine-benchmark-results"
+BRANCH_DEVELOP = "develop"
+DATE_FORMAT = "%Y-%m-%d"
+GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
+""" Date format as returned from responses in GH API """
+ENGINE_BENCH_WORKFLOW_ID = 29450898
+"""
+Workflow ID of engine benchmarks, got via `gh api
+'/repos/enso-org/enso/actions/workflows'`.
+The name of the workflow is 'Benchmark Engine'
+"""
+NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764
+"""
+Workflow ID for 'Benchmark Engine' workflow, which is the new workflow
+since 2023-08-22.
+"""
+STDLIBS_BENCH_WORKFLOW_ID = 66661001
+"""
+Workflow ID of stdlibs benchmarks, got via `gh api
+'/repos/enso-org/enso/actions/workflows'`.
+The name is 'Benchmark Standard Libraries'
+"""
+""" Date format as returned from responses in GH API"""
+ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/"
+
+GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90)
+
+GENERATED_SITE_DIR = pkg_dir().parent.joinpath("generated_site")
+TEMPLATES_DIR = pkg_dir().parent.joinpath("templates")
+JINJA_TEMPLATE = TEMPLATES_DIR.joinpath("template_jinja.html")
+
+assert TEMPLATES_DIR.exists()
+assert JINJA_TEMPLATE.exists()
+
+
+class Source(Enum):
+ ENGINE = "engine"
+ STDLIB = "stdlib"
+
+ def workflow_ids(self) -> List[int]:
+ if self == Source.ENGINE:
+ return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID]
+ elif self == Source.STDLIB:
+ return [STDLIBS_BENCH_WORKFLOW_ID]
+ else:
+ raise ValueError(f"Unknown source {self}")
+
+ def artifact_names(self) -> List[str]:
+ if self == Source.ENGINE:
+ return ["Runtime Benchmark Report"]
+ elif self == Source.STDLIB:
+ return ["Enso JMH Benchmark Report"]
+ else:
+ raise ValueError(f"Unknown source {self}")
+
+
+@dataclass
+class Author:
+ name: str
+
+
+@dataclass
+class Commit:
+ """ Corresponds to the commit from GH API """
+ id: str
+ author: Author
+ timestamp: str
+ message: str
+
+
+@dataclass
+class JobRun:
+ """
+ Gathered via the GH API. Defines a single run of an Engine benchmark job.
+ """
+ id: str
+ display_title: str
+ html_url: str
+ run_attempt: int
+ """ An event as defined by the GitHub API, for example 'push' or 'schedule' """
+ event: str
+ head_commit: Commit
+
+ @staticmethod
+ def from_dict(obj: Dict[Any, Any]) -> "JobRun":
+ return JobRun(
+ id=str(obj["id"]),
+ html_url=obj["html_url"],
+ run_attempt=int(obj["run_attempt"]),
+ event=obj["event"],
+ display_title=obj["display_title"],
+ head_commit=Commit(
+ id=obj["head_commit"]["id"],
+ message=obj["head_commit"]["message"],
+ timestamp=obj["head_commit"]["timestamp"],
+ author=Author(
+ name=obj["head_commit"]["author"]["name"]
+ )
+ )
+ )
+
+ def to_dict(self) -> Dict[Any, Any]:
+ return {
+ "id": self.id,
+ "html_url": self.html_url,
+ "run_attempt": self.run_attempt,
+ "event": self.event,
+ "display_title": self.display_title,
+ "head_commit": {
+ "id": self.head_commit.id,
+ "message": self.head_commit.message,
+ "timestamp": self.head_commit.timestamp,
+ "author": {
+ "name": self.head_commit.author.name
+ }
+ }
+ }
+
+
+@dataclass
+class JobReport:
+ """
+ Gathered via the GH API - a report that is pushed as an artifact to the job.
+ Contains an XML file with scores for all the benchmarks.
+ """
+ label_score_dict: Dict[str, float]
+ """ A mapping of benchmark labels to their scores """
+ bench_run: JobRun
+
+ @staticmethod
+ def from_dict(obj: Dict[Any, Any]) -> "JobReport":
+ return JobReport(
+ bench_run=JobRun.from_dict(obj["bench_run"]),
+ label_score_dict=obj["label_score_dict"]
+ )
+
+ def to_dict(self) -> Dict[Any, Any]:
+ return {
+ "bench_run": self.bench_run.to_dict(),
+ "label_score_dict": self.label_score_dict
+ }
+
+
+@dataclass
+class BenchmarkData:
+ """
+ Data for a single benchmark compiled from all the job reports.
+ """
+
+ @dataclass
+ class Entry:
+ score: float
+ commit: Commit
+ bench_run_url: str
+ bench_run_event: str
+
+ label: str
+ """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """
+ entries: List[Entry]
+ """ Entries sorted by timestamps """
+
+
+@dataclass
+class BenchDatapoint:
+ """
+ A single datapoint that will be on the chart. `timestamp` is on the X axis,
+ `score` on the Y axis, and the rest of the fields are used either for the tooltip
+ or for the selection info.
+ """
+ timestamp: datetime
+ score: float
+ score_diff: str
+ """ Difference of the score with previous datapoint, or NaN """
+ score_diff_perc: str
+ tooltip: str
+ bench_run_url: str
+ commit_id: str
+ commit_msg: str
+ commit_author: str
+ commit_url: str
+
+
+@dataclass
+class TemplateBenchData:
+ """ Data for one benchmark label (with a unique name and ID) """
+ id: str
+ """ ID of the benchmark, must not contain dots """
+ name: str
+ """ Human readable name of the benchmark """
+ branches_datapoints: Dict[str, List[BenchDatapoint]]
+ """ Mapping of branches to datapoints for that branch """
+
+
+@dataclass
+class JinjaData:
+ bench_source: Source
+ bench_datas: List[TemplateBenchData]
+ branches: List[str]
+ since: datetime
+ until: datetime
+ display_since: datetime
+ """ The date from which all the datapoints are first displayed """
+ timestamp: datetime
+ """ The time when the website was generated """
+
+
+
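
The dataclasses above are what the JSON cache stores; here is a small sketch of the `JobReport` round trip (all field values below are made up for illustration):

```python
# Serialize/deserialize a JobReport the same way the remote cache does.
from bench_tool import JobReport

raw = {
    "bench_run": {
        "id": "123456789",
        "html_url": "https://github.com/enso-org/enso/actions/runs/123456789",
        "run_attempt": 1,
        "event": "schedule",
        "display_title": "Benchmark Engine",
        "head_commit": {
            "id": "abcdef0",
            "message": "Some commit message",
            "timestamp": "2023-08-22T00:00:00Z",
            "author": {"name": "Enso CI Bot"},
        },
    },
    "label_score_dict": {"org.enso.benchmarks.Example.run": 42.0},
}

report = JobReport.from_dict(raw)
assert report.to_dict() == raw  # the round trip preserves the structure
```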
diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
new file mode 100644
index 000000000000..5337203334c1
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
@@ -0,0 +1,194 @@
+import asyncio
+import logging
+import math
+import os
+import shutil
+import zipfile
+from datetime import datetime
+from os import path
+from typing import List, Dict, Optional, Any
+from xml.etree import ElementTree as ET
+
+from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Source
+from bench_tool.gh import invoke_gh_api
+from bench_tool.remote_cache import RemoteCache
+from bench_tool.utils import WithTempDir
+
+ARTIFACT_ID = "Runtime Benchmark Report"
+
+_logger = logging.getLogger(__name__)
+
+
+async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]:
+ """
+ Fetches the list of all the SUCCESSFUL job runs from the GH API for the specified `branch`.
+
+ :param since: The date from which the benchmark results will be gathered.
+ :param until: The date until which the benchmark results will be gathered.
+ :param branch: The branch for which the benchmark results will be gathered.
+ :param workflow_id: The ID of the workflow for which the benchmark results will be gathered.
+ """
+ _logger.info(f"Looking for all successful Engine benchmark workflow run "
+ f"actions from {since} to {until} for branch {branch} "
+ f"and workflow ID {workflow_id}")
+ query_fields = {
+ "branch": branch,
+ "status": "success",
+ "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT),
+ # Start with 1, just to determine the total count
+ "per_page": "1"
+ }
+ res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", query_fields)
+ total_count = int(res["total_count"])
+ per_page = 3
+ _logger.debug(f"Total count of all runs: {total_count} for workflow ID "
+ f"{workflow_id}. Will process {per_page} runs per page")
+
+ async def get_and_parse_run(page: int, parsed_bench_runs) -> None:
+ _query_fields = query_fields.copy()
+ _query_fields["page"] = str(page)
+ res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields)
+ bench_runs_json = res["workflow_runs"]
+ _parsed_bench_runs = [JobRun.from_dict(bench_run_json)
+ for bench_run_json in bench_runs_json]
+ parsed_bench_runs.extend(_parsed_bench_runs)
+
+ # Now we know the total count, so we can fetch all the runs
+ query_fields["per_page"] = str(per_page)
+ num_queries = math.ceil(total_count / per_page)
+ parsed_bench_runs = []
+
+ tasks = []
+ # Page is indexed from 1
+ for page in range(1, num_queries + 1):
+ tasks.append(get_and_parse_run(page, parsed_bench_runs))
+ await asyncio.gather(*tasks)
+
+ return parsed_bench_runs
+
+
+async def fetch_job_reports(
+ bench_runs: List[JobRun],
+ remote_cache: RemoteCache
+) -> List[JobReport]:
+ """
+ Fetches all benchmark reports for the given benchmark runs. Benchmark runs essentially
+ identify the artifacts, while the reports hold the actual benchmark results. The results
+ are either downloaded from GH as artifacts, or fetched from the remote cache if the
+ artifact has already expired.
+ All the reports are fetched in parallel.
+ :param bench_runs: Benchmark runs to fetch the reports for.
+ :param remote_cache: Remote cache used for reports whose artifacts have expired.
+ :return: List of job reports; runs without a report are skipped.
+ """
+ job_reports: List[JobReport] = []
+
+ async def _process_report(_bench_run: JobRun):
+ with WithTempDir("bench_download") as temp_dir:
+ _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache)
+ if _job_report:
+ job_reports.append(_job_report)
+
+ tasks = []
+ for bench_run in bench_runs:
+ tasks.append(_process_report(bench_run))
+ await asyncio.gather(*tasks)
+ return job_reports
+
+
+def _known_artifact_names() -> List[str]:
+ return Source.STDLIB.artifact_names() + Source.ENGINE.artifact_names()
+
+
+async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]:
+ """
+ Extracts the benchmark report from the given bench_run, which was fetched via the GH API,
+ optionally getting it from the remote cache.
+ An artifact in GH can expire; in that case the report is looked up in the remote cache.
+ :param bench_run: The benchmark run to get the report for.
+ :param temp_dir: Used for downloading and unzipping artifacts.
+ :param remote_cache: Cache consulted when the GH artifact has expired.
+ :return: None if the corresponding artifact cannot be found, either as a GH artifact or in the remote cache.
+ """
+ assert os.path.exists(temp_dir) and os.path.isdir(temp_dir)
+
+ # There might be multiple artifacts in the artifact list for a benchmark run.
+ # We are looking for one with a known benchmark report name, which will
+ # be downloaded as a ZIP file.
+ obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts")
+ artifacts = obj["artifacts"]
+ artifacts_by_names = {artifact["name"]: artifact for artifact in artifacts}
+ # At this point, we don't know the source of the benchmark - it is either from
+ # the Engine, or from the stdlib. Thus, we don't know exactly which artifact name we
+ # are looking for, but we know that exactly one of the known artifact names must be present.
+ bench_report_artifact = None
+ for known_name in _known_artifact_names():
+ if known_name in artifacts_by_names:
+ bench_report_artifact = artifacts_by_names[known_name]
+ if bench_report_artifact is None:
+ _logger.warning(f"Bench run {bench_run.id} does not contain any of the known artifact names: "
+ f"{_known_artifact_names()}, but it is a successful run.")
+ return None
+ assert bench_report_artifact, "Benchmark Report artifact not found"
+ artifact_id = str(bench_report_artifact["id"])
+ created_at = bench_report_artifact["created_at"]
+ updated_at = bench_report_artifact["updated_at"]
+ expires_at = bench_report_artifact["expires_at"]
+ is_expired = bench_report_artifact["expired"]
+ _logger.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: "
+ f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}, "
+ f"is_expired={is_expired}")
+
+ job_report = await remote_cache.fetch(bench_run.id)
+ if is_expired and job_report is None:
+ _logger.error(
+ f"Artifact {artifact_id} from bench run {bench_run.id} is expired, and it is not in the remote cache")
+ return None
+ if job_report:
+ _logger.debug(f"Got job report from the cache for {bench_run.id}")
+ return job_report
+
+ assert not is_expired
+
+ # Get contents of the ZIP artifact file
+ artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False)
+ zip_file_name = os.path.join(temp_dir, artifact_id + ".zip")
+ _logger.debug(f"Writing artifact ZIP content into {zip_file_name}")
+ with open(zip_file_name, "wb") as zip_file:
+ zip_file.write(artifact_ret)
+
+ extracted_dirname = os.path.join(temp_dir, artifact_id)
+ if os.path.exists(extracted_dirname):
+ shutil.rmtree(extracted_dirname)
+ os.mkdir(extracted_dirname)
+
+ _logger.debug(f"Extracting {zip_file_name} into {extracted_dirname}")
+ zip_file = zipfile.ZipFile(zip_file_name, "r")
+ zip_file.extractall(extracted_dirname)
+ bench_report_xml = path.join(extracted_dirname, "bench-report.xml")
+ assert path.exists(bench_report_xml)
+
+ bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run)
+ await remote_cache.put(bench_run.id, bench_report_parsed)
+ return bench_report_parsed
+
+
+def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport":
+ _logger.debug(f"Parsing BenchReport from {bench_report_xml_path}")
+ tree = ET.parse(bench_report_xml_path)
+ root = tree.getroot()
+ label_score_dict: Dict[str, float] = dict()
+ for cases in root:
+ assert cases.tag == "cases"
+ for case in cases:
+ assert case.tag == "case"
+ label = case.findtext("label").strip()
+ scores = case.find("scores")
+ scores_float = [float(score.text.strip()) for score in scores]
+ if len(scores_float) > 1:
+ _logger.warning(f"More than one score for benchmark {label}, "
+ f"using the last one (the newest one).")
+ label_score_dict[label] = scores_float[len(scores_float) - 1]
+ return JobReport(
+ label_score_dict=label_score_dict,
+ bench_run=bench_run
+ )
+
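
For context, `_parse_bench_report_from_xml` only assumes the structure sketched below (a `cases`/`case`/`label`/`scores` nesting); the root tag name and the concrete label are illustrative guesses, not taken from an actual report:

```python
# Shape of bench-report.xml as assumed by the parser above; values are illustrative.
import xml.etree.ElementTree as ET

xml_text = """
<benchReport>
  <cases>
    <case>
      <label>org.enso.benchmarks.Example.run</label>
      <scores>
        <score>41.5</score>
        <score>42.0</score>
      </scores>
    </case>
  </cases>
</benchReport>
"""

root = ET.fromstring(xml_text)
for cases in root:
    assert cases.tag == "cases"
    for case in cases:
        label = case.findtext("label").strip()
        scores = [float(score.text.strip()) for score in case.find("scores")]
        print(label, scores[-1])  # like the parser, keep only the last (newest) score
```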
diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py
new file mode 100644
index 000000000000..d8899e15b22d
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/gh.py
@@ -0,0 +1,107 @@
+import asyncio
+import base64
+import json
+import logging
+import subprocess
+import sys
+from typing import Dict, Optional, Union, Any
+from urllib.parse import urlencode
+
+_logger = logging.getLogger(__name__)
+
+MAX_BACKOFF_SECONDS = 120
+
+
+def ensure_gh_installed() -> None:
+ try:
+ out = subprocess.run(["gh", "--version"], check=True,
+ capture_output=True)
+ if out.returncode != 0:
+ print("`gh` command not found - GH CLI utility is not installed. "
+ "See https://cli.github.com/", file=sys.stderr)
+ exit(1)
+ except subprocess.CalledProcessError:
+ print("`gh` command not found - GH CLI utility is not installed. "
+ "See https://cli.github.com/", file=sys.stderr)
+ exit(1)
+
+
+async def invoke_gh_api(
+ repo: str,
+ endpoint: str,
+ query_params: Dict[str, str] = {},
+ fields: Dict[str, str] = {},
+ result_as_json: bool = True,
+ method: str = "GET",
+ backoff: int = 0,
+) -> Optional[Union[Dict[str, Any], bytes]]:
+ """
+ Invokes the GitHub API using the `gh` command line tool.
+ :param repo: Repository name in the form `owner/repo`
+ :param endpoint: Endpoint of the query. Must start with `/`.
+ :param query_params: Additional query parameters.
+ :param fields: Additional fields to be added to the query; they add static
+ string parameters to the request payload.
+ :param result_as_json: If result should be parsed as JSON.
+ If false, the raw bytes are returned.
+ :param method: HTTP method to use, 'GET' by default.
+ :param backoff: Number of seconds to wait before retrying the request.
+ If greater than 0, the request has already been retried; wait this long
+ before trying again.
+ :return: None if the query fails
+ """
+ assert endpoint.startswith("/")
+ if len(fields) > 0 and method != "POST":
+ raise ValueError("Fields can be used only with POST method")
+ cmd = [
+ "gh",
+ "api",
+ "--method", method,
+ f"/repos/{repo}{endpoint}" + "?" + urlencode(query_params)
+ ]
+ for k, v in fields.items():
+ cmd.append("-f")
+ cmd.append(f"{k}='{v}'")
+ if 0 < backoff <= MAX_BACKOFF_SECONDS:
+ _logger.debug(f"Backing off for {backoff} seconds")
+ await asyncio.sleep(backoff)
+ elif backoff > MAX_BACKOFF_SECONDS:
+ _logger.error(f"Backoff of {backoff} seconds is too high, giving up.")
+ return None
+ _logger.debug("Invoking gh API with `%s`", " ".join(cmd))
+ proc = await asyncio.create_subprocess_exec("gh", *cmd[1:],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = await proc.communicate()
+ _logger.debug("Finished gh API `%s`", " ".join(cmd))
+ if proc.returncode != 0:
+ # Special handling of rate limit exceeded - just try to make the
+ # request one more time after some backoff.
+ if "You have exceeded a secondary rate limit" in err.decode():
+ new_backoff = 10 if backoff == 0 else backoff * 2
+ _logger.warning(f"Trying to retry the request with a new backoff "
+ f"of {new_backoff} seconds.")
+ return await invoke_gh_api(repo, endpoint, query_params, fields,
+ result_as_json, method, new_backoff)
+ else:
+ _logger.error("Command `%s` FAILED with errcode %d",
+ " ".join(cmd),
+ proc.returncode)
+ _logger.error(" stdout: %s", out.decode())
+ _logger.error(" stderr: %s", err.decode())
+ return None
+ if result_as_json:
+ return json.loads(out.decode())
+ else:
+ return out
+
+
+async def fetch_file(repo: str, file_path: str) -> Optional[str]:
+ ret = await invoke_gh_api(repo, f"/contents/{file_path}",
+ result_as_json=True)
+ if ret is None:
+ _logger.warning("File %s not found in %s", file_path, repo)
+ return None
+ file_content = base64.b64decode(ret["content"]).decode()
+ return file_content
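
A minimal sketch of using this wrapper directly; it assumes the `gh` CLI is installed and authenticated, and reuses the engine workflow ID constant from `bench_tool/__init__.py` (the `per_page`/`status` parameters are illustrative):

```python
# List a few recent successful runs of the engine benchmark workflow via the gh CLI wrapper.
import asyncio

from bench_tool import ENSO_REPO, ENGINE_BENCH_WORKFLOW_ID
from bench_tool.gh import ensure_gh_installed, invoke_gh_api


async def list_recent_runs() -> None:
    ensure_gh_installed()
    res = await invoke_gh_api(
        ENSO_REPO,
        f"/actions/workflows/{ENGINE_BENCH_WORKFLOW_ID}/runs",
        query_params={"per_page": "5", "status": "success"},
    )
    if res is None:
        raise RuntimeError("gh API query failed")
    for run in res["workflow_runs"]:
        print(run["id"], run["display_title"])


if __name__ == "__main__":
    asyncio.run(list_recent_runs())
```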
diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py
new file mode 100644
index 000000000000..8e3529f05cd3
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/git.py
@@ -0,0 +1,119 @@
+import asyncio
+import logging
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Set
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class GitStatus:
+ modified: Set[str]
+ untracked: Set[str]
+ added: Set[str]
+
+
+async def clone(repo: str, dest: Path) -> None:
+ _logger.debug("Cloning %s to %s", repo, dest)
+ dest_abs_path = str(dest.absolute())
+ args = ["clone", f"git@github.com:{repo}.git", dest_abs_path]
+ proc = await asyncio.create_subprocess_exec("git", *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ if ret != 0:
+ stdout, stderr = await proc.communicate()
+ out = stdout.decode() + stderr.decode()
+ raise RuntimeError(f"Failed to clone {repo}: {out}")
+ assert dest.exists()
+
+
+async def pull(repo: Path) -> None:
+ _logger.debug("Pulling %s", repo)
+ # Avoid unnecessary merge commits by using `--ff-only`
+ args = ["pull", "--ff-only"]
+ proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ if ret != 0:
+ stdout, stderr = await proc.communicate()
+ out = stdout.decode() + stderr.decode()
+ raise RuntimeError(f"Failed to pull {repo}: {out}")
+
+
+async def status(repo: Path) -> GitStatus:
+ assert repo.exists()
+ proc = await asyncio.create_subprocess_exec("git", "status", "--porcelain", cwd=repo,
+ stdout=subprocess.PIPE)
+ out, _ = await proc.communicate()
+ lines = out.decode().splitlines()
+ untracked: Set[str] = set()
+ modified: Set[str] = set()
+ added: Set[str] = set()
+ for line in lines:
+ line = line.strip()
+ if line.startswith("??"):
+ untracked.add(line.split()[1])
+ elif line.startswith("M "):
+ modified.add(line.split()[1])
+ elif line.startswith("A "):
+ added.add(line.split()[1])
+ return GitStatus(modified, untracked, added)
+
+
+async def add(repo: Path, files: Set[str]) -> None:
+ _logger.debug("Adding %s to %s", files, repo)
+ assert len(files) > 0
+ args = ["add"] + list(files)
+ proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ if ret != 0:
+ out, err = await proc.communicate()
+ all_out = out.decode() + err.decode()
+ raise RuntimeError(f"Failed to add {files} to {repo}. Output: {all_out}")
+
+
+async def commit(repo: Path, msg: str) -> None:
+ _logger.debug("Committing %s with message '%s'", repo, msg)
+ stat = await status(repo)
+ assert len(stat.added) > 0 or len(stat.modified) > 0
+ args = ["commit", "-m", msg]
+ proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ if ret != 0:
+ out, err = await proc.communicate()
+ all_out = out.decode() + err.decode()
+ raise RuntimeError(f"Failed to commit {repo}. Output: {all_out}")
+
+
+async def push(repo: Path) -> None:
+ _logger.debug("Pushing to %s", repo)
+ args = ["push"]
+ proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ if ret != 0:
+ out, err = await proc.communicate()
+ all_out = out.decode() + err.decode()
+ raise RuntimeError(f"Failed to push {repo}. Output: {all_out}")
+
+
+async def init(repo: Path) -> None:
+ _logger.debug("Initializing git repo in %s", repo)
+ assert repo.exists()
+ args = ["init"]
+ proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ if ret != 0:
+ out, err = await proc.communicate()
+ all_out = out.decode() + err.decode()
+ raise RuntimeError(f"Failed to init {repo}. Output: {all_out}")
+
+
+async def head_commit(repo: Path) -> str:
+ args = ["rev-parse", "HEAD"]
+ proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ ret = await proc.wait()
+ out, err = await proc.communicate()
+ if ret != 0:
+ raise RuntimeError(f"Failed to get HEAD commit of {repo}: {err.decode()}")
+ else:
+ return out.decode().strip()
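
A small usage sketch of these helpers on an existing local clone; the repository path is a placeholder:

```python
# Inspect the state of a local checkout with the async git helpers above.
import asyncio
from pathlib import Path

from bench_tool import git


async def show_repo_state(repo_path: Path) -> None:
    stat = await git.status(repo_path)  # GitStatus(modified, untracked, added)
    print("modified: ", sorted(stat.modified))
    print("untracked:", sorted(stat.untracked))
    print("HEAD:     ", await git.head_commit(repo_path))


if __name__ == "__main__":
    # Placeholder path - point it at a real clone, e.g. of engine-benchmark-results.
    asyncio.run(show_repo_state(Path(".")))
```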
diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
new file mode 100644
index 000000000000..b6627fb639cd
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
@@ -0,0 +1,174 @@
+"""
+A remote cache is located in https://github.com/enso-org/engine-benchmark-results/tree/main/cache.
+It is just a collection of JSON files, each representing a single job report.
+"""
+import abc
+import json
+import logging
+import os
+import re
+import tempfile
+from pathlib import Path
+from typing import Dict, Optional
+
+from . import gh, JobReport, BENCH_REPO, git
+
+_logger = logging.getLogger(__name__)
+
+CACHE_REMOTE_DIR = "cache"
+ENGINE_INDEX_HTML = "engine-benchs.html"
+STDLIB_INDEX_HTML = "stdlib-benchs.html"
+
+
+class RemoteCache(abc.ABC):
+
+ @abc.abstractmethod
+ async def fetch(self, bench_id: str) -> Optional[JobReport]:
+ """
+ Fetches a job report for the given bench ID from the remote cache
+ :param bench_id:
+ :return: None if the report does not exist
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ async def put(self, bench_id: str, job_report: JobReport) -> None:
+ """
+ Puts a job report into the remote cache, or into the internal data structures.
+ :param bench_id:
+ :param job_report:
+ :return:
+ """
+ raise NotImplementedError
+
+
+class ReadonlyRemoteCache(RemoteCache):
+ """
+ Only fetches the artifacts from the remote cache, does not push anything.
+ """
+
+ def __init__(self):
+ self._fetched_items: Dict[str, JobReport] = {}
+
+ async def fetch(self, bench_id: str) -> Optional[JobReport]:
+ """ Fetches a job report for the given bench ID from the remote cache """
+ if bench_id in self._fetched_items:
+ return self._fetched_items[bench_id]
+ if not _is_benchrun_id(bench_id):
+ _logger.warning("Invalid bench ID: %s", bench_id)
+ return None
+ remote_path = self._get_remote_path(bench_id)
+ _logger.debug("Fetching cache from %s", remote_path)
+ content = await gh.fetch_file(BENCH_REPO, remote_path)
+ if content is None:
+ _logger.warning("Cache not found for %s", bench_id)
+ return None
+ bench_report = JobReport.from_dict(
+ json.loads(content)
+ )
+ assert bench_id not in self._fetched_items
+ self._fetched_items[bench_id] = bench_report
+ return bench_report
+
+ async def put(self, bench_id: str, job_report: JobReport) -> None:
+ assert _is_benchrun_id(bench_id)
+ assert bench_id not in self._fetched_items
+ self._fetched_items[bench_id] = job_report
+
+ def _get_remote_path(self, bench_id: str) -> str:
+ assert _is_benchrun_id(bench_id)
+ return os.path.join(CACHE_REMOTE_DIR, bench_id + ".json")
+
+
+class SyncRemoteCache(RemoteCache):
+ """
+ Fetches and pushes the artifacts to the remote cache. Needs write permission to the repo.
+ """
+
+ def __init__(self, local_root_dir: Optional[Path] = None):
+ if local_root_dir is not None:
+ assert local_root_dir.exists()
+ assert local_root_dir.is_dir()
+ assert local_root_dir.joinpath(".git").exists()
+ self._repo_root_dir = local_root_dir
+ self._should_clone = False
+ else:
+ self._repo_root_dir = Path(tempfile.mkdtemp(prefix="bench_tool_remote_cache"))
+ self._should_clone = True
+ assert self._repo_root_dir.exists()
+ assert self._repo_root_dir.is_dir()
+ self._cache_dir = self._repo_root_dir.joinpath(CACHE_REMOTE_DIR)
+
+ def repo_root_dir(self) -> Path:
+ return self._repo_root_dir
+
+ def cache_dir(self) -> Path:
+ return self._cache_dir
+
+ def engine_index_html(self) -> Path:
+ return self._repo_root_dir.joinpath(ENGINE_INDEX_HTML)
+
+ def stdlib_index_html(self) -> Path:
+ return self._repo_root_dir.joinpath(STDLIB_INDEX_HTML)
+
+ async def initialize(self) -> None:
+ """
+ Make sure the repo is up-to-date
+ :return:
+ """
+ if self._should_clone:
+ await git.clone(BENCH_REPO, self._repo_root_dir)
+ else:
+ await git.pull(self._repo_root_dir)
+ assert self._repo_root_dir.exists()
+ assert self._cache_dir.exists()
+
+ async def fetch(self, bench_id: str) -> Optional[JobReport]:
+ assert self._cache_dir.exists()
+ path = self._cache_dir.joinpath(bench_id + ".json")
+ if path.exists():
+ with path.open() as f:
+ return JobReport.from_dict(json.load(f))
+ return None
+
+ async def put(self, bench_id: str, job_report: JobReport) -> None:
+ assert self._cache_dir.exists()
+ path = self._cache_dir.joinpath(bench_id + ".json")
+ assert not path.exists()
+ with path.open("w") as f:
+ json.dump(
+ job_report.to_dict(),
+ f,
+ ensure_ascii=True,
+ indent=2
+ )
+
+ async def sync(self) -> None:
+ """
+ Synchronizes the local repo state with upstream. That is, it commits and pushes
+ if there are any untracked or modified files in the local directory.
+ :return:
+ """
+ status = await git.status(self._repo_root_dir)
+ is_repo_dirty = len(status.modified) > 0 or len(status.added) > 0 or len(status.untracked) > 0
+ if is_repo_dirty:
+ _logger.info("Untracked or modified files found in the repo: %s", self._repo_root_dir)
+ commit_msg = "Regenerate websites"
+ if len(status.modified) > 0:
+ _logger.debug("Modified files: %s", status.modified)
+ await git.add(self._repo_root_dir, status.modified)
+ if len(status.untracked) > 0:
+ _logger.debug("Untracked files: %s", status.untracked)
+ await git.add(self._repo_root_dir, status.untracked)
+ commit_msg += f" - Add {len(status.untracked)} new reports."
+ else:
+ commit_msg += "."
+ await git.commit(self._repo_root_dir, commit_msg)
+ await git.push(self._repo_root_dir)
+
+
+def _is_benchrun_id(name: str) -> bool:
+ return re.match(r"\d{9}", name) is not None
+
+
+
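
A short sketch of the read-only cache in use; the bench run ID is hypothetical and the `gh` CLI must be installed and authenticated:

```python
# Fetch a cached job report by bench run ID from the engine-benchmark-results repository.
import asyncio

from bench_tool.remote_cache import ReadonlyRemoteCache


async def main() -> None:
    cache = ReadonlyRemoteCache()
    report = await cache.fetch("123456789")  # hypothetical bench run ID
    if report is None:
        print("No cached report for this run ID")
    else:
        print(f"{len(report.label_score_dict)} benchmark labels in the report")


if __name__ == "__main__":
    asyncio.run(main())
```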
diff --git a/tools/performance/engine-benchmarks/bench_tool/requirements.txt b/tools/performance/engine-benchmarks/bench_tool/requirements.txt
new file mode 100644
index 000000000000..d9ac381d2764
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/requirements.txt
@@ -0,0 +1,5 @@
+
+Jinja2 == 3.1.2
+numpy == 1.24.2
+# pandas is imported by bench_tool.template_render; version left unpinned here
+pandas
diff --git a/tools/performance/engine-benchmarks/bench_tool/template_render.py b/tools/performance/engine-benchmarks/bench_tool/template_render.py
new file mode 100644
index 000000000000..57a626487111
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/template_render.py
@@ -0,0 +1,133 @@
+import logging
+from pathlib import Path
+from typing import List, Dict, Optional, Set
+
+import jinja2
+import numpy as np
+import pandas as pd
+
+from bench_tool import JobReport, TemplateBenchData, BenchDatapoint, ENSO_COMMIT_BASE_URL, JinjaData, \
+ JINJA_TEMPLATE, TEMPLATES_DIR
+from bench_tool.utils import parse_commit_timestamp
+
+_logger = logging.getLogger(__name__)
+
+
+def create_template_data(
+ job_reports_per_branch: Dict[str, List[JobReport]],
+ bench_labels: Set[str]) -> List[TemplateBenchData]:
+    """
+    Creates all the necessary data for the Jinja template from all the collected
+    benchmark job reports.
+    :param job_reports_per_branch: Mapping of branch name to list of job reports.
+    The job reports should be sorted by commit date, otherwise the differences
+    between scores may be computed incorrectly.
+    :param bench_labels: Set of benchmark labels to create the template data for.
+    :return: List of template data, one entry per benchmark label.
+    """
+
+ def pct_to_str(score_diff_perc: float) -> str:
+ if not np.isnan(score_diff_perc):
+ buff = "+" if score_diff_perc > 0 else ""
+ buff += "{:.5f}".format(score_diff_perc * 100)
+ buff += "%"
+ return buff
+ else:
+ return "NaN"
+
+ def diff_str(score_diff: float, score_diff_perc: float) -> str:
+ if not np.isnan(score_diff):
+ diff_str = "+" if score_diff > 0 else ""
+ diff_str += "{:.5f}".format(score_diff)
+ diff_str += " ("
+ diff_str += pct_to_str(score_diff_perc)
+ diff_str += ")"
+ return diff_str
+ else:
+ return "NA"
+
+ template_bench_datas: List[TemplateBenchData] = []
+ for bench_label in bench_labels:
+ _logger.debug("Creating template data for benchmark %s", bench_label)
+ branch_datapoints: Dict[str, List[BenchDatapoint]] = {}
+ for branch, job_reports in job_reports_per_branch.items():
+ _logger.debug("Creating datapoints for branch %s from %d job reports",
+ branch, len(job_reports))
+ datapoints: List[BenchDatapoint] = []
+ for job_report in job_reports:
+ prev_datapoint: Optional[BenchDatapoint] = \
+ datapoints[-1] if len(datapoints) > 0 else None
+ if bench_label in job_report.label_score_dict:
+ score = job_report.label_score_dict[bench_label]
+ commit = job_report.bench_run.head_commit
+ timestamp = parse_commit_timestamp(commit)
+ commit_msg_header = \
+ commit.message.splitlines()[0].replace('"', "'")
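+                # A two-element pandas Series gives the absolute (diff) and relative
+                # (pct_change) score change against the previous datapoint; both are
+                # NaN for the first datapoint of a branch.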
+ series = pd.Series([
+ prev_datapoint.score if prev_datapoint else None,
+ score
+ ])
+ score_diff = series.diff()[1]
+ score_diff_perc = series.pct_change()[1]
+ tooltip = "score = " + str(score) + "\\n"
+ tooltip += "date = " + str(timestamp) + "\\n"
+ tooltip += "branch = " + branch + "\\n"
+ tooltip += "diff = " + diff_str(score_diff, score_diff_perc)
+ author_name = commit.author.name \
+ .replace('"', '\\"') \
+ .replace("'", "\\'")
+ datapoints.append(BenchDatapoint(
+ timestamp=timestamp,
+ score=score,
+ score_diff=str(score_diff),
+ score_diff_perc=pct_to_str(score_diff_perc),
+ tooltip=tooltip,
+ bench_run_url=job_report.bench_run.html_url,
+ commit_id=commit.id,
+ commit_msg=commit_msg_header,
+ commit_author=author_name,
+ commit_url=ENSO_COMMIT_BASE_URL + commit.id,
+ ))
+ _logger.debug("%d datapoints created for branch %s",
+ len(datapoints), branch)
+ branch_datapoints[branch] = datapoints.copy()
+ _logger.debug("Template data for benchmark %s created", bench_label)
+ template_bench_datas.append(TemplateBenchData(
+ id=_label_to_id(bench_label),
+ name=_label_to_name(bench_label),
+ branches_datapoints=branch_datapoints,
+ ))
+ return template_bench_datas
+
+
+def render_html(jinja_data: JinjaData, html_out: Path) -> None:
+ jinja_env = jinja2.Environment(
+ loader=jinja2.FileSystemLoader(TEMPLATES_DIR)
+ )
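+    # The template is looked up by its file name, relative to the TEMPLATES_DIR loader root.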
+ template_name = str(JINJA_TEMPLATE.name)
+ jinja_template = jinja_env.get_template(template_name)
+ generated_html = jinja_template.render(jinja_data.__dict__)
+ if html_out.exists():
+        _logger.info("%s already exists, overwriting", html_out)
+ with html_out.open("w") as html_file:
+ html_file.write(generated_html)
+
+
+def _label_to_id(label: str) -> str:
+ return label.replace(".", "_")
+
+
+def _label_to_name(label: str) -> str:
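+    # Drops the common package components from the label, so that e.g. a (hypothetical)
+    # label "org.enso.benchmarks.semantic.Foo.bar" is rendered as "Foo_bar".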
+ items = label.split(".")
+ assert len(items) >= 2
+ filtered_items = \
+ [item for item in items if item not in (
+ "org",
+ "enso",
+ "benchmark",
+ "benchmarks",
+ "semantic",
+ "interpreter",
+ "bench"
+ )]
+ return "_".join(filtered_items)
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
new file mode 100644
index 000000000000..0c45ba70ae05
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
@@ -0,0 +1,78 @@
+import json
+import unittest
+from datetime import datetime
+
+from bench_tool import ENGINE_BENCH_WORKFLOW_ID, JobReport, JobRun, Commit, \
+ Author
+from .bench_results import get_bench_report, get_bench_runs
+from .remote_cache import ReadonlyRemoteCache
+from .utils import parse_commit_timestamp, WithTempDir
+
+# A single ID of a benchmark run between 2023-05-01 and 2023-05-05.
+# We know for sure that this workflow run exists on GitHub.
+BENCH_RUN_ID = "4888453297"
+
+sample_job_report = JobReport(
+ label_score_dict={
+ "test_label": 1.0
+ },
+ bench_run=JobRun(
+ id="123456789",
+ display_title="Test",
+ html_url="https://github.com/enso-org/enso/actions/runs/123456789",
+ run_attempt=1,
+ event="push",
+ head_commit=Commit(
+ id="a67297aebf6a094d1ad0b0d88cf7438dbf8bd8fe",
+ message="Test commit",
+ timestamp="2021-06-01T12:00:00Z",
+ author=Author(
+ name="Pavel Marek"
+ )
+ )
+ )
+)
+
+
+class TestBenchResults(unittest.IsolatedAsyncioTestCase):
+ def test_job_report_is_serializable(self):
+ s = json.dumps(sample_job_report.to_dict())
+ self.assertIsNotNone(s)
+ self.assertGreater(len(s), 0)
+
+ def test_job_report_is_deserializable(self):
+ d = sample_job_report.to_dict()
+ job_report = JobReport.from_dict(d)
+ self.assertEqual(sample_job_report, job_report)
+
+ async def test_get_bench_run(self):
+        """
+        Fetching the bench runs does not need the remote cache - it only fetches
+        some metadata from GitHub.
+        """
+ since = datetime.fromisoformat("2023-05-01")
+ until = datetime.fromisoformat("2023-05-05")
+ bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
+ self.assertEqual(1, len(bench_runs))
+ # There is just a single bench run between 2023-05-01 and 2023-05-05
+ bench_run = bench_runs[0]
+ self.assertEqual(BENCH_RUN_ID, bench_run.id)
+ commit_ts = parse_commit_timestamp(bench_run.head_commit)
+ self.assertLess(since, commit_ts)
+ self.assertGreater(until, commit_ts)
+
+ async def test_get_bench_report(self):
+ # We choose an old date on purpose, so that the remote cache must be used, and is thus
+ # transitively tested.
+ since = datetime.fromisoformat("2023-05-01")
+ until = datetime.fromisoformat("2023-05-05")
+ bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
+ self.assertEqual(1, len(bench_runs))
+ bench_run = bench_runs[0]
+ remote_cache = ReadonlyRemoteCache()
+ with WithTempDir("test_get_bench_report") as temp_dir:
+ bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
+ self.assertIsNotNone(bench_report)
+ self.assertEqual(bench_run, bench_report.bench_run)
+ self.assertEqual(55, len(bench_report.label_score_dict))
+
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py
new file mode 100644
index 000000000000..1882390fd601
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py
@@ -0,0 +1,34 @@
+import unittest
+
+from bench_tool import ENSO_REPO, Source
+from . import gh
+
+
+class TestGH(unittest.IsolatedAsyncioTestCase):
+ async def test_ensure_gh_installed(self):
+ self.assertIsNone(gh.ensure_gh_installed())
+
+ async def test_file_fetch(self):
+ content = await gh.fetch_file(ENSO_REPO, "README.md")
+ self.assertIsNotNone(content)
+ self.assertIsInstance(content, str)
+ self.assertGreater(len(content), 0)
+
+ async def test_fetch_non_existing_file(self):
+ content = await gh.fetch_file(ENSO_REPO, "non_existing_file")
+ self.assertIsNone(content)
+
+ async def test_wrong_gh_query_should_not_fail(self):
+ res = await gh.invoke_gh_api("non_existing_repo", "/non_existing_endpoint")
+ self.assertIsNone(res)
+
+ async def test_get_stdlib_bench_run(self):
+        # This bench run does not contain an artifact named "Runtime Benchmark Report",
+        # yet it is a successful run, so this case needs special handling.
+        # https://github.com/enso-org/enso/actions/runs/7909011591
+ bench_run_id = "7909011591"
+ obj = await gh.invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run_id}/artifacts")
+ artifacts = obj["artifacts"]
+ stdlib_artifact_name = Source.STDLIB.artifact_names()[0]
+ self.assertEqual(1, len(artifacts))
+ self.assertEqual(stdlib_artifact_name, artifacts[0]["name"])
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_git.py b/tools/performance/engine-benchmarks/bench_tool/test_git.py
new file mode 100644
index 000000000000..61a635786a0f
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_git.py
@@ -0,0 +1,59 @@
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+
+from . import git
+
+
+class TestGit(unittest.IsolatedAsyncioTestCase):
+ def setUp(self):
+ self.repo_root = Path(tempfile.mkdtemp())
+
+ def tearDown(self):
+ shutil.rmtree(self.repo_root)
+
+ async def test_init(self):
+ await git.init(self.repo_root)
+ status = await git.status(self.repo_root)
+ self.assertEqual(0, len(status.added))
+ self.assertEqual(0, len(status.modified))
+ self.assertEqual(0, len(status.untracked))
+
+ async def test_add_file(self):
+ await git.init(self.repo_root)
+ self.repo_root.joinpath("README.md").write_text("Hello")
+ status = await git.status(self.repo_root)
+ self.assertEqual(1, len(status.untracked))
+
+ async def test_commit(self):
+ await git.init(self.repo_root)
+ self.repo_root.joinpath("README.md").write_text("Hello")
+ await git.add(self.repo_root, {"README.md"})
+ await git.commit(self.repo_root, "Initial commit")
+ status = await git.status(self.repo_root)
+ self.assertEqual(0, len(status.added))
+ self.assertEqual(0, len(status.modified))
+ self.assertEqual(0, len(status.untracked))
+
+ async def test_modify_file(self):
+ await git.init(self.repo_root)
+ self.repo_root.joinpath("README.md").write_text("Hello")
+ await git.add(self.repo_root, {"README.md"})
+ await git.commit(self.repo_root, "Initial commit")
+ self.repo_root.joinpath("README.md").write_text("Hello World")
+ status = await git.status(self.repo_root)
+ self.assertEqual(0, len(status.added))
+ self.assertEqual(1, len(status.modified))
+ self.assertEqual(0, len(status.untracked))
+
+ async def test_add_more_files(self):
+ await git.init(self.repo_root)
+ self.repo_root.joinpath("README.md").write_text("Hello")
+ self.repo_root.joinpath("pom.xml").write_text("")
+ status = await git.status(self.repo_root)
+ self.assertEqual(2, len(status.untracked))
+ await git.add(self.repo_root, {"README.md", "pom.xml"})
+ status = await git.status(self.repo_root)
+ self.assertEqual(2, len(status.added))
+
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
new file mode 100644
index 000000000000..18e046c12700
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
@@ -0,0 +1,114 @@
+import unittest
+from pathlib import Path
+
+from . import JobReport, JobRun, Commit, Author
+from .bench_results import fetch_job_reports
+from .remote_cache import ReadonlyRemoteCache, SyncRemoteCache
+
+
+sample_job_report = JobReport(
+ label_score_dict={
+ "test_label": 1.0
+ },
+ bench_run=JobRun(
+ id="123456789",
+ display_title="Test",
+ html_url="https://github.com/enso-org/enso/actions/runs/123456789",
+ run_attempt=1,
+ event="push",
+ head_commit=Commit(
+ id="a67297aebf6a094d1ad0b0d88cf7438dbf8bd8fe",
+ message="Test commit",
+ timestamp="2021-06-01T12:00:00Z",
+ author=Author(
+ name="Pavel Marek"
+ )
+ )
+ )
+)
+
+stdlib_bench_run = JobRun(
+ id='7879611014',
+ display_title='Benchmark Standard Libraries',
+ html_url='https://github.com/enso-org/enso/actions/runs/7879611014',
+ run_attempt=1,
+ event='schedule',
+ head_commit=Commit(
+ id='eb59b475f68146f03fc3cef1092ee56eaaa1600a',
+ author=Author(name='Radosław Waśko'),
+ timestamp='2024-02-12T19:04:13Z',
+ message='Write support for S3 (#8921)\n\n- Closes #8809'
+ )
+)
+
+
+class TestReadonlyRemoteCache(unittest.IsolatedAsyncioTestCase):
+ async def test_fetch_some_cache(self):
+ remote_cache = ReadonlyRemoteCache()
+        # This ID is definitely in the cache
+ bench_id = "3686412302"
+ job_report = await remote_cache.fetch(bench_id)
+ self.assertIsNotNone(job_report)
+ self.assertEqual(1, job_report.bench_run.run_attempt)
+ self.assertEqual(bench_id, job_report.bench_run.id)
+ self.assertEqual("Jaroslav Tulach", job_report.bench_run.head_commit.author.name)
+
+ async def test_non_existing_cache_should_not_fail(self):
+ remote_cache = ReadonlyRemoteCache()
+ bench_id = "FOOOO BAR"
+ job_report = await remote_cache.fetch(bench_id)
+ self.assertIsNone(job_report)
+
+ async def test_put_job_report_into_cache(self):
+ remote_cache = ReadonlyRemoteCache()
+ bench_id = sample_job_report.bench_run.id
+ await remote_cache.put(bench_id, sample_job_report)
+ job_report = await remote_cache.fetch(bench_id)
+ self.assertIsNotNone(job_report)
+ self.assertEqual(bench_id, job_report.bench_run.id)
+
+ async def test_fetch_stdlib_report(self):
+ remote_cache = ReadonlyRemoteCache()
+ job_reports = await fetch_job_reports([stdlib_bench_run], remote_cache)
+ self.assertIsNotNone(job_reports)
+ self.assertEqual(1, len(job_reports))
+
+
+class TestSyncRemoteCache(unittest.IsolatedAsyncioTestCase):
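+    # These tests require a local clone of the benchmark results repo at LOCAL_REPO_ROOT;
+    # they are skipped when that directory does not exist.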
+ LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results")
+
+ async def test_init_sync_remote_cache_from_local_repo(self):
+ if not self.LOCAL_REPO_ROOT.exists():
+ self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist")
+ remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT)
+ await remote_cache.initialize()
+ root_dir = remote_cache.repo_root_dir()
+ self.assertTrue(root_dir.exists())
+ self.assertTrue(root_dir.is_dir())
+ cache_dir = remote_cache.cache_dir()
+ self.assertTrue(cache_dir.exists())
+ self.assertTrue(cache_dir.is_dir())
+ self.assertTrue(remote_cache.engine_index_html().exists())
+ self.assertTrue(remote_cache.stdlib_index_html().exists())
+
+ async def test_clone_sync_remote_cache(self):
+ self.skipTest("TODO: Takes too long")
+ remote_cache = SyncRemoteCache()
+ await remote_cache.initialize()
+ root_dir = remote_cache.repo_root_dir()
+ self.assertTrue(root_dir.exists())
+ self.assertTrue(root_dir.is_dir())
+ cache_dir = remote_cache.cache_dir()
+ self.assertTrue(cache_dir.exists())
+ self.assertTrue(cache_dir.is_dir())
+ self.assertTrue(remote_cache.engine_index_html().exists())
+ self.assertTrue(remote_cache.stdlib_index_html().exists())
+
+ async def test_fetch_stdlib_report(self):
+ if not self.LOCAL_REPO_ROOT.exists():
+ self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist")
+ remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT)
+ await remote_cache.initialize()
+ job_reports = await fetch_job_reports([stdlib_bench_run], remote_cache)
+ self.assertIsNotNone(job_reports)
+ self.assertEqual(1, len(job_reports))
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py
new file mode 100644
index 000000000000..567533d4d5d2
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py
@@ -0,0 +1,31 @@
+import unittest
+from pathlib import Path
+from datetime import datetime
+
+from bench_tool import Source
+from bench_tool.remote_cache import SyncRemoteCache
+from bench_tool.utils import WithTempDir
+from bench_tool.website import generate_bench_website
+
+
+class TestWebsiteRegen(unittest.IsolatedAsyncioTestCase):
+ LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results")
+
+ async def test_engine_website_regen(self):
+ if not self.LOCAL_REPO_ROOT.exists():
+ self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist")
+ remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT)
+ # Pull the repo if necessary
+ await remote_cache.initialize()
+ since = datetime.fromisoformat("2023-02-01")
+ until = datetime.fromisoformat("2023-02-25")
+ with WithTempDir("test_engine_website_regen") as temp_dir:
+ temp_dir_path = Path(temp_dir)
+ html_out = temp_dir_path.joinpath("engine-benchs.html")
+ await generate_bench_website(Source.ENGINE, remote_cache, since, until, html_out)
+ self.assertTrue(html_out.exists())
+            self.assertGreater(
+                html_out.stat().st_size, 100 * 1024,
+                "The generated HTML file should be larger than 100 KB"
+            )
diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py
new file mode 100644
index 000000000000..0a04f0784a87
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/utils.py
@@ -0,0 +1,54 @@
+import logging
+import shutil
+import tempfile
+from datetime import datetime
+from typing import List, Set
+
+from bench_tool import JobReport, GH_DATE_FORMAT, Commit
+
+_logger = logging.getLogger(__name__)
+
+
+class WithTempDir:
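+    """
+    Context manager that creates a temporary directory with the given prefix
+    and removes it (ignoring errors) on exit.
+    """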
+ def __init__(self, prefix: str):
+ self.prefix = prefix
+ self.temp_dir = None
+
+ def __enter__(self):
+ self.temp_dir = tempfile.mkdtemp(prefix=self.prefix)
+ return self.temp_dir
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+
+def gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
+    """
+    Iterates through all the job reports and gathers all the benchmark labels
+    found. Note that every job report can have a different set of benchmark labels.
+    :return: Set of all benchmark labels found.
+    """
+    all_labels = set()
+    for job_report in job_reports:
+        for label in job_report.label_score_dict.keys():
+            all_labels.add(label)
+    return all_labels
+
+
+def parse_commit_timestamp(commit: Commit) -> datetime:
+ """ Parses the timestamp from the commit based on the GH's formatting. """
+ return datetime.strptime(commit.timestamp, GH_DATE_FORMAT)
+
+
+def sort_job_reports(
+        job_reports: List[JobReport]
+) -> None:
+    """
+    Sorts the job reports in place by the date of their head commit.
+    :param job_reports: List of job reports to sort in place.
+    """
+    def _get_timestamp(job_report: JobReport) -> datetime:
+        return parse_commit_timestamp(job_report.bench_run.head_commit)
+
+    job_reports.sort(key=_get_timestamp)
diff --git a/tools/performance/engine-benchmarks/bench_tool/website.py b/tools/performance/engine-benchmarks/bench_tool/website.py
new file mode 100644
index 000000000000..57f6f6da29d3
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/website.py
@@ -0,0 +1,66 @@
+import logging
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import List, Dict, Set
+
+from bench_tool import JobRun, BRANCH_DEVELOP, Source, JobReport, TemplateBenchData, JinjaData
+from bench_tool.bench_results import get_bench_runs, fetch_job_reports
+from bench_tool.remote_cache import SyncRemoteCache
+from bench_tool.template_render import create_template_data, render_html
+from bench_tool.utils import sort_job_reports, gather_all_bench_labels
+
+_logger = logging.getLogger(__name__)
+
+
+async def generate_bench_website(
+ bench_source: Source,
+ remote_cache: SyncRemoteCache,
+ since: datetime,
+ until: datetime,
+ generated_html: Path
+) -> None:
+    """
+    Generates a single HTML page with the benchmark results.
+
+    :param bench_source: Source of the benchmarks, either engine or stdlib.
+    :param remote_cache: Remote cache used for fetching the job reports.
+    :param since: Date since when the benchmarks should be considered.
+    :param until: Date until when the benchmarks should be considered.
+    :param generated_html: Path to the generated HTML file.
+    """
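+    # The overall flow: collect the benchmark runs for every workflow of the given
+    # source, fetch their job reports (via the remote cache), sort them by commit date,
+    # group them per branch (currently just develop), and render the Jinja template.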
+ bench_runs: List[JobRun] = []
+ for workflow_id in bench_source.workflow_ids():
+ bench_runs.extend(
+ await get_bench_runs(since, until, BRANCH_DEVELOP, workflow_id)
+ )
+ assert len(bench_runs) > 0, "No benchmark runs found"
+
+ job_reports = await fetch_job_reports(bench_runs, remote_cache)
+ _logger.debug(f"Gathered {len(job_reports)} job reports")
+ assert len(job_reports) > 0, "No job reports found"
+
+ _logger.debug("Sorting job_reports by commit date")
+ sort_job_reports(job_reports)
+
+ all_bench_labels: Set[str] = gather_all_bench_labels(job_reports)
+ _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels")
+
+ job_reports_per_branch: Dict[str, List[JobReport]] = {
+ BRANCH_DEVELOP: job_reports
+ }
+ template_bench_datas: List[TemplateBenchData] = \
+ create_template_data(job_reports_per_branch, all_bench_labels)
+ template_bench_datas.sort(key=lambda data: data.id)
+
+ jinja_data = JinjaData(
+ since=since,
+ display_since=max(until - timedelta(days=30), since),
+ until=until,
+ bench_datas=template_bench_datas,
+ bench_source=bench_source,
+ branches=[BRANCH_DEVELOP],
+ timestamp=datetime.now()
+ )
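+    # Note: display_since above is clamped to at most roughly 30 days before `until`;
+    # judging by its name, it is the start of the date range shown by default.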
+ _logger.debug(f"Rendering HTML to {generated_html}")
+ render_html(jinja_data, generated_html)
diff --git a/tools/performance/engine-benchmarks/templates/template_jinja.html b/tools/performance/engine-benchmarks/templates/template_jinja.html
index 97311c0d8af8..838f0968c368 100644
--- a/tools/performance/engine-benchmarks/templates/template_jinja.html
+++ b/tools/performance/engine-benchmarks/templates/template_jinja.html
@@ -280,7 +280,8 @@
-            Generated by the bench_download.py
-            script.
+            Generated by the bench_download.py
+            script in
+            {{ timestamp }}
             .
@@ -334,7 +335,10 @@
     {% for bench_data in bench_datas %}
     [per-benchmark markup changes in this hunk were lost in extraction]
diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py
new file mode 100644
index 000000000000..be408fa6f7eb
--- /dev/null
+++ b/tools/performance/engine-benchmarks/website_regen.py
@@ -0,0 +1,66 @@
+"""
+IMPORTANT NOTE: This script should be run only on the CI!
+
+This script regenerates the benchmark results website, hosted as GitHub Pages in the
+https://github.com/enso-org/engine-benchmark-results repo.
+"""
+import asyncio
+import logging
+from argparse import ArgumentParser
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from bench_tool import Source
+from bench_tool.remote_cache import SyncRemoteCache
+from bench_tool.website import generate_bench_website
+
+# The inception dates of the benchmarks, i.e., the dates of the first engine and stdlib benchmark runs.
+ENGINE_SINCE = datetime.fromisoformat("2022-12-01")
+STDLIB_SINCE = datetime.fromisoformat("2023-08-22")
+
+_logger = logging.getLogger("website_regen")
+
+
+async def main():
+ arg_parser = ArgumentParser(description="Regenerate the benchmark results website")
+ arg_parser.add_argument("-v", "--verbose", action="store_true")
+ arg_parser.add_argument("-n", "--dry-run", action="store_true")
+ arg_parser.add_argument("--local-repo",
+ type=str,
+ help="Path to the local clone of the engine-benchmark-results repo")
+ args = arg_parser.parse_args()
+ dry_run: bool = args.dry_run
+ verbose: bool = args.verbose
+ local_repo: Optional[Path] = Path(args.local_repo) if args.local_repo else None
+ logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
+ _logger.debug(f"Args: dry_run={dry_run}, verbose={verbose}, local_repo={local_repo}")
+ remote_cache = SyncRemoteCache(local_repo)
+ _logger.info("Initializing the bench results repo, this might take some time")
+ await remote_cache.initialize()
+ _logger.info("Bench results repo initialized")
+
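+    # Regenerate both websites concurrently, writing the HTML directly into the
+    # local clone of the benchmark results repository.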
+ now = datetime.now()
+ engine_html_task = generate_bench_website(
+ Source.ENGINE,
+ remote_cache,
+ ENGINE_SINCE,
+ now,
+ remote_cache.engine_index_html()
+ )
+ stdlib_html_task = generate_bench_website(
+ Source.STDLIB,
+ remote_cache,
+ STDLIB_SINCE,
+ now,
+ remote_cache.stdlib_index_html()
+ )
+ await asyncio.gather(engine_html_task, stdlib_html_task)
+ if dry_run:
+ _logger.info("Dry-run, not syncing the remote cache")
+ else:
+ await remote_cache.sync()
+
+
+if __name__ == "__main__":
+ asyncio.run(main())