diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml new file mode 100644 index 000000000000..69d8d44f504f --- /dev/null +++ b/.github/workflows/bench-upload.yml @@ -0,0 +1,46 @@ +# This file is manually managed. It is used to upload benchmarks to to the +# https://github.com/enso-org/engine-benchmark-results repository. + +name: Benchmarks upload +on: + workflow_run: + workflows: ["Benchmark Engine", "Benchmark Standard Libraries"] + types: + - completed +jobs: + upload-benchmarks: + name: Upload benchmarks + runs-on: ubuntu-latest + steps: + - name: Checkout enso repository + uses: actions/checkout@v4 + with: + repository: enso-org/enso + path: enso + - name: Checkout engine-benchmark-results repository + uses: actions/checkout@v4 + with: + repository: enso-org/engine-benchmark-results + path: engine-benchmark-results + token: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + python3 \ + python3-jinja2 \ + python3-numpy \ + python3-pandas + sudo apt-get install -y gh + - name: Set up git + run: | + git config --global user.email "ci@enso.org" + git config --global user.name "Enso CI Bot" + - name: Upload benchmarks + run: | + cd enso/tools/performance/engine-benchmarks + python3 website_regen.py \ + -v \ + --local-repo ${{ github.workspace }}/engine-benchmark-results + env: + GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md index c37cfbe060af..3b7bba729079 100644 --- a/tools/performance/engine-benchmarks/README.md +++ b/tools/performance/engine-benchmarks/README.md @@ -4,6 +4,12 @@ This directory contains a python script `bench_download.py` for downloading Engine and stdlib benchmark results from GitHub, and `Engine_Benchs` Enso project for analysing the downloaded data. +Note that for convenience, there is `bench_tool` directory that is a Python +package. The `bench_download.py` script uses this package. + +To run all the Python tests for that package, run `python -m unittest` in this +directory. + Dependencies for `bench_download.py`: - python >= 3.7 diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 7743adb5627a..9aa6b3c081d4 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -45,451 +45,44 @@ - Used as a template engine for the HTML. 
""" +import sys + +from bench_tool.bench_results import get_bench_runs, fetch_job_reports +from bench_tool.remote_cache import ReadonlyRemoteCache +from bench_tool.utils import gather_all_bench_labels, sort_job_reports + +if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): + print("ERROR: python version lower than 3.7") + exit(1) + import asyncio -import json import logging import logging.config -import math import os -import re import shutil -import subprocess -import sys import tempfile -import zipfile from argparse import ArgumentParser, RawDescriptionHelpFormatter from csv import DictWriter from datetime import datetime, timedelta -from enum import Enum from os import path -from typing import List, Dict, Optional, Any, Union, Set -from dataclasses import dataclass -import xml.etree.ElementTree as ET -from urllib.parse import urlencode +from typing import List, Dict, Optional, Set +from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \ + JINJA_TEMPLATE, JobRun, JobReport, \ + TemplateBenchData, JinjaData, Source +from bench_tool.gh import ensure_gh_installed +from bench_tool.template_render import create_template_data, render_html -if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): - print("ERROR: python version lower than 3.7") - exit(1) try: import pandas as pd import numpy as np import jinja2 except ModuleNotFoundError as err: print("ERROR: One of pandas, numpy, or jinja2 packages not installed", file=sys.stderr) + print("Install either with `pip install pandas numpy jinja2` or " + "with `apt-get install python3-pandas python3-numpy python3-jinja2`", file=sys.stderr) exit(1) -DATE_FORMAT = "%Y-%m-%d" -ENGINE_BENCH_WORKFLOW_ID = 29450898 -""" -Workflow ID of engine benchmarks, got via `gh api -'/repos/enso-org/enso/actions/workflows'`. -The name of the workflow is 'Benchmark Engine' -""" -NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764 -""" -Workflow ID for 'Benchmark Engine' workflow, which is the new workflow -since 2023-08-22. -""" -STDLIBS_BENCH_WORKFLOW_ID = 66661001 -""" -Workflow ID of stdlibs benchmarks, got via `gh api -'/repos/enso-org/enso/actions/workflows'`. -The name is 'Benchmark Standard Libraries' -""" -GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" -""" Date format as returned from responses in GH API""" -ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/" -JINJA_TEMPLATE = "templates/template_jinja.html" -""" Path to the Jinja HTML template """ -TEMPLATES_DIR = "templates" -GENERATED_SITE_DIR = "generated_site" -GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90) - - -class Source(Enum): - ENGINE = "engine" - STDLIB = "stdlib" - - def workflow_ids(self) -> List[int]: - if self == Source.ENGINE: - return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID] - elif self == Source.STDLIB: - return [STDLIBS_BENCH_WORKFLOW_ID] - else: - raise ValueError(f"Unknown source {self}") - - -@dataclass -class Author: - name: str - - -@dataclass -class Commit: - """ Corresponds to the commit from GH API """ - id: str - author: Author - timestamp: str - message: str - - -@dataclass -class JobRun: - """ - Gathered via the GH API. Defines a single run of an Engine benchmark job. - """ - id: str - display_title: str - html_url: str - run_attempt: int - """ An event as defined by the GitHub API, for example 'push' or 'schedule' """ - event: str - head_commit: Commit - - -@dataclass -class JobReport: - """ - Gathered via the GH API - a report that is pushed as an aritfact to the job. 
- Contains a XML file with scores for all the benchmarks. - """ - label_score_dict: Dict[str, float] - """ A mapping of benchmark labels to their scores """ - bench_run: JobRun - - -@dataclass -class BenchmarkData: - """ - Data for a single benchmark compiled from all the job reports. - """ - - @dataclass - class Entry: - score: float - commit: Commit - bench_run_url: str - bench_run_event: str - - label: str - """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """ - entries: List[Entry] - """ Entries sorted by timestamps """ - - -@dataclass -class BenchDatapoint: - """ - A single datapoint that will be on the chart. `timestamp` is on X axis, - `score` on Y axis, and the rest of the fields is used either for the tooltip, - or for the selection info. - """ - timestamp: datetime - score: float - score_diff: str - """ Difference of the score with previous datapoint, or NaN """ - score_diff_perc: str - tooltip: str - bench_run_url: str - commit_id: str - commit_msg: str - commit_author: str - commit_url: str - - -@dataclass -class TemplateBenchData: - """ Data for one benchmark label (with a unique name and ID) """ - id: str - """ ID of the benchmark, must not contain dots """ - name: str - """ Human readable name of the benchmark """ - branches_datapoints: Dict[str, List[BenchDatapoint]] - """ Mapping of branches to datapoints for that branch """ - - -@dataclass -class JinjaData: - bench_source: Source - bench_datas: List[TemplateBenchData] - branches: List[str] - since: datetime - until: datetime - display_since: datetime - """ The date from which all the datapoints are first displayed """ - - -def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: - return JobRun( - id=str(obj["id"]), - html_url=obj["html_url"], - run_attempt=int(obj["run_attempt"]), - event=obj["event"], - display_title=obj["display_title"], - head_commit=Commit( - id=obj["head_commit"]["id"], - message=obj["head_commit"]["message"], - timestamp=obj["head_commit"]["timestamp"], - author=Author( - name=obj["head_commit"]["author"]["name"] - ) - ) - ) - - -def _parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport: - return JobReport( - bench_run=_parse_bench_run_from_json(obj["bench_run"]), - label_score_dict=obj["label_score_dict"] - ) - - -def _bench_report_to_json(bench_report: JobReport) -> Dict[Any, Any]: - return { - "bench_run": { - "id": bench_report.bench_run.id, - "html_url": bench_report.bench_run.html_url, - "run_attempt": bench_report.bench_run.run_attempt, - "event": bench_report.bench_run.event, - "display_title": bench_report.bench_run.display_title, - "head_commit": { - "id": bench_report.bench_run.head_commit.id, - "message": bench_report.bench_run.head_commit.message, - "timestamp": bench_report.bench_run.head_commit.timestamp, - "author": { - "name": bench_report.bench_run.head_commit.author.name - } - } - }, - "label_score_dict": bench_report.label_score_dict - } - - -def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport": - logging.debug(f"Parsing BenchReport from {bench_report_xml_path}") - tree = ET.parse(bench_report_xml_path) - root = tree.getroot() - label_score_dict: Dict[str, float] = dict() - for cases in root: - assert cases.tag == "cases" - for case in cases: - assert case.tag == "case" - label = case.findtext("label").strip() - scores = case.find("scores") - scores_float = [float(score.text.strip()) for score in scores] - if len(scores_float) > 1: - logging.warning(f"More than one score for 
benchmark {label}, " - f"using the last one (the newest one).") - label_score_dict[label] = scores_float[len(scores_float) - 1] - return JobReport( - label_score_dict=label_score_dict, - bench_run=bench_run - ) - - -def _is_benchrun_id(name: str) -> bool: - return re.match("\d{9}", name) is not None - - -def _read_json(json_file: str) -> Dict[Any, Any]: - assert path.exists(json_file) and path.isfile(json_file) - with open(json_file, "r") as f: - return json.load(f) - - -async def _invoke_gh_api(endpoint: str, - query_params: Dict[str, str] = {}, - result_as_text: bool = True) -> Union[Dict[str, Any], bytes]: - urlencode(query_params) - cmd = [ - "gh", - "api", - f"/repos/enso-org/enso{endpoint}" + "?" + urlencode(query_params) - ] - logging.info(f"Starting subprocess `{' '.join(cmd)}`") - proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = await proc.communicate() - logging.info(f"Finished subprocess `{' '.join(cmd)}`") - if proc.returncode != 0: - print("Command `" + " ".join(cmd) + "` FAILED with errcode " + str( - proc.returncode)) - print(err.decode()) - exit(proc.returncode) - if result_as_text: - return json.loads(out.decode()) - else: - return out - - -class Cache: - """ - Cache is a directory filled with json files that have name of format .json, and - in every json, there is `BenchReport` dataclass serialized. - """ - - def __init__(self, dirname: str): - assert path.exists(dirname) and path.isdir(dirname) - self._dir = dirname - # Keys are BenchRun ids - self._items: Dict[str, JobReport] = {} - for fname in os.listdir(dirname): - fname_without_ext, ext = path.splitext(fname) - if _is_benchrun_id(fname_without_ext) and ext == ".json": - logging.debug(f"Loading into cache from {fname}") - bench_report = _parse_bench_report_from_json( - _read_json(path.join(dirname, fname)) - ) - self._items[fname_without_ext] = bench_report - - def __len__(self) -> int: - return len(self._items) - - def __contains__(self, key: str) -> bool: - assert _is_benchrun_id(key) - return key in self._items - - def __getitem__(self, item: str) -> Optional[JobReport]: - if not _is_benchrun_id(item): - return None - else: - return self._items[item] - - def __setitem__(self, bench_run_id: str, bench_report: JobReport) -> None: - assert isinstance(bench_report, JobReport) - assert isinstance(bench_run_id, str) - assert _is_benchrun_id(bench_run_id) - self._items[bench_run_id] = bench_report - json_fname = path.join(self._dir, bench_run_id + ".json") - logging.debug(f"Putting {bench_run_id} into cache {json_fname}") - with open(json_fname, "w") as json_file: - json.dump( - _bench_report_to_json(bench_report), - json_file, - indent=2, - ensure_ascii=False - ) - - def __str__(self) -> str: - return str(self._items) - - def contains(self, bench_run_id: str) -> bool: - return bench_run_id in self._items - - -class FakeCache: - def __getitem__(self, item): - return None - - def __setitem__(self, key, value): - pass - - def __contains__(self, item): - return False - - def __len__(self): - return 0 - - -async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]: - """ - Fetches the list of all the job runs from the GH API for the specified `branch`. 
- """ - logging.info(f"Looking for all successful Engine benchmark workflow run " - f"actions from {since} to {until} for branch {branch} " - f"and workflow ID {workflow_id}") - query_fields = { - "branch": branch, - "status": "success", - "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT), - # Start with 1, just to determine the total count - "per_page": "1" - } - res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", query_fields) - total_count = int(res["total_count"]) - per_page = 3 - logging.debug(f"Total count of all runs: {total_count} for workflow ID " - f"{workflow_id}. Will process {per_page} runs per page") - - async def get_and_parse_run(page: int, parsed_bench_runs) -> None: - _query_fields = query_fields.copy() - _query_fields["page"] = str(page) - res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", _query_fields) - bench_runs_json = res["workflow_runs"] - _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json) - for bench_run_json in bench_runs_json] - parsed_bench_runs.extend(_parsed_bench_runs) - - # Now we know the total count, so we can fetch all the runs - query_fields["per_page"] = str(per_page) - num_queries = math.ceil(total_count / per_page) - parsed_bench_runs = [] - - tasks = [] - # Page is indexed from 1 - for page in range(1, num_queries + 1): - tasks.append(get_and_parse_run(page, parsed_bench_runs)) - await asyncio.gather(*tasks) - - return parsed_bench_runs - - -async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Optional[JobReport]: - """ - Extracts some data from the given bench_run, which was fetched via the GH API, - optionally getting it from the cache. - An artifact in GH can expire, in such case, returns None. - :param bench_run: - :param cache: - :param temp_dir: Used for downloading and unzipping artifacts. - :return: None if the corresponding artifact expired. - """ - if bench_run.id in cache: - logging.info(f"Getting bench run with ID {bench_run.id} from cache") - return cache[bench_run.id] - - # There might be multiple artifacts in the artifact list for a benchmark run - # We are looking for the one named 'Runtime Benchmark Report', which will - # be downloaded as a ZIP file. - obj: Dict[str, Any] = await _invoke_gh_api(f"/actions/runs/{bench_run.id}/artifacts") - artifacts = obj["artifacts"] - assert len(artifacts) == 1, "There should be exactly one artifact for a benchmark run" - bench_report_artifact = artifacts[0] - assert bench_report_artifact, "Benchmark Report artifact not found" - artifact_id = str(bench_report_artifact["id"]) - if bench_report_artifact["expired"]: - created_at = bench_report_artifact["created_at"] - updated_at = bench_report_artifact["updated_at"] - expires_at = bench_report_artifact["expires_at"] - logging.warning(f"Artifact with ID {artifact_id} from bench report {bench_run.id} has expired. 
" - f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}") - return None - - # Get contents of the ZIP artifact file - artifact_ret = await _invoke_gh_api(f"/actions/artifacts/{artifact_id}/zip", result_as_text=False) - zip_file_name = os.path.join(temp_dir, artifact_id + ".zip") - logging.debug(f"Writing artifact ZIP content into {zip_file_name}") - with open(zip_file_name, "wb") as zip_file: - zip_file.write(artifact_ret) - - extracted_dirname = os.path.join(temp_dir, artifact_id) - if os.path.exists(extracted_dirname): - shutil.rmtree(extracted_dirname) - os.mkdir(extracted_dirname) - - logging.debug(f"Extracting {zip_file_name} into {extracted_dirname}") - zip_file = zipfile.ZipFile(zip_file_name, "r") - zip_file.extractall(extracted_dirname) - bench_report_xml = path.join(extracted_dirname, "bench-report.xml") - assert path.exists(bench_report_xml) - - bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run) - cache[bench_run.id] = bench_report_parsed - return bench_report_parsed - - CSV_FIELDNAMES = [ "label", "score", @@ -524,171 +117,9 @@ def write_bench_reports_to_csv(bench_reports: List[JobReport], csv_fname: str) - }) -def populate_cache(cache_dir: str) -> Cache: - """ - Initializes cache from `cache_dir`, if there are any items. - See docs of `Cache`. - - :param cache_dir: Path to the cache directory. Does not have to exist - :return: Populated cache. Might be empty. - """ - if not path.exists(cache_dir): - logging.info(f"No cache at {cache_dir}, creating the cache directory") - os.mkdir(cache_dir) - logging.debug(f"Initializing cache from {cache_dir}") - cache = Cache(cache_dir) - logging.debug(f"Cache populated with {len(cache)} items") - return cache - - -def create_template_data( - job_reports_per_branch: Dict[str, List[JobReport]], - bench_labels: Set[str]) -> List[TemplateBenchData]: - """ - Creates all the necessary data for the Jinja template from all collected - benchmark job reports. - :param job_reports_per_branch: Mapping of branch name to list of job reports. - job reports should be sorted by the commit date, otherwise the difference - between scores might be wrongly computed. 
- :param bench_labels: - :return: - """ - - def pct_to_str(score_diff_perc: float) -> str: - if not np.isnan(score_diff_perc): - buff = "+" if score_diff_perc > 0 else "" - buff += "{:.5f}".format(score_diff_perc * 100) - buff += "%" - return buff - else: - return "NaN" - - def diff_str(score_diff: float, score_diff_perc: float) -> str: - if not np.isnan(score_diff): - diff_str = "+" if score_diff > 0 else "" - diff_str += "{:.5f}".format(score_diff) - diff_str += " (" - diff_str += pct_to_str(score_diff_perc) - diff_str += ")" - return diff_str - else: - return "NA" - - template_bench_datas: List[TemplateBenchData] = [] - for bench_label in bench_labels: - logging.debug(f"Creating template data for benchmark {bench_label}") - branch_datapoints: Dict[str, List[BenchDatapoint]] = {} - for branch, job_reports in job_reports_per_branch.items(): - logging.debug(f"Creating datapoints for branch {branch} from {len(job_reports)} job reports") - datapoints: List[BenchDatapoint] = [] - for job_report in job_reports: - prev_datapoint: Optional[BenchDatapoint] = \ - datapoints[-1] if len(datapoints) > 0 else None - if bench_label in job_report.label_score_dict: - score = job_report.label_score_dict[bench_label] - commit = job_report.bench_run.head_commit - timestamp = datetime.strptime( - commit.timestamp, - GH_DATE_FORMAT - ) - commit_msg_header = \ - commit.message.splitlines()[0].replace('"', "'") - series = pd.Series([ - prev_datapoint.score if prev_datapoint else None, - score - ]) - score_diff = series.diff()[1] - score_diff_perc = series.pct_change()[1] - tooltip = "score = " + str(score) + "\\n" - tooltip += "date = " + str(timestamp) + "\\n" - tooltip += "branch = " + branch + "\\n" - tooltip += "diff = " + diff_str(score_diff, score_diff_perc) - author_name = commit.author.name\ - .replace('"', '\\"')\ - .replace("'", "\\'") - datapoints.append(BenchDatapoint( - timestamp=timestamp, - score=score, - score_diff=str(score_diff), - score_diff_perc=pct_to_str(score_diff_perc), - tooltip=tooltip, - bench_run_url=job_report.bench_run.html_url, - commit_id=commit.id, - commit_msg=commit_msg_header, - commit_author=author_name, - commit_url=ENSO_COMMIT_BASE_URL + commit.id, - )) - logging.debug(f"{len(datapoints)} datapoints created for branch {branch}") - branch_datapoints[branch] = datapoints.copy() - logging.debug(f"Template data for benchmark {bench_label} created") - template_bench_datas.append(TemplateBenchData( - id=_label_to_id(bench_label), - name=_label_to_name(bench_label), - branches_datapoints=branch_datapoints, - )) - return template_bench_datas - - -def _label_to_id(label: str) -> str: - return label.replace(".", "_") - - -def _label_to_name(label: str) -> str: - items = label.split(".") - assert len(items) >= 2 - filtered_items = \ - [item for item in items if item not in ( - "org", - "enso", - "benchmark", - "benchmarks", - "semantic", - "interpreter", - "bench" - )] - return "_".join(filtered_items) - - -def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]: - """ - Iterates through all the job reports and gathers all the benchmark labels - found. Note that every job report can have a different set of benchmark labels. - :return: List of benchmark labels. 
- """ - all_labels = set() - for job_report in job_reports: - for labels in job_report.label_score_dict.keys(): - all_labels.add(labels) - return all_labels - - -def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) -> None: - jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(".")) - jinja_template = jinja_env.get_template(template_file) - generated_html = jinja_template.render(jinja_data.__dict__) - if path.exists(html_out_fname): - logging.info(f"{html_out_fname} already exist, rewritting") - with open(html_out_fname, "w") as html_file: - html_file.write(generated_html) - - -def ensure_gh_installed() -> None: - try: - out = subprocess.run(["gh", "--version"], check=True, capture_output=True) - if out.returncode != 0: - print("`gh` command not found - GH CLI utility is not installed. " - "See https://cli.github.com/", file=sys.stderr) - exit(1) - except subprocess.CalledProcessError: - print("`gh` command not found - GH CLI utility is not installed. " - "See https://cli.github.com/", file=sys.stderr) - exit(1) - - async def main(): default_since: datetime = (datetime.now() - timedelta(days=14)) default_until: datetime = datetime.now() - default_cache_dir = path.expanduser("~/.cache/enso_bench_download") default_csv_out = "Engine_Benchs/data/benchs.csv" date_format_help = DATE_FORMAT.replace("%", "%%") @@ -724,17 +155,6 @@ def _parse_bench_source(_bench_source: str) -> Source: help=f"The date until which the benchmark results will be gathered. " f"Format is {date_format_help}. " f"The default is today") - arg_parser.add_argument("--use-cache", - default=False, - metavar="(true|false)", - type=lambda input: True if input in ("true", "True") else False, - help="Whether the cache directory should be used. The default is False.") - arg_parser.add_argument("-c", "--cache", action="store", - default=default_cache_dir, - metavar="CACHE_DIR", - help=f"Cache directory. Makes sense only iff specified with --use-cache argument. " - f"The default is {default_cache_dir}. If there are any troubles with the " - f"cache, just do `rm -rf {default_cache_dir}`.") arg_parser.add_argument("-b", "--branches", action="store", nargs="+", default=["develop"], @@ -766,20 +186,17 @@ def _parse_bench_source(_bench_source: str) -> Source: since: datetime = args.since until: datetime = args.until - cache_dir: str = args.cache if not args.tmp_dir: temp_dir: str = tempfile.mkdtemp() else: temp_dir: str = args.tmp_dir - use_cache: bool = args.use_cache - assert cache_dir and temp_dir bench_source: Source = args.source csv_output: str = args.csv_output create_csv: bool = args.create_csv branches: List[str] = args.branches labels_override: Set[str] = args.labels - logging.debug(f"parsed args: since={since}, until={until}, cache_dir={cache_dir}, " - f"temp_dir={temp_dir}, use_cache={use_cache}, bench_source={bench_source}, " + logging.debug(f"parsed args: since={since}, until={until}, " + f"temp_dir={temp_dir}, bench_source={bench_source}, " f"csv_output={csv_output}, " f"create_csv={create_csv}, branches={branches}, " f"labels_override={labels_override}") @@ -789,22 +206,15 @@ def _parse_bench_source(_bench_source: str) -> Source: # If the user requires benchmarks for which artifacts are not retained # anymore, then cache should be used. 
min_since_without_cache = datetime.today() - GH_ARTIFACT_RETENTION_PERIOD - if not use_cache and since < min_since_without_cache: - logging.warning(f"The default GH artifact retention period is " + if since < min_since_without_cache: + logging.info(f"The default GH artifact retention period is " f"{GH_ARTIFACT_RETENTION_PERIOD.days} days. " f"This means that all the artifacts older than " f"{min_since_without_cache.date()} are expired." - f"The use_cache parameter is set to False, so no " - f"expired artifacts will be fetched.") - logging.warning(f"The `since` parameter is reset to " - f"{min_since_without_cache.date()} to prevent " - f"unnecessary GH API queries.") - since = min_since_without_cache - - if use_cache: - cache = populate_cache(cache_dir) - else: - cache = FakeCache() + f"The since date was set to {since}, so the remote cache is enabled, " + f"and the older artifacts will be fetched from the cache.") + + remote_cache = ReadonlyRemoteCache() bench_labels: Optional[Set[str]] = None """ Set of all gathered benchmark labels from all the job reports """ @@ -821,18 +231,7 @@ def _parse_bench_source(_bench_source: str) -> Source: f" until {until} for branch {branch}") exit(1) - job_reports: List[JobReport] = [] - - async def _process_report(_bench_run): - _job_report = await get_bench_report(_bench_run, cache, temp_dir) - if _job_report: - job_reports.append(_job_report) - - tasks = [] - for bench_run in bench_runs: - tasks.append(_process_report(bench_run)) - await asyncio.gather(*tasks) - + job_reports = await fetch_job_reports(bench_runs, remote_cache) logging.debug(f"Got {len(job_reports)} job reports for branch {branch}") if len(job_reports) == 0: print(f"There were 0 job_reports in the specified time interval, " @@ -841,14 +240,7 @@ async def _process_report(_bench_run): exit(1) logging.debug("Sorting job_reports by commit date") - - def _get_timestamp(job_report: JobReport) -> datetime: - return datetime.strptime( - job_report.bench_run.head_commit.timestamp, - GH_DATE_FORMAT - ) - - job_reports.sort(key=lambda report: _get_timestamp(report)) + sort_job_reports(job_reports) if create_csv: write_bench_reports_to_csv(job_reports, csv_output) @@ -858,7 +250,7 @@ def _get_timestamp(job_report: JobReport) -> datetime: # Gather all the benchmark labels from all the job reports if bench_labels is None: - all_bench_labels = _gather_all_bench_labels(job_reports) + all_bench_labels = gather_all_bench_labels(job_reports) if len(labels_override) > 0: logging.info(f"Subset of labels specified: {labels_override}") if not set(labels_override).issubset(all_bench_labels): @@ -883,6 +275,7 @@ def _get_timestamp(job_report: JobReport) -> datetime: bench_datas=template_bench_datas, bench_source=bench_source, branches=branches, + timestamp=datetime.now() ) # Render Jinja template with jinja_data @@ -890,10 +283,9 @@ def _get_timestamp(job_report: JobReport) -> datetime: os.mkdir(GENERATED_SITE_DIR) logging.debug(f"Rendering HTML from {JINJA_TEMPLATE} to {GENERATED_SITE_DIR}") - site_path = path.join(GENERATED_SITE_DIR, bench_source.value + "-benchs.html") + site_path = GENERATED_SITE_DIR.joinpath(bench_source.value + "-benchs.html") render_html( jinja_data, - JINJA_TEMPLATE, site_path ) logging.debug(f"Copying static site content from {TEMPLATES_DIR} to {GENERATED_SITE_DIR}") diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py new file mode 100644 index 000000000000..f3318a98c0c7 --- /dev/null +++ 
b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -0,0 +1,220 @@ +import os +from dataclasses import dataclass +from datetime import timedelta, datetime +from enum import Enum +from pathlib import Path +from typing import List, Dict, Any + + +def pkg_dir() -> Path: + """ Directory of this package """ + return Path(os.path.dirname(os.path.realpath(__file__))) + + +ENSO_REPO = "enso-org/enso" +BENCH_REPO = "enso-org/engine-benchmark-results" +BRANCH_DEVELOP = "develop" +DATE_FORMAT = "%Y-%m-%d" +GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +ENGINE_BENCH_WORKFLOW_ID = 29450898 +""" +Workflow ID of engine benchmarks, got via `gh api +'/repos/enso-org/enso/actions/workflows'`. +The name of the workflow is 'Benchmark Engine' +""" +NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764 +""" +Workflow ID for 'Benchmark Engine' workflow, which is the new workflow +since 2023-08-22. +""" +STDLIBS_BENCH_WORKFLOW_ID = 66661001 +""" +Workflow ID of stdlibs benchmarks, got via `gh api +'/repos/enso-org/enso/actions/workflows'`. +The name is 'Benchmark Standard Libraries' +""" +""" Date format as returned from responses in GH API""" +ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/" + +GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90) + +GENERATED_SITE_DIR = pkg_dir().parent.joinpath("generated_site") +TEMPLATES_DIR = pkg_dir().parent.joinpath("templates") +JINJA_TEMPLATE = TEMPLATES_DIR.joinpath("template_jinja.html") + +assert TEMPLATES_DIR.exists() +assert JINJA_TEMPLATE.exists() + + +class Source(Enum): + ENGINE = "engine" + STDLIB = "stdlib" + + def workflow_ids(self) -> List[int]: + if self == Source.ENGINE: + return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID] + elif self == Source.STDLIB: + return [STDLIBS_BENCH_WORKFLOW_ID] + else: + raise ValueError(f"Unknown source {self}") + + def artifact_names(self) -> List[str]: + if self == Source.ENGINE: + return ["Runtime Benchmark Report"] + elif self == Source.STDLIB: + return ["Enso JMH Benchmark Report"] + else: + raise ValueError(f"Unknown source {self}") + + +@dataclass +class Author: + name: str + + +@dataclass +class Commit: + """ Corresponds to the commit from GH API """ + id: str + author: Author + timestamp: str + message: str + + +@dataclass +class JobRun: + """ + Gathered via the GH API. Defines a single run of an Engine benchmark job. + """ + id: str + display_title: str + html_url: str + run_attempt: int + """ An event as defined by the GitHub API, for example 'push' or 'schedule' """ + event: str + head_commit: Commit + + @staticmethod + def from_dict(obj: Dict[Any, Any]) -> "JobRun": + return JobRun( + id=str(obj["id"]), + html_url=obj["html_url"], + run_attempt=int(obj["run_attempt"]), + event=obj["event"], + display_title=obj["display_title"], + head_commit=Commit( + id=obj["head_commit"]["id"], + message=obj["head_commit"]["message"], + timestamp=obj["head_commit"]["timestamp"], + author=Author( + name=obj["head_commit"]["author"]["name"] + ) + ) + ) + + def to_dict(self) -> Dict[Any, Any]: + return { + "id": self.id, + "html_url": self.html_url, + "run_attempt": self.run_attempt, + "event": self.event, + "display_title": self.display_title, + "head_commit": { + "id": self.head_commit.id, + "message": self.head_commit.message, + "timestamp": self.head_commit.timestamp, + "author": { + "name": self.head_commit.author.name + } + } + } + + +@dataclass +class JobReport: + """ + Gathered via the GH API - a report that is pushed as an aritfact to the job. + Contains a XML file with scores for all the benchmarks. 
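+
+    When serialized via `to_dict` (this is also the JSON form kept in the
+    remote cache), a report looks roughly as follows (illustrative values
+    taken from the test fixtures):
+
+        {
+            "bench_run": <output of JobRun.to_dict()>,
+            "label_score_dict": {"test_label": 1.0}
+        }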
+ """ + label_score_dict: Dict[str, float] + """ A mapping of benchmark labels to their scores """ + bench_run: JobRun + + @staticmethod + def from_dict(obj: Dict[Any, Any]) -> "JobReport": + return JobReport( + bench_run=JobRun.from_dict(obj["bench_run"]), + label_score_dict=obj["label_score_dict"] + ) + + def to_dict(self) -> Dict[Any, Any]: + return { + "bench_run": self.bench_run.to_dict(), + "label_score_dict": self.label_score_dict + } + + +@dataclass +class BenchmarkData: + """ + Data for a single benchmark compiled from all the job reports. + """ + + @dataclass + class Entry: + score: float + commit: Commit + bench_run_url: str + bench_run_event: str + + label: str + """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """ + entries: List[Entry] + """ Entries sorted by timestamps """ + + +@dataclass +class BenchDatapoint: + """ + A single datapoint that will be on the chart. `timestamp` is on X axis, + `score` on Y axis, and the rest of the fields is used either for the tooltip, + or for the selection info. + """ + timestamp: datetime + score: float + score_diff: str + """ Difference of the score with previous datapoint, or NaN """ + score_diff_perc: str + tooltip: str + bench_run_url: str + commit_id: str + commit_msg: str + commit_author: str + commit_url: str + + +@dataclass +class TemplateBenchData: + """ Data for one benchmark label (with a unique name and ID) """ + id: str + """ ID of the benchmark, must not contain dots """ + name: str + """ Human readable name of the benchmark """ + branches_datapoints: Dict[str, List[BenchDatapoint]] + """ Mapping of branches to datapoints for that branch """ + + +@dataclass +class JinjaData: + bench_source: Source + bench_datas: List[TemplateBenchData] + branches: List[str] + since: datetime + until: datetime + display_since: datetime + """ The date from which all the datapoints are first displayed """ + timestamp: datetime + """ The time when the website was generated """ + + + diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py new file mode 100644 index 000000000000..5337203334c1 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py @@ -0,0 +1,194 @@ +import asyncio +import logging +import math +import os +import shutil +import zipfile +from datetime import datetime +from os import path +from typing import List, Dict, Optional, Any +from xml.etree import ElementTree as ET + +from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Source +from bench_tool.gh import invoke_gh_api +from bench_tool.remote_cache import RemoteCache +from bench_tool.utils import WithTempDir + +ARTIFACT_ID = "Runtime Benchmark Report" + +_logger = logging.getLogger(__name__) + + +async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]: + """ + Fetches the list of all the SUCCESSFUL job runs from the GH API for the specified `branch`. + + :param since: The date from which the benchmark results will be gathered. + :param until: The date until which the benchmark results will be gathered. + :param branch: The branch for which the benchmark results will be gathered. + :param workflow_id: The ID of the workflow for which the benchmark results will be gathered. 
+ """ + _logger.info(f"Looking for all successful Engine benchmark workflow run " + f"actions from {since} to {until} for branch {branch} " + f"and workflow ID {workflow_id}") + query_fields = { + "branch": branch, + "status": "success", + "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT), + # Start with 1, just to determine the total count + "per_page": "1" + } + res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", query_fields) + total_count = int(res["total_count"]) + per_page = 3 + _logger.debug(f"Total count of all runs: {total_count} for workflow ID " + f"{workflow_id}. Will process {per_page} runs per page") + + async def get_and_parse_run(page: int, parsed_bench_runs) -> None: + _query_fields = query_fields.copy() + _query_fields["page"] = str(page) + res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields) + bench_runs_json = res["workflow_runs"] + _parsed_bench_runs = [JobRun.from_dict(bench_run_json) + for bench_run_json in bench_runs_json] + parsed_bench_runs.extend(_parsed_bench_runs) + + # Now we know the total count, so we can fetch all the runs + query_fields["per_page"] = str(per_page) + num_queries = math.ceil(total_count / per_page) + parsed_bench_runs = [] + + tasks = [] + # Page is indexed from 1 + for page in range(1, num_queries + 1): + tasks.append(get_and_parse_run(page, parsed_bench_runs)) + await asyncio.gather(*tasks) + + return parsed_bench_runs + + +async def fetch_job_reports( + bench_runs: List[JobRun], + remote_cache: RemoteCache +) -> List[JobReport]: + """ + Fetches all benchmark reports for the given benchmark runs. Benchmark runs are basically + just IDs of artifacts, and the reports are the actual benchmark results. These results are + either on the GH as artifacts, or are fetched from the cache if the artifact is expired. + All the runs are fetched in parallel. + :param bench_runs: + :param remote_cache: + :return: + """ + job_reports: List[JobReport] = [] + + async def _process_report(_bench_run: JobRun): + with WithTempDir("bench_download") as temp_dir: + _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache) + if _job_report: + job_reports.append(_job_report) + + tasks = [] + for bench_run in bench_runs: + tasks.append(_process_report(bench_run)) + await asyncio.gather(*tasks) + return job_reports + + +def _known_artifact_names() -> List[str]: + return Source.STDLIB.artifact_names() + Source.ENGINE.artifact_names() + + +async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]: + """ + Extracts some data from the given bench_run, which was fetched via the GH API, + optionally getting it from the cache. + An artifact in GH can expire, in such case, returns None. + :param bench_run: + :param temp_dir: Used for downloading and unzipping artifacts. + :return: None if the corresponding artifact cannot be found, neither as a GH artifact, neither from the remote cache. + """ + assert os.path.exists(temp_dir) and os.path.isdir(temp_dir) + + # There might be multiple artifacts in the artifact list for a benchmark run + # We are looking for the one named 'Runtime Benchmark Report', which will + # be downloaded as a ZIP file. 
+ obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts") + artifacts = obj["artifacts"] + artifacts_by_names = {artifact["name"]: artifact for artifact in artifacts} + # At this point, we don't know the source of the benchmark - either it is from + # Engine, or from stdlib. Thus, we don't know exactly which artifact name we + # are looking for. But we know, there must be exactly one of the artifact names. + bench_report_artifact = None + for known_name in _known_artifact_names(): + if known_name in artifacts_by_names: + bench_report_artifact = artifacts_by_names[known_name] + if bench_report_artifact is None: + _logger.warning(f"Bench run {bench_run.id} does not contain any of the known artifact names: " + f"{_known_artifact_names()}, but it is a successful run.") + return None + assert bench_report_artifact, "Benchmark Report artifact not found" + artifact_id = str(bench_report_artifact["id"]) + created_at = bench_report_artifact["created_at"] + updated_at = bench_report_artifact["updated_at"] + expires_at = bench_report_artifact["expires_at"] + is_expired = bench_report_artifact["expired"] + _logger.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: " + f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}, " + f"is_expired={is_expired}") + + job_report = await remote_cache.fetch(bench_run.id) + if is_expired and job_report is None: + _logger.error( + f"Artifact {artifact_id} from bench run {bench_run.id} is expired, and it is not in the remote cache") + return None + if job_report: + _logger.debug(f"Got job report from the cache for {bench_run.id}") + return job_report + + assert not is_expired + + # Get contents of the ZIP artifact file + artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False) + zip_file_name = os.path.join(temp_dir, artifact_id + ".zip") + _logger.debug(f"Writing artifact ZIP content into {zip_file_name}") + with open(zip_file_name, "wb") as zip_file: + zip_file.write(artifact_ret) + + extracted_dirname = os.path.join(temp_dir, artifact_id) + if os.path.exists(extracted_dirname): + shutil.rmtree(extracted_dirname) + os.mkdir(extracted_dirname) + + _logger.debug(f"Extracting {zip_file_name} into {extracted_dirname}") + zip_file = zipfile.ZipFile(zip_file_name, "r") + zip_file.extractall(extracted_dirname) + bench_report_xml = path.join(extracted_dirname, "bench-report.xml") + assert path.exists(bench_report_xml) + + bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run) + await remote_cache.put(bench_run.id, bench_report_parsed) + return bench_report_parsed + + +def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport": + _logger.debug(f"Parsing BenchReport from {bench_report_xml_path}") + tree = ET.parse(bench_report_xml_path) + root = tree.getroot() + label_score_dict: Dict[str, float] = dict() + for cases in root: + assert cases.tag == "cases" + for case in cases: + assert case.tag == "case" + label = case.findtext("label").strip() + scores = case.find("scores") + scores_float = [float(score.text.strip()) for score in scores] + if len(scores_float) > 1: + _logger.warning(f"More than one score for benchmark {label}, " + f"using the last one (the newest one).") + label_score_dict[label] = scores_float[len(scores_float) - 1] + return JobReport( + label_score_dict=label_score_dict, + bench_run=bench_run + ) + diff --git 
a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py new file mode 100644 index 000000000000..d8899e15b22d --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -0,0 +1,107 @@ +import asyncio +import base64 +import json +import logging +import subprocess +import sys +from typing import Dict, Optional, Union, Any +from urllib.parse import urlencode + +_logger = logging.getLogger(__name__) + +MAX_BACKOFF_SECONDS = 120 + + +def ensure_gh_installed() -> None: + try: + out = subprocess.run(["gh", "--version"], check=True, + capture_output=True) + if out.returncode != 0: + print("`gh` command not found - GH CLI utility is not installed. " + "See https://cli.github.com/", file=sys.stderr) + exit(1) + except subprocess.CalledProcessError: + print("`gh` command not found - GH CLI utility is not installed. " + "See https://cli.github.com/", file=sys.stderr) + exit(1) + + +async def invoke_gh_api( + repo: str, + endpoint: str, + query_params: Dict[str, str] = {}, + fields: Dict[str, str] = {}, + result_as_json: bool = True, + method: str = "GET", + backoff: int = 0, +) -> Optional[Union[Dict[str, Any], bytes]]: + """ + Invokes the GitHub API using the `gh` command line tool. + :param repo: Repository name in the form `owner/repo` + :param endpoint: Endpoint of the query. Must start with `/`. + :param query_params: Additional query parameters. + :param fields: Additional fields to be added to the query. add static + string parameters to the request payload. + :param result_as_json: If result should be parsed as JSON. + If false, the raw bytes are returned. + :param method: HTTP method to use, 'GET' by default. + :param backoff: Number of seconds to wait before retrying the request. + If higher than 0, it means that the request has already been retried, + try to do it again, with a higher backoff. + :return: None if the query fails + """ + assert endpoint.startswith("/") + if len(fields) > 0 and method != "POST": + raise ValueError("Fields can be used only with POST method") + urlencode(query_params) + cmd = [ + "gh", + "api", + "--method", method, + f"/repos/{repo}{endpoint}" + "?" + urlencode(query_params) + ] + for k, v in fields.items(): + cmd.append("-f") + cmd.append(f"{k}='{v}'") + if 0 < backoff <= MAX_BACKOFF_SECONDS: + _logger.debug(f"Backing off for {backoff} seconds") + await asyncio.sleep(backoff) + elif backoff > MAX_BACKOFF_SECONDS: + _logger.error(f"Backoff of {backoff} seconds is too high, giving up.") + return None + _logger.debug("Invoking gh API with `%s`", " ".join(cmd)) + proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = await proc.communicate() + _logger.debug("Finished gh API `%s`", " ".join(cmd)) + if proc.returncode != 0: + # Special handling of rate limit exceeded - just try to make the + # request one more time after some backoff. 
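+        # (The retry backoff starts at 10 seconds and doubles on every further
+        # retry; once it exceeds MAX_BACKOFF_SECONDS the request is given up.)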
+ if "You have exceeded a secondary rate limit" in err.decode(): + new_backoff = 10 if backoff == 0 else backoff * 2 + _logger.warning(f"Trying to retry the request with a new backoff " + f"of {new_backoff} seconds.") + return await invoke_gh_api(repo, endpoint, query_params, fields, + result_as_json, method, new_backoff) + else: + _logger.error("Command `%s` FAILED with errcode %d", + " ".join(cmd), + proc.returncode) + _logger.error(" stdout: %s", out.decode()) + _logger.error(" stderr: %s", err.decode()) + return None + if result_as_json: + return json.loads(out.decode()) + else: + return out + + +async def fetch_file(repo: str, file_path: str) -> Optional[str]: + ret = await invoke_gh_api(repo, f"/contents/{file_path}", + result_as_json=True) + if ret is None: + _logger.warning("File %s not found in %s", file_path, repo) + return None + file_content = base64.b64decode(ret["content"]).decode() + return file_content diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py new file mode 100644 index 000000000000..8e3529f05cd3 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -0,0 +1,119 @@ +import asyncio +import logging +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Set + +_logger = logging.getLogger(__name__) + + +@dataclass +class GitStatus: + modified: Set[str] + untracked: Set[str] + added: Set[str] + + +async def clone(repo: str, dest: Path) -> None: + _logger.debug("Cloning %s to %s", repo, dest) + dest_abs_path = str(dest.absolute()) + args = ["clone", f"git@github.com:{repo}.git", dest_abs_path] + proc = await asyncio.create_subprocess_exec("git", *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + stdout, stderr = await proc.communicate() + out = stdout.decode() + stderr.decode() + raise RuntimeError(f"Failed to clone {repo}: {out}") + assert dest.exists() + + +async def pull(repo: Path) -> None: + _logger.debug("Pulling %s", repo) + # Avoid unnecessary merge commits by using `--ff-only` + args = ["pull", "--ff-only"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + stdout, stderr = await proc.communicate() + out = stdout.decode() + stderr.decode() + raise RuntimeError(f"Failed to pull {repo}: {out}") + + +async def status(repo: Path) -> GitStatus: + assert repo.exists() + proc = await asyncio.create_subprocess_exec("git", "status", "--porcelain", cwd=repo, + stdout=subprocess.PIPE) + out, _ = await proc.communicate() + lines = out.decode().splitlines() + untracked: Set[str] = set() + modified: Set[str] = set() + added: Set[str] = set() + for line in lines: + line = line.strip() + if line.startswith("??"): + untracked.add(line.split()[1]) + elif line.startswith("M "): + modified.add(line.split()[1]) + elif line.startswith("A "): + added.add(line.split()[1]) + return GitStatus(modified, untracked, added) + + +async def add(repo: Path, files: Set[str]) -> None: + _logger.debug("Adding %s to %s", files, repo) + assert len(files) > 0 + args = ["add"] + list(files) + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to add {files} to {repo}. 
Output: {all_out}") + + +async def commit(repo: Path, msg: str) -> None: + _logger.debug("Committing %s with message '%s'", repo, msg) + stat = await status(repo) + assert len(stat.added) > 0 or len(stat.modified) > 0 + args = ["commit", "-m", msg] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to commit {repo}. Output: {all_out}") + + +async def push(repo: Path) -> None: + _logger.debug("Pushing to %s", repo) + args = ["push"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to push {repo}. Output: {all_out}") + + +async def init(repo: Path) -> None: + _logger.debug("Initializing git repo in %s", repo) + assert repo.exists() + args = ["init"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to init {repo}. Output: {all_out}") + + +async def head_commit(repo: Path) -> str: + args = ["rev-parse", "HEAD"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + out, err = await proc.communicate() + if ret != 0: + raise RuntimeError(f"Failed to get HEAD commit of {repo}: {err.decode()}") + else: + return out.decode().strip() diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py new file mode 100644 index 000000000000..b6627fb639cd --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -0,0 +1,174 @@ +""" +A remote cache is located inhttps://github.com/enso-org/engine-benchmark-results/tree/main/cache. +It is just a bunch of JSON files, each representing a single job report. +""" +import abc +import json +import logging +import os +import re +import tempfile +from pathlib import Path +from typing import Dict, Optional + +from . import gh, JobReport, BENCH_REPO, git + +_logger = logging.getLogger(__name__) + +CACHE_REMOTE_DIR = "cache" +ENGINE_INDEX_HTML = "engine-benchs.html" +STDLIB_INDEX_HTML = "stdlib-benchs.html" + + +class RemoteCache(abc.ABC): + + @abc.abstractmethod + async def fetch(self, bench_id: str) -> Optional[JobReport]: + """ + Fetches a job report for the given bench ID from the remote cache + :param bench_id: + :return: None if the report does not exist + """ + raise NotImplementedError + + @abc.abstractmethod + async def put(self, bench_id: str, job_report: JobReport) -> None: + """ + Puts a job report to the remote cache, or to the internal data structures. + :param bench_id: + :param job_report: + :return: + """ + raise NotImplementedError + + +class ReadonlyRemoteCache(RemoteCache): + """ + Only fetches the artifacts from the remote cache, does not push anything. 
+ """ + + def __init__(self): + self._fetched_items: Dict[str, JobReport] = {} + + async def fetch(self, bench_id: str) -> Optional[JobReport]: + """ Fetches a job report for the given bench ID from the remote cache """ + if bench_id in self._fetched_items: + return self._fetched_items[bench_id] + if not _is_benchrun_id(bench_id): + _logger.warning("Invalid bench ID: %s", bench_id) + return None + remote_path = self._get_remote_path(bench_id) + _logger.debug("Fetching cache from %s", remote_path) + content = await gh.fetch_file(BENCH_REPO, remote_path) + if content is None: + _logger.warning("Cache not found for %s", bench_id) + return None + bench_report = JobReport.from_dict( + json.loads(content) + ) + assert bench_id not in self._fetched_items + self._fetched_items[bench_id] = bench_report + return bench_report + + async def put(self, bench_id: str, job_report: JobReport) -> None: + assert _is_benchrun_id(bench_id) + assert bench_id not in self._fetched_items + self._fetched_items[bench_id] = job_report + + def _get_remote_path(self, bench_id: str) -> str: + assert _is_benchrun_id(bench_id) + return os.path.join(CACHE_REMOTE_DIR, bench_id + ".json") + + +class SyncRemoteCache(RemoteCache): + """ + Fetches and pushes the artifacts to the remote cache. Needs a write permissions to the repo. + """ + + def __init__(self, local_root_dir: Optional[Path] = None): + if local_root_dir is not None: + assert local_root_dir.exists() + assert local_root_dir.is_dir() + assert local_root_dir.joinpath(".git").exists() + self._repo_root_dir = local_root_dir + self._should_clone = False + else: + self._repo_root_dir = Path(tempfile.mkdtemp(prefix="bench_tool_remote_cache")) + self._should_clone = True + assert self._repo_root_dir.exists() + assert self._repo_root_dir.is_dir() + self._cache_dir = self._repo_root_dir.joinpath(CACHE_REMOTE_DIR) + + def repo_root_dir(self) -> Path: + return self._repo_root_dir + + def cache_dir(self) -> Path: + return self._cache_dir + + def engine_index_html(self) -> Path: + return self._repo_root_dir.joinpath(ENGINE_INDEX_HTML) + + def stdlib_index_html(self) -> Path: + return self._repo_root_dir.joinpath(STDLIB_INDEX_HTML) + + async def initialize(self) -> None: + """ + Make sure the repo is up-to-date + :return: + """ + if self._should_clone: + await git.clone(BENCH_REPO, self._repo_root_dir) + else: + await git.pull(self._repo_root_dir) + assert self._repo_root_dir.exists() + assert self._cache_dir.exists() + + async def fetch(self, bench_id: str) -> Optional[JobReport]: + assert self._cache_dir.exists() + path = self._cache_dir.joinpath(bench_id + ".json") + if path.exists(): + with path.open() as f: + return JobReport.from_dict(json.load(f)) + return None + + async def put(self, bench_id: str, job_report: JobReport) -> None: + assert self._cache_dir.exists() + path = self._cache_dir.joinpath(bench_id + ".json") + assert not path.exists() + with path.open("w") as f: + json.dump( + job_report.to_dict(), + f, + ensure_ascii=True, + indent=2 + ) + + async def sync(self) -> None: + """ + Synchronizes the local repo state with upstream. That means, pushes if some untracked or + modified files are in the local directory. 
+ :return: + """ + status = await git.status(self._repo_root_dir) + is_repo_dirty = len(status.modified) > 0 or len(status.added) > 0 + if is_repo_dirty: + _logger.info("Untracked or modified files found in the repo: %s", self._repo_root_dir) + commit_msg = "Regenerate websites" + if len(status.modified) > 0: + _logger.debug("Modified files: %s", status.modified) + await git.add(self._repo_root_dir, status.modified) + if len(status.untracked) > 0: + _logger.debug("Untracked files: %s", status.untracked) + await git.add(self._repo_root_dir, status.untracked) + commit_msg += f" - Add {len(status.untracked)} new reports." + else: + commit_msg += "." + await git.commit(self._repo_root_dir, commit_msg) + await git.push(self._repo_root_dir) + + +def _is_benchrun_id(name: str) -> bool: + return re.match(r"\d{9}", name) is not None + + + diff --git a/tools/performance/engine-benchmarks/bench_tool/requirements.txt b/tools/performance/engine-benchmarks/bench_tool/requirements.txt new file mode 100644 index 000000000000..d9ac381d2764 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/requirements.txt @@ -0,0 +1,3 @@ + +Jinja2 == 3.1.2 +numpy == 1.24.2 diff --git a/tools/performance/engine-benchmarks/bench_tool/template_render.py b/tools/performance/engine-benchmarks/bench_tool/template_render.py new file mode 100644 index 000000000000..57a626487111 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/template_render.py @@ -0,0 +1,133 @@ +import logging +from pathlib import Path +from typing import List, Dict, Optional, Set + +import jinja2 +import numpy as np +import pandas as pd + +from bench_tool import JobReport, TemplateBenchData, BenchDatapoint, ENSO_COMMIT_BASE_URL, JinjaData, \ + JINJA_TEMPLATE, TEMPLATES_DIR +from bench_tool.utils import parse_commit_timestamp + +_logger = logging.getLogger(__name__) + + +def create_template_data( + job_reports_per_branch: Dict[str, List[JobReport]], + bench_labels: Set[str]) -> List[TemplateBenchData]: + """ + Creates all the necessary data for the Jinja template from all collected + benchmark job reports. + :param job_reports_per_branch: Mapping of branch name to list of job reports. + job reports should be sorted by the commit date, otherwise the difference + between scores might be wrongly computed. 
+ :param bench_labels: + :return: + """ + + def pct_to_str(score_diff_perc: float) -> str: + if not np.isnan(score_diff_perc): + buff = "+" if score_diff_perc > 0 else "" + buff += "{:.5f}".format(score_diff_perc * 100) + buff += "%" + return buff + else: + return "NaN" + + def diff_str(score_diff: float, score_diff_perc: float) -> str: + if not np.isnan(score_diff): + diff_str = "+" if score_diff > 0 else "" + diff_str += "{:.5f}".format(score_diff) + diff_str += " (" + diff_str += pct_to_str(score_diff_perc) + diff_str += ")" + return diff_str + else: + return "NA" + + template_bench_datas: List[TemplateBenchData] = [] + for bench_label in bench_labels: + _logger.debug("Creating template data for benchmark %s", bench_label) + branch_datapoints: Dict[str, List[BenchDatapoint]] = {} + for branch, job_reports in job_reports_per_branch.items(): + _logger.debug("Creating datapoints for branch %s from %d job reports", + branch, len(job_reports)) + datapoints: List[BenchDatapoint] = [] + for job_report in job_reports: + prev_datapoint: Optional[BenchDatapoint] = \ + datapoints[-1] if len(datapoints) > 0 else None + if bench_label in job_report.label_score_dict: + score = job_report.label_score_dict[bench_label] + commit = job_report.bench_run.head_commit + timestamp = parse_commit_timestamp(commit) + commit_msg_header = \ + commit.message.splitlines()[0].replace('"', "'") + series = pd.Series([ + prev_datapoint.score if prev_datapoint else None, + score + ]) + score_diff = series.diff()[1] + score_diff_perc = series.pct_change()[1] + tooltip = "score = " + str(score) + "\\n" + tooltip += "date = " + str(timestamp) + "\\n" + tooltip += "branch = " + branch + "\\n" + tooltip += "diff = " + diff_str(score_diff, score_diff_perc) + author_name = commit.author.name \ + .replace('"', '\\"') \ + .replace("'", "\\'") + datapoints.append(BenchDatapoint( + timestamp=timestamp, + score=score, + score_diff=str(score_diff), + score_diff_perc=pct_to_str(score_diff_perc), + tooltip=tooltip, + bench_run_url=job_report.bench_run.html_url, + commit_id=commit.id, + commit_msg=commit_msg_header, + commit_author=author_name, + commit_url=ENSO_COMMIT_BASE_URL + commit.id, + )) + _logger.debug("%d datapoints created for branch %s", + len(datapoints), branch) + branch_datapoints[branch] = datapoints.copy() + _logger.debug("Template data for benchmark %s created", bench_label) + template_bench_datas.append(TemplateBenchData( + id=_label_to_id(bench_label), + name=_label_to_name(bench_label), + branches_datapoints=branch_datapoints, + )) + return template_bench_datas + + +def render_html(jinja_data: JinjaData, html_out: Path) -> None: + jinja_env = jinja2.Environment( + loader=jinja2.FileSystemLoader(TEMPLATES_DIR) + ) + template_name = str(JINJA_TEMPLATE.name) + jinja_template = jinja_env.get_template(template_name) + generated_html = jinja_template.render(jinja_data.__dict__) + if html_out.exists(): + _logger.info("%s already exist, rewriting", html_out) + with html_out.open("w") as html_file: + html_file.write(generated_html) + + +def _label_to_id(label: str) -> str: + return label.replace(".", "_") + + +def _label_to_name(label: str) -> str: + items = label.split(".") + assert len(items) >= 2 + filtered_items = \ + [item for item in items if item not in ( + "org", + "enso", + "benchmark", + "benchmarks", + "semantic", + "interpreter", + "bench" + )] + return "_".join(filtered_items) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py 
b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py new file mode 100644 index 000000000000..0c45ba70ae05 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py @@ -0,0 +1,78 @@ +import json +import unittest +from datetime import datetime + +from bench_tool import ENGINE_BENCH_WORKFLOW_ID, JobReport, JobRun, Commit, \ + Author +from .bench_results import get_bench_report, get_bench_runs +from .remote_cache import ReadonlyRemoteCache +from .utils import parse_commit_timestamp, WithTempDir + +# A single ID for a benchmark run between 2023-05-01 and 2023-05-05 +# We know for sure that this workflow run is on the GH. +BENCH_RUN_ID = "4888453297" + +sample_job_report = JobReport( + label_score_dict={ + "test_label": 1.0 + }, + bench_run=JobRun( + id="123456789", + display_title="Test", + html_url="https://github.com/enso-org/enso/actions/runs/123456789", + run_attempt=1, + event="push", + head_commit=Commit( + id="a67297aebf6a094d1ad0b0d88cf7438dbf8bd8fe", + message="Test commit", + timestamp="2021-06-01T12:00:00Z", + author=Author( + name="Pavel Marek" + ) + ) + ) +) + + +class TestBenchResults(unittest.IsolatedAsyncioTestCase): + def test_job_report_is_serializable(self): + s = json.dumps(sample_job_report.to_dict()) + self.assertIsNotNone(s) + self.assertGreater(len(s), 0) + + def test_job_report_is_deserializable(self): + d = sample_job_report.to_dict() + job_report = JobReport.from_dict(d) + self.assertEqual(sample_job_report, job_report) + + async def test_get_bench_run(self): + """ + Bench run does not need remote cache - it fetches just some metadata about GH artifacts. + :return: + """ + since = datetime.fromisoformat("2023-05-01") + until = datetime.fromisoformat("2023-05-05") + bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID) + self.assertEqual(1, len(bench_runs)) + # There is just a single bench run between 2023-05-01 and 2023-05-05 + bench_run = bench_runs[0] + self.assertEqual(BENCH_RUN_ID, bench_run.id) + commit_ts = parse_commit_timestamp(bench_run.head_commit) + self.assertLess(since, commit_ts) + self.assertGreater(until, commit_ts) + + async def test_get_bench_report(self): + # We choose an old date on purpose, so that the remote cache must be used, and is thus + # transitively tested. + since = datetime.fromisoformat("2023-05-01") + until = datetime.fromisoformat("2023-05-05") + bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID) + self.assertEqual(1, len(bench_runs)) + bench_run = bench_runs[0] + remote_cache = ReadonlyRemoteCache() + with WithTempDir("test_get_bench_report") as temp_dir: + bench_report = await get_bench_report(bench_run, temp_dir, remote_cache) + self.assertIsNotNone(bench_report) + self.assertEqual(bench_run, bench_report.bench_run) + self.assertEqual(55, len(bench_report.label_score_dict)) + diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py new file mode 100644 index 000000000000..1882390fd601 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py @@ -0,0 +1,34 @@ +import unittest + +from bench_tool import ENSO_REPO, Source +from . 
import gh + + +class TestGH(unittest.IsolatedAsyncioTestCase): + async def test_ensure_gh_installed(self): + self.assertIsNone(gh.ensure_gh_installed()) + + async def test_file_fetch(self): + content = await gh.fetch_file(ENSO_REPO, "README.md") + self.assertIsNotNone(content) + self.assertIsInstance(content, str) + self.assertGreater(len(content), 0) + + async def test_fetch_non_existing_file(self): + content = await gh.fetch_file(ENSO_REPO, "non_existing_file") + self.assertIsNone(content) + + async def test_wrong_gh_query_should_not_fail(self): + res = await gh.invoke_gh_api("non_existing_repo", "/non_existing_endpoint") + self.assertIsNone(res) + + async def test_get_stdlib_bench_run(self): + # This bench run ID does not contain the "Runtime Benchmark Report" artifact name, + # but it is a successful run. There should be a special handling for this case + # https://github.com/enso-org/enso/actions/runs/7909011591 + bench_run_id = "7909011591" + obj = await gh.invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run_id}/artifacts") + artifacts = obj["artifacts"] + stdlib_artifact_name = Source.STDLIB.artifact_names()[0] + self.assertEqual(1, len(artifacts)) + self.assertEqual(stdlib_artifact_name, artifacts[0]["name"]) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_git.py b/tools/performance/engine-benchmarks/bench_tool/test_git.py new file mode 100644 index 000000000000..61a635786a0f --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_git.py @@ -0,0 +1,59 @@ +import shutil +import tempfile +import unittest +from pathlib import Path + +from . import git + + +class TestGit(unittest.IsolatedAsyncioTestCase): + def setUp(self): + self.repo_root = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.repo_root) + + async def test_init(self): + await git.init(self.repo_root) + status = await git.status(self.repo_root) + self.assertEqual(0, len(status.added)) + self.assertEqual(0, len(status.modified)) + self.assertEqual(0, len(status.untracked)) + + async def test_add_file(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + status = await git.status(self.repo_root) + self.assertEqual(1, len(status.untracked)) + + async def test_commit(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + await git.add(self.repo_root, {"README.md"}) + await git.commit(self.repo_root, "Initial commit") + status = await git.status(self.repo_root) + self.assertEqual(0, len(status.added)) + self.assertEqual(0, len(status.modified)) + self.assertEqual(0, len(status.untracked)) + + async def test_modify_file(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + await git.add(self.repo_root, {"README.md"}) + await git.commit(self.repo_root, "Initial commit") + self.repo_root.joinpath("README.md").write_text("Hello World") + status = await git.status(self.repo_root) + self.assertEqual(0, len(status.added)) + self.assertEqual(1, len(status.modified)) + self.assertEqual(0, len(status.untracked)) + + async def test_add_more_files(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + self.repo_root.joinpath("pom.xml").write_text("") + status = await git.status(self.repo_root) + self.assertEqual(2, len(status.untracked)) + await git.add(self.repo_root, {"README.md", "pom.xml"}) + status = await git.status(self.repo_root) + self.assertEqual(2, len(status.added)) + diff --git 
a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py new file mode 100644 index 000000000000..18e046c12700 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -0,0 +1,114 @@ +import unittest +from pathlib import Path + +from . import JobReport, JobRun, Commit, Author +from .bench_results import fetch_job_reports +from .remote_cache import ReadonlyRemoteCache, SyncRemoteCache + + +sample_job_report = JobReport( + label_score_dict={ + "test_label": 1.0 + }, + bench_run=JobRun( + id="123456789", + display_title="Test", + html_url="https://github.com/enso-org/enso/actions/runs/123456789", + run_attempt=1, + event="push", + head_commit=Commit( + id="a67297aebf6a094d1ad0b0d88cf7438dbf8bd8fe", + message="Test commit", + timestamp="2021-06-01T12:00:00Z", + author=Author( + name="Pavel Marek" + ) + ) + ) +) + +stdlib_bench_run = JobRun( + id='7879611014', + display_title='Benchmark Standard Libraries', + html_url='https://github.com/enso-org/enso/actions/runs/7879611014', + run_attempt=1, + event='schedule', + head_commit=Commit( + id='eb59b475f68146f03fc3cef1092ee56eaaa1600a', + author=Author(name='Radosław Waśko'), + timestamp='2024-02-12T19:04:13Z', + message='Write support for S3 (#8921)\n\n- Closes #8809' + ) +) + + +class TestReadonlyRemoteCache(unittest.IsolatedAsyncioTestCase): + async def test_fetch_some_cache(self): + remote_cache = ReadonlyRemoteCache() + # This ID is definitely in the cache + bench_id = "3686412302" + job_report = await remote_cache.fetch(bench_id) + self.assertIsNotNone(job_report) + self.assertEqual(1, job_report.bench_run.run_attempt) + self.assertEqual(bench_id, job_report.bench_run.id) + self.assertEqual("Jaroslav Tulach", job_report.bench_run.head_commit.author.name) + + async def test_non_existing_cache_should_not_fail(self): + remote_cache = ReadonlyRemoteCache() + bench_id = "FOOOO BAR" + job_report = await remote_cache.fetch(bench_id) + self.assertIsNone(job_report) + + async def test_put_job_report_into_cache(self): + remote_cache = ReadonlyRemoteCache() + bench_id = sample_job_report.bench_run.id + await remote_cache.put(bench_id, sample_job_report) + job_report = await remote_cache.fetch(bench_id) + self.assertIsNotNone(job_report) + self.assertEqual(bench_id, job_report.bench_run.id) + + async def test_fetch_stdlib_report(self): + remote_cache = ReadonlyRemoteCache() + job_reports = await fetch_job_reports([stdlib_bench_run], remote_cache) + self.assertIsNotNone(job_reports) + self.assertEqual(1, len(job_reports)) + + +class TestSyncRemoteCache(unittest.IsolatedAsyncioTestCase): + LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results") + + async def test_init_sync_remote_cache_from_local_repo(self): + if not self.LOCAL_REPO_ROOT.exists(): + self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist") + remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) + await remote_cache.initialize() + root_dir = remote_cache.repo_root_dir() + self.assertTrue(root_dir.exists()) + self.assertTrue(root_dir.is_dir()) + cache_dir = remote_cache.cache_dir() + self.assertTrue(cache_dir.exists()) + self.assertTrue(cache_dir.is_dir()) + self.assertTrue(remote_cache.engine_index_html().exists()) + self.assertTrue(remote_cache.stdlib_index_html().exists()) + + async def test_clone_sync_remote_cache(self): + self.skipTest("TODO: Takes too long") + remote_cache = SyncRemoteCache() + await remote_cache.initialize() +
root_dir = remote_cache.repo_root_dir() + self.assertTrue(root_dir.exists()) + self.assertTrue(root_dir.is_dir()) + cache_dir = remote_cache.cache_dir() + self.assertTrue(cache_dir.exists()) + self.assertTrue(cache_dir.is_dir()) + self.assertTrue(remote_cache.engine_index_html().exists()) + self.assertTrue(remote_cache.stdlib_index_html().exists()) + + async def test_fetch_stdlib_report(self): + if not self.LOCAL_REPO_ROOT.exists(): + self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist") + remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) + await remote_cache.initialize() + job_reports = await fetch_job_reports([stdlib_bench_run], remote_cache) + self.assertIsNotNone(job_reports) + self.assertEqual(1, len(job_reports)) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py new file mode 100644 index 000000000000..567533d4d5d2 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py @@ -0,0 +1,31 @@ +import unittest +from pathlib import Path +from datetime import datetime + +from bench_tool import Source +from bench_tool.remote_cache import SyncRemoteCache +from bench_tool.utils import WithTempDir +from bench_tool.website import generate_bench_website + + +class TestWebsiteRegen(unittest.IsolatedAsyncioTestCase): + LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results") + + async def test_engine_website_regen(self): + if not self.LOCAL_REPO_ROOT.exists(): + self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist") + remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) + # Pull the repo if necessary + await remote_cache.initialize() + since = datetime.fromisoformat("2023-02-01") + until = datetime.fromisoformat("2023-02-25") + with WithTempDir("test_engine_website_regen") as temp_dir: + temp_dir_path = Path(temp_dir) + html_out = temp_dir_path.joinpath("engine-benchs.html") + await generate_bench_website(Source.ENGINE, remote_cache, since, until, html_out) + self.assertTrue(html_out.exists()) + self.assertGreater( + html_out.stat().st_size, 100 * 1024, + "The generated HTML file should have size bigger than 100 KB" + ) + pass diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py new file mode 100644 index 000000000000..0a04f0784a87 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/utils.py @@ -0,0 +1,54 @@ +import logging +import shutil +import tempfile +from datetime import datetime +from typing import List, Set + +from bench_tool import JobReport, GH_DATE_FORMAT, Commit + +_logger = logging.getLogger(__name__) + + +class WithTempDir: + def __init__(self, prefix: str): + self.prefix = prefix + self.temp_dir = None + + def __enter__(self): + self.temp_dir = tempfile.mkdtemp(prefix=self.prefix) + return self.temp_dir + + def __exit__(self, exc_type, exc_val, exc_tb): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + +def gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]: + """ + Iterates through all the job reports and gathers all the benchmark labels + found. Note that every job report can have a different set of benchmark labels. + :return: List of benchmark labels. 
+ """ + all_labels = set() + for job_report in job_reports: + for labels in job_report.label_score_dict.keys(): + all_labels.add(labels) + return all_labels + + +def parse_commit_timestamp(commit: Commit) -> datetime: + """ Parses the timestamp from the commit based on the GH's formatting. """ + return datetime.strptime(commit.timestamp, GH_DATE_FORMAT) + + +def sort_job_reports( + job_reports: List[JobReport] +) -> None: + """ + Sorts the job reports in place by the commit date. + :param job_reports: + :return: + """ + def _get_timestamp(job_report: JobReport) -> datetime: + return parse_commit_timestamp(job_report.bench_run.head_commit) + + job_reports.sort(key=lambda report: _get_timestamp(report)) diff --git a/tools/performance/engine-benchmarks/bench_tool/website.py b/tools/performance/engine-benchmarks/bench_tool/website.py new file mode 100644 index 000000000000..57f6f6da29d3 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/website.py @@ -0,0 +1,66 @@ +import logging +from datetime import datetime, timedelta +from pathlib import Path +from typing import List, Dict, Set + +from bench_tool import JobRun, BRANCH_DEVELOP, Source, JobReport, TemplateBenchData, JinjaData +from bench_tool.bench_results import get_bench_runs, fetch_job_reports +from bench_tool.remote_cache import SyncRemoteCache +from bench_tool.template_render import create_template_data, render_html +from bench_tool.utils import sort_job_reports, gather_all_bench_labels + +_logger = logging.getLogger(__name__) + + +async def generate_bench_website( + bench_source: Source, + remote_cache: SyncRemoteCache, + since: datetime, + until: datetime, + generated_html: Path +) -> None: + """ + Generates single `index.html` website with the benchmark results. + + :param bench_source: Source of the benchmarks, either engine or stdlib + :param remote_cache: Remote cache used for fetching the job reports. 
+ :param since: Date since when the benchmarks should be considered + :param until: Date until when the benchmarks should be considered + :param generated_html: Path to the generated HTML file + :return: + """ + bench_runs: List[JobRun] = [] + for workflow_id in bench_source.workflow_ids(): + bench_runs.extend( + await get_bench_runs(since, until, BRANCH_DEVELOP, workflow_id) + ) + assert len(bench_runs) > 0, "No benchmark runs found" + + job_reports = await fetch_job_reports(bench_runs, remote_cache) + _logger.debug(f"Gathered {len(job_reports)} job reports") + assert len(job_reports) > 0, "No job reports found" + + _logger.debug("Sorting job_reports by commit date") + sort_job_reports(job_reports) + + all_bench_labels: Set[str] = gather_all_bench_labels(job_reports) + _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels") + + job_reports_per_branch: Dict[str, List[JobReport]] = { + BRANCH_DEVELOP: job_reports + } + template_bench_datas: List[TemplateBenchData] = \ + create_template_data(job_reports_per_branch, all_bench_labels) + template_bench_datas.sort(key=lambda data: data.id) + + jinja_data = JinjaData( + since=since, + display_since=max(until - timedelta(days=30), since), + until=until, + bench_datas=template_bench_datas, + bench_source=bench_source, + branches=[BRANCH_DEVELOP], + timestamp=datetime.now() + ) + _logger.debug(f"Rendering HTML to {generated_html}") + render_html(jinja_data, generated_html) diff --git a/tools/performance/engine-benchmarks/templates/template_jinja.html b/tools/performance/engine-benchmarks/templates/template_jinja.html index 97311c0d8af8..838f0968c368 100644 --- a/tools/performance/engine-benchmarks/templates/template_jinja.html +++ b/tools/performance/engine-benchmarks/templates/template_jinja.html @@ -280,7 +280,8 @@


- Generated by the bench_download.py script. + Generated by the bench_download.py script at + {{ timestamp }}.
@@ -334,7 +335,10 @@

Applied filters

{% for bench_data in bench_datas %}
-

{{ bench_data.id }}

+ +

+ {{ bench_data.id }} +

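The template hunks above only add a `{{ timestamp }}` placeholder and adjust how `{{ bench_data.id }}` is displayed; the values come from the `JinjaData` object that `render_html` passes to the template as `jinja_data.__dict__`, so every dataclass field name becomes a template variable. Below is a minimal sketch (not part of this diff; it uses an inline template string as a stand-in for the real `templates/template_jinja.html`) of how the new `{{ timestamp }}` placeholder gets resolved:

```python
# Sketch only: mirrors what render_html() does with the real JinjaData and the
# real template file. JinjaDataSketch is a stand-in for bench_tool.JinjaData.
from dataclasses import dataclass
from datetime import datetime

import jinja2


@dataclass
class JinjaDataSketch:
    timestamp: datetime  # field whose value fills the new {{ timestamp }} placeholder


env = jinja2.Environment(loader=jinja2.BaseLoader())
template = env.from_string(
    "Generated by the bench_download.py script at {{ timestamp }}."
)
# render_html() calls jinja_template.render(jinja_data.__dict__) in the same way.
print(template.render(JinjaDataSketch(timestamp=datetime.now()).__dict__))
```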
diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py new file mode 100644 index 000000000000..be408fa6f7eb --- /dev/null +++ b/tools/performance/engine-benchmarks/website_regen.py @@ -0,0 +1,66 @@ +""" +IMPORTANT NOTE: Should be run only on the CI!! + +This script regenerates the benchmark results website, hosted as GH web pages on the +https://github.com/enso-org/engine-benchmark-results repo. +""" +import asyncio +import logging +from argparse import ArgumentParser +from datetime import datetime +from pathlib import Path +from typing import Optional + +from bench_tool import Source +from bench_tool.remote_cache import SyncRemoteCache +from bench_tool.website import generate_bench_website + +# The inception dates of the benchmarks, i.e., the dates of the first benchmark runs. +ENGINE_SINCE = datetime.fromisoformat("2022-12-01") +STDLIB_SINCE = datetime.fromisoformat("2023-08-22") + +_logger = logging.getLogger("website_regen") + + +async def main(): + arg_parser = ArgumentParser(description="Regenerate the benchmark results website") + arg_parser.add_argument("-v", "--verbose", action="store_true") + arg_parser.add_argument("-n", "--dry-run", action="store_true") + arg_parser.add_argument("--local-repo", + type=str, + help="Path to the local clone of the engine-benchmark-results repo") + args = arg_parser.parse_args() + dry_run: bool = args.dry_run + verbose: bool = args.verbose + local_repo: Optional[Path] = Path(args.local_repo) if args.local_repo else None + logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) + _logger.debug(f"Args: dry_run={dry_run}, verbose={verbose}, local_repo={local_repo}") + remote_cache = SyncRemoteCache(local_repo) + _logger.info("Initializing the bench results repo, this might take some time") + await remote_cache.initialize() + _logger.info("Bench results repo initialized") + + now = datetime.now() + engine_html_task = generate_bench_website( + Source.ENGINE, + remote_cache, + ENGINE_SINCE, + now, + remote_cache.engine_index_html() + ) + stdlib_html_task = generate_bench_website( + Source.STDLIB, + remote_cache, + STDLIB_SINCE, + now, + remote_cache.stdlib_index_html() + ) + await asyncio.gather(engine_html_task, stdlib_html_task) + if dry_run: + _logger.info("Dry-run, not syncing the remote cache") + else: + await remote_cache.sync() + + +if __name__ == "__main__": + asyncio.run(main())
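Taken together, `website_regen.py` is a thin CLI wrapper around the `bench_tool` package: it initializes a `SyncRemoteCache`, regenerates both index pages via `generate_bench_website`, and, unless `--dry-run` is given, pushes the result back with `sync()`. For local experiments the same pipeline can be driven with the read-only cache, which needs no write access to the results repo. Below is a minimal sketch, not part of this diff; the date range is an arbitrary example and the GitHub CLI (`gh`) must be installed and authenticated, as `gh.ensure_gh_installed()` checks elsewhere in the package:

```python
# Sketch only: fetch engine benchmark runs for a small date range and resolve their
# reports through the read-only cache, similar to the tests in test_bench_results.py.
import asyncio
from datetime import datetime

from bench_tool import BRANCH_DEVELOP, ENGINE_BENCH_WORKFLOW_ID
from bench_tool.bench_results import fetch_job_reports, get_bench_runs
from bench_tool.remote_cache import ReadonlyRemoteCache
from bench_tool.utils import gather_all_bench_labels, sort_job_reports


async def main() -> None:
    # Arbitrary example range; test_bench_results.py uses the same one.
    since = datetime.fromisoformat("2023-05-01")
    until = datetime.fromisoformat("2023-05-05")
    bench_runs = await get_bench_runs(since, until, BRANCH_DEVELOP, ENGINE_BENCH_WORKFLOW_ID)
    # ReadonlyRemoteCache keeps fetched reports in memory only, so nothing is
    # written back to the engine-benchmark-results repo.
    job_reports = await fetch_job_reports(bench_runs, ReadonlyRemoteCache())
    sort_job_reports(job_reports)
    labels = gather_all_bench_labels(job_reports)
    print(f"Fetched {len(job_reports)} report(s) covering {len(labels)} benchmark label(s)")


if __name__ == "__main__":
    asyncio.run(main())
```

On CI, the same generation step runs through `SyncRemoteCache` instead, so the regenerated `index.html` files and any newly cached reports are committed and pushed by `sync()`.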