Skip to content

Commit

Permalink
Yay! cli.py now runs the example workload, and creates the example dataset!
Browse files Browse the repository at this point in the history
  • Loading branch information
JackKelly committed Sep 29, 2023
1 parent ef482a6 commit 28aedb0
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 31 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,12 @@ Capture the performance of a computer system whilst running a set of benchmark w
2. Optionally create a virtual Python environment (e.g. with `python -m venv </path/to/venv/>`) and
activate that venv (`source </path/to/venv/>bin/activate`).
3. `pip install -e .`

# Usage

To run the examples:

```
~/dev/perfcapture$ mkdir -p ~/temp/perfcapture_data_path
~/dev/perfcapture$ python scripts/cli.py --data-path ~/temp/perfcapture_data_path --recipe-path examples
```
12 changes: 9 additions & 3 deletions examples/read_numpy_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,18 @@ def prepare(self) -> None:
print("Created array", flush=True)

# Save array to temporary file
with open(self.path_to_dataset, mode="wb") as fh:
with open(self.path, mode="wb") as fh:
np.save(fh, array)


class ReadNumpyFile(Workload):
    """Benchmark workload which repeatedly loads a numpy file from disk."""

    def init_dataset(self) -> Dataset:
        # The dataset this workload reads; prepared once by the runner and
        # re-used across repeats.
        return NumpyDataset()

    def run(self):
        """Load numpy file into RAM."""
        # 100 loads per run() so each timed repeat does a measurable
        # amount of disk I/O.
        for _ in range(100):
            np.load(self.dataset.path)

    @property
    def n_repeats(self) -> int:
        # Repeat the whole run() ten times to get a spread of timings.
        return 10
69 changes: 57 additions & 12 deletions scripts/cli.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,88 @@
#!/usr/bin/env python
import pathlib
import shutil
import subprocess
import sys

import typer
from typing_extensions import Annotated
from typing import Optional

from perfcapture.workload import discover_workloads

app = typer.Typer()


@app.command()
def bench(
    data_path: Annotated[
        pathlib.Path,
        typer.Option(help="The path for storing the data which the benchmarks read from.")
    ],
    recipe_path: Annotated[
        pathlib.Path,
        typer.Option(help=(
            "The path containing the code which defines the Workloads and Datasets."))
    ] = pathlib.Path("."),
    selected_workloads: Annotated[
        Optional[str],
        typer.Option(help=(
            "Space-separated list of workloads to run. If not set, all workloads found in"
            " recipe_path will be run. Use the `name` of each workload."))
    ] = None,
    keep_cache: Annotated[
        bool,
        typer.Option(
            "--keep-cache",
            help="Set this flag to prevent `vmtouch -e` being called before each benchmark.",
        )
    ] = False,
) -> None:
    """Run workload(s) and measure performance.

    If any of the workloads require datasets to be pre-prepared then this script will first
    generate all datasets required by the workload(s). Those datasets will be stored at the
    `data_path`. The time spent creating the datasets will not be recorded. The contents of
    `data_path` will not be removed after running this script. So if you run this script
    multiple times then subsequent runs can make use of the already existing datasets.

    If you update the recipe which specifies the dataset creation then it is up to you to
    manually delete the old dataset on disk.

    vmtouch must be installed if you wish to clear the page cache after each iteration.
    """
    # Sanity checks
    if not data_path.exists():
        sys.exit(f"ERROR! {data_path} does not exist! Please create the directory!")
    if not recipe_path.exists():
        sys.exit(f"ERROR! {recipe_path} does not exist!")
    # Only require vmtouch when we will actually call it (i.e. not --keep-cache).
    if not keep_cache and shutil.which("vmtouch") is None:
        sys.exit(
            "If you want to flush the page cache before each iteration, then please install"
            " vmtouch. Or run with the --keep-cache option, which does not call vmtouch.")

    workloads = discover_workloads(recipe_path)
    print(f"Found {len(workloads)} Workload(s) in {recipe_path}")

    # Filter workloads (if necessary). Materialise a list (NOT a lazy `filter`
    # object) because `workloads` is iterated twice below: once to collect the
    # datasets and once to run the benchmarks. A lazy iterator would be
    # exhausted by the first pass, silently skipping every benchmark.
    if selected_workloads:
        requested_names = selected_workloads.split(" ")
        workloads = [w for w in workloads if w.name in requested_names]

    # Prepare datasets (if necessary).
    all_datasets = {workload.dataset for workload in workloads}
    for dataset in all_datasets:
        dataset.set_path(data_path)
        if not dataset.already_exists():
            dataset.prepare()

    # Run the workloads!
    for workload in workloads:
        print(f"Running {workload.name} {workload.n_repeats} times!", flush=True)
        for _ in range(workload.n_repeats):
            if not keep_cache:
                # Evict the dataset from the page cache so every repeat
                # measures cold-cache performance.
                subprocess.run(
                    ["vmtouch", "-e", workload.dataset.path], capture_output=True, check=True)
            workload.run()
        print(f"  Finished running {workload.name} {workload.n_repeats} times!", flush=True)


if __name__ == "__main__":
    app()
24 changes: 13 additions & 11 deletions src/perfcapture/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,22 @@ class Dataset(abc.ABC):
Datasets are read by `Workload`s.
"""
def __init__(self, base_data_path: pathlib.Path):
self.path_to_dataset = base_data_path / self.name
def set_path(self, base_data_path: pathlib.Path):
    """Record where this dataset lives on disk.

    `self.path` becomes `<base_data_path>/<dataset name>`. `prepare()` is
    expected to store the dataset at `self.path`, and workloads read from it.
    """
    self.path = base_data_path / self.name

@property
def name(self) -> str:
    """The name of this dataset.

    Must be unique amongst all the datasets used in the benchmark suite.
    Defaults to the concrete subclass's class name.
    """
    return self.__class__.__name__

@abc.abstractmethod
def prepare(self) -> None:
"""Override this method if your workload needs to prepare a local dataset.
Store your dataset at `self.path_to_dataset`.
Store your dataset at `self.path`.
Every time the workload runner executes, it runs this pseudocode:
Expand All @@ -34,12 +36,12 @@ def prepare(self) -> None:
def already_exists(self) -> bool:
    """Returns True if the dataset is already on disk.

    A dataset counts as existing when `self.path` is either a non-empty
    directory, or a single (non-directory) file.
    """
    path_is_dir_which_is_not_empty = (
        self.path.exists() and
        self.path.is_dir() and
        path_not_empty(self.path)
    )
    path_is_single_file = (
        self.path.exists() and
        not self.path.is_dir()
    )
    return path_is_dir_which_is_not_empty or path_is_single_file
14 changes: 12 additions & 2 deletions src/perfcapture/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
"""Simple utility functions."""

import importlib.util
import pathlib

import sys

def path_not_empty(path: pathlib.Path) -> bool:
    """Returns True if directory `path` contains at least one entry."""
    # Stop at the first entry: we only need to know whether one exists,
    # not enumerate the whole directory.
    for _ in path.iterdir():
        return True
    return False


def load_module_from_filename(py_filename: pathlib.Path):
    """Dynamically import the Python source file at `py_filename`.

    Returns the freshly loaded module, registered in `sys.modules` under
    the filename's stem.
    """
    name = py_filename.stem
    spec = importlib.util.spec_from_file_location(name, py_filename)
    loaded = importlib.util.module_from_spec(spec)
    # Register before executing, so the module body can refer to itself.
    sys.modules[name] = loaded
    spec.loader.exec_module(loaded)
    return loaded
50 changes: 47 additions & 3 deletions src/perfcapture/workload.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,60 @@
import abc
import inspect
import pathlib
from perfcapture.dataset import Dataset

from perfcapture.utils import path_not_empty
from perfcapture.utils import load_module_from_filename, path_not_empty


class Workload(abc.ABC):
    """Inherit from `Workload` to implement a new benchmark workload."""

    def __init__(self):
        # Each workload owns exactly one Dataset, created via the
        # subclass-supplied `init_dataset` hook.
        self.dataset = self.init_dataset()

    @abc.abstractmethod
    def init_dataset(self) -> Dataset:
        """Initialise and return a concrete Dataset object."""

    @abc.abstractmethod
    def run(self) -> dict[str, object]:
        """Must be overridden to implement the workload."""

    @property
    def name(self) -> str:
        """The name of this workload.

        Must be unique amongst all the workloads used in this benchmark suite.
        Defaults to the concrete subclass's class name.
        """
        return self.__class__.__name__

    @property
    def n_repeats(self) -> int:
        """The number of times to repeat this workload."""
        return 1


def load_workloads_from_filename(py_filename: pathlib.Path) -> list[Workload]:
    """Instantiate every `Workload` subclass defined in `py_filename`."""
    module = load_module_from_filename(py_filename)
    found: list[Workload] = []
    for attr_name in dir(module):
        candidate = getattr(module, attr_name)
        is_workload_subclass = (
            candidate
            and inspect.isclass(candidate)
            and issubclass(candidate, Workload)
            and candidate is not Workload
        )
        if is_workload_subclass:
            print(f"Instantiating {attr_name}")
            found.append(candidate())
    return found


def discover_workloads(recipe_path: pathlib.Path) -> list[Workload]:
    """Collect Workload instances from every `*.py` file directly inside `recipe_path`."""
    discovered: list[Workload] = []
    for py_file in recipe_path.glob("*.py"):
        discovered.extend(load_workloads_from_filename(py_file))
    return discovered


0 comments on commit 28aedb0

Please sign in to comment.