Prototype RLops Utility #307

Closed · wants to merge 22 commits
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
compare.pdf
compare.png
balance_bot.xml
cleanrl/ppo_continuous_action_isaacgym/isaacgym/examples
cleanrl/ppo_continuous_action_isaacgym/isaacgym/isaacgym
186 changes: 186 additions & 0 deletions cleanrl_utils/rlops.py
@@ -0,0 +1,186 @@
import argparse
from distutils.util import strtobool
from typing import List

import expt
import matplotlib.pyplot as plt
import numpy as np
import wandb
import wandb.apis.reports as wb # noqa
from expt import Hypothesis, Run
from expt.plot import GridPlot

wandb.require("report-editing")
api = wandb.Api()


def parse_args():
# fmt: off
parser = argparse.ArgumentParser()
parser.add_argument("--exp-name", type=str, default="ddpg_continuous_action_jax",
help="the name of this experiment")
parser.add_argument("--wandb-project-name", type=str, default="cleanrl",
help="the wandb's project name")
parser.add_argument("--wandb-entity", type=str, default="openrlbenchmark",
help="the entity (team) of wandb's project")
parser.add_argument("--tags", nargs="+", default=["v1.0.0b2-9-g4605546", "rlops-pilot"],
help="the tags of the runsets")
parser.add_argument("--env-ids", nargs="+", default=["Hopper-v2", "Walker2d-v2", "HalfCheetah-v2"],
help="the ids of the environment to compare")
parser.add_argument("--output-filename", type=str, default="compare.png",
help="the output filename of the plot")
parser.add_argument("--report", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
help="if toggled, a wandb report will be created")
# fmt: on
return parser.parse_args()


def create_hypothesis(name: str, wandb_runs: List[wandb.apis.public.Run]) -> Hypothesis:
runs = []
for idx, run in enumerate(wandb_runs):
wandb_run = run.history()
if "videos" in wandb_run:
            wandb_run = wandb_run.drop(columns=["videos"])
runs += [Run(f"seed{idx}", wandb_run)]
return Hypothesis(name, runs)


class Runset:
def __init__(self, name: str, filters: dict, entity: str, project: str, groupby: str = ""):
self.name = name
self.filters = filters
self.entity = entity
self.project = project
self.groupby = groupby

@property
def runs(self):
return wandb.Api().runs(path=f"{self.entity}/{self.project}", filters=self.filters)

@property
def report_runset(self):
return wb.RunSet(
name=self.name,
entity=self.entity,
project=self.project,
filters={"$or": [self.filters]},
groupby=[self.groupby] if len(self.groupby) > 0 else None,
)


def compare(
runsetss: List[List[Runset]],
env_ids: List[str],
ncols: int,
output_filename: str = "compare.png",
):
blocks = []
for idx, env_id in enumerate(env_ids):
blocks += [
wb.PanelGrid(
runsets=[runsets[idx].report_runset for runsets in runsetss],
panels=[
wb.LinePlot(
x="global_step",
y=["charts/episodic_return"],
title=env_id,
title_x="Steps",
title_y="Episodic Return",
max_runs_to_show=100,
smoothing_factor=0.8,
groupby_rangefunc="stderr",
legend_template="${runsetName}",
),
wb.LinePlot(
x="_runtime",
y=["charts/episodic_return"],
title=env_id,
title_y="Episodic Return",
max_runs_to_show=100,
smoothing_factor=0.8,
groupby_rangefunc="stderr",
legend_template="${runsetName}",
),
# wb.MediaBrowser(
# num_columns=2,
# media_keys="videos",
# ),
],
),
]

nrows = np.ceil(len(env_ids) / ncols).astype(int)
figsize = (ncols * 4, nrows * 3)
fig, axes = plt.subplots(
nrows=nrows,
ncols=ncols,
figsize=figsize,
# sharex=True,
# sharey=True,
)

for idx, env_id in enumerate(env_ids):
ex = expt.Experiment("Comparison")
for runsets in runsetss:
h = create_hypothesis(runsets[idx].name, runsets[idx].runs)
ex.add_hypothesis(h)
ax = axes.flatten()[idx]
ex.plot(
ax=ax,
title=env_id,
x="_runtime",
y="charts/episodic_return",
err_style="band",
std_alpha=0.1,
rolling=50,
n_samples=400,
legend=False,
)

h, l = ax.get_legend_handles_labels()
fig.legend(h, l, loc="upper center", ncol=2)
fig.subplots_adjust(top=0.9)
# remove the empty axes
for ax in axes.flatten()[len(env_ids) :]:
ax.remove()

print(f"saving figure to {output_filename}")
plt.savefig(f"{output_filename}", bbox_inches="tight")
plt.savefig(f"{output_filename.replace('.png', '.pdf')}", bbox_inches="tight")
return blocks


if __name__ == "__main__":
args = parse_args()

g = GridPlot(y_names=args.env_ids)
blocks = []
runsetss = []
for tag in args.tags:
runsets = []
for env_id in args.env_ids:
runsets += [
Runset(
name=f"CleanRL's {args.exp_name} ({tag})",
filters={
"$and": [{"config.env_id.value": env_id}, {"tags": tag}, {"config.exp_name.value": args.exp_name}]
},
entity=args.wandb_entity,
project=args.wandb_project_name,
groupby="exp_name",
)
]
print(f"CleanRL's {args.exp_name} ({tag}) in {env_id} has {len(runsets[0].runs)} runs")
assert len(runsets[0].runs) > 0, f"CleanRL's {args.exp_name} ({tag}) in {env_id} has no runs"
runsetss += [runsets]

    blocks = compare(runsetss, args.env_ids, output_filename=args.output_filename, ncols=2)
if args.report:
print("saving report")
report = wb.Report(
project="cleanrl",
title=f"Regression Report: {args.exp_name} ({args.tags})",
blocks=blocks,
)
report.save()
print(f"view the generated report at {report.url}")
37 changes: 37 additions & 0 deletions cleanrl_utils/rlops_tags.py
@@ -0,0 +1,37 @@
import argparse

import wandb

api = wandb.Api()


def parse_args():
# fmt: off
parser = argparse.ArgumentParser()
parser.add_argument("--wandb-project-name", type=str, default="cleanrl",
help="the wandb's project name")
parser.add_argument("--wandb-entity", type=str, default="openrlbenchmark",
help="the entity (team) of wandb's project")

parser.add_argument("--add", type=str, default="",
help="the tag to be added to any runs with the `--source-tag`")
parser.add_argument("--remove", type=str, default="",
help="the tag to be removed from any runs with the `--source-tag`")
parser.add_argument("--source-tag", type=str, default="v1.0.0b2-7-g4bb6766",
help="the source tag of the set of runs")
# fmt: on
return parser.parse_args()


if __name__ == "__main__":
args = parse_args()
print(args)
runs = api.runs(path=f"{args.wandb_entity}/{args.wandb_project_name}", filters={"tags": {"$in": [args.source_tag]}})
    for run in runs:
        # copy the run's tag list, adjust it, and push the update back to wandb
        tags = run.tags
        if args.add and args.add not in tags:
            tags.append(args.add)
        if args.remove and args.remove in tags:
            tags.remove(args.remove)
        run.tags = tags
        run.update()
61 changes: 61 additions & 0 deletions docs/advanced/rlops.md
@@ -0,0 +1,61 @@
# RLops

This document describes how we do "RLops" to validate new features and bug fixes and to avoid introducing regressions.


## Background
DRL is brittle and has a series of reproducibility issues: even bug fixes can sometimes introduce performance regressions (e.g., see [how a bug fix of contact force in MuJoCo results in worse performance for PPO](https://github.com/openai/gym/pull/2762#discussion_r853488897)). Therefore, it is essential to understand how proposed changes impact the performance of the algorithms. Broadly, we distinguish two types of contributions: 1) **non-performance-impacting changes** and 2) **performance-impacting changes**.

* **non-performance-impacting changes**: changes that do *not* impact the performance of the algorithm, such as documentation fixes (#282), variable renames (#257), and removal of unused code (#287). We can merge these without worrying too much about the consequences.
* **performance-impacting changes**: changes that impact the algorithm's performance. Examples include slightly modifying the `gamma` parameter in PPO (https://github.com/vwxyzjn/cleanrl/pull/209), properly handling action bounds in DDPG (https://github.com/vwxyzjn/cleanrl/pull/211), and fixing bugs (https://github.com/vwxyzjn/cleanrl/pull/281).


**Importantly, no matter how small a performance-impacting change is, we need to re-run the benchmark to ensure there is no regression**. This document proposes a way to re-run the experiments and check for regressions seamlessly.

## Methodology


### (Step 1) Run the benchmark

We usually run the benchmark experiments through [`benchmark.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl_utils/benchmark.py), such as the following:

```bash
poetry install
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
--command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \
--num-seeds 3 \
--workers 9
```

Under the hood, this script invokes an `autotag` feature that tries to tag the experiments with version control information, such as the git tag (e.g., `v1.0.0b1-4-g4ea73d9`) and the GitHub PR number (e.g., `pr-308`). This makes it easy to compare the performance of the same algorithm across different versions.
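
As a rough sketch of the idea (not the actual implementation in `benchmark.py`), a tag such as `v1.0.0b1-4-g4ea73d9` can be derived from the local git checkout with `git describe`:

```python
# Hedged sketch: derive a version tag like "v1.0.0b1-4-g4ea73d9" from git.
# The real autotag logic in benchmark.py may differ.
import subprocess


def git_autotag() -> str:
    # "git describe --tags" returns the latest tag, the number of commits
    # since that tag, and the abbreviated commit hash, e.g. "v1.0.0b1-4-g4ea73d9"
    return subprocess.check_output(["git", "describe", "--tags"], text=True).strip()


if __name__ == "__main__":
    print(git_autotag())
```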


### (Step 2) Regression check

Let's say our latest experiments are tagged with `v1.0.0b2-9-g4605546`. We can then run the following command to compare their performance against the current version `latest`:


```bash
python rlops.py --exp-name ddpg_continuous_action_jax \
--wandb-project-name cleanrl \
--wandb-entity openrlbenchmark \
--tags v1.0.0b2-9-g4605546 rlops-pilot \
--env-ids Hopper-v2 Walker2d-v2 HalfCheetah-v2 \
--output-filename compare.png \
--report
```
which generates a wandb report with the following figure and the corresponding tables.

<img width="1195" alt="image" src="https://user-images.githubusercontent.com/5555347/196775462-2ef25c47-72dd-426d-88b8-9d74e5062936.png">
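
If the script reports that a tag has no runs, a quick way to sanity-check which runs a tag matches is to query the public wandb API directly. Here is a minimal sketch; it mirrors the filters that `rlops.py` builds, and the entity, project, experiment name, and tag below are just the defaults used above:

```python
# Hedged sketch: count the runs matching a given tag per environment,
# using the same filter shape as rlops.py.
import wandb

api = wandb.Api()
for env_id in ["Hopper-v2", "Walker2d-v2", "HalfCheetah-v2"]:
    runs = api.runs(
        path="openrlbenchmark/cleanrl",
        filters={
            "$and": [
                {"config.env_id.value": env_id},
                {"tags": "v1.0.0b2-9-g4605546"},
                {"config.exp_name.value": "ddpg_continuous_action_jax"},
            ]
        },
    )
    print(f"{env_id}: {len(runs)} matching runs")
```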


### (Step 3) Merge the PR

Once we confirm there is no performance regression, we can merge the PR. Furthermore, we will label the new experiments as `latest` (and correspondingly remove the `latest` tag from the `v1.0.0b2-7-gxfd3d3` runs):

```bash
python rlops_tags.py --add latest --source-tag v1.0.0b2-9-g4605546
python rlops_tags.py --remove latest --source-tag rlops-pilot
```