Refactor to use tyro (#424)

* Refactor to use tyro * push * psuh * refactor * fix pre-commit * fix pre-commit * fix commend * refactor * update poetry * fix test case * quick fix * fix * update optuna * quick change * fix ppg * quick fix * fix optuna * quick change * fix * quick change * quick change * fix bug in multi-gpu * refactor benchmark, support slurm * remove mujoco_py stuff * add slurm template * pre-commit * update ddpg docs * update td3 docs * update sac * bug fix * update docs * update ppo docs * bump version * bump version * bump test cases * add benchmark utility docs * bump test * fix #418 * update requirements.txt * test * add numpy --------- Co-authored-by: Adam Yanxiao Zhao <[email protected]>
vwxyzjn · Nov 28, 2023 · 35896b1 · 35896b1 · vercel · Nov 28, 2023
1 parent 2d660b6
commit 35896b1
Show file tree

Hide file tree

Showing 121 changed files with 4,641 additions and 5,016 deletions.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -22,7 +22,7 @@
 If you need to run benchmark experiments for performance-impacting changes:
 
 - [ ] I have contacted @vwxyzjn to obtain access to the [openrlbenchmark W&B team](https://wandb.ai/openrlbenchmark).
-- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture-video`.
+- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture_video`.
 - [ ] I have performed RLops with `python -m openrlbenchmark.rlops`.
     - For new feature or bug fix:
         - [ ] I have used the RLops utility to understand the performance impact of the changes and confirmed there is no regression.

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -1,10 +1,5 @@
 name: tests
 on:
-  push:
-    paths-ignore:
-      - '**/README.md'
-      - 'docs/**/*'
-      - 'cloud/**/*'
   pull_request:
     paths-ignore:
       - '**/README.md'
@@ -15,8 +10,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04, macos-latest, windows-latest]
     runs-on: ${{ matrix.os }}
     steps:
@@ -58,8 +53,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04, macos-latest, windows-latest]
     runs-on: ${{ matrix.os }}
     steps:
@@ -94,8 +89,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04, macos-latest, windows-latest]
     runs-on: ${{ matrix.os }}
     steps:
@@ -120,8 +115,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04]
     runs-on: ${{ matrix.os }}
     steps:
@@ -180,8 +175,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04]
     runs-on: ${{ matrix.os }}
     steps:
@@ -194,29 +189,12 @@ jobs:
         with:
           poetry-version: ${{ matrix.poetry-version }}
 
-      # mujoco_py tests
-      - name: Install dependencies
-        run: poetry install -E "pytest mujoco_py mujoco jax"
-      - name: Run gymnasium migration dependencies
-        run: poetry run pip install "stable_baselines3==2.0.0a1"
-      - name: Downgrade setuptools
-        run: poetry run pip install setuptools==59.5.0
-      - name: install mujoco_py dependencies
-        run: |
-          sudo apt-get update && sudo apt-get -y install wget unzip software-properties-common \
-            libgl1-mesa-dev \
-            libgl1-mesa-glx \
-            libglew-dev \
-            libosmesa6-dev patchelf
-      - name: Run mujoco_py tests
-        run: poetry run pytest tests/test_mujoco_py.py
-
   test-envpool-envs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04]
     runs-on: ${{ matrix.os }}
     steps:
@@ -241,8 +219,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04]
     runs-on: ${{ matrix.os }}
     steps:
@@ -267,8 +245,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04]
     runs-on: ${{ matrix.os }}
     steps:

diff --git a/.github/workflows/utils_test.yaml b/.github/workflows/utils_test.yaml
@@ -15,8 +15,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
-        poetry-version: [1.3.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        poetry-version: ["1.7"]
         os: [ubuntu-22.04]
     runs-on: ${{ matrix.os }}
     steps:

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+slurm
+.aim
 runs
 balance_bot.xml
 cleanrl/ppo_continuous_action_isaacgym/isaacgym/examples

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -56,10 +56,6 @@ repos:
         name: poetry-export requirements-dm_control.txt
         args: ["--without-hashes", "-o", "requirements/requirements-dm_control.txt", "-E", "dm_control"]
         stages: [manual]
-      - id: poetry-export
-        name: poetry-export requirements-mujoco_py.txt
-        args: ["--without-hashes", "-o", "requirements/requirements-mujoco_py.txt", "-E", "mujoco_py"]
-        stages: [manual]
       - id: poetry-export
         name: poetry-export requirements-procgen.txt
         args: ["--without-hashes", "-o", "requirements/requirements-procgen.txt", "-E", "procgen"]

diff --git a/README.md b/README.md
@@ -191,3 +191,8 @@ If you use CleanRL in your work, please cite our technical [paper](https://www.j
   url     = {http://jmlr.org/papers/v23/21-1342.html}
 }
 ```
+
+
+## Acknowledgement
+
+We thank [Hugging Face](https://huggingface.co/) for providing GPU computational resources to this project through its cluster.
diff --git a/benchmark/c51.sh b/benchmark/c51.sh
@@ -1,29 +1,29 @@
 poetry install
 OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
     --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
-    --command "poetry run python cleanrl/c51.py --cuda False --track --capture-video" \
+    --command "poetry run python cleanrl/c51.py --no_cuda --track --capture_video" \
     --num-seeds 3 \
     --workers 9
 
 poetry install -E atari
 OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
     --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
-    --command "poetry run python cleanrl/c51_atari.py --track --capture-video" \
+    --command "poetry run python cleanrl/c51_atari.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1
 
 poetry install -E "jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 CUDA_VISIBLE_DEVICES=-1 xvfb-run -a python -m cleanrl_utils.benchmark \
     --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
-    --command "poetry run python cleanrl/c51_jax.py --track --capture-video" \
+    --command "poetry run python cleanrl/c51_jax.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1
 
 poetry install -E "atari jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 xvfb-run -a python -m cleanrl_utils.benchmark \
     --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
-    --command "poetry run python cleanrl/c51_atari_jax.py --track --capture-video" \
+    --command "poetry run python cleanrl/c51_atari_jax.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1
diff --git a/benchmark/cleanrl_1gpu.slurm_template b/benchmark/cleanrl_1gpu.slurm_template
@@ -0,0 +1,21 @@
+#!/bin/bash
+#SBATCH --job-name=low-priority
+#SBATCH --partition=production-cluster
+#SBATCH --gpus-per-task={{gpus_per_task}}
+#SBATCH --cpus-per-gpu={{cpus_per_gpu}}
+#SBATCH --ntasks={{ntasks}}
+#SBATCH --output=slurm/logs/%x_%j.out
+#SBATCH --array={{array}}
+#SBATCH --mem-per-cpu=12G
+#SBATCH --exclude=ip-26-0-146-[33,100,122-123,149,183,212,249],ip-26-0-147-[6,94,120,141],ip-26-0-152-[71,101,119,178,186,207,211],ip-26-0-153-[6,62,112,132,166,251],ip-26-0-154-[38,65],ip-26-0-155-[164,174,187,217],ip-26-0-156-[13,40],ip-26-0-157-27
+##SBATCH --nodelist=ip-26-0-147-204
+{{nodes}}
+
+env_ids={{env_ids}}
+seeds={{seeds}}
+env_id=${env_ids[$SLURM_ARRAY_TASK_ID / {{len_seeds}}]}
+seed=${seeds[$SLURM_ARRAY_TASK_ID % {{len_seeds}}]}
+
+echo "Running task $SLURM_ARRAY_TASK_ID with env_id: $env_id and seed: $seed"
+
+srun {{command}} --env-id $env_id --seed $seed # 
diff --git a/benchmark/ddpg.sh b/benchmark/ddpg.sh
@@ -1,16 +1,22 @@
-poetry install -E "mujoco_py"
-python -c "import mujoco_py"
-xvfb-run -a python -m cleanrl_utils.benchmark \
-    --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \
-    --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture-video" \
+poetry install -E "mujoco"
+python -m cleanrl_utils.benchmark \
+    --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+    --command "poetry run python cleanrl/ddpg_continuous_action.py --track" \
     --num-seeds 3 \
-    --workers 1
+    --workers 18 \
+    --slurm-gpus-per-task 1 \
+    --slurm-ntasks 1 \
+    --slurm-total-cpus 10 \
+    --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
 
-poetry install -E "mujoco_py jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-poetry run python -c "import mujoco_py"
-xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
-    --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
-    --command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track --capture-video" \
+poetry install -E "mujoco jax"
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run python -m cleanrl_utils.benchmark \
+    --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+    --command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track" \
     --num-seeds 3 \
-    --workers 1
+    --workers 18 \
+    --slurm-gpus-per-task 1 \
+    --slurm-ntasks 1 \
+    --slurm-total-cpus 10 \
+    --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
diff --git a/benchmark/ddpg_plot.sh b/benchmark/ddpg_plot.sh
@@ -0,0 +1,20 @@
+python -m openrlbenchmark.rlops \
+    --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+        'ddpg_continuous_action?tag=pr-424' \
+    --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+    --no-check-empty-runs \
+    --pc.ncols 3 \
+    --pc.ncols-legend 2 \
+    --output-filename benchmark/cleanrl/ddpg \
+    --scan-history
+
+python -m openrlbenchmark.rlops \
+    --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+        'ddpg_continuous_action?tag=pr-424' \
+        'ddpg_continuous_action_jax?tag=pr-424' \
+    --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+    --no-check-empty-runs \
+    --pc.ncols 3 \
+    --pc.ncols-legend 2 \
+    --output-filename benchmark/cleanrl/ddpg_jax \
+    --scan-history
diff --git a/benchmark/dqn.sh b/benchmark/dqn.sh
@@ -1,29 +1,29 @@
 poetry install
 OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
     --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
-    --command "poetry run python cleanrl/dqn.py --cuda False --track --capture-video" \
+    --command "poetry run python cleanrl/dqn.py --no_cuda --track --capture_video" \
     --num-seeds 3 \
     --workers 9
 
 poetry install -E atari
 OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
     --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
-    --command "poetry run python cleanrl/dqn_atari.py --track --capture-video" \
+    --command "poetry run python cleanrl/dqn_atari.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1
 
 poetry install -E jax
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 xvfb-run -a python -m cleanrl_utils.benchmark \
     --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
-    --command "poetry run python cleanrl/dqn_jax.py --track --capture-video" \
+    --command "poetry run python cleanrl/dqn_jax.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1
 
 poetry install -E "atari jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 xvfb-run -a python -m cleanrl_utils.benchmark \
     --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
-    --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture-video" \
+    --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1
diff --git a/benchmark/ppg.sh b/benchmark/ppg.sh
@@ -3,6 +3,6 @@
 poetry install -E procgen
 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
     --env-ids starpilot bossfight bigfish \
-    --command "poetry run python cleanrl/ppg_procgen.py --track --capture-video" \
+    --command "poetry run python cleanrl/ppg_procgen.py --track --capture_video" \
     --num-seeds 3 \
     --workers 1