diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index ff239510f..52a0aa5b4 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -22,7 +22,7 @@
If you need to run benchmark experiments for a performance-impacting changes:
- [ ] I have contacted @vwxyzjn to obtain access to the [openrlbenchmark W&B team](https://wandb.ai/openrlbenchmark).
-- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture-video`.
+- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture_video`.
- [ ] I have performed RLops with `python -m openrlbenchmark.rlops`.
- For new feature or bug fix:
- [ ] I have used the RLops utility to understand the performance impact of the changes and confirmed there is no regression.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index ae4bc1540..ebdd57e4c 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,10 +1,5 @@
name: tests
on:
- push:
- paths-ignore:
- - '**/README.md'
- - 'docs/**/*'
- - 'cloud/**/*'
pull_request:
paths-ignore:
- '**/README.md'
@@ -15,8 +10,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
@@ -58,8 +53,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
@@ -94,8 +89,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
@@ -120,8 +115,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
@@ -180,8 +175,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
@@ -194,29 +189,12 @@ jobs:
with:
poetry-version: ${{ matrix.poetry-version }}
- # mujoco_py tests
- - name: Install dependencies
- run: poetry install -E "pytest mujoco_py mujoco jax"
- - name: Run gymnasium migration dependencies
- run: poetry run pip install "stable_baselines3==2.0.0a1"
- - name: Downgrade setuptools
- run: poetry run pip install setuptools==59.5.0
- - name: install mujoco_py dependencies
- run: |
- sudo apt-get update && sudo apt-get -y install wget unzip software-properties-common \
- libgl1-mesa-dev \
- libgl1-mesa-glx \
- libglew-dev \
- libosmesa6-dev patchelf
- - name: Run mujoco_py tests
- run: poetry run pytest tests/test_mujoco_py.py
-
test-envpool-envs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
@@ -241,8 +219,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
@@ -267,8 +245,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
diff --git a/.github/workflows/utils_test.yaml b/.github/workflows/utils_test.yaml
index 8b1929503..cd668166f 100644
--- a/.github/workflows/utils_test.yaml
+++ b/.github/workflows/utils_test.yaml
@@ -15,8 +15,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [3.8]
- poetry-version: [1.3.1]
+ python-version: ["3.8", "3.9", "3.10"]
+ poetry-version: ["1.7"]
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
diff --git a/.gitignore b/.gitignore
index 4784f1086..1d4cfa0e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+slurm
+.aim
runs
balance_bot.xml
cleanrl/ppo_continuous_action_isaacgym/isaacgym/examples
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ccb3fc71a..516cd23bc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,10 +56,6 @@ repos:
name: poetry-export requirements-dm_control.txt
args: ["--without-hashes", "-o", "requirements/requirements-dm_control.txt", "-E", "dm_control"]
stages: [manual]
- - id: poetry-export
- name: poetry-export requirements-mujoco_py.txt
- args: ["--without-hashes", "-o", "requirements/requirements-mujoco_py.txt", "-E", "mujoco_py"]
- stages: [manual]
- id: poetry-export
name: poetry-export requirements-procgen.txt
args: ["--without-hashes", "-o", "requirements/requirements-procgen.txt", "-E", "procgen"]
diff --git a/README.md b/README.md
index 5e645ab46..790ad9933 100644
--- a/README.md
+++ b/README.md
@@ -191,3 +191,8 @@ If you use CleanRL in your work, please cite our technical [paper](https://www.j
url = {http://jmlr.org/papers/v23/21-1342.html}
}
```
+
+
+## Acknowledgement
+
+We thank [Hugging Face](https://huggingface.co/)'s cluster for providing GPU computational resources to this project.
diff --git a/benchmark/c51.sh b/benchmark/c51.sh
index fb46bb6b4..6aba77810 100644
--- a/benchmark/c51.sh
+++ b/benchmark/c51.sh
@@ -1,29 +1,29 @@
poetry install
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/c51.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/c51.py --no_cuda --track --capture_video" \
--num-seeds 3 \
--workers 9
poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/c51_atari.py --track --capture-video" \
+ --command "poetry run python cleanrl/c51_atari.py --track --capture_video" \
--num-seeds 3 \
--workers 1
poetry install -E "jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
CUDA_VISIBLE_DEVICES=-1 xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/c51_jax.py --track --capture-video" \
+ --command "poetry run python cleanrl/c51_jax.py --track --capture_video" \
--num-seeds 3 \
--workers 1
poetry install -E "atari jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/c51_atari_jax.py --track --capture-video" \
+ --command "poetry run python cleanrl/c51_atari_jax.py --track --capture_video" \
--num-seeds 3 \
--workers 1
diff --git a/benchmark/cleanrl_1gpu.slurm_template b/benchmark/cleanrl_1gpu.slurm_template
new file mode 100644
index 000000000..b7c76c297
--- /dev/null
+++ b/benchmark/cleanrl_1gpu.slurm_template
@@ -0,0 +1,21 @@
+#!/bin/bash
+#SBATCH --job-name=low-priority
+#SBATCH --partition=production-cluster
+#SBATCH --gpus-per-task={{gpus_per_task}}
+#SBATCH --cpus-per-gpu={{cpus_per_gpu}}
+#SBATCH --ntasks={{ntasks}}
+#SBATCH --output=slurm/logs/%x_%j.out
+#SBATCH --array={{array}}
+#SBATCH --mem-per-cpu=12G
+#SBATCH --exclude=ip-26-0-146-[33,100,122-123,149,183,212,249],ip-26-0-147-[6,94,120,141],ip-26-0-152-[71,101,119,178,186,207,211],ip-26-0-153-[6,62,112,132,166,251],ip-26-0-154-[38,65],ip-26-0-155-[164,174,187,217],ip-26-0-156-[13,40],ip-26-0-157-27
+##SBATCH --nodelist=ip-26-0-147-204
+{{nodes}}
+
+env_ids={{env_ids}}
+seeds={{seeds}}
+env_id=${env_ids[$SLURM_ARRAY_TASK_ID / {{len_seeds}}]}
+seed=${seeds[$SLURM_ARRAY_TASK_ID % {{len_seeds}}]}
+
+echo "Running task $SLURM_ARRAY_TASK_ID with env_id: $env_id and seed: $seed"
+
+srun {{command}} --env-id $env_id --seed $seed #
diff --git a/benchmark/ddpg.sh b/benchmark/ddpg.sh
index 9f26b302e..3746b4d99 100755
--- a/benchmark/ddpg.sh
+++ b/benchmark/ddpg.sh
@@ -1,16 +1,22 @@
-poetry install -E "mujoco_py"
-python -c "import mujoco_py"
-xvfb-run -a python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \
- --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture-video" \
+poetry install -E "mujoco"
+python -m cleanrl_utils.benchmark \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --command "poetry run python cleanrl/ddpg_continuous_action.py --track" \
--num-seeds 3 \
- --workers 1
+ --workers 18 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
-poetry install -E "mujoco_py jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-poetry run python -c "import mujoco_py"
-xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
- --command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track --capture-video" \
+poetry install -E "mujoco jax"
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run python -m cleanrl_utils.benchmark \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track" \
--num-seeds 3 \
- --workers 1
+ --workers 18 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
diff --git a/benchmark/ddpg_plot.sh b/benchmark/ddpg_plot.sh
new file mode 100755
index 000000000..d36db199e
--- /dev/null
+++ b/benchmark/ddpg_plot.sh
@@ -0,0 +1,20 @@
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ddpg_continuous_action?tag=pr-424' \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ddpg \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ddpg_continuous_action?tag=pr-424' \
+ 'ddpg_continuous_action_jax?tag=pr-424' \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ddpg_jax \
+ --scan-history
diff --git a/benchmark/dqn.sh b/benchmark/dqn.sh
index 9a8d8e32e..dcd90446b 100644
--- a/benchmark/dqn.sh
+++ b/benchmark/dqn.sh
@@ -1,29 +1,29 @@
poetry install
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/dqn.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/dqn.py --no_cuda --track --capture_video" \
--num-seeds 3 \
--workers 9
poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/dqn_atari.py --track --capture-video" \
+ --command "poetry run python cleanrl/dqn_atari.py --track --capture_video" \
--num-seeds 3 \
--workers 1
poetry install -E jax
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/dqn_jax.py --track --capture-video" \
+ --command "poetry run python cleanrl/dqn_jax.py --track --capture_video" \
--num-seeds 3 \
--workers 1
poetry install -E "atari jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture-video" \
+ --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture_video" \
--num-seeds 3 \
--workers 1
diff --git a/benchmark/ppg.sh b/benchmark/ppg.sh
index 20fde68cf..ee5580f33 100644
--- a/benchmark/ppg.sh
+++ b/benchmark/ppg.sh
@@ -3,6 +3,6 @@
poetry install -E procgen
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids starpilot bossfight bigfish \
- --command "poetry run python cleanrl/ppg_procgen.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppg_procgen.py --track --capture_video" \
--num-seeds 3 \
--workers 1
diff --git a/benchmark/ppo.sh b/benchmark/ppo.sh
index 7fefcd933..70f374785 100644
--- a/benchmark/ppo.sh
+++ b/benchmark/ppo.sh
@@ -3,118 +3,143 @@
poetry install
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \
--num-seeds 3 \
- --workers 9
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/ppo_atari.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_atari.py --track --capture_video" \
--num-seeds 3 \
- --workers 3
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
+
+poetry install -E mujoco
+OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --command "poetry run python cleanrl/ppo_continuous_action.py --no_cuda --track --capture_video" \
+ --num-seeds 3 \
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
+
+poetry install -E "mujoco dm_control"
+OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
+ --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \
+ --command "poetry run python cleanrl/ppo_continuous_action.py --exp-name ppo_continuous_action_8M --total-timesteps 8000000 --no_cuda --track" \
+ --num-seeds 10 \
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/ppo_atari_lstm.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_atari_lstm.py --track --capture_video" \
--num-seeds 3 \
- --workers 3
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
poetry install -E envpool
-xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
+poetry run python -m cleanrl_utils.benchmark \
--env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
- --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture_video" \
--num-seeds 3 \
- --workers 1
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
-poetry install -E "mujoco_py mujoco"
-poetry run python -c "import mujoco_py"
-OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \
- --command "poetry run python cleanrl/ppo_continuous_action.py --cuda False --track --capture-video" \
+poetry install -E "envpool jax"
+poetry run python -m cleanrl_utils.benchmark \
+ --env-ids Alien-v5 Amidar-v5 Assault-v5 Asterix-v5 Asteroids-v5 Atlantis-v5 BankHeist-v5 BattleZone-v5 BeamRider-v5 Berzerk-v5 Bowling-v5 Boxing-v5 Breakout-v5 Centipede-v5 ChopperCommand-v5 CrazyClimber-v5 Defender-v5 DemonAttack-v5 DoubleDunk-v5 Enduro-v5 FishingDerby-v5 Freeway-v5 Frostbite-v5 Gopher-v5 Gravitar-v5 Hero-v5 IceHockey-v5 Jamesbond-v5 Kangaroo-v5 Krull-v5 KungFuMaster-v5 MontezumaRevenge-v5 MsPacman-v5 NameThisGame-v5 Phoenix-v5 Pitfall-v5 Pong-v5 PrivateEye-v5 Qbert-v5 Riverraid-v5 RoadRunner-v5 Robotank-v5 Seaquest-v5 Skiing-v5 Solaris-v5 SpaceInvaders-v5 StarGunner-v5 Surround-v5 Tennis-v5 TimePilot-v5 Tutankham-v5 UpNDown-v5 Venture-v5 VideoPinball-v5 WizardOfWor-v5 YarsRevenge-v5 Zaxxon-v5 \
+  --command "poetry run python cleanrl/ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \
+ --num-seeds 3 \
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
+
+poetry install -E "envpool jax"
+python -m cleanrl_utils.benchmark \
+ --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
+ --command "poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --track --capture_video" \
--num-seeds 3 \
- --workers 6
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
poetry install -E procgen
-xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
+poetry run python -m cleanrl_utils.benchmark \
--env-ids starpilot bossfight bigfish \
- --command "poetry run python cleanrl/ppo_procgen.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_procgen.py --track --capture_video" \
--num-seeds 3 \
- --workers 1
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
poetry install -E atari
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --track --capture-video" \
+ --command "poetry run torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --local-num-envs 4 --track --capture_video" \
--num-seeds 3 \
- --workers 1
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
-poetry install "pettingzoo atari"
+poetry install -E "pettingzoo atari"
poetry run AutoROM --accept-license
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids pong_v3 surround_v2 tennis_v3 \
- --command "poetry run python cleanrl/ppo_pettingzoo_ma_atari.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_pettingzoo_ma_atari.py --track --capture_video" \
--num-seeds 3 \
- --workers 3
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
# IMPORTANT: see specific Isaac Gym installation at
# https://docs.cleanrl.dev/rl-algorithms/ppo/#usage_8
poetry install --with isaacgym
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids Cartpole Ant Humanoid BallBalance Anymal \
- --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture_video" \
--num-seeds 3 \
- --workers 1
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids AllegroHand ShadowHand \
- --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture-video --num-envs 8192 --num-steps 8 --update-epochs 5 --num-minibatches 4 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660" \
+ --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture_video --num-envs 8192 --num-steps 8 --update-epochs 5 --num-minibatches 4 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660" \
--num-seeds 3 \
- --workers 1
-
-
-poetry install "envpool jax"
-poetry run python -m cleanrl_utils.benchmark \
- --env-ids Alien-v5 Amidar-v5 Assault-v5 Asterix-v5 Asteroids-v5 Atlantis-v5 BankHeist-v5 BattleZone-v5 BeamRider-v5 Berzerk-v5 Bowling-v5 Boxing-v5 Breakout-v5 Centipede-v5 ChopperCommand-v5 CrazyClimber-v5 Defender-v5 DemonAttack-v5 \
- --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \
- --num-seeds 3 \
- --workers 1
-poetry run python -m cleanrl_utils.benchmark \
- --env-ids DoubleDunk-v5 Enduro-v5 FishingDerby-v5 Freeway-v5 Frostbite-v5 Gopher-v5 Gravitar-v5 Hero-v5 IceHockey-v5 Jamesbond-v5 Kangaroo-v5 Krull-v5 KungFuMaster-v5 MontezumaRevenge-v5 MsPacman-v5 NameThisGame-v5 Phoenix-v5 Pitfall-v5 Pong-v5 \
- --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \
- --num-seeds 3 \
- --workers 1
-poetry run python -m cleanrl_utils.benchmark \
- --env-ids PrivateEye-v5 Qbert-v5 Riverraid-v5 RoadRunner-v5 Robotank-v5 Seaquest-v5 Skiing-v5 Solaris-v5 SpaceInvaders-v5 StarGunner-v5 Surround-v5 Tennis-v5 TimePilot-v5 Tutankham-v5 UpNDown-v5 Venture-v5 VideoPinball-v5 WizardOfWor-v5 YarsRevenge-v5 Zaxxon-v5 \
- --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \
- --num-seeds 3 \
- --workers 1
-
-# gymnasium support
-poetry install -E mujoco
-OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
- --command "poetry run python cleanrl/gymnasium_support/ppo_continuous_action.py --cuda False --track" \
- --num-seeds 3 \
- --workers 1
-
-poetry install "dm_control mujoco"
-OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
- --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \
- --command "poetry run python cleanrl/gymnasium_support/ppo_continuous_action.py --cuda False --track" \
- --num-seeds 3 \
- --workers 9
-
-poetry install "envpool jax"
-python -m cleanrl_utils.benchmark \
- --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
- --command "poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --track --capture-video" \
- --num-seeds 3 \
- --workers 1
-
-poetry install "mujoco dm_control"
-OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
- --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \
- --command "poetry run python cleanrl/ppo_continuous_action.py --exp-name ppo_continuous_action_8M --total-timesteps 8000000 --cuda False --track" \
- --num-seeds 10 \
- --workers 1
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
diff --git a/benchmark/ppo_plot.sh b/benchmark/ppo_plot.sh
new file mode 100644
index 000000000..95678d986
--- /dev/null
+++ b/benchmark/ppo_plot.sh
@@ -0,0 +1,117 @@
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo?tag=pr-424' \
+ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_atari?tag=pr-424' \
+ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_atari \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_continuous_action?tag=pr-424' \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_continuous_action \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_continuous_action?tag=v1.0.0-13-gcbd83f6' \
+ --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_continuous_action_dm_control \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_atari_lstm?tag=pr-424' \
+ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_atari_lstm \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/avg_episodic_return' \
+ 'ppo_atari_envpool?tag=pr-424' \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_atari?tag=pr-424' \
+ --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
+ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_atari_envpool \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=envpool-atari&ceik=env_id&cen=exp_name&metric=charts/avg_episodic_return' \
+ 'ppo_atari_envpool_xla_jax' \
+ --filters '?we=openrlbenchmark&wpn=baselines&ceik=env&cen=exp_name&metric=charts/episodic_return' \
+ 'baselines-ppo2-cnn' \
+ --env-ids Alien-v5 Amidar-v5 Assault-v5 Asterix-v5 Asteroids-v5 Atlantis-v5 BankHeist-v5 BattleZone-v5 BeamRider-v5 Berzerk-v5 Bowling-v5 Boxing-v5 Breakout-v5 Centipede-v5 ChopperCommand-v5 CrazyClimber-v5 Defender-v5 DemonAttack-v5 DoubleDunk-v5 Enduro-v5 FishingDerby-v5 Freeway-v5 Frostbite-v5 Gopher-v5 Gravitar-v5 Hero-v5 IceHockey-v5 Jamesbond-v5 Kangaroo-v5 Krull-v5 KungFuMaster-v5 MontezumaRevenge-v5 MsPacman-v5 NameThisGame-v5 Phoenix-v5 Pitfall-v5 Pong-v5 PrivateEye-v5 Qbert-v5 Riverraid-v5 RoadRunner-v5 Robotank-v5 Seaquest-v5 Skiing-v5 Solaris-v5 SpaceInvaders-v5 StarGunner-v5 Surround-v5 Tennis-v5 TimePilot-v5 Tutankham-v5 UpNDown-v5 Venture-v5 VideoPinball-v5 WizardOfWor-v5 YarsRevenge-v5 Zaxxon-v5 \
+ --env-ids AlienNoFrameskip-v4 AmidarNoFrameskip-v4 AssaultNoFrameskip-v4 AsterixNoFrameskip-v4 AsteroidsNoFrameskip-v4 AtlantisNoFrameskip-v4 BankHeistNoFrameskip-v4 BattleZoneNoFrameskip-v4 BeamRiderNoFrameskip-v4 BerzerkNoFrameskip-v4 BowlingNoFrameskip-v4 BoxingNoFrameskip-v4 BreakoutNoFrameskip-v4 CentipedeNoFrameskip-v4 ChopperCommandNoFrameskip-v4 CrazyClimberNoFrameskip-v4 DefenderNoFrameskip-v4 DemonAttackNoFrameskip-v4 DoubleDunkNoFrameskip-v4 EnduroNoFrameskip-v4 FishingDerbyNoFrameskip-v4 FreewayNoFrameskip-v4 FrostbiteNoFrameskip-v4 GopherNoFrameskip-v4 GravitarNoFrameskip-v4 HeroNoFrameskip-v4 IceHockeyNoFrameskip-v4 JamesbondNoFrameskip-v4 KangarooNoFrameskip-v4 KrullNoFrameskip-v4 KungFuMasterNoFrameskip-v4 MontezumaRevengeNoFrameskip-v4 MsPacmanNoFrameskip-v4 NameThisGameNoFrameskip-v4 PhoenixNoFrameskip-v4 PitfallNoFrameskip-v4 PongNoFrameskip-v4 PrivateEyeNoFrameskip-v4 QbertNoFrameskip-v4 RiverraidNoFrameskip-v4 RoadRunnerNoFrameskip-v4 RobotankNoFrameskip-v4 SeaquestNoFrameskip-v4 SkiingNoFrameskip-v4 SolarisNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 StarGunnerNoFrameskip-v4 SurroundNoFrameskip-v4 TennisNoFrameskip-v4 TimePilotNoFrameskip-v4 TutankhamNoFrameskip-v4 UpNDownNoFrameskip-v4 VentureNoFrameskip-v4 VideoPinballNoFrameskip-v4 WizardOfWorNoFrameskip-v4 YarsRevengeNoFrameskip-v4 ZaxxonNoFrameskip-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 4 \
+ --pc.ncols-legend 2 \
+ --rliable \
+ --rc.score_normalization_method atari \
+ --rc.normalized_score_threshold 8.0 \
+ --rc.sample_efficiency_plots \
+ --rc.sample_efficiency_and_walltime_efficiency_method Median \
+ --rc.performance_profile_plots \
+ --rc.aggregate_metrics_plots \
+ --rc.sample_efficiency_num_bootstrap_reps 50000 \
+ --rc.performance_profile_num_bootstrap_reps 50000 \
+ --rc.interval_estimates_num_bootstrap_reps 50000 \
+ --output-filename benchmark/cleanrl/ppo_atari_envpool_xla_jax \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/avg_episodic_return' \
+ 'ppo_atari_envpool_xla_jax?tag=pr-424' \
+ 'ppo_atari_envpool_xla_jax_scan?tag=pr-424' \
+ --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_atari_envpool_xla_jax_scan \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_procgen?tag=pr-424' \
+ --env-ids starpilot bossfight bigfish \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_procgen \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'ppo_atari_multigpu?tag=pr-424' \
+ 'ppo_atari?tag=pr-424' \
+ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/ppo_atari_multigpu \
+ --scan-history
diff --git a/benchmark/qdagger.sh b/benchmark/qdagger.sh
index 2491716a0..dc7851fb3 100644
--- a/benchmark/qdagger.sh
+++ b/benchmark/qdagger.sh
@@ -1,15 +1,15 @@
poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/qdagger_dqn_atari_impalacnn.py --track --capture-video" \
+ --command "poetry run python cleanrl/qdagger_dqn_atari_impalacnn.py --track --capture_video" \
--num-seeds 3 \
--workers 1
poetry install -E "atari jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --track --capture-video" \
+ --command "poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --track --capture_video" \
--num-seeds 3 \
--workers 1
diff --git a/benchmark/rpo.sh b/benchmark/rpo.sh
index cbb551bac..d389197fa 100644
--- a/benchmark/rpo.sh
+++ b/benchmark/rpo.sh
@@ -1,42 +1,42 @@
poetry install "mujoco dm_control"
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \
- --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track" \
+ --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track" \
--num-seeds 10 \
--workers 1
poetry run pip install box2d-py==2.3.5
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids Pendulum-v1 BipedalWalker-v3 \
- --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track --capture_video" \
--num-seeds 1 \
--workers 1
poetry install -E mujoco
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids HumanoidStandup-v4 Humanoid-v4 InvertedPendulum-v4 Walker2d-v4 \
- --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track --capture_video" \
--num-seeds 10 \
--workers 1
poetry install -E mujoco
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids HumanoidStandup-v2 Humanoid-v2 InvertedPendulum-v2 Walker2d-v2 \
- --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track --capture_video" \
--num-seeds 10 \
--workers 1
poetry install -E mujoco
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids Ant-v4 InvertedDoublePendulum-v4 Reacher-v4 Pusher-v4 Hopper-v4 HalfCheetah-v4 Swimmer-v4 \
- --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --no_cuda --track --capture_video" \
--num-seeds 10 \
--workers 1
poetry install -E mujoco
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids Ant-v2 InvertedDoublePendulum-v2 Reacher-v2 Pusher-v2 Hopper-v2 HalfCheetah-v2 Swimmer-v2 \
- --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --no_cuda --track --capture_video" \
--num-seeds 10 \
--workers 1
diff --git a/benchmark/sac.sh b/benchmark/sac.sh
index e94e11192..2c948bc93 100644
--- a/benchmark/sac.sh
+++ b/benchmark/sac.sh
@@ -1,7 +1,10 @@
-poetry install -E mujoco_py
-poetry run python -c "import mujoco_py"
-OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
- --command "poetry run python cleanrl/sac_continuous_action.py --track --capture-video" \
+poetry install -E mujoco
+poetry run python -m cleanrl_utils.benchmark \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --command "poetry run python cleanrl/sac_continuous_action.py --track" \
--num-seeds 3 \
- --workers 3
\ No newline at end of file
+ --workers 18 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
diff --git a/benchmark/sac_atari.sh b/benchmark/sac_atari.sh
index 13f9e3c9d..a8e8a78ed 100755
--- a/benchmark/sac_atari.sh
+++ b/benchmark/sac_atari.sh
@@ -1,6 +1,6 @@
poetry install -E atari
OMP_NUM_THREADS=1 python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BreakoutNoFrameskip-v4 BeamRiderNoFrameskip-v4 \
- --command "poetry run python cleanrl/sac_atari.py --cuda True --track" \
+ --command "poetry run python cleanrl/sac_atari.py --track" \
--num-seeds 3 \
--workers 2
diff --git a/benchmark/sac_plot.sh b/benchmark/sac_plot.sh
new file mode 100644
index 000000000..7d82406fa
--- /dev/null
+++ b/benchmark/sac_plot.sh
@@ -0,0 +1,9 @@
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'sac_continuous_action?tag=pr-424' \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/sac \
+ --scan-history
diff --git a/benchmark/td3.sh b/benchmark/td3.sh
index ea94c2c32..e68004c73 100644
--- a/benchmark/td3.sh
+++ b/benchmark/td3.sh
@@ -1,16 +1,22 @@
-poetry install -E mujoco_py
-python -c "import mujoco_py"
-OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \
- --command "poetry run python cleanrl/td3_continuous_action.py --track --capture-video" \
+poetry install -E "mujoco"
+python -m cleanrl_utils.benchmark \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --command "poetry run python cleanrl/td3_continuous_action.py --track" \
--num-seeds 3 \
- --workers 1
+ --workers 18 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
-poetry install -E "mujoco_py jax"
-poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-poetry run python -c "import mujoco_py"
-xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
- --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
- --command "poetry run python cleanrl/td3_continuous_action_jax.py --track --capture-video" \
+poetry install -E "mujoco jax"
+poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+poetry run python -m cleanrl_utils.benchmark \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --command "poetry run python cleanrl/td3_continuous_action_jax.py --track" \
--num-seeds 3 \
- --workers 1
+ --workers 18 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
diff --git a/benchmark/td3_plot.sh b/benchmark/td3_plot.sh
new file mode 100644
index 000000000..ad37305cc
--- /dev/null
+++ b/benchmark/td3_plot.sh
@@ -0,0 +1,21 @@
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'td3_continuous_action?tag=pr-424' \
+ 'td3_continuous_action_jax?tag=pr-424' \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/td3 \
+ --scan-history
+
+python -m openrlbenchmark.rlops \
+ --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \
+ 'sac_continuous_action?tag=pr-424' \
+ --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \
+ --no-check-empty-runs \
+ --pc.ncols 3 \
+ --pc.ncols-legend 2 \
+ --output-filename benchmark/cleanrl/sac \
+ --scan-history
diff --git a/benchmark/zoo.sh b/benchmark/zoo.sh
index f7646c5d5..a5ab38e14 100644
--- a/benchmark/zoo.sh
+++ b/benchmark/zoo.sh
@@ -3,25 +3,25 @@ poetry run python cleanrl/dqn_atari_jax.py --env-id SeaquestNoFrameskip-v4 --sa
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/dqn.py --cuda False --track --capture-video --save-model --upload-model --hf-entity cleanrl" \
+ --command "poetry run python cleanrl/dqn.py --no_cuda --track --capture_video --save-model --upload-model --hf-entity cleanrl" \
--num-seeds 1 \
--workers 1
CUDA_VISIBLE_DEVICES="-1" xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/dqn_jax.py --track --capture-video --save-model --upload-model --hf-entity cleanrl" \
+ --command "poetry run python cleanrl/dqn_jax.py --track --capture_video --save-model --upload-model --hf-entity cleanrl" \
--num-seeds 1 \
--workers 1
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture-video --save-model --upload-model --hf-entity cleanrl" \
+ --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture_video --save-model --upload-model --hf-entity cleanrl" \
--num-seeds 1 \
--workers 1
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
- --command "poetry run python cleanrl/dqn_atari.py --track --capture-video --save-model --upload-model --hf-entity cleanrl" \
+ --command "poetry run python cleanrl/dqn_atari.py --track --capture_video --save-model --upload-model --hf-entity cleanrl" \
--num-seeds 1 \
--workers 1
diff --git a/cleanrl/c51.py b/cleanrl/c51.py
index 3959466f1..9f99a7a31 100755
--- a/cleanrl/c51.py
+++ b/cleanrl/c51.py
@@ -1,83 +1,77 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51py
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="CartPole-v1",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=500000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--n-atoms", type=int, default=101,
- help="the number of atoms")
- parser.add_argument("--v-min", type=float, default=-100,
- help="the return lower bound")
- parser.add_argument("--v-max", type=float, default=100,
- help="the return upper bound")
- parser.add_argument("--buffer-size", type=int, default=10000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--target-network-frequency", type=int, default=500,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=128,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.05,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.5,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=10000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=10,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "CartPole-v1"
+ """the id of the environment"""
+ total_timesteps: int = 500000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ n_atoms: int = 101
+ """the number of atoms"""
+ v_min: float = -100
+ """the return lower bound"""
+ v_max: float = 100
+ """the return upper bound"""
+ buffer_size: int = 10000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ target_network_frequency: int = 500
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 128
+ """the batch size of sample from the reply memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.05
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.5
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 10000
+ """timestep to start learning"""
+ train_frequency: int = 10
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -136,7 +130,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -201,14 +196,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/c51_atari.py b/cleanrl/c51_atari.py
index 8e47bacc5..97b790759 100755
--- a/cleanrl/c51_atari.py
+++ b/cleanrl/c51_atari.py
@@ -1,15 +1,15 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51_ataripy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from stable_baselines3.common.atari_wrappers import (
ClipRewardEnv,
EpisodicLifeEnv,
@@ -21,70 +21,64 @@
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--n-atoms", type=int, default=51,
- help="the number of atoms")
- parser.add_argument("--v-min", type=float, default=-10,
- help="the return lower bound")
- parser.add_argument("--v-max", type=float, default=10,
- help="the return upper bound")
- parser.add_argument("--buffer-size", type=int, default=1000000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--target-network-frequency", type=int, default=10000,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=32,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.01,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.10,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=80000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=4,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ n_atoms: int = 51
+ """the number of atoms"""
+ v_min: float = -10
+ """the return lower bound"""
+ v_max: float = 10
+ """the return upper bound"""
+ buffer_size: int = 1000000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ target_network_frequency: int = 10000
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 32
+ """the batch size of sample from the reply memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.01
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.10
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 80000
+ """timestep to start learning"""
+ train_frequency: int = 4
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -158,7 +152,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -224,14 +219,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/c51_atari_jax.py b/cleanrl/c51_atari_jax.py
index 93c436ec5..8cd46e855 100644
--- a/cleanrl/c51_atari_jax.py
+++ b/cleanrl/c51_atari_jax.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51_atari_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
os.environ[
"XLA_PYTHON_CLIENT_MEM_FRACTION"
@@ -16,6 +15,7 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from stable_baselines3.common.atari_wrappers import (
ClipRewardEnv,
@@ -28,66 +28,60 @@
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--n-atoms", type=int, default=51,
- help="the number of atoms")
- parser.add_argument("--v-min", type=float, default=-10,
- help="the return lower bound")
- parser.add_argument("--v-max", type=float, default=10,
- help="the return upper bound")
- parser.add_argument("--buffer-size", type=int, default=1000000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--target-network-frequency", type=int, default=10000,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=32,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.01,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.1,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=80000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=4,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ n_atoms: int = 51
+ """the number of atoms"""
+ v_min: float = -10
+ """the return lower bound"""
+ v_max: float = 10
+ """the return upper bound"""
+ buffer_size: int = 1000000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ target_network_frequency: int = 10000
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 32
+ """the batch size of samples from the replay memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.01
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.10
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 80000
+ """timestep to start learning"""
+ train_frequency: int = 4
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -159,7 +153,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -284,14 +279,10 @@ def get_action(q_state, obs):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/c51_jax.py b/cleanrl/c51_jax.py
index 4b65f3595..7ad810cdb 100644
--- a/cleanrl/c51_jax.py
+++ b/cleanrl/c51_jax.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import flax
import flax.linen as nn
@@ -12,71 +11,66 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="CartPole-v1",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=500000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--n-atoms", type=int, default=101,
- help="the number of atoms")
- parser.add_argument("--v-min", type=float, default=-100,
- help="the return lower bound")
- parser.add_argument("--v-max", type=float, default=100,
- help="the return upper bound")
- parser.add_argument("--buffer-size", type=int, default=10000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--target-network-frequency", type=int, default=500,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=128,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.05,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.5,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=10000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=10,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "CartPole-v1"
+ """the id of the environment"""
+ total_timesteps: int = 500000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ n_atoms: int = 101
+ """the number of atoms"""
+ v_min: float = -100
+ """the return lower bound"""
+ v_max: float = 100
+ """the return upper bound"""
+ buffer_size: int = 10000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ target_network_frequency: int = 500
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 128
+ """the batch size of samples from the replay memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.05
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.5
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 10000
+ """timestep to start learning"""
+ train_frequency: int = 10
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -131,7 +125,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -247,14 +242,10 @@ def loss(q_params, observations, actions, target_pmfs):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/ddpg_continuous_action.py b/cleanrl/ddpg_continuous_action.py
index d42d3bc5a..1aa8b9972 100644
--- a/cleanrl/ddpg_continuous_action.py
+++ b/cleanrl/ddpg_continuous_action.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ddpg/#ddpg_continuous_actionpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -11,62 +10,59 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Hopper-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=1000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=3e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--buffer-size", type=int, default=int(1e6),
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=0.005,
- help="target smoothing coefficient (default: 0.005)")
- parser.add_argument("--batch-size", type=int, default=256,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--exploration-noise", type=float, default=0.1,
- help="the scale of exploration noise")
- parser.add_argument("--learning-starts", type=int, default=25e3,
- help="timestep to start learning")
- parser.add_argument("--policy-frequency", type=int, default=2,
- help="the frequency of training policy (delayed)")
- parser.add_argument("--noise-clip", type=float, default=0.5,
- help="noise clip parameter of the Target Policy Smoothing Regularization")
- args = parser.parse_args()
- # fmt: on
- return args
+ env_id: str = "Hopper-v4"
+ """the id of the environment"""
+ total_timesteps: int = 1000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 3e-4
+ """the learning rate of the optimizer"""
+ buffer_size: int = int(1e6)
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 0.005
+ """target smoothing coefficient (default: 0.005)"""
+ batch_size: int = 256
+ """the batch size of samples from the replay memory"""
+ exploration_noise: float = 0.1
+ """the scale of exploration noise"""
+ learning_starts: int = 25e3
+ """timestep to start learning"""
+ policy_frequency: int = 2
+ """the frequency of training policy (delayed)"""
+ noise_clip: float = 0.5
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -129,8 +125,7 @@ def forward(self, x):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
-
- args = parse_args()
+ args = tyro.cli(Args)
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl/ddpg_continuous_action_jax.py b/cleanrl/ddpg_continuous_action_jax.py
index e074acd60..b12a14a28 100644
--- a/cleanrl/ddpg_continuous_action_jax.py
+++ b/cleanrl/ddpg_continuous_action_jax.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ddpg/#ddpg_continuous_action_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import flax
import flax.linen as nn
@@ -12,59 +11,56 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="HalfCheetah-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=1000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=3e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--buffer-size", type=int, default=int(1e6),
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=0.005,
- help="target smoothing coefficient (default: 0.005)")
- parser.add_argument("--batch-size", type=int, default=256,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--exploration-noise", type=float, default=0.1,
- help="the scale of exploration noise")
- parser.add_argument("--learning-starts", type=int, default=25e3,
- help="timestep to start learning")
- parser.add_argument("--policy-frequency", type=int, default=2,
- help="the frequency of training policy (delayed)")
- parser.add_argument("--noise-clip", type=float, default=0.5,
- help="noise clip parameter of the Target Policy Smoothing Regularization")
- args = parser.parse_args()
- # fmt: on
- return args
+ env_id: str = "HalfCheetah-v4"
+ """the id of the environment"""
+ total_timesteps: int = 1000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 3e-4
+ """the learning rate of the optimizer"""
+ buffer_size: int = int(1e6)
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 0.005
+ """target smoothing coefficient (default: 0.005)"""
+ batch_size: int = 256
+ """the batch size of samples from the replay memory"""
+ exploration_noise: float = 0.1
+ """the scale of exploration noise"""
+ learning_starts: int = 25e3
+ """timestep to start learning"""
+ policy_frequency: int = 2
+ """the frequency of training policy (delayed)"""
+ noise_clip: float = 0.5
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -124,7 +120,7 @@ class TrainState(TrainState):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl/dqn.py b/cleanrl/dqn.py
index 2aa8f9bc6..e74e289c3 100644
--- a/cleanrl/dqn.py
+++ b/cleanrl/dqn.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqnpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -11,70 +10,65 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="CartPole-v1",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=500000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--buffer-size", type=int, default=10000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.,
- help="the target network update rate")
- parser.add_argument("--target-network-frequency", type=int, default=500,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=128,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.05,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.5,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=10000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=10,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "CartPole-v1"
+ """the id of the environment"""
+ total_timesteps: int = 500000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ buffer_size: int = 10000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """the target network update rate"""
+ target_network_frequency: int = 500
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 128
+ """the batch size of samples from the replay memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.05
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.5
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 10000
+ """timestep to start learning"""
+ train_frequency: int = 10
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -123,7 +117,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -188,14 +183,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/dqn_atari.py b/cleanrl/dqn_atari.py
index a4c3df339..a23b84391 100644
--- a/cleanrl/dqn_atari.py
+++ b/cleanrl/dqn_atari.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqn_ataripy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -11,6 +10,7 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from stable_baselines3.common.atari_wrappers import (
ClipRewardEnv,
EpisodicLifeEnv,
@@ -22,66 +22,60 @@
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=1e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--buffer-size", type=int, default=1000000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.,
- help="the target network update rate")
- parser.add_argument("--target-network-frequency", type=int, default=1000,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=32,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.01,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.10,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=80000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=4,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 1e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ buffer_size: int = 1000000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """the target network update rate"""
+ target_network_frequency: int = 1000
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 32
+ """the batch size of sample from the reply memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.01
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.10
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 80000
+ """timestep to start learning"""
+ train_frequency: int = 4
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -145,7 +139,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -211,14 +206,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/dqn_atari_jax.py b/cleanrl/dqn_atari_jax.py
index 5f74d57a9..383ceeef8 100644
--- a/cleanrl/dqn_atari_jax.py
+++ b/cleanrl/dqn_atari_jax.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqn_atari_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
os.environ[
"XLA_PYTHON_CLIENT_MEM_FRACTION"
@@ -16,6 +15,7 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from stable_baselines3.common.atari_wrappers import (
ClipRewardEnv,
@@ -28,62 +28,56 @@
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=1e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--buffer-size", type=int, default=1000000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.,
- help="the target network update rate")
- parser.add_argument("--target-network-frequency", type=int, default=1000,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=32,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.01,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.10,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=80000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=4,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 1e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ buffer_size: int = 1000000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """the target network update rate"""
+ target_network_frequency: int = 1000
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 32
+ """the batch size of sample from the reply memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.01
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.10
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 80000
+ """timestep to start learning"""
+ train_frequency: int = 4
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -151,7 +145,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -242,14 +237,10 @@ def mse_loss(params):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/dqn_jax.py b/cleanrl/dqn_jax.py
index 1f0eaf623..917282578 100644
--- a/cleanrl/dqn_jax.py
+++ b/cleanrl/dqn_jax.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqn_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import flax
import flax.linen as nn
@@ -12,67 +11,62 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="CartPole-v1",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=500000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--buffer-size", type=int, default=10000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.,
- help="the target network update rate")
- parser.add_argument("--target-network-frequency", type=int, default=500,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=128,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.05,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.5,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=10000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=10,
- help="the frequency of training")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- return args
+ env_id: str = "CartPole-v1"
+ """the id of the environment"""
+ total_timesteps: int = 500000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ buffer_size: int = 10000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """the target network update rate"""
+ target_network_frequency: int = 500
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 128
+ """the batch size of sample from the reply memory"""
+ start_e: float = 1
+ """the starting epsilon for exploration"""
+ end_e: float = 0.05
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.5
+ """the fraction of `total-timesteps` it takes from start-e to go end-e"""
+ learning_starts: int = 10000
+ """timestep to start learning"""
+ train_frequency: int = 10
+ """the frequency of training"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -123,7 +117,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -211,14 +206,10 @@ def mse_loss(params):
# TRY NOT TO MODIFY: record rewards for plotting purposes
if "final_info" in infos:
for info in infos["final_info"]:
- # Skip the envs that are not done
- if "episode" not in info:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
- writer.add_scalar("charts/epsilon", epsilon, global_step)
- break
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation`
real_next_obs = next_obs.copy()
diff --git a/cleanrl/ppg_procgen.py b/cleanrl/ppg_procgen.py
index ea574a814..845fe1726 100644
--- a/cleanrl/ppg_procgen.py
+++ b/cleanrl/ppg_procgen.py
@@ -1,99 +1,101 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppg/#ppg_procgenpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from procgen import ProcgenEnv
from torch import distributions as td
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="starpilot",
- help="the id of the environment")
- parser.add_argument("--learning-rate", type=float, default=5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--total-timesteps", type=int, default=25e6,
- help="total timesteps of the experiments")
- parser.add_argument("--num-envs", type=int, default=64,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=256,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Use GAE for advantage computation")
- parser.add_argument("--gamma", type=float, default=0.999,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=8,
- help="the number of mini-batches")
- parser.add_argument("--adv-norm-fullbatch", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Full batch advantage normalization as used in PPG code")
- parser.add_argument("--clip-coef", type=float, default=0.2,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
+ env_id: str = "starpilot"
+ """the id of the environment"""
+ total_timesteps: int = int(25e6)
+ """total timesteps of the experiments"""
+ learning_rate: float = 5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 64
+ """the number of parallel game environments"""
+ num_steps: int = 256
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = False
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.999
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 8
+ """the number of mini-batches"""
+ adv_norm_fullbatch: bool = True
+ """Toggle full batch advantage normalization as used in PPG code"""
+ clip_coef: float = 0.2
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
# PPG specific arguments
- parser.add_argument("--n-iteration", type=int, default=32,
- help="N_pi: the number of policy update in the policy phase ")
- parser.add_argument("--e-policy", type=int, default=1,
- help="E_pi: the number of policy update in the policy phase ")
- parser.add_argument("--v-value", type=int, default=1,
- help="E_V: the number of policy update in the policy phase ")
- parser.add_argument("--e-auxiliary", type=int, default=6,
- help="E_aux:the K epochs to update the policy")
- parser.add_argument("--beta-clone", type=float, default=1.0,
- help="the behavior cloning coefficient")
- parser.add_argument("--num-aux-rollouts", type=int, default=4,
- help="the number of mini batch in the auxiliary phase")
- parser.add_argument("--n-aux-grad-accum", type=int, default=1,
- help="the number of gradient accumulation in mini batch")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- args.aux_batch_rollouts = int(args.num_envs * args.n_iteration)
- assert args.v_value == 1, "Multiple value epoch (v_value != 1) is not supported yet"
- # fmt: on
- return args
+ n_iteration: int = 32
+ """N_pi: the number of policy update in the policy phase """
+ e_policy: int = 1
+ """E_pi: the number of policy update in the policy phase """
+ v_value: int = 1
+ """E_V: the number of policy update in the policy phase """
+ e_auxiliary: int = 6
+ """E_aux:the K epochs to update the policy"""
+ beta_clone: float = 1.0
+ """the behavior cloning coefficient"""
+ num_aux_rollouts: int = 4
+ """the number of mini batch in the auxiliary phase"""
+ n_aux_grad_accum: int = 1
+ """the number of gradient accumulation in mini batch"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
+ num_phases: int = 0
+ """the number of phases (computed in runtime)"""
+ aux_batch_rollouts: int = 0
+ """the number of rollouts in the auxiliary phase (computed in runtime)"""
def layer_init_normed(layer, norm_dim, scale=1.0):
@@ -210,7 +212,13 @@ def get_pi(self, x):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
+ args.num_phases = int(args.num_iterations // args.n_iteration)
+ args.aux_batch_rollouts = int(args.num_envs * args.n_iteration)
+ assert args.v_value == 1, "Multiple value epoch (v_value != 1) is not supported yet"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -273,16 +281,14 @@ def get_pi(self, x):
start_time = time.time()
next_obs = torch.Tensor(envs.reset()).to(device)
next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // args.batch_size
- num_phases = int(num_updates // args.n_iteration)
- for phase in range(1, num_phases + 1):
+ for phase in range(1, args.num_phases + 1):
# POLICY PHASE
for update in range(1, args.n_iteration + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (update - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
@@ -313,30 +319,18 @@ def get_pi(self, x):
# bootstrap value if not done
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
- if args.gae:
- advantages = torch.zeros_like(rewards).to(device)
- lastgaelam = 0
- for t in reversed(range(args.num_steps)):
- if t == args.num_steps - 1:
- nextnonterminal = 1.0 - next_done
- nextvalues = next_value
- else:
- nextnonterminal = 1.0 - dones[t + 1]
- nextvalues = values[t + 1]
- delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
- advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
- returns = advantages + values
- else:
- returns = torch.zeros_like(rewards).to(device)
- for t in reversed(range(args.num_steps)):
- if t == args.num_steps - 1:
- nextnonterminal = 1.0 - next_done
- next_return = next_value
- else:
- nextnonterminal = 1.0 - dones[t + 1]
- next_return = returns[t + 1]
- returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
- advantages = returns - values
+ advantages = torch.zeros_like(rewards).to(device)
+ lastgaelam = 0
+ for t in reversed(range(args.num_steps)):
+ if t == args.num_steps - 1:
+ nextnonterminal = 1.0 - next_done
+ nextvalues = next_value
+ else:
+ nextnonterminal = 1.0 - dones[t + 1]
+ nextvalues = values[t + 1]
+ delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
+ advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
+ returns = advantages + values
# flatten the batch
b_obs = obs.reshape((-1,) + envs.single_observation_space.shape)
@@ -399,9 +393,8 @@ def get_pi(self, x):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo.py b/cleanrl/ppo.py
index 091378209..bfec99563 100644
--- a/cleanrl/ppo.py
+++ b/cleanrl/ppo.py
@@ -1,82 +1,84 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppopy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
-import gym
+import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="CartPole-v1",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=500000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=4,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.2,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
-
-
-def make_env(env_id, seed, idx, capture_video, run_name):
+ env_id: str = "CartPole-v1"
+ """the id of the environment"""
+ total_timesteps: int = 500000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 4
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.2
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
+
+
+def make_env(env_id, idx, capture_video, run_name):
def thunk():
if capture_video and idx == 0:
env = gym.make(env_id)
@@ -84,9 +86,6 @@ def thunk():
else:
env = gym.make(env_id)
env = gym.wrappers.RecordEpisodeStatistics(env)
- env.seed(seed)
- env.action_space.seed(seed)
- env.observation_space.seed(seed)
return env
return thunk
@@ -128,7 +127,10 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -158,7 +160,7 @@ def get_action_and_value(self, x, action=None):
# env setup
envs = gym.vector.SyncVectorEnv(
- [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)]
+ [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.num_envs)],
)
assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
@@ -176,19 +178,19 @@ def get_action_and_value(self, x, action=None):
# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
- next_obs = torch.Tensor(envs.reset()).to(device)
+ next_obs, _ = envs.reset(seed=args.seed)
+ next_obs = torch.Tensor(next_obs).to(device)
next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -200,16 +202,17 @@ def get_action_and_value(self, x, action=None):
logprobs[step] = logprob
# TRY NOT TO MODIFY: execute the game and log data.
- next_obs, reward, done, info = envs.step(action.cpu().numpy())
+ next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy())
+ next_done = np.logical_or(terminations, truncations)
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
- for item in info:
- if "episode" in item.keys():
- print(f"global_step={global_step}, episodic_return={item['episode']['r']}")
- writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step)
- break
+ if "final_info" in infos:
+ for info in infos["final_info"]:
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# bootstrap value if not done
with torch.no_grad():
@@ -286,9 +289,8 @@ def get_action_and_value(self, x, action=None):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_atari.py b/cleanrl/ppo_atari.py
index 14be7a470..5bc9859cc 100644
--- a/cleanrl/ppo_atari.py
+++ b/cleanrl/ppo_atari.py
@@ -1,15 +1,15 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_ataripy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
-import gym
+import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
@@ -22,71 +22,77 @@
)
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=8,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
-
-
-def make_env(env_id, seed, idx, capture_video, run_name):
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 8
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
+
+
+def make_env(env_id, idx, capture_video, run_name):
def thunk():
- env = gym.make(env_id)
+ if capture_video and idx == 0:
+ env = gym.make(env_id, render_mode="rgb_array")
+ env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
+ else:
+ env = gym.make(env_id)
env = gym.wrappers.RecordEpisodeStatistics(env)
if capture_video:
if idx == 0:
@@ -100,9 +106,6 @@ def thunk():
env = gym.wrappers.ResizeObservation(env, (84, 84))
env = gym.wrappers.GrayScaleObservation(env)
env = gym.wrappers.FrameStack(env, 4)
- env.seed(seed)
- env.action_space.seed(seed)
- env.observation_space.seed(seed)
return env
return thunk
@@ -144,7 +147,10 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -174,7 +180,7 @@ def get_action_and_value(self, x, action=None):
# env setup
envs = gym.vector.SyncVectorEnv(
- [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)]
+ [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.num_envs)],
)
assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
@@ -192,19 +198,19 @@ def get_action_and_value(self, x, action=None):
# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
- next_obs = torch.Tensor(envs.reset()).to(device)
+ next_obs, _ = envs.reset(seed=args.seed)
+ next_obs = torch.Tensor(next_obs).to(device)
next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -216,16 +222,17 @@ def get_action_and_value(self, x, action=None):
logprobs[step] = logprob
# TRY NOT TO MODIFY: execute the game and log data.
- next_obs, reward, done, info = envs.step(action.cpu().numpy())
+ next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy())
+ next_done = np.logical_or(terminations, truncations)
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
- for item in info:
- if "episode" in item.keys():
- print(f"global_step={global_step}, episodic_return={item['episode']['r']}")
- writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step)
- break
+ if "final_info" in infos:
+ for info in infos["final_info"]:
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# bootstrap value if not done
with torch.no_grad():
@@ -302,9 +309,8 @@ def get_action_and_value(self, x, action=None):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_atari_envpool.py b/cleanrl/ppo_atari_envpool.py
index f72f223ed..1be166fba 100644
--- a/cleanrl/ppo_atari_envpool.py
+++ b/cleanrl/ppo_atari_envpool.py
@@ -1,10 +1,9 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_envpoolpy
-import argparse
import os
import random
import time
from collections import deque
-from distutils.util import strtobool
+from dataclasses import dataclass
import envpool
import gym
@@ -12,70 +11,73 @@
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Pong-v5",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=8,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
+ env_id: str = "Breakout-v5"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 8
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
class RecordEpisodeStatistics(gym.Wrapper):
@@ -148,7 +150,10 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -208,17 +213,16 @@ def get_action_and_value(self, x, action=None):
start_time = time.time()
next_obs = torch.Tensor(envs.reset()).to(device)
next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -230,11 +234,11 @@ def get_action_and_value(self, x, action=None):
logprobs[step] = logprob
# TRY NOT TO MODIFY: execute the game and log data.
- next_obs, reward, done, info = envs.step(action.cpu().numpy())
+ next_obs, reward, next_done, info = envs.step(action.cpu().numpy())
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
- for idx, d in enumerate(done):
+ for idx, d in enumerate(next_done):
if d and info["lives"][idx] == 0:
print(f"global_step={global_step}, episodic_return={info['r'][idx]}")
avg_returns.append(info["r"][idx])
@@ -317,9 +321,8 @@ def get_action_and_value(self, x, action=None):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_atari_envpool_xla_jax.py b/cleanrl/ppo_atari_envpool_xla_jax.py
index f47d28513..8e7da71f5 100644
--- a/cleanrl/ppo_atari_envpool_xla_jax.py
+++ b/cleanrl/ppo_atari_envpool_xla_jax.py
@@ -1,15 +1,10 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_envpool_xla_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
from typing import Sequence
-os.environ[
- "XLA_PYTHON_CLIENT_MEM_FRACTION"
-] = "0.7" # see https://github.com/google/jax/discussions/6332#discussioncomment-1279991
-
import envpool
import flax
import flax.linen as nn
@@ -18,70 +13,80 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.linen.initializers import constant, orthogonal
from flax.training.train_state import TrainState
from torch.utils.tensorboard import SummaryWriter
-
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+# Fix weird OOM https://github.com/google/jax/discussions/6332#discussioncomment-1279991
+os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "0.6"
+# Fix CUDNN non-determinism; https://github.com/google/jax/issues/4823#issuecomment-952835771
+os.environ["TF_XLA_FLAGS"] = "--xla_gpu_autotune_level=2 --xla_gpu_deterministic_reductions"
+os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
+
+
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Pong-v5",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=8,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- args.num_updates = args.total_timesteps // args.batch_size
- # fmt: on
- return args
+ env_id: str = "Breakout-v5"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 8
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
class Network(nn.Module):
@@ -164,7 +169,10 @@ class EpisodeStatistics:
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -233,7 +241,7 @@ def step_env_wrappeed(episode_stats, handle, action):
def linear_schedule(count):
# anneal learning rate linearly after one training iteration which contains
# (args.num_minibatches * args.update_epochs) gradient updates
- frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_updates
+ frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_iterations
return args.learning_rate * frac
network = Network()
@@ -401,7 +409,7 @@ def ppo_loss(params, x, a, logp, mb_advantages, mb_returns):
@jax.jit
def rollout(agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step):
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
storage, action, key = get_action_and_value(agent_state, next_obs, next_done, storage, step, key)
# TRY NOT TO MODIFY: execute the game and log data.
@@ -409,8 +417,8 @@ def rollout(agent_state, episode_stats, next_obs, next_done, storage, key, handl
storage = storage.replace(rewards=storage.rewards.at[step].set(reward))
return agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step
- for update in range(1, args.num_updates + 1):
- update_time_start = time.time()
+ for iteration in range(1, args.num_iterations + 1):
+ iteration_time_start = time.time()
agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step = rollout(
agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step
)
@@ -437,7 +445,7 @@ def rollout(agent_state, episode_stats, next_obs, next_done, storage, key, handl
print("SPS:", int(global_step / (time.time() - start_time)))
writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
writer.add_scalar(
- "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - update_time_start)), global_step
+ "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - iteration_time_start)), global_step
)
envs.close()
diff --git a/cleanrl/ppo_atari_envpool_xla_jax_scan.py b/cleanrl/ppo_atari_envpool_xla_jax_scan.py
index 19f97b1d1..cf8d8f88d 100644
--- a/cleanrl/ppo_atari_envpool_xla_jax_scan.py
+++ b/cleanrl/ppo_atari_envpool_xla_jax_scan.py
@@ -1,16 +1,11 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_envpool_xla_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
from functools import partial
from typing import Sequence
-os.environ[
- "XLA_PYTHON_CLIENT_MEM_FRACTION"
-] = "0.7" # see https://github.com/google/jax/discussions/6332#discussioncomment-1279991
-
import envpool
import flax
import flax.linen as nn
@@ -19,76 +14,86 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.linen.initializers import constant, orthogonal
from flax.training.train_state import TrainState
from torch.utils.tensorboard import SummaryWriter
-
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+# Fix weird OOM https://github.com/google/jax/discussions/6332#discussioncomment-1279991
+os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "0.6"
+# Fix CUDNN non-determinism; https://github.com/google/jax/issues/4823#issuecomment-952835771
+os.environ["TF_XLA_FLAGS"] = "--xla_gpu_autotune_level=2 --xla_gpu_deterministic_reductions"
+os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
+
+
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Pong-v5",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=8,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- args.num_updates = args.total_timesteps // args.batch_size
- # fmt: on
- return args
+ env_id: str = "Breakout-v5"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 8
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
def make_env(env_id, seed, num_envs):
@@ -190,7 +195,10 @@ class EpisodeStatistics:
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -248,7 +256,7 @@ def step_env_wrappeed(episode_stats, handle, action):
def linear_schedule(count):
# anneal learning rate linearly after one training iteration which contains
# (args.num_minibatches * args.update_epochs) gradient updates
- frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_updates
+ frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_iterations
return args.learning_rate * frac
network = Network()
@@ -442,8 +450,8 @@ def rollout(agent_state, episode_stats, next_obs, next_done, key, handle, step_o
rollout = partial(rollout, step_once_fn=partial(step_once, env_step_fn=step_env_wrappeed), max_steps=args.num_steps)
- for update in range(1, args.num_updates + 1):
- update_time_start = time.time()
+ for iteration in range(1, args.num_iterations + 1):
+ iteration_time_start = time.time()
agent_state, episode_stats, next_obs, next_done, storage, key, handle = rollout(
agent_state, episode_stats, next_obs, next_done, key, handle
)
@@ -471,7 +479,7 @@ def rollout(agent_state, episode_stats, next_obs, next_done, key, handle, step_o
print("SPS:", int(global_step / (time.time() - start_time)))
writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
writer.add_scalar(
- "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - update_time_start)), global_step
+ "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - iteration_time_start)), global_step
)
if args.save_model:
diff --git a/cleanrl/ppo_atari_lstm.py b/cleanrl/ppo_atari_lstm.py
index a90aa4ce8..630414dfb 100644
--- a/cleanrl/ppo_atari_lstm.py
+++ b/cleanrl/ppo_atari_lstm.py
@@ -1,15 +1,15 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_lstmpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
-import gym
+import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
@@ -22,71 +22,77 @@
)
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=8,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
-
-
-def make_env(env_id, seed, idx, capture_video, run_name):
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 8
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
+
+
+def make_env(env_id, idx, capture_video, run_name):
def thunk():
- env = gym.make(env_id)
+ if capture_video and idx == 0:
+ env = gym.make(env_id)
+ env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
+ else:
+ env = gym.make(env_id)
env = gym.wrappers.RecordEpisodeStatistics(env)
if capture_video:
if idx == 0:
@@ -100,9 +106,6 @@ def thunk():
env = gym.wrappers.ResizeObservation(env, (84, 84))
env = gym.wrappers.GrayScaleObservation(env)
env = gym.wrappers.FrameStack(env, 1)
- env.seed(seed)
- env.action_space.seed(seed)
- env.observation_space.seed(seed)
return env
return thunk
@@ -171,7 +174,10 @@ def get_action_and_value(self, x, lstm_state, done, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -201,7 +207,7 @@ def get_action_and_value(self, x, lstm_state, done, action=None):
# env setup
envs = gym.vector.SyncVectorEnv(
- [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)]
+ [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.num_envs)],
)
assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
@@ -219,24 +225,24 @@ def get_action_and_value(self, x, lstm_state, done, action=None):
# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
- next_obs = torch.Tensor(envs.reset()).to(device)
+ next_obs, _ = envs.reset(seed=args.seed)
+ next_obs = torch.Tensor(next_obs).to(device)
next_done = torch.zeros(args.num_envs).to(device)
next_lstm_state = (
torch.zeros(agent.lstm.num_layers, args.num_envs, agent.lstm.hidden_size).to(device),
torch.zeros(agent.lstm.num_layers, args.num_envs, agent.lstm.hidden_size).to(device),
) # hidden and cell states (see https://youtu.be/8HyCNIVRbSU)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
initial_lstm_state = (next_lstm_state[0].clone(), next_lstm_state[1].clone())
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -248,16 +254,17 @@ def get_action_and_value(self, x, lstm_state, done, action=None):
logprobs[step] = logprob
# TRY NOT TO MODIFY: execute the game and log data.
- next_obs, reward, done, info = envs.step(action.cpu().numpy())
+ next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy())
+ next_done = np.logical_or(terminations, truncations)
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
- for item in info:
- if "episode" in item.keys():
- print(f"global_step={global_step}, episodic_return={item['episode']['r']}")
- writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step)
- break
+ if "final_info" in infos:
+ for info in infos["final_info"]:
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# bootstrap value if not done
with torch.no_grad():
@@ -348,9 +355,8 @@ def get_action_and_value(self, x, lstm_state, done, action=None):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_atari_multigpu.py b/cleanrl/ppo_atari_multigpu.py
index 8955e1298..5fedd7881 100644
--- a/cleanrl/ppo_atari_multigpu.py
+++ b/cleanrl/ppo_atari_multigpu.py
@@ -1,17 +1,19 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_multigpupy
-import argparse
import os
import random
import time
import warnings
-from distutils.util import strtobool
+from dataclasses import dataclass, field
+from typing import List, Literal
-import gym
+import gymnasium as gym
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
+import tyro
+from rich.pretty import pprint
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
@@ -24,75 +26,89 @@
)
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=2.5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=8,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- parser.add_argument("--device-ids", nargs="+", default=[],
- help="the device ids that subprocess workers will use")
- parser.add_argument("--backend", type=str, default="gloo", choices=["gloo", "nccl", "mpi"],
- help="the id of the environment")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
-
-
-def make_env(env_id, seed, idx, capture_video, run_name):
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 2.5e-4
+ """the learning rate of the optimizer"""
+ local_num_envs: int = 8
+ """the number of parallel game environments (in the local rank)"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+ device_ids: List[int] = field(default_factory=lambda: [])
+ """the device ids that subprocess workers will use"""
+ backend: Literal["gloo", "nccl", "mpi"] = "gloo"
+ """the backend for distributed training"""
+
+ # to be filled in runtime
+ local_batch_size: int = 0
+ """the local batch size in the local rank (computed in runtime)"""
+ local_minibatch_size: int = 0
+ """the local mini-batch size in the local rank (computed in runtime)"""
+ num_envs: int = 0
+ """the number of parallel game environments (computed in runtime)"""
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
+ world_size: int = 0
+ """the number of processes (computed in runtime)"""
+
+
+def make_env(env_id, idx, capture_video, run_name):
def thunk():
- env = gym.make(env_id)
+ if capture_video and idx == 0:
+ env = gym.make(env_id, render_mode="rgb_array")
+ env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
+ else:
+ env = gym.make(env_id)
env = gym.wrappers.RecordEpisodeStatistics(env)
if capture_video:
if idx == 0:
@@ -106,9 +122,6 @@ def thunk():
env = gym.wrappers.ResizeObservation(env, (84, 84))
env = gym.wrappers.GrayScaleObservation(env)
env = gym.wrappers.FrameStack(env, 4)
- env.seed(seed)
- env.action_space.seed(seed)
- env.observation_space.seed(seed)
return env
return thunk
@@ -152,15 +165,17 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
# torchrun --standalone --nnodes=1 --nproc_per_node=2 ppo_atari_multigpu.py
# taken from https://pytorch.org/docs/stable/elastic/run.html
+ args = tyro.cli(Args)
local_rank = int(os.getenv("LOCAL_RANK", "0"))
- world_size = int(os.getenv("WORLD_SIZE", "1"))
- args = parse_args()
- args.world_size = world_size
- args.num_envs = int(args.num_envs / world_size)
+ args.world_size = int(os.getenv("WORLD_SIZE", "1"))
+ args.local_batch_size = int(args.local_num_envs * args.num_steps)
+ args.local_minibatch_size = int(args.local_batch_size // args.num_minibatches)
+ args.num_envs = args.local_num_envs * args.world_size
args.batch_size = int(args.num_envs * args.num_steps)
args.minibatch_size = int(args.batch_size // args.num_minibatches)
- if world_size > 1:
- dist.init_process_group(args.backend, rank=local_rank, world_size=world_size)
+ args.num_iterations = args.total_timesteps // args.batch_size
+ if args.world_size > 1:
+ dist.init_process_group(args.backend, rank=local_rank, world_size=args.world_size)
else:
warnings.warn(
"""
@@ -169,8 +184,6 @@ def get_action_and_value(self, x, action=None):
E.g., `torchrun --standalone --nnodes=1 --nproc_per_node=2 ppo_atari_multigpu.py`
"""
)
- print(f"================================")
- print(f"args.num_envs: {args.num_envs}, args.batch_size: {args.batch_size}, args.minibatch_size: {args.minibatch_size}")
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
writer = None
if local_rank == 0:
@@ -191,6 +204,7 @@ def get_action_and_value(self, x, action=None):
"hyperparameters",
"|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
)
+ pprint(args)
# TRY NOT TO MODIFY: seeding
# CRUCIAL: note that we needed to pass a different seed for each data parallelism worker
@@ -201,18 +215,18 @@ def get_action_and_value(self, x, action=None):
torch.backends.cudnn.deterministic = args.torch_deterministic
if len(args.device_ids) > 0:
- assert len(args.device_ids) == world_size, "you must specify the same number of device ids as `--nproc_per_node`"
+ assert len(args.device_ids) == args.world_size, "you must specify the same number of device ids as `--nproc_per_node`"
device = torch.device(f"cuda:{args.device_ids[local_rank]}" if torch.cuda.is_available() and args.cuda else "cpu")
else:
device_count = torch.cuda.device_count()
- if device_count < world_size:
+ if device_count < args.world_size:
device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
else:
device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() and args.cuda else "cpu")
# env setup
envs = gym.vector.SyncVectorEnv(
- [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)]
+ [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.local_num_envs)],
)
assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
@@ -221,29 +235,29 @@ def get_action_and_value(self, x, action=None):
optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
# ALGO Logic: Storage setup
- obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device)
- actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device)
- logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device)
- rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
- dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
- values = torch.zeros((args.num_steps, args.num_envs)).to(device)
+ obs = torch.zeros((args.num_steps, args.local_num_envs) + envs.single_observation_space.shape).to(device)
+ actions = torch.zeros((args.num_steps, args.local_num_envs) + envs.single_action_space.shape).to(device)
+ logprobs = torch.zeros((args.num_steps, args.local_num_envs)).to(device)
+ rewards = torch.zeros((args.num_steps, args.local_num_envs)).to(device)
+ dones = torch.zeros((args.num_steps, args.local_num_envs)).to(device)
+ values = torch.zeros((args.num_steps, args.local_num_envs)).to(device)
# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
- next_obs = torch.Tensor(envs.reset()).to(device)
- next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // (args.batch_size * world_size)
+ next_obs, _ = envs.reset(seed=args.seed)
+ next_obs = torch.Tensor(next_obs).to(device)
+ next_done = torch.zeros(args.local_num_envs).to(device)
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs * world_size
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -255,19 +269,23 @@ def get_action_and_value(self, x, action=None):
logprobs[step] = logprob
# TRY NOT TO MODIFY: execute the game and log data.
- next_obs, reward, done, info = envs.step(action.cpu().numpy())
+ next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy())
+ next_done = np.logical_or(terminations, truncations)
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
+
+ if not writer:
+ continue
- for item in info:
- if "episode" in item.keys() and local_rank == 0:
- print(f"global_step={global_step}, episodic_return={item['episode']['r']}")
- writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step)
- break
+ if "final_info" in infos:
+ for info in infos["final_info"]:
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
print(
- f"local_rank: {local_rank}, action.sum(): {action.sum()}, update: {update}, agent.actor.weight.sum(): {agent.actor.weight.sum()}"
+ f"local_rank: {local_rank}, action.sum(): {action.sum()}, iteration: {iteration}, agent.actor.weight.sum(): {agent.actor.weight.sum()}"
)
# bootstrap value if not done
with torch.no_grad():
@@ -294,12 +312,12 @@ def get_action_and_value(self, x, action=None):
b_values = values.reshape(-1)
# Optimizing the policy and value network
- b_inds = np.arange(args.batch_size)
+ b_inds = np.arange(args.local_batch_size)
clipfracs = []
for epoch in range(args.update_epochs):
np.random.shuffle(b_inds)
- for start in range(0, args.batch_size, args.minibatch_size):
- end = start + args.minibatch_size
+ for start in range(0, args.local_batch_size, args.local_minibatch_size):
+ end = start + args.local_minibatch_size
mb_inds = b_inds[start:end]
_, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds])
@@ -342,7 +360,7 @@ def get_action_and_value(self, x, action=None):
optimizer.zero_grad()
loss.backward()
- if world_size > 1:
+ if args.world_size > 1:
# batch allreduce ops: see https://github.com/entity-neural-network/incubator/pull/220
all_grads_list = []
for param in agent.parameters():
@@ -354,16 +372,15 @@ def get_action_and_value(self, x, action=None):
for param in agent.parameters():
if param.grad is not None:
param.grad.data.copy_(
- all_grads[offset : offset + param.numel()].view_as(param.grad.data) / world_size
+ all_grads[offset : offset + param.numel()].view_as(param.grad.data) / args.world_size
)
offset += param.numel()
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_continuous_action.py b/cleanrl/ppo_continuous_action.py
index 0f2f3b033..b454521b6 100644
--- a/cleanrl/ppo_continuous_action.py
+++ b/cleanrl/ppo_continuous_action.py
@@ -1,85 +1,87 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_continuous_actionpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="HalfCheetah-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=1000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=3e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=2048,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=32,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=10,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.2,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.0,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
+ env_id: str = "HalfCheetah-v4"
+ """the id of the environment"""
+ total_timesteps: int = 1000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 3e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ num_steps: int = 2048
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 32
+ """the number of mini-batches"""
+ update_epochs: int = 10
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.2
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.0
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
def make_env(env_id, idx, capture_video, run_name, gamma):
@@ -140,7 +142,10 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -191,17 +196,16 @@ def get_action_and_value(self, x, action=None):
next_obs, _ = envs.reset(seed=args.seed)
next_obs = torch.Tensor(next_obs).to(device)
next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -214,21 +218,16 @@ def get_action_and_value(self, x, action=None):
# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy())
- done = np.logical_or(terminations, truncations)
+ next_done = np.logical_or(terminations, truncations)
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
-
- # Only print when at least 1 env is done
- if "final_info" not in infos:
- continue
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
- for info in infos["final_info"]:
- # Skip the envs that are not done
- if info is None:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
+ if "final_info" in infos:
+ for info in infos["final_info"]:
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# bootstrap value if not done
with torch.no_grad():
@@ -305,9 +304,8 @@ def get_action_and_value(self, x, action=None):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py b/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py
index ddf3cf898..9ba45735d 100644
--- a/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py
+++ b/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py
@@ -27,11 +27,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_continuous_action_isaacgympy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gym
import isaacgym # noqa
@@ -40,75 +39,77 @@
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Ant",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=30000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=0.0026,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=4096,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=16,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=2,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.2,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.0,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=2,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=1,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
-
- parser.add_argument("--reward-scaler", type=float, default=1,
- help="the scale factor applied to the reward during training")
- parser.add_argument("--record-video-step-frequency", type=int, default=1464,
- help="the frequency at which to record the videos")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
+ env_id: str = "Ant"
+ """the id of the environment"""
+ total_timesteps: int = 30000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 0.0026
+ """the learning rate of the optimizer"""
+ num_envs: int = 4096
+ """the number of parallel game environments"""
+ num_steps: int = 16
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = False
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 2
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.2
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = False
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.0
+ """coefficient of the entropy"""
+ vf_coef: float = 2
+ """coefficient of the value function"""
+ max_grad_norm: float = 1
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+ reward_scaler: float = 1
+ """the scale factor applied to the reward during training"""
+ record_video_step_frequency: int = 1464
+ """the frequency at which to record the videos"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
class RecordEpisodeStatisticsTorch(gym.Wrapper):
@@ -189,7 +190,10 @@ def observation(self, obs):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -262,17 +266,16 @@ def observation(self, obs):
start_time = time.time()
next_obs = envs.reset()
next_done = torch.zeros(args.num_envs, dtype=torch.float).to(device)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -372,9 +375,8 @@ def observation(self, obs):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
# TRY NOT TO MODIFY: record rewards for plotting purposes
writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step)
diff --git a/cleanrl/ppo_pettingzoo_ma_atari.py b/cleanrl/ppo_pettingzoo_ma_atari.py
index bc51c703c..87b2b3123 100644
--- a/cleanrl/ppo_pettingzoo_ma_atari.py
+++ b/cleanrl/ppo_pettingzoo_ma_atari.py
@@ -33,7 +33,7 @@ def parse_args():
help="the wandb's project name")
parser.add_argument("--wandb-entity", type=str, default=None,
help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
+ parser.add_argument("--capture_video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
help="whether to capture videos of the agent performances (check out `videos` folder)")
# Algorithm specific arguments
diff --git a/cleanrl/ppo_procgen.py b/cleanrl/ppo_procgen.py
index 9a93eb0cd..0a13317da 100644
--- a/cleanrl/ppo_procgen.py
+++ b/cleanrl/ppo_procgen.py
@@ -1,80 +1,82 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_procgenpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from procgen import ProcgenEnv
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="starpilot",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=int(25e6),
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=5e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=64,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=256,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.999,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=8,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=3,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.2,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.01,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
+ env_id: str = "starpilot"
+ """the id of the environment"""
+ total_timesteps: int = int(25e6)
+ """total timesteps of the experiments"""
+ learning_rate: float = 5e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 64
+ """the number of parallel game environments"""
+ num_steps: int = 256
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = False
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.999
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 8
+ """the number of mini-batches"""
+ update_epochs: int = 3
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.2
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.01
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
@@ -154,7 +156,10 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -211,17 +216,16 @@ def get_action_and_value(self, x, action=None):
start_time = time.time()
next_obs = torch.Tensor(envs.reset()).to(device)
next_done = torch.zeros(args.num_envs).to(device)
- num_updates = args.total_timesteps // args.batch_size
- for update in range(1, num_updates + 1):
+ for iteration in range(1, args.num_iterations + 1):
# Annealing the rate if instructed to do so.
if args.anneal_lr:
- frac = 1.0 - (update - 1.0) / num_updates
+ frac = 1.0 - (iteration - 1.0) / args.num_iterations
lrnow = frac * args.learning_rate
optimizer.param_groups[0]["lr"] = lrnow
for step in range(0, args.num_steps):
- global_step += 1 * args.num_envs
+ global_step += args.num_envs
obs[step] = next_obs
dones[step] = next_done
@@ -233,9 +237,9 @@ def get_action_and_value(self, x, action=None):
logprobs[step] = logprob
# TRY NOT TO MODIFY: execute the game and log data.
- next_obs, reward, done, info = envs.step(action.cpu().numpy())
+ next_obs, reward, next_done, info = envs.step(action.cpu().numpy())
rewards[step] = torch.tensor(reward).to(device).view(-1)
- next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
+ next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)
for item in info:
if "episode" in item.keys():
@@ -319,9 +323,8 @@ def get_action_and_value(self, x, action=None):
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
- if args.target_kl is not None:
- if approx_kl > args.target_kl:
- break
+ if args.target_kl is not None and approx_kl > args.target_kl:
+ break
y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
var_y = np.var(y_true)
diff --git a/cleanrl/ppo_rnd_envpool.py b/cleanrl/ppo_rnd_envpool.py
index 32676d08b..0c1758274 100644
--- a/cleanrl/ppo_rnd_envpool.py
+++ b/cleanrl/ppo_rnd_envpool.py
@@ -1,10 +1,9 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo-rnd/#ppo_rnd_envpoolpy
-import argparse
import os
import random
import time
from collections import deque
-from distutils.util import strtobool
+from dataclasses import dataclass
import envpool
import gym
@@ -13,84 +12,86 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from gym.wrappers.normalize import RunningMeanStd
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="MontezumaRevenge-v5",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=2000000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=1e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=128,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=128,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.999,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=4,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=4,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.1,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.001,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- parser.add_argument("--sticky-action", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, sticky action will be used")
+ env_id: str = "MontezumaRevenge-v5"
+ """the id of the environment"""
+ total_timesteps: int = 2000000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 1e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 128
+ """the number of parallel game environments"""
+ num_steps: int = 128
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.999
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 4
+ """the number of mini-batches"""
+ update_epochs: int = 4
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.1
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.001
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
# RND arguments
- parser.add_argument("--update-proportion", type=float, default=0.25,
- help="proportion of exp used for predictor update")
- parser.add_argument("--int-coef", type=float, default=1.0,
- help="coefficient of extrinsic reward")
- parser.add_argument("--ext-coef", type=float, default=2.0,
- help="coefficient of intrinsic reward")
- parser.add_argument("--int-gamma", type=float, default=0.99,
- help="Intrinsic reward discount rate")
- parser.add_argument("--num-iterations-obs-norm-init", type=int, default=50,
- help="number of iterations to initialize the observations normalization parameters")
-
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
+ update_proportion: float = 0.25
+ """proportion of exp used for predictor update"""
+ int_coef: float = 1.0
+ """coefficient of extrinsic reward"""
+ ext_coef: float = 2.0
+ """coefficient of intrinsic reward"""
+ int_gamma: float = 0.99
+ """Intrinsic reward discount rate"""
+ num_iterations_obs_norm_init: int = 50
+ """number of iterations to initialize the observations normalization parameters"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
class RecordEpisodeStatistics(gym.Wrapper):
@@ -242,7 +243,10 @@ def update(self, rews):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl/qdagger_dqn_atari_impalacnn.py b/cleanrl/qdagger_dqn_atari_impalacnn.py
index ef7922a91..6cde11c99 100644
--- a/cleanrl/qdagger_dqn_atari_impalacnn.py
+++ b/cleanrl/qdagger_dqn_atari_impalacnn.py
@@ -1,10 +1,9 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/qdagger/#qdagger_dqn_atari_jax_impalacnnpy
-import argparse
import os
import random
import time
from collections import deque
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -12,6 +11,7 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from huggingface_hub import hf_hub_download
from rich.progress import track
from stable_baselines3.common.atari_wrappers import (
@@ -28,81 +28,74 @@
from cleanrl_utils.evals.dqn_eval import evaluate
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=1e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--buffer-size", type=int, default=1000000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.,
- help="the target network update rate")
- parser.add_argument("--target-network-frequency", type=int, default=1000,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=32,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.01,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.10,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=80000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=4,
- help="the frequency of training")
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 1e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ buffer_size: int = 1000000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """the target network update rate"""
+ target_network_frequency: int = 1000
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 32
+ """the batch size of sample from the replay memory"""
+ start_e: float = 1.0
+ """the starting epsilon for exploration"""
+ end_e: float = 0.01
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.10
+ """the fraction of `total_timesteps` it takes from start_e to end_e"""
+ learning_starts: int = 80000
+ """timestep to start learning"""
+ train_frequency: int = 4
+ """the frequency of training"""
# QDagger specific arguments
- parser.add_argument("--teacher-policy-hf-repo", type=str, default=None,
- help="the huggingface repo of the teacher policy")
- parser.add_argument("--teacher-eval-episodes", type=int, default=10,
- help="the number of episodes to run the teacher policy evaluate")
- parser.add_argument("--teacher-steps", type=int, default=500000,
- help="the number of steps to run the teacher policy to generate the replay buffer")
- parser.add_argument("--offline-steps", type=int, default=500000,
- help="the number of steps to run the student policy with the teacher's replay buffer")
- parser.add_argument("--temperature", type=float, default=1.0,
- help="the temperature parameter for qdagger")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- if args.teacher_policy_hf_repo is None:
- args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-dqn_atari-seed1"
-
- return args
+ teacher_policy_hf_repo: str = None
+ """the huggingface repo of the teacher policy"""
+ teacher_model_exp_name: str = "dqn_atari"
+ """the experiment name of the teacher model"""
+ teacher_eval_episodes: int = 10
+ """the number of episodes to run the teacher policy evaluate"""
+ teacher_steps: int = 500000
+ """the number of steps to run the teacher policy to generate the replay buffer"""
+ offline_steps: int = 500000
+ """the number of steps to run the student policy with the teacher's replay buffer"""
+ temperature: float = 1.0
+ """the temperature parameter for qdagger"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -212,7 +205,10 @@ def kl_divergence_with_logits(target_logits, prediction_logits):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
+ if args.teacher_policy_hf_repo is None:
+ args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-{args.teacher_model_exp_name}-seed1"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -252,7 +248,9 @@ def kl_divergence_with_logits(target_logits, prediction_logits):
target_network.load_state_dict(q_network.state_dict())
# QDAGGER LOGIC:
- teacher_model_path = hf_hub_download(repo_id=args.teacher_policy_hf_repo, filename="dqn_atari.cleanrl_model")
+ teacher_model_path = hf_hub_download(
+ repo_id=args.teacher_policy_hf_repo, filename=f"{args.teacher_model_exp_name}.cleanrl_model"
+ )
teacher_model = TeacherModel(envs).to(device)
teacher_model.load_state_dict(torch.load(teacher_model_path, map_location=device))
teacher_model.eval()
diff --git a/cleanrl/qdagger_dqn_atari_jax_impalacnn.py b/cleanrl/qdagger_dqn_atari_jax_impalacnn.py
index ce55baf4c..7ecbb5c47 100644
--- a/cleanrl/qdagger_dqn_atari_jax_impalacnn.py
+++ b/cleanrl/qdagger_dqn_atari_jax_impalacnn.py
@@ -1,10 +1,9 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/qdagger/#qdagger_dqn_atari_jax_impalacnnpy
-import argparse
import os
import random
import time
from collections import deque
-from distutils.util import strtobool
+from dataclasses import dataclass
from typing import Sequence
os.environ[
@@ -18,6 +17,7 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from huggingface_hub import hf_hub_download
from rich.progress import track
@@ -35,77 +35,70 @@
from cleanrl_utils.evals.dqn_jax_eval import evaluate
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=10000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=1e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--buffer-size", type=int, default=1000000,
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.,
- help="the target network update rate")
- parser.add_argument("--target-network-frequency", type=int, default=1000,
- help="the timesteps it takes to update the target network")
- parser.add_argument("--batch-size", type=int, default=32,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--start-e", type=float, default=1,
- help="the starting epsilon for exploration")
- parser.add_argument("--end-e", type=float, default=0.01,
- help="the ending epsilon for exploration")
- parser.add_argument("--exploration-fraction", type=float, default=0.10,
- help="the fraction of `total-timesteps` it takes from start-e to go end-e")
- parser.add_argument("--learning-starts", type=int, default=80000,
- help="timestep to start learning")
- parser.add_argument("--train-frequency", type=int, default=4,
- help="the frequency of training")
+ env_id: str = "BreakoutNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 10000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 1e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ buffer_size: int = 1000000
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """the target network update rate"""
+ target_network_frequency: int = 1000
+ """the timesteps it takes to update the target network"""
+ batch_size: int = 32
+    """the batch size of samples from the replay memory"""
+ start_e: float = 1.0
+ """the starting epsilon for exploration"""
+ end_e: float = 0.01
+ """the ending epsilon for exploration"""
+ exploration_fraction: float = 0.10
+    """the fraction of `total_timesteps` it takes to go from start_e to end_e"""
+ learning_starts: int = 80000
+ """timestep to start learning"""
+ train_frequency: int = 4
+ """the frequency of training"""
# QDagger specific arguments
- parser.add_argument("--teacher-policy-hf-repo", type=str, default=None,
- help="the huggingface repo of the teacher policy")
- parser.add_argument("--teacher-eval-episodes", type=int, default=10,
- help="the number of episodes to run the teacher policy evaluate")
- parser.add_argument("--teacher-steps", type=int, default=500000,
- help="the number of steps to run the teacher policy to generate the replay buffer")
- parser.add_argument("--offline-steps", type=int, default=500000,
- help="the number of steps to run the student policy with the teacher's replay buffer")
- parser.add_argument("--temperature", type=float, default=1.0,
- help="the temperature parameter for qdagger")
- args = parser.parse_args()
- # fmt: on
- assert args.num_envs == 1, "vectorized envs are not supported at the moment"
-
- if args.teacher_policy_hf_repo is None:
- args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-dqn_atari_jax-seed1"
-
- return args
+ teacher_policy_hf_repo: str = None
+ """the huggingface repo of the teacher policy"""
+ teacher_model_exp_name: str = "dqn_atari_jax"
+ """the experiment name of the teacher model"""
+ teacher_eval_episodes: int = 10
+    """the number of episodes to run to evaluate the teacher policy"""
+ teacher_steps: int = 500000
+ """the number of steps to run the teacher policy to generate the replay buffer"""
+ offline_steps: int = 500000
+ """the number of steps to run the student policy with the teacher's replay buffer"""
+ temperature: float = 1.0
+ """the temperature parameter for qdagger"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -205,7 +198,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
+ assert args.num_envs == 1, "vectorized envs are not supported at the moment"
+ if args.teacher_policy_hf_repo is None:
+ args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-{args.teacher_model_exp_name}-seed1"
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -248,7 +244,9 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
q_network.apply = jax.jit(q_network.apply)
# QDAGGER LOGIC:
- teacher_model_path = hf_hub_download(repo_id=args.teacher_policy_hf_repo, filename="dqn_atari_jax.cleanrl_model")
+ teacher_model_path = hf_hub_download(
+ repo_id=args.teacher_policy_hf_repo, filename=f"{args.teacher_model_exp_name}.cleanrl_model"
+ )
teacher_model = TeacherModel(action_dim=envs.single_action_space.n)
teacher_model_key = jax.random.PRNGKey(args.seed)
teacher_params = teacher_model.init(teacher_model_key, envs.observation_space.sample())
diff --git a/cleanrl/rpo_continuous_action.py b/cleanrl/rpo_continuous_action.py
index 919ee72ae..6db3d696b 100644
--- a/cleanrl/rpo_continuous_action.py
+++ b/cleanrl/rpo_continuous_action.py
@@ -1,81 +1,83 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/rpo/#rpo_continuous_actionpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
+import tyro
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="HalfCheetah-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=8000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=3e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--num-envs", type=int, default=1,
- help="the number of parallel game environments")
- parser.add_argument("--num-steps", type=int, default=2048,
- help="the number of steps to run in each environment per policy rollout")
- parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggle learning rate annealing for policy and value networks")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--gae-lambda", type=float, default=0.95,
- help="the lambda for the general advantage estimation")
- parser.add_argument("--num-minibatches", type=int, default=32,
- help="the number of mini-batches")
- parser.add_argument("--update-epochs", type=int, default=10,
- help="the K epochs to update the policy")
- parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles advantages normalization")
- parser.add_argument("--clip-coef", type=float, default=0.2,
- help="the surrogate clipping coefficient")
- parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
- parser.add_argument("--ent-coef", type=float, default=0.0,
- help="coefficient of the entropy")
- parser.add_argument("--vf-coef", type=float, default=0.5,
- help="coefficient of the value function")
- parser.add_argument("--max-grad-norm", type=float, default=0.5,
- help="the maximum norm for the gradient clipping")
- parser.add_argument("--target-kl", type=float, default=None,
- help="the target KL divergence threshold")
- parser.add_argument("--rpo-alpha", type=float, default=0.5,
- help="the alpha parameter for RPO")
- args = parser.parse_args()
- args.batch_size = int(args.num_envs * args.num_steps)
- args.minibatch_size = int(args.batch_size // args.num_minibatches)
- # fmt: on
- return args
+ env_id: str = "HalfCheetah-v4"
+ """the id of the environment"""
+ total_timesteps: int = 8000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 3e-4
+ """the learning rate of the optimizer"""
+ num_envs: int = 1
+ """the number of parallel game environments"""
+ num_steps: int = 2048
+ """the number of steps to run in each environment per policy rollout"""
+ anneal_lr: bool = True
+ """Toggle learning rate annealing for policy and value networks"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ gae_lambda: float = 0.95
+ """the lambda for the general advantage estimation"""
+ num_minibatches: int = 32
+ """the number of mini-batches"""
+ update_epochs: int = 10
+ """the K epochs to update the policy"""
+ norm_adv: bool = True
+ """Toggles advantages normalization"""
+ clip_coef: float = 0.2
+ """the surrogate clipping coefficient"""
+ clip_vloss: bool = True
+ """Toggles whether or not to use a clipped loss for the value function, as per the paper."""
+ ent_coef: float = 0.0
+ """coefficient of the entropy"""
+ vf_coef: float = 0.5
+ """coefficient of the value function"""
+ max_grad_norm: float = 0.5
+ """the maximum norm for the gradient clipping"""
+ target_kl: float = None
+ """the target KL divergence threshold"""
+ rpo_alpha: float = 0.5
+ """the alpha parameter for RPO"""
+
+ # to be filled in runtime
+ batch_size: int = 0
+ """the batch size (computed in runtime)"""
+ minibatch_size: int = 0
+ """the mini-batch size (computed in runtime)"""
+ num_iterations: int = 0
+ """the number of iterations (computed in runtime)"""
def make_env(env_id, idx, capture_video, run_name, gamma):
@@ -143,7 +145,10 @@ def get_action_and_value(self, x, action=None):
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
+ args.batch_size = int(args.num_envs * args.num_steps)
+ args.minibatch_size = int(args.batch_size // args.num_minibatches)
+ args.num_iterations = args.total_timesteps // args.batch_size
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
@@ -221,17 +226,12 @@ def get_action_and_value(self, x, action=None):
rewards[step] = torch.tensor(reward).to(device).view(-1)
next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
- # Only print when at least 1 env is done
- if "final_info" not in infos:
- continue
-
- for info in infos["final_info"]:
- # Skip the envs that are not done
- if info is None:
- continue
- print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
- writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
- writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
+ if "final_info" in infos:
+ for info in infos["final_info"]:
+ if info and "episode" in info:
+ print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+ writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+ writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
# bootstrap value if not done
with torch.no_grad():
diff --git a/cleanrl/sac_atari.py b/cleanrl/sac_atari.py
index f7f4ccb99..36c8c5d59 100644
--- a/cleanrl/sac_atari.py
+++ b/cleanrl/sac_atari.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/sac/#sac_ataripy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -11,6 +10,7 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from stable_baselines3.common.atari_wrappers import (
ClipRewardEnv,
EpisodicLifeEnv,
@@ -23,58 +23,54 @@
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="weather to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="BeamRiderNoFrameskip-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=5000000,
- help="total timesteps of the experiments")
- parser.add_argument("--buffer-size", type=int, default=int(1e6),
- help="the replay memory buffer size") # smaller than in original paper but evaluation is done only for 100k steps anyway
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=1.0,
- help="target smoothing coefficient (default: 1)") # Default is 1 to perform replacement update
- parser.add_argument("--batch-size", type=int, default=64,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--learning-starts", type=int, default=2e4,
- help="timestep to start learning")
- parser.add_argument("--policy-lr", type=float, default=3e-4,
- help="the learning rate of the policy network optimizer")
- parser.add_argument("--q-lr", type=float, default=3e-4,
- help="the learning rate of the Q network network optimizer")
- parser.add_argument("--update-frequency", type=int, default=4,
- help="the frequency of training updates")
- parser.add_argument("--target-network-frequency", type=int, default=8000,
- help="the frequency of updates for the target networks")
- parser.add_argument("--alpha", type=float, default=0.2,
- help="Entropy regularization coefficient.")
- parser.add_argument("--autotune", type=lambda x:bool(strtobool(x)), default=True, nargs="?", const=True,
- help="automatic tuning of the entropy coefficient")
- parser.add_argument("--target-entropy-scale", type=float, default=0.89,
- help="coefficient for scaling the autotune entropy target")
- args = parser.parse_args()
- # fmt: on
- return args
+ env_id: str = "BeamRiderNoFrameskip-v4"
+ """the id of the environment"""
+ total_timesteps: int = 5000000
+ """total timesteps of the experiments"""
+ buffer_size: int = int(1e6)
+ """the replay memory buffer size""" # smaller than in original paper but evaluation is done only for 100k steps anyway
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 1.0
+ """target smoothing coefficient (default: 1)"""
+ batch_size: int = 64
+    """the batch size of samples from the replay memory"""
+    learning_starts: int = 20000
+ """timestep to start learning"""
+ policy_lr: float = 3e-4
+ """the learning rate of the policy network optimizer"""
+ q_lr: float = 3e-4
+ """the learning rate of the Q network network optimizer"""
+ update_frequency: int = 4
+ """the frequency of training updates"""
+ target_network_frequency: int = 8000
+ """the frequency of updates for the target networks"""
+ alpha: float = 0.2
+ """Entropy regularization coefficient."""
+ autotune: bool = True
+ """automatic tuning of the entropy coefficient"""
+ target_entropy_scale: float = 0.89
+ """coefficient for scaling the autotune entropy target"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -184,7 +180,7 @@ def get_action(self, x):
poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl/sac_continuous_action.py b/cleanrl/sac_continuous_action.py
index a12beec64..019608442 100644
--- a/cleanrl/sac_continuous_action.py
+++ b/cleanrl/sac_continuous_action.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/sac/#sac_continuous_actionpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -11,62 +10,59 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Hopper-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=1000000,
- help="total timesteps of the experiments")
- parser.add_argument("--buffer-size", type=int, default=int(1e6),
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=0.005,
- help="target smoothing coefficient (default: 0.005)")
- parser.add_argument("--batch-size", type=int, default=256,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--learning-starts", type=int, default=5e3,
- help="timestep to start learning")
- parser.add_argument("--policy-lr", type=float, default=3e-4,
- help="the learning rate of the policy network optimizer")
- parser.add_argument("--q-lr", type=float, default=1e-3,
- help="the learning rate of the Q network network optimizer")
- parser.add_argument("--policy-frequency", type=int, default=2,
- help="the frequency of training policy (delayed)")
- parser.add_argument("--target-network-frequency", type=int, default=1, # Denis Yarats' implementation delays this by 2.
- help="the frequency of updates for the target nerworks")
- parser.add_argument("--noise-clip", type=float, default=0.5,
- help="noise clip parameter of the Target Policy Smoothing Regularization")
- parser.add_argument("--alpha", type=float, default=0.2,
- help="Entropy regularization coefficient.")
- parser.add_argument("--autotune", type=lambda x:bool(strtobool(x)), default=True, nargs="?", const=True,
- help="automatic tuning of the entropy coefficient")
- args = parser.parse_args()
- # fmt: on
- return args
+ env_id: str = "Hopper-v4"
+ """the environment id of the task"""
+ total_timesteps: int = 1000000
+ """total timesteps of the experiments"""
+ buffer_size: int = int(1e6)
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 0.005
+ """target smoothing coefficient (default: 0.005)"""
+ batch_size: int = 256
+    """the batch size of samples from the replay memory"""
+    learning_starts: int = 5000
+ """timestep to start learning"""
+ policy_lr: float = 3e-4
+ """the learning rate of the policy network optimizer"""
+ q_lr: float = 1e-3
+ """the learning rate of the Q network network optimizer"""
+ policy_frequency: int = 2
+ """the frequency of training policy (delayed)"""
+ target_network_frequency: int = 1 # Denis Yarats' implementation delays this by 2.
+    """the frequency of updates for the target networks"""
+ noise_clip: float = 0.5
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
+ alpha: float = 0.2
+ """Entropy regularization coefficient."""
+ autotune: bool = True
+ """automatic tuning of the entropy coefficient"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -153,7 +149,7 @@ def get_action(self, x):
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl/td3_continuous_action.py b/cleanrl/td3_continuous_action.py
index 837e27faf..418c4b3b3 100644
--- a/cleanrl/td3_continuous_action.py
+++ b/cleanrl/td3_continuous_action.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/td3/#td3_continuous_actionpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import gymnasium as gym
import numpy as np
@@ -11,64 +10,61 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+import tyro
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, `torch.backends.cudnn.deterministic=False`")
- parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, cuda will be enabled by default")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ torch_deterministic: bool = True
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
+ cuda: bool = True
+ """if toggled, cuda will be enabled by default"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="Hopper-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=1000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=3e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--buffer-size", type=int, default=int(1e6),
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=0.005,
- help="target smoothing coefficient (default: 0.005)")
- parser.add_argument("--batch-size", type=int, default=256,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--policy-noise", type=float, default=0.2,
- help="the scale of policy noise")
- parser.add_argument("--exploration-noise", type=float, default=0.1,
- help="the scale of exploration noise")
- parser.add_argument("--learning-starts", type=int, default=25e3,
- help="timestep to start learning")
- parser.add_argument("--policy-frequency", type=int, default=2,
- help="the frequency of training policy (delayed)")
- parser.add_argument("--noise-clip", type=float, default=0.5,
- help="noise clip parameter of the Target Policy Smoothing Regularization")
- args = parser.parse_args()
- # fmt: on
- return args
+ env_id: str = "Hopper-v4"
+ """the id of the environment"""
+ total_timesteps: int = 1000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 3e-4
+ """the learning rate of the optimizer"""
+ buffer_size: int = int(1e6)
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 0.005
+ """target smoothing coefficient (default: 0.005)"""
+ batch_size: int = 256
+    """the batch size of samples from the replay memory"""
+ policy_noise: float = 0.2
+ """the scale of policy noise"""
+ exploration_noise: float = 0.1
+ """the scale of exploration noise"""
+    learning_starts: int = 25000
+ """timestep to start learning"""
+ policy_frequency: int = 2
+ """the frequency of training policy (delayed)"""
+ noise_clip: float = 0.5
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -132,7 +128,7 @@ def forward(self, x):
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl/td3_continuous_action_jax.py b/cleanrl/td3_continuous_action_jax.py
index 3c584c6f3..b69f0a0d1 100644
--- a/cleanrl/td3_continuous_action_jax.py
+++ b/cleanrl/td3_continuous_action_jax.py
@@ -1,9 +1,8 @@
# docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/td3/#td3_continuous_action_jaxpy
-import argparse
import os
import random
import time
-from distutils.util import strtobool
+from dataclasses import dataclass
import flax
import flax.linen as nn
@@ -12,61 +11,58 @@
import jax.numpy as jnp
import numpy as np
import optax
+import tyro
from flax.training.train_state import TrainState
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
- help="the name of this experiment")
- parser.add_argument("--seed", type=int, default=1,
- help="seed of the experiment")
- parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="if toggled, this experiment will be tracked with Weights and Biases")
- parser.add_argument("--wandb-project-name", type=str, default="cleanRL",
- help="the wandb's project name")
- parser.add_argument("--wandb-entity", type=str, default=None,
- help="the entity (team) of wandb's project")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
- parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to save model into the `runs/{run_name}` folder")
- parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to upload the saved model to huggingface")
- parser.add_argument("--hf-entity", type=str, default="",
- help="the user or org name of the model repository from the Hugging Face Hub")
+@dataclass
+class Args:
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
+ """the name of this experiment"""
+ seed: int = 1
+ """seed of the experiment"""
+ track: bool = False
+ """if toggled, this experiment will be tracked with Weights and Biases"""
+ wandb_project_name: str = "cleanRL"
+ """the wandb's project name"""
+ wandb_entity: str = None
+ """the entity (team) of wandb's project"""
+ capture_video: bool = False
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
+ save_model: bool = False
+ """whether to save model into the `runs/{run_name}` folder"""
+ upload_model: bool = False
+ """whether to upload the saved model to huggingface"""
+ hf_entity: str = ""
+ """the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
- parser.add_argument("--env-id", type=str, default="HalfCheetah-v4",
- help="the id of the environment")
- parser.add_argument("--total-timesteps", type=int, default=1000000,
- help="total timesteps of the experiments")
- parser.add_argument("--learning-rate", type=float, default=3e-4,
- help="the learning rate of the optimizer")
- parser.add_argument("--buffer-size", type=int, default=int(1e6),
- help="the replay memory buffer size")
- parser.add_argument("--gamma", type=float, default=0.99,
- help="the discount factor gamma")
- parser.add_argument("--tau", type=float, default=0.005,
- help="target smoothing coefficient (default: 0.005)")
- parser.add_argument("--policy-noise", type=float, default=0.2,
- help="the scale of policy noise")
- parser.add_argument("--batch-size", type=int, default=256,
- help="the batch size of sample from the reply memory")
- parser.add_argument("--exploration-noise", type=float, default=0.1,
- help="the scale of exploration noise")
- parser.add_argument("--learning-starts", type=int, default=25e3,
- help="timestep to start learning")
- parser.add_argument("--policy-frequency", type=int, default=2,
- help="the frequency of training policy (delayed)")
- parser.add_argument("--noise-clip", type=float, default=0.5,
- help="noise clip parameter of the Target Policy Smoothing Regularization")
- args = parser.parse_args()
- # fmt: on
- return args
+ env_id: str = "Hopper-v4"
+ """the id of the environment"""
+ total_timesteps: int = 1000000
+ """total timesteps of the experiments"""
+ learning_rate: float = 3e-4
+ """the learning rate of the optimizer"""
+ buffer_size: int = int(1e6)
+ """the replay memory buffer size"""
+ gamma: float = 0.99
+ """the discount factor gamma"""
+ tau: float = 0.005
+ """target smoothing coefficient (default: 0.005)"""
+ batch_size: int = 256
+    """the batch size of samples from the replay memory"""
+ policy_noise: float = 0.2
+ """the scale of policy noise"""
+ exploration_noise: float = 0.1
+ """the scale of exploration noise"""
+    learning_starts: int = int(25e3)
+ """timestep to start learning"""
+ policy_frequency: int = 2
+ """the frequency of training policy (delayed)"""
+ noise_clip: float = 0.5
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
def make_env(env_id, seed, idx, capture_video, run_name):
@@ -126,7 +122,7 @@ class TrainState(TrainState):
poetry run pip install "stable_baselines3==2.0.0a1"
"""
)
- args = parse_args()
+ args = tyro.cli(Args)
run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
import wandb
diff --git a/cleanrl_utils/benchmark.py b/cleanrl_utils/benchmark.py
index 5274810ba..042a223f7 100644
--- a/cleanrl_utils/benchmark.py
+++ b/cleanrl_utils/benchmark.py
@@ -1,49 +1,74 @@
-import argparse
+import math
import os
import shlex
import subprocess
-from distutils.util import strtobool
+import uuid
+from dataclasses import dataclass
+from typing import List, Optional
import requests
+import tyro
-def parse_args():
- # fmt: off
- parser = argparse.ArgumentParser()
- parser.add_argument("--env-ids", nargs="+", default=["CartPole-v1", "Acrobot-v1", "MountainCar-v0"],
- help="the ids of the environment to benchmark")
- parser.add_argument("--command", type=str, default="poetry run python cleanrl/ppo.py",
- help="the command to run")
- parser.add_argument("--num-seeds", type=int, default=3,
- help="the number of random seeds")
- parser.add_argument("--start-seed", type=int, default=1,
- help="the number of the starting seed")
- parser.add_argument("--workers", type=int, default=0,
- help="the number of workers to run benchmark experimenets")
- parser.add_argument("--auto-tag", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
- help="if toggled, the runs will be tagged with git tags, commit, and pull request number if possible")
- args = parser.parse_args()
- # fmt: on
- return args
+@dataclass
+class Args:
+ env_ids: List[str]
+ """the ids of the environment to compare"""
+ command: str
+ """the command to run"""
+ num_seeds: int = 3
+ """the number of random seeds"""
+ start_seed: int = 1
+ """the number of the starting seed"""
+ workers: int = 0
+    """the number of workers to run benchmark experiments"""
+ auto_tag: bool = True
+ """if toggled, the runs will be tagged with git tags, commit, and pull request number if possible"""
+ slurm_template_path: Optional[str] = None
+ """the path to the slurm template file (see docs for more details)"""
+ slurm_gpus_per_task: Optional[int] = None
+ """the number of gpus per task to use for slurm jobs"""
+ slurm_total_cpus: Optional[int] = None
+    """the total number of cpus to use for slurm jobs"""
+ slurm_ntasks: Optional[int] = None
+ """the number of tasks to use for slurm jobs"""
+ slurm_nodes: Optional[int] = None
+ """the number of nodes to use for slurm jobs"""
def run_experiment(command: str):
command_list = shlex.split(command)
print(f"running {command}")
- fd = subprocess.Popen(command_list)
- return_code = fd.wait()
- assert return_code == 0
+
+ # Use subprocess.PIPE to capture the output
+ fd = subprocess.Popen(command_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ output, errors = fd.communicate()
+
+ return_code = fd.returncode
+ assert return_code == 0, f"Command failed with error: {errors.decode('utf-8')}"
+
+ # Convert bytes to string and strip leading/trailing whitespaces
+ return output.decode("utf-8").strip()
def autotag() -> str:
wandb_tag = ""
print("autotag feature is enabled")
+ git_tag = ""
try:
git_tag = subprocess.check_output(["git", "describe", "--tags"]).decode("ascii").strip()
- wandb_tag = f"{git_tag}"
print(f"identified git tag: {git_tag}")
- except subprocess.CalledProcessError:
- return wandb_tag
+ except subprocess.CalledProcessError as e:
+ print(e)
+ if len(git_tag) == 0:
+ try:
+ count = int(subprocess.check_output(["git", "rev-list", "--count", "HEAD"]).decode("ascii").strip())
+ hash = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip()
+ git_tag = f"no-tag-{count}-g{hash}"
+ print(f"identified git tag: {git_tag}")
+ except subprocess.CalledProcessError as e:
+ print(e)
+ wandb_tag = git_tag
git_commit = subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"]).decode("ascii").strip()
try:
@@ -63,16 +88,16 @@ def autotag() -> str:
if __name__ == "__main__":
- args = parse_args()
+ args = tyro.cli(Args)
if args.auto_tag:
- if "WANDB_TAGS" in os.environ:
- raise ValueError(
- "WANDB_TAGS is already set. Please unset it before running this script or run the script with --auto-tag False"
- )
+ existing_wandb_tag = os.environ.get("WANDB_TAGS", "")
wandb_tag = autotag()
if len(wandb_tag) > 0:
- os.environ["WANDB_TAGS"] = wandb_tag
-
+ if len(existing_wandb_tag) > 0:
+ os.environ["WANDB_TAGS"] = ",".join([existing_wandb_tag, wandb_tag])
+ else:
+ os.environ["WANDB_TAGS"] = wandb_tag
+ print("WANDB_TAGS: ", os.environ.get("WANDB_TAGS", ""))
commands = []
for seed in range(0, args.num_seeds):
for env_id in args.env_ids:
@@ -82,7 +107,7 @@ def autotag() -> str:
for command in commands:
print(command)
- if args.workers > 0:
+ if args.workers > 0 and args.slurm_template_path is None:
from concurrent.futures import ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=args.workers, thread_name_prefix="cleanrl-benchmark-worker-")
@@ -91,3 +116,37 @@ def autotag() -> str:
executor.shutdown(wait=True)
else:
print("not running the experiments because --workers is set to 0; just printing the commands to run")
+
+ # SLURM logic
+ if args.slurm_template_path is not None:
+ if not os.path.exists("slurm"):
+ os.makedirs("slurm")
+ if not os.path.exists("slurm/logs"):
+ os.makedirs("slurm/logs")
+ print("======= slurm commands to run:")
+ with open(args.slurm_template_path) as f:
+ slurm_template = f.read()
+ slurm_template = slurm_template.replace("{{array}}", f"0-{len(commands) - 1}%{args.workers}")
+ slurm_template = slurm_template.replace("{{env_ids}}", f"({' '.join(args.env_ids)})")
+ slurm_template = slurm_template.replace(
+ "{{seeds}}",
+ f"({' '.join([str(args.start_seed + int(seed)) for seed in range(args.num_seeds)])})",
+ )
+ slurm_template = slurm_template.replace("{{len_seeds}}", f"{args.num_seeds}")
+ slurm_template = slurm_template.replace("{{command}}", args.command)
+ slurm_template = slurm_template.replace("{{gpus_per_task}}", f"{args.slurm_gpus_per_task}")
+ total_gpus = args.slurm_gpus_per_task * args.slurm_ntasks
+ slurm_cpus_per_gpu = math.ceil(args.slurm_total_cpus / total_gpus)
+ slurm_template = slurm_template.replace("{{cpus_per_gpu}}", f"{slurm_cpus_per_gpu}")
+ slurm_template = slurm_template.replace("{{ntasks}}", f"{args.slurm_ntasks}")
+ if args.slurm_nodes is not None:
+ slurm_template = slurm_template.replace("{{nodes}}", f"#SBATCH --nodes={args.slurm_nodes}")
+ else:
+ slurm_template = slurm_template.replace("{{nodes}}", "")
+ filename = str(uuid.uuid4())
+        open(os.path.join("slurm", f"{filename}.slurm"), "w").write(slurm_template)
+        slurm_path = os.path.join("slurm", f"{filename}.slurm")
+ print(f"saving command in {slurm_path}")
+ if args.workers > 0:
+ job_id = run_experiment(f"sbatch --parsable {slurm_path}")
+ print(f"Job ID: {job_id}")
diff --git a/cleanrl_utils/enjoy.py b/cleanrl_utils/enjoy.py
index a9ab51b78..afc869669 100644
--- a/cleanrl_utils/enjoy.py
+++ b/cleanrl_utils/enjoy.py
@@ -1,5 +1,4 @@
import argparse
-from distutils.util import strtobool
from huggingface_hub import hf_hub_download
@@ -21,8 +20,6 @@ def parse_args():
help="the id of the environment")
parser.add_argument("--eval-episodes", type=int, default=10,
help="the number of evaluation episodes")
- parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
- help="whether to capture videos of the agent performances (check out `videos` folder)")
args = parser.parse_args()
# fmt: on
return args
diff --git a/cleanrl_utils/tuner.py b/cleanrl_utils/tuner.py
index ed576e559..d72af58ab 100644
--- a/cleanrl_utils/tuner.py
+++ b/cleanrl_utils/tuner.py
@@ -87,7 +87,7 @@ def objective(trial: optuna.Trial):
for seed in range(num_seeds):
normalized_scores = []
for env_id in self.target_scores.keys():
- sys.argv = algo_command + [f"--env-id={env_id}", f"--seed={seed}", "--track=False"]
+ sys.argv = algo_command + [f"--env-id={env_id}", f"--seed={seed}"]
with HiddenPrints():
experiment = runpy.run_path(path_name=self.script, run_name="__main__")
diff --git a/cloud/examples/submit_exp.sh b/cloud/examples/submit_exp.sh
index 56344cea7..de54cec3c 100644
--- a/cloud/examples/submit_exp.sh
+++ b/cloud/examples/submit_exp.sh
@@ -13,7 +13,7 @@ python -m cleanrl.submit_exp --exp-script offline_dqn_cql_atari_visual.sh \
--num-hours 48.0 \
--submit-aws $SUBMIT_AWS
-python ppg_procgen_impala_cnn.py --env-id starpilot --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
+python ppg_procgen_impala_cnn.py --env-id starpilot --capture_video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1
python -m cleanrl.utils.submit_exp --exp-script ppo.sh \
--algo ppo.py \
diff --git a/docs/advanced/hyperparameter-tuning.md b/docs/advanced/hyperparameter-tuning.md
index 65eb7ffc5..849632969 100644
--- a/docs/advanced/hyperparameter-tuning.md
+++ b/docs/advanced/hyperparameter-tuning.md
@@ -23,12 +23,12 @@ tuner = Tuner(
"Acrobot-v1": [-500, 0],
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]),
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 100000,
"num-envs": 16,
},
@@ -143,12 +143,12 @@ tuner = Tuner(
"CartPole-v1": None,
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]),
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 100000,
"num-envs": 16,
},
@@ -183,12 +183,12 @@ tuner = Tuner(
"CartPole-v1": None,
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]),
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 100000,
"num-envs": 16,
},
@@ -222,12 +222,12 @@ tuner = Tuner(
"CartPole-v1": None,
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]),
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 100000,
"num-envs": 16,
},
diff --git a/docs/advanced/resume-training.md b/docs/advanced/resume-training.md
index a3f8a7f59..97ded751e 100644
--- a/docs/advanced/resume-training.md
+++ b/docs/advanced/resume-training.md
@@ -27,7 +27,7 @@ for update in range(starting_update, num_updates + 1):
Then we could run the following to train our agents
```
-python ppo_gridnet.py --prod-mode --capture-video
+python ppo_gridnet.py --prod-mode --capture_video
```
If the training was terminated early, we can still see the last updated model `agent.pt` in W&B like in this URL [https://wandb.ai/costa-huang/cleanRL/runs/21421tda/files](https://wandb.ai/costa-huang/cleanRL/runs/21421tda/files) or as follows
@@ -72,5 +72,5 @@ for update in range(starting_update, num_updates + 1):
To resume training, note the ID of the experiment is `21421tda` as in the URL [https://wandb.ai/costa-huang/cleanRL/runs/21421tda](https://wandb.ai/costa-huang/cleanRL/runs/21421tda), so we need to pass in the ID via environment variable to trigger the resume mode of W&B:
```
-WANDB_RUN_ID=21421tda WANDB_RESUME=must python ppo_gridnet.py --prod-mode --capture-video
+WANDB_RUN_ID=21421tda WANDB_RESUME=must python ppo_gridnet.py --prod-mode --capture_video
```
\ No newline at end of file
diff --git a/docs/benchmark/ddpg.md b/docs/benchmark/ddpg.md
new file mode 100644
index 000000000..cb03c0f31
--- /dev/null
+++ b/docs/benchmark/ddpg.md
@@ -0,0 +1,8 @@
+| | openrlbenchmark/cleanrl/ddpg_continuous_action ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ddpg_continuous_action_jax ({'tag': ['pr-424']}) |
+|:--------------------|:-----------------------------------------------------------------------|:---------------------------------------------------------------------------|
+| HalfCheetah-v4 | 10374.07 ± 157.37 | 8638.60 ± 1954.46 |
+| Walker2d-v4 | 1240.16 ± 390.10 | 1427.23 ± 104.91 |
+| Hopper-v4 | 1576.78 ± 818.98 | 1208.52 ± 659.22 |
+| InvertedPendulum-v4 | 642.68 ± 69.56 | 804.30 ± 87.60 |
+| Humanoid-v4 | 1699.56 ± 694.22 | 1513.61 ± 248.60 |
+| Pusher-v4 | -77.30 ± 38.78 | -38.56 ± 4.47 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo.md b/docs/benchmark/ppo.md
new file mode 100644
index 000000000..07fa4506c
--- /dev/null
+++ b/docs/benchmark/ppo.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo ({'tag': ['pr-424']}) |
+|:---------------|:----------------------------------------------------|
+| CartPole-v1 | 490.04 ± 6.12 |
+| Acrobot-v1 | -86.36 ± 1.32 |
+| MountainCar-v0 | -200.00 ± 0.00 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari.md b/docs/benchmark/ppo_atari.md
new file mode 100644
index 000000000..f8a0094cd
--- /dev/null
+++ b/docs/benchmark/ppo_atari.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) |
+|:------------------------|:----------------------------------------------------------|
+| PongNoFrameskip-v4 | 20.36 ± 0.20 |
+| BeamRiderNoFrameskip-v4 | 1915.93 ± 484.58 |
+| BreakoutNoFrameskip-v4 | 414.66 ± 28.09 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_envpool.md b/docs/benchmark/ppo_atari_envpool.md
new file mode 100644
index 000000000..4f6f20afc
--- /dev/null
+++ b/docs/benchmark/ppo_atari_envpool.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) |
+|:-------------|:------------------------------------------------------------------|:----------------------------------------------------------|
+| Pong-v5 | 20.45 ± 0.09 | 20.36 ± 0.20 |
+| BeamRider-v5 | 2501.85 ± 210.52 | 1915.93 ± 484.58 |
+| Breakout-v5 | 211.24 ± 151.84 | 414.66 ± 28.09 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_envpool_runtimes.md b/docs/benchmark/ppo_atari_envpool_runtimes.md
new file mode 100644
index 000000000..dcd106cbe
--- /dev/null
+++ b/docs/benchmark/ppo_atari_envpool_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) |
+|:-------------|------------------------------------------------------------------:|----------------------------------------------------------:|
+| Pong-v5 | 178.375 | 281.071 |
+| BeamRider-v5 | 182.944 | 284.941 |
+| Breakout-v5 | 151.384 | 264.077 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax.md b/docs/benchmark/ppo_atari_envpool_xla_jax.md
new file mode 100644
index 000000000..e85cd8e55
--- /dev/null
+++ b/docs/benchmark/ppo_atari_envpool_xla_jax.md
@@ -0,0 +1,59 @@
+| | openrlbenchmark/envpool-atari/ppo_atari_envpool_xla_jax ({}) | openrlbenchmark/baselines/baselines-ppo2-cnn ({}) |
+|:--------------------|:---------------------------------------------------------------|:----------------------------------------------------|
+| Alien-v5 | 1736.39 ± 68.65 | 1705.80 ± 439.74 |
+| Amidar-v5 | 653.53 ± 44.06 | 585.99 ± 52.92 |
+| Assault-v5 | 6791.74 ± 420.03 | 4878.67 ± 815.64 |
+| Asterix-v5 | 4820.33 ± 1091.83 | 3738.50 ± 745.13 |
+| Asteroids-v5 | 1633.67 ± 247.21 | 1556.90 ± 151.20 |
+| Atlantis-v5 | 3778458.33 ± 117680.68 | 2036749.00 ± 95929.75 |
+| BankHeist-v5 | 1195.44 ± 18.54 | 1213.47 ± 14.46 |
+| BattleZone-v5 | 24283.75 ± 1841.94 | 19980.00 ± 1355.21 |
+| BeamRider-v5 | 2478.44 ± 336.55 | 2835.71 ± 387.92 |
+| Berzerk-v5 | 992.88 ± 196.90 | 1049.77 ± 144.58 |
+| Bowling-v5 | 51.62 ± 13.53 | 59.66 ± 0.62 |
+| Boxing-v5 | 92.68 ± 1.41 | 93.32 ± 0.36 |
+| Breakout-v5 | 430.09 ± 8.12 | 405.73 ± 11.47 |
+| Centipede-v5 | 3309.34 ± 325.05 | 3688.54 ± 412.24 |
+| ChopperCommand-v5 | 5642.83 ± 802.34 | 816.33 ± 114.14 |
+| CrazyClimber-v5 | 118763.04 ± 4915.34 | 119344.67 ± 4902.83 |
+| Defender-v5 | 48558.98 ± 4466.76 | 50161.67 ± 4477.49 |
+| DemonAttack-v5 | 29283.83 ± 7007.31 | 13788.43 ± 1313.44 |
+| DoubleDunk-v5 | -6.81 ± 0.24 | -12.96 ± 0.31 |
+| Enduro-v5 | 1297.23 ± 143.71 | 986.69 ± 25.28 |
+| FishingDerby-v5 | 21.21 ± 6.73 | 26.23 ± 2.76 |
+| Freeway-v5 | 33.10 ± 0.31 | 32.97 ± 0.37 |
+| Frostbite-v5 | 1137.34 ± 1192.05 | 933.60 ± 885.92 |
+| Gopher-v5 | 6505.29 ± 7655.20 | 3672.53 ± 1749.20 |
+| Gravitar-v5 | 1099.33 ± 603.06 | 881.67 ± 33.73 |
+| Hero-v5 | 26429.65 ± 924.74 | 24746.88 ± 3530.10 |
+| IceHockey-v5 | -4.33 ± 0.43 | -4.12 ± 0.20 |
+| Jamesbond-v5 | 496.08 ± 24.60 | 536.50 ± 82.33 |
+| Kangaroo-v5 | 6582.12 ± 5395.44 | 5325.33 ± 3464.80 |
+| Krull-v5 | 9718.09 ± 649.15 | 8737.10 ± 294.58 |
+| KungFuMaster-v5 | 26000.25 ± 1965.22 | 30451.67 ± 5515.45 |
+| MontezumaRevenge-v5 | 0.08 ± 0.12 | 1.00 ± 1.41 |
+| MsPacman-v5 | 2345.67 ± 185.94 | 2152.83 ± 152.80 |
+| NameThisGame-v5 | 5750.00 ± 181.32 | 6815.63 ± 1098.95 |
+| Phoenix-v5 | 14474.11 ± 1794.83 | 9517.73 ± 1176.62 |
+| Pitfall-v5 | 0.00 ± 0.00 | -0.76 ± 0.55 |
+| Pong-v5 | 20.39 ± 0.24 | 20.45 ± 0.81 |
+| PrivateEye-v5 | 100.00 ± 0.00 | 31.83 ± 43.74 |
+| Qbert-v5 | 17246.27 ± 605.40 | 15228.25 ± 920.95 |
+| Riverraid-v5 | 8275.25 ± 256.63 | 9023.57 ± 1386.85 |
+| RoadRunner-v5 | 33040.38 ± 16488.95 | 40125.33 ± 7249.13 |
+| Robotank-v5 | 14.43 ± 4.98 | 16.45 ± 3.37 |
+| Seaquest-v5 | 1240.30 ± 419.36 | 1518.33 ± 400.35 |
+| Skiing-v5 | -18483.46 ± 8684.71 | -22978.48 ± 9894.25 |
+| Solaris-v5 | 2198.36 ± 147.23 | 2365.33 ± 157.75 |
+| SpaceInvaders-v5 | 1188.82 ± 80.52 | 1019.75 ± 49.08 |
+| StarGunner-v5 | 43519.12 ± 4709.23 | 44457.67 ± 3031.86 |
+| Surround-v5 | -2.58 ± 2.31 | -4.97 ± 0.99 |
+| Tennis-v5 | -17.64 ± 4.60 | -16.44 ± 1.46 |
+| TimePilot-v5 | 6476.46 ± 993.30 | 6346.67 ± 663.31 |
+| Tutankham-v5 | 249.05 ± 16.56 | 190.73 ± 12.00 |
+| UpNDown-v5 | 487495.41 ± 39751.49 | 156143.70 ± 70620.88 |
+| Venture-v5 | 0.00 ± 0.00 | 109.33 ± 61.57 |
+| VideoPinball-v5 | 43133.94 ± 6362.12 | 53121.26 ± 2580.70 |
+| WizardOfWor-v5 | 6353.58 ± 116.59 | 5346.33 ± 277.11 |
+| YarsRevenge-v5 | 55757.68 ± 7467.49 | 9394.97 ± 2743.74 |
+| Zaxxon-v5 | 3689.67 ± 2477.25 | 5532.67 ± 2607.65 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax_runtimes.md b/docs/benchmark/ppo_atari_envpool_xla_jax_runtimes.md
new file mode 100644
index 000000000..09fe628f0
--- /dev/null
+++ b/docs/benchmark/ppo_atari_envpool_xla_jax_runtimes.md
@@ -0,0 +1,59 @@
+| | openrlbenchmark/envpool-atari/ppo_atari_envpool_xla_jax ({}) | openrlbenchmark/baselines/baselines-ppo2-cnn ({}) |
+|:--------------------|---------------------------------------------------------------:|----------------------------------------------------:|
+| Alien-v5 | 50.3275 | 117.397 |
+| Amidar-v5 | 42.8176 | 114.093 |
+| Assault-v5 | 35.9245 | 108.094 |
+| Asterix-v5 | 37.7117 | 113.386 |
+| Asteroids-v5 | 39.9731 | 114.409 |
+| Atlantis-v5 | 40.1527 | 123.05 |
+| BankHeist-v5 | 38.7443 | 137.308 |
+| BattleZone-v5 | 45.0654 | 138.489 |
+| BeamRider-v5 | 42.0778 | 119.437 |
+| Berzerk-v5 | 38.7173 | 135.316 |
+| Bowling-v5 | 35.0156 | 131.365 |
+| Boxing-v5 | 48.8149 | 151.607 |
+| Breakout-v5 | 42.3547 | 122.828 |
+| Centipede-v5 | 43.6886 | 150.112 |
+| ChopperCommand-v5 | 45.9308 | 131.192 |
+| CrazyClimber-v5 | 36.0841 | 127.942 |
+| Defender-v5 | 35.1029 | 132.29 |
+| DemonAttack-v5 | 35.41 | 128.476 |
+| DoubleDunk-v5 | 41.4521 | 108.028 |
+| Enduro-v5 | 44.9909 | 142.046 |
+| FishingDerby-v5 | 51.6075 | 151.286 |
+| Freeway-v5 | 50.7103 | 154.163 |
+| Frostbite-v5 | 47.5474 | 146.092 |
+| Gopher-v5 | 36.2977 | 139.496 |
+| Gravitar-v5 | 41.9322 | 138.746 |
+| Hero-v5 | 50.5106 | 152.413 |
+| IceHockey-v5 | 43.0228 | 144.455 |
+| Jamesbond-v5 | 38.8264 | 137.321 |
+| Kangaroo-v5 | 44.4304 | 142.436 |
+| Krull-v5 | 47.7748 | 147.313 |
+| KungFuMaster-v5 | 43.1534 | 141.903 |
+| MontezumaRevenge-v5 | 44.8838 | 146.777 |
+| MsPacman-v5 | 42.6463 | 138.382 |
+| NameThisGame-v5 | 43.8473 | 136.264 |
+| Phoenix-v5 | 36.7586 | 129.716 |
+| Pitfall-v5 | 44.6369 | 137.36 |
+| Pong-v5 | 36.7657 | 118.745 |
+| PrivateEye-v5 | 43.3399 | 143.957 |
+| Qbert-v5 | 40.1475 | 135.255 |
+| Riverraid-v5 | 44.2555 | 142.627 |
+| RoadRunner-v5 | 46.1059 | 145.451 |
+| Robotank-v5 | 48.3364 | 149.681 |
+| Seaquest-v5 | 38.3639 | 136.942 |
+| Skiing-v5 | 38.6402 | 132.061 |
+| Solaris-v5 | 50.2944 | 136.9 |
+| SpaceInvaders-v5 | 39.4931 | 125.83 |
+| StarGunner-v5 | 33.7096 | 119.18 |
+| Surround-v5 | 33.923 | 132.017 |
+| Tennis-v5 | 39.6194 | 97.019 |
+| TimePilot-v5 | 37.0124 | 130.693 |
+| Tutankham-v5 | 36.9677 | 139.694 |
+| UpNDown-v5 | 52.9895 | 140.876 |
+| Venture-v5 | 37.9828 | 144.236 |
+| VideoPinball-v5 | 47.1716 | 179.866 |
+| WizardOfWor-v5 | 37.5751 | 142.086 |
+| YarsRevenge-v5 | 36.5889 | 127.358 |
+| Zaxxon-v5 | 41.9785 | 133.922 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax_scan.md b/docs/benchmark/ppo_atari_envpool_xla_jax_scan.md
new file mode 100644
index 000000000..7fca897b7
--- /dev/null
+++ b/docs/benchmark/ppo_atari_envpool_xla_jax_scan.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) |
+|:-------------|:--------------------------------------------------------------------------|:-------------------------------------------------------------------------------|
+| Pong-v5 | 20.82 ± 0.21 | 20.52 ± 0.32 |
+| BeamRider-v5 | 2678.73 ± 426.42 | 2860.61 ± 801.30 |
+| Breakout-v5 | 420.92 ± 16.75 | 423.90 ± 5.49 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax_scan_runtimes.md b/docs/benchmark/ppo_atari_envpool_xla_jax_scan_runtimes.md
new file mode 100644
index 000000000..7c77fc420
--- /dev/null
+++ b/docs/benchmark/ppo_atari_envpool_xla_jax_scan_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) |
+|:-------------|--------------------------------------------------------------------------:|-------------------------------------------------------------------------------:|
+| Pong-v5 | 34.3237 | 34.701 |
+| BeamRider-v5 | 37.1076 | 37.2449 |
+| Breakout-v5 | 39.576 | 39.775 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_lstm.md b/docs/benchmark/ppo_atari_lstm.md
new file mode 100644
index 000000000..3fad61873
--- /dev/null
+++ b/docs/benchmark/ppo_atari_lstm.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_lstm ({'tag': ['pr-424']}) |
+|:------------------------|:---------------------------------------------------------------|
+| PongNoFrameskip-v4 | 19.81 ± 0.62 |
+| BeamRiderNoFrameskip-v4 | 1299.25 ± 509.90 |
+| BreakoutNoFrameskip-v4 | 113.42 ± 5.85 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_lstm_runtimes.md b/docs/benchmark/ppo_atari_lstm_runtimes.md
new file mode 100644
index 000000000..079df7642
--- /dev/null
+++ b/docs/benchmark/ppo_atari_lstm_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_lstm ({'tag': ['pr-424']}) |
+|:------------------------|---------------------------------------------------------------:|
+| PongNoFrameskip-v4 | 317.607 |
+| BeamRiderNoFrameskip-v4 | 314.864 |
+| BreakoutNoFrameskip-v4 | 383.724 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_multigpu.md b/docs/benchmark/ppo_atari_multigpu.md
new file mode 100644
index 000000000..7cec5206e
--- /dev/null
+++ b/docs/benchmark/ppo_atari_multigpu.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_multigpu ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) |
+|:------------------------|:-------------------------------------------------------------------|:----------------------------------------------------------|
+| PongNoFrameskip-v4 | 20.34 ± 0.43 | 20.36 ± 0.20 |
+| BeamRiderNoFrameskip-v4 | 2414.65 ± 643.74 | 1915.93 ± 484.58 |
+| BreakoutNoFrameskip-v4 | 414.94 ± 20.60 | 414.66 ± 28.09 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_multigpu_runtimes.md b/docs/benchmark/ppo_atari_multigpu_runtimes.md
new file mode 100644
index 000000000..60b18bad3
--- /dev/null
+++ b/docs/benchmark/ppo_atari_multigpu_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_multigpu ({'tag': ['pr-424']}) |
+|:------------------------|-------------------------------------------------------------------:|
+| PongNoFrameskip-v4 | 276.599 |
+| BeamRiderNoFrameskip-v4 | 280.902 |
+| BreakoutNoFrameskip-v4 | 270.532 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_atari_runtimes.md b/docs/benchmark/ppo_atari_runtimes.md
new file mode 100644
index 000000000..2d189947e
--- /dev/null
+++ b/docs/benchmark/ppo_atari_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) |
+|:------------------------|----------------------------------------------------------:|
+| PongNoFrameskip-v4 | 281.071 |
+| BeamRiderNoFrameskip-v4 | 284.941 |
+| BreakoutNoFrameskip-v4 | 264.077 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_continuous_action.md b/docs/benchmark/ppo_continuous_action.md
new file mode 100644
index 000000000..bab9ae421
--- /dev/null
+++ b/docs/benchmark/ppo_continuous_action.md
@@ -0,0 +1,11 @@
+| | openrlbenchmark/cleanrl/ppo_continuous_action ({'tag': ['pr-424']}) |
+|:-------------------------------------|:----------------------------------------------------------------------|
+| HalfCheetah-v4 | 1442.64 ± 46.03 |
+| Walker2d-v4 | 2287.95 ± 571.78 |
+| Hopper-v4 | 2382.86 ± 271.74 |
+| InvertedPendulum-v4 | 963.09 ± 22.20 |
+| Humanoid-v4 | 716.11 ± 49.08 |
+| Pusher-v4 | -40.38 ± 7.15 |
+| dm_control/acrobot-swingup-v0 | 25.60 ± 6.30 |
+| dm_control/acrobot-swingup_sparse-v0 | 1.35 ± 0.27 |
+| dm_control/ball_in_cup-catch-v0 | 619.26 ± 278.67 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_continuous_action_runtimes.md b/docs/benchmark/ppo_continuous_action_runtimes.md
new file mode 100644
index 000000000..4ffc2e28d
--- /dev/null
+++ b/docs/benchmark/ppo_continuous_action_runtimes.md
@@ -0,0 +1,11 @@
+| | openrlbenchmark/cleanrl/ppo_continuous_action ({'tag': ['pr-424']}) |
+|:-------------------------------------|----------------------------------------------------------------------:|
+| HalfCheetah-v4 | 25.3589 |
+| Walker2d-v4 | 24.3157 |
+| Hopper-v4 | 25.7066 |
+| InvertedPendulum-v4 | 23.7672 |
+| Humanoid-v4 | 49.5592 |
+| Pusher-v4 | 28.8162 |
+| dm_control/acrobot-swingup-v0 | 26.5793 |
+| dm_control/acrobot-swingup_sparse-v0 | 25.1265 |
+| dm_control/ball_in_cup-catch-v0 | 26.1947 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_envpool.md b/docs/benchmark/ppo_envpool.md
new file mode 100644
index 000000000..f295403e0
--- /dev/null
+++ b/docs/benchmark/ppo_envpool.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) |
+|:-------------|:--------------------------------------------------------------------------|:-------------------------------------------------------------------------------|:------------------------------------------------------------------|
+| Pong-v5 | 20.82 ± 0.21 | 20.52 ± 0.32 | 20.45 ± 0.09 |
+| BeamRider-v5 | 2678.73 ± 426.42 | 2860.61 ± 801.30 | 2501.85 ± 210.52 |
+| Breakout-v5 | 420.92 ± 16.75 | 423.90 ± 5.49 | 211.24 ± 151.84 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_envpool_runtimes.md b/docs/benchmark/ppo_envpool_runtimes.md
new file mode 100644
index 000000000..2a923348b
--- /dev/null
+++ b/docs/benchmark/ppo_envpool_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) |
+|:-------------|--------------------------------------------------------------------------:|-------------------------------------------------------------------------------:|------------------------------------------------------------------:|
+| Pong-v5 | 34.3237 | 34.701 | 178.375 |
+| BeamRider-v5 | 37.1076 | 37.2449 | 182.944 |
+| Breakout-v5 | 39.576 | 39.775 | 151.384 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_procgen.md b/docs/benchmark/ppo_procgen.md
new file mode 100644
index 000000000..bc1865ca3
--- /dev/null
+++ b/docs/benchmark/ppo_procgen.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_procgen ({'tag': ['pr-424']}) |
+|:----------|:------------------------------------------------------------|
+| starpilot | 30.99 ± 1.96 |
+| bossfight | 8.85 ± 0.33 |
+| bigfish | 16.46 ± 2.71 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_procgen_runtimes.md b/docs/benchmark/ppo_procgen_runtimes.md
new file mode 100644
index 000000000..5b956125e
--- /dev/null
+++ b/docs/benchmark/ppo_procgen_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo_procgen ({'tag': ['pr-424']}) |
+|:----------|------------------------------------------------------------:|
+| starpilot | 114.649 |
+| bossfight | 128.679 |
+| bigfish | 107.788 |
\ No newline at end of file
diff --git a/docs/benchmark/ppo_runtimes.md b/docs/benchmark/ppo_runtimes.md
new file mode 100644
index 000000000..0277e1f85
--- /dev/null
+++ b/docs/benchmark/ppo_runtimes.md
@@ -0,0 +1,5 @@
+| | openrlbenchmark/cleanrl/ppo ({'tag': ['pr-424']}) |
+|:---------------|----------------------------------------------------:|
+| CartPole-v1 | 10.4737 |
+| Acrobot-v1 | 15.4606 |
+| MountainCar-v0 | 6.95995 |
\ No newline at end of file
diff --git a/docs/benchmark/sac.md b/docs/benchmark/sac.md
new file mode 100644
index 000000000..b56e478db
--- /dev/null
+++ b/docs/benchmark/sac.md
@@ -0,0 +1,8 @@
+| | openrlbenchmark/cleanrl/sac_continuous_action ({'tag': ['pr-424']}) |
+|:--------------------|:----------------------------------------------------------------------|
+| HalfCheetah-v4 | 9634.89 ± 1423.73 |
+| Walker2d-v4 | 3591.45 ± 911.33 |
+| Hopper-v4 | 2310.46 ± 342.82 |
+| InvertedPendulum-v4 | 909.37 ± 55.66 |
+| Humanoid-v4 | 4996.29 ± 686.40 |
+| Pusher-v4 | -22.45 ± 0.51 |
\ No newline at end of file
diff --git a/docs/benchmark/sac_runtimes.md b/docs/benchmark/sac_runtimes.md
new file mode 100644
index 000000000..b35f21121
--- /dev/null
+++ b/docs/benchmark/sac_runtimes.md
@@ -0,0 +1,8 @@
+| | openrlbenchmark/cleanrl/sac_continuous_action ({'tag': ['pr-424']}) |
+|:--------------------|----------------------------------------------------------------------:|
+| HalfCheetah-v4 | 174.778 |
+| Walker2d-v4 | 161.161 |
+| Hopper-v4 | 173.242 |
+| InvertedPendulum-v4 | 179.042 |
+| Humanoid-v4 | 177.31 |
+| Pusher-v4 | 172.123 |
\ No newline at end of file
diff --git a/docs/benchmark/td3.md b/docs/benchmark/td3.md
new file mode 100644
index 000000000..383b08a0c
--- /dev/null
+++ b/docs/benchmark/td3.md
@@ -0,0 +1,8 @@
+| | openrlbenchmark/cleanrl/td3_continuous_action ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/td3_continuous_action_jax ({'tag': ['pr-424']}) |
+|:--------------------|:----------------------------------------------------------------------|:--------------------------------------------------------------------------|
+| HalfCheetah-v4 | 9583.22 ± 126.09 | 9345.93 ± 770.54 |
+| Walker2d-v4 | 4057.59 ± 658.78 | 3686.19 ± 141.23 |
+| Hopper-v4 | 3134.61 ± 360.18 | 2940.10 ± 655.63 |
+| InvertedPendulum-v4 | 968.99 ± 25.80 | 988.94 ± 8.86 |
+| Humanoid-v4 | 5035.36 ± 21.67 | 5033.22 ± 122.14 |
+| Pusher-v4 | -30.92 ± 1.05 | -29.18 ± 1.02 |
\ No newline at end of file
diff --git a/docs/benchmark/td3_runtimes.md b/docs/benchmark/td3_runtimes.md
new file mode 100644
index 000000000..76451881e
--- /dev/null
+++ b/docs/benchmark/td3_runtimes.md
@@ -0,0 +1,8 @@
+| | openrlbenchmark/cleanrl/td3_continuous_action ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/td3_continuous_action_jax ({'tag': ['pr-424']}) |
+|:--------------------|----------------------------------------------------------------------:|--------------------------------------------------------------------------:|
+| HalfCheetah-v4 | 87.353 | 39.5119 |
+| Walker2d-v4 | 80.8592 | 34.0497 |
+| Hopper-v4 | 90.9921 | 33.4079 |
+| InvertedPendulum-v4 | 70.4218 | 30.2624 |
+| Humanoid-v4 | 79.1624 | 70.2437 |
+| Pusher-v4 | 95.2208 | 39.6051 |
\ No newline at end of file
diff --git a/docs/blog/posts/cleanrl-v1.md b/docs/blog/posts/cleanrl-v1.md
index e40d48294..cfd820212 100644
--- a/docs/blog/posts/cleanrl-v1.md
+++ b/docs/blog/posts/cleanrl-v1.md
@@ -133,12 +133,12 @@ tuner = Tuner(
"Acrobot-v1": [-500, 0],
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]),
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 100000,
"num-envs": 16,
},
@@ -158,12 +158,12 @@ We also added a new tool for running benchmark experiments. The tool is designed
```bash
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \
--num-seeds 3 \
--workers 5
```
-which will run the `ppo.py` script with `--cuda False --track --capture-video` arguments across 3 random seeds for 3 environments. It uses `multiprocessing` to create a pool of 5 workers run the experiments in parallel.
+which will run the `ppo.py` script with `--no_cuda --track --capture_video` arguments across 3 random seeds for 3 environments. It uses `multiprocessing` to create a pool of 5 workers to run the experiments in parallel.
diff --git a/docs/cloud/submit-experiments.md b/docs/cloud/submit-experiments.md
index 8cac08458..dc776247f 100644
--- a/docs/cloud/submit-experiments.md
+++ b/docs/cloud/submit-experiments.md
@@ -6,13 +6,13 @@ Dry run to inspect the generated docker command
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \
--num-seed 1
```
The generated docker command should look like
```
-docker run -d --cpuset-cpus="0" -e WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx vwxyzjn/cleanrl:latest /bin/bash -c "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video --seed 1"
+docker run -d --cpuset-cpus="0" -e WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx vwxyzjn/cleanrl:latest /bin/bash -c "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video --seed 1"
```
### Run on AWS
@@ -21,7 +21,7 @@ Submit a job using AWS's compute-optimized spot instances
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \
--job-queue c5a-large-spot \
--num-seed 1 \
--num-vcpu 1 \
@@ -34,7 +34,7 @@ Submit a job using AWS's accelerated-computing spot instances
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture_video" \
--job-queue g4dn-xlarge-spot \
--num-seed 1 \
--num-vcpu 1 \
@@ -48,7 +48,7 @@ Submit a job using AWS's compute-optimized on-demand instances
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \
--job-queue c5a-large \
--num-seed 1 \
--num-vcpu 1 \
@@ -61,7 +61,7 @@ Submit a job using AWS's accelerated-computing on-demand instances
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture_video" \
--job-queue g4dn-xlarge \
--num-seed 1 \
--num-vcpu 1 \
@@ -94,7 +94,7 @@ Then you could build a container using the `--build` flag based on the `Dockerfi
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \
--build --push
```
@@ -103,7 +103,7 @@ To build a multi-arch image using `--archs linux/arm64,linux/amd64`:
```
poetry run python -m cleanrl_utils.submit_exp \
--docker-tag vwxyzjn/cleanrl:latest \
- --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \
--archs linux/arm64,linux/amd64
--build --push
```
diff --git a/docs/contribution.md b/docs/contribution.md
index aa38905ac..4f4b45580 100644
--- a/docs/contribution.md
+++ b/docs/contribution.md
@@ -81,7 +81,7 @@ poetry install -E "docs mujoco_py"
python -c "import mujoco_py"
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
- --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture-video" \
+ --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture_video" \
--num-seeds 3 \
--workers 1
```
@@ -209,7 +209,7 @@ Here is a checklist of the contribution process. See [:material-github: #331](ht
If you need to run benchmark experiments for a performance-impacting changes:
- [ ] I have contacted @vwxyzjn to obtain access to the [openrlbenchmark W&B team](https://wandb.ai/openrlbenchmark).
-- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture-video`.
+- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture_video`.
- [ ] I have performed RLops with `python -m openrlbenchmark.rlops`.
- For new feature or bug fix:
- [ ] I have used the RLops utility to understand the performance impact of the changes and confirmed there is no regression.
diff --git a/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb b/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb
index 756f1d099..4cb022ec3 100644
--- a/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb
+++ b/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb
@@ -292,7 +292,7 @@
"source": [
"## Enjoy Utility\n",
"\n",
- "We have a simple way to load the model by running our \"enjoy\" utility, which automatically pull the model from 🤗 HuggingFace and run for a few episodes. It also produces a rendered video through the `--capture-video` flag. See more at our [📜 Documentation](https://docs.cleanrl.dev/get-started/zoo/)."
+    "We have a simple way to load the model by running our \"enjoy\" utility, which automatically pulls the model from 🤗 HuggingFace and runs for a few episodes. It also produces a rendered video through the `--capture_video` flag. See more at our [📜 Documentation](https://docs.cleanrl.dev/get-started/zoo/)."
]
},
{
@@ -338,7 +338,7 @@
}
],
"source": [
- "!python -m cleanrl_utils.enjoy --exp-name dqn_atari_jax --env-id BreakoutNoFrameskip-v4 --eval-episodes 2 --capture-video"
+ "!python -m cleanrl_utils.enjoy --exp-name dqn_atari_jax --env-id BreakoutNoFrameskip-v4 --eval-episodes 2 --capture_video"
]
},
{
diff --git a/docs/get-started/basic-usage.md b/docs/get-started/basic-usage.md
index 5571c3e7e..55aeb3311 100644
--- a/docs/get-started/basic-usage.md
+++ b/docs/get-started/basic-usage.md
@@ -75,7 +75,7 @@ poetry run pip install sb3==2.0.0a1
This is because the `torch` wheel on PyPi is built with cuda 10.2. You would need to manually install the cuda 11.3 wheel like this:
```bash
- poetry run pip install torch --upgrade --extra-index-url https://download.pytorch.org/whl/cu113
+ poetry run pip install torch==1.12.1 --upgrade --extra-index-url https://download.pytorch.org/whl/cu113
```
Then, you can run the script again.
@@ -93,7 +93,7 @@ tensorboard --logdir runs
## Visualize the Agent's Gameplay Videos
-CleanRL helps record the agent's gameplay videos with a `--capture-video` flag,
+CleanRL helps record the agent's gameplay videos with a `--capture_video` flag,
which will save the videos in the `videos/{$run_name}` folder.
```bash linenums="1" hl_lines="5"
@@ -101,7 +101,7 @@ python cleanrl/ppo.py \
--seed 1 \
--env-id CartPole-v0 \
--total-timesteps 50000 \
- --capture-video
+ --capture_video
```
![videos](videos.png)
@@ -119,7 +119,7 @@ usage: ppo.py [-h] [--exp-name EXP_NAME] [--env-id ENV_ID]
[--total-timesteps TOTAL_TIMESTEPS]
[--torch-deterministic [TORCH_DETERMINISTIC]] [--cuda [CUDA]]
[--track [TRACK]] [--wandb-project-name WANDB_PROJECT_NAME]
- [--wandb-entity WANDB_ENTITY] [--capture-video [CAPTURE_VIDEO]]
+ [--wandb-entity WANDB_ENTITY] [--capture_video [CAPTURE_VIDEO]]
[--num-envs NUM_ENVS] [--num-steps NUM_STEPS]
[--anneal-lr [ANNEAL_LR]] [--gae [GAE]] [--gamma GAMMA]
[--gae-lambda GAE_LAMBDA] [--num-minibatches NUM_MINIBATCHES]
@@ -146,7 +146,7 @@ optional arguments:
the wandb's project name
--wandb-entity WANDB_ENTITY
the entity (team) of wandb's project
- --capture-video [CAPTURE_VIDEO]
+ --capture_video [CAPTURE_VIDEO]
weather to capture videos of the agent performances (check
out `videos` folder)
--num-envs NUM_ENVS the number of parallel game environments
diff --git a/docs/get-started/benchmark-utility.md b/docs/get-started/benchmark-utility.md
index eee448b73..7233d4111 100644
--- a/docs/get-started/benchmark-utility.md
+++ b/docs/get-started/benchmark-utility.md
@@ -7,22 +7,44 @@ CleanRL comes with a utility module `cleanrl_utils.benchmark` to help schedule a
Try running `python -m cleanrl_utils.benchmark --help` to get the help text.
```bash
-python -m cleanrl_utils.benchmark --help
-usage: benchmark.py [-h] [--env-ids ENV_IDS [ENV_IDS ...]] [--command COMMAND] [--num-seeds NUM_SEEDS] [--start-seed START_SEED] [--workers WORKERS]
- [--auto-tag [AUTO_TAG]]
-
-optional arguments:
- -h, --help show this help message and exit
- --env-ids ENV_IDS [ENV_IDS ...]
- the ids of the environment to benchmark
- --command COMMAND the command to run
- --num-seeds NUM_SEEDS
- the number of random seeds
- --start-seed START_SEED
- the number of the starting seed
- --workers WORKERS the number of workers to run benchmark experimenets
- --auto-tag [AUTO_TAG]
- if toggled, the runs will be tagged with git tags, commit, and pull request number if possible
+$ python -m cleanrl_utils.benchmark --help
+usage: benchmark.py [-h] --env-ids [STR
+ [STR ...]] --command STR [--num-seeds INT]
+ [--start-seed INT] [--workers INT]
+ [--auto-tag | --no-auto-tag]
+ [--slurm-template-path {None}|STR]
+ [--slurm-gpus-per-task {None}|INT]
+ [--slurm-total-cpus {None}|INT]
+ [--slurm-ntasks {None}|INT] [--slurm-nodes {None}|INT]
+
+╭─ arguments ──────────────────────────────────────────────────────────────╮
+│ -h, --help │
+│ show this help message and exit │
+│ --env-ids [STR [STR ...]] │
+│ the ids of the environment to compare (required) │
+│ --command STR │
+│ the command to run (required) │
+│ --num-seeds INT │
+│ the number of random seeds (default: 3) │
+│ --start-seed INT │
+│ the number of the starting seed (default: 1) │
+│ --workers INT │
+│ the number of workers to run benchmark experimenets (default: 0) │
+│ --auto-tag, --no-auto-tag │
+│ if toggled, the runs will be tagged with git tags, commit, and pull │
+│ request number if possible (default: True) │
+│ --slurm-template-path {None}|STR │
+│ the path to the slurm template file (see docs for more details) │
+│ (default: None) │
+│ --slurm-gpus-per-task {None}|INT │
+│ the number of gpus per task to use for slurm jobs (default: None) │
+│ --slurm-total-cpus {None}|INT │
+│ the number of gpus per task to use for slurm jobs (default: None) │
+│ --slurm-ntasks {None}|INT │
+│ the number of tasks to use for slurm jobs (default: None) │
+│ --slurm-nodes {None}|INT │
+│ the number of nodes to use for slurm jobs (default: None) │
+╰──────────────────────────────────────────────────────────────────────────╯
```
## Examples
@@ -32,7 +54,7 @@ The following example demonstrates how to run classic control benchmark experime
```bash
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
- --command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \
+ --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \
--num-seeds 3 \
--workers 5
```
@@ -40,24 +62,24 @@ OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
What just happened here? In principle the helps run the following commands in 5 subprocesses:
```bash
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id CartPole-v1 --seed 1
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id Acrobot-v1 --seed 1
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id MountainCar-v0 --seed 1
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id CartPole-v1 --seed 2
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id Acrobot-v1 --seed 2
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id MountainCar-v0 --seed 2
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id CartPole-v1 --seed 3
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id Acrobot-v1 --seed 3
-poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id MountainCar-v0 --seed 3
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id CartPole-v1 --seed 1
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id Acrobot-v1 --seed 1
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id MountainCar-v0 --seed 1
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id CartPole-v1 --seed 2
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id Acrobot-v1 --seed 2
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id MountainCar-v0 --seed 2
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id CartPole-v1 --seed 3
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id Acrobot-v1 --seed 3
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id MountainCar-v0 --seed 3
```
More specifically:
1. `--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0` specifies that running experiments against these three environments
-1. `--command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video"` suggests running `ppo.py` with these settings:
- * turn off GPU usage via `--cuda False`: because `ppo.py` has such as small neural network it often runs faster on CPU only
+1. `--command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video"` suggests running `ppo.py` with these settings:
+    * turn off GPU usage via `--no_cuda`: because `ppo.py` has such a small neural network it often runs faster on CPU only
* track the experiments via `--track`
- * render the agent gameplay videos via `--capture-video`; these videos algo get saved to the tracked experiments
+    * render the agent gameplay videos via `--capture_video`; these videos also get saved to the tracked experiments
* ` xvfb-run -a` virtualizes a display for video recording, enabling these commands on a headless linux system
1. `--num-seeds 3` suggests running the the command with 3 random seeds for each `env-id`
1. `--workers 5` suggests at maximum using 5 subprocesses to run the experiments
@@ -70,9 +92,68 @@ Note that when you run with high-throughput environments such as `envpool` or `p
```bash
xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
- --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture-video" \
+ --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture_video" \
--num-seeds 3 \
--workers 1
```
-For more example usage, see [https://github.com/vwxyzjn/cleanrl/blob/master/benchmark](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark)
\ No newline at end of file
+For more example usage, see [https://github.com/vwxyzjn/cleanrl/blob/master/benchmark](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark)
+
+
+## Slurm integration
+
+If you have access to a slurm cluster, you can use `cleanrl_utils.benchmark` to schedule jobs on the cluster. The following example demonstrates how to run classic control benchmark experiments on a slurm cluster.
+
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:3:12"
+```
+
+```
+poetry install
+OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
+ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
+ --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \
+ --num-seeds 3 \
+ --workers 9 \
+ --slurm-gpus-per-task 1 \
+ --slurm-ntasks 1 \
+ --slurm-total-cpus 10 \
+ --slurm-template-path benchmark/cleanrl_1gpu.slurm_template
+```
+
+Here, we have
+* `--slurm-gpus-per-task 1` suggests that each slurm job should use 1 GPU
+* `--slurm-ntasks 1` suggests that each slurm job should run 1 task
+* `--slurm-total-cpus 10` suggests that each slurm job should use 10 CPUs in total
+* `--slurm-template-path benchmark/cleanrl_1gpu.slurm_template` suggests that we should use the template file `benchmark/cleanrl_1gpu.slurm_template` to generate the slurm job scripts. The template file looks like this:
+
+``` title="benchmark/cleanrl_1gpu.slurm_template" linenums="1"
+--8<-- "benchmark/cleanrl_1gpu.slurm_template"
+```
+
+The utility will generate a slurm script based on the template file and submit the job to the cluster. The generated slurm script looks like this:
+
+```
+#!/bin/bash
+#SBATCH --job-name=low-priority
+#SBATCH --partition=production-cluster
+#SBATCH --gpus-per-task=1
+#SBATCH --cpus-per-gpu=10
+#SBATCH --ntasks=1
+#SBATCH --output=slurm/logs/%x_%j.out
+#SBATCH --array=0-8%9
+#SBATCH --mem-per-cpu=12G
+#SBATCH --exclude=ip-26-0-147-[245,247],ip-26-0-156-239
+##SBATCH --nodelist=ip-26-0-156-13
+
+
+env_ids=(CartPole-v1 Acrobot-v1 MountainCar-v0)
+seeds=(1 2 3)
+env_id=${env_ids[$SLURM_ARRAY_TASK_ID / 3]}
+seed=${seeds[$SLURM_ARRAY_TASK_ID % 3]}
+
+echo "Running task $SLURM_ARRAY_TASK_ID with env_id: $env_id and seed: $seed"
+
+srun poetry run python cleanrl/ppo.py --no_cuda --track --env-id $env_id --seed $seed #
+```
+
diff --git a/docs/get-started/experiment-tracking.md b/docs/get-started/experiment-tracking.md
index 89f91f42b..395aaabf1 100644
--- a/docs/get-started/experiment-tracking.md
+++ b/docs/get-started/experiment-tracking.md
@@ -1,11 +1,11 @@
# Experiment tracking
To use experiment tracking with wandb, run with the `--track` flag, which will also
-upload the videos recorded by the `--capture-video` flag.
+upload the videos recorded by the `--capture_video` flag.
```bash
poetry shell
wandb login # only required for the first time
-python cleanrl/ppo.py --track --capture-video
+python cleanrl/ppo.py --track --capture_video
```
diff --git a/docs/rl-algorithms/c51.md b/docs/rl-algorithms/c51.md
index c5dd66503..e5d1356cf 100644
--- a/docs/rl-algorithms/c51.md
+++ b/docs/rl-algorithms/c51.md
@@ -216,7 +216,7 @@ The [c51_atari_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/c5
```bash
poetry install -E "atari jax"
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/c51_atari_jax.py --env-id BreakoutNoFrameskip-v4
poetry run python cleanrl/c51_atari_jax.py --env-id PongNoFrameskip-v4
```
@@ -291,7 +291,7 @@ The [c51_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/c51_jax.
```bash
poetry install -E jax
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/c51_jax.py --env-id CartPole-v1
```
diff --git a/docs/rl-algorithms/ddpg.md b/docs/rl-algorithms/ddpg.md
index 77d2f6837..671d4e903 100644
--- a/docs/rl-algorithms/ddpg.md
+++ b/docs/rl-algorithms/ddpg.md
@@ -41,8 +41,6 @@ The [ddpg_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/c
```bash
poetry install
poetry run python cleanrl/ddpg_continuous_action.py --help
- poetry install -E mujoco_py # only works in Linux
- poetry run python cleanrl/ddpg_continuous_action.py --env-id Hopper-v2
poetry install -E mujoco
poetry run python cleanrl/ddpg_continuous_action.py --env-id Hopper-v4
```
@@ -51,8 +49,6 @@ The [ddpg_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/c
```bash
python cleanrl/ddpg_continuous_action.py --help
- pip install -r requirements/requirements-mujoco_py.txt # only works in Linux, you have to pick either `mujoco` or `mujoco_py`
- python cleanrl/ddpg_continuous_action.py --env-id Hopper-v2
pip install -r requirements/requirements-mujoco.txt
python cleanrl/ddpg_continuous_actions.py --env-id Hopper-v4
```
@@ -232,23 +228,28 @@ Additionally, when drawing exploration noise that is added to the actions produc
To run benchmark experiments, see :material-github: [benchmark/ddpg.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ddpg.sh). Specifically, execute the following command:
-
+
+``` title="benchmark/ddpg.sh" linenums="1"
+--8<-- "benchmark/ddpg.sh::7"
+```
+
Below are the average episodic returns for [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2].
| Environment | [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) | [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018, Table 1)[^2] | [`DDPG.py`](https://github.com/sfujim/TD3/blob/master/DDPG.py) using settings from (Lillicrap et al., 2016)[^1] in (Fujimoto et al., 2018, Table 1)[^2] |
| ----------- | ----------- | ----------- | ----------- |
-| HalfCheetah | 10210.57 ± 196.22 |8577.29 | 3305.60|
-| Walker2d | 1661.14 ± 250.01 | 3098.11 | 1843.85 |
-| Hopper | 1007.44 ± 148.29 | 1860.02 | 2020.46 |
-| Humanoid | 910.61 ± 97.58 | not available |
-| Pusher | -39.39 ± 9.54 | not available |
-| InvertedPendulum | 684.61 ± 94.41 | 1000.00 ± 0.00 |
+| HalfCheetah-v4 | 10374.07 ± 157.37 |8577.29 | 3305.60|
+| Walker2d-v4 | 1240.16 ± 390.10 | 3098.11 | 1843.85 |
+| Hopper-v4 | 1576.78 ± 818.98 | 1860.02 | 2020.46 |
+| InvertedPendulum-v4 | 642.68 ± 69.56 | 1000.00 ± 0.00 |
+| Humanoid-v4 | 1699.56 ± 694.22 | not available |
+| Pusher-v4 | -77.30 ± 38.78 | not available |
+
???+ info
- Note that [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) uses gym MuJoCo v2 environments while [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments. According to the :material-github: [openai/gym#834](https://github.com/openai/gym/pull/834), gym MuJoCo v2 environments should be equivalent to the gym MuJoCo v1 environments.
+ Note that [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) uses gym MuJoCo v4 environments while [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments.
Also note the performance of our `ddpg_continuous_action.py` seems to be worse than the reference implementation on Walker2d and Hopper. This is likely due to :material-github: [openai/gym#938](https://github.com/openai/baselines/issues/938). We would have a hard time reproducing gym MuJoCo v1 environments because they have been long deprecated.
@@ -256,7 +257,12 @@ Below are the average episodic returns for [`ddpg_continuous_action.py`](https:/
Learning curves:
-
+``` title="benchmark/ddpg_plot.sh" linenums="1"
+--8<-- "benchmark/ddpg_plot.sh::9"
+```
+
+
+
@@ -314,41 +320,59 @@ See [related docs](/rl-algorithms/ddpg/#implementation-details) for `ddpg_contin
To run benchmark experiments, see :material-github: [benchmark/ddpg.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ddpg.sh). Specifically, execute the following command:
-
-Below are the average episodic returns for [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2].
+``` title="benchmark/ddpg.sh" linenums="1"
+--8<-- "benchmark/ddpg.sh:12:19"
+```
+
+Below are the average episodic returns for [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (3 random seeds).
+
+
+{!benchmark/ddpg.md!}
+
+Learning curves:
+
+
+``` title="benchmark/ddpg_plot.sh" linenums="1"
+--8<-- "benchmark/ddpg_plot.sh:11:20"
+```
+
+
+
-| Environment | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (RTX 3060 TI) | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (VM w/ TPU) | [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) (RTX 3060 TI) | [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018, Table 1)[^2] |
-| ----------- | ----------- | ----------- | ----------- | ----------- |
-| HalfCheetah | 9592.25 ± 135.10 | 9125.06 ± 1477.58 | 10210.57 ± 196.22 |8577.29 |
-| Walker2d | 1083.15 ± 567.65 | 1303.82 ± 448.41 | 1661.14 ± 250.01 | 3098.11 |
-| Hopper | 1275.28 ± 209.60 | 1145.05 ± 41.95 | 1007.44 ± 148.29 | 1860.02 |
???+ info
- Note that the experiments were conducted on different hardwares, so your mileage might vary. This inconsistency is because 1) re-running expeirments on the same hardware is computationally expensive and 2) requiring the same hardware is not inclusive nor feasible to other contributors who might have different hardwares.
+ These are some previous experiments with TPUs. Note the results are very similar to the ones above, but the runtime can be different due to different hardware used.
- That said, we roughly expect to see a 2-4x speed improvement from using [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) under the same hardware. And if you disable the `--capture-video` overhead, the speed improvement will be even higher.
+ Note that the experiments were conducted on different hardware, so your mileage might vary. This inconsistency is because 1) re-running experiments on the same hardware is computationally expensive and 2) requiring the same hardware is not inclusive nor feasible to other contributors who might have different hardware.
+
+ That said, we roughly expect to see a 2-4x speed improvement from using [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) under the same hardware. And if you disable the `--capture_video` overhead, the speed improvement will be even higher.
-Learning curves:
-
-
-
+ | Environment | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (RTX 3060 TI) | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (VM w/ TPU) | [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) (RTX 3060 TI) | [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018, Table 1)[^2] |
+ | ----------- | ----------- | ----------- | ----------- | ----------- |
+ | HalfCheetah | 9592.25 ± 135.10 | 9125.06 ± 1477.58 | 10210.57 ± 196.22 |8577.29 |
+ | Walker2d | 1083.15 ± 567.65 | 1303.82 ± 448.41 | 1661.14 ± 250.01 | 3098.11 |
+ | Hopper | 1275.28 ± 209.60 | 1145.05 ± 41.95 | 1007.44 ± 148.29 | 1860.02 |
-
-
+ Learning curves:
-
-
-
+
-Tracked experiments and game play videos:
+ Tracked experiments and game play videos:
-
+
[^1]:Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N.M., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. CoRR, abs/1509.02971. https://arxiv.org/abs/1509.02971
diff --git a/docs/rl-algorithms/dqn.md b/docs/rl-algorithms/dqn.md
index ce64b1847..e2316e9ab 100644
--- a/docs/rl-algorithms/dqn.md
+++ b/docs/rl-algorithms/dqn.md
@@ -250,7 +250,7 @@ The [dqn_atari_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/dq
```bash
poetry install -E "atari jax"
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/dqn_atari_jax.py --env-id BreakoutNoFrameskip-v4
poetry run python cleanrl/dqn_atari_jax.py --env-id PongNoFrameskip-v4
```
@@ -333,7 +333,7 @@ python cleanrl/dqn_jax.py --env-id CartPole-v1
=== "poetry"
```bash
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/dqn_jax.py --env-id CartPole-v1
```
diff --git a/docs/rl-algorithms/ppo-isaacgymenvs.md b/docs/rl-algorithms/ppo-isaacgymenvs.md
index 7508b591e..38ff65d7f 100644
--- a/docs/rl-algorithms/ppo-isaacgymenvs.md
+++ b/docs/rl-algorithms/ppo-isaacgymenvs.md
@@ -23,7 +23,11 @@
## `ppo_continuous_action_isaacgym.py`
-:octicons-beaker-24: Experimental
+
+???+ warning
+
+ `ppo_continuous_action_isaacgym.py` is temporarily deprecated. Please check out the code in [https://github.com/vwxyzjn/cleanrl/releases/tag/v1.0.0](https://github.com/vwxyzjn/cleanrl/releases/tag/v1.0.0)
+
The [ppo_continuous_action_isaacgym.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py) has the following features:
@@ -250,6 +254,6 @@ Old Learning curves w/ Isaac Gym Preview 3 (no longer available in Nvidia's webs
???+ info
- Note the `AllegroHand` and `ShadowHand` experiments used the following command `ppo_continuous_action_isaacgym.py --track --capture-video --num-envs 16384 --num-steps 8 --update-epochs 5 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660`. Costa: I was able to run this during my internship at NVIDIA, but in my home setup, the computer has less GPU memory which makes it hard to replicate the results w/ `--num-envs 16384`.
+ Note the `AllegroHand` and `ShadowHand` experiments used the following command `ppo_continuous_action_isaacgym.py --track --capture_video --num-envs 16384 --num-steps 8 --update-epochs 5 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660`. Costa: I was able to run this during my internship at NVIDIA, but in my home setup, the computer has less GPU memory which makes it hard to replicate the results w/ `--num-envs 16384`.
diff --git a/docs/rl-algorithms/ppo.md b/docs/rl-algorithms/ppo.md
index 4f588bf03..e83b38e63 100644
--- a/docs/rl-algorithms/ppo.md
+++ b/docs/rl-algorithms/ppo.md
@@ -100,27 +100,28 @@ Running `python cleanrl/ppo.py` will automatically record various metrics such a
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:3:8"
+```
Below are the average episodic returns for `ppo.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO.
| Environment | `ppo.py` | `openai/baselies`' PPO (Huang et al., 2022)[^1]
| ----------- | ----------- | ----------- |
-| CartPole-v1 | 492.40 ± 13.05 |497.54 ± 4.02 |
-| Acrobot-v1 | -89.93 ± 6.34 | -81.82 ± 5.58 |
+| CartPole-v1 | 490.04 ± 6.12 |497.54 ± 4.02 |
+| Acrobot-v1 | -86.36 ± 1.32 | -81.82 ± 5.58 |
| MountainCar-v0 | -200.00 ± 0.00 | -200.00 ± 0.00 |
Learning curves:
-
-
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh::9"
+```
-
-
-
+
+
Tracked experiments and game play videos:
@@ -186,27 +187,28 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:14:19"
+```
Below are the average episodic returns for `ppo_atari.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO.
| Environment | `ppo_atari.py` | `openai/baselies`' PPO (Huang et al., 2022)[^1]
| ----------- | ----------- | ----------- |
-| BreakoutNoFrameskip-v4 | 416.31 ± 43.92 | 406.57 ± 31.554 |
-| PongNoFrameskip-v4 | 20.59 ± 0.35 | 20.512 ± 0.50 |
-| BeamRiderNoFrameskip-v4 | 2445.38 ± 528.91 | 2642.97 ± 670.37 |
+| BreakoutNoFrameskip-v4 | 414.66 ± 28.09 | 406.57 ± 31.554 |
+| PongNoFrameskip-v4 | 20.36 ± 0.20 | 20.512 ± 0.50 |
+| BeamRiderNoFrameskip-v4 | 1915.93 ± 484.58 | 2642.97 ± 670.37 |
Learning curves:
-
-
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:11:19"
+```
-
-
-
-
+
+
Tracked experiments and game play videos:
@@ -248,9 +250,6 @@ The [ppo_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl
# dm_control environments
poetry install -E "mujoco dm_control"
python cleanrl/ppo_continuous_action.py --env-id dm_control/cartpole-balance-v0
- # backwards compatibility with mujoco v2 environments
- poetry install -E mujoco_py # only works in Linux
- python cleanrl/ppo_continuous_action.py --env-id Hopper-v2
```
=== "pip"
@@ -261,8 +260,6 @@ The [ppo_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl
python cleanrl/ppo_continuous_action.py --env-id Hopper-v4
pip install -r requirements/requirements-dm_control.txt
python cleanrl/ppo_continuous_action.py --env-id dm_control/cartpole-balance-v0
- pip install -r requirements/requirements-mujoco_py.txt
- python cleanrl/ppo_continuous_action.py --env-id Hopper-v2
```
???+ warning "dm_control installation issue"
@@ -301,122 +298,97 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
-
-
-
-
-???+ note "Result tables, learning curves, and interactive reports"
-
- === "MuJoCo v2"
-
- Below are the average episodic returns for `ppo_continuous_action.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO.
-
- | | ppo_continuous_action ({'tag': ['v1.0.0-27-gde3f410']}) | `openai/baselies`' PPO (results taken from [here](https://wandb.ai/openrlbenchmark/openrlbenchmark/reports/MuJoCo-openai-baselines--VmlldzoyMTgyNjM0)) |
- |:--------------------|:----------------------------------------------------------|:---------------------------------------------------------------------------------------------|
- | HalfCheetah-v2 | 2262.50 ± 1196.81 | 1428.55 ± 62.40 |
- | Walker2d-v2 | 3312.32 ± 429.87 | 3356.49 ± 322.61 |
- | Hopper-v2 | 2311.49 ± 440.99 | 2158.65 ± 302.33 |
- | InvertedPendulum-v2 | 852.04 ± 17.04 | 901.25 ± 35.73 |
- | Humanoid-v2 | 676.34 ± 78.68 | 673.11 ± 53.02 |
- | Pusher-v2 | -60.49 ± 4.37 | -56.83 ± 13.33 |
-
- Learning curves:
-
- ![](../ppo/ppo_continuous_action_gymnasium_mujoco_v2.png)
-
- Tracked experiments and game play videos:
+MuJoCo v4
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:25:30"
+```
- === "MuJoCo v4"
+{!benchmark/ppo_continuous_action.md!}
- Below are the average episodic returns for `ppo_continuous_action.py` in MuJoCo v4 environments and `dm_control` environments.
+Learning curves:
- | | ppo_continuous_action ({'tag': ['v1.0.0-12-g99f7789']}) |
- |:--------------------|:----------------------------------------------------------|
- | HalfCheetah-v4 | 2905.85 ± 1129.37 |
- | Walker2d-v4 | 2890.97 ± 231.40 |
- | Hopper-v4 | 2051.80 ± 313.94 |
- | InvertedPendulum-v4 | 950.98 ± 36.39 |
- | Humanoid-v4 | 742.19 ± 155.77 |
- | Pusher-v4 | -55.60 ± 3.98 |
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:11:19"
+```
+
+
- Learning curves:
+Tracked experiments and game play videos:
- ![](../ppo/ppo_continuous_action_gymnasium_mujoco_v4.png)
+
+
+
+
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:36:41"
+```
+
+Below are the average episodic returns for `ppo_continuous_action.py` in `dm_control` environments.
+
+| | ppo_continuous_action ({'tag': ['v1.0.0-13-gcbd83f6']}) |
+|:--------------------------------------|:----------------------------------------------------------|
+| dm_control/acrobot-swingup-v0 | 27.84 ± 9.25 |
+| dm_control/acrobot-swingup_sparse-v0 | 1.60 ± 1.17 |
+| dm_control/ball_in_cup-catch-v0 | 900.78 ± 5.26 |
+| dm_control/cartpole-balance-v0 | 855.47 ± 22.06 |
+| dm_control/cartpole-balance_sparse-v0 | 999.93 ± 0.10 |
+| dm_control/cartpole-swingup-v0 | 640.86 ± 11.44 |
+| dm_control/cartpole-swingup_sparse-v0 | 51.34 ± 58.35 |
+| dm_control/cartpole-two_poles-v0 | 203.86 ± 11.84 |
+| dm_control/cartpole-three_poles-v0 | 164.59 ± 3.23 |
+| dm_control/cheetah-run-v0 | 432.56 ± 82.54 |
+| dm_control/dog-stand-v0 | 307.79 ± 46.26 |
+| dm_control/dog-walk-v0 | 120.05 ± 8.80 |
+| dm_control/dog-trot-v0 | 76.56 ± 6.44 |
+| dm_control/dog-run-v0 | 60.25 ± 1.33 |
+| dm_control/dog-fetch-v0 | 34.26 ± 2.24 |
+| dm_control/finger-spin-v0 | 590.49 ± 171.09 |
+| dm_control/finger-turn_easy-v0 | 180.42 ± 44.91 |
+| dm_control/finger-turn_hard-v0 | 61.40 ± 9.59 |
+| dm_control/fish-upright-v0 | 516.21 ± 59.52 |
+| dm_control/fish-swim-v0 | 87.91 ± 6.83 |
+| dm_control/hopper-stand-v0 | 2.72 ± 1.72 |
+| dm_control/hopper-hop-v0 | 0.52 ± 0.48 |
+| dm_control/humanoid-stand-v0 | 6.59 ± 0.18 |
+| dm_control/humanoid-walk-v0 | 1.73 ± 0.03 |
+| dm_control/humanoid-run-v0 | 1.11 ± 0.04 |
+| dm_control/humanoid-run_pure_state-v0 | 0.98 ± 0.03 |
+| dm_control/humanoid_CMU-stand-v0 | 4.79 ± 0.18 |
+| dm_control/humanoid_CMU-run-v0 | 0.88 ± 0.05 |
+| dm_control/manipulator-bring_ball-v0 | 0.50 ± 0.29 |
+| dm_control/manipulator-bring_peg-v0 | 1.80 ± 1.58 |
+| dm_control/manipulator-insert_ball-v0 | 35.50 ± 13.04 |
+| dm_control/manipulator-insert_peg-v0 | 60.40 ± 21.76 |
+| dm_control/pendulum-swingup-v0 | 242.81 ± 245.95 |
+| dm_control/point_mass-easy-v0 | 273.95 ± 362.28 |
+| dm_control/point_mass-hard-v0 | 143.25 ± 38.12 |
+| dm_control/quadruped-walk-v0 | 239.03 ± 66.17 |
+| dm_control/quadruped-run-v0 | 180.44 ± 32.91 |
+| dm_control/quadruped-escape-v0 | 28.92 ± 11.21 |
+| dm_control/quadruped-fetch-v0 | 193.97 ± 22.20 |
+| dm_control/reacher-easy-v0 | 626.28 ± 15.51 |
+| dm_control/reacher-hard-v0 | 443.80 ± 9.64 |
+| dm_control/stacker-stack_2-v0 | 75.68 ± 4.83 |
+| dm_control/stacker-stack_4-v0 | 68.02 ± 4.02 |
+| dm_control/swimmer-swimmer6-v0 | 158.19 ± 10.22 |
+| dm_control/swimmer-swimmer15-v0 | 131.94 ± 0.88 |
+| dm_control/walker-stand-v0 | 564.46 ± 235.22 |
+| dm_control/walker-walk-v0 | 392.51 ± 56.25 |
+| dm_control/walker-run-v0 | 125.92 ± 10.01 |
+
+Note that the dm_control/lqr-lqr_2_1-v0 and dm_control/lqr-lqr_6_2-v0 environments are never terminated or truncated. See https://wandb.ai/openrlbenchmark/cleanrl/runs/3tm00923 and https://wandb.ai/openrlbenchmark/cleanrl/runs/1z9us07j as examples.
- Tracked experiments and game play videos:
-
-
+Learning curves:
- === "dm_control"
+![](../ppo/ppo_continuous_action_gymnasium_dm_control.png)
- Below are the average episodic returns for `ppo_continuous_action.py` in `dm_control` environments.
+Tracked experiments and game play videos:
- | | ppo_continuous_action ({'tag': ['v1.0.0-13-gcbd83f6']}) |
- |:--------------------------------------|:----------------------------------------------------------|
- | dm_control/acrobot-swingup-v0 | 27.84 ± 9.25 |
- | dm_control/acrobot-swingup_sparse-v0 | 1.60 ± 1.17 |
- | dm_control/ball_in_cup-catch-v0 | 900.78 ± 5.26 |
- | dm_control/cartpole-balance-v0 | 855.47 ± 22.06 |
- | dm_control/cartpole-balance_sparse-v0 | 999.93 ± 0.10 |
- | dm_control/cartpole-swingup-v0 | 640.86 ± 11.44 |
- | dm_control/cartpole-swingup_sparse-v0 | 51.34 ± 58.35 |
- | dm_control/cartpole-two_poles-v0 | 203.86 ± 11.84 |
- | dm_control/cartpole-three_poles-v0 | 164.59 ± 3.23 |
- | dm_control/cheetah-run-v0 | 432.56 ± 82.54 |
- | dm_control/dog-stand-v0 | 307.79 ± 46.26 |
- | dm_control/dog-walk-v0 | 120.05 ± 8.80 |
- | dm_control/dog-trot-v0 | 76.56 ± 6.44 |
- | dm_control/dog-run-v0 | 60.25 ± 1.33 |
- | dm_control/dog-fetch-v0 | 34.26 ± 2.24 |
- | dm_control/finger-spin-v0 | 590.49 ± 171.09 |
- | dm_control/finger-turn_easy-v0 | 180.42 ± 44.91 |
- | dm_control/finger-turn_hard-v0 | 61.40 ± 9.59 |
- | dm_control/fish-upright-v0 | 516.21 ± 59.52 |
- | dm_control/fish-swim-v0 | 87.91 ± 6.83 |
- | dm_control/hopper-stand-v0 | 2.72 ± 1.72 |
- | dm_control/hopper-hop-v0 | 0.52 ± 0.48 |
- | dm_control/humanoid-stand-v0 | 6.59 ± 0.18 |
- | dm_control/humanoid-walk-v0 | 1.73 ± 0.03 |
- | dm_control/humanoid-run-v0 | 1.11 ± 0.04 |
- | dm_control/humanoid-run_pure_state-v0 | 0.98 ± 0.03 |
- | dm_control/humanoid_CMU-stand-v0 | 4.79 ± 0.18 |
- | dm_control/humanoid_CMU-run-v0 | 0.88 ± 0.05 |
- | dm_control/manipulator-bring_ball-v0 | 0.50 ± 0.29 |
- | dm_control/manipulator-bring_peg-v0 | 1.80 ± 1.58 |
- | dm_control/manipulator-insert_ball-v0 | 35.50 ± 13.04 |
- | dm_control/manipulator-insert_peg-v0 | 60.40 ± 21.76 |
- | dm_control/pendulum-swingup-v0 | 242.81 ± 245.95 |
- | dm_control/point_mass-easy-v0 | 273.95 ± 362.28 |
- | dm_control/point_mass-hard-v0 | 143.25 ± 38.12 |
- | dm_control/quadruped-walk-v0 | 239.03 ± 66.17 |
- | dm_control/quadruped-run-v0 | 180.44 ± 32.91 |
- | dm_control/quadruped-escape-v0 | 28.92 ± 11.21 |
- | dm_control/quadruped-fetch-v0 | 193.97 ± 22.20 |
- | dm_control/reacher-easy-v0 | 626.28 ± 15.51 |
- | dm_control/reacher-hard-v0 | 443.80 ± 9.64 |
- | dm_control/stacker-stack_2-v0 | 75.68 ± 4.83 |
- | dm_control/stacker-stack_4-v0 | 68.02 ± 4.02 |
- | dm_control/swimmer-swimmer6-v0 | 158.19 ± 10.22 |
- | dm_control/swimmer-swimmer15-v0 | 131.94 ± 0.88 |
- | dm_control/walker-stand-v0 | 564.46 ± 235.22 |
- | dm_control/walker-walk-v0 | 392.51 ± 56.25 |
- | dm_control/walker-run-v0 | 125.92 ± 10.01 |
-
- Note that the dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 environments are never terminated or truncated. See https://wandb.ai/openrlbenchmark/cleanrl/runs/3tm00923 and https://wandb.ai/openrlbenchmark/cleanrl/runs/1z9us07j as an example.
-
- Learning curves:
-
- ![](../ppo/ppo_continuous_action_gymnasium_dm_control.png)
-
- Tracked experiments and game play videos:
-
-
-
+
+
???+ info
@@ -484,8 +456,9 @@ To help test out the memory, we remove the 4 stacked frames from the observation
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:47:52"
+```
Below are the average episodic returns for `ppo_atari_lstm.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO.
@@ -499,14 +472,12 @@ Below are the average episodic returns for `ppo_atari_lstm.py`. To ensure the qu
Learning curves:
-
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:11:19"
+```
+
+
Tracked experiments and game play videos:
@@ -568,34 +539,22 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
-
-
-Below are the average episodic returns for `ppo_atari_envpool.py`. Notice it has the same sample efficiency as `ppo_atari.py`, but runs about 3x faster.
-
-
-
-| Environment | `ppo_atari_envpool.py` (~80 mins) | `ppo_atari.py` (~220 mins)
-| ----------- | ----------- | ----------- |
-| BreakoutNoFrameskip-v4 | 389.57 ± 29.62 | 416.31 ± 43.92
-| PongNoFrameskip-v4 | 20.55 ± 0.37 | 20.59 ± 0.35
-| BeamRiderNoFrameskip-v4 | 2039.83 ± 1146.62 | 2445.38 ± 528.91
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:58:63"
+```
+{!benchmark/ppo_atari_envpool.md!}
Learning curves:
-
-
-
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:51:62"
+```
-
-
-
-
-
-
+
+
Tracked experiments and game play videos:
@@ -637,7 +596,7 @@ The [ppo_atari_envpool_xla_jax.py](https://github.com/vwxyzjn/cleanrl/blob/maste
```bash
poetry install -E "envpool jax"
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/ppo_atari_envpool_xla_jax.py --help
poetry run python cleanrl/ppo_atari_envpool_xla_jax.py --env-id Breakout-v5
```
@@ -684,96 +643,25 @@ Additionally, we record the following metric:
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:69:74"
+```
-Below are the average episodic returns for `ppo_atari_envpool_xla_jax.py`. Notice it has the same sample efficiency as `ppo_atari.py`, but runs about 3x faster.
-???+ info
+{!benchmark/ppo_atari_envpool_xla_jax.md!}
- The following table and charts are generated by [atari_hns_new.py](https://github.com/openrlbenchmark/openrlbenchmark/blob/0c16fda7d7873143a632865010c74263ea487339/atari_hns_new.py), [ours_vs_baselines_hns.py](https://github.com/openrlbenchmark/openrlbenchmark/blob/0c16fda7d7873143a632865010c74263ea487339/ours_vs_baselines_hns.py), and [ours_vs_seedrl_hns.py](https://github.com/openrlbenchmark/openrlbenchmark/blob/0c16fda7d7873143a632865010c74263ea487339/ours_vs_seedrl_hns.py).
+Learning curves:
+
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:64:85"
+```
+
+
+
+
-
-| Environment | CleanRL ppo_atari_envpool_xla_jax.py | openai/baselines' PPO |
-|:--------------------|---------------------------------------:|------------------------:|
-| Alien-v5 | 1744.76 | 1549.42 |
-| Amidar-v5 | 617.137 | 546.406 |
-| Assault-v5 | 5734.04 | 4050.78 |
-| Asterix-v5 | 3341.9 | 3459.9 |
-| Asteroids-v5 | 1669.3 | 1467.19 |
-| Atlantis-v5 | 3.92929e+06 | 3.09748e+06 |
-| BankHeist-v5 | 1192.68 | 1195.34 |
-| BattleZone-v5 | 24937.9 | 20314.3 |
-| BeamRider-v5 | 2447.84 | 2740.02 |
-| Berzerk-v5 | 1082.72 | 887.019 |
-| Bowling-v5 | 44.0681 | 62.2634 |
-| Boxing-v5 | 92.0554 | 93.3596 |
-| Breakout-v5 | 431.795 | 388.891 |
-| Centipede-v5 | 2910.69 | 3688.16 |
-| ChopperCommand-v5 | 5555.84 | 933.333 |
-| CrazyClimber-v5 | 116114 | 111675 |
-| Defender-v5 | 51439.2 | 50045.1 |
-| DemonAttack-v5 | 22824.8 | 12173.9 |
-| DoubleDunk-v5 | -8.56781 | -9 |
-| Enduro-v5 | 1262.79 | 1061.12 |
-| FishingDerby-v5 | 21.6222 | 23.8876 |
-| Freeway-v5 | 33.1075 | 32.9167 |
-| Frostbite-v5 | 904.346 | 924.5 |
-| Gopher-v5 | 11369.6 | 2899.57 |
-| Gravitar-v5 | 1141.95 | 870.755 |
-| Hero-v5 | 24628.3 | 25984.5 |
-| IceHockey-v5 | -4.91917 | -4.71505 |
-| Jamesbond-v5 | 504.105 | 516.489 |
-| Kangaroo-v5 | 7281.59 | 3791.5 |
-| Krull-v5 | 9384.7 | 8672.95 |
-| KungFuMaster-v5 | 26594.5 | 29116.1 |
-| MontezumaRevenge-v5 | 0.240385 | 0 |
-| MsPacman-v5 | 2461.62 | 2113.44 |
-| NameThisGame-v5 | 5442.67 | 5713.89 |
-| Phoenix-v5 | 14008.5 | 8693.21 |
-| Pitfall-v5 | -0.0801282 | -1.47059 |
-| Pong-v5 | 20.309 | 20.4043 |
-| PrivateEye-v5 | 99.5283 | 21.2121 |
-| Qbert-v5 | 16430.7 | 14283.4 |
-| Riverraid-v5 | 8297.21 | 9267.48 |
-| RoadRunner-v5 | 19342.2 | 40325 |
-| Robotank-v5 | 15.45 | 16 |
-| Seaquest-v5 | 1230.02 | 1754.44 |
-| Skiing-v5 | -14684.3 | -13901.7 |
-| Solaris-v5 | 2353.62 | 2088.12 |
-| SpaceInvaders-v5 | 1162.16 | 1017.65 |
-| StarGunner-v5 | 53535.9 | 40906 |
-| Surround-v5 | -2.94558 | -6.08095 |
-| Tennis-v5 | -15.0446 | -9.71429 |
-| TimePilot-v5 | 6224.87 | 5775.53 |
-| Tutankham-v5 | 238.419 | 197.929 |
-| UpNDown-v5 | 430177 | 129459 |
-| Venture-v5 | 0 | 115.278 |
-| VideoPinball-v5 | 42975.3 | 32777.4 |
-| WizardOfWor-v5 | 6247.83 | 5024.03 |
-| YarsRevenge-v5 | 56696.7 | 8238.44 |
-| Zaxxon-v5 | 6015.8 | 6379.79 |
-
-
-
-Median Human Normalized Score (HNS) compared to openai/baselines.
-
-![](../ppo/ppo_atari_envpool_xla_jax/hns_ppo_vs_baselines.svg)
-
-
-Learning curves (left y-axis is the return and right y-axis is the human normalized score):
-
-![](../ppo/ppo_atari_envpool_xla_jax/hms_each_game.svg)
-
-
-Percentage of human normalized score (HMS) for each game.
-![](../ppo/ppo_atari_envpool_xla_jax/runset_0_hms_bar.svg)
???+ info
@@ -812,7 +700,7 @@ The [ppo_atari_envpool_xla_jax_scan.py](https://github.com/vwxyzjn/cleanrl/blob/
```bash
poetry install -E "envpool jax"
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --help
poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --env-id Breakout-v5
```
@@ -839,15 +727,23 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
-Below are the average episodic returns for `ppo_atari_envpool_xla_jax_scan.py` in 3 atari games. It has the same sample efficiency as `ppo_atari_envpool_xla_jax.py`.
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:80:85"
+```
+
+
+{!benchmark/ppo_atari_envpool_xla_jax_scan.md!}
-| | ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-328'], 'user': ['51616']}) | ppo_atari_envpool_xla_jax ({'tag': ['pr-328'], 'user': ['51616']}) | baselines-ppo2-cnn ({}) | ppo_atari_envpool_xla_jax_truncation ({'user': ['costa-huang']}) |
-|:-------------|:--------------------------------------------------------------------------|:---------------------------------------------------------------------|:--------------------------|:-------------------------------------------------------------------|
-| BeamRider-v5 | 2899.62 ± 482.12 | 2222.09 ± 1047.86 | 2835.71 ± 387.92 | 3133.78 ± 293.02 |
-| Breakout-v5 | 451.27 ± 45.52 | 424.97 ± 18.37 | 405.73 ± 11.47 | 465.90 ± 14.30 |
-| Pong-v5 | 20.37 ± 0.20 | 20.59 ± 0.40 | 20.45 ± 0.81 | 20.62 ± 0.18 |
+
+Learning curves:
+
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:87:96"
+```
+
+
+
Learning curves:
@@ -855,15 +751,15 @@ Learning curves:
The trainig time of this variant and that of [ppo_atari_envpool_xla_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_atari_envpool_xla_jax.py) are very similar but the compilation time is reduced significantly (see [vwxyzjn/cleanrl#328](https://github.com/vwxyzjn/cleanrl/pull/328#issuecomment-1340474894)). Note that the hardware also affects the speed in the learning curve below. Runs from [`costa-huang`](https://github.com/vwxyzjn/) (red) are slower from those of [`51616`](https://github.com/51616/) (blue and orange) because of hardware differences.
-![](../ppo/ppo_atari_envpool_xla_jax_scan/compare.png)
-![](../ppo/ppo_atari_envpool_xla_jax_scan/compare-time.png)
+ ![](../ppo/ppo_atari_envpool_xla_jax_scan/compare.png)
+ ![](../ppo/ppo_atari_envpool_xla_jax_scan/compare-time.png)
+
Tracked experiments:
-
## `ppo_procgen.py`
The [ppo_procgen.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_procgen.py) has the following features:
@@ -908,8 +804,10 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:91:100"
+```
We try to match the default setting in [openai/train-procgen](https://github.com/openai/train-procgen) except that we use the `easy` distribution mode and `total_timesteps=25e6` to save compute. Notice [openai/train-procgen](https://github.com/openai/train-procgen) has the following settings:
@@ -921,24 +819,25 @@ Below are the average episodic returns for `ppo_procgen.py`. To ensure the quali
| Environment | `ppo_procgen.py` | `openai/baselies`' PPO (Huang et al., 2022)[^1]
| ----------- | ----------- | ----------- |
-| StarPilot (easy) | 32.47 ± 11.21 | 33.97 ± 7.86 |
-| BossFight (easy) | 9.63 ± 2.35 | 9.35 ± 2.04 |
-| BigFish (easy) | 16.80 ± 9.49 | 20.06 ± 5.34 |
-
+| StarPilot (easy) | 30.99 ± 1.96 | 33.97 ± 7.86 |
+| BossFight (easy) | 8.85 ± 0.33 | 9.35 ± 2.04 |
+| BigFish (easy) | 16.46 ± 2.71 | 20.06 ± 5.34 |
-???+ info
- Note that we have run the procgen experiments using the `easy` distribution for reducing the computational cost.
Learning curves:
-
-
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:98:106"
+```
-
+
+
-
-
+
+???+ info
+
+ Note that we have run the procgen experiments using the `easy` distribution for reducing the computational cost.
Tracked experiments and game play videos:
@@ -1010,13 +909,13 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p
[ppo_atari_multigpu.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_atari_multigpu.py) is based on `ppo_atari.py` (see its [related docs](/rl-algorithms/ppo/#implementation-details_1)).
-We use [Pytorch's distributed API](https://pytorch.org/tutorials/intermediate/dist_tuto.html) to implement the data parallelism paradigm. The basic idea is that the user can spawn $N$ processes each holding a copy of the model, step the environments, and averages their gradients together for the backward pass. Here are a few note-worthy implementation details.
+We use [Pytorch's distributed API](https://pytorch.org/tutorials/intermediate/dist_tuto.html) to implement the data parallelism paradigm. The basic idea is that the user can spawn $N$ processes each running a copy of `ppo_atari.py`, holding a copy of the model, stepping the environments, and averaging their gradients together for the backward pass. Here are a few note-worthy implementation details.
-1. **Shard the environments**: by default, `ppo_atari_multigpu.py` uses `--num-envs=8`. When calling `torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --env-id BreakoutNoFrameskip-v4`, it spawns $N=2$ (by `--nproc_per_node=2`) subprocesses and shard the environments across these 2 subprocesses. In particular, each subprocess will have `8/2=4` environments. Implementation wise, we do `args.num_envs = int(args.num_envs / world_size)`. Here `world_size=2` refers to the size of the **world**, which means the group of subprocesses. We also need to adjust various variables as follows:
- * **batch size**: by default it is `(num_envs * num_steps) = 8 * 128 = 1024` and we adjust it to `(num_envs / world_size * num_steps) = (4 * 128) = 512`.
- * **minibatch size**: by default it is `(num_envs * num_steps) / num_minibatches = (8 * 128) / 4 = 256` and we adjust it to `(num_envs / world_size * num_steps) / num_minibatches = (4 * 128) / 4 = 128`.
- * **number of updates**: by default it is `total_timesteps // batch_size = 10000000 // (8 * 128) = 9765` and we adjust it to `total_timesteps // (batch_size * world_size) = 10000000 // (8 * 128 * 2) = 4882`.
- * **global step increment**: by default it is `num_envs` and we adjust it to `num_envs * world_size`.
+1. **Local versus global parameters**: All of the parameters in `ppo_atari.py` are global (such as batch size), but in `ppo_atari_multigpu.py` we have local parameters as well. Say we run `torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --env-id BreakoutNoFrameskip-v4 --local-num-envs=4`; here is how all multi-gpu related parameters are adjusted:
+ * **number of environments**: `num_envs = local_num_envs * world_size = 4 * 2 = 8`
+    * **batch size**: `local_batch_size = local_num_envs * num_steps = 4 * 128 = 512`, `batch_size = num_envs * num_steps = 8 * 128 = 1024`
+ * **minibatch size**: `local_minibatch_size = int(args.local_batch_size // args.num_minibatches) = 512 // 4 = 128`, `minibatch_size = int(args.batch_size // args.num_minibatches) = 1024 // 4 = 256`
+ * **number of updates**: `num_iterations = args.total_timesteps // args.batch_size = 10000000 // 1024 = 9765`
1. **Adjust seed per process**: we need be very careful with seeding: we could have used the exact same seed for each subprocess. To ensure this does not happen, we do the following
```python hl_lines="2 5 16"
@@ -1070,100 +969,6 @@ We use [Pytorch's distributed API](https://pytorch.org/tutorials/intermediate/di
-We can see how `ppo_atari_multigpu.py` can result in no loss of sample efficiency. In this example, the `ppo_atari.py`'s minibatch size is `256` and the `ppo_atari_multigpu.py`'s minibatch size is `128` with world size 2. Because we average gradient across the world, the gradient under `ppo_atari_multigpu.py` should be virtually the same as the gradient under `ppo_atari.py`.
-
-
-
-
### Experiment results
@@ -1171,35 +976,37 @@ We can see how `ppo_atari_multigpu.py` can result in no loss of sample efficienc
To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command:
-
+``` title="benchmark/ppo.sh" linenums="1"
+--8<-- "benchmark/ppo.sh:102:107"
+```
Below are the average episodic returns for `ppo_atari_multigpu.py`. To ensure no loss of sample efficiency, we compared the results against `ppo_atari.py`.
-| Environment | `ppo_atari_multigpu.py` (in ~160 mins) | `ppo_atari.py` (in ~215 mins)
-| ----------- | ----------- | ----------- |
-| BreakoutNoFrameskip-v4 | 429.06 ± 52.09 | 416.31 ± 43.92 |
-| PongNoFrameskip-v4 | 20.40 ± 0.46 | 20.59 ± 0.35 |
-| BeamRiderNoFrameskip-v4 | 2454.54 ± 740.49 | 2445.38 ± 528.91 |
+
+{!benchmark/ppo_atari_multigpu.md!}
Learning curves:
-
-
-
+``` title="benchmark/ppo_plot.sh" linenums="1"
+--8<-- "benchmark/ppo_plot.sh:108:117"
+```
+
+
+
-
-
-
-
-
Under the same hardware, we see that `ppo_atari_multigpu.py` is about **30% faster** than `ppo_atari.py` with no loss of sample efficiency.
+???+ info
+
+    The experiments above are to show correctness -- we show that by aligning the same hyperparameters of `ppo_atari.py` and `ppo_atari_multigpu.py`, we can achieve the same sample efficiency. However, we can train even faster by simply running a much larger batch size. For example, we can run `torchrun --standalone --nnodes=1 --nproc_per_node=8 cleanrl/ppo_atari_multigpu.py --env-id BreakoutNoFrameskip-v4 --local-num-envs=8`, which will run 8 x 8 = 64 environments in parallel and achieve a batch size of 64 x 128 = 8192. This will likely result in a decrease in sample efficiency but should increase the wall time efficiency.
+
+
???+ info
Although `ppo_atari_multigpu.py` is 30% faster than `ppo_atari.py`, `ppo_atari_multigpu.py` is still slower than `ppo_atari_envpool.py`, as shown below. This comparison really highlights the different kinds of optimization possible.
diff --git a/docs/rl-algorithms/qdagger.md b/docs/rl-algorithms/qdagger.md
index 678c73818..bdb9f2690 100644
--- a/docs/rl-algorithms/qdagger.md
+++ b/docs/rl-algorithms/qdagger.md
@@ -143,7 +143,7 @@ The [qdagger_dqn_atari_jax_impalacnn.py](https://github.com/vwxyzjn/cleanrl/blob
```bash
poetry install -E "atari jax"
- poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+ poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --env-id BreakoutNoFrameskip-v4
poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --env-id PongNoFrameskip-v4
```
diff --git a/docs/rl-algorithms/sac.md b/docs/rl-algorithms/sac.md
index 0bbc119dd..1594ac2c5 100644
--- a/docs/rl-algorithms/sac.md
+++ b/docs/rl-algorithms/sac.md
@@ -200,11 +200,13 @@ CleanRL's [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/m
3. [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/sac_continuous_action.py) uses `--batch-size=256` while :material-github: [openai/spinningup](https://github.com/openai/spinningup/blob/038665d62d569055401d91856abb287263096178/spinup/algos/tf1/sac/sac.py#L44)'s uses `--batch-size=100` by default.
-### Pybullet experiment results for SAC
+### Experiment results
To run benchmark experiments, see :material-github: [benchmark/sac.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/sac.sh). Specifically, execute the following command:
-
+``` title="benchmark/sac.sh" linenums="1"
+--8<-- "benchmark/sac.sh::7"
+```
The table below compares the results of CleanRL's [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/sac_continuous_action.py) with the [latest published results](https://arxiv.org/abs/1812.05905) by the original authors of the SAC algorithm.
@@ -213,19 +215,22 @@ The table below compares the results of CleanRL's [`sac_continuous_action.py`](h
| Environment | [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/sac_continuous_action.py) |[SAC: Algorithms and Applications](https://arxiv.org/abs/1812.05905) @ 1M steps|
| --------------- | ------------------ | ---------------- |
-| HalfCheetah-v2 | 10310.37 ± 1873.21 | ~11,250 |
-| Walker2d-v2 | 4418.15 ± 592.82 | ~4,800 |
-| Hopper-v2 | 2685.76 ± 762.16 | ~3,250 |
+| HalfCheetah-v2 | 9634.89 ± 1423.73 | ~11,250 |
+| Walker2d-v2 | 3591.45 ± 911.33 | ~4,800 |
+| Hopper-v2 | 2310.46 ± 342.82 | ~3,250 |
+| InvertedPendulum-v4 | 909.37 ± 55.66 | N/A |
+| Humanoid-v4 | 4996.29 ± 686.40 | ~4500
+| Pusher-v4 | -22.45 ± 0.51 | N/A |
Learning curves:
-
+``` title="benchmark/sac_plot.sh" linenums="1"
+--8<-- "benchmark/sac_plot.sh::9"
+```
-
+
+
+
Tracked experiments and game play videos:
diff --git a/docs/rl-algorithms/td3.md b/docs/rl-algorithms/td3.md
index e1d595f11..6bf4494f9 100644
--- a/docs/rl-algorithms/td3.md
+++ b/docs/rl-algorithms/td3.md
@@ -42,8 +42,6 @@ The [td3_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl
poetry install -E mujoco
poetry run python cleanrl/td3_continuous_action.py --help
poetry run python cleanrl/td3_continuous_action.py --env-id Hopper-v4
- poetry install -E mujoco_py # only works in Linux
- poetry run python cleanrl/td3_continuous_action.py --env-id Hopper-v2
```
=== "pip"
@@ -52,8 +50,6 @@ The [td3_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl
pip install -r requirements/requirements-mujoco.txt
python cleanrl/td3_continuous_action.py --help
python cleanrl/td3_continuous_action.py --env-id Hopper-v4
- pip install -r requirements/requirements-mujoco_py.txt # only works in Linux,
- python cleanrl/td3_continuous_action.py --env-id Hopper-v2
```
### Explanation of the logged metrics
@@ -128,25 +124,27 @@ Additionally, when drawing exploration noise that is added to the actions produc
To run benchmark experiments, see :material-github: [benchmark/td3.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/td3.sh). Specifically, execute the following command:
-
+``` title="benchmark/td3.sh" linenums="1"
+--8<-- "benchmark/td3.sh::7"
+```
Below are the average episodic returns for [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2].
| Environment | [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) | [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018, Table 1)[^2] |
| ----------- | ----------- | ----------- |
-| HalfCheetah | 9449.94 ± 1586.49 |9636.95 ± 859.065 |
-| Walker2d | 3851.55 ± 335.29 | 4682.82 ± 539.64 |
-| Hopper | 3162.21 ± 261.08 | 3564.07 ± 114.74 |
-| Humanoid | 5011.05 ± 254.89 | not available |
-| Pusher | -37.49 ± 10.22 | not available |
-| InvertedPendulum | 996.81 ± 4.50 | 1000.00 ± 0.00 |
+| HalfCheetah-v4 | 9583.22 ± 126.09 |9636.95 ± 859.065 |
+| Walker2d-v4 | 4057.59 ± 658.78 | 4682.82 ± 539.64 |
+| Hopper-v4 | 3134.61 ± 360.18 | 3564.07 ± 114.74 |
+| InvertedPendulum-v4 | 968.99 ± 25.80 | 1000.00 ± 0.00 |
+| Humanoid-v4 | 5035.36 ± 21.67 | not available |
+| Pusher-v4 | -30.92 ± 1.05 | not available |
???+ info
- Note that [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) uses gym MuJoCo v2 environments while [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments. According to the :material-github: [openai/gym#834](https://github.com/openai/gym/pull/834), gym MuJoCo v2 environments should be equivalent to the gym MuJoCo v1 environments.
+ Note that [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) uses gym MuJoCo v4 environments while [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments.
Also note the performance of our `td3_continuous_action.py` seems to be worse than the reference implementation on Walker2d. This is likely due to :material-github: [openai/gym#938](https://github.com/openai/baselines/issues/938). We would have a hard time reproducing gym MuJoCo v1 environments because they have been long deprecated.
@@ -154,20 +152,12 @@ Below are the average episodic returns for [`td3_continuous_action.py`](https://
Learning curves:
-
+``` title="benchmark/td3_plot.sh" linenums="1"
+--8<-- "benchmark/td3_plot.sh::9"
+```
+
+
Tracked experiments and game play videos:
@@ -194,8 +184,6 @@ The [td3_continuous_action_jax.py](https://github.com/vwxyzjn/cleanrl/blob/maste
poetry install -E "mujoco jax"
poetry run python cleanrl/td3_continuous_action_jax.py --help
poetry run python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v4
- poetry install -E mujoco_py # only works in Linux
- poetry run python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v2
```
=== "pip"
@@ -205,8 +193,6 @@ The [td3_continuous_action_jax.py](https://github.com/vwxyzjn/cleanrl/blob/maste
pip install -r requirements/requirements-jax.txt
python cleanrl/td3_continuous_action_jax.py --help
python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v4
- pip install -r requirements/requirements-mujoco_py.txt # only works in Linux
- python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v2
```
### Explanation of the logged metrics
@@ -223,42 +209,53 @@ See [related docs](/rl-algorithms/td3/#implementation-details) for `td3_continuo
To run benchmark experiments, see :material-github: [benchmark/td3.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/td3.sh). Specifically, execute the following command:
-
+``` title="benchmark/td3.sh" linenums="1"
+--8<-- "benchmark/td3.sh:12:19"
+```
-Below are the average episodic returns for [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2].
+Below are the average episodic returns for [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) (3 random seeds).
-| Environment | [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) (RTX 3060 TI) | [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) (VM w/ TPU) | [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) (RTX 3060 TI) | [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018, Table 1)[^2] |
-| ----------- | ----------- | ----------- | ----------- | ----------- |
-| HalfCheetah | 9408.62 ± 473.23 | 8948.33 ± 1196.87 | 9449.94 ± 1586.49 |9636.95 ± 859.065 |
-| Walker2d | 3512.14 ± 1576.59 | 4107.63 ± 173.93 | 3851.55 ± 335.29 | 4682.82 ± 539.64 |
-| Hopper | 2898.62 ± 485.18 | 3151.80 ± 458.68 | 3162.21 ± 261.08 | 3564.07 ± 114.74 |
+{!benchmark/td3.md!}
+Learning curves:
+
+
+``` title="benchmark/td3_plot.sh" linenums="1"
+--8<-- "benchmark/td3_plot.sh:11:20"
+```
+
+
+
???+ info
+ These are some previous experiments with TPUs. Note the results are very similar to the ones above, but the runtime can be different due to different hardware used.
+
Note that the experiments were conducted on different hardwares, so your mileage might vary. This inconsistency is because 1) re-running expeirments on the same hardware is computationally expensive and 2) requiring the same hardware is not inclusive nor feasible to other contributors who might have different hardwares.
- That said, we roughly expect to see a 2-4x speed improvement from using [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) under the same hardware. And if you disable the `--capture-video` overhead, the speed improvement will be even higher.
+ That said, we roughly expect to see a 2-4x speed improvement from using [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) under the same hardware. And if you disable the `--capture_video` overhead, the speed improvement will be even higher.
+ Learning curves:
-Learning curves:
-
-Tracked experiments and game play videos:
+ Tracked experiments and game play videos:
+
+
+
-
[^1]:Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N.M., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. CoRR, abs/1509.02971. https://arxiv.org/abs/1509.02971
diff --git a/poetry.lock b/poetry.lock
index 21c891792..f30baf28c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,10 +1,9 @@
-# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
[[package]]
name = "absl-py"
version = "1.4.0"
description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -12,59 +11,47 @@ files = [
{file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"},
]
-[[package]]
-name = "aiosignal"
-version = "1.3.1"
-description = "aiosignal: a list of registered asynchronous callbacks"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
- {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
-]
-
-[package.dependencies]
-frozenlist = ">=1.1.0"
-
[[package]]
name = "ale-py"
-version = "0.7.4"
+version = "0.8.1"
description = "The Arcade Learning Environment (ALE) - a platform for AI research."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
- {file = "ale_py-0.7.4-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:418eea1539c2669c799274fedead4d44d05dfc3dcd6c536378d5984c42bc340b"},
- {file = "ale_py-0.7.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:38e4823be04761a2ebc0167ed710a318cc9f0fec3815576c45030fe8e67f9c98"},
- {file = "ale_py-0.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9af49488ec1b4facb299975a665e9e706921dd2d756daad813e2897debc5fc3c"},
- {file = "ale_py-0.7.4-cp310-cp310-win_amd64.whl", hash = "sha256:f600c55d6a7c6c30f5592b30afc34366101fc7561842bdd5740d5bca390201eb"},
- {file = "ale_py-0.7.4-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:da3e1400e02fb46659dfb3af92e8a4cf4c5b2d4f9d19a008ce9d5fa8eebb4ab6"},
- {file = "ale_py-0.7.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c073005b68901f0003ffe871d56021245eda9e88f27cc91745627c099932499f"},
- {file = "ale_py-0.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:913394ad1dbe22a8d489378d702f296234721ca0a0e76e5354764e8bf40bc623"},
- {file = "ale_py-0.7.4-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:4841f395e3166d4a7b1e9207cafab08de4b9e9b4178afd97a36f53844ade98a2"},
- {file = "ale_py-0.7.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5b2899b4cf659bc14a20047455e681e991cb96ceed937d22a5dac1a97a16bf3e"},
- {file = "ale_py-0.7.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9aff7a8ce37d00a87ef4114666db0b45d499744d08f5ff1683dbbbcac4783569"},
- {file = "ale_py-0.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:a23f4c858a2c5cbfa3c0cb2c9ab167359c368104b67e19b332710c19b43c6091"},
- {file = "ale_py-0.7.4-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:0b9ab62f12a325e92ba2af99c5b231ad3b219a46913b14068c857d37837025fb"},
- {file = "ale_py-0.7.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:269dcf94024ba7a8276d4dcf04c526df695cb383aa2372e9903a08ec6f679262"},
- {file = "ale_py-0.7.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f65371c180779b115d8600d99780e9e83b229812e94c6b49be1686ce4d82573"},
- {file = "ale_py-0.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:b53e7d0c8f8e8610ebaec88887da2427ce16811f9697ccbb39588ec784bea145"},
+ {file = "ale_py-0.8.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:b2aa2f69a4169742800615970efe6914fa856e33eaf7fa9133c0e06a617a80e2"},
+ {file = "ale_py-0.8.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f2f6b92c8fd6189654979bbf0b305dbe0ecf82176c47f244d8c1cbc36286b89"},
+ {file = "ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9b168eb88c87d0f3e2a778e6c5cdde4ad951d1ca8a6dc3d3679fd45398df7d1"},
+ {file = "ale_py-0.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:5fcc31f495de79ee1d6bfc0f4b7c4619948851e679bbf010035e25f23146a687"},
+ {file = "ale_py-0.8.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10"},
+ {file = "ale_py-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f10b1df8774bbe3b00365748b5e0e07cf35f6a703bbaff991bc7b3b2247dccc9"},
+ {file = "ale_py-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09"},
+ {file = "ale_py-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:9773eea7505484e024beb2fff0f3bfd363db151bdb9799d70995448e196b1ded"},
+ {file = "ale_py-0.8.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:87557db05be0e04130e2ec1bf909d3bb0b0bc034645d4f664e6baa573fe32191"},
+ {file = "ale_py-0.8.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae2ba24557e0ce541ea3be13b148db2a9cfa730d83537b4cbed5e10449826e51"},
+ {file = "ale_py-0.8.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ade5c32af567629164a6b49378978c728a15dc4db07ad6b679e8832d4fd3ea1f"},
+ {file = "ale_py-0.8.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18"},
+ {file = "ale_py-0.8.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7cd74b7ee0248ef11a086c9764e142e71defd40ec8989a99232bfd2d9e8023be"},
+ {file = "ale_py-0.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eadf9f3990b4ff2f9e5ca35889f5e2e95cddd6a353d9d857d9b4601a6e1c4e7c"},
+ {file = "ale_py-0.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:817adf9a3a82c4923c731e634520a5ecf296aca0367f5c69959a96b32119d831"},
+ {file = "ale_py-0.8.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:2d9fcfa06c74a613c5419e942ef4d3e0959533f52e94d2d4bda61d07fbfffeee"},
+ {file = "ale_py-0.8.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f278036f9b6066062abcdf0987a0ec5a8e0f22a2c7cfac925e39378d4343d490"},
+ {file = "ale_py-0.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b00f74e27815131c1a2791f3d48114363fa2708e19f09ce6b7b614cb14c9d469"},
+ {file = "ale_py-0.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:d49b550a2d9c25b63c343aa680fd81f253a3714cdc0e1835640933ebff1798ff"},
]
[package.dependencies]
importlib-metadata = {version = ">=4.10.0", markers = "python_version < \"3.10\""}
importlib-resources = "*"
numpy = "*"
+typing-extensions = {version = "*", markers = "python_version < \"3.11\""}
[package.extras]
-test = ["gym", "pytest"]
+test = ["gym (>=0.23,<1.0)", "pytest (>=7.0)"]
[[package]]
name = "alembic"
version = "1.10.4"
description = "A database migration tool for SQLAlchemy."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -82,22 +69,10 @@ typing-extensions = ">=4"
[package.extras]
tz = ["python-dateutil"]
-[[package]]
-name = "antlr4-python3-runtime"
-version = "4.9.3"
-description = "ANTLR 4.9.3 runtime for Python 3.7"
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"},
-]
-
[[package]]
name = "appdirs"
version = "1.4.4"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -105,33 +80,10 @@ files = [
{file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
]
-[[package]]
-name = "attrs"
-version = "23.1.0"
-description = "Classes Without Boilerplate"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
- {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
-]
-
-[package.dependencies]
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
-
-[package.extras]
-cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
-dev = ["attrs[docs,tests]", "pre-commit"]
-docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
-tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
-
[[package]]
name = "autorom"
version = "0.4.2"
description = "Automated installation of Atari ROMs for Gym/ALE-Py"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -153,7 +105,6 @@ accept-rom-license = ["AutoROM.accept-rom-license"]
name = "autorom-accept-rom-license"
version = "0.6.1"
description = "Automated installation of Atari ROMs for Gym/ALE-Py"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -170,29 +121,27 @@ tests = ["ale_py", "multi_agent_ale_py"]
[[package]]
name = "awscli"
-version = "1.27.132"
+version = "1.31.0"
description = "Universal Command Line Environment for AWS."
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
- {file = "awscli-1.27.132-py3-none-any.whl", hash = "sha256:85e183827e2f89f7de2d3ccea2f4cf10dc48c763fabec1fd11546eca9324fd0e"},
- {file = "awscli-1.27.132.tar.gz", hash = "sha256:cf1464b9dc1ffdee6c1e5f8e33cb9f5d0c7e1feb1e7638c8b437df3f6f40f186"},
+ {file = "awscli-1.31.0-py3-none-any.whl", hash = "sha256:182499f95fd3a6bf7d6ebd72ee68609990008c64a3646161b80023d3c9e42e95"},
+ {file = "awscli-1.31.0.tar.gz", hash = "sha256:6e8d396a8fb95fcdb8d2713153596ce0d8d4a1f62ab9e365e832e10f78f4237e"},
]
[package.dependencies]
-botocore = "1.29.132"
+botocore = "1.33.0"
colorama = ">=0.2.5,<0.4.5"
docutils = ">=0.10,<0.17"
-PyYAML = ">=3.10,<5.5"
+PyYAML = ">=3.10,<6.1"
rsa = ">=3.1.2,<4.8"
-s3transfer = ">=0.6.0,<0.7.0"
+s3transfer = ">=0.8.0,<0.9.0"
[[package]]
name = "bitmath"
version = "1.3.3.1"
description = "Pythonic module for representing and manipulating file sizes with different prefix notations (file size unit conversion)"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -201,49 +150,49 @@ files = [
[[package]]
name = "boto3"
-version = "1.26.132"
+version = "1.33.0"
description = "The AWS SDK for Python"
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
- {file = "boto3-1.26.132-py3-none-any.whl", hash = "sha256:e579b70028cdc4194fe92c745256b04880e7db39259a4c8a61b71117713d3c17"},
- {file = "boto3-1.26.132.tar.gz", hash = "sha256:d45672571da9bf4ba130d525832013aef95aee83b1711e847ef7cdb54cc5ac41"},
+ {file = "boto3-1.33.0-py3-none-any.whl", hash = "sha256:799fe8399ea132aa5aa868caf78c47ef9ed675d5ef61be97cb7131081bb8a861"},
+ {file = "boto3-1.33.0.tar.gz", hash = "sha256:ebf6d86217c37986f965dbe35a3bbd0318127d23a65737ab6486667496decb54"},
]
[package.dependencies]
-botocore = ">=1.29.132,<1.30.0"
+botocore = ">=1.33.0,<1.34.0"
jmespath = ">=0.7.1,<2.0.0"
-s3transfer = ">=0.6.0,<0.7.0"
+s3transfer = ">=0.8.0,<0.9.0"
[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
-version = "1.29.132"
+version = "1.33.0"
description = "Low-level, data-driven core of boto 3."
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
- {file = "botocore-1.29.132-py3-none-any.whl", hash = "sha256:422186c13406a2c2668e4b2d9070097b4b024a9290a6af2a8e21eb2bd17322d6"},
- {file = "botocore-1.29.132.tar.gz", hash = "sha256:9b6d2b60325b815ff9123f172af83b7b866c8813088d969eeb9030fa189417f6"},
+ {file = "botocore-1.33.0-py3-none-any.whl", hash = "sha256:ccf3d67fd046265ae73bc9862d1618c6e774a61a96beac832edb63d9a21fe1ba"},
+ {file = "botocore-1.33.0.tar.gz", hash = "sha256:e35526421fe8ee180b6aed3102929594aa51e4d60e3f29366a603707c37c0d52"},
]
[package.dependencies]
jmespath = ">=0.7.1,<2.0.0"
python-dateutil = ">=2.1,<3.0.0"
-urllib3 = ">=1.25.4,<1.27"
+urllib3 = [
+ {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""},
+ {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""},
+]
[package.extras]
-crt = ["awscrt (==0.16.9)"]
+crt = ["awscrt (==0.19.17)"]
[[package]]
name = "bottle"
version = "0.12.25"
description = "Fast and simple WSGI-framework for small web-applications."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -255,7 +204,6 @@ files = [
name = "cached-property"
version = "1.5.2"
description = "A decorator for caching properties in classes."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -267,7 +215,6 @@ files = [
name = "cachetools"
version = "5.3.0"
description = "Extensible memoizing collections and decorators"
-category = "main"
optional = false
python-versions = "~=3.7"
files = [
@@ -279,7 +226,6 @@ files = [
name = "certifi"
version = "2023.5.7"
description = "Python package for providing Mozilla's CA Bundle."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -291,7 +237,6 @@ files = [
name = "cffi"
version = "1.15.1"
description = "Foreign Function Interface for Python calling C code."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -368,7 +313,6 @@ pycparser = "*"
name = "cfgv"
version = "3.3.1"
description = "Validate configuration and produce human readable error messages."
-category = "dev"
optional = false
python-versions = ">=3.6.1"
files = [
@@ -380,7 +324,6 @@ files = [
name = "chardet"
version = "4.0.0"
description = "Universal encoding detector for Python 2 and 3"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -392,7 +335,6 @@ files = [
name = "charset-normalizer"
version = "3.1.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-category = "main"
optional = false
python-versions = ">=3.7.0"
files = [
@@ -477,7 +419,6 @@ files = [
name = "chex"
version = "0.1.5"
description = "Chex: Testing made fun, in JAX!"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -497,7 +438,6 @@ toolz = ">=0.9.0"
name = "click"
version = "8.1.3"
description = "Composable command line interface toolkit"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -507,13 +447,11 @@ files = [
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
[[package]]
name = "cloudpickle"
version = "2.2.1"
description = "Extended pickling support for Python objects"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -523,14 +461,13 @@ files = [
[[package]]
name = "cmaes"
-version = "0.9.1"
+version = "0.10.0"
description = "Lightweight Covariance Matrix Adaptation Evolution Strategy (CMA-ES) implementation for Python 3."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
- {file = "cmaes-0.9.1-py3-none-any.whl", hash = "sha256:6e2930b6a99dd94621bf62966c13d29e6a7f90a909b4e4266010d5f3a7fb74b8"},
- {file = "cmaes-0.9.1.tar.gz", hash = "sha256:d122f8d46377f643a150c85ffc81c4e33909a34cfdcb522ee7a6fb17ea4f232c"},
+ {file = "cmaes-0.10.0-py3-none-any.whl", hash = "sha256:72cea747ad37b1780b0eb6f3c098cee33907fafbf6690c0c02db1e010cab72f6"},
+ {file = "cmaes-0.10.0.tar.gz", hash = "sha256:48afc70df027114739872b50489ae6b32461c307b92d084a63c7090a9742faf9"},
]
[package.dependencies]
@@ -543,7 +480,6 @@ cmawm = ["scipy"]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -555,7 +491,6 @@ files = [
name = "colorlog"
version = "6.7.0"
description = "Add colours to the output of Python's logging module."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -573,7 +508,6 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"]
name = "commonmark"
version = "0.9.1"
description = "Python parser for the CommonMark Markdown spec"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -588,7 +522,6 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
name = "cycler"
version = "0.11.0"
description = "Composable style cycles"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -596,61 +529,10 @@ files = [
{file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"},
]
-[[package]]
-name = "cython"
-version = "0.29.34"
-description = "The Cython compiler for writing C extensions for the Python language."
-category = "main"
-optional = true
-python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
- {file = "Cython-0.29.34-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:742544024ddb74314e2d597accdb747ed76bd126e61fcf49940a5b5be0a8f381"},
- {file = "Cython-0.29.34-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03daae07f8cbf797506446adae512c3dd86e7f27a62a541fa1ee254baf43e32c"},
- {file = "Cython-0.29.34-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5a8de3e793a576e40ca9b4f5518610cd416273c7dc5e254115656b6e4ec70663"},
- {file = "Cython-0.29.34-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:60969d38e6a456a67e7ef8ae20668eff54e32ba439d4068ccf2854a44275a30f"},
- {file = "Cython-0.29.34-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:21b88200620d80cfe193d199b259cdad2b9af56f916f0f7f474b5a3631ca0caa"},
- {file = "Cython-0.29.34-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:308c8f1e58bf5e6e8a1c4dcf8abbd2d13d0f9b1e582f4d9ae8b89857342d8bb5"},
- {file = "Cython-0.29.34-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:d8f822fb6ecd5d88c42136561f82960612421154fc5bf23c57103a367bb91356"},
- {file = "Cython-0.29.34-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56866323f1660cecb4d5ff3a1fba92a56b91b7cfae0a8253777aa4bdb3bdf9a8"},
- {file = "Cython-0.29.34-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e971db8aeb12e7c0697cefafe65eefcc33ff1224ae3d8c7f83346cbc42c6c270"},
- {file = "Cython-0.29.34-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4401270b0dc464c23671e2e9d52a60985f988318febaf51b047190e855bbe7d"},
- {file = "Cython-0.29.34-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:dce0a36d163c05ae8b21200059511217d79b47baf2b7b0f926e8367bd7a3cc24"},
- {file = "Cython-0.29.34-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dbd79221869ee9a6ccc4953b2c8838bb6ae08ab4d50ea4b60d7894f03739417b"},
- {file = "Cython-0.29.34-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a0f4229df10bc4545ebbeaaf96ebb706011d8b333e54ed202beb03f2bee0a50e"},
- {file = "Cython-0.29.34-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fd1ea21f1cebf33ae288caa0f3e9b5563a709f4df8925d53bad99be693fc0d9b"},
- {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d7ef5f68f4c5baa93349ea54a352f8716d18bee9a37f3e93eff38a5d4e9b7262"},
- {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:459994d1de0f99bb18fad9f2325f760c4b392b1324aef37bcc1cd94922dfce41"},
- {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:1d6c809e2f9ce5950bbc52a1d2352ef3d4fc56186b64cb0d50c8c5a3c1d17661"},
- {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f674ceb5f722d364395f180fbac273072fc1a266aab924acc9cfd5afc645aae1"},
- {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9489de5b2044dcdfd9d6ca8242a02d560137b3c41b1f5ae1c4f6707d66d6e44d"},
- {file = "Cython-0.29.34-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:5c121dc185040f4333bfded68963b4529698e1b6d994da56be32c97a90c896b6"},
- {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:b6149f7cc5b31bccb158c5b968e5a8d374fdc629792e7b928a9b66e08b03fca5"},
- {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0ab3cbf3d62b0354631a45dc93cfcdf79098663b1c65a6033af4a452b52217a7"},
- {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:4a2723447d1334484681d5aede34184f2da66317891f94b80e693a2f96a8f1a7"},
- {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e40cf86aadc29ecd1cb6de67b0d9488705865deea4fc185c7ad56d7a6fc78703"},
- {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8c3cd8bb8e880a3346f5685601004d96e0a2221e73edcaeea57ea848618b4ac6"},
- {file = "Cython-0.29.34-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0e9032cd650b0cb1d2c2ef2623f5714c14d14c28d7647d589c3eeed0baf7428e"},
- {file = "Cython-0.29.34-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bdb3285660e3068438791ace7dd7b1efd6b442a10b5c8d7a4f0c9d184d08c8ed"},
- {file = "Cython-0.29.34-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a8ad755f9364e720f10a36734a1c7a5ced5c679446718b589259261438a517c9"},
- {file = "Cython-0.29.34-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:7595d29eaee95633dd8060f50f0e54b27472d01587659557ebcfe39da3ea946b"},
- {file = "Cython-0.29.34-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e6ef7879668214d80ea3914c17e7d4e1ebf4242e0dd4dabe95ca5ccbe75589a5"},
- {file = "Cython-0.29.34-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ccb223b5f0fd95d8d27561efc0c14502c0945f1a32274835831efa5d5baddfc1"},
- {file = "Cython-0.29.34-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11b1b278b8edef215caaa5250ad65a10023bfa0b5a93c776552248fc6f60098d"},
- {file = "Cython-0.29.34-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5718319a01489688fdd22ddebb8e2fcbbd60be5f30de4336ea7063c3ae29fbe5"},
- {file = "Cython-0.29.34-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:cfb2302ef617d647ee590a4c0a00ba3c2da05f301dcefe7721125565d2e51351"},
- {file = "Cython-0.29.34-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:67b850cf46b861bc27226d31e1d87c0e69869a02f8d3cc5d5bef549764029879"},
- {file = "Cython-0.29.34-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0963266dad685812c1dbb758fcd4de78290e3adc7db271c8664dcde27380b13e"},
- {file = "Cython-0.29.34-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7879992487d9060a61393eeefe00d299210256928dce44d887b6be313d342bac"},
- {file = "Cython-0.29.34-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:44733366f1604b0c327613b6918469284878d2f5084297d10d26072fc6948d51"},
- {file = "Cython-0.29.34-py2.py3-none-any.whl", hash = "sha256:be4f6b7be75a201c290c8611c0978549c60353890204573078e865423dbe3c83"},
- {file = "Cython-0.29.34.tar.gz", hash = "sha256:1909688f5d7b521a60c396d20bba9e47a1b2d2784bfb085401e1e1e7d29a29a8"},
-]
-
[[package]]
name = "dataclasses"
version = "0.6"
description = "A backport of the dataclasses module for Python 3.6"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -662,7 +544,6 @@ files = [
name = "decorator"
version = "4.4.2"
description = "Decorators for Humans"
-category = "main"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*"
files = [
@@ -674,7 +555,6 @@ files = [
name = "dill"
version = "0.3.6"
description = "serialize all of python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -689,7 +569,6 @@ graph = ["objgraph (>=1.7.2)"]
name = "distlib"
version = "0.3.6"
description = "Distribution utilities"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -701,7 +580,6 @@ files = [
name = "dm-control"
version = "1.0.11"
description = "Continuous control environments and MuJoCo Python bindings."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -733,7 +611,6 @@ hdf5 = ["h5py"]
name = "dm-env"
version = "1.6"
description = "A Python interface for Reinforcement Learning environments."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -750,7 +627,6 @@ numpy = "*"
name = "dm-tree"
version = "0.1.8"
description = "Tree is a library for working with nested data structures."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -799,7 +675,6 @@ files = [
name = "docker-pycreds"
version = "0.4.0"
description = "Python bindings for the docker credentials store API"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -810,11 +685,21 @@ files = [
[package.dependencies]
six = ">=1.4.0"
+[[package]]
+name = "docstring-parser"
+version = "0.15"
+description = "Parse Python docstrings in reST, Google and Numpydoc format"
+optional = false
+python-versions = ">=3.6,<4.0"
+files = [
+ {file = "docstring_parser-0.15-py3-none-any.whl", hash = "sha256:d1679b86250d269d06a99670924d6bce45adc00b08069dae8c47d98e89b667a9"},
+ {file = "docstring_parser-0.15.tar.gz", hash = "sha256:48ddc093e8b1865899956fcc03b03e66bb7240c310fac5af81814580c55bf682"},
+]
+
[[package]]
name = "docutils"
version = "0.16"
description = "Docutils -- Python Documentation Utilities"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -826,7 +711,6 @@ files = [
name = "enum-tools"
version = "0.9.0.post1"
description = "Tools to expand Python's enum module."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -846,7 +730,6 @@ sphinx = ["sphinx (>=3.2.0)", "sphinx-toolbox (>=2.16.0)"]
name = "envpool"
version = "0.6.6"
description = "\"C++-based high-performance parallel environment execution engine (vectorized env) for general RL environments.\""
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -869,7 +752,6 @@ typing-extensions = "*"
name = "etils"
version = "0.9.0"
description = "Collection of common python utils"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -898,7 +780,6 @@ lazy-imports = ["etils[ecolab]"]
name = "exceptiongroup"
version = "1.1.1"
description = "Backport of PEP 654 (exception groups)"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -913,7 +794,6 @@ test = ["pytest (>=6)"]
name = "expt"
version = "0.4.1"
description = "EXperiment. Plot. Tabulate."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -936,7 +816,6 @@ test = ["mock (>=2.0.0)", "pytest (>=5.0)", "pytest-asyncio", "pytest-cov", "ten
name = "farama-notifications"
version = "0.0.4"
description = "Notifications for all Farama Foundation maintained libraries."
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -944,27 +823,10 @@ files = [
{file = "Farama_Notifications-0.0.4-py3-none-any.whl", hash = "sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae"},
]
-[[package]]
-name = "fasteners"
-version = "0.15"
-description = "A python package that provides useful locks."
-category = "main"
-optional = true
-python-versions = "*"
-files = [
- {file = "fasteners-0.15-py2.py3-none-any.whl", hash = "sha256:007e4d2b2d4a10093f67e932e5166722d2eab83b77724156e92ad013c6226574"},
- {file = "fasteners-0.15.tar.gz", hash = "sha256:3a176da6b70df9bb88498e1a18a9e4a8579ed5b9141207762368a1017bf8f5ef"},
-]
-
-[package.dependencies]
-monotonic = ">=0.1"
-six = "*"
-
[[package]]
name = "filelock"
version = "3.12.0"
description = "A platform independent file lock."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -978,19 +840,17 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "p
[[package]]
name = "flax"
-version = "0.6.4"
+version = "0.6.8"
description = "Flax: A neural network library for JAX designed for flexibility"
-category = "main"
optional = true
python-versions = "*"
files = [
- {file = "flax-0.6.4-py3-none-any.whl", hash = "sha256:fe5010525202241fdc960920033d2e4c0b35f06090c1ad9e280b1f4415ae308f"},
- {file = "flax-0.6.4.tar.gz", hash = "sha256:d06465a3e6636c3c23c29f651a13f5367d06c41373b441dc8ec1bfaa4db06a48"},
+ {file = "flax-0.6.8-py3-none-any.whl", hash = "sha256:221225804c263e39fe3cc8f754dc4192597cb0f063926b2338ea6563604747ed"},
+ {file = "flax-0.6.8.tar.gz", hash = "sha256:bf1f81dd5dfbb10c603490531a86b1174ebbc38e5c5e8116a98115c135194c10"},
]
[package.dependencies]
-jax = ">=0.3.16"
-matplotlib = "*"
+jax = ">=0.4.2"
msgpack = "*"
numpy = ">=1.12"
optax = "*"
@@ -1001,13 +861,13 @@ tensorstore = "*"
typing-extensions = ">=4.1.1"
[package.extras]
-testing = ["atari-py (==0.2.5)", "clu", "gym (==0.18.3)", "jaxlib", "jraph (>=0.0.6dev0)", "ml-collections", "mypy", "opencv-python", "pytest", "pytest-cov", "pytest-custom-exit-code", "pytest-xdist (==1.34.0)", "pytype", "sentencepiece", "tensorflow", "tensorflow-datasets", "tensorflow-text (>=2.4.0)", "torch"]
+all = ["matplotlib"]
+testing = ["atari-py (==0.2.5)", "clu", "einops", "gym (==0.18.3)", "jaxlib", "jraph (>=0.0.6dev0)", "ml-collections", "mypy", "nbstripout", "opencv-python", "pytest", "pytest-cov", "pytest-custom-exit-code", "pytest-xdist (==1.34.0)", "pytype", "sentencepiece", "tensorflow", "tensorflow-datasets", "tensorflow-text (>=2.11.0)", "torch"]
[[package]]
name = "fonttools"
version = "4.38.0"
description = "Tools to manipulate font files"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1029,115 +889,10 @@ ufo = ["fs (>=2.2.0,<3)"]
unicode = ["unicodedata2 (>=14.0.0)"]
woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
-[[package]]
-name = "free-mujoco-py"
-version = "2.1.6"
-description = ""
-category = "main"
-optional = true
-python-versions = ">=3.7.1,<3.11"
-files = [
- {file = "free-mujoco-py-2.1.6.tar.gz", hash = "sha256:77e18302e21979bbd77a7c1584070815843cab1b1249f8a17667e15aba528a9a"},
- {file = "free_mujoco_py-2.1.6-py3-none-any.whl", hash = "sha256:f541d84b6bd87919ccf28f5a708681ca90560a945d104aca393d89275790efb8"},
-]
-
-[package.dependencies]
-cffi = ">=1.15.0,<2.0.0"
-Cython = ">=0.29.24,<0.30.0"
-fasteners = "0.15"
-glfw = ">=1.4.0,<2.0.0"
-imageio = ">=2.9.0,<3.0.0"
-numpy = ">=1.21.3,<2.0.0"
-
-[[package]]
-name = "frozenlist"
-version = "1.3.3"
-description = "A list-like structure which implements collections.abc.MutableSequence"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"},
- {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"},
- {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"},
- {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"},
- {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"},
- {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"},
- {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"},
- {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"},
- {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"},
- {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"},
- {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"},
- {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"},
- {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"},
- {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"},
- {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"},
- {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"},
- {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"},
- {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"},
- {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"},
- {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"},
- {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"},
- {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"},
- {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"},
- {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"},
- {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"},
- {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"},
- {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"},
- {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"},
- {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"},
- {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"},
- {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"},
- {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"},
- {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"},
- {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"},
- {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"},
- {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"},
- {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"},
- {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"},
- {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"},
- {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"},
- {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"},
- {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"},
- {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"},
- {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"},
- {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"},
- {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"},
- {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"},
- {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"},
- {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"},
- {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"},
- {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"},
- {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"},
- {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"},
- {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"},
- {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"},
- {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"},
- {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"},
- {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"},
- {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"},
- {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"},
- {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"},
- {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"},
- {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"},
- {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"},
- {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"},
- {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"},
- {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"},
- {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"},
- {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"},
- {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"},
- {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"},
- {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"},
- {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"},
- {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"},
-]
-
[[package]]
name = "ghp-import"
version = "2.1.0"
description = "Copy your docs directly to the gh-pages branch."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1155,7 +910,6 @@ dev = ["flake8", "markdown", "twine", "wheel"]
name = "gitdb"
version = "4.0.10"
description = "Git Object Database"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1170,7 +924,6 @@ smmap = ">=3.0.1,<6"
name = "gitpython"
version = "3.1.31"
description = "GitPython is a Python library used to interact with Git repositories"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1180,13 +933,11 @@ files = [
[package.dependencies]
gitdb = ">=4.0.1,<5"
-typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""}
[[package]]
name = "glcontext"
version = "2.3.7"
description = "Portable OpenGL Context"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1246,7 +997,6 @@ files = [
name = "glfw"
version = "1.12.0"
description = "A ctypes-based wrapper for GLFW3."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -1263,7 +1013,6 @@ files = [
name = "google-auth"
version = "2.18.0"
description = "Google Authentication Library"
-category = "main"
optional = false
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*"
files = [
@@ -1289,7 +1038,6 @@ requests = ["requests (>=2.20.0,<3.0.0dev)"]
name = "google-auth-oauthlib"
version = "0.4.6"
description = "Google Authentication Library"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -1308,7 +1056,6 @@ tool = ["click (>=6.0.0)"]
name = "graphviz"
version = "0.20.1"
description = "Simple Python interface for Graphviz"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1325,7 +1072,6 @@ test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>=
name = "greenlet"
version = "2.0.2"
description = "Lightweight in-process concurrent programming"
-category = "main"
optional = true
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
files = [
@@ -1403,7 +1149,6 @@ test = ["objgraph", "psutil"]
name = "grpcio"
version = "1.54.0"
description = "HTTP/2-based RPC framework"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1461,7 +1206,6 @@ protobuf = ["grpcio-tools (>=1.54.0)"]
name = "gym"
version = "0.23.1"
description = "Gym: A universal API for reinforcement learning environments"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1489,7 +1233,6 @@ toy-text = ["pygame (==2.1.0)", "scipy (>=1.4.1)"]
name = "gym-notices"
version = "0.0.8"
description = "Notices for gym"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -1501,7 +1244,6 @@ files = [
name = "gym3"
version = "0.3.3"
description = "Vectorized Reinforcement Learning Environment Interface"
-category = "main"
optional = true
python-versions = ">=3.6.0"
files = [
@@ -1523,7 +1265,6 @@ test = ["gym (==0.17.2)", "gym-retro (==0.8.0)", "mpi4py (==3.0.3)", "pytest (==
name = "gymnasium"
version = "0.28.1"
description = "A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1541,9 +1282,9 @@ typing-extensions = ">=4.3.0"
[package.extras]
accept-rom-license = ["autorom[accept-rom-license] (>=0.4.2,<0.5.0)"]
-all = ["box2d-py (==2.3.5)", "imageio (>=2.14.1)", "jax (==0.3.24)", "jaxlib (==0.3.24)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.3.2)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.3)", "shimmy[atari] (>=0.1.0,<1.0)", "swig (>=4.0.0,<5.0.0)", "torch (>=1.0.0)"]
+all = ["box2d-py (==2.3.5)", "imageio (>=2.14.1)", "jax (==0.3.24)", "jaxlib (==0.3.24)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.3.2)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.3)", "shimmy[atari] (>=0.1.0,<1.0)", "swig (==4.*)", "torch (>=1.0.0)"]
atari = ["shimmy[atari] (>=0.1.0,<1.0)"]
-box2d = ["box2d-py (==2.3.5)", "pygame (==2.1.3)", "swig (>=4.0.0,<5.0.0)"]
+box2d = ["box2d-py (==2.3.5)", "pygame (==2.1.3)", "swig (==4.*)"]
classic-control = ["pygame (==2.1.3)", "pygame (==2.1.3)"]
jax = ["jax (==0.3.24)", "jaxlib (==0.3.24)"]
mujoco = ["imageio (>=2.14.1)", "mujoco (>=2.3.2)"]
@@ -1556,7 +1297,6 @@ toy-text = ["pygame (==2.1.3)", "pygame (==2.1.3)"]
name = "h5py"
version = "3.8.0"
description = "Read and write HDF5 files from Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1594,7 +1334,6 @@ numpy = ">=1.14.5"
name = "hbutils"
version = "0.8.6"
description = "Some useful functions and classes in Python infrastructure development."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1617,7 +1356,6 @@ test = ["click (>=7.0.0)", "coverage (>=5)", "easydict (>=1.7,<2)", "faker", "fl
name = "huggingface-hub"
version = "0.11.1"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
-category = "main"
optional = false
python-versions = ">=3.7.0"
files = [
@@ -1627,7 +1365,6 @@ files = [
[package.dependencies]
filelock = "*"
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
packaging = ">=20.9"
pyyaml = ">=5.1"
requests = "*"
@@ -1645,29 +1382,10 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "isort (>=5.5.4)", "jedi", "pytest"
torch = ["torch"]
typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
-[[package]]
-name = "hydra-core"
-version = "1.3.2"
-description = "A framework for elegantly configuring complex applications"
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"},
- {file = "hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"},
-]
-
-[package.dependencies]
-antlr4-python3-runtime = ">=4.9.0,<4.10.0"
-importlib-resources = {version = "*", markers = "python_version < \"3.9\""}
-omegaconf = ">=2.2,<2.4"
-packaging = "*"
-
[[package]]
name = "identify"
version = "2.5.24"
description = "File identification library for Python"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -1682,7 +1400,6 @@ license = ["ukkonen"]
name = "idna"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
-category = "main"
optional = false
python-versions = ">=3.5"
files = [
@@ -1694,7 +1411,6 @@ files = [
name = "imageio"
version = "2.28.1"
description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1726,7 +1442,6 @@ tifffile = ["tifffile"]
name = "imageio-ffmpeg"
version = "0.3.0"
description = "FFMPEG wrapper for Python"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -1741,7 +1456,6 @@ files = [
name = "importlib-metadata"
version = "5.2.0"
description = "Read metadata from Python packages"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1750,7 +1464,6 @@ files = [
]
[package.dependencies]
-typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
zipp = ">=0.5"
[package.extras]
@@ -1762,8 +1475,7 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag
name = "importlib-resources"
version = "5.12.0"
description = "Read resources from Python packages"
-category = "main"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"},
@@ -1781,7 +1493,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
name = "iniconfig"
version = "2.0.0"
description = "brain-dead simple config-ini parsing"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1789,88 +1500,40 @@ files = [
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
-[[package]]
-name = "isaacgym"
-version = "1.0.preview4"
-description = ""
-category = "dev"
-optional = false
-python-versions = ">=3.7.1"
-files = []
-develop = true
-
-[package.dependencies]
-gym = "0.23.1"
-imageio = "^2.19.5"
-ninja = "^1.10.2"
-numpy = ">=1.16.4"
-Pillow = "^9.2.0"
-PyYAML = ">=5.3.1"
-scipy = ">=1.5.0"
-torch = "^1.12.0"
-torchvision = "^0.13.0"
-
-[package.source]
-type = "directory"
-url = "cleanrl/ppo_continuous_action_isaacgym/isaacgym"
-
-[[package]]
-name = "isaacgymenvs"
-version = "0.1.0"
-description = ""
-category = "dev"
-optional = false
-python-versions = ">=3.7.1,<3.10"
-files = []
-develop = false
-
-[package.dependencies]
-gym = "0.23.1"
-hydra-core = "^1.2.0"
-numpy = ">=1.16.4"
-omegaconf = "^2.2.2"
-PyVirtualDisplay = "^3.0"
-rl-games = "1.5.2"
-termcolor = "^1.1.0"
-
-[package.source]
-type = "git"
-url = "https://github.com/vwxyzjn/IsaacGymEnvs.git"
-reference = "poetry"
-resolved_reference = "27cc130a811b2305056c2f03f5f4cc0819b7867c"
-
[[package]]
name = "jax"
-version = "0.3.25"
+version = "0.4.8"
description = "Differentiate, compile, and transform Numpy code."
-category = "main"
optional = true
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "jax-0.3.25.tar.gz", hash = "sha256:18bea69321cb95ea5ea913adfe5e2c1d453cade9d4cfd0dc814ecba9fc0cb6e3"},
+ {file = "jax-0.4.8.tar.gz", hash = "sha256:08116481f7336db16c24812bfb5e6f9786915f4c2f6ff4028331fa69e7535202"},
]
[package.dependencies]
-numpy = ">=1.20"
+ml_dtypes = ">=0.0.3"
+numpy = ">=1.21"
opt_einsum = "*"
-scipy = ">=1.5"
-typing_extensions = "*"
+scipy = ">=1.7"
[package.extras]
australis = ["protobuf (>=3.13,<4)"]
-ci = ["jaxlib (==0.3.24)"]
-cpu = ["jaxlib (==0.3.25)"]
-cuda = ["jaxlib (==0.3.25+cuda11.cudnn82)"]
-cuda11-cudnn805 = ["jaxlib (==0.3.25+cuda11.cudnn805)"]
-cuda11-cudnn82 = ["jaxlib (==0.3.25+cuda11.cudnn82)"]
-minimum-jaxlib = ["jaxlib (==0.3.22)"]
-tpu = ["jaxlib (==0.3.25)", "libtpu-nightly (==0.1.dev20221109)", "requests"]
+ci = ["jaxlib (==0.4.7)"]
+cpu = ["jaxlib (==0.4.7)"]
+cuda = ["jaxlib (==0.4.7+cuda11.cudnn86)"]
+cuda11-cudnn82 = ["jaxlib (==0.4.7+cuda11.cudnn82)"]
+cuda11-cudnn86 = ["jaxlib (==0.4.7+cuda11.cudnn86)"]
+cuda11-local = ["jaxlib (==0.4.7+cuda11.cudnn86)"]
+cuda11-pip = ["jaxlib (==0.4.7+cuda11.cudnn86)", "nvidia-cublas-cu11 (>=11.11)", "nvidia-cuda-nvcc-cu11 (>=11.8)", "nvidia-cuda-runtime-cu11 (>=11.8)", "nvidia-cudnn-cu11 (>=8.6)", "nvidia-cufft-cu11 (>=10.9)", "nvidia-cusolver-cu11 (>=11.4)", "nvidia-cusparse-cu11 (>=11.7)"]
+cuda12-local = ["jaxlib (==0.4.7+cuda12.cudnn88)"]
+cuda12-pip = ["jaxlib (==0.4.7+cuda12.cudnn88)", "nvidia-cublas-cu12", "nvidia-cuda-nvcc-cu12", "nvidia-cuda-runtime-cu12", "nvidia-cudnn-cu12", "nvidia-cufft-cu12", "nvidia-cusolver-cu12", "nvidia-cusparse-cu12"]
+minimum-jaxlib = ["jaxlib (==0.4.7)"]
+tpu = ["jaxlib (==0.4.7)", "libtpu-nightly (==0.1.dev20230327)", "requests"]
[[package]]
name = "jax-jumpy"
version = "1.0.0"
description = "Common backend for Jax or Numpy."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -1887,37 +1550,34 @@ testing = ["pytest (==7.1.3)"]
[[package]]
name = "jaxlib"
-version = "0.3.25"
+version = "0.4.7"
description = "XLA library for JAX"
-category = "main"
optional = true
-python-versions = ">=3.7"
-files = [
- {file = "jaxlib-0.3.25-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:09508f7000c0fa958fba29267338e8de75b31d7ea29bd79719a568c38f0f8d31"},
- {file = "jaxlib-0.3.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c75c8efd3702687968820446e3fb9ff997f8a2a07ab92e33b80e2f12eab3d9a"},
- {file = "jaxlib-0.3.25-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:6e2f4e51041b8371aa3976b5a3a9cdcdccb1bd7b040c9b1345cbf24bd28a8d19"},
- {file = "jaxlib-0.3.25-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:f2d517635fd77e2729c0ab7863be0d290927d01b2abb2f5dc955c821f8b0d53e"},
- {file = "jaxlib-0.3.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:13446a8382aa9ed944c16af636ca111d0afbbead91eed5cc2dc71195045e71b3"},
- {file = "jaxlib-0.3.25-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:71866aeaafbc9a12b59dcbe443353772ef235b40c53f8bd7403d39311822c276"},
- {file = "jaxlib-0.3.25-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:1e59ba58c9e93c1e1cef243f2609ec0b0c0a81160c20b9555aecdea32ccd6a78"},
- {file = "jaxlib-0.3.25-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:5295354ed5db111e6f3e88cdfa4010d11c33dd926ac61735b9096b4e0746aa7b"},
- {file = "jaxlib-0.3.25-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:9f3116389ee834b3cdeb30001b085f4b55d7741366034f041c1d377154aa5afa"},
- {file = "jaxlib-0.3.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:78b29c72d0680829db9377ed9be326875849258a60b8173b4a388b34ad18bc78"},
- {file = "jaxlib-0.3.25-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:2e008e0a6c10aa7e949555e98dc0471e0d550d5d7c109771e38a971b49480538"},
- {file = "jaxlib-0.3.25-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:fec043cdd55f3257d02e9d8880b33860eacadcae1bd5e26f43fdd08ada23614d"},
- {file = "jaxlib-0.3.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50193ba0cbf879021c9d73d7bcfa7eafb9138895d057b774c301aac3701f9a5"},
- {file = "jaxlib-0.3.25-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:1f1448f102a9d05186f579b6931fa0c607783ecc915fdfaa482c19538affa180"},
+python-versions = ">=3.8"
+files = [
+ {file = "jaxlib-0.4.7-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:63c2890978e8646516db3d8a680b43d2bed8b63543a70556391f589a261bd85f"},
+ {file = "jaxlib-0.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c16f922507277d5630e81d9c1a4974366a27aad5230d645d063bc2011564d01"},
+ {file = "jaxlib-0.4.7-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:da88382e6487805974cea6facc61ba92b5828a7a1f2dd80f762c487d873a2b47"},
+ {file = "jaxlib-0.4.7-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:022b216036c009989d4c0683538820c19247215bb99fdd35c7bf32838d596be6"},
+ {file = "jaxlib-0.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d0f1d3b6ef6c68013898cca958ab1507d6809b523275037efbdb9aaaaab158ba"},
+ {file = "jaxlib-0.4.7-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:0ae7178c33460822d9d8d03718cba395e02e6bac2402709c35826c94f0c9cc7b"},
+ {file = "jaxlib-0.4.7-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ea07605e37d2b4e25f3c639e0d22ab4605fbc1a10ea918fd14ce09077bdaffb6"},
+ {file = "jaxlib-0.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:48b85d3c8923b1619ddf8cbf14c4e4daf6919796d8aa9d006ce2a085e8202930"},
+ {file = "jaxlib-0.4.7-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a860f2990c97bee5ffcdbb5111751591e5e7a66d5e32b4f6d9e6aa14ac82bf27"},
+ {file = "jaxlib-0.4.7-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c78dc2b6fa1c92ead137a23d1bd3e10d04c58b268e77eca811502abac05b2b19"},
+ {file = "jaxlib-0.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f1f3726e374d0d6fcc14da540b71b758d37356c6726f0f4b48e2f5530a5f8769"},
+ {file = "jaxlib-0.4.7-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:d4629205dbe342153941db5f69c4a1bfe35fd8d2947aebe34f4dff3771d3fff7"},
]
[package.dependencies]
-numpy = ">=1.20"
-scipy = ">=1.5"
+ml-dtypes = ">=0.0.3"
+numpy = ">=1.21"
+scipy = ">=1.7"
[[package]]
name = "jinja2"
version = "3.1.2"
description = "A very fast and expressive template engine."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1935,7 +1595,6 @@ i18n = ["Babel (>=2.7)"]
name = "jmespath"
version = "1.0.1"
description = "JSON Matching Expressions"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1947,7 +1606,6 @@ files = [
name = "joblib"
version = "1.2.0"
description = "Lightweight pipelining with Python functions"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -1955,35 +1613,10 @@ files = [
{file = "joblib-1.2.0.tar.gz", hash = "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"},
]
-[[package]]
-name = "jsonschema"
-version = "4.17.3"
-description = "An implementation of JSON Schema validation for Python"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"},
- {file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"},
-]
-
-[package.dependencies]
-attrs = ">=17.4.0"
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
-importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""}
-pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""}
-pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
-typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
-
-[package.extras]
-format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
-format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
-
[[package]]
name = "kiwisolver"
version = "1.4.4"
description = "A fast implementation of the Cassowary constraint solver"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -2057,14 +1690,10 @@ files = [
{file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"},
]
-[package.dependencies]
-typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
-
[[package]]
name = "labmaze"
version = "1.0.6"
description = "LabMaze: DeepMind Lab's text maze generator."
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2078,11 +1707,6 @@ files = [
{file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70635d1cdb0147a02efb6b3f607a52cdc51723bc3dcc42717a0d4ef55fa0a987"},
{file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff472793238bd9b6dabea8094594d6074ad3c111455de3afcae72f6c40c6817e"},
{file = "labmaze-1.0.6-cp311-cp311-win_amd64.whl", hash = "sha256:2317e65e12fa3d1abecda7e0488dab15456cee8a2e717a586bfc8f02a91579e7"},
- {file = "labmaze-1.0.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e36b6fadcd78f22057b597c1c77823e806a0987b3bdfbf850e14b6b5b502075e"},
- {file = "labmaze-1.0.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d1a4f8de29c2c3d7f14163759b69cd3f237093b85334c983619c1db5403a223b"},
- {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a394f8bb857fcaa2884b809d63e750841c2662a106cfe8c045f2112d201ac7d5"},
- {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d17abb69d4dfc56183afb5c317e8b2eaca0587abb3aabd2326efd3143c81f4e"},
- {file = "labmaze-1.0.6-cp312-cp312-win_amd64.whl", hash = "sha256:5af997598cc46b1929d1c5a1febc32fd56c75874fe481a2a5982c65cee8450c9"},
{file = "labmaze-1.0.6-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:a4c5bc6e56baa55ce63b97569afec2f80cab0f6b952752a131e1f83eed190a53"},
{file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3955f24fe5f708e1e97495b4cfe284b70ae4fd51be5e17b75a6fc04ffbd67bca"},
{file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed96ddc0bb8d66df36428c94db83949fd84a15867e8250763a4c5e3d82104c54"},
@@ -2107,102 +1731,115 @@ setuptools = "!=50.0.0"
[[package]]
name = "lxml"
-version = "4.9.2"
+version = "4.9.3"
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
files = [
- {file = "lxml-4.9.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:76cf573e5a365e790396a5cc2b909812633409306c6531a6877c59061e42c4f2"},
- {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b1f42b6921d0e81b1bcb5e395bc091a70f41c4d4e55ba99c6da2b31626c44892"},
- {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f102706d0ca011de571de32c3247c6476b55bb6bc65a20f682f000b07a4852a"},
- {file = "lxml-4.9.2-cp27-cp27m-win32.whl", hash = "sha256:8d0b4612b66ff5d62d03bcaa043bb018f74dfea51184e53f067e6fdcba4bd8de"},
- {file = "lxml-4.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:4c8f293f14abc8fd3e8e01c5bd86e6ed0b6ef71936ded5bf10fe7a5efefbaca3"},
- {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2899456259589aa38bfb018c364d6ae7b53c5c22d8e27d0ec7609c2a1ff78b50"},
- {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6749649eecd6a9871cae297bffa4ee76f90b4504a2a2ab528d9ebe912b101975"},
- {file = "lxml-4.9.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a08cff61517ee26cb56f1e949cca38caabe9ea9fbb4b1e10a805dc39844b7d5c"},
- {file = "lxml-4.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:85cabf64adec449132e55616e7ca3e1000ab449d1d0f9d7f83146ed5bdcb6d8a"},
- {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8340225bd5e7a701c0fa98284c849c9b9fc9238abf53a0ebd90900f25d39a4e4"},
- {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1ab8f1f932e8f82355e75dda5413a57612c6ea448069d4fb2e217e9a4bed13d4"},
- {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:699a9af7dffaf67deeae27b2112aa06b41c370d5e7633e0ee0aea2e0b6c211f7"},
- {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9cc34af337a97d470040f99ba4282f6e6bac88407d021688a5d585e44a23184"},
- {file = "lxml-4.9.2-cp310-cp310-win32.whl", hash = "sha256:d02a5399126a53492415d4906ab0ad0375a5456cc05c3fc0fc4ca11771745cda"},
- {file = "lxml-4.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:a38486985ca49cfa574a507e7a2215c0c780fd1778bb6290c21193b7211702ab"},
- {file = "lxml-4.9.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c83203addf554215463b59f6399835201999b5e48019dc17f182ed5ad87205c9"},
- {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:2a87fa548561d2f4643c99cd13131acb607ddabb70682dcf1dff5f71f781a4bf"},
- {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:d6b430a9938a5a5d85fc107d852262ddcd48602c120e3dbb02137c83d212b380"},
- {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3efea981d956a6f7173b4659849f55081867cf897e719f57383698af6f618a92"},
- {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:df0623dcf9668ad0445e0558a21211d4e9a149ea8f5666917c8eeec515f0a6d1"},
- {file = "lxml-4.9.2-cp311-cp311-win32.whl", hash = "sha256:da248f93f0418a9e9d94b0080d7ebc407a9a5e6d0b57bb30db9b5cc28de1ad33"},
- {file = "lxml-4.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:3818b8e2c4b5148567e1b09ce739006acfaa44ce3156f8cbbc11062994b8e8dd"},
- {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0"},
- {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e"},
- {file = "lxml-4.9.2-cp35-cp35m-win32.whl", hash = "sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df"},
- {file = "lxml-4.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5"},
- {file = "lxml-4.9.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53"},
- {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7"},
- {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe"},
- {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f49e52d174375a7def9915c9f06ec4e569d235ad428f70751765f48d5926678c"},
- {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36c3c175d34652a35475a73762b545f4527aec044910a651d2bf50de9c3352b1"},
- {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a35f8b7fa99f90dd2f5dc5a9fa12332642f087a7641289ca6c40d6e1a2637d8e"},
- {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74"},
- {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38"},
- {file = "lxml-4.9.2-cp36-cp36m-win32.whl", hash = "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5"},
- {file = "lxml-4.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3"},
- {file = "lxml-4.9.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03"},
- {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941"},
- {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726"},
- {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c9ec3eaf616d67db0764b3bb983962b4f385a1f08304fd30c7283954e6a7869b"},
- {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a29ba94d065945944016b6b74e538bdb1751a1db6ffb80c9d3c2e40d6fa9894"},
- {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a82d05da00a58b8e4c0008edbc8a4b6ec5a4bc1e2ee0fb6ed157cf634ed7fa45"},
- {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:223f4232855ade399bd409331e6ca70fb5578efef22cf4069a6090acc0f53c0e"},
- {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d17bc7c2ccf49c478c5bdd447594e82692c74222698cfc9b5daae7ae7e90743b"},
- {file = "lxml-4.9.2-cp37-cp37m-win32.whl", hash = "sha256:b64d891da92e232c36976c80ed7ebb383e3f148489796d8d31a5b6a677825efe"},
- {file = "lxml-4.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a0a336d6d3e8b234a3aae3c674873d8f0e720b76bc1d9416866c41cd9500ffb9"},
- {file = "lxml-4.9.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:da4dd7c9c50c059aba52b3524f84d7de956f7fef88f0bafcf4ad7dde94a064e8"},
- {file = "lxml-4.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:821b7f59b99551c69c85a6039c65b75f5683bdc63270fec660f75da67469ca24"},
- {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e5168986b90a8d1f2f9dc1b841467c74221bd752537b99761a93d2d981e04889"},
- {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8e20cb5a47247e383cf4ff523205060991021233ebd6f924bca927fcf25cf86f"},
- {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13598ecfbd2e86ea7ae45ec28a2a54fb87ee9b9fdb0f6d343297d8e548392c03"},
- {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:880bbbcbe2fca64e2f4d8e04db47bcdf504936fa2b33933efd945e1b429bea8c"},
- {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d2278d59425777cfcb19735018d897ca8303abe67cc735f9f97177ceff8027f"},
- {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5344a43228767f53a9df6e5b253f8cdca7dfc7b7aeae52551958192f56d98457"},
- {file = "lxml-4.9.2-cp38-cp38-win32.whl", hash = "sha256:925073b2fe14ab9b87e73f9a5fde6ce6392da430f3004d8b72cc86f746f5163b"},
- {file = "lxml-4.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:9b22c5c66f67ae00c0199f6055705bc3eb3fcb08d03d2ec4059a2b1b25ed48d7"},
- {file = "lxml-4.9.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:5f50a1c177e2fa3ee0667a5ab79fdc6b23086bc8b589d90b93b4bd17eb0e64d1"},
- {file = "lxml-4.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:090c6543d3696cbe15b4ac6e175e576bcc3f1ccfbba970061b7300b0c15a2140"},
- {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:63da2ccc0857c311d764e7d3d90f429c252e83b52d1f8f1d1fe55be26827d1f4"},
- {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:5b4545b8a40478183ac06c073e81a5ce4cf01bf1734962577cf2bb569a5b3bbf"},
- {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2e430cd2824f05f2d4f687701144556646bae8f249fd60aa1e4c768ba7018947"},
- {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6804daeb7ef69e7b36f76caddb85cccd63d0c56dedb47555d2fc969e2af6a1a5"},
- {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a6e441a86553c310258aca15d1c05903aaf4965b23f3bc2d55f200804e005ee5"},
- {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca34efc80a29351897e18888c71c6aca4a359247c87e0b1c7ada14f0ab0c0fb2"},
- {file = "lxml-4.9.2-cp39-cp39-win32.whl", hash = "sha256:6b418afe5df18233fc6b6093deb82a32895b6bb0b1155c2cdb05203f583053f1"},
- {file = "lxml-4.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f1496ea22ca2c830cbcbd473de8f114a320da308438ae65abad6bab7867fe38f"},
- {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b264171e3143d842ded311b7dccd46ff9ef34247129ff5bf5066123c55c2431c"},
- {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0dc313ef231edf866912e9d8f5a042ddab56c752619e92dfd3a2c277e6a7299a"},
- {file = "lxml-4.9.2-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:16efd54337136e8cd72fb9485c368d91d77a47ee2d42b057564aae201257d419"},
- {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0f2b1e0d79180f344ff9f321327b005ca043a50ece8713de61d1cb383fb8ac05"},
- {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:7b770ed79542ed52c519119473898198761d78beb24b107acf3ad65deae61f1f"},
- {file = "lxml-4.9.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efa29c2fe6b4fdd32e8ef81c1528506895eca86e1d8c4657fda04c9b3786ddf9"},
- {file = "lxml-4.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7e91ee82f4199af8c43d8158024cbdff3d931df350252288f0d4ce656df7f3b5"},
- {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b23e19989c355ca854276178a0463951a653309fb8e57ce674497f2d9f208746"},
- {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7"},
- {file = "lxml-4.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7b515674acfdcadb0eb5d00d8a709868173acece5cb0be3dd165950cbfdf5409"},
- {file = "lxml-4.9.2.tar.gz", hash = "sha256:2455cfaeb7ac70338b3257f41e21f0724f4b5b0c0e7702da67ee6c3640835b67"},
+ {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"},
+ {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"},
+ {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"},
+ {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"},
+ {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"},
+ {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"},
+ {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"},
+ {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"},
+ {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"},
+ {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"},
+ {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"},
+ {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"},
+ {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"},
+ {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"},
+ {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"},
+ {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"},
+ {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"},
+ {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"},
+ {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"},
+ {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"},
+ {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"},
+ {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"},
+ {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"},
+ {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"},
+ {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"},
+ {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"},
+ {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"},
+ {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"},
+ {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"},
+ {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"},
+ {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"},
+ {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"},
+ {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"},
+ {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"},
+ {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"},
+ {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"},
+ {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"},
+ {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"},
+ {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"},
+ {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"},
+ {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"},
+ {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"},
+ {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"},
+ {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"},
+ {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"},
+ {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"},
+ {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"},
+ {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"},
+ {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"},
+ {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"},
+ {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"},
+ {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"},
+ {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"},
+ {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"},
+ {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"},
+ {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"},
+ {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"},
+ {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"},
+ {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"},
+ {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"},
+ {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"},
+ {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"},
+ {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"},
+ {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"},
+ {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"},
+ {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"},
+ {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"},
+ {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"},
+ {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"},
+ {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"},
+ {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"},
+ {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"},
+ {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"},
+ {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"},
+ {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"},
+ {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"},
+ {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"},
+ {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"},
+ {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"},
+ {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"},
+ {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"},
+ {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"},
+ {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"},
+ {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"},
+ {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"},
+ {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"},
+ {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"},
+ {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"},
+ {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"},
+ {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"},
+ {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"},
+ {file = "lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"},
]
[package.extras]
cssselect = ["cssselect (>=0.7)"]
html5 = ["html5lib"]
htmlsoup = ["BeautifulSoup4"]
-source = ["Cython (>=0.29.7)"]
+source = ["Cython (>=0.29.35)"]
[[package]]
name = "mako"
version = "1.2.4"
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2211,7 +1848,6 @@ files = [
]
[package.dependencies]
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
MarkupSafe = ">=0.9.2"
[package.extras]
@@ -2223,7 +1859,6 @@ testing = ["pytest"]
name = "markdown"
version = "3.3.7"
description = "Python implementation of Markdown."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -2241,7 +1876,6 @@ testing = ["coverage", "pyyaml"]
name = "markdown-include"
version = "0.7.2"
description = "A Python-Markdown extension which provides an 'include' function"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2256,7 +1890,6 @@ markdown = ">=3.0"
name = "markupsafe"
version = "2.1.2"
description = "Safely add untrusted strings to HTML/XML markup."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -2316,7 +1949,6 @@ files = [
name = "matplotlib"
version = "3.5.3"
description = "Python plotting package"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -2371,7 +2003,6 @@ python-dateutil = ">=2.7"
name = "mergedeep"
version = "1.3.4"
description = "A deep merge function for 🐍."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2383,7 +2014,6 @@ files = [
name = "mkdocs"
version = "1.4.3"
description = "Project documentation with Markdown."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2402,7 +2032,6 @@ mergedeep = ">=1.3.4"
packaging = ">=20.5"
pyyaml = ">=5.1"
pyyaml-env-tag = ">=0.1"
-typing-extensions = {version = ">=3.10", markers = "python_version < \"3.8\""}
watchdog = ">=2.0"
[package.extras]
@@ -2413,7 +2042,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp
name = "mkdocs-material"
version = "8.5.11"
description = "Documentation that simply works"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2434,7 +2062,6 @@ requests = ">=2.26"
name = "mkdocs-material-extensions"
version = "1.1.1"
description = "Extension pack for Python Markdown and MkDocs Material."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2442,11 +2069,45 @@ files = [
{file = "mkdocs_material_extensions-1.1.1.tar.gz", hash = "sha256:9c003da71e2cc2493d910237448c672e00cefc800d3d6ae93d2fc69979e3bd93"},
]
+[[package]]
+name = "ml-dtypes"
+version = "0.2.0"
+description = ""
+optional = true
+python-versions = ">=3.7"
+files = [
+ {file = "ml_dtypes-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df6a76e1c8adf484feb138ed323f9f40a7b6c21788f120f7c78bec20ac37ee81"},
+ {file = "ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc29a0524ef5e23a7fbb8d881bdecabeb3fc1d19d9db61785d077a86cb94fab2"},
+ {file = "ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08c391c2794f2aad358e6f4c70785a9a7b1df980ef4c232b3ccd4f6fe39f719"},
+ {file = "ml_dtypes-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:75015818a7fccf99a5e8ed18720cb430f3e71a8838388840f4cdf225c036c983"},
+ {file = "ml_dtypes-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e70047ec2c83eaee01afdfdabee2c5b0c133804d90d0f7db4dd903360fcc537c"},
+ {file = "ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36d28b8861a8931695e5a31176cad5ae85f6504906650dea5598fbec06c94606"},
+ {file = "ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e85ba8e24cf48d456e564688e981cf379d4c8e644db0a2f719b78de281bac2ca"},
+ {file = "ml_dtypes-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:832a019a1b6db5c4422032ca9940a990fa104eee420f643713241b3a518977fa"},
+ {file = "ml_dtypes-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8faaf0897942c8253dd126662776ba45f0a5861968cf0f06d6d465f8a7bc298a"},
+ {file = "ml_dtypes-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35b984cddbe8173b545a0e3334fe56ea1a5c3eb67c507f60d0cfde1d3fa8f8c2"},
+ {file = "ml_dtypes-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:022d5a4ee6be14569c2a9d1549e16f1ec87ca949681d0dca59995445d5fcdd5b"},
+ {file = "ml_dtypes-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:50845af3e9a601810751b55091dee6c2562403fa1cb4e0123675cf3a4fc2c17a"},
+ {file = "ml_dtypes-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f00c71c8c63e03aff313bc6a7aeaac9a4f1483a921a6ffefa6d4404efd1af3d0"},
+ {file = "ml_dtypes-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80d304c836d73f10605c58ccf7789c171cc229bfb678748adfb7cea2510dfd0e"},
+ {file = "ml_dtypes-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32107e7fa9f62db9a5281de923861325211dfff87bd23faefb27b303314635ab"},
+ {file = "ml_dtypes-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:1749b60348da71fd3c2ab303fdbc1965958dc50775ead41f5669c932a341cafd"},
+ {file = "ml_dtypes-0.2.0.tar.gz", hash = "sha256:6488eb642acaaf08d8020f6de0a38acee7ac324c1e6e92ee0c0fea42422cb797"},
+]
+
+[package.dependencies]
+numpy = [
+ {version = ">1.20", markers = "python_version <= \"3.9\""},
+ {version = ">=1.21.2", markers = "python_version > \"3.9\""},
+]
+
+[package.extras]
+dev = ["absl-py", "pyink", "pylint (>=2.6.0)", "pytest", "pytest-xdist"]
+
[[package]]
name = "moderngl"
version = "5.8.2"
description = "ModernGL: High performance rendering for Python 3"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2505,23 +2166,10 @@ files = [
[package.dependencies]
glcontext = ">=2.3.6,<3"
-[[package]]
-name = "monotonic"
-version = "1.6"
-description = "An implementation of time.monotonic() for Python 2 & < 3.3"
-category = "main"
-optional = true
-python-versions = "*"
-files = [
- {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"},
- {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"},
-]
-
[[package]]
name = "moviepy"
version = "1.0.3"
description = "Video editing with Python"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -2532,10 +2180,7 @@ files = [
decorator = ">=4.0.2,<5.0"
imageio = {version = ">=2.5,<3.0", markers = "python_version >= \"3.4\""}
imageio_ffmpeg = {version = ">=0.2.0", markers = "python_version >= \"3.4\""}
-numpy = [
- {version = ">=1.17.3", markers = "python_version != \"2.7\""},
- {version = "*", markers = "python_version >= \"2.7\""},
-]
+numpy = {version = ">=1.17.3", markers = "python_version > \"2.7\""}
proglog = "<=1.0.0"
requests = ">=2.8.1,<3.0"
tqdm = ">=4.11.2,<5.0"
@@ -2549,8 +2194,7 @@ test = ["coverage (<5.0)", "coveralls (>=1.1,<2.0)", "pytest (>=3.0.0,<4.0)", "p
name = "msgpack"
version = "1.0.5"
description = "MessagePack serializer"
-category = "main"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9"},
@@ -2622,7 +2266,6 @@ files = [
name = "mujoco"
version = "2.3.3"
description = "MuJoCo Physics Simulator"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2663,7 +2306,6 @@ pyopengl = "*"
name = "multi-agent-ale-py"
version = "0.1.11"
description = "Multi-Agent Arcade Learning Environment Python Interface"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -2686,7 +2328,6 @@ numpy = "*"
name = "multiprocess"
version = "0.70.14"
description = "better multiprocessing and multithreading in python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2709,41 +2350,10 @@ files = [
[package.dependencies]
dill = ">=0.3.6"
-[[package]]
-name = "ninja"
-version = "1.11.1"
-description = "Ninja is a small build system with a focus on speed"
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "ninja-1.11.1-py2.py3-none-macosx_10_9_universal2.macosx_10_9_x86_64.macosx_11_0_arm64.macosx_11_0_universal2.whl", hash = "sha256:f48c3c6eea204062f6bbf089dfc63e1ad41a08640e1da46ef2b30fa426f7ce23"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:edec1053e141253076b2df7ec03a246ff581e9270aa1ca9759397b21e2760e57"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:817e2aee2a4d28a708a67bcfba1817ae502c32c6d8ef80e50d63b0f23adf3a08"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df11b8afea0501883e33faeb1c43d2ef67f466d5f4bd85f9c376e9a93a43a277"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a564fe755ddfbdbccb07b0b758e3f8460e5f8ba1adaab40a5eaa2f8c01ce68"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c474326e11fba3f8c2582715d79216292e327d3335367c0e87e9647a002cc4a"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f6465a7efe6473a2a34edab83633594de19d59406a727316e1367ebcc528908"},
- {file = "ninja-1.11.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:642cb64d859276998f14972724850e0c5b7febbc1bce3d2065b7e0cb7d3a0b79"},
- {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:60179bb4f22c88279c53a5402bb5fe81c97c627a28d93c737d1fa067d892115d"},
- {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_i686.whl", hash = "sha256:34753459493543782d87267e4cad63dd4639b07f8394ffe6d4417e9eda05c8a8"},
- {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:779f228e407c54a8b6e4cbf8f835489998dd250f67bf1b9bd7b8a8ab6bdcdc7b"},
- {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_s390x.whl", hash = "sha256:ba50a32424912e5f3ee40d791b506a160dc0eeda7de5ad8faebe7aa8006244dc"},
- {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:3b28b595ed580752240ade7821b6cb7a5a4c6a604c865dc474bd38f06e2eb7f5"},
- {file = "ninja-1.11.1-py2.py3-none-win32.whl", hash = "sha256:3329b4b7c1694730772522a3ba0ba40fd15c012476ed3e1c9f0fd9e76190394e"},
- {file = "ninja-1.11.1-py2.py3-none-win_amd64.whl", hash = "sha256:4e547bc759c570773d83d110c41fd5ca9a94c0a9a8388f5a3ea37bdf97d002b0"},
- {file = "ninja-1.11.1-py2.py3-none-win_arm64.whl", hash = "sha256:8cf96f92ccc851c600cb3e1251c34db06f1dd682de79188ad490c33cddc66981"},
- {file = "ninja-1.11.1.tar.gz", hash = "sha256:c833a47d39b2d1eee3f9ca886fa1581efd5be6068b82734ac229961ee8748f90"},
-]
-
-[package.extras]
-test = ["codecov (>=2.0.5)", "coverage (>=4.2)", "flake8 (>=3.0.4)", "pytest (>=4.5.0)", "pytest-cov (>=2.7.1)", "pytest-runner (>=5.1)", "pytest-virtualenv (>=1.7.0)", "virtualenv (>=15.0.3)"]
-
[[package]]
name = "nodeenv"
version = "1.7.0"
description = "Node.js virtual environment builder"
-category = "dev"
optional = false
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
files = [
@@ -2756,50 +2366,45 @@ setuptools = "*"
[[package]]
name = "numpy"
-version = "1.21.6"
-description = "NumPy is the fundamental package for array computing with Python."
-category = "main"
+version = "1.24.4"
+description = "Fundamental package for array computing in Python"
optional = false
-python-versions = ">=3.7,<3.11"
-files = [
- {file = "numpy-1.21.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25"},
- {file = "numpy-1.21.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e"},
- {file = "numpy-1.21.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6"},
- {file = "numpy-1.21.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb"},
- {file = "numpy-1.21.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1"},
- {file = "numpy-1.21.6-cp310-cp310-win32.whl", hash = "sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c"},
- {file = "numpy-1.21.6-cp310-cp310-win_amd64.whl", hash = "sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f"},
- {file = "numpy-1.21.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7"},
- {file = "numpy-1.21.6-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46"},
- {file = "numpy-1.21.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2"},
- {file = "numpy-1.21.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db"},
- {file = "numpy-1.21.6-cp37-cp37m-win32.whl", hash = "sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e"},
- {file = "numpy-1.21.6-cp37-cp37m-win_amd64.whl", hash = "sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a"},
- {file = "numpy-1.21.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552"},
- {file = "numpy-1.21.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab"},
- {file = "numpy-1.21.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3"},
- {file = "numpy-1.21.6-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6"},
- {file = "numpy-1.21.6-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a"},
- {file = "numpy-1.21.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4"},
- {file = "numpy-1.21.6-cp38-cp38-win32.whl", hash = "sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470"},
- {file = "numpy-1.21.6-cp38-cp38-win_amd64.whl", hash = "sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf"},
- {file = "numpy-1.21.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1"},
- {file = "numpy-1.21.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673"},
- {file = "numpy-1.21.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0"},
- {file = "numpy-1.21.6-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac"},
- {file = "numpy-1.21.6-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b"},
- {file = "numpy-1.21.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b"},
- {file = "numpy-1.21.6-cp39-cp39-win32.whl", hash = "sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786"},
- {file = "numpy-1.21.6-cp39-cp39-win_amd64.whl", hash = "sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3"},
- {file = "numpy-1.21.6-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0"},
- {file = "numpy-1.21.6.zip", hash = "sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656"},
+python-versions = ">=3.8"
+files = [
+ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"},
+ {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"},
+ {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"},
+ {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"},
+ {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"},
+ {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"},
+ {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"},
+ {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"},
+ {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"},
+ {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"},
+ {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"},
+ {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"},
+ {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"},
+ {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"},
+ {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"},
+ {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"},
+ {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"},
+ {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"},
+ {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"},
+ {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"},
+ {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"},
+ {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"},
+ {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"},
+ {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"},
+ {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"},
+ {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"},
+ {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"},
+ {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"},
]
[[package]]
name = "oauthlib"
version = "3.2.2"
description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -2812,27 +2417,10 @@ rsa = ["cryptography (>=3.0.0)"]
signals = ["blinker (>=1.4.0)"]
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
-[[package]]
-name = "omegaconf"
-version = "2.3.0"
-description = "A flexible configuration library"
-category = "dev"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"},
- {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"},
-]
-
-[package.dependencies]
-antlr4-python3-runtime = ">=4.9.0,<4.10.0"
-PyYAML = ">=5.1.0"
-
[[package]]
name = "opencv-python"
version = "4.7.0.72"
description = "Wrapper package for OpenCV python bindings."
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2847,19 +2435,17 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.21.0", markers = "python_version <= \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
- {version = ">=1.21.2", markers = "python_version >= \"3.10\""},
+ {version = ">=1.21.0", markers = "python_version <= \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\" and python_version >= \"3.8\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""},
- {version = ">=1.19.3", markers = "python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\" or python_version >= \"3.9\""},
- {version = ">=1.17.0", markers = "python_version >= \"3.7\""},
- {version = ">=1.17.3", markers = "python_version >= \"3.8\""},
+ {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""},
+ {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
+ {version = ">=1.17.3", markers = "(platform_system != \"Darwin\" and platform_system != \"Linux\") and python_version >= \"3.8\" and python_version < \"3.9\" or platform_system != \"Darwin\" and python_version >= \"3.8\" and python_version < \"3.9\" and platform_machine != \"aarch64\" or platform_machine != \"arm64\" and python_version >= \"3.8\" and python_version < \"3.9\" and platform_system != \"Linux\" or (platform_machine != \"arm64\" and platform_machine != \"aarch64\") and python_version >= \"3.8\" and python_version < \"3.9\""},
]
[[package]]
name = "openrlbenchmark"
version = "0.1.1b4"
description = ""
-category = "main"
optional = true
python-versions = ">=3.7.1,<4.0.0"
files = [
@@ -2883,7 +2469,6 @@ wandb = ">=0.13.7,<0.14.0"
name = "opt-einsum"
version = "3.3.0"
description = "Optimizing numpys einsum function"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -2902,7 +2487,6 @@ tests = ["pytest", "pytest-cov", "pytest-pep8"]
name = "optax"
version = "0.1.4"
description = "A gradient processing and optimisation library in JAX."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2920,19 +2504,18 @@ typing-extensions = ">=3.10.0"
[[package]]
name = "optuna"
-version = "3.1.1"
+version = "3.3.0"
description = "A hyperparameter optimization framework"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
- {file = "optuna-3.1.1-py3-none-any.whl", hash = "sha256:4d641b4bdd896f48a766803a5b64286281fa3e5dbbeedc549f8e1ee6c6e3eea8"},
- {file = "optuna-3.1.1.tar.gz", hash = "sha256:f3c8fcdb7ed4dd473485f6f61de2f2ceb9d4ad56afbae42e663e15c3f00a4c10"},
+ {file = "optuna-3.3.0-py3-none-any.whl", hash = "sha256:3eefaed56a5fabf442036d51ef2001dbabfbe1a8ce33bc0c6b61ff5d15b30c82"},
+ {file = "optuna-3.3.0.tar.gz", hash = "sha256:76126c6e52354892488e75fc7743eaddcd397c5aba8fa1f964a1c5b5f942eaf6"},
]
[package.dependencies]
alembic = ">=1.5.0"
-cmaes = ">=0.9.1"
+cmaes = ">=0.10.0"
colorlog = "*"
numpy = "*"
packaging = ">=20.0"
@@ -2942,17 +2525,16 @@ tqdm = "*"
[package.extras]
benchmark = ["asv (>=0.5.0)", "botorch", "cma", "scikit-optimize", "virtualenv"]
-checking = ["black", "blackdoc", "hacking", "isort", "mypy", "types-PyYAML", "types-redis", "types-setuptools", "typing-extensions (>=3.10.0.0)"]
-document = ["cma", "distributed", "fvcore", "lightgbm", "matplotlib (!=3.6.0)", "mlflow", "pandas", "pillow", "plotly (>=4.9.0)", "scikit-learn", "scikit-optimize", "sphinx (<6)", "sphinx-copybutton", "sphinx-gallery", "sphinx-plotly-directive", "sphinx-rtd-theme", "torch (==1.11.0)", "torchaudio (==0.11.0)", "torchvision (==0.12.0)"]
-integration = ["allennlp (>=2.2.0)", "botorch (>=0.4.0,<0.8.0)", "cached-path (<=1.1.2)", "catalyst (>=21.3)", "catboost (>=0.26)", "chainer (>=5.0.0)", "cma", "distributed", "fastai", "lightgbm", "mlflow", "mpi4py", "mxnet", "pandas", "pytorch-ignite", "pytorch-lightning (>=1.5.0)", "scikit-learn (>=0.24.2)", "scikit-optimize", "shap", "skorch", "tensorflow", "tensorflow-datasets", "torch (==1.11.0)", "torchaudio (==0.11.0)", "torchvision (==0.12.0)", "wandb", "xgboost"]
-optional = ["matplotlib (!=3.6.0)", "pandas", "plotly (>=4.9.0)", "redis", "scikit-learn (>=0.24.2)"]
-test = ["codecov", "fakeredis[lua]", "kaleido", "pytest", "scipy (>=1.9.2)"]
+checking = ["black", "blackdoc", "flake8", "isort", "mypy", "mypy-boto3-s3", "types-PyYAML", "types-redis", "types-setuptools", "types-tqdm", "typing-extensions (>=3.10.0.0)"]
+document = ["botorch", "cma", "distributed", "fvcore", "lightgbm", "matplotlib (!=3.6.0)", "mlflow", "pandas", "pillow", "plotly (>=4.9.0)", "scikit-learn", "scikit-optimize", "sphinx", "sphinx-copybutton", "sphinx-gallery", "sphinx-plotly-directive", "sphinx-rtd-theme (>=1.2.0)", "torch", "torchaudio", "torchvision"]
+integration = ["botorch (>=0.4.0)", "catboost (>=0.26)", "catboost (>=0.26,<1.2)", "cma", "distributed", "fastai", "lightgbm", "mlflow", "pandas", "pytorch-ignite", "pytorch-lightning (>=1.6.0)", "scikit-learn (>=0.24.2)", "scikit-optimize", "shap", "tensorflow", "torch", "torchaudio", "torchvision", "wandb", "xgboost"]
+optional = ["boto3", "botorch", "matplotlib (!=3.6.0)", "pandas", "plotly (>=4.9.0)", "redis", "scikit-learn (>=0.24.2)"]
+test = ["coverage", "fakeredis[lua]", "kaleido", "moto", "pytest", "scipy (>=1.9.2)"]
[[package]]
name = "optuna-dashboard"
version = "0.7.3"
description = "Real-time dashboard for Optuna"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -2965,13 +2547,11 @@ bottle = "*"
optuna = ">=2.4.0"
packaging = "*"
scikit-learn = "*"
-typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
[[package]]
name = "orbax"
version = "0.1.0"
description = "Orbax"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -2999,7 +2579,6 @@ dev = ["pytest-xdist"]
name = "packaging"
version = "23.1"
description = "Core utilities for Python packages"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -3011,7 +2590,6 @@ files = [
name = "pandas"
version = "1.3.5"
description = "Powerful data structures for data analysis, time series, and statistics"
-category = "main"
optional = false
python-versions = ">=3.7.1"
files = [
@@ -3047,7 +2625,7 @@ numpy = [
{version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""},
{version = ">=1.21.0", markers = "python_version >= \"3.10\""},
{version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""},
- {version = ">=1.17.3", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
+ {version = ">=1.17.3", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""},
]
python-dateutil = ">=2.7.3"
pytz = ">=2017.3"
@@ -3059,7 +2637,6 @@ test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"]
name = "pathtools"
version = "0.1.2"
description = "File system general utilities"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -3070,7 +2647,6 @@ files = [
name = "pettingzoo"
version = "1.18.1"
description = "Gym for multi-agent reinforcement learning"
-category = "main"
optional = true
python-versions = ">=3.7, <3.11"
files = [
@@ -3097,7 +2673,6 @@ tests = ["codespell", "flake8", "isort", "pynput", "pytest"]
name = "pillow"
version = "9.5.0"
description = "Python Imaging Library (Fork)"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -3177,7 +2752,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
name = "pip"
version = "22.3.1"
description = "The PyPA recommended tool for installing Python packages."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3185,23 +2759,10 @@ files = [
{file = "pip-22.3.1.tar.gz", hash = "sha256:65fd48317359f3af8e593943e6ae1506b66325085ea64b706a998c6e83eeaf38"},
]
-[[package]]
-name = "pkgutil-resolve-name"
-version = "1.3.10"
-description = "Resolve a name to an object."
-category = "dev"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"},
- {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"},
-]
-
[[package]]
name = "platformdirs"
version = "3.5.0"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -3209,9 +2770,6 @@ files = [
{file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"},
]
-[package.dependencies]
-typing-extensions = {version = ">=4.5", markers = "python_version < \"3.8\""}
-
[package.extras]
docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
@@ -3220,7 +2778,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-
name = "pluggy"
version = "1.0.0"
description = "plugin and hook calling mechanisms for python"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -3228,9 +2785,6 @@ files = [
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
-[package.dependencies]
-importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
-
[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
@@ -3239,7 +2793,6 @@ testing = ["pytest", "pytest-benchmark"]
name = "pre-commit"
version = "2.21.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -3250,7 +2803,6 @@ files = [
[package.dependencies]
cfgv = ">=2.0.0"
identify = ">=1.0.0"
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
virtualenv = ">=20.10.0"
@@ -3259,7 +2811,6 @@ virtualenv = ">=20.10.0"
name = "procgen"
version = "0.10.7"
description = "Procedurally Generated Game-Like RL Environments"
-category = "main"
optional = true
python-versions = ">=3.6.0"
files = [
@@ -3290,7 +2841,6 @@ test = ["pytest (==6.2.5)", "pytest-benchmark (==3.4.1)"]
name = "proglog"
version = "0.1.10"
description = "Log and progress bar manager for console, notebooks, web..."
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -3305,7 +2855,6 @@ tqdm = "*"
name = "protobuf"
version = "3.20.3"
description = "Protocol Buffers"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -3337,7 +2886,6 @@ files = [
name = "psutil"
version = "5.9.5"
description = "Cross-platform lib for process and system monitoring in Python."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -3364,7 +2912,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
name = "pyasn1"
version = "0.5.0"
description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
@@ -3376,7 +2923,6 @@ files = [
name = "pyasn1-modules"
version = "0.3.0"
description = "A collection of ASN.1-based protocols modules"
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
@@ -3391,7 +2937,6 @@ pyasn1 = ">=0.4.6,<0.6.0"
name = "pycparser"
version = "2.21"
description = "C parser in Python"
-category = "main"
optional = true
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -3403,7 +2948,6 @@ files = [
name = "pygame"
version = "2.1.0"
description = "Python Game Development"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -3471,7 +3015,6 @@ files = [
name = "pygments"
version = "2.15.1"
description = "Pygments is a syntax highlighting package written in Python."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -3486,7 +3029,6 @@ plugins = ["importlib-metadata"]
name = "pymdown-extensions"
version = "9.11"
description = "Extension pack for Python Markdown."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3502,7 +3044,6 @@ pyyaml = "*"
name = "pyopengl"
version = "3.1.6"
description = "Standard OpenGL bindings for Python"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3515,7 +3056,6 @@ files = [
name = "pyparsing"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
-category = "main"
optional = false
python-versions = ">=3.6.8"
files = [
@@ -3526,48 +3066,10 @@ files = [
[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]
-[[package]]
-name = "pyrsistent"
-version = "0.19.3"
-description = "Persistent/Functional/Immutable data structures"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "pyrsistent-0.19.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:20460ac0ea439a3e79caa1dbd560344b64ed75e85d8703943e0b66c2a6150e4a"},
- {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c18264cb84b5e68e7085a43723f9e4c1fd1d935ab240ce02c0324a8e01ccb64"},
- {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b774f9288dda8d425adb6544e5903f1fb6c273ab3128a355c6b972b7df39dcf"},
- {file = "pyrsistent-0.19.3-cp310-cp310-win32.whl", hash = "sha256:5a474fb80f5e0d6c9394d8db0fc19e90fa540b82ee52dba7d246a7791712f74a"},
- {file = "pyrsistent-0.19.3-cp310-cp310-win_amd64.whl", hash = "sha256:49c32f216c17148695ca0e02a5c521e28a4ee6c5089f97e34fe24163113722da"},
- {file = "pyrsistent-0.19.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f0774bf48631f3a20471dd7c5989657b639fd2d285b861237ea9e82c36a415a9"},
- {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab2204234c0ecd8b9368dbd6a53e83c3d4f3cab10ecaf6d0e772f456c442393"},
- {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e42296a09e83028b3476f7073fcb69ffebac0e66dbbfd1bd847d61f74db30f19"},
- {file = "pyrsistent-0.19.3-cp311-cp311-win32.whl", hash = "sha256:64220c429e42a7150f4bfd280f6f4bb2850f95956bde93c6fda1b70507af6ef3"},
- {file = "pyrsistent-0.19.3-cp311-cp311-win_amd64.whl", hash = "sha256:016ad1afadf318eb7911baa24b049909f7f3bb2c5b1ed7b6a8f21db21ea3faa8"},
- {file = "pyrsistent-0.19.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4db1bd596fefd66b296a3d5d943c94f4fac5bcd13e99bffe2ba6a759d959a28"},
- {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aeda827381f5e5d65cced3024126529ddc4289d944f75e090572c77ceb19adbf"},
- {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42ac0b2f44607eb92ae88609eda931a4f0dfa03038c44c772e07f43e738bcac9"},
- {file = "pyrsistent-0.19.3-cp37-cp37m-win32.whl", hash = "sha256:e8f2b814a3dc6225964fa03d8582c6e0b6650d68a232df41e3cc1b66a5d2f8d1"},
- {file = "pyrsistent-0.19.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9bb60a40a0ab9aba40a59f68214eed5a29c6274c83b2cc206a359c4a89fa41b"},
- {file = "pyrsistent-0.19.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a2471f3f8693101975b1ff85ffd19bb7ca7dd7c38f8a81701f67d6b4f97b87d8"},
- {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc5d149f31706762c1f8bda2e8c4f8fead6e80312e3692619a75301d3dbb819a"},
- {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3311cb4237a341aa52ab8448c27e3a9931e2ee09561ad150ba94e4cfd3fc888c"},
- {file = "pyrsistent-0.19.3-cp38-cp38-win32.whl", hash = "sha256:f0e7c4b2f77593871e918be000b96c8107da48444d57005b6a6bc61fb4331b2c"},
- {file = "pyrsistent-0.19.3-cp38-cp38-win_amd64.whl", hash = "sha256:c147257a92374fde8498491f53ffa8f4822cd70c0d85037e09028e478cababb7"},
- {file = "pyrsistent-0.19.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b735e538f74ec31378f5a1e3886a26d2ca6351106b4dfde376a26fc32a044edc"},
- {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99abb85579e2165bd8522f0c0138864da97847875ecbd45f3e7e2af569bfc6f2"},
- {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a8cb235fa6d3fd7aae6a4f1429bbb1fec1577d978098da1252f0489937786f3"},
- {file = "pyrsistent-0.19.3-cp39-cp39-win32.whl", hash = "sha256:c74bed51f9b41c48366a286395c67f4e894374306b197e62810e0fdaf2364da2"},
- {file = "pyrsistent-0.19.3-cp39-cp39-win_amd64.whl", hash = "sha256:878433581fc23e906d947a6814336eee031a00e6defba224234169ae3d3d6a98"},
- {file = "pyrsistent-0.19.3-py3-none-any.whl", hash = "sha256:ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64"},
- {file = "pyrsistent-0.19.3.tar.gz", hash = "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440"},
-]
-
[[package]]
name = "pytest"
version = "7.3.1"
description = "pytest: simple powerful testing with Python"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3578,7 +3080,6 @@ files = [
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
-importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
@@ -3591,7 +3092,6 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
@@ -3606,7 +3106,6 @@ six = ">=1.5"
name = "pytimeparse"
version = "1.1.8"
description = "Time expression parser"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -3618,7 +3117,6 @@ files = [
name = "pytz"
version = "2023.3"
description = "World timezone definitions, modern and historical"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -3626,62 +3124,69 @@ files = [
{file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"},
]
-[[package]]
-name = "pyvirtualdisplay"
-version = "3.0"
-description = "python wrapper for Xvfb, Xephyr and Xvnc"
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "PyVirtualDisplay-3.0-py3-none-any.whl", hash = "sha256:40d4b8dfe4b8de8552e28eb367647f311f88a130bf837fe910e7f180d5477f0e"},
- {file = "PyVirtualDisplay-3.0.tar.gz", hash = "sha256:09755bc3ceb6eb725fb07eca5425f43f2358d3bf08e00d2a9b792a1aedd16159"},
-]
-
[[package]]
name = "pyyaml"
-version = "5.4.1"
+version = "6.0.1"
description = "YAML parser and emitter for Python"
-category = "main"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+python-versions = ">=3.6"
files = [
- {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"},
- {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"},
- {file = "PyYAML-5.4.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8"},
- {file = "PyYAML-5.4.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185"},
- {file = "PyYAML-5.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253"},
- {file = "PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc"},
- {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347"},
- {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541"},
- {file = "PyYAML-5.4.1-cp36-cp36m-win32.whl", hash = "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5"},
- {file = "PyYAML-5.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df"},
- {file = "PyYAML-5.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018"},
- {file = "PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63"},
- {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa"},
- {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"},
- {file = "PyYAML-5.4.1-cp37-cp37m-win32.whl", hash = "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b"},
- {file = "PyYAML-5.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf"},
- {file = "PyYAML-5.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46"},
- {file = "PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb"},
- {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247"},
- {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc"},
- {file = "PyYAML-5.4.1-cp38-cp38-win32.whl", hash = "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc"},
- {file = "PyYAML-5.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696"},
- {file = "PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77"},
- {file = "PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183"},
- {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122"},
- {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6"},
- {file = "PyYAML-5.4.1-cp39-cp39-win32.whl", hash = "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10"},
- {file = "PyYAML-5.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db"},
- {file = "PyYAML-5.4.1.tar.gz", hash = "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e"},
+ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
+ {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+ {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
+ {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
+ {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
+ {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
+ {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+ {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
+ {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
+ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+ {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+ {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"},
+ {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+ {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
+ {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
+ {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
+ {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
+ {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
+ {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
+ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
]
[[package]]
name = "pyyaml-env-tag"
version = "0.1"
description = "A custom YAML tag for referencing environment variables in YAML files. "
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -3692,76 +3197,10 @@ files = [
[package.dependencies]
pyyaml = "*"
-[[package]]
-name = "ray"
-version = "2.7.0"
-description = "Ray provides a simple, universal API for building distributed applications."
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "ray-2.7.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:bc911655908b61b2e9f59b8df158fcc62cd32080c468b484b539ebf0a4111d04"},
- {file = "ray-2.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0ee8c14e1521559cd5802bfad3f0aba4a77afdfba57dd446162a7449c6e8ff68"},
- {file = "ray-2.7.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:ebde44af7d479ede21d1c2e68b5ccd8264e18df6e4f3c216d9e99c31e819bde6"},
- {file = "ray-2.7.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:b83621f5d2d4079e6ae624c3bf30046a4fefa0ea7ea5e4a4dfe4b50c580b3768"},
- {file = "ray-2.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:8e1b06abba6e227b8dde1ad861c587fb2608a6970d270e4755cd24a6f37ed565"},
- {file = "ray-2.7.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5442d48719f033831a324f05b332d6e7181970d721e9504be2091cc9d9735394"},
- {file = "ray-2.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca8225878cce7b9e2d0ca9668d9370893a7cee35629d11a3889a1b66a0007218"},
- {file = "ray-2.7.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:a3f59dbb0780f9fa11f5bf96bef853b4cb95245456d4400e1c7bf2e514d12ab2"},
- {file = "ray-2.7.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:8384b3f30bc1446ef810e9e894afa03238c5ac40d3c40c0740d82f347112015d"},
- {file = "ray-2.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d4530e7024375505552dabd3f4441fc9ac7a5562365a81ba9afa14185433879"},
- {file = "ray-2.7.0-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:c491b8051eef82b77d136c48a23d16485c0e54233303ccf68e9fe69a06c517e6"},
- {file = "ray-2.7.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:1684c434886cb7b263cdf98ed39d75dec343e949f7b14f3385d83bfe70ee8c80"},
- {file = "ray-2.7.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:856a9ae164b9b0aeaad54f3e78986eb19900ed3c74e26f51b02a7d8826c97e59"},
- {file = "ray-2.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34925a90b6239de42592bb4524dcbdc59a9c65f1f74ad4d9f97f636bd59c73d7"},
- {file = "ray-2.7.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:60db240f37d80a80492e09a8d1e29b79d034431c6fcb651401e9e2d24d850793"},
- {file = "ray-2.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:835155fdeb8698eae426f3d9416e6b8165197fe5c1c74e1b02a429fc7f4ddcd2"},
- {file = "ray-2.7.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:442b7568946081d38c8addbc528e7b09fc1ee25453b4800c86b7e5ba4bce9dd3"},
- {file = "ray-2.7.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:3825292b777b423e2cd34bf66e8e1e7701b04c6a5308f9f291ad5929b289dc47"},
- {file = "ray-2.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:ce700322662946ad5c62a39b78e81feebcb855d378c49f5df6477c22f0ac1e5a"},
- {file = "ray-2.7.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:327c23aac5dd26ee4abe6cee70320322d63fdf97c6028fbb9555724b46a8f3e3"},
- {file = "ray-2.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a09021d45312ab7a44109b251984718b65fbff77df0b55e30e651193cdf42bff"},
- {file = "ray-2.7.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f05fcb609962d14f4d23cc88a9d07cafa7077ce3c5d5ee99cd08a19067b7eecf"},
- {file = "ray-2.7.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:0e0f7dbeb4444940c72b64fdecd6f331593466914b2dffeed03ce97225acec14"},
- {file = "ray-2.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:29a0866316756ae18e232dd074adbf408dcdabe95d135a9a96b9a8c24393c983"},
-]
-
-[package.dependencies]
-aiosignal = "*"
-click = ">=7.0"
-filelock = "*"
-frozenlist = "*"
-jsonschema = "*"
-msgpack = ">=1.0.0,<2.0.0"
-numpy = [
- {version = ">=1.16", markers = "python_version < \"3.9\""},
- {version = ">=1.19.3", markers = "python_version >= \"3.9\""},
-]
-packaging = "*"
-protobuf = ">=3.15.3,<3.19.5 || >3.19.5"
-pyyaml = "*"
-requests = "*"
-typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
-
-[package.extras]
-air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
-all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.7.0)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
-client = ["grpcio (!=1.56.0)"]
-cpp = ["ray-cpp (==2.7.0)"]
-data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"]
-default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"]
-observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"]
-rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"]
-serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
-serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
-train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"]
-tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"]
-
[[package]]
name = "requests"
version = "2.30.0"
description = "Python HTTP for Humans."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -3783,7 +3222,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
name = "requests-oauthlib"
version = "1.3.1"
description = "OAuthlib authentication support for Requests."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -3802,7 +3240,6 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
name = "rich"
version = "11.2.0"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
-category = "main"
optional = false
python-versions = ">=3.6.2,<4.0.0"
files = [
@@ -3814,39 +3251,14 @@ files = [
colorama = ">=0.4.0,<0.5.0"
commonmark = ">=0.9.0,<0.10.0"
pygments = ">=2.6.0,<3.0.0"
-typing-extensions = {version = ">=3.7.4,<5.0", markers = "python_version < \"3.8\""}
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
-[[package]]
-name = "rl-games"
-version = "1.5.2"
-description = ""
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "rl-games-1.5.2.tar.gz", hash = "sha256:6d4f5513c917115eed8ebdcab89d0086ea035ce1d0c992dbfba0401c64c63547"},
- {file = "rl_games-1.5.2-py3-none-any.whl", hash = "sha256:104cf667c02c90e4604221bf6d4ea58f231cfb3d7678d62d453308d69d58e4e5"},
-]
-
-[package.dependencies]
-gym = ">=0.17.2"
-numpy = ">=1.16.0"
-psutil = "*"
-pyyaml = "*"
-ray = ">=1.1.0"
-setproctitle = "*"
-tensorboard = ">=1.14.0"
-tensorboardX = ">=1.6"
-torch = ">=1.7.0"
-
[[package]]
name = "rsa"
version = "4.7.2"
description = "Pure-Python RSA implementation"
-category = "main"
optional = false
python-versions = ">=3.5, <4"
files = [
@@ -3859,27 +3271,25 @@ pyasn1 = ">=0.1.3"
[[package]]
name = "s3transfer"
-version = "0.6.1"
+version = "0.8.0"
description = "An Amazon S3 Transfer Manager"
-category = "main"
optional = true
python-versions = ">= 3.7"
files = [
- {file = "s3transfer-0.6.1-py3-none-any.whl", hash = "sha256:3c0da2d074bf35d6870ef157158641178a4204a6e689e82546083e31e0311346"},
- {file = "s3transfer-0.6.1.tar.gz", hash = "sha256:640bb492711f4c0c0905e1f62b6aaeb771881935ad27884852411f8e9cacbca9"},
+ {file = "s3transfer-0.8.0-py3-none-any.whl", hash = "sha256:baa479dc2e63e5c2ed51611b4d46cdf0295e2070d8d0b86b22f335ee5b954986"},
+ {file = "s3transfer-0.8.0.tar.gz", hash = "sha256:e8d6bd52ffd99841e3a57b34370a54841f12d3aab072af862cdcc50955288002"},
]
[package.dependencies]
-botocore = ">=1.12.36,<2.0a.0"
+botocore = ">=1.32.7,<2.0a.0"
[package.extras]
-crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"]
+crt = ["botocore[crt] (>=1.32.7,<2.0a.0)"]
[[package]]
name = "scikit-learn"
version = "1.0.2"
description = "A set of python modules for machine learning and data mining"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3931,51 +3341,46 @@ tests = ["black (>=21.6b0)", "flake8 (>=3.8.2)", "matplotlib (>=2.2.3)", "mypy (
[[package]]
name = "scipy"
-version = "1.7.3"
-description = "SciPy: Scientific Library for Python"
-category = "main"
-optional = false
-python-versions = ">=3.7,<3.11"
-files = [
- {file = "scipy-1.7.3-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c9e04d7e9b03a8a6ac2045f7c5ef741be86727d8f49c45db45f244bdd2bcff17"},
- {file = "scipy-1.7.3-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b0e0aeb061a1d7dcd2ed59ea57ee56c9b23dd60100825f98238c06ee5cc4467e"},
- {file = "scipy-1.7.3-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b78a35c5c74d336f42f44106174b9851c783184a85a3fe3e68857259b37b9ffb"},
- {file = "scipy-1.7.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:173308efba2270dcd61cd45a30dfded6ec0085b4b6eb33b5eb11ab443005e088"},
- {file = "scipy-1.7.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:21b66200cf44b1c3e86495e3a436fc7a26608f92b8d43d344457c54f1c024cbc"},
- {file = "scipy-1.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceebc3c4f6a109777c0053dfa0282fddb8893eddfb0d598574acfb734a926168"},
- {file = "scipy-1.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7eaea089345a35130bc9a39b89ec1ff69c208efa97b3f8b25ea5d4c41d88094"},
- {file = "scipy-1.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:304dfaa7146cffdb75fbf6bb7c190fd7688795389ad060b970269c8576d038e9"},
- {file = "scipy-1.7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:033ce76ed4e9f62923e1f8124f7e2b0800db533828c853b402c7eec6e9465d80"},
- {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4d242d13206ca4302d83d8a6388c9dfce49fc48fdd3c20efad89ba12f785bf9e"},
- {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8499d9dd1459dc0d0fe68db0832c3d5fc1361ae8e13d05e6849b358dc3f2c279"},
- {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca36e7d9430f7481fc7d11e015ae16fbd5575615a8e9060538104778be84addf"},
- {file = "scipy-1.7.3-cp37-cp37m-win32.whl", hash = "sha256:e2c036492e673aad1b7b0d0ccdc0cb30a968353d2c4bf92ac8e73509e1bf212c"},
- {file = "scipy-1.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:866ada14a95b083dd727a845a764cf95dd13ba3dc69a16b99038001b05439709"},
- {file = "scipy-1.7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:65bd52bf55f9a1071398557394203d881384d27b9c2cad7df9a027170aeaef93"},
- {file = "scipy-1.7.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:f99d206db1f1ae735a8192ab93bd6028f3a42f6fa08467d37a14eb96c9dd34a3"},
- {file = "scipy-1.7.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5f2cfc359379c56b3a41b17ebd024109b2049f878badc1e454f31418c3a18436"},
- {file = "scipy-1.7.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb7ae2c4dbdb3c9247e07acc532f91077ae6dbc40ad5bd5dca0bb5a176ee9bda"},
- {file = "scipy-1.7.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c2d250074cfa76715d58830579c64dff7354484b284c2b8b87e5a38321672c"},
- {file = "scipy-1.7.3-cp38-cp38-win32.whl", hash = "sha256:87069cf875f0262a6e3187ab0f419f5b4280d3dcf4811ef9613c605f6e4dca95"},
- {file = "scipy-1.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:7edd9a311299a61e9919ea4192dd477395b50c014cdc1a1ac572d7c27e2207fa"},
- {file = "scipy-1.7.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eef93a446114ac0193a7b714ce67659db80caf940f3232bad63f4c7a81bc18df"},
- {file = "scipy-1.7.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb326658f9b73c07081300daba90a8746543b5ea177184daed26528273157294"},
- {file = "scipy-1.7.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:93378f3d14fff07572392ce6a6a2ceb3a1f237733bd6dcb9eb6a2b29b0d19085"},
- {file = "scipy-1.7.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edad1cf5b2ce1912c4d8ddad20e11d333165552aba262c882e28c78bbc09dbf6"},
- {file = "scipy-1.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d1cc2c19afe3b5a546ede7e6a44ce1ff52e443d12b231823268019f608b9b12"},
- {file = "scipy-1.7.3-cp39-cp39-win32.whl", hash = "sha256:2c56b820d304dffcadbbb6cbfbc2e2c79ee46ea291db17e288e73cd3c64fefa9"},
- {file = "scipy-1.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:3f78181a153fa21c018d346f595edd648344751d7f03ab94b398be2ad083ed3e"},
- {file = "scipy-1.7.3.tar.gz", hash = "sha256:ab5875facfdef77e0a47d5fd39ea178b58e60e454a4c85aa1e52fcb80db7babf"},
+version = "1.10.1"
+description = "Fundamental algorithms for scientific computing in Python"
+optional = true
+python-versions = "<3.12,>=3.8"
+files = [
+ {file = "scipy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019"},
+ {file = "scipy-1.10.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e"},
+ {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f"},
+ {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2"},
+ {file = "scipy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1"},
+ {file = "scipy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd"},
+ {file = "scipy-1.10.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5"},
+ {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35"},
+ {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d"},
+ {file = "scipy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f"},
+ {file = "scipy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35"},
+ {file = "scipy-1.10.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88"},
+ {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1"},
+ {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f"},
+ {file = "scipy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415"},
+ {file = "scipy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9"},
+ {file = "scipy-1.10.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6"},
+ {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353"},
+ {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601"},
+ {file = "scipy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea"},
+ {file = "scipy-1.10.1.tar.gz", hash = "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5"},
]
[package.dependencies]
-numpy = ">=1.16.5,<1.23.0"
+numpy = ">=1.19.5,<1.27.0"
+
+[package.extras]
+dev = ["click", "doit (>=0.36.0)", "flake8", "mypy", "pycodestyle", "pydevtool", "rich-click", "typing_extensions"]
+doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"]
+test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]
name = "seaborn"
version = "0.12.2"
description = "Statistical data visualization"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -3987,7 +3392,6 @@ files = [
matplotlib = ">=3.1,<3.6.1 || >3.6.1"
numpy = ">=1.17,<1.24.0 || >1.24.0"
pandas = ">=0.25"
-typing_extensions = {version = "*", markers = "python_version < \"3.8\""}
[package.extras]
dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"]
@@ -3998,7 +3402,6 @@ stats = ["scipy (>=1.3)", "statsmodels (>=0.10)"]
name = "sentry-sdk"
version = "1.22.2"
description = "Python client for Sentry (https://sentry.io)"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -4040,7 +3443,6 @@ tornado = ["tornado (>=5)"]
name = "setproctitle"
version = "1.3.2"
description = "A Python module to customize the process title"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4125,7 +3527,6 @@ test = ["pytest"]
name = "setuptools"
version = "67.7.2"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4140,41 +3541,50 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (
[[package]]
name = "shimmy"
-version = "1.0.0"
+version = "1.1.0"
description = "An API conversion tool providing Gymnasium and PettingZoo bindings for popular external reinforcement learning environments."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
- {file = "Shimmy-1.0.0-py3-none-any.whl", hash = "sha256:f26540d595ad56c9d0e99462d6388dc0dbb7976a97095337365ec79668cdf836"},
- {file = "Shimmy-1.0.0.tar.gz", hash = "sha256:30b9473402e846149137d5d71a0fbe47787d309c7e3a0c1aca97c95375de5f26"},
+ {file = "Shimmy-1.1.0-py3-none-any.whl", hash = "sha256:0d2f44cdc3384b792336eb54002d23eb8c0ddb67580760e9c4e234fdf6077a69"},
+ {file = "Shimmy-1.1.0.tar.gz", hash = "sha256:028ff42861fd8fa168927631f8f8cb2bda4ffef67e65633c51bf3116792e1f88"},
]
[package.dependencies]
-dm-control = {version = ">=1.0.10", optional = true, markers = "extra == \"dm-control\""}
gymnasium = ">=0.27.0"
-h5py = {version = ">=3.7.0", optional = true, markers = "extra == \"dm-control\""}
-imageio = {version = "*", optional = true, markers = "extra == \"dm-control\""}
numpy = ">=1.18.0"
[package.extras]
-all = ["ale-py (>=0.8.1,<0.9.0)", "bsuite (>=0.3.5)", "dm-control (>=1.0.10)", "dm-env (>=1.6)", "gym (>=0.21.0)", "gym (>=0.26.2)", "h5py (>=3.7.0)", "imageio", "open-spiel (>=1.2)", "pettingzoo (>=1.22.3)", "pyglet (==1.5.11)"]
+all = ["ale-py (>=0.8.1,<0.9.0)", "bsuite (>=0.3.5)", "dm-control (>=1.0.10)", "dm-env (>=1.6)", "gym (>=0.26.2)", "h5py (>=3.7.0)", "imageio", "open-spiel (>=1.2)", "pettingzoo (>=1.23)"]
atari = ["ale-py (>=0.8.1,<0.9.0)"]
bsuite = ["bsuite (>=0.3.5)"]
dm-control = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio"]
-dm-control-multi-agent = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio", "pettingzoo (>=1.22.3)"]
+dm-control-multi-agent = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio", "pettingzoo (>=1.23)"]
dm-lab = ["dm-env (>=1.6)"]
-gym-v21 = ["gym (>=0.21.0)", "pyglet (==1.5.11)"]
+gym-v21 = ["gym (>=0.21.0,<0.26)", "pyglet (==1.5.11)"]
gym-v26 = ["gym (>=0.26.2)"]
-meltingpot = ["pettingzoo (>=1.22.3)"]
-openspiel = ["open-spiel (>=1.2)", "pettingzoo (>=1.22.3)"]
+meltingpot = ["pettingzoo (>=1.23)"]
+openspiel = ["open-spiel (>=1.2)", "pettingzoo (>=1.23)"]
testing = ["autorom[accept-rom-license] (>=0.6.0,<0.7.0)", "pillow (>=9.3.0)", "pytest (==7.1.3)"]
+[[package]]
+name = "shtab"
+version = "1.6.4"
+description = "Automagic shell tab completion for Python CLI applications"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "shtab-1.6.4-py3-none-any.whl", hash = "sha256:4be38887a912091a1640e06f5ccbcbd24e176cf2fcb9ef0c2e011ee22d63834f"},
+ {file = "shtab-1.6.4.tar.gz", hash = "sha256:aba9e049bed54ffdb650cb2e02657282d8c0148024b0f500277052df124d47de"},
+]
+
+[package.extras]
+dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout"]
+
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@@ -4186,7 +3596,6 @@ files = [
name = "smmap"
version = "5.0.0"
description = "A pure Python implementation of a sliding window memory map manager"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -4198,7 +3607,6 @@ files = [
name = "sqlalchemy"
version = "2.0.13"
description = "Database Abstraction Library"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4247,7 +3655,6 @@ files = [
[package.dependencies]
greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""}
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
typing-extensions = ">=4.2.0"
[package.extras]
@@ -4275,34 +3682,33 @@ sqlcipher = ["sqlcipher3-binary"]
[[package]]
name = "stable-baselines3"
-version = "1.2.0"
+version = "2.0.0"
description = "Pytorch version of Stable Baselines, implementations of reinforcement learning algorithms."
-category = "main"
optional = false
-python-versions = "*"
+python-versions = ">=3.7"
files = [
- {file = "stable_baselines3-1.2.0-py3-none-any.whl", hash = "sha256:15769fe983fd0c14067c87294a1f5cd081fdfdb02092bc1277ec3fad45e9bd13"},
- {file = "stable_baselines3-1.2.0.tar.gz", hash = "sha256:170842f30c00adff0dcccef5be74921cfa0dd2650b3eb8600c62b5d43ff78c67"},
+ {file = "stable_baselines3-2.0.0-py3-none-any.whl", hash = "sha256:54a011a049d5cd923471e1e8c2a4c275de0a0d9257548cfa287ebf70fffa56dd"},
+ {file = "stable_baselines3-2.0.0.tar.gz", hash = "sha256:0b62b5148e8045c5d67f9f34d8de15c6248b325e15e5dcd931c88f236c735193"},
]
[package.dependencies]
cloudpickle = "*"
-gym = ">=0.17"
+gymnasium = "0.28.1"
matplotlib = "*"
-numpy = "*"
+numpy = ">=1.20"
pandas = "*"
-torch = ">=1.8.1"
+torch = ">=1.11"
[package.extras]
-docs = ["sphinx", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib.spelling"]
-extra = ["atari-py (>=0.2.0,<0.3.0)", "opencv-python", "pillow", "psutil", "tensorboard (>=2.2.0)"]
-tests = ["black", "flake8 (>=3.8)", "flake8-bugbear", "isort (>=5.0)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "pytype", "scipy (>=1.4.1)"]
+docs = ["sphinx (>=5.3,<7.0)", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx-copybutton", "sphinx-rtd-theme", "sphinxcontrib.spelling"]
+extra = ["autorom[accept-rom-license] (>=0.6.0,<0.7.0)", "opencv-python", "pillow", "psutil", "pygame", "pygame (>=2.0,<2.1.3)", "rich", "shimmy[atari] (>=0.2.1,<0.3.0)", "tensorboard (>=2.9.1)", "tqdm"]
+extra-no-roms = ["opencv-python", "pillow", "psutil", "pygame", "pygame (>=2.0,<2.1.3)", "rich", "shimmy[atari] (>=0.2.1,<0.3.0)", "tensorboard (>=2.9.1)", "tqdm"]
+tests = ["black", "isort (>=5.0)", "mypy", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "pytype", "ruff"]
[[package]]
name = "supersuit"
version = "3.4.0"
description = "Wrappers for Gym and PettingZoo"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4319,7 +3725,6 @@ tinyscaler = ">=1.0.4"
name = "tabulate"
version = "0.9.0"
description = "Pretty-print tabular data"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4334,7 +3739,6 @@ widechars = ["wcwidth"]
name = "tenacity"
version = "8.2.3"
description = "Retry code until it succeeds"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4349,7 +3753,6 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"]
name = "tensorboard"
version = "2.11.2"
description = "TensorBoard lets you watch Tensors Flow"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4375,7 +3778,6 @@ wheel = ">=0.26"
name = "tensorboard-data-server"
version = "0.6.1"
description = "Fast data loading for TensorBoard"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -4388,35 +3790,16 @@ files = [
name = "tensorboard-plugin-wit"
version = "1.8.1"
description = "What-If Tool TensorBoard plugin."
-category = "main"
optional = false
python-versions = "*"
files = [
{file = "tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe"},
]
-[[package]]
-name = "tensorboardx"
-version = "2.6"
-description = "TensorBoardX lets you watch Tensors Flow without Tensorflow"
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "tensorboardX-2.6-py2.py3-none-any.whl", hash = "sha256:24a7cd076488de1e9d15ef25371b8ebf90c4f8f622af2477c611198f03f4a606"},
- {file = "tensorboardX-2.6.tar.gz", hash = "sha256:d4c036964dd2deb075a1909832b276daa383eab3f9db519ad90b99f5aea06b0c"},
-]
-
-[package.dependencies]
-numpy = "*"
-packaging = "*"
-protobuf = ">=3.8.0,<4"
-
[[package]]
name = "tensorstore"
version = "0.1.28"
description = "Read and write large, multi-dimensional arrays"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4441,22 +3824,10 @@ files = [
[package.dependencies]
numpy = ">=1.16.0"
-[[package]]
-name = "termcolor"
-version = "1.1.0"
-description = "ANSII Color formatting for output in terminal."
-category = "dev"
-optional = false
-python-versions = "*"
-files = [
- {file = "termcolor-1.1.0.tar.gz", hash = "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"},
-]
-
[[package]]
name = "threadpoolctl"
version = "3.1.0"
description = "threadpoolctl"
-category = "main"
optional = true
python-versions = ">=3.6"
files = [
@@ -4468,7 +3839,6 @@ files = [
name = "tinyscaler"
version = "1.2.5"
description = "A tiny, simple image scaler"
-category = "main"
optional = true
python-versions = ">=3.7, <3.11"
files = [
@@ -4486,7 +3856,6 @@ numpy = "*"
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4498,7 +3867,6 @@ files = [
name = "toolz"
version = "0.12.0"
description = "List processing tools and functional utilities"
-category = "main"
optional = true
python-versions = ">=3.5"
files = [
@@ -4510,7 +3878,6 @@ files = [
name = "torch"
version = "1.12.1"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
-category = "main"
optional = false
python-versions = ">=3.7.0"
files = [
@@ -4539,50 +3906,10 @@ files = [
[package.dependencies]
typing-extensions = "*"
-[[package]]
-name = "torchvision"
-version = "0.13.1"
-description = "image and video datasets and models for torch deep learning"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "torchvision-0.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:19286a733c69dcbd417b86793df807bd227db5786ed787c17297741a9b0d0fc7"},
- {file = "torchvision-0.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:08f592ea61836ebeceb5c97f4d7a813b9d7dc651bbf7ce4401563ccfae6a21fc"},
- {file = "torchvision-0.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:ef5fe3ec1848123cd0ec74c07658192b3147dcd38e507308c790d5943e87b88c"},
- {file = "torchvision-0.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:099874088df104d54d8008f2a28539ca0117b512daed8bf3c2bbfa2b7ccb187a"},
- {file = "torchvision-0.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:8e4d02e4d8a203e0c09c10dfb478214c224d080d31efc0dbf36d9c4051f7f3c6"},
- {file = "torchvision-0.13.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5e631241bee3661de64f83616656224af2e3512eb2580da7c08e08b8c965a8ac"},
- {file = "torchvision-0.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:899eec0b9f3b99b96d6f85b9aa58c002db41c672437677b553015b9135b3be7e"},
- {file = "torchvision-0.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:83e9e2457f23110fd53b0177e1bc621518d6ea2108f570e853b768ce36b7c679"},
- {file = "torchvision-0.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7552e80fa222252b8b217a951c85e172a710ea4cad0ae0c06fbb67addece7871"},
- {file = "torchvision-0.13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f230a1a40ed70d51e463ce43df243ec520902f8725de2502e485efc5eea9d864"},
- {file = "torchvision-0.13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e9a563894f9fa40692e24d1aa58c3ef040450017cfed3598ff9637f404f3fe3b"},
- {file = "torchvision-0.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7cb789ceefe6dcd0dc8eeda37bfc45efb7cf34770eac9533861d51ca508eb5b3"},
- {file = "torchvision-0.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:87c137f343197769a51333076e66bfcd576301d2cd8614b06657187c71b06c4f"},
- {file = "torchvision-0.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:4d8bf321c4380854ef04613935fdd415dce29d1088a7ff99e06e113f0efe9203"},
- {file = "torchvision-0.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0298bae3b09ac361866088434008d82b99d6458fe8888c8df90720ef4b347d44"},
- {file = "torchvision-0.13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c5ed609c8bc88c575226400b2232e0309094477c82af38952e0373edef0003fd"},
- {file = "torchvision-0.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:3567fb3def829229ec217c1e38f08c5128ff7fb65854cac17ebac358ff7aa309"},
- {file = "torchvision-0.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b167934a5943242da7b1e59318f911d2d253feeca0d13ad5d832b58eed943401"},
- {file = "torchvision-0.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:0e77706cc90462653620e336bb90daf03d7bf1b88c3a9a3037df8d111823a56e"},
-]
-
-[package.dependencies]
-numpy = "*"
-pillow = ">=5.3.0,<8.3.0 || >=8.4.0"
-requests = "*"
-torch = "1.12.1"
-typing-extensions = "*"
-
-[package.extras]
-scipy = ["scipy"]
-
[[package]]
name = "tqdm"
version = "4.65.0"
description = "Fast, Extensible Progress Meter"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4603,7 +3930,6 @@ telegram = ["requests"]
name = "treevalue"
version = "1.4.10"
description = "A flexible, generalized tree-based data structure."
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4657,7 +3983,6 @@ test = ["coverage (>=5)", "easydict (>=1.7,<2)", "flake8 (>=3.5,<4.0)", "hbutils
name = "tueplots"
version = "0.0.4"
description = "Scientific plotting made easy"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4676,7 +4001,6 @@ examples = ["jupyter"]
name = "typeguard"
version = "2.13.3"
description = "Run-time type checker for Python"
-category = "main"
optional = true
python-versions = ">=3.5.3"
files = [
@@ -4692,7 +4016,6 @@ test = ["mypy", "pytest", "typing-extensions"]
name = "types-protobuf"
version = "4.23.0.1"
description = "Typing stubs for protobuf"
-category = "main"
optional = true
python-versions = "*"
files = [
@@ -4704,7 +4027,6 @@ files = [
name = "typing-extensions"
version = "4.5.0"
description = "Backported and Experimental Type Hints for Python 3.7+"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4712,11 +4034,31 @@ files = [
{file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"},
]
+[[package]]
+name = "tyro"
+version = "0.5.10"
+description = "Strongly typed, zero-effort CLI interfaces"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tyro-0.5.10-py3-none-any.whl", hash = "sha256:ebe0b71761dfaef4053d85201866874ec80d6d5df9b4ce27c93d09063bbc85f0"},
+ {file = "tyro-0.5.10.tar.gz", hash = "sha256:8d3cfecdf8a51151cc8c93c80677b91782c34cadcf3308838bb1152a81dfd074"},
+]
+
+[package.dependencies]
+colorama = {version = ">=0.4.0", markers = "platform_system == \"Windows\""}
+docstring-parser = ">=0.14.1"
+rich = ">=11.1.0"
+shtab = ">=1.5.6"
+typing-extensions = ">=4.3.0"
+
+[package.extras]
+dev = ["PyYAML (>=6.0)", "attrs (>=21.4.0)", "coverage[toml] (>=6.5.0)", "flax (>=0.6.9)", "frozendict (>=2.3.4)", "mypy (>=1.4.1)", "numpy (>=1.20.0)", "omegaconf (>=2.2.2)", "pydantic (>=2.3.0)", "pyright (>=1.1.264)", "pytest (>=7.1.2)", "pytest-cov (>=3.0.0)", "torch (>=1.10.0)"]
+
[[package]]
name = "urllib3"
version = "1.26.15"
description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
files = [
@@ -4733,7 +4075,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
name = "virtualenv"
version = "20.21.0"
description = "Virtual Python Environment builder"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -4744,7 +4085,6 @@ files = [
[package.dependencies]
distlib = ">=0.3.6,<1"
filelock = ">=3.4.1,<4"
-importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.8\""}
platformdirs = ">=2.4,<4"
[package.extras]
@@ -4755,7 +4095,6 @@ test = ["covdefaults (>=2.2.2)", "coverage (>=7.1)", "coverage-enable-subprocess
name = "wandb"
version = "0.13.11"
description = "A CLI and library for interacting with the Weights and Biases API."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -4797,7 +4136,6 @@ sweeps = ["sweeps (>=0.2.0)"]
name = "watchdog"
version = "3.0.0"
description = "Filesystem events monitoring"
-category = "main"
optional = true
python-versions = ">=3.7"
files = [
@@ -4837,7 +4175,6 @@ watchmedo = ["PyYAML (>=3.10)"]
name = "werkzeug"
version = "2.2.3"
description = "The comprehensive WSGI web application library."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4855,7 +4192,6 @@ watchdog = ["watchdog"]
name = "wheel"
version = "0.40.0"
description = "A built-package format for Python"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4870,7 +4206,6 @@ test = ["pytest (>=6.0.0)"]
name = "zipp"
version = "3.15.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -4883,13 +4218,13 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker
testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
[extras]
-atari = ["AutoROM", "ale-py", "opencv-python"]
+atari = ["AutoROM", "ale-py", "opencv-python", "shimmy"]
c51 = []
c51-atari = ["AutoROM", "ale-py", "opencv-python"]
c51-atari-jax = ["AutoROM", "ale-py", "flax", "jax", "jaxlib", "opencv-python"]
c51-jax = ["flax", "jax", "jaxlib"]
cloud = ["awscli", "boto3"]
-dm-control = ["mujoco", "shimmy"]
+dm-control = ["dm-control", "h5py", "mujoco", "shimmy"]
docs = ["markdown-include", "mkdocs-material", "openrlbenchmark"]
dqn = []
dqn-atari = ["AutoROM", "ale-py", "opencv-python"]
@@ -4898,7 +4233,6 @@ dqn-jax = ["flax", "jax", "jaxlib"]
envpool = ["envpool"]
jax = ["flax", "jax", "jaxlib"]
mujoco = ["imageio", "mujoco"]
-mujoco-py = ["free-mujoco-py"]
optuna = ["optuna", "optuna-dashboard"]
pettingzoo = ["PettingZoo", "SuperSuit", "multi-agent-ale-py"]
plot = []
@@ -4910,5 +4244,5 @@ qdagger-dqn-atari-jax-impalacnn = ["AutoROM", "ale-py", "flax", "jax", "jaxlib",
[metadata]
lock-version = "2.0"
-python-versions = ">=3.7.1,<3.11"
-content-hash = "83763cefd7c948380a16349ea5ec80fd36816adace1f8101bc5a50fd686e5a81"
+python-versions = ">=3.8,<3.11"
+content-hash = "ce1dd6a428e94e30643d2fb0a3fd13f0132d176185a91f7685392d4ec0e7892b"
diff --git a/pyproject.toml b/pyproject.toml
index ef1652a6c..49c7fabca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cleanrl"
-version = "1.1.0"
+version = "2.0.0b1"
description = "High-quality single file implementation of Deep Reinforcement Learning algorithms with research-friendly features"
authors = ["Costa Huang "]
packages = [
@@ -12,33 +12,34 @@ license="MIT"
readme = "README.md"
[tool.poetry.dependencies]
-python = ">=3.7.1,<3.11"
+python = ">=3.8,<3.11"
tensorboard = "^2.10.0"
wandb = "^0.13.11"
gym = "0.23.1"
torch = ">=1.12.1"
-stable-baselines3 = "1.2.0"
+stable-baselines3 = "2.0.0"
gymnasium = ">=0.28.1"
moviepy = "^1.0.3"
pygame = "2.1.0"
huggingface-hub = "^0.11.1"
rich = "<12.0"
tenacity = "^8.2.2"
+tyro = "^0.5.10"
+pyyaml = "^6.0.1"
-ale-py = {version = "0.7.4", optional = true}
-AutoROM = {extras = ["accept-rom-license"], version = "^0.4.2", optional = true}
+ale-py = {version = "0.8.1", optional = true}
+AutoROM = {extras = ["accept-rom-license"], version = "~0.4.2", optional = true}
opencv-python = {version = "^4.6.0.66", optional = true}
procgen = {version = "^0.10.7", optional = true}
pytest = {version = "^7.1.3", optional = true}
mujoco = {version = "<=2.3.3", optional = true}
imageio = {version = "^2.14.1", optional = true}
-free-mujoco-py = {version = "^2.1.6", optional = true}
mkdocs-material = {version = "^8.4.3", optional = true}
markdown-include = {version = "^0.7.0", optional = true}
openrlbenchmark = {version = "^0.1.1b4", optional = true}
-jax = {version = "^0.3.17", optional = true}
-jaxlib = {version = "^0.3.15", optional = true}
-flax = {version = "^0.6.0", optional = true}
+jax = {version = "0.4.8", optional = true}
+jaxlib = {version = "0.4.7", optional = true}
+flax = {version = "0.6.8", optional = true}
optuna = {version = "^3.0.1", optional = true}
optuna-dashboard = {version = "^0.7.2", optional = true}
envpool = {version = "^0.6.4", optional = true}
@@ -46,38 +47,34 @@ PettingZoo = {version = "1.18.1", optional = true}
SuperSuit = {version = "3.4.0", optional = true}
multi-agent-ale-py = {version = "0.1.11", optional = true}
boto3 = {version = "^1.24.70", optional = true}
-awscli = {version = "^1.25.71", optional = true}
-shimmy = {version = ">=1.0.0", extras = ["dm-control"], optional = true}
+awscli = {version = "^1.31.0", optional = true}
+shimmy = {version = ">=1.1.0", optional = true}
+dm-control = {version = ">=1.0.10", optional = true}
+h5py = {version = ">=3.7.0", optional = true}
+optax = {version = "0.1.4", optional = true}
+chex = {version = "0.1.5", optional = true}
+numpy = ">=1.21.6"
[tool.poetry.group.dev.dependencies]
pre-commit = "^2.20.0"
-
-[tool.poetry.group.isaacgym]
-optional = true
-[tool.poetry.group.isaacgym.dependencies]
-isaacgymenvs = {git = "https://github.com/vwxyzjn/IsaacGymEnvs.git", rev = "poetry", python = ">=3.7.1,<3.10"}
-isaacgym = {path = "cleanrl/ppo_continuous_action_isaacgym/isaacgym", develop = true}
-
-
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.extras]
-atari = ["ale-py", "AutoROM", "opencv-python"]
+atari = ["ale-py", "AutoROM", "opencv-python", "shimmy"]
procgen = ["procgen"]
plot = ["pandas", "seaborn"]
pytest = ["pytest"]
mujoco = ["mujoco", "imageio"]
-mujoco_py = ["free-mujoco-py"]
jax = ["jax", "jaxlib", "flax"]
docs = ["mkdocs-material", "markdown-include", "openrlbenchmark"]
envpool = ["envpool"]
optuna = ["optuna", "optuna-dashboard"]
pettingzoo = ["PettingZoo", "SuperSuit", "multi-agent-ale-py"]
cloud = ["boto3", "awscli"]
-dm_control = ["shimmy", "mujoco"]
+dm_control = ["shimmy", "mujoco", "dm-control", "h5py"]
# dependencies for algorithm variant (useful when you want to run a specific algorithm)
dqn = []
diff --git a/requirements/requirements-atari.txt b/requirements/requirements-atari.txt
index c4a8008f8..a3a54a949 100644
--- a/requirements/requirements-atari.txt
+++ b/requirements/requirements-atari.txt
@@ -1,78 +1,82 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-ale-py==0.7.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-autorom-accept-rom-license==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-autorom[accept-rom-license]==0.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-importlib-resources==5.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-opencv-python==4.7.0.72 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+ale-py==0.8.1 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+autorom-accept-rom-license==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+autorom[accept-rom-license]==0.4.2 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.11"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+opencv-python==4.7.0.72 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shimmy==1.1.0 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-cloud.txt b/requirements/requirements-cloud.txt
index 02e73896f..4c8e0292d 100644
--- a/requirements/requirements-cloud.txt
+++ b/requirements/requirements-cloud.txt
@@ -1,79 +1,82 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-awscli==1.27.132 ; python_full_version >= "3.7.1" and python_version < "3.11"
-boto3==1.26.132 ; python_full_version >= "3.7.1" and python_version < "3.11"
-botocore==1.29.132 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docutils==0.16 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jmespath==1.0.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-s3transfer==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+awscli==1.31.0 ; python_version >= "3.8" and python_version < "3.11"
+boto3==1.33.0 ; python_version >= "3.8" and python_version < "3.11"
+botocore==1.33.0 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+docutils==0.16 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+jmespath==1.0.1 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+s3transfer==0.8.0 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version < "3.11" and python_version >= "3.8"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-dm_control.txt b/requirements/requirements-dm_control.txt
index 6c1974702..4653ae249 100644
--- a/requirements/requirements-dm_control.txt
+++ b/requirements/requirements-dm_control.txt
@@ -1,84 +1,87 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dm-control==1.0.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dm-env==1.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dm-tree==0.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-h5py==3.8.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-labmaze==1.0.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-lxml==4.9.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mujoco==2.3.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyopengl==3.1.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-shimmy[dm-control]==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+dm-control==1.0.11 ; python_version >= "3.8" and python_version < "3.11"
+dm-env==1.6 ; python_version >= "3.8" and python_version < "3.11"
+dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+glfw==1.12.0 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+h5py==3.8.0 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+labmaze==1.0.6 ; python_version >= "3.8" and python_version < "3.11"
+lxml==4.9.3 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+mujoco==2.3.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyopengl==3.1.6 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shimmy==1.1.0 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt
index 2a7fc6941..fe65f4879 100644
--- a/requirements/requirements-docs.txt
+++ b/requirements/requirements-docs.txt
@@ -1,94 +1,97 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dataclasses==0.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dill==0.3.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-expt==0.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-ghp-import==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jinja2==3.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown-include==0.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mergedeep==1.3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mkdocs-material-extensions==1.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mkdocs-material==8.5.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mkdocs==1.4.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-multiprocess==0.70.14 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-openrlbenchmark==0.1.1b4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pip==22.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pymdown-extensions==9.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml-env-tag==0.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-seaborn==0.12.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tabulate==0.9.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tueplots==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typeguard==2.13.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-watchdog==3.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+dataclasses==0.6 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+dill==0.3.6 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+expt==0.4.1 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+ghp-import==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.11"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+jinja2==3.1.2 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown-include==0.7.2 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+mergedeep==1.3.4 ; python_version >= "3.8" and python_version < "3.11"
+mkdocs-material-extensions==1.1.1 ; python_version >= "3.8" and python_version < "3.11"
+mkdocs-material==8.5.11 ; python_version >= "3.8" and python_version < "3.11"
+mkdocs==1.4.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+multiprocess==0.70.14 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+openrlbenchmark==0.1.1b4 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pip==22.3.1 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pymdown-extensions==9.11 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml-env-tag==0.1 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11"
+seaborn==0.12.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tabulate==0.9.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+tueplots==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+typeguard==2.13.3 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+watchdog==3.0.0 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.11"
diff --git a/requirements/requirements-envpool.txt b/requirements/requirements-envpool.txt
index 03e3a5909..1d491f4dd 100644
--- a/requirements/requirements-envpool.txt
+++ b/requirements/requirements-envpool.txt
@@ -1,85 +1,88 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-bitmath==1.3.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-chardet==4.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dill==0.3.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dm-env==1.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dm-tree==0.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-enum-tools==0.9.0.post1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-envpool==0.6.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-graphviz==0.20.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-hbutils==0.8.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytimeparse==1.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-treevalue==1.4.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-types-protobuf==4.23.0.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+bitmath==1.3.3.1 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+chardet==4.0.0 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+dill==0.3.6 ; python_version >= "3.8" and python_version < "3.11"
+dm-env==1.6 ; python_version >= "3.8" and python_version < "3.11"
+dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+enum-tools==0.9.0.post1 ; python_version >= "3.8" and python_version < "3.11"
+envpool==0.6.6 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+graphviz==0.20.1 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+hbutils==0.8.6 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytimeparse==1.1.8 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+treevalue==1.4.10 ; python_version >= "3.8" and python_version < "3.11"
+types-protobuf==4.23.0.1 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-jax.txt b/requirements/requirements-jax.txt
index 30f4a1223..12a3d0861 100644
--- a/requirements/requirements-jax.txt
+++ b/requirements/requirements-jax.txt
@@ -1,93 +1,97 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cached-property==1.5.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-chex==0.1.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-dm-tree==0.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-etils==0.9.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-exceptiongroup==1.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-flax==0.6.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-importlib-resources==5.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-iniconfig==2.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jax==0.3.25 ; python_full_version >= "3.7.1" and python_version < "3.11"
-jaxlib==0.3.25 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-msgpack==1.0.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-opt-einsum==3.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-optax==0.1.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-orbax==0.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pluggy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytest==7.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorstore==0.1.28 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tomli==2.0.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-toolz==0.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cached-property==1.5.2 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+chex==0.1.5 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+etils==0.9.0 ; python_version >= "3.8" and python_version < "3.11"
+exceptiongroup==1.1.1 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+flax==0.6.8 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.11"
+iniconfig==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+jax==0.4.8 ; python_version >= "3.8" and python_version < "3.11"
+jaxlib==0.4.7 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+ml-dtypes==0.2.0 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+msgpack==1.0.5 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version < "3.11" and python_version >= "3.8"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+opt-einsum==3.3.0 ; python_version >= "3.8" and python_version < "3.11"
+optax==0.1.4 ; python_version >= "3.8" and python_version < "3.11"
+orbax==0.1.0 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pluggy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+pytest==7.3.1 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+tensorstore==0.1.28 ; python_version >= "3.8" and python_version < "3.11"
+tomli==2.0.1 ; python_version >= "3.8" and python_version < "3.11"
+toolz==0.12.0 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-mujoco.txt b/requirements/requirements-mujoco.txt
index e14987e34..ac8aa1a23 100644
--- a/requirements/requirements-mujoco.txt
+++ b/requirements/requirements-mujoco.txt
@@ -1,76 +1,79 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mujoco==2.3.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyopengl==3.1.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+glfw==1.12.0 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+mujoco==2.3.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyopengl==3.1.6 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-mujoco_py.txt b/requirements/requirements-mujoco_py.txt
deleted file mode 100644
index 60b2edbef..000000000
--- a/requirements/requirements-mujoco_py.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cffi==1.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cython==0.29.34 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fasteners==0.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-free-mujoco-py==2.1.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-monotonic==1.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pycparser==2.21 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
diff --git a/requirements/requirements-optuna.txt b/requirements/requirements-optuna.txt
index 5e0840344..3b9f3a2e4 100644
--- a/requirements/requirements-optuna.txt
+++ b/requirements/requirements-optuna.txt
@@ -1,87 +1,90 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-alembic==1.10.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-bottle==0.12.25 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cmaes==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorlog==6.7.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-greenlet==2.0.2 ; python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "aarch64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "ppc64le" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "x86_64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "amd64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "AMD64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "win32" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "WIN32"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-importlib-resources==5.12.0 ; python_full_version >= "3.7.1" and python_version < "3.9"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-joblib==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-mako==1.2.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-optuna-dashboard==0.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-optuna==3.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-scikit-learn==1.0.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sqlalchemy==2.0.13 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-threadpoolctl==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+alembic==1.10.4 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+bottle==0.12.25 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+cmaes==0.10.0 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+colorlog==6.7.0 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+greenlet==2.0.2 ; python_version >= "3.8" and python_version < "3.11" and platform_machine == "aarch64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "ppc64le" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "x86_64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "amd64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "AMD64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "win32" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "WIN32"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.9"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+joblib==1.2.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+mako==1.2.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+optuna-dashboard==0.7.3 ; python_version >= "3.8" and python_version < "3.11"
+optuna==3.3.0 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+scikit-learn==1.0.2 ; python_version >= "3.8" and python_version < "3.11"
+scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+sqlalchemy==2.0.13 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+threadpoolctl==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-pettingzoo.txt b/requirements/requirements-pettingzoo.txt
index 1127adcd2..461c6023f 100644
--- a/requirements/requirements-pettingzoo.txt
+++ b/requirements/requirements-pettingzoo.txt
@@ -1,77 +1,80 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-multi-agent-ale-py==0.1.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pettingzoo==1.18.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-supersuit==3.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tinyscaler==1.2.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+multi-agent-ale-py==0.1.11 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pettingzoo==1.18.1 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version >= "3.8" and python_version < "3.11"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+supersuit==3.4.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+tinyscaler==1.2.5 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements-procgen.txt b/requirements/requirements-procgen.txt
index da2a0dc91..f62fa2026 100644
--- a/requirements/requirements-procgen.txt
+++ b/requirements/requirements-procgen.txt
@@ -1,80 +1,83 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cffi==1.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-glcontext==2.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym3==0.3.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moderngl==5.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-procgen==0.10.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pycparser==2.21 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+cffi==1.15.1 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+glcontext==2.3.7 ; python_version >= "3.8" and python_version < "3.11"
+glfw==1.12.0 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym3==0.3.3 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moderngl==5.8.2 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+procgen==0.10.7 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version >= "3.8" and python_version < "3.11"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pycparser==2.21 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 94b0b02bd..5cdc73d10 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,73 +1,76 @@
-absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
-jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11"
-markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11"
-oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11"
-protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1"
-psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11"
-python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11"
-torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11"
-tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11"
-werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11"
-wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11"
-zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10"
+absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11"
+appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11"
+certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11"
+charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11"
+click==8.1.3 ; python_version >= "3.8" and python_version < "3.11"
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11"
+colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11"
+commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11"
+cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11"
+decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11"
+docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11"
+docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11"
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11"
+filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11"
+fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11"
+gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11"
+gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11"
+google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11"
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11"
+grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11"
+gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11"
+gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11"
+gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11"
+huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11"
+idna==3.4 ; python_version >= "3.8" and python_version < "3.11"
+imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11"
+importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10"
+jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11"
+kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11"
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11"
+markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11"
+matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11"
+moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11"
+numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11"
+oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11"
+packaging==23.1 ; python_version >= "3.8" and python_version < "3.11"
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11"
+pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11"
+pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11"
+proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11"
+protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8"
+psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11"
+pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11"
+pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11"
+pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11"
+pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11"
+python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11"
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11"
+pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11"
+requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11"
+requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11"
+rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11"
+rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11"
+sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11"
+setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11"
+setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11"
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11"
+six==1.16.0 ; python_version >= "3.8" and python_version < "3.11"
+smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11"
+stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11"
+tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11"
+tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11"
+torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11"
+tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11"
+typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"
+tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11"
+urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11"
+wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11"
+werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11"
+wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11"
+zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10"
diff --git a/tests/test_atari_gymnasium.py b/tests/test_atari_gymnasium.py
index e3eb6f966..06f95a838 100644
--- a/tests/test_atari_gymnasium.py
+++ b/tests/test_atari_gymnasium.py
@@ -11,7 +11,7 @@ def test_dqn():
def test_dqn_eval():
subprocess.run(
- "python cleanrl/dqn_atari.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
+ "python cleanrl/dqn_atari.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
shell=True,
check=True,
)
@@ -27,7 +27,7 @@ def test_qdagger_dqn_atari_impalacnn():
def test_qdagger_dqn_atari_impalacnn_eval():
subprocess.run(
- "python cleanrl/qdagger_dqn_atari_impalacnn.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1",
+ "python cleanrl/qdagger_dqn_atari_impalacnn.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1",
shell=True,
check=True,
)
@@ -43,7 +43,7 @@ def test_c51_atari():
def test_c51_atari_eval():
subprocess.run(
- "python cleanrl/c51_atari.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
+ "python cleanrl/c51_atari.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
shell=True,
check=True,
)
diff --git a/tests/test_atari_jax_gymnasium.py b/tests/test_atari_jax_gymnasium.py
index a9e91a781..aa51ac0cb 100644
--- a/tests/test_atari_jax_gymnasium.py
+++ b/tests/test_atari_jax_gymnasium.py
@@ -11,7 +11,7 @@ def test_dqn_jax():
def test_dqn_jax_eval():
subprocess.run(
- "python cleanrl/dqn_atari_jax.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
+ "python cleanrl/dqn_atari_jax.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
shell=True,
check=True,
)
@@ -27,7 +27,7 @@ def test_qdagger_dqn_atari_jax_impalacnn():
def test_qdagger_dqn_atari_jax_impalacnn_eval():
subprocess.run(
- "python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1",
+ "python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1",
shell=True,
check=True,
)
@@ -43,7 +43,7 @@ def test_c51_atari_jax():
def test_c51_atari_jax_eval():
subprocess.run(
- "python cleanrl/c51_atari_jax.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
+ "python cleanrl/c51_atari_jax.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4",
shell=True,
check=True,
)
diff --git a/tests/test_classic_control_gymnasium.py b/tests/test_classic_control_gymnasium.py
index 438143af7..ff639ea0d 100644
--- a/tests/test_classic_control_gymnasium.py
+++ b/tests/test_classic_control_gymnasium.py
@@ -19,7 +19,7 @@ def test_c51():
def test_c51_eval():
subprocess.run(
- "python cleanrl/c51.py --save-model True --learning-starts 200 --total-timesteps 205",
+ "python cleanrl/c51.py --save-model --learning-starts 200 --total-timesteps 205",
shell=True,
check=True,
)
diff --git a/tests/test_classic_control_jax_gymnasium.py b/tests/test_classic_control_jax_gymnasium.py
index e413e3588..638bb2215 100644
--- a/tests/test_classic_control_jax_gymnasium.py
+++ b/tests/test_classic_control_jax_gymnasium.py
@@ -19,7 +19,7 @@ def test_c51_jax():
def test_c51_jax_eval():
subprocess.run(
- "python cleanrl/c51_jax.py --save-model True --learning-starts 200 --total-timesteps 205",
+ "python cleanrl/c51_jax.py --save-model --learning-starts 200 --total-timesteps 205",
shell=True,
check=True,
)
diff --git a/tests/test_envpool.py b/tests/test_envpool.py
index d16325ea0..cbf90e230 100644
--- a/tests/test_envpool.py
+++ b/tests/test_envpool.py
@@ -35,7 +35,7 @@ def test_ppo_atari_envpool_xla_jax_scan():
def test_ppo_atari_envpool_xla_jax_scan_eval():
subprocess.run(
- "python cleanrl/ppo_atari_envpool_xla_jax_scan.py --save-model True --num-envs 8 --num-steps 6 --update-epochs 1 --num-minibatches 1 --total-timesteps 256",
+ "python cleanrl/ppo_atari_envpool_xla_jax_scan.py --save-model --num-envs 8 --num-steps 6 --update-epochs 1 --num-minibatches 1 --total-timesteps 256",
shell=True,
check=True,
)
diff --git a/tests/test_mujoco.py b/tests/test_mujoco.py
index bf0b5204b..77e91540f 100644
--- a/tests/test_mujoco.py
+++ b/tests/test_mujoco.py
@@ -57,12 +57,12 @@ def test_mujoco_eval():
Test mujoco_eval
"""
subprocess.run(
- "python cleanrl/ddpg_continuous_action.py --save-model True --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105",
+ "python cleanrl/ddpg_continuous_action.py --save-model --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105",
shell=True,
check=True,
)
subprocess.run(
- "python cleanrl/ddpg_continuous_action_jax.py --save-model True --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105",
+ "python cleanrl/ddpg_continuous_action_jax.py --save-model --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105",
shell=True,
check=True,
)
diff --git a/tests/test_mujoco_py.py b/tests/test_mujoco_py.py
deleted file mode 100644
index f97654f8f..000000000
--- a/tests/test_mujoco_py.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import subprocess
-
-
-def test_mujoco_py():
- """
- Test mujoco_py
- """
- subprocess.run(
- "python cleanrl/ddpg_continuous_action.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105",
- shell=True,
- check=True,
- )
- subprocess.run(
- "python cleanrl/ddpg_continuous_action_jax.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105",
- shell=True,
- check=True,
- )
- subprocess.run(
- "python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105",
- shell=True,
- check=True,
- )
- subprocess.run(
- "python cleanrl/td3_continuous_action.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105",
- shell=True,
- check=True,
- )
- subprocess.run(
- "python cleanrl/ppo_continuous_action.py --env-id Hopper-v2 --num-envs 1 --num-steps 64 --total-timesteps 256",
- shell=True,
- check=True,
- )
- subprocess.run(
- "python cleanrl/sac_continuous_action.py --env-id Hopper-v2 --batch-size 128 --total-timesteps 135",
- shell=True,
- check=True,
- )
-
-
-def test_mujoco_py_eval():
- """
- Test mujoco_py_eval
- """
- subprocess.run(
- "python cleanrl/ddpg_continuous_action.py --save-model True --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105",
- shell=True,
- check=True,
- )
- subprocess.run(
- "python cleanrl/ddpg_continuous_action_jax.py --save-model True --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105",
- shell=True,
- check=True,
- )
diff --git a/tests/test_tuner.py b/tests/test_tuner.py
index c4c0d1d32..d6f1933e0 100644
--- a/tests/test_tuner.py
+++ b/tests/test_tuner.py
@@ -14,12 +14,12 @@ def test_tuner():
"Acrobot-v1": [-500, 0],
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4]),
"num-steps": trial.suggest_categorical("num-steps", [1200]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 1200,
"num-envs": 1,
},
diff --git a/tuner_example.py b/tuner_example.py
index 9e01a6048..5db4b2f9e 100644
--- a/tuner_example.py
+++ b/tuner_example.py
@@ -13,12 +13,12 @@
"Acrobot-v1": [-500, 0],
},
params_fn=lambda trial: {
- "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
+ "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True),
"num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]),
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
- "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
- "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
+ "vf-coef": trial.suggest_float("vf-coef", 0, 5),
+ "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5),
"total-timesteps": 100000,
"num-envs": 16,
},