diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index ff239510f..52a0aa5b4 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -22,7 +22,7 @@ If you need to run benchmark experiments for a performance-impacting changes: - [ ] I have contacted @vwxyzjn to obtain access to the [openrlbenchmark W&B team](https://wandb.ai/openrlbenchmark). -- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture-video`. +- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture_video`. - [ ] I have performed RLops with `python -m openrlbenchmark.rlops`. - For new feature or bug fix: - [ ] I have used the RLops utility to understand the performance impact of the changes and confirmed there is no regression. 
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index ae4bc1540..ebdd57e4c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,10 +1,5 @@ name: tests on: - push: - paths-ignore: - - '**/README.md' - - 'docs/**/*' - - 'cloud/**/*' pull_request: paths-ignore: - '**/README.md' @@ -15,8 +10,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: @@ -58,8 +53,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: @@ -94,8 +89,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: @@ -120,8 +115,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04] runs-on: ${{ matrix.os }} steps: @@ -180,8 +175,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04] runs-on: ${{ matrix.os }} steps: @@ -194,29 +189,12 @@ jobs: with: poetry-version: ${{ matrix.poetry-version }} - # mujoco_py tests - - name: Install dependencies - run: poetry install -E "pytest mujoco_py mujoco jax" - - name: Run gymnasium migration dependencies - run: poetry run pip install "stable_baselines3==2.0.0a1" - - name: Downgrade setuptools - run: poetry run pip install setuptools==59.5.0 - - name: install mujoco_py dependencies - run: | - sudo 
apt-get update && sudo apt-get -y install wget unzip software-properties-common \ - libgl1-mesa-dev \ - libgl1-mesa-glx \ - libglew-dev \ - libosmesa6-dev patchelf - - name: Run mujoco_py tests - run: poetry run pytest tests/test_mujoco_py.py - test-envpool-envs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04] runs-on: ${{ matrix.os }} steps: @@ -241,8 +219,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04] runs-on: ${{ matrix.os }} steps: @@ -267,8 +245,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04] runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/utils_test.yaml b/.github/workflows/utils_test.yaml index 8b1929503..cd668166f 100644 --- a/.github/workflows/utils_test.yaml +++ b/.github/workflows/utils_test.yaml @@ -15,8 +15,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8] - poetry-version: [1.3.1] + python-version: ["3.8", "3.9", "3.10"] + poetry-version: ["1.7"] os: [ubuntu-22.04] runs-on: ${{ matrix.os }} steps: diff --git a/.gitignore b/.gitignore index 4784f1086..1d4cfa0e4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +slurm +.aim runs balance_bot.xml cleanrl/ppo_continuous_action_isaacgym/isaacgym/examples diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccb3fc71a..516cd23bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,10 +56,6 @@ repos: name: poetry-export requirements-dm_control.txt args: ["--without-hashes", "-o", "requirements/requirements-dm_control.txt", "-E", "dm_control"] stages: [manual] - - id: poetry-export - name: poetry-export requirements-mujoco_py.txt - args: 
["--without-hashes", "-o", "requirements/requirements-mujoco_py.txt", "-E", "mujoco_py"] - stages: [manual] - id: poetry-export name: poetry-export requirements-procgen.txt args: ["--without-hashes", "-o", "requirements/requirements-procgen.txt", "-E", "procgen"] diff --git a/README.md b/README.md index 5e645ab46..790ad9933 100644 --- a/README.md +++ b/README.md @@ -191,3 +191,8 @@ If you use CleanRL in your work, please cite our technical [paper](https://www.j url = {http://jmlr.org/papers/v23/21-1342.html} } ``` + + +## Acknowledgement + +We thank [Hugging Face](https://huggingface.co/)'s cluster for providing GPU computational resources to this project. diff --git a/benchmark/c51.sh b/benchmark/c51.sh index fb46bb6b4..6aba77810 100644 --- a/benchmark/c51.sh +++ b/benchmark/c51.sh @@ -1,29 +1,29 @@ poetry install OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/c51.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/c51.py --no_cuda --track --capture_video" \ --num-seeds 3 \ --workers 9 poetry install -E atari OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/c51_atari.py --track --capture-video" \ + --command "poetry run python cleanrl/c51_atari.py --track --capture_video" \ --num-seeds 3 \ --workers 1 poetry install -E "jax" -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html CUDA_VISIBLE_DEVICES=-1 xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/c51_jax.py --track --capture-video" \ + 
--command "poetry run python cleanrl/c51_jax.py --track --capture_video" \ --num-seeds 3 \ --workers 1 poetry install -E "atari jax" -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/c51_atari_jax.py --track --capture-video" \ + --command "poetry run python cleanrl/c51_atari_jax.py --track --capture_video" \ --num-seeds 3 \ --workers 1 diff --git a/benchmark/cleanrl_1gpu.slurm_template b/benchmark/cleanrl_1gpu.slurm_template new file mode 100644 index 000000000..b7c76c297 --- /dev/null +++ b/benchmark/cleanrl_1gpu.slurm_template @@ -0,0 +1,21 @@ +#!/bin/bash +#SBATCH --job-name=low-priority +#SBATCH --partition=production-cluster +#SBATCH --gpus-per-task={{gpus_per_task}} +#SBATCH --cpus-per-gpu={{cpus_per_gpu}} +#SBATCH --ntasks={{ntasks}} +#SBATCH --output=slurm/logs/%x_%j.out +#SBATCH --array={{array}} +#SBATCH --mem-per-cpu=12G +#SBATCH --exclude=ip-26-0-146-[33,100,122-123,149,183,212,249],ip-26-0-147-[6,94,120,141],ip-26-0-152-[71,101,119,178,186,207,211],ip-26-0-153-[6,62,112,132,166,251],ip-26-0-154-[38,65],ip-26-0-155-[164,174,187,217],ip-26-0-156-[13,40],ip-26-0-157-27 +##SBATCH --nodelist=ip-26-0-147-204 +{{nodes}} + +env_ids={{env_ids}} +seeds={{seeds}} +env_id=${env_ids[$SLURM_ARRAY_TASK_ID / {{len_seeds}}]} +seed=${seeds[$SLURM_ARRAY_TASK_ID % {{len_seeds}}]} + +echo "Running task $SLURM_ARRAY_TASK_ID with env_id: $env_id and seed: $seed" + +srun {{command}} --env-id $env_id --seed $seed # diff --git a/benchmark/ddpg.sh b/benchmark/ddpg.sh index 9f26b302e..3746b4d99 100755 --- a/benchmark/ddpg.sh +++ b/benchmark/ddpg.sh @@ -1,16 +1,22 @@ -poetry install -E "mujoco_py" -python -c "import 
mujoco_py" -xvfb-run -a python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \ - --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture-video" \ +poetry install -E "mujoco" +python -m cleanrl_utils.benchmark \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --command "poetry run python cleanrl/ddpg_continuous_action.py --track" \ --num-seeds 3 \ - --workers 1 + --workers 18 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template -poetry install -E "mujoco_py jax" -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html -poetry run python -c "import mujoco_py" -xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \ - --command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track --capture-video" \ +poetry install -E "mujoco jax" +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run python -m cleanrl_utils.benchmark \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track" \ --num-seeds 3 \ - --workers 1 + --workers 18 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template diff --git a/benchmark/ddpg_plot.sh b/benchmark/ddpg_plot.sh new file mode 100755 index 000000000..d36db199e --- /dev/null +++ b/benchmark/ddpg_plot.sh @@ -0,0 +1,20 @@ +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ddpg_continuous_action?tag=pr-424' \ + --env-ids 
HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ddpg \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ddpg_continuous_action?tag=pr-424' \ + 'ddpg_continuous_action_jax?tag=pr-424' \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ddpg_jax \ + --scan-history diff --git a/benchmark/dqn.sh b/benchmark/dqn.sh index 9a8d8e32e..dcd90446b 100644 --- a/benchmark/dqn.sh +++ b/benchmark/dqn.sh @@ -1,29 +1,29 @@ poetry install OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/dqn.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/dqn.py --no_cuda --track --capture_video" \ --num-seeds 3 \ --workers 9 poetry install -E atari OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/dqn_atari.py --track --capture-video" \ + --command "poetry run python cleanrl/dqn_atari.py --track --capture_video" \ --num-seeds 3 \ --workers 1 poetry install -E jax -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/dqn_jax.py --track --capture-video" \ + --command "poetry run python 
cleanrl/dqn_jax.py --track --capture_video" \ --num-seeds 3 \ --workers 1 poetry install -E "atari jax" -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture-video" \ + --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture_video" \ --num-seeds 3 \ --workers 1 diff --git a/benchmark/ppg.sh b/benchmark/ppg.sh index 20fde68cf..ee5580f33 100644 --- a/benchmark/ppg.sh +++ b/benchmark/ppg.sh @@ -3,6 +3,6 @@ poetry install -E procgen xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids starpilot bossfight bigfish \ - --command "poetry run python cleanrl/ppg_procgen.py --track --capture-video" \ + --command "poetry run python cleanrl/ppg_procgen.py --track --capture_video" \ --num-seeds 3 \ --workers 1 diff --git a/benchmark/ppo.sh b/benchmark/ppo.sh index 7fefcd933..70f374785 100644 --- a/benchmark/ppo.sh +++ b/benchmark/ppo.sh @@ -3,118 +3,143 @@ poetry install OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \ --num-seeds 3 \ - --workers 9 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template poetry install -E atari OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python 
cleanrl/ppo_atari.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_atari.py --track --capture_video" \ --num-seeds 3 \ - --workers 3 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template + +poetry install -E mujoco +OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --command "poetry run python cleanrl/ppo_continuous_action.py --no_cuda --track --capture_video" \ + --num-seeds 3 \ + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template + +poetry install -E "mujoco dm_control" +OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ + --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 
dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \ + --command "poetry run python cleanrl/ppo_continuous_action.py --exp-name ppo_continuous_action_8M --total-timesteps 8000000 --no_cuda --track" \ + --num-seeds 10 \ + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template poetry install -E atari OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/ppo_atari_lstm.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_atari_lstm.py --track --capture_video" \ --num-seeds 3 \ - --workers 3 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template poetry install -E envpool -xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ +poetry run python -m cleanrl_utils.benchmark \ --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \ - --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture_video" \ --num-seeds 3 \ - --workers 1 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template -poetry install -E "mujoco_py mujoco" -poetry run python -c "import mujoco_py" -OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 
InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \ - --command "poetry run python cleanrl/ppo_continuous_action.py --cuda False --track --capture-video" \ +poetry install -E "envpool jax" +poetry run python -m cleanrl_utils.benchmark \ + --env-ids Alien-v5 Amidar-v5 Assault-v5 Asterix-v5 Asteroids-v5 Atlantis-v5 BankHeist-v5 BattleZone-v5 BeamRider-v5 Berzerk-v5 Bowling-v5 Boxing-v5 Breakout-v5 Centipede-v5 ChopperCommand-v5 CrazyClimber-v5 Defender-v5 DemonAttack-v5 DoubleDunk-v5 Enduro-v5 FishingDerby-v5 Freeway-v5 Frostbite-v5 Gopher-v5 Gravitar-v5 Hero-v5 IceHockey-v5 Jamesbond-v5 Kangaroo-v5 Krull-v5 KungFuMaster-v5 MontezumaRevenge-v5 MsPacman-v5 NameThisGame-v5 Phoenix-v5 Pitfall-v5 Pong-v5 PrivateEye-v5 Qbert-v5 Riverraid-v5 RoadRunner-v5 Robotank-v5 Seaquest-v5 Skiing-v5 Solaris-v5 SpaceInvaders-v5 StarGunner-v5 Surround-v5 Tennis-v5 TimePilot-v5 Tutankham-v5 UpNDown-v5 Venture-v5 VideoPinball-v5 WizardOfWor-v5 YarsRevenge-v5 Zaxxon-v5 \ + --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \ + --num-seeds 3 \ + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template + +poetry install -E "envpool jax" +python -m cleanrl_utils.benchmark \ + --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \ + --command "poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --track --capture_video" \ --num-seeds 3 \ - --workers 6 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template poetry install -E procgen -xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ +poetry run python -m cleanrl_utils.benchmark \ --env-ids starpilot bossfight bigfish \ - --command "poetry run python cleanrl/ppo_procgen.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_procgen.py --track --capture_video" \ 
--num-seeds 3 \ - --workers 1 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template poetry install -E atari xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --track --capture-video" \ + --command "poetry run torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --local-num-envs 4 --track --capture_video" \ --num-seeds 3 \ - --workers 1 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template -poetry install "pettingzoo atari" +poetry install -E "pettingzoo atari" poetry run AutoROM --accept-license xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids pong_v3 surround_v2 tennis_v3 \ - --command "poetry run python cleanrl/ppo_pettingzoo_ma_atari.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_pettingzoo_ma_atari.py --track --capture_video" \ --num-seeds 3 \ - --workers 3 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template # IMPORTANT: see specific Isaac Gym installation at # https://docs.cleanrl.dev/rl-algorithms/ppo/#usage_8 poetry install --with isaacgym xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids Cartpole Ant Humanoid BallBalance Anymal \ - --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture_video" \ --num-seeds 3 \ - --workers 1 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + 
--slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids AllegroHand ShadowHand \ - --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture-video --num-envs 8192 --num-steps 8 --update-epochs 5 --num-minibatches 4 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660" \ + --command "poetry run python cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py --track --capture_video --num-envs 8192 --num-steps 8 --update-epochs 5 --num-minibatches 4 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660" \ --num-seeds 3 \ - --workers 1 - - -poetry install "envpool jax" -poetry run python -m cleanrl_utils.benchmark \ - --env-ids Alien-v5 Amidar-v5 Assault-v5 Asterix-v5 Asteroids-v5 Atlantis-v5 BankHeist-v5 BattleZone-v5 BeamRider-v5 Berzerk-v5 Bowling-v5 Boxing-v5 Breakout-v5 Centipede-v5 ChopperCommand-v5 CrazyClimber-v5 Defender-v5 DemonAttack-v5 \ - --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \ - --num-seeds 3 \ - --workers 1 -poetry run python -m cleanrl_utils.benchmark \ - --env-ids DoubleDunk-v5 Enduro-v5 FishingDerby-v5 Freeway-v5 Frostbite-v5 Gopher-v5 Gravitar-v5 Hero-v5 IceHockey-v5 Jamesbond-v5 Kangaroo-v5 Krull-v5 KungFuMaster-v5 MontezumaRevenge-v5 MsPacman-v5 NameThisGame-v5 Phoenix-v5 Pitfall-v5 Pong-v5 \ - --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \ - --num-seeds 3 \ - --workers 1 -poetry run python -m cleanrl_utils.benchmark \ - --env-ids PrivateEye-v5 Qbert-v5 Riverraid-v5 RoadRunner-v5 Robotank-v5 Seaquest-v5 Skiing-v5 Solaris-v5 SpaceInvaders-v5 StarGunner-v5 Surround-v5 Tennis-v5 TimePilot-v5 Tutankham-v5 UpNDown-v5 Venture-v5 
VideoPinball-v5 WizardOfWor-v5 YarsRevenge-v5 Zaxxon-v5 \ - --command "poetry run python ppo_atari_envpool_xla_jax.py --track --wandb-project-name envpool-atari --wandb-entity openrlbenchmark" \ - --num-seeds 3 \ - --workers 1 - -# gymnasium support -poetry install -E mujoco -OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ - --command "poetry run python cleanrl/gymnasium_support/ppo_continuous_action.py --cuda False --track" \ - --num-seeds 3 \ - --workers 1 - -poetry install "dm_control mujoco" -OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ - --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 
dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \ - --command "poetry run python cleanrl/gymnasium_support/ppo_continuous_action.py --cuda False --track" \ - --num-seeds 3 \ - --workers 9 - -poetry install "envpool jax" -python -m cleanrl_utils.benchmark \ - --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \ - --command "poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --track --capture-video" \ - --num-seeds 3 \ - --workers 1 - -poetry install "mujoco dm_control" -OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ - --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 
dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \ - --command "poetry run python cleanrl/ppo_continuous_action.py --exp-name ppo_continuous_action_8M --total-timesteps 8000000 --cuda False --track" \ - --num-seeds 10 \ - --workers 1 + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template diff --git a/benchmark/ppo_plot.sh b/benchmark/ppo_plot.sh new file mode 100644 index 000000000..95678d986 --- /dev/null +++ b/benchmark/ppo_plot.sh @@ -0,0 +1,117 @@ +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo?tag=pr-424' \ + --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo_atari?tag=pr-424' \ + --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_atari \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo_continuous_action?tag=pr-424' \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_continuous_action \ + --scan-history + +python -m 
openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo_continuous_action?tag=v1.0.0-13-gcbd83f6' \ + --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 dm_control/walker-run-v0 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_continuous_action_dm_control \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 
'ppo_atari_lstm?tag=pr-424' \ + --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_atari_lstm \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/avg_episodic_return' \ + 'ppo_atari_envpool?tag=pr-424' \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo_atari?tag=pr-424' \ + --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \ + --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_atari_envpool \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=envpool-atari&ceik=env_id&cen=exp_name&metric=charts/avg_episodic_return' \ + 'ppo_atari_envpool_xla_jax' \ + --filters '?we=openrlbenchmark&wpn=baselines&ceik=env&cen=exp_name&metric=charts/episodic_return' \ + 'baselines-ppo2-cnn' \ + --env-ids Alien-v5 Amidar-v5 Assault-v5 Asterix-v5 Asteroids-v5 Atlantis-v5 BankHeist-v5 BattleZone-v5 BeamRider-v5 Berzerk-v5 Bowling-v5 Boxing-v5 Breakout-v5 Centipede-v5 ChopperCommand-v5 CrazyClimber-v5 Defender-v5 DemonAttack-v5 DoubleDunk-v5 Enduro-v5 FishingDerby-v5 Freeway-v5 Frostbite-v5 Gopher-v5 Gravitar-v5 Hero-v5 IceHockey-v5 Jamesbond-v5 Kangaroo-v5 Krull-v5 KungFuMaster-v5 MontezumaRevenge-v5 MsPacman-v5 NameThisGame-v5 Phoenix-v5 Pitfall-v5 Pong-v5 PrivateEye-v5 Qbert-v5 Riverraid-v5 RoadRunner-v5 Robotank-v5 Seaquest-v5 Skiing-v5 Solaris-v5 SpaceInvaders-v5 StarGunner-v5 Surround-v5 Tennis-v5 TimePilot-v5 Tutankham-v5 UpNDown-v5 Venture-v5 VideoPinball-v5 WizardOfWor-v5 YarsRevenge-v5 Zaxxon-v5 \ + --env-ids AlienNoFrameskip-v4 AmidarNoFrameskip-v4 AssaultNoFrameskip-v4 AsterixNoFrameskip-v4 AsteroidsNoFrameskip-v4 
AtlantisNoFrameskip-v4 BankHeistNoFrameskip-v4 BattleZoneNoFrameskip-v4 BeamRiderNoFrameskip-v4 BerzerkNoFrameskip-v4 BowlingNoFrameskip-v4 BoxingNoFrameskip-v4 BreakoutNoFrameskip-v4 CentipedeNoFrameskip-v4 ChopperCommandNoFrameskip-v4 CrazyClimberNoFrameskip-v4 DefenderNoFrameskip-v4 DemonAttackNoFrameskip-v4 DoubleDunkNoFrameskip-v4 EnduroNoFrameskip-v4 FishingDerbyNoFrameskip-v4 FreewayNoFrameskip-v4 FrostbiteNoFrameskip-v4 GopherNoFrameskip-v4 GravitarNoFrameskip-v4 HeroNoFrameskip-v4 IceHockeyNoFrameskip-v4 JamesbondNoFrameskip-v4 KangarooNoFrameskip-v4 KrullNoFrameskip-v4 KungFuMasterNoFrameskip-v4 MontezumaRevengeNoFrameskip-v4 MsPacmanNoFrameskip-v4 NameThisGameNoFrameskip-v4 PhoenixNoFrameskip-v4 PitfallNoFrameskip-v4 PongNoFrameskip-v4 PrivateEyeNoFrameskip-v4 QbertNoFrameskip-v4 RiverraidNoFrameskip-v4 RoadRunnerNoFrameskip-v4 RobotankNoFrameskip-v4 SeaquestNoFrameskip-v4 SkiingNoFrameskip-v4 SolarisNoFrameskip-v4 SpaceInvadersNoFrameskip-v4 StarGunnerNoFrameskip-v4 SurroundNoFrameskip-v4 TennisNoFrameskip-v4 TimePilotNoFrameskip-v4 TutankhamNoFrameskip-v4 UpNDownNoFrameskip-v4 VentureNoFrameskip-v4 VideoPinballNoFrameskip-v4 WizardOfWorNoFrameskip-v4 YarsRevengeNoFrameskip-v4 ZaxxonNoFrameskip-v4 \ + --no-check-empty-runs \ + --pc.ncols 4 \ + --pc.ncols-legend 2 \ + --rliable \ + --rc.score_normalization_method atari \ + --rc.normalized_score_threshold 8.0 \ + --rc.sample_efficiency_plots \ + --rc.sample_efficiency_and_walltime_efficiency_method Median \ + --rc.performance_profile_plots \ + --rc.aggregate_metrics_plots \ + --rc.sample_efficiency_num_bootstrap_reps 50000 \ + --rc.performance_profile_num_bootstrap_reps 50000 \ + --rc.interval_estimates_num_bootstrap_reps 50000 \ + --output-filename benchmark/cleanrl/ppo_atari_envpool_xla_jax \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/avg_episodic_return' \ + 'ppo_atari_envpool_xla_jax?tag=pr-424' \ + 
'ppo_atari_envpool_xla_jax_scan?tag=pr-424' \ + --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_atari_envpool_xla_jax_scan \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo_procgen?tag=pr-424' \ + --env-ids starpilot bossfight bigfish \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_procgen \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'ppo_atari_multigpu?tag=pr-424' \ + 'ppo_atari?tag=pr-424' \ + --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/ppo_atari_multigpu \ + --scan-history diff --git a/benchmark/qdagger.sh b/benchmark/qdagger.sh index 2491716a0..dc7851fb3 100644 --- a/benchmark/qdagger.sh +++ b/benchmark/qdagger.sh @@ -1,15 +1,15 @@ poetry install -E atari OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/qdagger_dqn_atari_impalacnn.py --track --capture-video" \ + --command "poetry run python cleanrl/qdagger_dqn_atari_impalacnn.py --track --capture_video" \ --num-seeds 3 \ --workers 1 poetry install -E "atari jax" -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 
BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --track --capture-video" \ + --command "poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --track --capture_video" \ --num-seeds 3 \ --workers 1 diff --git a/benchmark/rpo.sh b/benchmark/rpo.sh index cbb551bac..d389197fa 100644 --- a/benchmark/rpo.sh +++ b/benchmark/rpo.sh @@ -1,42 +1,42 @@ poetry install "mujoco dm_control" OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids dm_control/acrobot-swingup-v0 dm_control/acrobot-swingup_sparse-v0 dm_control/ball_in_cup-catch-v0 dm_control/cartpole-balance-v0 dm_control/cartpole-balance_sparse-v0 dm_control/cartpole-swingup-v0 dm_control/cartpole-swingup_sparse-v0 dm_control/cartpole-two_poles-v0 dm_control/cartpole-three_poles-v0 dm_control/cheetah-run-v0 dm_control/dog-stand-v0 dm_control/dog-walk-v0 dm_control/dog-trot-v0 dm_control/dog-run-v0 dm_control/dog-fetch-v0 dm_control/finger-spin-v0 dm_control/finger-turn_easy-v0 dm_control/finger-turn_hard-v0 dm_control/fish-upright-v0 dm_control/fish-swim-v0 dm_control/hopper-stand-v0 dm_control/hopper-hop-v0 dm_control/humanoid-stand-v0 dm_control/humanoid-walk-v0 dm_control/humanoid-run-v0 dm_control/humanoid-run_pure_state-v0 dm_control/humanoid_CMU-stand-v0 dm_control/humanoid_CMU-run-v0 dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 dm_control/manipulator-bring_ball-v0 dm_control/manipulator-bring_peg-v0 dm_control/manipulator-insert_ball-v0 dm_control/manipulator-insert_peg-v0 dm_control/pendulum-swingup-v0 dm_control/point_mass-easy-v0 dm_control/point_mass-hard-v0 dm_control/quadruped-walk-v0 dm_control/quadruped-run-v0 dm_control/quadruped-escape-v0 dm_control/quadruped-fetch-v0 dm_control/reacher-easy-v0 dm_control/reacher-hard-v0 dm_control/stacker-stack_2-v0 dm_control/stacker-stack_4-v0 dm_control/swimmer-swimmer6-v0 dm_control/swimmer-swimmer15-v0 dm_control/walker-stand-v0 dm_control/walker-walk-v0 
dm_control/walker-run-v0 \ - --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track" \ + --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track" \ --num-seeds 10 \ --workers 1 poetry run pip install box2d-py==2.3.5 OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids Pendulum-v1 BipedalWalker-v3 \ - --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track --capture_video" \ --num-seeds 1 \ --workers 1 poetry install -E mujoco OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids HumanoidStandup-v4 Humanoid-v4 InvertedPendulum-v4 Walker2d-v4 \ - --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track --capture_video" \ --num-seeds 10 \ --workers 1 poetry install -E mujoco OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids HumanoidStandup-v2 Humanoid-v2 InvertedPendulum-v2 Walker2d-v2 \ - --command "poetry run python cleanrl/rpo_continuous_action.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/rpo_continuous_action.py --no_cuda --track --capture_video" \ --num-seeds 10 \ --workers 1 poetry install -E mujoco OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids Ant-v4 InvertedDoublePendulum-v4 Reacher-v4 Pusher-v4 Hopper-v4 HalfCheetah-v4 Swimmer-v4 \ - --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --no_cuda --track --capture_video" \ --num-seeds 10 \ --workers 1 poetry install -E mujoco OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m 
cleanrl_utils.benchmark \ --env-ids Ant-v2 InvertedDoublePendulum-v2 Reacher-v2 Pusher-v2 Hopper-v2 HalfCheetah-v2 Swimmer-v2 \ - --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/rpo_continuous_action.py --rpo-alpha 0.01 --no_cuda --track --capture_video" \ --num-seeds 10 \ --workers 1 diff --git a/benchmark/sac.sh b/benchmark/sac.sh index e94e11192..2c948bc93 100644 --- a/benchmark/sac.sh +++ b/benchmark/sac.sh @@ -1,7 +1,10 @@ -poetry install -E mujoco_py -poetry run python -c "import mujoco_py" -OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \ - --command "poetry run python cleanrl/sac_continuous_action.py --track --capture-video" \ +poetry install -E mujoco +poetry run python -m cleanrl_utils.benchmark \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --command "poetry run python cleanrl/sac_continuous_action.py --track" \ --num-seeds 3 \ - --workers 3 \ No newline at end of file + --workers 18 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template diff --git a/benchmark/sac_atari.sh b/benchmark/sac_atari.sh index 13f9e3c9d..a8e8a78ed 100755 --- a/benchmark/sac_atari.sh +++ b/benchmark/sac_atari.sh @@ -1,6 +1,6 @@ poetry install -E atari OMP_NUM_THREADS=1 python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BreakoutNoFrameskip-v4 BeamRiderNoFrameskip-v4 \ - --command "poetry run python cleanrl/sac_atari.py --cuda True --track" \ + --command "poetry run python cleanrl/sac_atari.py --track" \ --num-seeds 3 \ --workers 2 diff --git a/benchmark/sac_plot.sh b/benchmark/sac_plot.sh new file mode 100644 index 000000000..7d82406fa --- /dev/null +++ b/benchmark/sac_plot.sh @@ -0,0 +1,9 @@ +python -m openrlbenchmark.rlops \ + --filters 
'?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'sac_continuous_action?tag=pr-424' \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/sac \ + --scan-history diff --git a/benchmark/td3.sh b/benchmark/td3.sh index ea94c2c32..e68004c73 100644 --- a/benchmark/td3.sh +++ b/benchmark/td3.sh @@ -1,16 +1,22 @@ -poetry install -E mujoco_py -python -c "import mujoco_py" -OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \ - --command "poetry run python cleanrl/td3_continuous_action.py --track --capture-video" \ +poetry install -E "mujoco" +python -m cleanrl_utils.benchmark \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --command "poetry run python cleanrl/td3_continuous_action.py --track" \ --num-seeds 3 \ - --workers 1 + --workers 18 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template -poetry install -E "mujoco_py jax" -poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html -poetry run python -c "import mujoco_py" -xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ - --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \ - --command "poetry run python cleanrl/td3_continuous_action_jax.py --track --capture-video" \ +poetry install -E "mujoco jax" +poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +poetry run python -m cleanrl_utils.benchmark \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --command "poetry run python 
cleanrl/td3_continuous_action_jax.py --track" \ --num-seeds 3 \ - --workers 1 + --workers 18 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template diff --git a/benchmark/td3_plot.sh b/benchmark/td3_plot.sh new file mode 100644 index 000000000..ad37305cc --- /dev/null +++ b/benchmark/td3_plot.sh @@ -0,0 +1,21 @@ +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'td3_continuous_action?tag=pr-424' \ + 'td3_continuous_action_jax?tag=pr-424' \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/td3 \ + --scan-history + +python -m openrlbenchmark.rlops \ + --filters '?we=openrlbenchmark&wpn=cleanrl&ceik=env_id&cen=exp_name&metric=charts/episodic_return' \ + 'sac_continuous_action?tag=pr-424' \ + --env-ids HalfCheetah-v4 Walker2d-v4 Hopper-v4 InvertedPendulum-v4 Humanoid-v4 Pusher-v4 \ + --no-check-empty-runs \ + --pc.ncols 3 \ + --pc.ncols-legend 2 \ + --output-filename benchmark/cleanrl/sac \ + --scan-history diff --git a/benchmark/zoo.sh b/benchmark/zoo.sh index f7646c5d5..a5ab38e14 100644 --- a/benchmark/zoo.sh +++ b/benchmark/zoo.sh @@ -3,25 +3,25 @@ poetry run python cleanrl/dqn_atari_jax.py --env-id SeaquestNoFrameskip-v4 --sa xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/dqn.py --cuda False --track --capture-video --save-model --upload-model --hf-entity cleanrl" \ + --command "poetry run python cleanrl/dqn.py --no_cuda --track --capture_video --save-model --upload-model --hf-entity cleanrl" \ --num-seeds 1 \ --workers 1 CUDA_VISIBLE_DEVICES="-1" 
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/dqn_jax.py --track --capture-video --save-model --upload-model --hf-entity cleanrl" \ + --command "poetry run python cleanrl/dqn_jax.py --track --capture_video --save-model --upload-model --hf-entity cleanrl" \ --num-seeds 1 \ --workers 1 xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture-video --save-model --upload-model --hf-entity cleanrl" \ + --command "poetry run python cleanrl/dqn_atari_jax.py --track --capture_video --save-model --upload-model --hf-entity cleanrl" \ --num-seeds 1 \ --workers 1 xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \ - --command "poetry run python cleanrl/dqn_atari.py --track --capture-video --save-model --upload-model --hf-entity cleanrl" \ + --command "poetry run python cleanrl/dqn_atari.py --track --capture_video --save-model --upload-model --hf-entity cleanrl" \ --num-seeds 1 \ --workers 1 diff --git a/cleanrl/c51.py b/cleanrl/c51.py index 3959466f1..9f99a7a31 100755 --- a/cleanrl/c51.py +++ b/cleanrl/c51.py @@ -1,83 +1,77 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51py -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this 
experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: 
str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="CartPole-v1", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=500000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--n-atoms", type=int, default=101, - help="the number of atoms") - parser.add_argument("--v-min", type=float, default=-100, - help="the return lower bound") - parser.add_argument("--v-max", type=float, default=100, - help="the return upper bound") - parser.add_argument("--buffer-size", type=int, default=10000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--target-network-frequency", type=int, default=500, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=128, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.05, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.5, - 
help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=10000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=10, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "CartPole-v1" + """the id of the environment""" + total_timesteps: int = 500000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + n_atoms: int = 101 + """the number of atoms""" + v_min: float = -100 + """the return lower bound""" + v_max: float = 100 + """the return upper bound""" + buffer_size: int = 10000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + target_network_frequency: int = 500 + """the timesteps it takes to update the target network""" + batch_size: int = 128 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.05 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.5 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 10000 + """timestep to start learning""" + train_frequency: int = 10 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -136,7 +130,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -201,14 +196,10 @@ def 
linear_schedule(start_e: float, end_e: float, duration: int, t: int): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/c51_atari.py b/cleanrl/c51_atari.py index 8e47bacc5..97b790759 100755 --- a/cleanrl/c51_atari.py +++ b/cleanrl/c51_atari.py @@ -1,15 +1,15 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51_ataripy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from stable_baselines3.common.atari_wrappers import ( ClipRewardEnv, EpisodicLifeEnv, @@ -21,70 +21,64 @@ from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), 
default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos 
of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--n-atoms", type=int, default=51, - help="the number of atoms") - parser.add_argument("--v-min", type=float, default=-10, - help="the return lower bound") - parser.add_argument("--v-max", type=float, default=10, - help="the return upper bound") - parser.add_argument("--buffer-size", type=int, default=1000000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--target-network-frequency", type=int, default=10000, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=32, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.01, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.10, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=80000, - help="timestep to start 
learning") - parser.add_argument("--train-frequency", type=int, default=4, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + n_atoms: int = 51 + """the number of atoms""" + v_min: float = -10 + """the return lower bound""" + v_max: float = 10 + """the return upper bound""" + buffer_size: int = 1000000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + target_network_frequency: int = 10000 + """the timesteps it takes to update the target network""" + batch_size: int = 32 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.01 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.10 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 80000 + """timestep to start learning""" + train_frequency: int = 4 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -158,7 +152,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -224,14 +219,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): # TRY NOT TO MODIFY: record 
rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/c51_atari_jax.py b/cleanrl/c51_atari_jax.py index 93c436ec5..8cd46e855 100644 --- a/cleanrl/c51_atari_jax.py +++ b/cleanrl/c51_atari_jax.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51_atari_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass os.environ[ "XLA_PYTHON_CLIENT_MEM_FRACTION" @@ -16,6 +15,7 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.training.train_state import TrainState from stable_baselines3.common.atari_wrappers import ( ClipRewardEnv, @@ -28,66 +28,60 @@ from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, 
this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - 
help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--n-atoms", type=int, default=51, - help="the number of atoms") - parser.add_argument("--v-min", type=float, default=-10, - help="the return lower bound") - parser.add_argument("--v-max", type=float, default=10, - help="the return upper bound") - parser.add_argument("--buffer-size", type=int, default=1000000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--target-network-frequency", type=int, default=10000, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=32, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.01, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.1, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=80000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=4, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + n_atoms: 
int = 51 + """the number of atoms""" + v_min: float = -10 + """the return lower bound""" + v_max: float = 10 + """the return upper bound""" + buffer_size: int = 1000000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + target_network_frequency: int = 10000 + """the timesteps it takes to update the target network""" + batch_size: int = 32 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.01 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.10 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 80000 + """timestep to start learning""" + train_frequency: int = 4 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -159,7 +153,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -284,14 +279,10 @@ def get_action(q_state, obs): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + 
writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/c51_jax.py b/cleanrl/c51_jax.py index 4b65f3595..7ad810cdb 100644 --- a/cleanrl/c51_jax.py +++ b/cleanrl/c51_jax.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/c51/#c51_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import flax import flax.linen as nn @@ -12,71 +11,66 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.training.train_state import TrainState from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` 
folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="CartPole-v1", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=500000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--n-atoms", type=int, default=101, - help="the number of atoms") - parser.add_argument("--v-min", type=float, default=-100, - help="the return lower bound") - parser.add_argument("--v-max", type=float, default=100, - help="the return upper bound") - parser.add_argument("--buffer-size", type=int, default=10000, - help="the replay memory buffer size") - 
parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--target-network-frequency", type=int, default=500, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=128, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.05, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.5, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=10000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=10, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "CartPole-v1" + """the id of the environment""" + total_timesteps: int = 500000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + n_atoms: int = 101 + """the number of atoms""" + v_min: float = -100 + """the return lower bound""" + v_max: float = 100 + """the return upper bound""" + buffer_size: int = 10000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + target_network_frequency: int = 500 + """the timesteps it takes to update the target network""" + batch_size: int = 128 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.05 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.5 + """the fraction of `total-timesteps` it takes 
from start-e to go end-e""" + learning_starts: int = 10000 + """timestep to start learning""" + train_frequency: int = 10 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -131,7 +125,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -247,14 +242,10 @@ def loss(q_params, observations, actions, target_pmfs): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/ddpg_continuous_action.py b/cleanrl/ddpg_continuous_action.py index d42d3bc5a..1aa8b9972 100644 --- a/cleanrl/ddpg_continuous_action.py +++ b/cleanrl/ddpg_continuous_action.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ddpg/#ddpg_continuous_actionpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import 
dataclass import gymnasium as gym import numpy as np @@ -11,62 +10,59 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face 
Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="Hopper-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=1000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=3e-4, - help="the learning rate of the optimizer") - parser.add_argument("--buffer-size", type=int, default=int(1e6), - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=0.005, - help="target smoothing coefficient (default: 0.005)") - parser.add_argument("--batch-size", type=int, default=256, - help="the batch size of sample from the reply memory") - parser.add_argument("--exploration-noise", type=float, default=0.1, - help="the scale of exploration noise") - parser.add_argument("--learning-starts", type=int, default=25e3, - help="timestep to start learning") - 
parser.add_argument("--policy-frequency", type=int, default=2, - help="the frequency of training policy (delayed)") - parser.add_argument("--noise-clip", type=float, default=0.5, - help="noise clip parameter of the Target Policy Smoothing Regularization") - args = parser.parse_args() - # fmt: on - return args + env_id: str = "Hopper-v4" + """the id of the environment""" + total_timesteps: int = 1000000 + """total timesteps of the experiments""" + learning_rate: float = 3e-4 + """the learning rate of the optimizer""" + buffer_size: int = int(1e6) + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 0.005 + """target smoothing coefficient (default: 0.005)""" + batch_size: int = 256 + """the batch size of sample from the reply memory""" + exploration_noise: float = 0.1 + """the scale of exploration noise""" + learning_starts: int = 25e3 + """timestep to start learning""" + policy_frequency: int = 2 + """the frequency of training policy (delayed)""" + noise_clip: float = 0.5 + """noise clip parameter of the Target Policy Smoothing Regularization""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -129,8 +125,7 @@ def forward(self, x): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - - args = parse_args() + args = tyro.cli(Args) run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl/ddpg_continuous_action_jax.py b/cleanrl/ddpg_continuous_action_jax.py index e074acd60..b12a14a28 100644 --- a/cleanrl/ddpg_continuous_action_jax.py +++ b/cleanrl/ddpg_continuous_action_jax.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ddpg/#ddpg_continuous_action_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import flax import flax.linen as nn @@ -12,59 +11,56 @@ import jax.numpy as jnp
import numpy as np import optax +import tyro from flax.training.train_state import TrainState from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + 
capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="HalfCheetah-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=1000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=3e-4, - help="the learning rate of the optimizer") - parser.add_argument("--buffer-size", type=int, default=int(1e6), - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=0.005, - help="target smoothing coefficient (default: 0.005)") - parser.add_argument("--batch-size", type=int, default=256, - help="the batch size of sample from the reply memory") - parser.add_argument("--exploration-noise", type=float, default=0.1, - help="the scale of exploration noise") - parser.add_argument("--learning-starts", type=int, default=25e3, - help="timestep to start learning") - parser.add_argument("--policy-frequency", type=int, default=2, - help="the frequency of training policy (delayed)") - parser.add_argument("--noise-clip", type=float, default=0.5, - help="noise clip parameter of the Target Policy Smoothing Regularization") - args = parser.parse_args() - # fmt: on - return args + env_id: str = "HalfCheetah-v4" + """the id of the environment""" + total_timesteps: int = 1000000 + """total timesteps of the experiments""" + learning_rate: float = 3e-4 + """the learning rate of the optimizer""" + buffer_size: int = int(1e6) + """the replay memory
buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 0.005 + """target smoothing coefficient (default: 0.005)""" + batch_size: int = 256 + """the batch size of sample from the reply memory""" + exploration_noise: float = 0.1 + """the scale of exploration noise""" + learning_starts: int = 25e3 + """timestep to start learning""" + policy_frequency: int = 2 + """the frequency of training policy (delayed)""" + noise_clip: float = 0.5 + """noise clip parameter of the Target Policy Smoothing Regularization""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -124,7 +120,7 @@ class TrainState(TrainState): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - args = parse_args() + args = tyro.cli(Args) run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl/dqn.py b/cleanrl/dqn.py index 2aa8f9bc6..e74e289c3 100644 --- a/cleanrl/dqn.py +++ b/cleanrl/dqn.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqnpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np @@ -11,70 +10,65 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", 
type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the 
`runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="CartPole-v1", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=500000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--buffer-size", type=int, default=10000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1., - help="the target network update rate") - parser.add_argument("--target-network-frequency", type=int, default=500, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=128, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.05, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.5, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=10000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=10, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "CartPole-v1" + """the id of the 
environment""" + total_timesteps: int = 500000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + buffer_size: int = 10000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """the target network update rate""" + target_network_frequency: int = 500 + """the timesteps it takes to update the target network""" + batch_size: int = 128 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.05 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.5 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 10000 + """timestep to start learning""" + train_frequency: int = 10 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -123,7 +117,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -188,14 +183,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + 
if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/dqn_atari.py b/cleanrl/dqn_atari.py index a4c3df339..a23b84391 100644 --- a/cleanrl/dqn_atari.py +++ b/cleanrl/dqn_atari.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqn_ataripy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np @@ -11,6 +10,7 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from stable_baselines3.common.atari_wrappers import ( ClipRewardEnv, EpisodicLifeEnv, @@ -22,66 +22,60 @@ from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project 
name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, 
default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=1e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--buffer-size", type=int, default=1000000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1., - help="the target network update rate") - parser.add_argument("--target-network-frequency", type=int, default=1000, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=32, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.01, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.10, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=80000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=4, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 1e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + buffer_size: int = 1000000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """the target network 
update rate""" + target_network_frequency: int = 1000 + """the timesteps it takes to update the target network""" + batch_size: int = 32 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.01 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.10 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 80000 + """timestep to start learning""" + train_frequency: int = 4 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -145,7 +139,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -211,14 +206,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle 
`final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/dqn_atari_jax.py b/cleanrl/dqn_atari_jax.py index 5f74d57a9..383ceeef8 100644 --- a/cleanrl/dqn_atari_jax.py +++ b/cleanrl/dqn_atari_jax.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqn_atari_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass os.environ[ "XLA_PYTHON_CLIENT_MEM_FRACTION" @@ -16,6 +15,7 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.training.train_state import TrainState from stable_baselines3.common.atari_wrappers import ( ClipRewardEnv, @@ -28,62 +28,56 @@ from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the 
saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=1e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--buffer-size", type=int, default=1000000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1., - help="the target network update rate") - parser.add_argument("--target-network-frequency", type=int, default=1000, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=32, - help="the batch size of sample from the reply 
memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.01, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.10, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=80000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=4, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 1e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + buffer_size: int = 1000000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """the target network update rate""" + target_network_frequency: int = 1000 + """the timesteps it takes to update the target network""" + batch_size: int = 32 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.01 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.10 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 80000 + """timestep to start learning""" + train_frequency: int = 4 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -151,7 +145,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" 
""" ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -242,14 +237,10 @@ def mse_loss(params): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/dqn_jax.py b/cleanrl/dqn_jax.py index 1f0eaf623..917282578 100644 --- a/cleanrl/dqn_jax.py +++ b/cleanrl/dqn_jax.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/dqn/#dqn_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import flax import flax.linen as nn @@ -12,67 +11,62 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.training.train_state import TrainState from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the 
name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the 
Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="CartPole-v1", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=500000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--buffer-size", type=int, default=10000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1., - help="the target network update rate") - parser.add_argument("--target-network-frequency", type=int, default=500, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=128, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.05, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.5, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=10000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=10, - help="the frequency of training") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - return args + env_id: str = "CartPole-v1" + """the id of the environment""" + total_timesteps: int = 500000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of 
parallel game environments""" + buffer_size: int = 10000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """the target network update rate""" + target_network_frequency: int = 500 + """the timesteps it takes to update the target network""" + batch_size: int = 128 + """the batch size of sample from the reply memory""" + start_e: float = 1 + """the starting epsilon for exploration""" + end_e: float = 0.05 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.5 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 10000 + """timestep to start learning""" + train_frequency: int = 10 + """the frequency of training""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -123,7 +117,8 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -211,14 +206,10 @@ def mse_loss(params): # TRY NOT TO MODIFY: record rewards for plotting purposes if "final_info" in infos: for info in infos["final_info"]: - # Skip the envs that are not done - if "episode" not in info: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) - writer.add_scalar("charts/epsilon", epsilon, global_step) - break + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", 
info["episode"]["l"], global_step) # TRY NOT TO MODIFY: save data to reply buffer; handle `final_observation` real_next_obs = next_obs.copy() diff --git a/cleanrl/ppg_procgen.py b/cleanrl/ppg_procgen.py index ea574a814..845fe1726 100644 --- a/cleanrl/ppg_procgen.py +++ b/cleanrl/ppg_procgen.py @@ -1,99 +1,101 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppg/#ppg_procgenpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from procgen import ProcgenEnv from torch import distributions as td from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to 
capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", - help="the id of the environment") - parser.add_argument("--learning-rate", type=float, default=5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--total-timesteps", type=int, default=25e6, - help="total timesteps of the experiments") - parser.add_argument("--num-envs", type=int, default=64, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=256, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Use GAE for advantage computation") - parser.add_argument("--gamma", type=float, default=0.999, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=8, - help="the number 
of mini-batches") - parser.add_argument("--adv-norm-fullbatch", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Full batch advantage normalization as used in PPG code") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") + env_id: str = "starpilot" + """the id of the environment""" + total_timesteps: int = int(25e6) + """total timesteps of the experiments""" + learning_rate: float = 5e-4 + """the learning rate of the optimizer""" + num_envs: int = 64 + """the number of parallel game environments""" + num_steps: int = 256 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = False + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.999 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 8 + """the number of mini-batches""" + adv_norm_fullbatch: bool = True + """Toggle full batch advantage normalization as used in PPG code""" + clip_coef: float = 0.2 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + 
"""coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" # PPG specific arguments - parser.add_argument("--n-iteration", type=int, default=32, - help="N_pi: the number of policy update in the policy phase ") - parser.add_argument("--e-policy", type=int, default=1, - help="E_pi: the number of policy update in the policy phase ") - parser.add_argument("--v-value", type=int, default=1, - help="E_V: the number of policy update in the policy phase ") - parser.add_argument("--e-auxiliary", type=int, default=6, - help="E_aux:the K epochs to update the policy") - parser.add_argument("--beta-clone", type=float, default=1.0, - help="the behavior cloning coefficient") - parser.add_argument("--num-aux-rollouts", type=int, default=4, - help="the number of mini batch in the auxiliary phase") - parser.add_argument("--n-aux-grad-accum", type=int, default=1, - help="the number of gradient accumulation in mini batch") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - args.aux_batch_rollouts = int(args.num_envs * args.n_iteration) - assert args.v_value == 1, "Multiple value epoch (v_value != 1) is not supported yet" - # fmt: on - return args + n_iteration: int = 32 + """N_pi: the number of policy update in the policy phase """ + e_policy: int = 1 + """E_pi: the number of policy update in the policy phase """ + v_value: int = 1 + """E_V: the number of policy update in the policy phase """ + e_auxiliary: int = 6 + """E_aux:the K epochs to update the policy""" + beta_clone: float = 1.0 + """the behavior cloning coefficient""" + num_aux_rollouts: int = 4 + """the number of mini batch in the auxiliary phase""" + n_aux_grad_accum: int = 1 + """the number of gradient accumulation in mini batch""" + + # to be filled in runtime + batch_size: int = 0 + """the 
batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" + num_phases: int = 0 + """the number of phases (computed in runtime)""" + aux_batch_rollouts: int = 0 + """the number of rollouts in the auxiliary phase (computed in runtime)""" def layer_init_normed(layer, norm_dim, scale=1.0): @@ -210,7 +212,13 @@ def get_pi(self, x): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size + args.num_phases = int(args.num_iterations // args.n_iteration) + args.aux_batch_rollouts = int(args.num_envs * args.n_iteration) + assert args.v_value == 1, "Multiple value epoch (v_value != 1) is not supported yet" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -273,16 +281,14 @@ def get_pi(self, x): start_time = time.time() next_obs = torch.Tensor(envs.reset()).to(device) next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - num_phases = int(num_updates // args.n_iteration) - for phase in range(1, num_phases + 1): + for phase in range(1, args.num_phases + 1): # POLICY PHASE for update in range(1, args.n_iteration + 1): # Annealing the rate if instructed to do so. 
if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (update - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow @@ -313,30 +319,18 @@ def get_pi(self, x): # bootstrap value if not done with torch.no_grad(): next_value = agent.get_value(next_obs).reshape(1, -1) - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(args.num_steps)): - if t == args.num_steps - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(args.num_steps)): + if t == args.num_steps - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam + returns = advantages + values # flatten the batch b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) @@ -399,9 +393,8 @@ def get_pi(self, x): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if 
args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo.py b/cleanrl/ppo.py index 091378209..bfec99563 100644 --- a/cleanrl/ppo.py +++ b/cleanrl/ppo.py @@ -1,82 +1,84 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppopy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass -import gym +import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") 
+@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="CartPole-v1", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=500000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=4, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, 
nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -def make_env(env_id, seed, idx, capture_video, run_name): + env_id: str = "CartPole-v1" + """the id of the environment""" + total_timesteps: int = 500000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 4 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.2 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped 
loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" + + +def make_env(env_id, idx, capture_video, run_name): def thunk(): if capture_video and idx == 0: env = gym.make(env_id) @@ -84,9 +86,6 @@ def thunk(): else: env = gym.make(env_id) env = gym.wrappers.RecordEpisodeStatistics(env) - env.seed(seed) - env.action_space.seed(seed) - env.observation_space.seed(seed) return env return thunk @@ -128,7 +127,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -158,7 +160,7 @@ def get_action_and_value(self, x, action=None): # env setup envs = gym.vector.SyncVectorEnv( - [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)] + [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.num_envs)], ) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" @@ -176,19 +178,19 @@ def get_action_and_value(self, x, action=None): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) + next_obs, _ = 
envs.reset(seed=args.seed) + next_obs = torch.Tensor(next_obs).to(device) next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -200,16 +202,17 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy()) + next_done = np.logical_or(terminations, truncations) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) - for item in info: - if "episode" in item.keys(): - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - break + if "final_info" in infos: + for info in infos["final_info"]: + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # bootstrap value if not done with torch.no_grad(): @@ -286,9 +289,8 @@ def get_action_and_value(self, x, 
action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_atari.py b/cleanrl/ppo_atari.py index 14be7a470..5bc9859cc 100644 --- a/cleanrl/ppo_atari.py +++ b/cleanrl/ppo_atari.py @@ -1,15 +1,15 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_ataripy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass -import gym +import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter @@ -22,71 +22,77 @@ ) -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of 
wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=8, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, 
- help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -def make_env(env_id, seed, idx, capture_video, run_name): + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 8 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + 
"""the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" + + +def make_env(env_id, idx, capture_video, run_name): def thunk(): - env = gym.make(env_id) + if capture_video and idx == 0: + env = gym.make(env_id) + env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") + else: + env = gym.make(env_id) env = gym.wrappers.RecordEpisodeStatistics(env) if capture_video: if idx == 0: @@ -100,9 +106,6 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) - env.action_space.seed(seed) - env.observation_space.seed(seed) return env return thunk @@ -144,7 +147,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -174,7 +180,7 @@ def get_action_and_value(self, x, action=None): # env setup envs = gym.vector.SyncVectorEnv( - [make_env(args.env_id, 
args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)] + [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.num_envs)], ) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" @@ -192,19 +198,19 @@ def get_action_and_value(self, x, action=None): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) + next_obs, _ = envs.reset(seed=args.seed) + next_obs = torch.Tensor(next_obs).to(device) next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -216,16 +222,17 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy()) + next_done = np.logical_or(terminations, truncations) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) - for item in info: - if "episode" in item.keys(): - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - break + if "final_info" in infos: + for info in infos["final_info"]: + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # bootstrap value if not done with torch.no_grad(): @@ -302,9 +309,8 @@ def get_action_and_value(self, x, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_atari_envpool.py b/cleanrl/ppo_atari_envpool.py index f72f223ed..1be166fba 100644 --- a/cleanrl/ppo_atari_envpool.py +++ b/cleanrl/ppo_atari_envpool.py @@ -1,10 +1,9 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_envpoolpy -import argparse import os import random import time from collections import deque -from distutils.util import strtobool +from dataclasses import dataclass import envpool import 
gym @@ -12,70 +11,73 @@ import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity 
(team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="Pong-v5", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=8, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient 
of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "Breakout-v5" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 8 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" class 
RecordEpisodeStatistics(gym.Wrapper): @@ -148,7 +150,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -208,17 +213,16 @@ def get_action_and_value(self, x, action=None): start_time = time.time() next_obs = torch.Tensor(envs.reset()).to(device) next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -230,11 +234,11 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, next_done, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) - for idx, d in enumerate(done): + for idx, d in enumerate(next_done): if d and info["lives"][idx] == 0: print(f"global_step={global_step}, episodic_return={info['r'][idx]}") avg_returns.append(info["r"][idx]) @@ -317,9 +321,8 @@ def get_action_and_value(self, x, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_atari_envpool_xla_jax.py b/cleanrl/ppo_atari_envpool_xla_jax.py index f47d28513..8e7da71f5 100644 --- a/cleanrl/ppo_atari_envpool_xla_jax.py +++ b/cleanrl/ppo_atari_envpool_xla_jax.py @@ -1,15 +1,10 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_envpool_xla_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass from typing import Sequence -os.environ[ - "XLA_PYTHON_CLIENT_MEM_FRACTION" -] = "0.7" # see https://github.com/google/jax/discussions/6332#discussioncomment-1279991 - import envpool import flax import flax.linen as nn @@ -18,70 +13,80 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.linen.initializers import constant, orthogonal from flax.training.train_state import TrainState from torch.utils.tensorboard import SummaryWriter - -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - 
parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +# Fix weird OOM https://github.com/google/jax/discussions/6332#discussioncomment-1279991 +os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "0.6" +# Fix CUDNN non-determinism; https://github.com/google/jax/issues/4823#issuecomment-952835771 +os.environ["TF_XLA_FLAGS"] = "--xla_gpu_autotune_level=2 --xla_gpu_deterministic_reductions" +os.environ["TF_CUDNN_DETERMINISTIC"] = "1" + + +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and
Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="Pong-v5", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=8, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, 
default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - args.num_updates = args.total_timesteps // args.batch_size - # fmt: on - return args + env_id: str = "Breakout-v5" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 8 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" class Network(nn.Module): @@ -164,7 +169,10 @@ class 
EpisodeStatistics: if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -233,7 +241,7 @@ def step_env_wrappeed(episode_stats, handle, action): def linear_schedule(count): # anneal learning rate linearly after one training iteration which contains # (args.num_minibatches * args.update_epochs) gradient updates - frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_updates + frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_iterations return args.learning_rate * frac network = Network() @@ -401,7 +409,7 @@ def ppo_loss(params, x, a, logp, mb_advantages, mb_returns): @jax.jit def rollout(agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step): for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs storage, action, key = get_action_and_value(agent_state, next_obs, next_done, storage, step, key) # TRY NOT TO MODIFY: execute the game and log data. 
@@ -409,8 +417,8 @@ def rollout(agent_state, episode_stats, next_obs, next_done, storage, key, handl storage = storage.replace(rewards=storage.rewards.at[step].set(reward)) return agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step - for update in range(1, args.num_updates + 1): - update_time_start = time.time() + for iteration in range(1, args.num_iterations + 1): + iteration_time_start = time.time() agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step = rollout( agent_state, episode_stats, next_obs, next_done, storage, key, handle, global_step ) @@ -437,7 +445,7 @@ def rollout(agent_state, episode_stats, next_obs, next_done, storage, key, handl print("SPS:", int(global_step / (time.time() - start_time))) writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) writer.add_scalar( - "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - update_time_start)), global_step + "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - iteration_time_start)), global_step ) envs.close() diff --git a/cleanrl/ppo_atari_envpool_xla_jax_scan.py b/cleanrl/ppo_atari_envpool_xla_jax_scan.py index 19f97b1d1..cf8d8f88d 100644 --- a/cleanrl/ppo_atari_envpool_xla_jax_scan.py +++ b/cleanrl/ppo_atari_envpool_xla_jax_scan.py @@ -1,16 +1,11 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_envpool_xla_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass from functools import partial from typing import Sequence -os.environ[ - "XLA_PYTHON_CLIENT_MEM_FRACTION" -] = "0.7" # see https://github.com/google/jax/discussions/6332#discussioncomment-1279991 - import envpool import flax import flax.linen as nn @@ -19,76 +14,86 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.linen.initializers import 
constant, orthogonal from flax.training.train_state import TrainState from torch.utils.tensorboard import SummaryWriter - -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +# Fix weird OOM https://github.com/google/jax/discussions/6332#discussioncomment-1279991 +os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = 
"0.6" +# Fix CUDNN non-determinisim; https://github.com/google/jax/issues/4823#issuecomment-952835771 +os.environ["TF_XLA_FLAGS"] = "--xla_gpu_autotune_level=2 --xla_gpu_deterministic_reductions" +os.environ["TF_CUDNN DETERMINISTIC"] = "1" + + +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="Pong-v5", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=8, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value 
networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - args.num_updates = args.total_timesteps // args.batch_size - # fmt: on - return args + env_id: str = "Breakout-v5" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 8 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: 
int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" def make_env(env_id, seed, num_envs): @@ -190,7 +195,10 @@ class EpisodeStatistics: if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -248,7 +256,7 @@ def step_env_wrappeed(episode_stats, handle, action): def linear_schedule(count): # anneal learning rate linearly after one training iteration which contains # (args.num_minibatches * args.update_epochs) gradient updates - frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_updates + frac = 1.0 - (count // (args.num_minibatches * args.update_epochs)) / args.num_iterations return args.learning_rate * frac network = Network() @@ -442,8 +450,8 @@ def rollout(agent_state, episode_stats, next_obs, next_done, key, handle, step_o rollout = partial(rollout, step_once_fn=partial(step_once, 
env_step_fn=step_env_wrappeed), max_steps=args.num_steps) - for update in range(1, args.num_updates + 1): - update_time_start = time.time() + for iteration in range(1, args.num_iterations + 1): + iteration_time_start = time.time() agent_state, episode_stats, next_obs, next_done, storage, key, handle = rollout( agent_state, episode_stats, next_obs, next_done, key, handle ) @@ -471,7 +479,7 @@ def rollout(agent_state, episode_stats, next_obs, next_done, key, handle, step_o print("SPS:", int(global_step / (time.time() - start_time))) writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) writer.add_scalar( - "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - update_time_start)), global_step + "charts/SPS_update", int(args.num_envs * args.num_steps / (time.time() - iteration_time_start)), global_step ) if args.save_model: diff --git a/cleanrl/ppo_atari_lstm.py b/cleanrl/ppo_atari_lstm.py index a90aa4ce8..630414dfb 100644 --- a/cleanrl/ppo_atari_lstm.py +++ b/cleanrl/ppo_atari_lstm.py @@ -1,15 +1,15 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_lstmpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass -import gym +import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter @@ -22,71 +22,77 @@ ) -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, 
`torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=8, - help="the number of parallel game environments") - 
parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -def make_env(env_id, seed, idx, capture_video, run_name): + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total 
timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 8 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" + + +def make_env(env_id, idx, capture_video, run_name): def thunk(): - env = gym.make(env_id) + if capture_video and idx == 0: + env = gym.make(env_id) + env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") + else: + env = gym.make(env_id) env = gym.wrappers.RecordEpisodeStatistics(env) if capture_video: if idx == 0: @@ -100,9 +106,6 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 1) - env.seed(seed) - env.action_space.seed(seed) - env.observation_space.seed(seed) return 
env return thunk @@ -171,7 +174,10 @@ def get_action_and_value(self, x, lstm_state, done, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -201,7 +207,7 @@ def get_action_and_value(self, x, lstm_state, done, action=None): # env setup envs = gym.vector.SyncVectorEnv( - [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)] + [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.num_envs)], ) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" @@ -219,24 +225,24 @@ def get_action_and_value(self, x, lstm_state, done, action=None): # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) + next_obs, _ = envs.reset(seed=args.seed) + next_obs = torch.Tensor(next_obs).to(device) next_done = torch.zeros(args.num_envs).to(device) next_lstm_state = ( torch.zeros(agent.lstm.num_layers, args.num_envs, agent.lstm.hidden_size).to(device), torch.zeros(agent.lstm.num_layers, args.num_envs, agent.lstm.hidden_size).to(device), ) # hidden and cell states (see https://youtu.be/8HyCNIVRbSU) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): initial_lstm_state = (next_lstm_state[0].clone(), next_lstm_state[1].clone()) # Annealing the rate if instructed to do so. 
if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -248,16 +254,17 @@ def get_action_and_value(self, x, lstm_state, done, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. - next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy()) + next_done = np.logical_or(terminations, truncations) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) - for item in info: - if "episode" in item.keys(): - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - break + if "final_info" in infos: + for info in infos["final_info"]: + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # bootstrap value if not done with torch.no_grad(): @@ -348,9 +355,8 @@ def get_action_and_value(self, x, lstm_state, done, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), 
b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_atari_multigpu.py b/cleanrl/ppo_atari_multigpu.py index 8955e1298..5fedd7881 100644 --- a/cleanrl/ppo_atari_multigpu.py +++ b/cleanrl/ppo_atari_multigpu.py @@ -1,17 +1,19 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_atari_multigpupy -import argparse import os import random import time import warnings -from distutils.util import strtobool +from dataclasses import dataclass, field +from typing import List, Literal -import gym +import gymnasium as gym import numpy as np import torch import torch.distributed as dist import torch.nn as nn import torch.optim as optim +import tyro +from rich.pretty import pprint from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter @@ -24,75 +26,89 @@ ) -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, 
nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=8, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - 
help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - parser.add_argument("--device-ids", nargs="+", default=[], - help="the device ids that subprocess workers will use") - parser.add_argument("--backend", type=str, default="gloo", choices=["gloo", "nccl", "mpi"], - help="the id of the environment") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args - - -def make_env(env_id, seed, idx, capture_video, run_name): + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + local_num_envs: int = 8 + """the number of parallel game environments (in the local rank)""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + 
gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + device_ids: List[int] = field(default_factory=lambda: []) + """the device ids that subprocess workers will use""" + backend: Literal["gloo", "nccl", "mpi"] = "gloo" + """the backend for distributed training""" + + # to be filled in runtime + local_batch_size: int = 0 + """the local batch size in the local rank (computed in runtime)""" + local_minibatch_size: int = 0 + """the local mini-batch size in the local rank (computed in runtime)""" + num_envs: int = 0 + """the number of parallel game environments (computed in runtime)""" + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" + world_size: int = 0 + """the number of processes (computed in runtime)""" + + +def make_env(env_id, idx, capture_video, run_name): def thunk(): - env = gym.make(env_id) + if capture_video and idx == 0: + env = gym.make(env_id) + env = gym.wrappers.RecordVideo(env, f"videos/{run_name}") + else: + env = gym.make(env_id) env = gym.wrappers.RecordEpisodeStatistics(env) if capture_video: if idx == 0: @@ -106,9 +122,6 @@ def thunk(): env = gym.wrappers.ResizeObservation(env, (84, 84)) env = 
gym.wrappers.GrayScaleObservation(env) env = gym.wrappers.FrameStack(env, 4) - env.seed(seed) - env.action_space.seed(seed) - env.observation_space.seed(seed) return env return thunk @@ -152,15 +165,17 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": # torchrun --standalone --nnodes=1 --nproc_per_node=2 ppo_atari_multigpu.py # taken from https://pytorch.org/docs/stable/elastic/run.html + args = tyro.cli(Args) local_rank = int(os.getenv("LOCAL_RANK", "0")) - world_size = int(os.getenv("WORLD_SIZE", "1")) - args = parse_args() - args.world_size = world_size - args.num_envs = int(args.num_envs / world_size) + args.world_size = int(os.getenv("WORLD_SIZE", "1")) + args.local_batch_size = int(args.local_num_envs * args.num_steps) + args.local_minibatch_size = int(args.local_batch_size // args.num_minibatches) + args.num_envs = args.local_num_envs * args.world_size args.batch_size = int(args.num_envs * args.num_steps) args.minibatch_size = int(args.batch_size // args.num_minibatches) - if world_size > 1: - dist.init_process_group(args.backend, rank=local_rank, world_size=world_size) + args.num_iterations = args.total_timesteps // args.batch_size + if args.world_size > 1: + dist.init_process_group(args.backend, rank=local_rank, world_size=args.world_size) else: warnings.warn( """ @@ -169,8 +184,6 @@ def get_action_and_value(self, x, action=None): E.g., `torchrun --standalone --nnodes=1 --nproc_per_node=2 ppo_atari_multigpu.py` """ ) - print(f"================================") - print(f"args.num_envs: {args.num_envs}, args.batch_size: {args.batch_size}, args.minibatch_size: {args.minibatch_size}") run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" writer = None if local_rank == 0: @@ -191,6 +204,7 @@ def get_action_and_value(self, x, action=None): "hyperparameters", "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), ) + pprint(args) # TRY NOT TO MODIFY: seeding # 
CRUCIAL: note that we needed to pass a different seed for each data parallelism worker @@ -201,18 +215,18 @@ def get_action_and_value(self, x, action=None): torch.backends.cudnn.deterministic = args.torch_deterministic if len(args.device_ids) > 0: - assert len(args.device_ids) == world_size, "you must specify the same number of device ids as `--nproc_per_node`" + assert len(args.device_ids) == args.world_size, "you must specify the same number of device ids as `--nproc_per_node`" device = torch.device(f"cuda:{args.device_ids[local_rank]}" if torch.cuda.is_available() and args.cuda else "cpu") else: device_count = torch.cuda.device_count() - if device_count < world_size: + if device_count < args.world_size: device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") else: device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() and args.cuda else "cpu") # env setup envs = gym.vector.SyncVectorEnv( - [make_env(args.env_id, args.seed + i, i, args.capture_video, run_name) for i in range(args.num_envs)] + [make_env(args.env_id, i, args.capture_video, run_name) for i in range(args.local_num_envs)], ) assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" @@ -221,29 +235,29 @@ def get_action_and_value(self, x, action=None): optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) # ALGO Logic: Storage setup - obs = torch.zeros((args.num_steps, args.num_envs) + envs.single_observation_space.shape).to(device) - actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) - logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) - rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) - values = torch.zeros((args.num_steps, args.num_envs)).to(device) + obs = torch.zeros((args.num_steps, args.local_num_envs) + 
envs.single_observation_space.shape).to(device) + actions = torch.zeros((args.num_steps, args.local_num_envs) + envs.single_action_space.shape).to(device) + logprobs = torch.zeros((args.num_steps, args.local_num_envs)).to(device) + rewards = torch.zeros((args.num_steps, args.local_num_envs)).to(device) + dones = torch.zeros((args.num_steps, args.local_num_envs)).to(device) + values = torch.zeros((args.num_steps, args.local_num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // (args.batch_size * world_size) + next_obs, _ = envs.reset(seed=args.seed) + next_obs = torch.Tensor(next_obs).to(device) + next_done = torch.zeros(args.local_num_envs).to(device) - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs * world_size + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -255,19 +269,23 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy()) + next_done = np.logical_or(terminations, truncations) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) + + if not writer: + continue - for item in info: - if "episode" in item.keys() and local_rank == 0: - print(f"global_step={global_step}, episodic_return={item['episode']['r']}") - writer.add_scalar("charts/episodic_return", item["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", item["episode"]["l"], global_step) - break + if "final_info" in infos: + for info in infos["final_info"]: + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) print( - f"local_rank: {local_rank}, action.sum(): {action.sum()}, update: {update}, agent.actor.weight.sum(): {agent.actor.weight.sum()}" + f"local_rank: {local_rank}, action.sum(): {action.sum()}, iteration: {iteration}, agent.actor.weight.sum(): {agent.actor.weight.sum()}" ) # bootstrap value if not done with torch.no_grad(): @@ -294,12 +312,12 @@ def get_action_and_value(self, x, action=None): b_values = values.reshape(-1) # Optimizing the policy and value network - b_inds = np.arange(args.batch_size) + b_inds = np.arange(args.local_batch_size) clipfracs = [] for epoch in range(args.update_epochs): np.random.shuffle(b_inds) - for start in range(0, args.batch_size, args.minibatch_size): - end = start + args.minibatch_size + for start in range(0, args.local_batch_size, args.local_minibatch_size): + end = start + args.local_minibatch_size mb_inds = 
b_inds[start:end] _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds]) @@ -342,7 +360,7 @@ def get_action_and_value(self, x, action=None): optimizer.zero_grad() loss.backward() - if world_size > 1: + if args.world_size > 1: # batch allreduce ops: see https://github.com/entity-neural-network/incubator/pull/220 all_grads_list = [] for param in agent.parameters(): @@ -354,16 +372,15 @@ def get_action_and_value(self, x, action=None): for param in agent.parameters(): if param.grad is not None: param.grad.data.copy_( - all_grads[offset : offset + param.numel()].view_as(param.grad.data) / world_size + all_grads[offset : offset + param.numel()].view_as(param.grad.data) / args.world_size ) offset += param.numel() nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_continuous_action.py b/cleanrl/ppo_continuous_action.py index 0f2f3b033..b454521b6 100644 --- a/cleanrl/ppo_continuous_action.py +++ b/cleanrl/ppo_continuous_action.py @@ -1,85 +1,87 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_continuous_actionpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.normal import Normal from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", 
type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project 
name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="HalfCheetah-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=1000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=3e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=2048, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=32, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=10, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda 
x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.0, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "HalfCheetah-v4" + """the id of the environment""" + total_timesteps: int = 1000000 + """total timesteps of the experiments""" + learning_rate: float = 3e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + num_steps: int = 2048 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 32 + """the number of mini-batches""" + update_epochs: int = 10 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.2 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.0 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + 
"""the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" def make_env(env_id, idx, capture_video, run_name, gamma): @@ -140,7 +142,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -191,17 +196,16 @@ def get_action_and_value(self, x, action=None): next_obs, _ = envs.reset(seed=args.seed) next_obs = torch.Tensor(next_obs).to(device) next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -214,21 +218,16 @@ def get_action_and_value(self, x, action=None): # TRY NOT TO MODIFY: execute the game and log data. 
next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy()) - done = np.logical_or(terminations, truncations) + next_done = np.logical_or(terminations, truncations) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - - # Only print when at least 1 env is done - if "final_info" not in infos: - continue + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) - for info in infos["final_info"]: - # Skip the envs that are not done - if info is None: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) + if "final_info" in infos: + for info in infos["final_info"]: + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # bootstrap value if not done with torch.no_grad(): @@ -305,9 +304,8 @@ def get_action_and_value(self, x, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py b/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py index ddf3cf898..9ba45735d 100644 --- a/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py +++ b/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py @@ -27,11 
+27,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_continuous_action_isaacgympy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gym import isaacgym # noqa @@ -40,75 +39,77 @@ import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.normal import Normal from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True 
+ """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="Ant", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=30000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=0.0026, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=4096, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=16, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=2, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", 
type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.0, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=2, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=1, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - - parser.add_argument("--reward-scaler", type=float, default=1, - help="the scale factor applied to the reward during training") - parser.add_argument("--record-video-step-frequency", type=int, default=1464, - help="the frequency at which to record the videos") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "Ant" + """the id of the environment""" + total_timesteps: int = 30000000 + """total timesteps of the experiments""" + learning_rate: float = 0.0026 + """the learning rate of the optimizer""" + num_envs: int = 4096 + """the number of parallel game environments""" + num_steps: int = 16 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = False + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 2 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.2 + """the surrogate clipping coefficient""" + clip_vloss: bool = False + """Toggles whether or not to use a clipped loss for the 
value function, as per the paper.""" + ent_coef: float = 0.0 + """coefficient of the entropy""" + vf_coef: float = 2 + """coefficient of the value function""" + max_grad_norm: float = 1 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + reward_scaler: float = 1 + """the scale factor applied to the reward during training""" + record_video_step_frequency: int = 1464 + """the frequency at which to record the videos""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" class RecordEpisodeStatisticsTorch(gym.Wrapper): @@ -189,7 +190,10 @@ def observation(self, obs): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -262,17 +266,16 @@ def observation(self, obs): start_time = time.time() next_obs = envs.reset() next_done = torch.zeros(args.num_envs, dtype=torch.float).to(device) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. 
if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -372,9 +375,8 @@ def observation(self, obs): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break # TRY NOT TO MODIFY: record rewards for plotting purposes writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step) diff --git a/cleanrl/ppo_pettingzoo_ma_atari.py b/cleanrl/ppo_pettingzoo_ma_atari.py index bc51c703c..87b2b3123 100644 --- a/cleanrl/ppo_pettingzoo_ma_atari.py +++ b/cleanrl/ppo_pettingzoo_ma_atari.py @@ -33,7 +33,7 @@ def parse_args(): help="the wandb's project name") parser.add_argument("--wandb-entity", type=str, default=None, help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, + parser.add_argument("--capture_video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, help="whether to capture videos of the agent performances (check out `videos` folder)") # Algorithm specific arguments diff --git a/cleanrl/ppo_procgen.py b/cleanrl/ppo_procgen.py index 9a93eb0cd..0a13317da 100644 --- a/cleanrl/ppo_procgen.py +++ b/cleanrl/ppo_procgen.py @@ -1,80 +1,82 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_procgenpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from 
procgen import ProcgenEnv from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to 
capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="starpilot", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=int(25e6), - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=64, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=256, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.999, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=8, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=3, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", 
type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "starpilot" + """the id of the environment""" + total_timesteps: int = int(25e6) + """total timesteps of the experiments""" + learning_rate: float = 5e-4 + """the learning rate of the optimizer""" + num_envs: int = 64 + """the number of parallel game environments""" + num_steps: int = 256 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = False + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.999 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 8 + """the number of mini-batches""" + update_epochs: int = 3 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.2 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" def layer_init(layer, std=np.sqrt(2), bias_const=0.0): @@ -154,7 +156,10 @@ def get_action_and_value(self, 
x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -211,17 +216,16 @@ def get_action_and_value(self, x, action=None): start_time = time.time() next_obs = torch.Tensor(envs.reset()).to(device) next_done = torch.zeros(args.num_envs).to(device) - num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs dones[step] = next_done @@ -233,9 +237,9 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, next_done, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device) for item in info: if "episode" in item.keys(): @@ -319,9 +323,8 @@ def get_action_and_value(self, x, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/cleanrl/ppo_rnd_envpool.py b/cleanrl/ppo_rnd_envpool.py index 32676d08b..0c1758274 100644 --- a/cleanrl/ppo_rnd_envpool.py +++ b/cleanrl/ppo_rnd_envpool.py @@ -1,10 +1,9 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo-rnd/#ppo_rnd_envpoolpy -import argparse import os import random import time from collections import deque -from distutils.util import strtobool +from dataclasses import dataclass import envpool import gym @@ -13,84 +12,86 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from gym.wrappers.normalize import RunningMeanStd from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, 
`torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="MontezumaRevenge-v5", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=2000000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=1e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=128, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), 
default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.999, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.001, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - parser.add_argument("--sticky-action", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, sticky action will be used") + env_id: str = "MontezumaRevenge-v5" + """the id of the environment""" + total_timesteps: int = 2000000000 + """total timesteps of the experiments""" + learning_rate: float = 1e-4 + """the learning rate of the optimizer""" + num_envs: int = 128 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: 
bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.999 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.001 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" # RND arguments - parser.add_argument("--update-proportion", type=float, default=0.25, - help="proportion of exp used for predictor update") - parser.add_argument("--int-coef", type=float, default=1.0, - help="coefficient of extrinsic reward") - parser.add_argument("--ext-coef", type=float, default=2.0, - help="coefficient of intrinsic reward") - parser.add_argument("--int-gamma", type=float, default=0.99, - help="Intrinsic reward discount rate") - parser.add_argument("--num-iterations-obs-norm-init", type=int, default=50, - help="number of iterations to initialize the observations normalization parameters") - - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + update_proportion: float = 0.25 + """proportion of exp used for predictor update""" + int_coef: float = 1.0 + """coefficient of extrinsic reward""" + ext_coef: float = 2.0 + """coefficient of intrinsic reward""" + int_gamma: float = 0.99 + """Intrinsic reward discount rate""" + num_iterations_obs_norm_init: int = 50 + 
"""number of iterations to initialize the observations normalization parameters""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" class RecordEpisodeStatistics(gym.Wrapper): @@ -242,7 +243,10 @@ def update(self, rews): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl/qdagger_dqn_atari_impalacnn.py b/cleanrl/qdagger_dqn_atari_impalacnn.py index ef7922a91..6cde11c99 100644 --- a/cleanrl/qdagger_dqn_atari_impalacnn.py +++ b/cleanrl/qdagger_dqn_atari_impalacnn.py @@ -1,10 +1,9 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/qdagger/#qdagger_dqn_atari_jax_impalacnnpy -import argparse import os import random import time from collections import deque -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np @@ -12,6 +11,7 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from huggingface_hub import hf_hub_download from rich.progress import track from stable_baselines3.common.atari_wrappers import ( @@ -28,81 +28,74 @@ from cleanrl_utils.evals.dqn_eval import evaluate -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - 
parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of 
wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=1e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--buffer-size", type=int, default=1000000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1., - help="the target network update rate") - parser.add_argument("--target-network-frequency", type=int, default=1000, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=32, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for exploration") - parser.add_argument("--end-e", type=float, default=0.01, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.10, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=80000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=4, - help="the frequency 
of training") + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 1e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + buffer_size: int = 1000000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """the target network update rate""" + target_network_frequency: int = 1000 + """the timesteps it takes to update the target network""" + batch_size: int = 32 + """the batch size of sample from the reply memory""" + start_e: float = 1.0 + """the starting epsilon for exploration""" + end_e: float = 0.01 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.10 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 80000 + """timestep to start learning""" + train_frequency: int = 4 + """the frequency of training""" # QDagger specific arguments - parser.add_argument("--teacher-policy-hf-repo", type=str, default=None, - help="the huggingface repo of the teacher policy") - parser.add_argument("--teacher-eval-episodes", type=int, default=10, - help="the number of episodes to run the teacher policy evaluate") - parser.add_argument("--teacher-steps", type=int, default=500000, - help="the number of steps to run the teacher policy to generate the replay buffer") - parser.add_argument("--offline-steps", type=int, default=500000, - help="the number of steps to run the student policy with the teacher's replay buffer") - parser.add_argument("--temperature", type=float, default=1.0, - help="the temperature parameter for qdagger") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - if args.teacher_policy_hf_repo is None: - args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-dqn_atari-seed1" - - 
return args + teacher_policy_hf_repo: str = None + """the huggingface repo of the teacher policy""" + teacher_model_exp_name: str = "dqn_atari" + """the experiment name of the teacher model""" + teacher_eval_episodes: int = 10 + """the number of episodes to run the teacher policy evaluate""" + teacher_steps: int = 500000 + """the number of steps to run the teacher policy to generate the replay buffer""" + offline_steps: int = 500000 + """the number of steps to run the student policy with the teacher's replay buffer""" + temperature: float = 1.0 + """the temperature parameter for qdagger""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -212,7 +205,10 @@ def kl_divergence_with_logits(target_logits, prediction_logits): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" + if args.teacher_policy_hf_repo is None: + args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-{args.teacher_model_exp_name}-seed1" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -252,7 +248,9 @@ def kl_divergence_with_logits(target_logits, prediction_logits): target_network.load_state_dict(q_network.state_dict()) # QDAGGER LOGIC: - teacher_model_path = hf_hub_download(repo_id=args.teacher_policy_hf_repo, filename="dqn_atari.cleanrl_model") + teacher_model_path = hf_hub_download( + repo_id=args.teacher_policy_hf_repo, filename=f"{args.teacher_model_exp_name}.cleanrl_model" + ) teacher_model = TeacherModel(envs).to(device) teacher_model.load_state_dict(torch.load(teacher_model_path, map_location=device)) teacher_model.eval() diff --git a/cleanrl/qdagger_dqn_atari_jax_impalacnn.py b/cleanrl/qdagger_dqn_atari_jax_impalacnn.py index ce55baf4c..7ecbb5c47 100644 --- a/cleanrl/qdagger_dqn_atari_jax_impalacnn.py +++ 
b/cleanrl/qdagger_dqn_atari_jax_impalacnn.py @@ -1,10 +1,9 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/qdagger/#qdagger_dqn_atari_jax_impalacnnpy -import argparse import os import random import time from collections import deque -from distutils.util import strtobool +from dataclasses import dataclass from typing import Sequence os.environ[ @@ -18,6 +17,7 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.training.train_state import TrainState from huggingface_hub import hf_hub_download from rich.progress import track @@ -35,77 +35,70 @@ from cleanrl_utils.evals.dqn_jax_eval import evaluate -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or 
org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BreakoutNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=10000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=1e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--buffer-size", type=int, default=1000000, - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1., - help="the target network update rate") - parser.add_argument("--target-network-frequency", type=int, default=1000, - help="the timesteps it takes to update the target network") - parser.add_argument("--batch-size", type=int, default=32, - help="the batch size of sample from the reply memory") - parser.add_argument("--start-e", type=float, default=1, - help="the starting epsilon for 
exploration") - parser.add_argument("--end-e", type=float, default=0.01, - help="the ending epsilon for exploration") - parser.add_argument("--exploration-fraction", type=float, default=0.10, - help="the fraction of `total-timesteps` it takes from start-e to go end-e") - parser.add_argument("--learning-starts", type=int, default=80000, - help="timestep to start learning") - parser.add_argument("--train-frequency", type=int, default=4, - help="the frequency of training") + env_id: str = "BreakoutNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 10000000 + """total timesteps of the experiments""" + learning_rate: float = 1e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + buffer_size: int = 1000000 + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """the target network update rate""" + target_network_frequency: int = 1000 + """the timesteps it takes to update the target network""" + batch_size: int = 32 + """the batch size of sample from the reply memory""" + start_e: float = 1.0 + """the starting epsilon for exploration""" + end_e: float = 0.01 + """the ending epsilon for exploration""" + exploration_fraction: float = 0.10 + """the fraction of `total-timesteps` it takes from start-e to go end-e""" + learning_starts: int = 80000 + """timestep to start learning""" + train_frequency: int = 4 + """the frequency of training""" # QDagger specific arguments - parser.add_argument("--teacher-policy-hf-repo", type=str, default=None, - help="the huggingface repo of the teacher policy") - parser.add_argument("--teacher-eval-episodes", type=int, default=10, - help="the number of episodes to run the teacher policy evaluate") - parser.add_argument("--teacher-steps", type=int, default=500000, - help="the number of steps to run the teacher policy to generate the replay buffer") - parser.add_argument("--offline-steps", type=int, 
default=500000, - help="the number of steps to run the student policy with the teacher's replay buffer") - parser.add_argument("--temperature", type=float, default=1.0, - help="the temperature parameter for qdagger") - args = parser.parse_args() - # fmt: on - assert args.num_envs == 1, "vectorized envs are not supported at the moment" - - if args.teacher_policy_hf_repo is None: - args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-dqn_atari_jax-seed1" - - return args + teacher_policy_hf_repo: str = None + """the huggingface repo of the teacher policy""" + teacher_model_exp_name: str = "dqn_atari_jax" + """the experiment name of the teacher model""" + teacher_eval_episodes: int = 10 + """the number of episodes to run the teacher policy evaluate""" + teacher_steps: int = 500000 + """the number of steps to run the teacher policy to generate the replay buffer""" + offline_steps: int = 500000 + """the number of steps to run the student policy with the teacher's replay buffer""" + temperature: float = 1.0 + """the temperature parameter for qdagger""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -205,7 +198,10 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" """ ) - args = parse_args() + args = tyro.cli(Args) + assert args.num_envs == 1, "vectorized envs are not supported at the moment" + if args.teacher_policy_hf_repo is None: + args.teacher_policy_hf_repo = f"cleanrl/{args.env_id}-{args.teacher_model_exp_name}-seed1" run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -248,7 +244,9 @@ def linear_schedule(start_e: float, end_e: float, duration: int, t: int): q_network.apply = jax.jit(q_network.apply) # QDAGGER LOGIC: - teacher_model_path = hf_hub_download(repo_id=args.teacher_policy_hf_repo, filename="dqn_atari_jax.cleanrl_model") + teacher_model_path = 
hf_hub_download( + repo_id=args.teacher_policy_hf_repo, filename=f"{args.teacher_model_exp_name}.cleanrl_model" + ) teacher_model = TeacherModel(action_dim=envs.single_action_space.n) teacher_model_key = jax.random.PRNGKey(args.seed) teacher_params = teacher_model.init(teacher_model_key, envs.observation_space.sample()) diff --git a/cleanrl/rpo_continuous_action.py b/cleanrl/rpo_continuous_action.py index 919ee72ae..6db3d696b 100644 --- a/cleanrl/rpo_continuous_action.py +++ b/cleanrl/rpo_continuous_action.py @@ -1,81 +1,83 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/rpo/#rpo_continuous_actionpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.normal import Normal from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity 
(team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="HalfCheetah-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=8000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=3e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=1, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=2048, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=32, 
- help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=10, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.2, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.0, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - parser.add_argument("--rpo-alpha", type=float, default=0.5, - help="the alpha parameter for RPO") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "HalfCheetah-v4" + """the id of the environment""" + total_timesteps: int = 8000000 + """total timesteps of the experiments""" + learning_rate: float = 3e-4 + """the learning rate of the optimizer""" + num_envs: int = 1 + """the number of parallel game environments""" + num_steps: int = 2048 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 32 + """the number of mini-batches""" + 
update_epochs: int = 10 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.2 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.0 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + rpo_alpha: float = 0.5 + """the alpha parameter for RPO""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" def make_env(env_id, idx, capture_video, run_name, gamma): @@ -143,7 +145,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -221,17 +226,12 @@ def get_action_and_value(self, x, action=None): rewards[step] = torch.tensor(reward).to(device).view(-1) next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) - # Only print when at least 1 env is done - if "final_info" not in infos: - continue - - for info in infos["final_info"]: - # Skip the envs that are not done - if info is None: - continue - print(f"global_step={global_step}, episodic_return={info['episode']['r']}") - writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) - 
writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) + if "final_info" in infos: + for info in infos["final_info"]: + if info and "episode" in info: + print(f"global_step={global_step}, episodic_return={info['episode']['r']}") + writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step) + writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step) # bootstrap value if not done with torch.no_grad(): diff --git a/cleanrl/sac_atari.py b/cleanrl/sac_atari.py index f7f4ccb99..36c8c5d59 100644 --- a/cleanrl/sac_atari.py +++ b/cleanrl/sac_atari.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/sac/#sac_ataripy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np @@ -11,6 +10,7 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from stable_baselines3.common.atari_wrappers import ( ClipRewardEnv, EpisodicLifeEnv, @@ -23,58 +23,54 @@ from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - 
parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="weather to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="BeamRiderNoFrameskip-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=5000000, - help="total timesteps of the experiments") - parser.add_argument("--buffer-size", type=int, default=int(1e6), - help="the replay memory buffer size") # smaller than in original paper but evaluation is done only for 100k steps anyway - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=1.0, - help="target smoothing coefficient (default: 1)") # Default is 1 to perform replacement update - parser.add_argument("--batch-size", type=int, default=64, - help="the batch size of sample from the reply memory") - parser.add_argument("--learning-starts", type=int, 
default=2e4, - help="timestep to start learning") - parser.add_argument("--policy-lr", type=float, default=3e-4, - help="the learning rate of the policy network optimizer") - parser.add_argument("--q-lr", type=float, default=3e-4, - help="the learning rate of the Q network network optimizer") - parser.add_argument("--update-frequency", type=int, default=4, - help="the frequency of training updates") - parser.add_argument("--target-network-frequency", type=int, default=8000, - help="the frequency of updates for the target networks") - parser.add_argument("--alpha", type=float, default=0.2, - help="Entropy regularization coefficient.") - parser.add_argument("--autotune", type=lambda x:bool(strtobool(x)), default=True, nargs="?", const=True, - help="automatic tuning of the entropy coefficient") - parser.add_argument("--target-entropy-scale", type=float, default=0.89, - help="coefficient for scaling the autotune entropy target") - args = parser.parse_args() - # fmt: on - return args + env_id: str = "BeamRiderNoFrameskip-v4" + """the id of the environment""" + total_timesteps: int = 5000000 + """total timesteps of the experiments""" + buffer_size: int = int(1e6) + """the replay memory buffer size""" # smaller than in original paper but evaluation is done only for 100k steps anyway + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 1.0 + """target smoothing coefficient (default: 1)""" + batch_size: int = 64 + """the batch size of sample from the reply memory""" + learning_starts: int = 2e4 + """timestep to start learning""" + policy_lr: float = 3e-4 + """the learning rate of the policy network optimizer""" + q_lr: float = 3e-4 + """the learning rate of the Q network network optimizer""" + update_frequency: int = 4 + """the frequency of training updates""" + target_network_frequency: int = 8000 + """the frequency of updates for the target networks""" + alpha: float = 0.2 + """Entropy regularization coefficient.""" + autotune: bool = True + """automatic 
tuning of the entropy coefficient""" + target_entropy_scale: float = 0.89 + """coefficient for scaling the autotune entropy target""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -184,7 +180,7 @@ def get_action(self, x): poetry run pip install "stable_baselines3==2.0.0a1" "gymnasium[atari,accept-rom-license]==0.28.1" "ale-py==0.8.1" """ ) - args = parse_args() + args = tyro.cli(Args) run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl/sac_continuous_action.py b/cleanrl/sac_continuous_action.py index a12beec64..019608442 100644 --- a/cleanrl/sac_continuous_action.py +++ b/cleanrl/sac_continuous_action.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/sac/#sac_continuous_actionpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np @@ -11,62 +10,59 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment 
will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="Hopper-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=1000000, - help="total timesteps of the experiments") - parser.add_argument("--buffer-size", type=int, default=int(1e6), - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=0.005, - help="target smoothing coefficient (default: 0.005)") - parser.add_argument("--batch-size", type=int, default=256, - help="the batch size of sample from the reply memory") - parser.add_argument("--learning-starts", type=int, default=5e3, - help="timestep to start learning") - parser.add_argument("--policy-lr", type=float, 
default=3e-4, - help="the learning rate of the policy network optimizer") - parser.add_argument("--q-lr", type=float, default=1e-3, - help="the learning rate of the Q network network optimizer") - parser.add_argument("--policy-frequency", type=int, default=2, - help="the frequency of training policy (delayed)") - parser.add_argument("--target-network-frequency", type=int, default=1, # Denis Yarats' implementation delays this by 2. - help="the frequency of updates for the target nerworks") - parser.add_argument("--noise-clip", type=float, default=0.5, - help="noise clip parameter of the Target Policy Smoothing Regularization") - parser.add_argument("--alpha", type=float, default=0.2, - help="Entropy regularization coefficient.") - parser.add_argument("--autotune", type=lambda x:bool(strtobool(x)), default=True, nargs="?", const=True, - help="automatic tuning of the entropy coefficient") - args = parser.parse_args() - # fmt: on - return args + env_id: str = "Hopper-v4" + """the environment id of the task""" + total_timesteps: int = 1000000 + """total timesteps of the experiments""" + buffer_size: int = int(1e6) + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 0.005 + """target smoothing coefficient (default: 0.005)""" + batch_size: int = 256 + """the batch size of sample from the reply memory""" + learning_starts: int = 5e3 + """timestep to start learning""" + policy_lr: float = 3e-4 + """the learning rate of the policy network optimizer""" + q_lr: float = 1e-3 + """the learning rate of the Q network network optimizer""" + policy_frequency: int = 2 + """the frequency of training policy (delayed)""" + target_network_frequency: int = 1 # Denis Yarats' implementation delays this by 2. 
+ """the frequency of updates for the target nerworks""" + noise_clip: float = 0.5 + """noise clip parameter of the Target Policy Smoothing Regularization""" + alpha: float = 0.2 + """Entropy regularization coefficient.""" + autotune: bool = True + """automatic tuning of the entropy coefficient""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -153,7 +149,7 @@ def get_action(self, x): """ ) - args = parse_args() + args = tyro.cli(Args) run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl/td3_continuous_action.py b/cleanrl/td3_continuous_action.py index 837e27faf..418c4b3b3 100644 --- a/cleanrl/td3_continuous_action.py +++ b/cleanrl/td3_continuous_action.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/td3/#td3_continuous_actionpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import gymnasium as gym import numpy as np @@ -11,64 +10,61 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +import tyro from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", 
const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific 
arguments - parser.add_argument("--env-id", type=str, default="Hopper-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=1000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=3e-4, - help="the learning rate of the optimizer") - parser.add_argument("--buffer-size", type=int, default=int(1e6), - help="the replay memory buffer size") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=0.005, - help="target smoothing coefficient (default: 0.005)") - parser.add_argument("--batch-size", type=int, default=256, - help="the batch size of sample from the reply memory") - parser.add_argument("--policy-noise", type=float, default=0.2, - help="the scale of policy noise") - parser.add_argument("--exploration-noise", type=float, default=0.1, - help="the scale of exploration noise") - parser.add_argument("--learning-starts", type=int, default=25e3, - help="timestep to start learning") - parser.add_argument("--policy-frequency", type=int, default=2, - help="the frequency of training policy (delayed)") - parser.add_argument("--noise-clip", type=float, default=0.5, - help="noise clip parameter of the Target Policy Smoothing Regularization") - args = parser.parse_args() - # fmt: on - return args + env_id: str = "Hopper-v4" + """the id of the environment""" + total_timesteps: int = 1000000 + """total timesteps of the experiments""" + learning_rate: float = 3e-4 + """the learning rate of the optimizer""" + buffer_size: int = int(1e6) + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 0.005 + """target smoothing coefficient (default: 0.005)""" + batch_size: int = 256 + """the batch size of sample from the reply memory""" + policy_noise: float = 0.2 + """the scale of policy noise""" + exploration_noise: float = 0.1 + """the scale 
of exploration noise""" + learning_starts: int = 25e3 + """timestep to start learning""" + policy_frequency: int = 2 + """the frequency of training policy (delayed)""" + noise_clip: float = 0.5 + """noise clip parameter of the Target Policy Smoothing Regularization""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -132,7 +128,7 @@ def forward(self, x): """ ) - args = parse_args() + args = tyro.cli(Args) run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl/td3_continuous_action_jax.py b/cleanrl/td3_continuous_action_jax.py index 3c584c6f3..b69f0a0d1 100644 --- a/cleanrl/td3_continuous_action_jax.py +++ b/cleanrl/td3_continuous_action_jax.py @@ -1,9 +1,8 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/td3/#td3_continuous_action_jaxpy -import argparse import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass import flax import flax.linen as nn @@ -12,61 +11,58 @@ import jax.numpy as jnp import numpy as np import optax +import tyro from flax.training.train_state import TrainState from stable_baselines3.common.buffers import ReplayBuffer from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - 
parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") - parser.add_argument("--save-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to save model into the `runs/{run_name}` folder") - parser.add_argument("--upload-model", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to upload the saved model to huggingface") - parser.add_argument("--hf-entity", type=str, default="", - help="the user or org name of the model repository from the Hugging Face Hub") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" + save_model: bool = False + """whether to save model into the `runs/{run_name}` folder""" + upload_model: bool = False + """whether to upload the saved model to huggingface""" + hf_entity: str = "" + """the user or org name of the model repository from the Hugging Face Hub""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="HalfCheetah-v4", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=1000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=3e-4, - help="the learning rate of the optimizer") - parser.add_argument("--buffer-size", type=int, default=int(1e6), - help="the replay memory buffer size") - parser.add_argument("--gamma", 
type=float, default=0.99, - help="the discount factor gamma") - parser.add_argument("--tau", type=float, default=0.005, - help="target smoothing coefficient (default: 0.005)") - parser.add_argument("--policy-noise", type=float, default=0.2, - help="the scale of policy noise") - parser.add_argument("--batch-size", type=int, default=256, - help="the batch size of sample from the reply memory") - parser.add_argument("--exploration-noise", type=float, default=0.1, - help="the scale of exploration noise") - parser.add_argument("--learning-starts", type=int, default=25e3, - help="timestep to start learning") - parser.add_argument("--policy-frequency", type=int, default=2, - help="the frequency of training policy (delayed)") - parser.add_argument("--noise-clip", type=float, default=0.5, - help="noise clip parameter of the Target Policy Smoothing Regularization") - args = parser.parse_args() - # fmt: on - return args + env_id: str = "Hopper-v4" + """the id of the environment""" + total_timesteps: int = 1000000 + """total timesteps of the experiments""" + learning_rate: float = 3e-4 + """the learning rate of the optimizer""" + buffer_size: int = int(1e6) + """the replay memory buffer size""" + gamma: float = 0.99 + """the discount factor gamma""" + tau: float = 0.005 + """target smoothing coefficient (default: 0.005)""" + batch_size: int = 256 + """the batch size of sample from the reply memory""" + policy_noise: float = 0.2 + """the scale of policy noise""" + exploration_noise: float = 0.1 + """the scale of exploration noise""" + learning_starts: int = 25e3 + """timestep to start learning""" + policy_frequency: int = 2 + """the frequency of training policy (delayed)""" + noise_clip: float = 0.5 + """noise clip parameter of the Target Policy Smoothing Regularization""" def make_env(env_id, seed, idx, capture_video, run_name): @@ -126,7 +122,7 @@ class TrainState(TrainState): poetry run pip install "stable_baselines3==2.0.0a1" """ ) - args = parse_args() + args = 
tyro.cli(Args) run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb diff --git a/cleanrl_utils/benchmark.py b/cleanrl_utils/benchmark.py index 5274810ba..042a223f7 100644 --- a/cleanrl_utils/benchmark.py +++ b/cleanrl_utils/benchmark.py @@ -1,49 +1,74 @@ -import argparse +import math import os import shlex import subprocess -from distutils.util import strtobool +import uuid +from dataclasses import dataclass +from typing import List, Optional import requests +import tyro -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--env-ids", nargs="+", default=["CartPole-v1", "Acrobot-v1", "MountainCar-v0"], - help="the ids of the environment to benchmark") - parser.add_argument("--command", type=str, default="poetry run python cleanrl/ppo.py", - help="the command to run") - parser.add_argument("--num-seeds", type=int, default=3, - help="the number of random seeds") - parser.add_argument("--start-seed", type=int, default=1, - help="the number of the starting seed") - parser.add_argument("--workers", type=int, default=0, - help="the number of workers to run benchmark experimenets") - parser.add_argument("--auto-tag", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, the runs will be tagged with git tags, commit, and pull request number if possible") - args = parser.parse_args() - # fmt: on - return args +@dataclass +class Args: + env_ids: List[str] + """the ids of the environment to compare""" + command: str + """the command to run""" + num_seeds: int = 3 + """the number of random seeds""" + start_seed: int = 1 + """the number of the starting seed""" + workers: int = 0 + """the number of workers to run benchmark experiments""" + auto_tag: bool = True + """if toggled, the runs will be tagged with git tags, commit, and pull request number if possible""" + slurm_template_path: Optional[str] = None + """the path to the slurm template file (see 
docs for more details)""" + slurm_gpus_per_task: Optional[int] = None + """the number of gpus per task to use for slurm jobs""" + slurm_total_cpus: Optional[int] = None + """the total number of cpus to use for slurm jobs""" + slurm_ntasks: Optional[int] = None + """the number of tasks to use for slurm jobs""" + slurm_nodes: Optional[int] = None + """the number of nodes to use for slurm jobs""" def run_experiment(command: str): command_list = shlex.split(command) print(f"running {command}") - fd = subprocess.Popen(command_list) - return_code = fd.wait() - assert return_code == 0 + + # Use subprocess.PIPE to capture the output + fd = subprocess.Popen(command_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, errors = fd.communicate() + + return_code = fd.returncode + assert return_code == 0, f"Command failed with error: {errors.decode('utf-8')}" + + # Convert bytes to string and strip leading/trailing whitespaces + return output.decode("utf-8").strip() def autotag() -> str: wandb_tag = "" print("autotag feature is enabled") + git_tag = "" try: git_tag = subprocess.check_output(["git", "describe", "--tags"]).decode("ascii").strip() - wandb_tag = f"{git_tag}" print(f"identified git tag: {git_tag}") - except subprocess.CalledProcessError: - return wandb_tag + except subprocess.CalledProcessError as e: + print(e) + if len(git_tag) == 0: + try: + count = int(subprocess.check_output(["git", "rev-list", "--count", "HEAD"]).decode("ascii").strip()) + hash = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip() + git_tag = f"no-tag-{count}-g{hash}" + print(f"identified git tag: {git_tag}") + except subprocess.CalledProcessError as e: + print(e) + wandb_tag = git_tag git_commit = subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"]).decode("ascii").strip() try: @@ -63,16 +88,16 @@ def autotag() -> str: if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) if args.auto_tag: - if "WANDB_TAGS" in 
os.environ: - raise ValueError( - "WANDB_TAGS is already set. Please unset it before running this script or run the script with --auto-tag False" - ) + existing_wandb_tag = os.environ.get("WANDB_TAGS", "") wandb_tag = autotag() if len(wandb_tag) > 0: - os.environ["WANDB_TAGS"] = wandb_tag - + if len(existing_wandb_tag) > 0: + os.environ["WANDB_TAGS"] = ",".join([existing_wandb_tag, wandb_tag]) + else: + os.environ["WANDB_TAGS"] = wandb_tag + print("WANDB_TAGS: ", os.environ.get("WANDB_TAGS", "")) commands = [] for seed in range(0, args.num_seeds): for env_id in args.env_ids: @@ -82,7 +107,7 @@ def autotag() -> str: for command in commands: print(command) - if args.workers > 0: + if args.workers > 0 and args.slurm_template_path is None: from concurrent.futures import ThreadPoolExecutor executor = ThreadPoolExecutor(max_workers=args.workers, thread_name_prefix="cleanrl-benchmark-worker-") @@ -91,3 +116,37 @@ def autotag() -> str: executor.shutdown(wait=True) else: print("not running the experiments because --workers is set to 0; just printing the commands to run") + + # SLURM logic + if args.slurm_template_path is not None: + if not os.path.exists("slurm"): + os.makedirs("slurm") + if not os.path.exists("slurm/logs"): + os.makedirs("slurm/logs") + print("======= slurm commands to run:") + with open(args.slurm_template_path) as f: + slurm_template = f.read() + slurm_template = slurm_template.replace("{{array}}", f"0-{len(commands) - 1}%{args.workers}") + slurm_template = slurm_template.replace("{{env_ids}}", f"({' '.join(args.env_ids)})") + slurm_template = slurm_template.replace( + "{{seeds}}", + f"({' '.join([str(args.start_seed + int(seed)) for seed in range(args.num_seeds)])})", + ) + slurm_template = slurm_template.replace("{{len_seeds}}", f"{args.num_seeds}") + slurm_template = slurm_template.replace("{{command}}", args.command) + slurm_template = slurm_template.replace("{{gpus_per_task}}", f"{args.slurm_gpus_per_task}") + total_gpus = args.slurm_gpus_per_task * 
args.slurm_ntasks + slurm_cpus_per_gpu = math.ceil(args.slurm_total_cpus / total_gpus) + slurm_template = slurm_template.replace("{{cpus_per_gpu}}", f"{slurm_cpus_per_gpu}") + slurm_template = slurm_template.replace("{{ntasks}}", f"{args.slurm_ntasks}") + if args.slurm_nodes is not None: + slurm_template = slurm_template.replace("{{nodes}}", f"#SBATCH --nodes={args.slurm_nodes}") + else: + slurm_template = slurm_template.replace("{{nodes}}", "") + filename = str(uuid.uuid4()) + open(os.path.join("slurm", f"{filename}.slurm"), "w").write(slurm_template) + slurm_path = os.path.join("slurm", f"{filename}.slurm") + print(f"saving command in {slurm_path}") + if args.workers > 0: + job_id = run_experiment(f"sbatch --parsable {slurm_path}") + print(f"Job ID: {job_id}") diff --git a/cleanrl_utils/enjoy.py b/cleanrl_utils/enjoy.py index a9ab51b78..afc869669 100644 --- a/cleanrl_utils/enjoy.py +++ b/cleanrl_utils/enjoy.py @@ -1,5 +1,4 @@ import argparse -from distutils.util import strtobool from huggingface_hub import hf_hub_download @@ -21,8 +20,6 @@ def parse_args(): help="the id of the environment") parser.add_argument("--eval-episodes", type=int, default=10, help="the number of evaluation episodes") - parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") args = parser.parse_args() # fmt: on return args diff --git a/cleanrl_utils/tuner.py b/cleanrl_utils/tuner.py index ed576e559..d72af58ab 100644 --- a/cleanrl_utils/tuner.py +++ b/cleanrl_utils/tuner.py @@ -87,7 +87,7 @@ def objective(trial: optuna.Trial): for seed in range(num_seeds): normalized_scores = [] for env_id in self.target_scores.keys(): - sys.argv = algo_command + [f"--env-id={env_id}", f"--seed={seed}", "--track=False"] + sys.argv = algo_command + [f"--env-id={env_id}", f"--seed={seed}"] with HiddenPrints(): experiment = runpy.run_path(path_name=self.script, 
run_name="__main__") diff --git a/cloud/examples/submit_exp.sh b/cloud/examples/submit_exp.sh index 56344cea7..de54cec3c 100644 --- a/cloud/examples/submit_exp.sh +++ b/cloud/examples/submit_exp.sh @@ -13,7 +13,7 @@ python -m cleanrl.submit_exp --exp-script offline_dqn_cql_atari_visual.sh \ --num-hours 48.0 \ --submit-aws $SUBMIT_AWS -python ppg_procgen_impala_cnn.py --env-id starpilot --capture-video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1 +python ppg_procgen_impala_cnn.py --env-id starpilot --capture_video --track --wandb-entity cleanrl --wandb-project cleanrl.benchmark --seed 1 python -m cleanrl.utils.submit_exp --exp-script ppo.sh \ --algo ppo.py \ diff --git a/docs/advanced/hyperparameter-tuning.md b/docs/advanced/hyperparameter-tuning.md index 65eb7ffc5..849632969 100644 --- a/docs/advanced/hyperparameter-tuning.md +++ b/docs/advanced/hyperparameter-tuning.md @@ -23,12 +23,12 @@ tuner = Tuner( "Acrobot-v1": [-500, 0], }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), "num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 100000, "num-envs": 16, }, @@ -143,12 +143,12 @@ tuner = Tuner( "CartPole-v1": None, }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), 
"update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), "num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 100000, "num-envs": 16, }, @@ -183,12 +183,12 @@ tuner = Tuner( "CartPole-v1": None, }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), "num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 100000, "num-envs": 16, }, @@ -222,12 +222,12 @@ tuner = Tuner( "CartPole-v1": None, }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), "num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 100000, "num-envs": 16, }, diff --git a/docs/advanced/resume-training.md 
b/docs/advanced/resume-training.md index a3f8a7f59..97ded751e 100644 --- a/docs/advanced/resume-training.md +++ b/docs/advanced/resume-training.md @@ -27,7 +27,7 @@ for update in range(starting_update, num_updates + 1): Then we could run the following to train our agents ``` -python ppo_gridnet.py --prod-mode --capture-video +python ppo_gridnet.py --prod-mode --capture_video ``` If the training was terminated early, we can still see the last updated model `agent.pt` in W&B like in this URL [https://wandb.ai/costa-huang/cleanRL/runs/21421tda/files](https://wandb.ai/costa-huang/cleanRL/runs/21421tda/files) or as follows @@ -72,5 +72,5 @@ for update in range(starting_update, num_updates + 1): To resume training, note the ID of the experiment is `21421tda` as in the URL [https://wandb.ai/costa-huang/cleanRL/runs/21421tda](https://wandb.ai/costa-huang/cleanRL/runs/21421tda), so we need to pass in the ID via environment variable to trigger the resume mode of W&B: ``` -WANDB_RUN_ID=21421tda WANDB_RESUME=must python ppo_gridnet.py --prod-mode --capture-video +WANDB_RUN_ID=21421tda WANDB_RESUME=must python ppo_gridnet.py --prod-mode --capture_video ``` \ No newline at end of file diff --git a/docs/benchmark/ddpg.md b/docs/benchmark/ddpg.md new file mode 100644 index 000000000..cb03c0f31 --- /dev/null +++ b/docs/benchmark/ddpg.md @@ -0,0 +1,8 @@ +| | openrlbenchmark/cleanrl/ddpg_continuous_action ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ddpg_continuous_action_jax ({'tag': ['pr-424']}) | +|:--------------------|:-----------------------------------------------------------------------|:---------------------------------------------------------------------------| +| HalfCheetah-v4 | 10374.07 ± 157.37 | 8638.60 ± 1954.46 | +| Walker2d-v4 | 1240.16 ± 390.10 | 1427.23 ± 104.91 | +| Hopper-v4 | 1576.78 ± 818.98 | 1208.52 ± 659.22 | +| InvertedPendulum-v4 | 642.68 ± 69.56 | 804.30 ± 87.60 | +| Humanoid-v4 | 1699.56 ± 694.22 | 1513.61 ± 248.60 | +| Pusher-v4 | -77.30 ± 38.78 | 
-38.56 ± 4.47 | \ No newline at end of file diff --git a/docs/benchmark/ppo.md b/docs/benchmark/ppo.md new file mode 100644 index 000000000..07fa4506c --- /dev/null +++ b/docs/benchmark/ppo.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo ({'tag': ['pr-424']}) | +|:---------------|:----------------------------------------------------| +| CartPole-v1 | 490.04 ± 6.12 | +| Acrobot-v1 | -86.36 ± 1.32 | +| MountainCar-v0 | -200.00 ± 0.00 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari.md b/docs/benchmark/ppo_atari.md new file mode 100644 index 000000000..f8a0094cd --- /dev/null +++ b/docs/benchmark/ppo_atari.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) | +|:------------------------|:----------------------------------------------------------| +| PongNoFrameskip-v4 | 20.36 ± 0.20 | +| BeamRiderNoFrameskip-v4 | 1915.93 ± 484.58 | +| BreakoutNoFrameskip-v4 | 414.66 ± 28.09 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_envpool.md b/docs/benchmark/ppo_atari_envpool.md new file mode 100644 index 000000000..4f6f20afc --- /dev/null +++ b/docs/benchmark/ppo_atari_envpool.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) | +|:-------------|:------------------------------------------------------------------|:----------------------------------------------------------| +| Pong-v5 | 20.45 ± 0.09 | 20.36 ± 0.20 | +| BeamRider-v5 | 2501.85 ± 210.52 | 1915.93 ± 484.58 | +| Breakout-v5 | 211.24 ± 151.84 | 414.66 ± 28.09 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_envpool_runtimes.md b/docs/benchmark/ppo_atari_envpool_runtimes.md new file mode 100644 index 000000000..dcd106cbe --- /dev/null +++ b/docs/benchmark/ppo_atari_envpool_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) | 
+|:-------------|------------------------------------------------------------------:|----------------------------------------------------------:| +| Pong-v5 | 178.375 | 281.071 | +| BeamRider-v5 | 182.944 | 284.941 | +| Breakout-v5 | 151.384 | 264.077 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax.md b/docs/benchmark/ppo_atari_envpool_xla_jax.md new file mode 100644 index 000000000..e85cd8e55 --- /dev/null +++ b/docs/benchmark/ppo_atari_envpool_xla_jax.md @@ -0,0 +1,59 @@ +| | openrlbenchmark/envpool-atari/ppo_atari_envpool_xla_jax ({}) | openrlbenchmark/baselines/baselines-ppo2-cnn ({}) | +|:--------------------|:---------------------------------------------------------------|:----------------------------------------------------| +| Alien-v5 | 1736.39 ± 68.65 | 1705.80 ± 439.74 | +| Amidar-v5 | 653.53 ± 44.06 | 585.99 ± 52.92 | +| Assault-v5 | 6791.74 ± 420.03 | 4878.67 ± 815.64 | +| Asterix-v5 | 4820.33 ± 1091.83 | 3738.50 ± 745.13 | +| Asteroids-v5 | 1633.67 ± 247.21 | 1556.90 ± 151.20 | +| Atlantis-v5 | 3778458.33 ± 117680.68 | 2036749.00 ± 95929.75 | +| BankHeist-v5 | 1195.44 ± 18.54 | 1213.47 ± 14.46 | +| BattleZone-v5 | 24283.75 ± 1841.94 | 19980.00 ± 1355.21 | +| BeamRider-v5 | 2478.44 ± 336.55 | 2835.71 ± 387.92 | +| Berzerk-v5 | 992.88 ± 196.90 | 1049.77 ± 144.58 | +| Bowling-v5 | 51.62 ± 13.53 | 59.66 ± 0.62 | +| Boxing-v5 | 92.68 ± 1.41 | 93.32 ± 0.36 | +| Breakout-v5 | 430.09 ± 8.12 | 405.73 ± 11.47 | +| Centipede-v5 | 3309.34 ± 325.05 | 3688.54 ± 412.24 | +| ChopperCommand-v5 | 5642.83 ± 802.34 | 816.33 ± 114.14 | +| CrazyClimber-v5 | 118763.04 ± 4915.34 | 119344.67 ± 4902.83 | +| Defender-v5 | 48558.98 ± 4466.76 | 50161.67 ± 4477.49 | +| DemonAttack-v5 | 29283.83 ± 7007.31 | 13788.43 ± 1313.44 | +| DoubleDunk-v5 | -6.81 ± 0.24 | -12.96 ± 0.31 | +| Enduro-v5 | 1297.23 ± 143.71 | 986.69 ± 25.28 | +| FishingDerby-v5 | 21.21 ± 6.73 | 26.23 ± 2.76 | +| Freeway-v5 | 33.10 ± 0.31 | 32.97 ± 0.37 | +| Frostbite-v5 | 
1137.34 ± 1192.05 | 933.60 ± 885.92 | +| Gopher-v5 | 6505.29 ± 7655.20 | 3672.53 ± 1749.20 | +| Gravitar-v5 | 1099.33 ± 603.06 | 881.67 ± 33.73 | +| Hero-v5 | 26429.65 ± 924.74 | 24746.88 ± 3530.10 | +| IceHockey-v5 | -4.33 ± 0.43 | -4.12 ± 0.20 | +| Jamesbond-v5 | 496.08 ± 24.60 | 536.50 ± 82.33 | +| Kangaroo-v5 | 6582.12 ± 5395.44 | 5325.33 ± 3464.80 | +| Krull-v5 | 9718.09 ± 649.15 | 8737.10 ± 294.58 | +| KungFuMaster-v5 | 26000.25 ± 1965.22 | 30451.67 ± 5515.45 | +| MontezumaRevenge-v5 | 0.08 ± 0.12 | 1.00 ± 1.41 | +| MsPacman-v5 | 2345.67 ± 185.94 | 2152.83 ± 152.80 | +| NameThisGame-v5 | 5750.00 ± 181.32 | 6815.63 ± 1098.95 | +| Phoenix-v5 | 14474.11 ± 1794.83 | 9517.73 ± 1176.62 | +| Pitfall-v5 | 0.00 ± 0.00 | -0.76 ± 0.55 | +| Pong-v5 | 20.39 ± 0.24 | 20.45 ± 0.81 | +| PrivateEye-v5 | 100.00 ± 0.00 | 31.83 ± 43.74 | +| Qbert-v5 | 17246.27 ± 605.40 | 15228.25 ± 920.95 | +| Riverraid-v5 | 8275.25 ± 256.63 | 9023.57 ± 1386.85 | +| RoadRunner-v5 | 33040.38 ± 16488.95 | 40125.33 ± 7249.13 | +| Robotank-v5 | 14.43 ± 4.98 | 16.45 ± 3.37 | +| Seaquest-v5 | 1240.30 ± 419.36 | 1518.33 ± 400.35 | +| Skiing-v5 | -18483.46 ± 8684.71 | -22978.48 ± 9894.25 | +| Solaris-v5 | 2198.36 ± 147.23 | 2365.33 ± 157.75 | +| SpaceInvaders-v5 | 1188.82 ± 80.52 | 1019.75 ± 49.08 | +| StarGunner-v5 | 43519.12 ± 4709.23 | 44457.67 ± 3031.86 | +| Surround-v5 | -2.58 ± 2.31 | -4.97 ± 0.99 | +| Tennis-v5 | -17.64 ± 4.60 | -16.44 ± 1.46 | +| TimePilot-v5 | 6476.46 ± 993.30 | 6346.67 ± 663.31 | +| Tutankham-v5 | 249.05 ± 16.56 | 190.73 ± 12.00 | +| UpNDown-v5 | 487495.41 ± 39751.49 | 156143.70 ± 70620.88 | +| Venture-v5 | 0.00 ± 0.00 | 109.33 ± 61.57 | +| VideoPinball-v5 | 43133.94 ± 6362.12 | 53121.26 ± 2580.70 | +| WizardOfWor-v5 | 6353.58 ± 116.59 | 5346.33 ± 277.11 | +| YarsRevenge-v5 | 55757.68 ± 7467.49 | 9394.97 ± 2743.74 | +| Zaxxon-v5 | 3689.67 ± 2477.25 | 5532.67 ± 2607.65 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax_runtimes.md 
b/docs/benchmark/ppo_atari_envpool_xla_jax_runtimes.md new file mode 100644 index 000000000..09fe628f0 --- /dev/null +++ b/docs/benchmark/ppo_atari_envpool_xla_jax_runtimes.md @@ -0,0 +1,59 @@ +| | openrlbenchmark/envpool-atari/ppo_atari_envpool_xla_jax ({}) | openrlbenchmark/baselines/baselines-ppo2-cnn ({}) | +|:--------------------|---------------------------------------------------------------:|----------------------------------------------------:| +| Alien-v5 | 50.3275 | 117.397 | +| Amidar-v5 | 42.8176 | 114.093 | +| Assault-v5 | 35.9245 | 108.094 | +| Asterix-v5 | 37.7117 | 113.386 | +| Asteroids-v5 | 39.9731 | 114.409 | +| Atlantis-v5 | 40.1527 | 123.05 | +| BankHeist-v5 | 38.7443 | 137.308 | +| BattleZone-v5 | 45.0654 | 138.489 | +| BeamRider-v5 | 42.0778 | 119.437 | +| Berzerk-v5 | 38.7173 | 135.316 | +| Bowling-v5 | 35.0156 | 131.365 | +| Boxing-v5 | 48.8149 | 151.607 | +| Breakout-v5 | 42.3547 | 122.828 | +| Centipede-v5 | 43.6886 | 150.112 | +| ChopperCommand-v5 | 45.9308 | 131.192 | +| CrazyClimber-v5 | 36.0841 | 127.942 | +| Defender-v5 | 35.1029 | 132.29 | +| DemonAttack-v5 | 35.41 | 128.476 | +| DoubleDunk-v5 | 41.4521 | 108.028 | +| Enduro-v5 | 44.9909 | 142.046 | +| FishingDerby-v5 | 51.6075 | 151.286 | +| Freeway-v5 | 50.7103 | 154.163 | +| Frostbite-v5 | 47.5474 | 146.092 | +| Gopher-v5 | 36.2977 | 139.496 | +| Gravitar-v5 | 41.9322 | 138.746 | +| Hero-v5 | 50.5106 | 152.413 | +| IceHockey-v5 | 43.0228 | 144.455 | +| Jamesbond-v5 | 38.8264 | 137.321 | +| Kangaroo-v5 | 44.4304 | 142.436 | +| Krull-v5 | 47.7748 | 147.313 | +| KungFuMaster-v5 | 43.1534 | 141.903 | +| MontezumaRevenge-v5 | 44.8838 | 146.777 | +| MsPacman-v5 | 42.6463 | 138.382 | +| NameThisGame-v5 | 43.8473 | 136.264 | +| Phoenix-v5 | 36.7586 | 129.716 | +| Pitfall-v5 | 44.6369 | 137.36 | +| Pong-v5 | 36.7657 | 118.745 | +| PrivateEye-v5 | 43.3399 | 143.957 | +| Qbert-v5 | 40.1475 | 135.255 | +| Riverraid-v5 | 44.2555 | 142.627 | +| RoadRunner-v5 | 46.1059 | 145.451 | +| 
Robotank-v5 | 48.3364 | 149.681 | +| Seaquest-v5 | 38.3639 | 136.942 | +| Skiing-v5 | 38.6402 | 132.061 | +| Solaris-v5 | 50.2944 | 136.9 | +| SpaceInvaders-v5 | 39.4931 | 125.83 | +| StarGunner-v5 | 33.7096 | 119.18 | +| Surround-v5 | 33.923 | 132.017 | +| Tennis-v5 | 39.6194 | 97.019 | +| TimePilot-v5 | 37.0124 | 130.693 | +| Tutankham-v5 | 36.9677 | 139.694 | +| UpNDown-v5 | 52.9895 | 140.876 | +| Venture-v5 | 37.9828 | 144.236 | +| VideoPinball-v5 | 47.1716 | 179.866 | +| WizardOfWor-v5 | 37.5751 | 142.086 | +| YarsRevenge-v5 | 36.5889 | 127.358 | +| Zaxxon-v5 | 41.9785 | 133.922 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax_scan.md b/docs/benchmark/ppo_atari_envpool_xla_jax_scan.md new file mode 100644 index 000000000..7fca897b7 --- /dev/null +++ b/docs/benchmark/ppo_atari_envpool_xla_jax_scan.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) | +|:-------------|:--------------------------------------------------------------------------|:-------------------------------------------------------------------------------| +| Pong-v5 | 20.82 ± 0.21 | 20.52 ± 0.32 | +| BeamRider-v5 | 2678.73 ± 426.42 | 2860.61 ± 801.30 | +| Breakout-v5 | 420.92 ± 16.75 | 423.90 ± 5.49 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_envpool_xla_jax_scan_runtimes.md b/docs/benchmark/ppo_atari_envpool_xla_jax_scan_runtimes.md new file mode 100644 index 000000000..7c77fc420 --- /dev/null +++ b/docs/benchmark/ppo_atari_envpool_xla_jax_scan_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) | +|:-------------|--------------------------------------------------------------------------:|-------------------------------------------------------------------------------:| +| Pong-v5 | 
34.3237 | 34.701 | +| BeamRider-v5 | 37.1076 | 37.2449 | +| Breakout-v5 | 39.576 | 39.775 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_lstm.md b/docs/benchmark/ppo_atari_lstm.md new file mode 100644 index 000000000..3fad61873 --- /dev/null +++ b/docs/benchmark/ppo_atari_lstm.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_lstm ({'tag': ['pr-424']}) | +|:------------------------|:---------------------------------------------------------------| +| PongNoFrameskip-v4 | 19.81 ± 0.62 | +| BeamRiderNoFrameskip-v4 | 1299.25 ± 509.90 | +| BreakoutNoFrameskip-v4 | 113.42 ± 5.85 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_lstm_runtimes.md b/docs/benchmark/ppo_atari_lstm_runtimes.md new file mode 100644 index 000000000..079df7642 --- /dev/null +++ b/docs/benchmark/ppo_atari_lstm_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_lstm ({'tag': ['pr-424']}) | +|:------------------------|---------------------------------------------------------------:| +| PongNoFrameskip-v4 | 317.607 | +| BeamRiderNoFrameskip-v4 | 314.864 | +| BreakoutNoFrameskip-v4 | 383.724 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_multigpu.md b/docs/benchmark/ppo_atari_multigpu.md new file mode 100644 index 000000000..7cec5206e --- /dev/null +++ b/docs/benchmark/ppo_atari_multigpu.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_multigpu ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) | +|:------------------------|:-------------------------------------------------------------------|:----------------------------------------------------------| +| PongNoFrameskip-v4 | 20.34 ± 0.43 | 20.36 ± 0.20 | +| BeamRiderNoFrameskip-v4 | 2414.65 ± 643.74 | 1915.93 ± 484.58 | +| BreakoutNoFrameskip-v4 | 414.94 ± 20.60 | 414.66 ± 28.09 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_multigpu_runtimes.md b/docs/benchmark/ppo_atari_multigpu_runtimes.md new file mode 
100644 index 000000000..60b18bad3 --- /dev/null +++ b/docs/benchmark/ppo_atari_multigpu_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_multigpu ({'tag': ['pr-424']}) | +|:------------------------|-------------------------------------------------------------------:| +| PongNoFrameskip-v4 | 276.599 | +| BeamRiderNoFrameskip-v4 | 280.902 | +| BreakoutNoFrameskip-v4 | 270.532 | \ No newline at end of file diff --git a/docs/benchmark/ppo_atari_runtimes.md b/docs/benchmark/ppo_atari_runtimes.md new file mode 100644 index 000000000..2d189947e --- /dev/null +++ b/docs/benchmark/ppo_atari_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari ({'tag': ['pr-424']}) | +|:------------------------|----------------------------------------------------------:| +| PongNoFrameskip-v4 | 281.071 | +| BeamRiderNoFrameskip-v4 | 284.941 | +| BreakoutNoFrameskip-v4 | 264.077 | \ No newline at end of file diff --git a/docs/benchmark/ppo_continuous_action.md b/docs/benchmark/ppo_continuous_action.md new file mode 100644 index 000000000..bab9ae421 --- /dev/null +++ b/docs/benchmark/ppo_continuous_action.md @@ -0,0 +1,11 @@ +| | openrlbenchmark/cleanrl/ppo_continuous_action ({'tag': ['pr-424']}) | +|:-------------------------------------|:----------------------------------------------------------------------| +| HalfCheetah-v4 | 1442.64 ± 46.03 | +| Walker2d-v4 | 2287.95 ± 571.78 | +| Hopper-v4 | 2382.86 ± 271.74 | +| InvertedPendulum-v4 | 963.09 ± 22.20 | +| Humanoid-v4 | 716.11 ± 49.08 | +| Pusher-v4 | -40.38 ± 7.15 | +| dm_control/acrobot-swingup-v0 | 25.60 ± 6.30 | +| dm_control/acrobot-swingup_sparse-v0 | 1.35 ± 0.27 | +| dm_control/ball_in_cup-catch-v0 | 619.26 ± 278.67 | \ No newline at end of file diff --git a/docs/benchmark/ppo_continuous_action_runtimes.md b/docs/benchmark/ppo_continuous_action_runtimes.md new file mode 100644 index 000000000..4ffc2e28d --- /dev/null +++ b/docs/benchmark/ppo_continuous_action_runtimes.md @@ -0,0 +1,11 @@ +| | 
openrlbenchmark/cleanrl/ppo_continuous_action ({'tag': ['pr-424']}) | +|:-------------------------------------|----------------------------------------------------------------------:| +| HalfCheetah-v4 | 25.3589 | +| Walker2d-v4 | 24.3157 | +| Hopper-v4 | 25.7066 | +| InvertedPendulum-v4 | 23.7672 | +| Humanoid-v4 | 49.5592 | +| Pusher-v4 | 28.8162 | +| dm_control/acrobot-swingup-v0 | 26.5793 | +| dm_control/acrobot-swingup_sparse-v0 | 25.1265 | +| dm_control/ball_in_cup-catch-v0 | 26.1947 | \ No newline at end of file diff --git a/docs/benchmark/ppo_envpool.md b/docs/benchmark/ppo_envpool.md new file mode 100644 index 000000000..f295403e0 --- /dev/null +++ b/docs/benchmark/ppo_envpool.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) | +|:-------------|:--------------------------------------------------------------------------|:-------------------------------------------------------------------------------|:------------------------------------------------------------------| +| Pong-v5 | 20.82 ± 0.21 | 20.52 ± 0.32 | 20.45 ± 0.09 | +| BeamRider-v5 | 2678.73 ± 426.42 | 2860.61 ± 801.30 | 2501.85 ± 210.52 | +| Breakout-v5 | 420.92 ± 16.75 | 423.90 ± 5.49 | 211.24 ± 151.84 | \ No newline at end of file diff --git a/docs/benchmark/ppo_envpool_runtimes.md b/docs/benchmark/ppo_envpool_runtimes.md new file mode 100644 index 000000000..2a923348b --- /dev/null +++ b/docs/benchmark/ppo_envpool_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/ppo_atari_envpool ({'tag': ['pr-424']}) | 
+|:-------------|--------------------------------------------------------------------------:|-------------------------------------------------------------------------------:|------------------------------------------------------------------:| +| Pong-v5 | 34.3237 | 34.701 | 178.375 | +| BeamRider-v5 | 37.1076 | 37.2449 | 182.944 | +| Breakout-v5 | 39.576 | 39.775 | 151.384 | \ No newline at end of file diff --git a/docs/benchmark/ppo_procgen.md b/docs/benchmark/ppo_procgen.md new file mode 100644 index 000000000..bc1865ca3 --- /dev/null +++ b/docs/benchmark/ppo_procgen.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_procgen ({'tag': ['pr-424']}) | +|:----------|:------------------------------------------------------------| +| starpilot | 30.99 ± 1.96 | +| bossfight | 8.85 ± 0.33 | +| bigfish | 16.46 ± 2.71 | \ No newline at end of file diff --git a/docs/benchmark/ppo_procgen_runtimes.md b/docs/benchmark/ppo_procgen_runtimes.md new file mode 100644 index 000000000..5b956125e --- /dev/null +++ b/docs/benchmark/ppo_procgen_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo_procgen ({'tag': ['pr-424']}) | +|:----------|------------------------------------------------------------:| +| starpilot | 114.649 | +| bossfight | 128.679 | +| bigfish | 107.788 | \ No newline at end of file diff --git a/docs/benchmark/ppo_runtimes.md b/docs/benchmark/ppo_runtimes.md new file mode 100644 index 000000000..0277e1f85 --- /dev/null +++ b/docs/benchmark/ppo_runtimes.md @@ -0,0 +1,5 @@ +| | openrlbenchmark/cleanrl/ppo ({'tag': ['pr-424']}) | +|:---------------|----------------------------------------------------:| +| CartPole-v1 | 10.4737 | +| Acrobot-v1 | 15.4606 | +| MountainCar-v0 | 6.95995 | \ No newline at end of file diff --git a/docs/benchmark/sac.md b/docs/benchmark/sac.md new file mode 100644 index 000000000..b56e478db --- /dev/null +++ b/docs/benchmark/sac.md @@ -0,0 +1,8 @@ +| | openrlbenchmark/cleanrl/sac_continuous_action ({'tag': ['pr-424']}) | 
+|:--------------------|:----------------------------------------------------------------------| +| HalfCheetah-v4 | 9634.89 ± 1423.73 | +| Walker2d-v4 | 3591.45 ± 911.33 | +| Hopper-v4 | 2310.46 ± 342.82 | +| InvertedPendulum-v4 | 909.37 ± 55.66 | +| Humanoid-v4 | 4996.29 ± 686.40 | +| Pusher-v4 | -22.45 ± 0.51 | \ No newline at end of file diff --git a/docs/benchmark/sac_runtimes.md b/docs/benchmark/sac_runtimes.md new file mode 100644 index 000000000..b35f21121 --- /dev/null +++ b/docs/benchmark/sac_runtimes.md @@ -0,0 +1,8 @@ +| | openrlbenchmark/cleanrl/sac_continuous_action ({'tag': ['pr-424']}) | +|:--------------------|----------------------------------------------------------------------:| +| HalfCheetah-v4 | 174.778 | +| Walker2d-v4 | 161.161 | +| Hopper-v4 | 173.242 | +| InvertedPendulum-v4 | 179.042 | +| Humanoid-v4 | 177.31 | +| Pusher-v4 | 172.123 | \ No newline at end of file diff --git a/docs/benchmark/td3.md b/docs/benchmark/td3.md new file mode 100644 index 000000000..383b08a0c --- /dev/null +++ b/docs/benchmark/td3.md @@ -0,0 +1,8 @@ +| | openrlbenchmark/cleanrl/td3_continuous_action ({'tag': ['pr-424']}) | openrlbenchmark/cleanrl/td3_continuous_action_jax ({'tag': ['pr-424']}) | +|:--------------------|:----------------------------------------------------------------------|:--------------------------------------------------------------------------| +| HalfCheetah-v4 | 9583.22 ± 126.09 | 9345.93 ± 770.54 | +| Walker2d-v4 | 4057.59 ± 658.78 | 3686.19 ± 141.23 | +| Hopper-v4 | 3134.61 ± 360.18 | 2940.10 ± 655.63 | +| InvertedPendulum-v4 | 968.99 ± 25.80 | 988.94 ± 8.86 | +| Humanoid-v4 | 5035.36 ± 21.67 | 5033.22 ± 122.14 | +| Pusher-v4 | -30.92 ± 1.05 | -29.18 ± 1.02 | \ No newline at end of file diff --git a/docs/benchmark/td3_runtimes.md b/docs/benchmark/td3_runtimes.md new file mode 100644 index 000000000..76451881e --- /dev/null +++ b/docs/benchmark/td3_runtimes.md @@ -0,0 +1,8 @@ +| | openrlbenchmark/cleanrl/td3_continuous_action ({'tag': 
['pr-424']}) | openrlbenchmark/cleanrl/td3_continuous_action_jax ({'tag': ['pr-424']}) | +|:--------------------|----------------------------------------------------------------------:|--------------------------------------------------------------------------:| +| HalfCheetah-v4 | 87.353 | 39.5119 | +| Walker2d-v4 | 80.8592 | 34.0497 | +| Hopper-v4 | 90.9921 | 33.4079 | +| InvertedPendulum-v4 | 70.4218 | 30.2624 | +| Humanoid-v4 | 79.1624 | 70.2437 | +| Pusher-v4 | 95.2208 | 39.6051 | \ No newline at end of file diff --git a/docs/blog/posts/cleanrl-v1.md b/docs/blog/posts/cleanrl-v1.md index e40d48294..cfd820212 100644 --- a/docs/blog/posts/cleanrl-v1.md +++ b/docs/blog/posts/cleanrl-v1.md @@ -133,12 +133,12 @@ tuner = Tuner( "Acrobot-v1": [-500, 0], }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), "num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 100000, "num-envs": 16, }, @@ -158,12 +158,12 @@ We also added a new tool for running benchmark experiments. 
The tool is designed ```bash OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \ --num-seeds 3 \ --workers 5 ``` -which will run the `ppo.py` script with `--cuda False --track --capture-video` arguments across 3 random seeds for 3 environments. It uses `multiprocessing` to create a pool of 5 workers run the experiments in parallel. +which will run the `ppo.py` script with `--no_cuda --track --capture_video` arguments across 3 random seeds for 3 environments. It uses `multiprocessing` to create a pool of 5 workers run the experiments in parallel. diff --git a/docs/cloud/submit-experiments.md b/docs/cloud/submit-experiments.md index 8cac08458..dc776247f 100644 --- a/docs/cloud/submit-experiments.md +++ b/docs/cloud/submit-experiments.md @@ -6,13 +6,13 @@ Dry run to inspect the generated docker command ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \ --num-seed 1 ``` The generated docker command should look like ``` -docker run -d --cpuset-cpus="0" -e WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx vwxyzjn/cleanrl:latest /bin/bash -c "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video --seed 1" +docker run -d --cpuset-cpus="0" -e WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx vwxyzjn/cleanrl:latest /bin/bash -c "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video --seed 1" ``` ### Run on AWS @@ -21,7 +21,7 @@ Submit a job using AWS's 
compute-optimized spot instances ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \ --job-queue c5a-large-spot \ --num-seed 1 \ --num-vcpu 1 \ @@ -34,7 +34,7 @@ Submit a job using AWS's accelerated-computing spot instances ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture-video" \ + --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture_video" \ --job-queue g4dn-xlarge-spot \ --num-seed 1 \ --num-vcpu 1 \ @@ -48,7 +48,7 @@ Submit a job using AWS's compute-optimized on-demand instances ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \ --job-queue c5a-large \ --num-seed 1 \ --num-vcpu 1 \ @@ -61,7 +61,7 @@ Submit a job using AWS's accelerated-computing on-demand instances ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture-video" \ + --command "poetry run python cleanrl/ppo_atari.py --env-id BreakoutNoFrameskip-v4 --track --capture_video" \ --job-queue g4dn-xlarge \ --num-seed 1 \ --num-vcpu 1 \ @@ -94,7 +94,7 @@ Then you could build a container using the `--build` flag based on the `Dockerfi ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry 
run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \ --build --push ``` @@ -103,7 +103,7 @@ To build a multi-arch image using `--archs linux/arm64,linux/amd64`: ``` poetry run python -m cleanrl_utils.submit_exp \ --docker-tag vwxyzjn/cleanrl:latest \ - --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --env-id CartPole-v1 --total-timesteps 100000 --track --capture_video" \ --archs linux/arm64,linux/amd64 --build --push ``` diff --git a/docs/contribution.md b/docs/contribution.md index aa38905ac..4f4b45580 100644 --- a/docs/contribution.md +++ b/docs/contribution.md @@ -81,7 +81,7 @@ poetry install -E "docs mujoco_py" python -c "import mujoco_py" xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \ - --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture-video" \ + --command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture_video" \ --num-seeds 3 \ --workers 1 ``` @@ -209,7 +209,7 @@ Here is a checklist of the contribution process. See [:material-github: #331](ht If you need to run benchmark experiments for a performance-impacting changes: - [ ] I have contacted @vwxyzjn to obtain access to the [openrlbenchmark W&B team](https://wandb.ai/openrlbenchmark). -- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture-video`. 
+- [ ] I have used the [benchmark utility](/get-started/benchmark-utility/) to submit the tracked experiments to the [openrlbenchmark/cleanrl](https://wandb.ai/openrlbenchmark/cleanrl) W&B project, optionally with `--capture_video`. - [ ] I have performed RLops with `python -m openrlbenchmark.rlops`. - For new feature or bug fix: - [ ] I have used the RLops utility to understand the performance impact of the changes and confirmed there is no regression. diff --git a/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb b/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb index 756f1d099..4cb022ec3 100644 --- a/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb +++ b/docs/get-started/CleanRL_Huggingface_Integration_Demo.ipynb @@ -292,7 +292,7 @@ "source": [ "## Enjoy Utility\n", "\n", - "We have a simple way to load the model by running our \"enjoy\" utility, which automatically pull the model from 🤗 HuggingFace and run for a few episodes. It also produces a rendered video through the `--capture-video` flag. See more at our [📜 Documentation](https://docs.cleanrl.dev/get-started/zoo/)." + "We have a simple way to load the model by running our \"enjoy\" utility, which automatically pull the model from 🤗 HuggingFace and run for a few episodes. It also produces a rendered video through the `--capture_video` flag. See more at our [📜 Documentation](https://docs.cleanrl.dev/get-started/zoo/)." 
] }, { @@ -338,7 +338,7 @@ } ], "source": [ - "!python -m cleanrl_utils.enjoy --exp-name dqn_atari_jax --env-id BreakoutNoFrameskip-v4 --eval-episodes 2 --capture-video" + "!python -m cleanrl_utils.enjoy --exp-name dqn_atari_jax --env-id BreakoutNoFrameskip-v4 --eval-episodes 2 --capture_video" ] }, { diff --git a/docs/get-started/basic-usage.md b/docs/get-started/basic-usage.md index 5571c3e7e..55aeb3311 100644 --- a/docs/get-started/basic-usage.md +++ b/docs/get-started/basic-usage.md @@ -75,7 +75,7 @@ poetry run pip install sb3==2.0.0a1 This is because the `torch` wheel on PyPi is built with cuda 10.2. You would need to manually install the cuda 11.3 wheel like this: ```bash - poetry run pip install torch --upgrade --extra-index-url https://download.pytorch.org/whl/cu113 + poetry run pip install torch==1.12.1 --upgrade --extra-index-url https://download.pytorch.org/whl/cu113 ``` Then, you can run the script again. @@ -93,7 +93,7 @@ tensorboard --logdir runs ## Visualize the Agent's Gameplay Videos -CleanRL helps record the agent's gameplay videos with a `--capture-video` flag, +CleanRL helps record the agent's gameplay videos with a `--capture_video` flag, which will save the videos in the `videos/{$run_name}` folder. 
```bash linenums="1" hl_lines="5" @@ -101,7 +101,7 @@ python cleanrl/ppo.py \ --seed 1 \ --env-id CartPole-v0 \ --total-timesteps 50000 \ - --capture-video + --capture_video ``` ![videos](videos.png) @@ -119,7 +119,7 @@ usage: ppo.py [-h] [--exp-name EXP_NAME] [--env-id ENV_ID] [--total-timesteps TOTAL_TIMESTEPS] [--torch-deterministic [TORCH_DETERMINISTIC]] [--cuda [CUDA]] [--track [TRACK]] [--wandb-project-name WANDB_PROJECT_NAME] - [--wandb-entity WANDB_ENTITY] [--capture-video [CAPTURE_VIDEO]] + [--wandb-entity WANDB_ENTITY] [--capture_video [CAPTURE_VIDEO]] [--num-envs NUM_ENVS] [--num-steps NUM_STEPS] [--anneal-lr [ANNEAL_LR]] [--gae [GAE]] [--gamma GAMMA] [--gae-lambda GAE_LAMBDA] [--num-minibatches NUM_MINIBATCHES] @@ -146,7 +146,7 @@ optional arguments: the wandb's project name --wandb-entity WANDB_ENTITY the entity (team) of wandb's project - --capture-video [CAPTURE_VIDEO] + --capture_video [CAPTURE_VIDEO] weather to capture videos of the agent performances (check out `videos` folder) --num-envs NUM_ENVS the number of parallel game environments diff --git a/docs/get-started/benchmark-utility.md b/docs/get-started/benchmark-utility.md index eee448b73..7233d4111 100644 --- a/docs/get-started/benchmark-utility.md +++ b/docs/get-started/benchmark-utility.md @@ -7,22 +7,44 @@ CleanRL comes with a utility module `cleanrl_utils.benchmark` to help schedule a Try running `python -m cleanrl_utils.benchmark --help` to get the help text. ```bash -python -m cleanrl_utils.benchmark --help -usage: benchmark.py [-h] [--env-ids ENV_IDS [ENV_IDS ...]] [--command COMMAND] [--num-seeds NUM_SEEDS] [--start-seed START_SEED] [--workers WORKERS] - [--auto-tag [AUTO_TAG]] - -optional arguments: - -h, --help show this help message and exit - --env-ids ENV_IDS [ENV_IDS ...] 
- the ids of the environment to benchmark - --command COMMAND the command to run - --num-seeds NUM_SEEDS - the number of random seeds - --start-seed START_SEED - the number of the starting seed - --workers WORKERS the number of workers to run benchmark experimenets - --auto-tag [AUTO_TAG] - if toggled, the runs will be tagged with git tags, commit, and pull request number if possible +$ python -m cleanrl_utils.benchmark --help +usage: benchmark.py [-h] --env-ids [STR + [STR ...]] --command STR [--num-seeds INT] + [--start-seed INT] [--workers INT] + [--auto-tag | --no-auto-tag] + [--slurm-template-path {None}|STR] + [--slurm-gpus-per-task {None}|INT] + [--slurm-total-cpus {None}|INT] + [--slurm-ntasks {None}|INT] [--slurm-nodes {None}|INT] + +╭─ arguments ──────────────────────────────────────────────────────────────╮ +│ -h, --help │ +│ show this help message and exit │ +│ --env-ids [STR [STR ...]] │ +│ the ids of the environment to compare (required) │ +│ --command STR │ +│ the command to run (required) │ +│ --num-seeds INT │ +│ the number of random seeds (default: 3) │ +│ --start-seed INT │ +│ the number of the starting seed (default: 1) │ +│ --workers INT │ +│ the number of workers to run benchmark experimenets (default: 0) │ +│ --auto-tag, --no-auto-tag │ +│ if toggled, the runs will be tagged with git tags, commit, and pull │ +│ request number if possible (default: True) │ +│ --slurm-template-path {None}|STR │ +│ the path to the slurm template file (see docs for more details) │ +│ (default: None) │ +│ --slurm-gpus-per-task {None}|INT │ +│ the number of gpus per task to use for slurm jobs (default: None) │ +│ --slurm-total-cpus {None}|INT │ +│ the number of gpus per task to use for slurm jobs (default: None) │ +│ --slurm-ntasks {None}|INT │ +│ the number of tasks to use for slurm jobs (default: None) │ +│ --slurm-nodes {None}|INT │ +│ the number of nodes to use for slurm jobs (default: None) │ 
+╰──────────────────────────────────────────────────────────────────────────╯ ``` ## Examples @@ -32,7 +54,7 @@ The following example demonstrates how to run classic control benchmark experime ```bash OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ - --command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \ + --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \ --num-seeds 3 \ --workers 5 ``` @@ -40,24 +62,24 @@ OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \ What just happened here? In principle the helps run the following commands in 5 subprocesses: ```bash -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id CartPole-v1 --seed 1 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id Acrobot-v1 --seed 1 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id MountainCar-v0 --seed 1 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id CartPole-v1 --seed 2 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id Acrobot-v1 --seed 2 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id MountainCar-v0 --seed 2 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id CartPole-v1 --seed 3 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id Acrobot-v1 --seed 3 -poetry run python cleanrl/ppo.py --cuda False --track --capture-video --env-id MountainCar-v0 --seed 3 +poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id CartPole-v1 --seed 1 +poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id Acrobot-v1 --seed 1 +poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id MountainCar-v0 --seed 1 +poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id 
CartPole-v1 --seed 2
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id Acrobot-v1 --seed 2
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id MountainCar-v0 --seed 2
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id CartPole-v1 --seed 3
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id Acrobot-v1 --seed 3
+poetry run python cleanrl/ppo.py --no_cuda --track --capture_video --env-id MountainCar-v0 --seed 3
 ```
 
 More specifically:
 
 1. `--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0` specifies that running experiments against these three environments
-1. `--command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video"` suggests running `ppo.py` with these settings:
-    * turn off GPU usage via `--cuda False`: because `ppo.py` has such as small neural network it often runs faster on CPU only
+1. `--command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video"` suggests running `ppo.py` with these settings:
+    * turn off GPU usage via `--no_cuda`: because `ppo.py` has such a small neural network it often runs faster on CPU only
     * track the experiments via `--track`
-    * render the agent gameplay videos via `--capture-video`; these videos algo get saved to the tracked experiments
+    * render the agent gameplay videos via `--capture_video`; these videos also get saved to the tracked experiments
     * ` xvfb-run -a` virtualizes a display for video recording, enabling these commands on a headless linux system
 1. `--num-seeds 3` suggests running the the command with 3 random seeds for each `env-id`
 1. 
`--workers 5` suggests at maximum using 5 subprocesses to run the experiments @@ -70,9 +92,68 @@ Note that when you run with high-throughput environments such as `envpool` or `p ```bash xvfb-run -a python -m cleanrl_utils.benchmark \ --env-ids Pong-v5 BeamRider-v5 Breakout-v5 \ - --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture-video" \ + --command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture_video" \ --num-seeds 3 \ --workers 1 ``` -For more example usage, see [https://github.com/vwxyzjn/cleanrl/blob/master/benchmark](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark) \ No newline at end of file +For more example usage, see [https://github.com/vwxyzjn/cleanrl/blob/master/benchmark](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark) + + +## Slurm integration + +If you have access to a slurm cluster, you can use `cleanrl_utils.benchmark` to schedule jobs on the cluster. The following example demonstrates how to run classic control benchmark experiments on a slurm cluster. 
+ +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:3:12" +``` + +``` +poetry install +OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \ + --env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \ + --command "poetry run python cleanrl/ppo.py --no_cuda --track --capture_video" \ + --num-seeds 3 \ + --workers 9 \ + --slurm-gpus-per-task 1 \ + --slurm-ntasks 1 \ + --slurm-total-cpus 10 \ + --slurm-template-path benchmark/cleanrl_1gpu.slurm_template +``` + +Here, we have +* `--slurm-gpus-per-task 1` suggests that each slurm job should use 1 GPU +* `--slurm-ntasks 1` suggests that each slurm job should use 1 CPU +* `--slurm-total-cpus 10` suggests that each slurm job should use 10 CPUs in total +* `--slurm-template-path benchmark/cleanrl_1gpu.slurm_template` suggests that we should use the template file `benchmark/cleanrl_1gpu.slurm_template` to generate the slurm job scripts. The template file looks like this: + +``` title="benchmark/cleanrl_1gpu.slurm_template" linenums="1" +--8<-- "benchmark/cleanrl_1gpu.slurm_template" +``` + +The utility will generate a slurm script based on the template file and submit the job to the cluster. 
The generated slurm script looks like this: + +``` +#!/bin/bash +#SBATCH --job-name=low-priority +#SBATCH --partition=production-cluster +#SBATCH --gpus-per-task=1 +#SBATCH --cpus-per-gpu=10 +#SBATCH --ntasks=1 +#SBATCH --output=slurm/logs/%x_%j.out +#SBATCH --array=0-8%9 +#SBATCH --mem-per-cpu=12G +#SBATCH --exclude=ip-26-0-147-[245,247],ip-26-0-156-239 +##SBATCH --nodelist=ip-26-0-156-13 + + +env_ids=(CartPole-v1 Acrobot-v1 MountainCar-v0) +seeds=(1 2 3) +env_id=${env_ids[$SLURM_ARRAY_TASK_ID / 3]} +seed=${seeds[$SLURM_ARRAY_TASK_ID % 3]} + +echo "Running task $SLURM_ARRAY_TASK_ID with env_id: $env_id and seed: $seed" + +srun poetry run python cleanrl/ppo.py --no_cuda --track --env-id $env_id --seed $seed # +``` + diff --git a/docs/get-started/experiment-tracking.md b/docs/get-started/experiment-tracking.md index 89f91f42b..395aaabf1 100644 --- a/docs/get-started/experiment-tracking.md +++ b/docs/get-started/experiment-tracking.md @@ -1,11 +1,11 @@ # Experiment tracking To use experiment tracking with wandb, run with the `--track` flag, which will also -upload the videos recorded by the `--capture-video` flag. +upload the videos recorded by the `--capture_video` flag. 
```bash poetry shell wandb login # only required for the first time -python cleanrl/ppo.py --track --capture-video +python cleanrl/ppo.py --track --capture_video ``` diff --git a/docs/rl-algorithms/c51.md b/docs/rl-algorithms/c51.md index c5dd66503..e5d1356cf 100644 --- a/docs/rl-algorithms/c51.md +++ b/docs/rl-algorithms/c51.md @@ -216,7 +216,7 @@ The [c51_atari_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/c5 ```bash poetry install -E "atari jax" - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/c51_atari_jax.py --env-id BreakoutNoFrameskip-v4 poetry run python cleanrl/c51_atari_jax.py --env-id PongNoFrameskip-v4 ``` @@ -291,7 +291,7 @@ The [c51_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/c51_jax. ```bash poetry install -E jax - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/c51_jax.py --env-id CartPole-v1 ``` diff --git a/docs/rl-algorithms/ddpg.md b/docs/rl-algorithms/ddpg.md index 77d2f6837..671d4e903 100644 --- a/docs/rl-algorithms/ddpg.md +++ b/docs/rl-algorithms/ddpg.md @@ -41,8 +41,6 @@ The [ddpg_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/c ```bash poetry install poetry run python cleanrl/ddpg_continuous_action.py --help - poetry install -E mujoco_py # only works in Linux - poetry run python cleanrl/ddpg_continuous_action.py --env-id Hopper-v2 poetry install -E mujoco poetry run python cleanrl/ddpg_continuous_action.py --env-id Hopper-v4 ``` @@ -51,8 +49,6 @@ The 
[ddpg_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/c ```bash python cleanrl/ddpg_continuous_action.py --help - pip install -r requirements/requirements-mujoco_py.txt # only works in Linux, you have to pick either `mujoco` or `mujoco_py` - python cleanrl/ddpg_continuous_action.py --env-id Hopper-v2 pip install -r requirements/requirements-mujoco.txt python cleanrl/ddpg_continuous_actions.py --env-id Hopper-v4 ``` @@ -232,23 +228,28 @@ Additionally, when drawing exploration noise that is added to the actions produc To run benchmark experiments, see :material-github: [benchmark/ddpg.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ddpg.sh). Specifically, execute the following command: - + +``` title="benchmark/ddpg.sh" linenums="1" +--8<-- "benchmark/ddpg.sh::7" +``` + Below are the average episodic returns for [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2]. 
| Environment | [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) | [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018, Table 1)[^2] | [`DDPG.py`](https://github.com/sfujim/TD3/blob/master/DDPG.py) using settings from (Lillicrap et al., 2016)[^1] in (Fujimoto et al., 2018, Table 1)[^2] | | ----------- | ----------- | ----------- | ----------- | -| HalfCheetah | 10210.57 ± 196.22 |8577.29 | 3305.60| -| Walker2d | 1661.14 ± 250.01 | 3098.11 | 1843.85 | -| Hopper | 1007.44 ± 148.29 | 1860.02 | 2020.46 | -| Humanoid | 910.61 ± 97.58 | not available | -| Pusher | -39.39 ± 9.54 | not available | -| InvertedPendulum | 684.61 ± 94.41 | 1000.00 ± 0.00 | +| HalfCheetah-v4 | 10374.07 ± 157.37 |8577.29 | 3305.60| +| Walker2d-v4 | 1240.16 ± 390.10 | 3098.11 | 1843.85 | +| Hopper-v4 | 1576.78 ± 818.98 | 1860.02 | 2020.46 | +| InvertedPendulum-v4 | 642.68 ± 69.56 | 1000.00 ± 0.00 | +| Humanoid-v4 | 1699.56 ± 694.22 | not available | +| Pusher-v4 | -77.30 ± 38.78 | not available | + ???+ info - Note that [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) uses gym MuJoCo v2 environments while [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments. According to the :material-github: [openai/gym#834](https://github.com/openai/gym/pull/834), gym MuJoCo v2 environments should be equivalent to the gym MuJoCo v1 environments. + Note that [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) uses gym MuJoCo v4 environments while [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments. 
Also note the performance of our `ddpg_continuous_action.py` seems to be worse than the reference implementation on Walker2d and Hopper. This is likely due to :material-github: [openai/gym#938](https://github.com/openai/baselines/issues/938). We would have a hard time reproducing gym MuJoCo v1 environments because they have been long deprecated. @@ -256,7 +257,12 @@ Below are the average episodic returns for [`ddpg_continuous_action.py`](https:/ Learning curves: - +``` title="benchmark/ddpg_plot.sh" linenums="1" +--8<-- "benchmark/ddpg_plot.sh::9" +``` + + + @@ -314,41 +320,59 @@ See [related docs](/rl-algorithms/ddpg/#implementation-details) for `ddpg_contin To run benchmark experiments, see :material-github: [benchmark/ddpg.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ddpg.sh). Specifically, execute the following command: - -Below are the average episodic returns for [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2]. +``` title="benchmark/ddpg.sh" linenums="1" +--8<-- "benchmark/ddpg.sh:12:19" +``` + +Below are the average episodic returns for [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (3 random seeds). 
+ + +{!benchmark/ddpg.md!} + +Learning curves: + + +``` title="benchmark/ddpg_plot.sh" linenums="1" +--8<-- "benchmark/ddpg_plot.sh:11:20" +``` + + + -| Environment | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (RTX 3060 TI) | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (VM w/ TPU) | [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) (RTX 3060 TI) | [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018, Table 1)[^2] | -| ----------- | ----------- | ----------- | ----------- | ----------- | -| HalfCheetah | 9592.25 ± 135.10 | 9125.06 ± 1477.58 | 10210.57 ± 196.22 |8577.29 | -| Walker2d | 1083.15 ± 567.65 | 1303.82 ± 448.41 | 1661.14 ± 250.01 | 3098.11 | -| Hopper | 1275.28 ± 209.60 | 1145.05 ± 41.95 | 1007.44 ± 148.29 | 1860.02 | ???+ info - Note that the experiments were conducted on different hardwares, so your mileage might vary. This inconsistency is because 1) re-running expeirments on the same hardware is computationally expensive and 2) requiring the same hardware is not inclusive nor feasible to other contributors who might have different hardwares. + These are some previous experiments with TPUs. Note the results are very similar to the ones above, but the runtime can be different due to different hardware used. - That said, we roughly expect to see a 2-4x speed improvement from using [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) under the same hardware. And if you disable the `--capture-video` overhead, the speed improvement will be even higher. + Note that the experiments were conducted on different hardwares, so your mileage might vary. 
This inconsistency is because 1) re-running experiments on the same hardware is computationally expensive and 2) requiring the same hardware is not inclusive nor feasible to other contributors who might have different hardwares.
+
+    That said, we roughly expect to see a 2-4x speed improvement from using [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) under the same hardware. And if you disable the `--capture_video` overhead, the speed improvement will be even higher.
 
-Learning curves:
-
- - + | Environment | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (RTX 3060 TI) | [`ddpg_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action_jax.py) (VM w/ TPU) | [`ddpg_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_continuous_action.py) (RTX 3060 TI) | [`OurDDPG.py`](https://github.com/sfujim/TD3/blob/master/OurDDPG.py) (Fujimoto et al., 2018, Table 1)[^2] | + | ----------- | ----------- | ----------- | ----------- | ----------- | + | HalfCheetah | 9592.25 ± 135.10 | 9125.06 ± 1477.58 | 10210.57 ± 196.22 |8577.29 | + | Walker2d | 1083.15 ± 567.65 | 1303.82 ± 448.41 | 1661.14 ± 250.01 | 3098.11 | + | Hopper | 1275.28 ± 209.60 | 1145.05 ± 41.95 | 1007.44 ± 148.29 | 1860.02 | - - + Learning curves: - - -
+
+ + + + + + +
-Tracked experiments and game play videos: + Tracked experiments and game play videos: - + [^1]:Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N.M., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. CoRR, abs/1509.02971. https://arxiv.org/abs/1509.02971 diff --git a/docs/rl-algorithms/dqn.md b/docs/rl-algorithms/dqn.md index ce64b1847..e2316e9ab 100644 --- a/docs/rl-algorithms/dqn.md +++ b/docs/rl-algorithms/dqn.md @@ -250,7 +250,7 @@ The [dqn_atari_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/dq ```bash poetry install -E "atari jax" - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/dqn_atari_jax.py --env-id BreakoutNoFrameskip-v4 poetry run python cleanrl/dqn_atari_jax.py --env-id PongNoFrameskip-v4 ``` @@ -333,7 +333,7 @@ python cleanrl/dqn_jax.py --env-id CartPole-v1 === "poetry" ```bash - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/dqn_jax.py --env-id CartPole-v1 ``` diff --git a/docs/rl-algorithms/ppo-isaacgymenvs.md b/docs/rl-algorithms/ppo-isaacgymenvs.md index 7508b591e..38ff65d7f 100644 --- a/docs/rl-algorithms/ppo-isaacgymenvs.md +++ b/docs/rl-algorithms/ppo-isaacgymenvs.md @@ -23,7 +23,11 @@ ## `ppo_continuous_action_isaacgym.py` -:octicons-beaker-24: Experimental + +???+ warning + + `ppo_continuous_action_isaacgym.py` is temporarily deprecated. 
Please checkout the code in [https://github.com/vwxyzjn/cleanrl/releases/tag/v1.0.0](https://github.com/vwxyzjn/cleanrl/releases/tag/v1.0.0) + The [ppo_continuous_action_isaacgym.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_continuous_action_isaacgym/ppo_continuous_action_isaacgym.py) has the following features: @@ -250,6 +254,6 @@ Old Learning curves w/ Isaac Gym Preview 3 (no longer available in Nvidia's webs ???+ info - Note the `AllegroHand` and `ShadowHand` experiments used the following command `ppo_continuous_action_isaacgym.py --track --capture-video --num-envs 16384 --num-steps 8 --update-epochs 5 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660`. Costa: I was able to run this during my internship at NVIDIA, but in my home setup, the computer has less GPU memory which makes it hard to replicate the results w/ `--num-envs 16384`. + Note the `AllegroHand` and `ShadowHand` experiments used the following command `ppo_continuous_action_isaacgym.py --track --capture_video --num-envs 16384 --num-steps 8 --update-epochs 5 --reward-scaler 0.01 --total-timesteps 600000000 --record-video-step-frequency 3660`. Costa: I was able to run this during my internship at NVIDIA, but in my home setup, the computer has less GPU memory which makes it hard to replicate the results w/ `--num-envs 16384`. diff --git a/docs/rl-algorithms/ppo.md b/docs/rl-algorithms/ppo.md index 4f588bf03..e83b38e63 100644 --- a/docs/rl-algorithms/ppo.md +++ b/docs/rl-algorithms/ppo.md @@ -100,27 +100,28 @@ Running `python cleanrl/ppo.py` will automatically record various metrics such a To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:3:8" +``` Below are the average episodic returns for `ppo.py`. 
To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO. | Environment | `ppo.py` | `openai/baselies`' PPO (Huang et al., 2022)[^1] | ----------- | ----------- | ----------- | -| CartPole-v1 | 492.40 ± 13.05 |497.54 ± 4.02 | -| Acrobot-v1 | -89.93 ± 6.34 | -81.82 ± 5.58 | +| CartPole-v1 | 490.04 ± 6.12 |497.54 ± 4.02 | +| Acrobot-v1 | -86.36 ± 1.32 | -81.82 ± 5.58 | | MountainCar-v0 | -200.00 ± 0.00 | -200.00 ± 0.00 | Learning curves: -
- +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh::9" +``` - - -
+ + Tracked experiments and game play videos: @@ -186,27 +187,28 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:14:19" +``` Below are the average episodic returns for `ppo_atari.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO. | Environment | `ppo_atari.py` | `openai/baselies`' PPO (Huang et al., 2022)[^1] | ----------- | ----------- | ----------- | -| BreakoutNoFrameskip-v4 | 416.31 ± 43.92 | 406.57 ± 31.554 | -| PongNoFrameskip-v4 | 20.59 ± 0.35 | 20.512 ± 0.50 | -| BeamRiderNoFrameskip-v4 | 2445.38 ± 528.91 | 2642.97 ± 670.37 | +| BreakoutNoFrameskip-v4 | 414.66 ± 28.09 | 406.57 ± 31.554 | +| PongNoFrameskip-v4 | 20.36 ± 0.20 | 20.512 ± 0.50 | +| BeamRiderNoFrameskip-v4 | 1915.93 ± 484.58 | 2642.97 ± 670.37 | Learning curves: -
- +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:11:19" +``` - - - -
+ + Tracked experiments and game play videos: @@ -248,9 +250,6 @@ The [ppo_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl # dm_control environments poetry install -E "mujoco dm_control" python cleanrl/ppo_continuous_action.py --env-id dm_control/cartpole-balance-v0 - # backwards compatibility with mujoco v2 environments - poetry install -E mujoco_py # only works in Linux - python cleanrl/ppo_continuous_action.py --env-id Hopper-v2 ``` === "pip" @@ -261,8 +260,6 @@ The [ppo_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl python cleanrl/ppo_continuous_action.py --env-id Hopper-v4 pip install -r requirements/requirements-dm_control.txt python cleanrl/ppo_continuous_action.py --env-id dm_control/cartpole-balance-v0 - pip install -r requirements/requirements-mujoco_py.txt - python cleanrl/ppo_continuous_action.py --env-id Hopper-v2 ``` ???+ warning "dm_control installation issue" @@ -301,122 +298,97 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - - - - - -???+ note "Result tables, learning curves, and interactive reports" - - === "MuJoCo v2" - - Below are the average episodic returns for `ppo_continuous_action.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO. 
- - | | ppo_continuous_action ({'tag': ['v1.0.0-27-gde3f410']}) | `openai/baselies`' PPO (results taken from [here](https://wandb.ai/openrlbenchmark/openrlbenchmark/reports/MuJoCo-openai-baselines--VmlldzoyMTgyNjM0)) | - |:--------------------|:----------------------------------------------------------|:---------------------------------------------------------------------------------------------| - | HalfCheetah-v2 | 2262.50 ± 1196.81 | 1428.55 ± 62.40 | - | Walker2d-v2 | 3312.32 ± 429.87 | 3356.49 ± 322.61 | - | Hopper-v2 | 2311.49 ± 440.99 | 2158.65 ± 302.33 | - | InvertedPendulum-v2 | 852.04 ± 17.04 | 901.25 ± 35.73 | - | Humanoid-v2 | 676.34 ± 78.68 | 673.11 ± 53.02 | - | Pusher-v2 | -60.49 ± 4.37 | -56.83 ± 13.33 | - - Learning curves: - - ![](../ppo/ppo_continuous_action_gymnasium_mujoco_v2.png) - - Tracked experiments and game play videos: +MuJoCo v4 - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:25:30" +``` - === "MuJoCo v4" +{!benchmark/ppo_continuous_action.md!} - Below are the average episodic returns for `ppo_continuous_action.py` in MuJoCo v4 environments and `dm_control` environments. +Learning curves: - | | ppo_continuous_action ({'tag': ['v1.0.0-12-g99f7789']}) | - |:--------------------|:----------------------------------------------------------| - | HalfCheetah-v4 | 2905.85 ± 1129.37 | - | Walker2d-v4 | 2890.97 ± 231.40 | - | Hopper-v4 | 2051.80 ± 313.94 | - | InvertedPendulum-v4 | 950.98 ± 36.39 | - | Humanoid-v4 | 742.19 ± 155.77 | - | Pusher-v4 | -55.60 ± 3.98 | +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:11:19" +``` + + - Learning curves: +Tracked experiments and game play videos: - ![](../ppo/ppo_continuous_action_gymnasium_mujoco_v4.png) + + + + +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:36:41" +``` + +Below are the average episodic returns for `ppo_continuous_action.py` in `dm_control` environments. 
+ +| | ppo_continuous_action ({'tag': ['v1.0.0-13-gcbd83f6']}) | +|:--------------------------------------|:----------------------------------------------------------| +| dm_control/acrobot-swingup-v0 | 27.84 ± 9.25 | +| dm_control/acrobot-swingup_sparse-v0 | 1.60 ± 1.17 | +| dm_control/ball_in_cup-catch-v0 | 900.78 ± 5.26 | +| dm_control/cartpole-balance-v0 | 855.47 ± 22.06 | +| dm_control/cartpole-balance_sparse-v0 | 999.93 ± 0.10 | +| dm_control/cartpole-swingup-v0 | 640.86 ± 11.44 | +| dm_control/cartpole-swingup_sparse-v0 | 51.34 ± 58.35 | +| dm_control/cartpole-two_poles-v0 | 203.86 ± 11.84 | +| dm_control/cartpole-three_poles-v0 | 164.59 ± 3.23 | +| dm_control/cheetah-run-v0 | 432.56 ± 82.54 | +| dm_control/dog-stand-v0 | 307.79 ± 46.26 | +| dm_control/dog-walk-v0 | 120.05 ± 8.80 | +| dm_control/dog-trot-v0 | 76.56 ± 6.44 | +| dm_control/dog-run-v0 | 60.25 ± 1.33 | +| dm_control/dog-fetch-v0 | 34.26 ± 2.24 | +| dm_control/finger-spin-v0 | 590.49 ± 171.09 | +| dm_control/finger-turn_easy-v0 | 180.42 ± 44.91 | +| dm_control/finger-turn_hard-v0 | 61.40 ± 9.59 | +| dm_control/fish-upright-v0 | 516.21 ± 59.52 | +| dm_control/fish-swim-v0 | 87.91 ± 6.83 | +| dm_control/hopper-stand-v0 | 2.72 ± 1.72 | +| dm_control/hopper-hop-v0 | 0.52 ± 0.48 | +| dm_control/humanoid-stand-v0 | 6.59 ± 0.18 | +| dm_control/humanoid-walk-v0 | 1.73 ± 0.03 | +| dm_control/humanoid-run-v0 | 1.11 ± 0.04 | +| dm_control/humanoid-run_pure_state-v0 | 0.98 ± 0.03 | +| dm_control/humanoid_CMU-stand-v0 | 4.79 ± 0.18 | +| dm_control/humanoid_CMU-run-v0 | 0.88 ± 0.05 | +| dm_control/manipulator-bring_ball-v0 | 0.50 ± 0.29 | +| dm_control/manipulator-bring_peg-v0 | 1.80 ± 1.58 | +| dm_control/manipulator-insert_ball-v0 | 35.50 ± 13.04 | +| dm_control/manipulator-insert_peg-v0 | 60.40 ± 21.76 | +| dm_control/pendulum-swingup-v0 | 242.81 ± 245.95 | +| dm_control/point_mass-easy-v0 | 273.95 ± 362.28 | +| dm_control/point_mass-hard-v0 | 143.25 ± 38.12 | +| dm_control/quadruped-walk-v0 | 239.03 ± 
66.17 | +| dm_control/quadruped-run-v0 | 180.44 ± 32.91 | +| dm_control/quadruped-escape-v0 | 28.92 ± 11.21 | +| dm_control/quadruped-fetch-v0 | 193.97 ± 22.20 | +| dm_control/reacher-easy-v0 | 626.28 ± 15.51 | +| dm_control/reacher-hard-v0 | 443.80 ± 9.64 | +| dm_control/stacker-stack_2-v0 | 75.68 ± 4.83 | +| dm_control/stacker-stack_4-v0 | 68.02 ± 4.02 | +| dm_control/swimmer-swimmer6-v0 | 158.19 ± 10.22 | +| dm_control/swimmer-swimmer15-v0 | 131.94 ± 0.88 | +| dm_control/walker-stand-v0 | 564.46 ± 235.22 | +| dm_control/walker-walk-v0 | 392.51 ± 56.25 | +| dm_control/walker-run-v0 | 125.92 ± 10.01 | + +Note that the dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 environments are never terminated or truncated. See https://wandb.ai/openrlbenchmark/cleanrl/runs/3tm00923 and https://wandb.ai/openrlbenchmark/cleanrl/runs/1z9us07j as an example. - Tracked experiments and game play videos: - - +Learning curves: - === "dm_control" +![](../ppo/ppo_continuous_action_gymnasium_dm_control.png) - Below are the average episodic returns for `ppo_continuous_action.py` in `dm_control` environments. 
+Tracked experiments and game play videos: - | | ppo_continuous_action ({'tag': ['v1.0.0-13-gcbd83f6']}) | - |:--------------------------------------|:----------------------------------------------------------| - | dm_control/acrobot-swingup-v0 | 27.84 ± 9.25 | - | dm_control/acrobot-swingup_sparse-v0 | 1.60 ± 1.17 | - | dm_control/ball_in_cup-catch-v0 | 900.78 ± 5.26 | - | dm_control/cartpole-balance-v0 | 855.47 ± 22.06 | - | dm_control/cartpole-balance_sparse-v0 | 999.93 ± 0.10 | - | dm_control/cartpole-swingup-v0 | 640.86 ± 11.44 | - | dm_control/cartpole-swingup_sparse-v0 | 51.34 ± 58.35 | - | dm_control/cartpole-two_poles-v0 | 203.86 ± 11.84 | - | dm_control/cartpole-three_poles-v0 | 164.59 ± 3.23 | - | dm_control/cheetah-run-v0 | 432.56 ± 82.54 | - | dm_control/dog-stand-v0 | 307.79 ± 46.26 | - | dm_control/dog-walk-v0 | 120.05 ± 8.80 | - | dm_control/dog-trot-v0 | 76.56 ± 6.44 | - | dm_control/dog-run-v0 | 60.25 ± 1.33 | - | dm_control/dog-fetch-v0 | 34.26 ± 2.24 | - | dm_control/finger-spin-v0 | 590.49 ± 171.09 | - | dm_control/finger-turn_easy-v0 | 180.42 ± 44.91 | - | dm_control/finger-turn_hard-v0 | 61.40 ± 9.59 | - | dm_control/fish-upright-v0 | 516.21 ± 59.52 | - | dm_control/fish-swim-v0 | 87.91 ± 6.83 | - | dm_control/hopper-stand-v0 | 2.72 ± 1.72 | - | dm_control/hopper-hop-v0 | 0.52 ± 0.48 | - | dm_control/humanoid-stand-v0 | 6.59 ± 0.18 | - | dm_control/humanoid-walk-v0 | 1.73 ± 0.03 | - | dm_control/humanoid-run-v0 | 1.11 ± 0.04 | - | dm_control/humanoid-run_pure_state-v0 | 0.98 ± 0.03 | - | dm_control/humanoid_CMU-stand-v0 | 4.79 ± 0.18 | - | dm_control/humanoid_CMU-run-v0 | 0.88 ± 0.05 | - | dm_control/manipulator-bring_ball-v0 | 0.50 ± 0.29 | - | dm_control/manipulator-bring_peg-v0 | 1.80 ± 1.58 | - | dm_control/manipulator-insert_ball-v0 | 35.50 ± 13.04 | - | dm_control/manipulator-insert_peg-v0 | 60.40 ± 21.76 | - | dm_control/pendulum-swingup-v0 | 242.81 ± 245.95 | - | dm_control/point_mass-easy-v0 | 273.95 ± 362.28 | - | 
dm_control/point_mass-hard-v0 | 143.25 ± 38.12 | - | dm_control/quadruped-walk-v0 | 239.03 ± 66.17 | - | dm_control/quadruped-run-v0 | 180.44 ± 32.91 | - | dm_control/quadruped-escape-v0 | 28.92 ± 11.21 | - | dm_control/quadruped-fetch-v0 | 193.97 ± 22.20 | - | dm_control/reacher-easy-v0 | 626.28 ± 15.51 | - | dm_control/reacher-hard-v0 | 443.80 ± 9.64 | - | dm_control/stacker-stack_2-v0 | 75.68 ± 4.83 | - | dm_control/stacker-stack_4-v0 | 68.02 ± 4.02 | - | dm_control/swimmer-swimmer6-v0 | 158.19 ± 10.22 | - | dm_control/swimmer-swimmer15-v0 | 131.94 ± 0.88 | - | dm_control/walker-stand-v0 | 564.46 ± 235.22 | - | dm_control/walker-walk-v0 | 392.51 ± 56.25 | - | dm_control/walker-run-v0 | 125.92 ± 10.01 | - - Note that the dm_control/lqr-lqr_2_1-v0 dm_control/lqr-lqr_6_2-v0 environments are never terminated or truncated. See https://wandb.ai/openrlbenchmark/cleanrl/runs/3tm00923 and https://wandb.ai/openrlbenchmark/cleanrl/runs/1z9us07j as an example. - - Learning curves: - - ![](../ppo/ppo_continuous_action_gymnasium_dm_control.png) - - Tracked experiments and game play videos: - - - + + ???+ info @@ -484,8 +456,9 @@ To help test out the memory, we remove the 4 stacked frames from the observation To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:47:52" +``` Below are the average episodic returns for `ppo_atari_lstm.py`. To ensure the quality of the implementation, we compared the results against `openai/baselies`' PPO. @@ -499,14 +472,12 @@ Below are the average episodic returns for `ppo_atari_lstm.py`. To ensure the qu Learning curves: -
- - - - - -
+``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:11:19" +``` + + Tracked experiments and game play videos: @@ -568,34 +539,22 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - - - -Below are the average episodic returns for `ppo_atari_envpool.py`. Notice it has the same sample efficiency as `ppo_atari.py`, but runs about 3x faster. - - - -| Environment | `ppo_atari_envpool.py` (~80 mins) | `ppo_atari.py` (~220 mins) -| ----------- | ----------- | ----------- | -| BreakoutNoFrameskip-v4 | 389.57 ± 29.62 | 416.31 ± 43.92 -| PongNoFrameskip-v4 | 20.55 ± 0.37 | 20.59 ± 0.35 -| BeamRiderNoFrameskip-v4 | 2039.83 ± 1146.62 | 2445.38 ± 528.91 +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:58:63" +``` +{!benchmark/ppo_atari_envpool.md!} Learning curves: -
- - +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:51:62" +``` - - - - - -
+ + Tracked experiments and game play videos: @@ -637,7 +596,7 @@ The [ppo_atari_envpool_xla_jax.py](https://github.com/vwxyzjn/cleanrl/blob/maste ```bash poetry install -E "envpool jax" - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/ppo_atari_envpool_xla_jax.py --help poetry run python cleanrl/ppo_atari_envpool_xla_jax.py --env-id Breakout-v5 ``` @@ -684,96 +643,25 @@ Additionally, we record the following metric: To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:69:74" +``` -Below are the average episodic returns for `ppo_atari_envpool_xla_jax.py`. Notice it has the same sample efficiency as `ppo_atari.py`, but runs about 3x faster. -???+ info +{!benchmark/ppo_atari_envpool_xla_jax.md!} - The following table and charts are generated by [atari_hns_new.py](https://github.com/openrlbenchmark/openrlbenchmark/blob/0c16fda7d7873143a632865010c74263ea487339/atari_hns_new.py), [ours_vs_baselines_hns.py](https://github.com/openrlbenchmark/openrlbenchmark/blob/0c16fda7d7873143a632865010c74263ea487339/ours_vs_baselines_hns.py), and [ours_vs_seedrl_hns.py](https://github.com/openrlbenchmark/openrlbenchmark/blob/0c16fda7d7873143a632865010c74263ea487339/ours_vs_seedrl_hns.py). 
+Learning curves: + +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:64:85" +``` + + + + - -| Environment | CleanRL ppo_atari_envpool_xla_jax.py | openai/baselines' PPO | -|:--------------------|---------------------------------------:|------------------------:| -| Alien-v5 | 1744.76 | 1549.42 | -| Amidar-v5 | 617.137 | 546.406 | -| Assault-v5 | 5734.04 | 4050.78 | -| Asterix-v5 | 3341.9 | 3459.9 | -| Asteroids-v5 | 1669.3 | 1467.19 | -| Atlantis-v5 | 3.92929e+06 | 3.09748e+06 | -| BankHeist-v5 | 1192.68 | 1195.34 | -| BattleZone-v5 | 24937.9 | 20314.3 | -| BeamRider-v5 | 2447.84 | 2740.02 | -| Berzerk-v5 | 1082.72 | 887.019 | -| Bowling-v5 | 44.0681 | 62.2634 | -| Boxing-v5 | 92.0554 | 93.3596 | -| Breakout-v5 | 431.795 | 388.891 | -| Centipede-v5 | 2910.69 | 3688.16 | -| ChopperCommand-v5 | 5555.84 | 933.333 | -| CrazyClimber-v5 | 116114 | 111675 | -| Defender-v5 | 51439.2 | 50045.1 | -| DemonAttack-v5 | 22824.8 | 12173.9 | -| DoubleDunk-v5 | -8.56781 | -9 | -| Enduro-v5 | 1262.79 | 1061.12 | -| FishingDerby-v5 | 21.6222 | 23.8876 | -| Freeway-v5 | 33.1075 | 32.9167 | -| Frostbite-v5 | 904.346 | 924.5 | -| Gopher-v5 | 11369.6 | 2899.57 | -| Gravitar-v5 | 1141.95 | 870.755 | -| Hero-v5 | 24628.3 | 25984.5 | -| IceHockey-v5 | -4.91917 | -4.71505 | -| Jamesbond-v5 | 504.105 | 516.489 | -| Kangaroo-v5 | 7281.59 | 3791.5 | -| Krull-v5 | 9384.7 | 8672.95 | -| KungFuMaster-v5 | 26594.5 | 29116.1 | -| MontezumaRevenge-v5 | 0.240385 | 0 | -| MsPacman-v5 | 2461.62 | 2113.44 | -| NameThisGame-v5 | 5442.67 | 5713.89 | -| Phoenix-v5 | 14008.5 | 8693.21 | -| Pitfall-v5 | -0.0801282 | -1.47059 | -| Pong-v5 | 20.309 | 20.4043 | -| PrivateEye-v5 | 99.5283 | 21.2121 | -| Qbert-v5 | 16430.7 | 14283.4 | -| Riverraid-v5 | 8297.21 | 9267.48 | -| RoadRunner-v5 | 19342.2 | 40325 | -| Robotank-v5 | 15.45 | 16 | -| Seaquest-v5 | 1230.02 | 1754.44 | -| Skiing-v5 | -14684.3 | -13901.7 | -| Solaris-v5 | 2353.62 | 2088.12 | -| SpaceInvaders-v5 | 1162.16 | 1017.65 
| -| StarGunner-v5 | 53535.9 | 40906 | -| Surround-v5 | -2.94558 | -6.08095 | -| Tennis-v5 | -15.0446 | -9.71429 | -| TimePilot-v5 | 6224.87 | 5775.53 | -| Tutankham-v5 | 238.419 | 197.929 | -| UpNDown-v5 | 430177 | 129459 | -| Venture-v5 | 0 | 115.278 | -| VideoPinball-v5 | 42975.3 | 32777.4 | -| WizardOfWor-v5 | 6247.83 | 5024.03 | -| YarsRevenge-v5 | 56696.7 | 8238.44 | -| Zaxxon-v5 | 6015.8 | 6379.79 | - - - -Median Human Normalized Score (HNS) compared to openai/baselines. - -![](../ppo/ppo_atari_envpool_xla_jax/hns_ppo_vs_baselines.svg) - - -Learning curves (left y-axis is the return and right y-axis is the human normalized score): - -![](../ppo/ppo_atari_envpool_xla_jax/hms_each_game.svg) - - -Percentage of human normalized score (HMS) for each game. -![](../ppo/ppo_atari_envpool_xla_jax/runset_0_hms_bar.svg) ???+ info @@ -812,7 +700,7 @@ The [ppo_atari_envpool_xla_jax_scan.py](https://github.com/vwxyzjn/cleanrl/blob/ ```bash poetry install -E "envpool jax" - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --help poetry run python cleanrl/ppo_atari_envpool_xla_jax_scan.py --env-id Breakout-v5 ``` @@ -839,15 +727,23 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - -Below are the average episodic returns for `ppo_atari_envpool_xla_jax_scan.py` in 3 atari games. It has the same sample efficiency as `ppo_atari_envpool_xla_jax.py`. 
+``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:80:85" +``` + + +{!benchmark/ppo_atari_envpool_xla_jax_scan.md!} -| | ppo_atari_envpool_xla_jax_scan ({'tag': ['pr-328'], 'user': ['51616']}) | ppo_atari_envpool_xla_jax ({'tag': ['pr-328'], 'user': ['51616']}) | baselines-ppo2-cnn ({}) | ppo_atari_envpool_xla_jax_truncation ({'user': ['costa-huang']}) | -|:-------------|:--------------------------------------------------------------------------|:---------------------------------------------------------------------|:--------------------------|:-------------------------------------------------------------------| -| BeamRider-v5 | 2899.62 ± 482.12 | 2222.09 ± 1047.86 | 2835.71 ± 387.92 | 3133.78 ± 293.02 | -| Breakout-v5 | 451.27 ± 45.52 | 424.97 ± 18.37 | 405.73 ± 11.47 | 465.90 ± 14.30 | -| Pong-v5 | 20.37 ± 0.20 | 20.59 ± 0.40 | 20.45 ± 0.81 | 20.62 ± 0.18 | + +Learning curves: + +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:87:96" +``` + + + Learning curves: @@ -855,15 +751,15 @@ Learning curves: The trainig time of this variant and that of [ppo_atari_envpool_xla_jax.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_atari_envpool_xla_jax.py) are very similar but the compilation time is reduced significantly (see [vwxyzjn/cleanrl#328](https://github.com/vwxyzjn/cleanrl/pull/328#issuecomment-1340474894)). Note that the hardware also affects the speed in the learning curve below. Runs from [`costa-huang`](https://github.com/vwxyzjn/) (red) are slower from those of [`51616`](https://github.com/51616/) (blue and orange) because of hardware differences. 
-![](../ppo/ppo_atari_envpool_xla_jax_scan/compare.png) -![](../ppo/ppo_atari_envpool_xla_jax_scan/compare-time.png) + ![](../ppo/ppo_atari_envpool_xla_jax_scan/compare.png) + ![](../ppo/ppo_atari_envpool_xla_jax_scan/compare-time.png) + Tracked experiments: - ## `ppo_procgen.py` The [ppo_procgen.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_procgen.py) has the following features: @@ -908,8 +804,10 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:91:100" +``` We try to match the default setting in [openai/train-procgen](https://github.com/openai/train-procgen) except that we use the `easy` distribution mode and `total_timesteps=25e6` to save compute. Notice [openai/train-procgen](https://github.com/openai/train-procgen) has the following settings: @@ -921,24 +819,25 @@ Below are the average episodic returns for `ppo_procgen.py`. To ensure the quali | Environment | `ppo_procgen.py` | `openai/baselies`' PPO (Huang et al., 2022)[^1] | ----------- | ----------- | ----------- | -| StarPilot (easy) | 32.47 ± 11.21 | 33.97 ± 7.86 | -| BossFight (easy) | 9.63 ± 2.35 | 9.35 ± 2.04 | -| BigFish (easy) | 16.80 ± 9.49 | 20.06 ± 5.34 | - +| StarPilot (easy) | 30.99 ± 1.96 | 33.97 ± 7.86 | +| BossFight (easy) | 8.85 ± 0.33 | 9.35 ± 2.04 | +| BigFish (easy) | 16.46 ± 2.71 | 20.06 ± 5.34 | -???+ info - Note that we have run the procgen experiments using the `easy` distribution for reducing the computational cost. Learning curves: -
- +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:98:106" +``` - + + - -
+ +???+ info + + Note that we have run the procgen experiments using the `easy` distribution for reducing the computational cost. Tracked experiments and game play videos: @@ -1010,13 +909,13 @@ See [related docs](/rl-algorithms/ppo/#explanation-of-the-logged-metrics) for `p [ppo_atari_multigpu.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_atari_multigpu.py) is based on `ppo_atari.py` (see its [related docs](/rl-algorithms/ppo/#implementation-details_1)). -We use [Pytorch's distributed API](https://pytorch.org/tutorials/intermediate/dist_tuto.html) to implement the data parallelism paradigm. The basic idea is that the user can spawn $N$ processes each holding a copy of the model, step the environments, and averages their gradients together for the backward pass. Here are a few note-worthy implementation details. +We use [Pytorch's distributed API](https://pytorch.org/tutorials/intermediate/dist_tuto.html) to implement the data parallelism paradigm. The basic idea is that the user can spawn $N$ processes each running a copy of `ppo_atari.py`, holding a copy of the model, stepping the environments, and averaging their gradients together for the backward pass. Here are a few note-worthy implementation details. -1. **Shard the environments**: by default, `ppo_atari_multigpu.py` uses `--num-envs=8`. When calling `torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --env-id BreakoutNoFrameskip-v4`, it spawns $N=2$ (by `--nproc_per_node=2`) subprocesses and shard the environments across these 2 subprocesses. In particular, each subprocess will have `8/2=4` environments. Implementation wise, we do `args.num_envs = int(args.num_envs / world_size)`. Here `world_size=2` refers to the size of the **world**, which means the group of subprocesses. 
We also need to adjust various variables as follows: - * **batch size**: by default it is `(num_envs * num_steps) = 8 * 128 = 1024` and we adjust it to `(num_envs / world_size * num_steps) = (4 * 128) = 512`. - * **minibatch size**: by default it is `(num_envs * num_steps) / num_minibatches = (8 * 128) / 4 = 256` and we adjust it to `(num_envs / world_size * num_steps) / num_minibatches = (4 * 128) / 4 = 128`. - * **number of updates**: by default it is `total_timesteps // batch_size = 10000000 // (8 * 128) = 9765` and we adjust it to `total_timesteps // (batch_size * world_size) = 10000000 // (8 * 128 * 2) = 4882`. - * **global step increment**: by default it is `num_envs` and we adjust it to `num_envs * world_size`. +1. **Local versus global parameters**: All of the parameters in `ppo_atari.py` are global (such as batch size), but in `ppo_atari_multigpu.py` we have local parameters as well. Say we run `torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --env-id BreakoutNoFrameskip-v4 --local-num-envs=4`; here are how all multi-gpu related parameters are adjusted: + * **number of environments**: `num_envs = local_num_envs * world_size = 4 * 2 = 8` + * **batch size**: `local_batch_size = local_num_envs * num_steps = 4 * 128 = 512`, `batch_size = num_envs * num_steps) = 8 * 128 = 1024` + * **minibatch size**: `local_minibatch_size = int(args.local_batch_size // args.num_minibatches) = 512 // 4 = 128`, `minibatch_size = int(args.batch_size // args.num_minibatches) = 1024 // 4 = 256` + * **number of updates**: `num_iterations = args.total_timesteps // args.batch_size = 10000000 // 1024 = 9765` 1. **Adjust seed per process**: we need be very careful with seeding: we could have used the exact same seed for each subprocess. 
To ensure this does not happen, we do the following ```python hl_lines="2 5 16" @@ -1070,100 +969,6 @@ We use [Pytorch's distributed API](https://pytorch.org/tutorials/intermediate/di -We can see how `ppo_atari_multigpu.py` can result in no loss of sample efficiency. In this example, the `ppo_atari.py`'s minibatch size is `256` and the `ppo_atari_multigpu.py`'s minibatch size is `128` with world size 2. Because we average gradient across the world, the gradient under `ppo_atari_multigpu.py` should be virtually the same as the gradient under `ppo_atari.py`. - - - - ### Experiment results @@ -1171,35 +976,37 @@ We can see how `ppo_atari_multigpu.py` can result in no loss of sample efficienc To run benchmark experiments, see :material-github: [benchmark/ppo.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/ppo.sh). Specifically, execute the following command: - +``` title="benchmark/ppo.sh" linenums="1" +--8<-- "benchmark/ppo.sh:102:107" +``` Below are the average episodic returns for `ppo_atari_multigpu.py`. To ensure no loss of sample efficiency, we compared the results against `ppo_atari.py`. -| Environment | `ppo_atari_multigpu.py` (in ~160 mins) | `ppo_atari.py` (in ~215 mins) -| ----------- | ----------- | ----------- | -| BreakoutNoFrameskip-v4 | 429.06 ± 52.09 | 416.31 ± 43.92 | -| PongNoFrameskip-v4 | 20.40 ± 0.46 | 20.59 ± 0.35 | -| BeamRiderNoFrameskip-v4 | 2454.54 ± 740.49 | 2445.38 ± 528.91 | + +{!benchmark/ppo_atari_multigpu.md!} Learning curves: -
- - +``` title="benchmark/ppo_plot.sh" linenums="1" +--8<-- "benchmark/ppo_plot.sh:108:117" +``` + + + - - - - -
Under the same hardware, we see that `ppo_atari_multigpu.py` is about **30% faster** than `ppo_atari.py` with no loss of sample efficiency. +???+ info + + The experiments above is to show correctness -- we show that by aligning the same hyperparameters of `ppo_atari.py` and `ppo_atari_multigpu.py`, we can achieve the same sample efficiency. However, we can train even faster by simply running a much larger batch size. For example, we can run `torchrun --standalone --nnodes=1 --nproc_per_node=8 cleanrl/ppo_atari_multigpu.py --env-id BreakoutNoFrameskip-v4 --local-num-envs=8`, which will run 8 x 8 = 64 environments in parallel and achieve a batch size of 64 x 128 = 8192. This will likely result in a sample efficiency but should increase the wall time efficiency. + + ???+ info Although `ppo_atari_multigpu.py` is 30% faster than `ppo_atari.py`, `ppo_atari_multigpu.py` is still slower than `ppo_atari_envpool.py`, as shown below. This comparison really highlights the different kinds of optimization possible. 
diff --git a/docs/rl-algorithms/qdagger.md b/docs/rl-algorithms/qdagger.md index 678c73818..bdb9f2690 100644 --- a/docs/rl-algorithms/qdagger.md +++ b/docs/rl-algorithms/qdagger.md @@ -143,7 +143,7 @@ The [qdagger_dqn_atari_jax_impalacnn.py](https://github.com/vwxyzjn/cleanrl/blob ```bash poetry install -E "atari jax" - poetry run pip install --upgrade "jax[cuda]==0.3.17" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + poetry run pip install --upgrade "jax[cuda11_cudnn82]==0.4.8" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --env-id BreakoutNoFrameskip-v4 poetry run python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --env-id PongNoFrameskip-v4 ``` diff --git a/docs/rl-algorithms/sac.md b/docs/rl-algorithms/sac.md index 0bbc119dd..1594ac2c5 100644 --- a/docs/rl-algorithms/sac.md +++ b/docs/rl-algorithms/sac.md @@ -200,11 +200,13 @@ CleanRL's [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/m 3. [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/sac_continuous_action.py) uses `--batch-size=256` while :material-github: [openai/spinningup](https://github.com/openai/spinningup/blob/038665d62d569055401d91856abb287263096178/spinup/algos/tf1/sac/sac.py#L44)'s uses `--batch-size=100` by default. -### Pybullet experiment results for SAC +### Experiment results To run benchmark experiments, see :material-github: [benchmark/sac.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/sac.sh). 
Specifically, execute the following command: - +``` title="benchmark/sac.sh" linenums="1" +--8<-- "benchmark/sac.sh::7" +``` The table below compares the results of CleanRL's [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/sac_continuous_action.py) with the [latest published results](https://arxiv.org/abs/1812.05905) by the original authors of the SAC algorithm. @@ -213,19 +215,22 @@ The table below compares the results of CleanRL's [`sac_continuous_action.py`](h | Environment | [`sac_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/sac_continuous_action.py) |[SAC: Algorithms and Applications](https://arxiv.org/abs/1812.05905) @ 1M steps| | --------------- | ------------------ | ---------------- | -| HalfCheetah-v2 | 10310.37 ± 1873.21 | ~11,250 | -| Walker2d-v2 | 4418.15 ± 592.82 | ~4,800 | -| Hopper-v2 | 2685.76 ± 762.16 | ~3,250 | +| HalfCheetah-v2 | 9634.89 ± 1423.73 | ~11,250 | +| Walker2d-v2 | 3591.45 ± 911.33 | ~4,800 | +| Hopper-v2 | 2310.46 ± 342.82 | ~3,250 | +| InvertedPendulum-v4 | 909.37 ± 55.66 | N/A | +| Humanoid-v4 | 4996.29 ± 686.40 | ~4500 +| Pusher-v4 | -22.45 ± 0.51 | N/A | Learning curves: -
- - - -
+``` title="benchmark/sac_plot.sh" linenums="1" +--8<-- "benchmark/sac_plot.sh::9" +``` -
+ + + Tracked experiments and game play videos: diff --git a/docs/rl-algorithms/td3.md b/docs/rl-algorithms/td3.md index e1d595f11..6bf4494f9 100644 --- a/docs/rl-algorithms/td3.md +++ b/docs/rl-algorithms/td3.md @@ -42,8 +42,6 @@ The [td3_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl poetry install -E mujoco poetry run python cleanrl/td3_continuous_action.py --help poetry run python cleanrl/td3_continuous_action.py --env-id Hopper-v4 - poetry install -E mujoco_py # only works in Linux - poetry run python cleanrl/td3_continuous_action.py --env-id Hopper-v2 ``` === "pip" @@ -52,8 +50,6 @@ The [td3_continuous_action.py](https://github.com/vwxyzjn/cleanrl/blob/master/cl pip install -r requirements/requirements-mujoco.txt python cleanrl/td3_continuous_action.py --help python cleanrl/td3_continuous_action.py --env-id Hopper-v4 - pip install -r requirements/requirements-mujoco_py.txt # only works in Linux, - python cleanrl/td3_continuous_action.py --env-id Hopper-v2 ``` ### Explanation of the logged metrics @@ -128,25 +124,27 @@ Additionally, when drawing exploration noise that is added to the actions produc To run benchmark experiments, see :material-github: [benchmark/td3.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/td3.sh). Specifically, execute the following command: - +``` title="benchmark/td3.sh" linenums="1" +--8<-- "benchmark/td3.sh::7" +``` Below are the average episodic returns for [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2]. 
| Environment | [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) | [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018, Table 1)[^2] | | ----------- | ----------- | ----------- | -| HalfCheetah | 9449.94 ± 1586.49 |9636.95 ± 859.065 | -| Walker2d | 3851.55 ± 335.29 | 4682.82 ± 539.64 | -| Hopper | 3162.21 ± 261.08 | 3564.07 ± 114.74 | -| Humanoid | 5011.05 ± 254.89 | not available | -| Pusher | -37.49 ± 10.22 | not available | -| InvertedPendulum | 996.81 ± 4.50 | 1000.00 ± 0.00 | +| HalfCheetah-v4 | 9583.22 ± 126.09 |9636.95 ± 859.065 | +| Walker2d-v4 | 4057.59 ± 658.78 | 4682.82 ± 539.64 | +| Hopper-v4 | 3134.61 ± 360.18 | 3564.07 ± 114.74 | +| InvertedPendulum-v4 | 968.99 ± 25.80 | 1000.00 ± 0.00 | +| Humanoid-v4 | 5035.36 ± 21.67 | not available | +| Pusher-v4 | -30.92 ± 1.05 | not available | ???+ info - Note that [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) uses gym MuJoCo v2 environments while [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments. According to the :material-github: [openai/gym#834](https://github.com/openai/gym/pull/834), gym MuJoCo v2 environments should be equivalent to the gym MuJoCo v1 environments. + Note that [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) uses gym MuJoCo v4 environments while [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018)[^2] uses the gym MuJoCo v1 environments. Also note the performance of our `td3_continuous_action.py` seems to be worse than the reference implementation on Walker2d. This is likely due to :material-github: [openai/gym#938](https://github.com/openai/baselines/issues/938). 
We would have a hard time reproducing gym MuJoCo v1 environments because they have been long deprecated. @@ -154,20 +152,12 @@ Below are the average episodic returns for [`td3_continuous_action.py`](https:// Learning curves: -
- - - - - - - - - - - -
+``` title="benchmark/td3_plot.sh" linenums="1" +--8<-- "benchmark/td3_plot.sh::9" +``` + + Tracked experiments and game play videos: @@ -194,8 +184,6 @@ The [td3_continuous_action_jax.py](https://github.com/vwxyzjn/cleanrl/blob/maste poetry install -E "mujoco jax" poetry run python cleanrl/td3_continuous_action_jax.py --help poetry run python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v4 - poetry install -E mujoco_py # only works in Linux - poetry run python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v2 ``` === "pip" @@ -205,8 +193,6 @@ The [td3_continuous_action_jax.py](https://github.com/vwxyzjn/cleanrl/blob/maste pip install -r requirements/requirements-jax.txt python cleanrl/td3_continuous_action_jax.py --help python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v4 - pip install -r requirements/requirements-mujoco_py.txt # only works in Linux - python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v2 ``` ### Explanation of the logged metrics @@ -223,42 +209,53 @@ See [related docs](/rl-algorithms/td3/#implementation-details) for `td3_continuo To run benchmark experiments, see :material-github: [benchmark/td3.sh](https://github.com/vwxyzjn/cleanrl/blob/master/benchmark/td3.sh). Specifically, execute the following command: - +``` title="benchmark/td3.sh" linenums="1" +--8<-- "benchmark/td3.sh:12:19" +``` -Below are the average episodic returns for [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) (3 random seeds). To ensure the quality of the implementation, we compared the results against (Fujimoto et al., 2018)[^2]. +Below are the average episodic returns for [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) (3 random seeds). 
-| Environment | [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) (RTX 3060 TI) | [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) (VM w/ TPU) | [`td3_continuous_action.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action.py) (RTX 3060 TI) | [`TD3.py`](https://github.com/sfujim/TD3/blob/master/TD3.py) (Fujimoto et al., 2018, Table 1)[^2] | -| ----------- | ----------- | ----------- | ----------- | ----------- | -| HalfCheetah | 9408.62 ± 473.23 | 8948.33 ± 1196.87 | 9449.94 ± 1586.49 |9636.95 ± 859.065 | -| Walker2d | 3512.14 ± 1576.59 | 4107.63 ± 173.93 | 3851.55 ± 335.29 | 4682.82 ± 539.64 | -| Hopper | 2898.62 ± 485.18 | 3151.80 ± 458.68 | 3162.21 ± 261.08 | 3564.07 ± 114.74 | +{!benchmark/td3.md!} +Learning curves: + + +``` title="benchmark/td3_plot.sh" linenums="1" +--8<-- "benchmark/td3_plot.sh:11:20" +``` + + + ???+ info + These are some previous experiments with TPUs. Note the results are very similar to the ones above, but the runtime can be different due to different hardware used. + Note that the experiments were conducted on different hardwares, so your mileage might vary. This inconsistency is because 1) re-running expeirments on the same hardware is computationally expensive and 2) requiring the same hardware is not inclusive nor feasible to other contributors who might have different hardwares. - That said, we roughly expect to see a 2-4x speed improvement from using [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) under the same hardware. And if you disable the `--capture-video` overhead, the speed improvement will be even higher. 
+ That said, we roughly expect to see a 2-4x speed improvement from using [`td3_continuous_action_jax.py`](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/td3_continuous_action_jax.py) under the same hardware. And if you disable the `--capture_video` overhead, the speed improvement will be even higher. + Learning curves: -Learning curves: -
- - +
+ + - - + + - - -
+ + +
-Tracked experiments and game play videos: + Tracked experiments and game play videos: + + + - [^1]:Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N.M., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. CoRR, abs/1509.02971. https://arxiv.org/abs/1509.02971 diff --git a/poetry.lock b/poetry.lock index 21c891792..f30baf28c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "absl-py" version = "1.4.0" description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -12,59 +11,47 @@ files = [ {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, ] -[[package]] -name = "aiosignal" -version = "1.3.1" -description = "aiosignal: a list of registered asynchronous callbacks" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] - -[package.dependencies] -frozenlist = ">=1.1.0" - [[package]] name = "ale-py" -version = "0.7.4" +version = "0.8.1" description = "The Arcade Learning Environment (ALE) - a platform for AI research." 
-category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "ale_py-0.7.4-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:418eea1539c2669c799274fedead4d44d05dfc3dcd6c536378d5984c42bc340b"}, - {file = "ale_py-0.7.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:38e4823be04761a2ebc0167ed710a318cc9f0fec3815576c45030fe8e67f9c98"}, - {file = "ale_py-0.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9af49488ec1b4facb299975a665e9e706921dd2d756daad813e2897debc5fc3c"}, - {file = "ale_py-0.7.4-cp310-cp310-win_amd64.whl", hash = "sha256:f600c55d6a7c6c30f5592b30afc34366101fc7561842bdd5740d5bca390201eb"}, - {file = "ale_py-0.7.4-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:da3e1400e02fb46659dfb3af92e8a4cf4c5b2d4f9d19a008ce9d5fa8eebb4ab6"}, - {file = "ale_py-0.7.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c073005b68901f0003ffe871d56021245eda9e88f27cc91745627c099932499f"}, - {file = "ale_py-0.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:913394ad1dbe22a8d489378d702f296234721ca0a0e76e5354764e8bf40bc623"}, - {file = "ale_py-0.7.4-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:4841f395e3166d4a7b1e9207cafab08de4b9e9b4178afd97a36f53844ade98a2"}, - {file = "ale_py-0.7.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5b2899b4cf659bc14a20047455e681e991cb96ceed937d22a5dac1a97a16bf3e"}, - {file = "ale_py-0.7.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9aff7a8ce37d00a87ef4114666db0b45d499744d08f5ff1683dbbbcac4783569"}, - {file = "ale_py-0.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:a23f4c858a2c5cbfa3c0cb2c9ab167359c368104b67e19b332710c19b43c6091"}, - {file = "ale_py-0.7.4-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:0b9ab62f12a325e92ba2af99c5b231ad3b219a46913b14068c857d37837025fb"}, - {file = "ale_py-0.7.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:269dcf94024ba7a8276d4dcf04c526df695cb383aa2372e9903a08ec6f679262"}, - {file = 
"ale_py-0.7.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f65371c180779b115d8600d99780e9e83b229812e94c6b49be1686ce4d82573"}, - {file = "ale_py-0.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:b53e7d0c8f8e8610ebaec88887da2427ce16811f9697ccbb39588ec784bea145"}, + {file = "ale_py-0.8.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:b2aa2f69a4169742800615970efe6914fa856e33eaf7fa9133c0e06a617a80e2"}, + {file = "ale_py-0.8.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f2f6b92c8fd6189654979bbf0b305dbe0ecf82176c47f244d8c1cbc36286b89"}, + {file = "ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9b168eb88c87d0f3e2a778e6c5cdde4ad951d1ca8a6dc3d3679fd45398df7d1"}, + {file = "ale_py-0.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:5fcc31f495de79ee1d6bfc0f4b7c4619948851e679bbf010035e25f23146a687"}, + {file = "ale_py-0.8.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10"}, + {file = "ale_py-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f10b1df8774bbe3b00365748b5e0e07cf35f6a703bbaff991bc7b3b2247dccc9"}, + {file = "ale_py-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09"}, + {file = "ale_py-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:9773eea7505484e024beb2fff0f3bfd363db151bdb9799d70995448e196b1ded"}, + {file = "ale_py-0.8.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:87557db05be0e04130e2ec1bf909d3bb0b0bc034645d4f664e6baa573fe32191"}, + {file = "ale_py-0.8.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae2ba24557e0ce541ea3be13b148db2a9cfa730d83537b4cbed5e10449826e51"}, + {file = "ale_py-0.8.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ade5c32af567629164a6b49378978c728a15dc4db07ad6b679e8832d4fd3ea1f"}, + {file = "ale_py-0.8.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = 
"sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18"}, + {file = "ale_py-0.8.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7cd74b7ee0248ef11a086c9764e142e71defd40ec8989a99232bfd2d9e8023be"}, + {file = "ale_py-0.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eadf9f3990b4ff2f9e5ca35889f5e2e95cddd6a353d9d857d9b4601a6e1c4e7c"}, + {file = "ale_py-0.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:817adf9a3a82c4923c731e634520a5ecf296aca0367f5c69959a96b32119d831"}, + {file = "ale_py-0.8.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:2d9fcfa06c74a613c5419e942ef4d3e0959533f52e94d2d4bda61d07fbfffeee"}, + {file = "ale_py-0.8.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f278036f9b6066062abcdf0987a0ec5a8e0f22a2c7cfac925e39378d4343d490"}, + {file = "ale_py-0.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b00f74e27815131c1a2791f3d48114363fa2708e19f09ce6b7b614cb14c9d469"}, + {file = "ale_py-0.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:d49b550a2d9c25b63c343aa680fd81f253a3714cdc0e1835640933ebff1798ff"}, ] [package.dependencies] importlib-metadata = {version = ">=4.10.0", markers = "python_version < \"3.10\""} importlib-resources = "*" numpy = "*" +typing-extensions = {version = "*", markers = "python_version < \"3.11\""} [package.extras] -test = ["gym", "pytest"] +test = ["gym (>=0.23,<1.0)", "pytest (>=7.0)"] [[package]] name = "alembic" version = "1.10.4" description = "A database migration tool for SQLAlchemy." 
-category = "main" optional = true python-versions = ">=3.7" files = [ @@ -82,22 +69,10 @@ typing-extensions = ">=4" [package.extras] tz = ["python-dateutil"] -[[package]] -name = "antlr4-python3-runtime" -version = "4.9.3" -description = "ANTLR 4.9.3 runtime for Python 3.7" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, -] - [[package]] name = "appdirs" version = "1.4.4" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "main" optional = false python-versions = "*" files = [ @@ -105,33 +80,10 @@ files = [ {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, ] -[[package]] -name = "attrs" -version = "23.1.0" -description = "Classes Without Boilerplate" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, -] - -[package.dependencies] -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} - -[package.extras] -cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]", "pre-commit"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] -tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] - [[package]] name = "autorom" version = "0.4.2" description = "Automated installation of Atari ROMs for Gym/ALE-Py" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ 
-153,7 +105,6 @@ accept-rom-license = ["AutoROM.accept-rom-license"] name = "autorom-accept-rom-license" version = "0.6.1" description = "Automated installation of Atari ROMs for Gym/ALE-Py" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -170,29 +121,27 @@ tests = ["ale_py", "multi_agent_ale_py"] [[package]] name = "awscli" -version = "1.27.132" +version = "1.31.0" description = "Universal Command Line Environment for AWS." -category = "main" optional = true python-versions = ">= 3.7" files = [ - {file = "awscli-1.27.132-py3-none-any.whl", hash = "sha256:85e183827e2f89f7de2d3ccea2f4cf10dc48c763fabec1fd11546eca9324fd0e"}, - {file = "awscli-1.27.132.tar.gz", hash = "sha256:cf1464b9dc1ffdee6c1e5f8e33cb9f5d0c7e1feb1e7638c8b437df3f6f40f186"}, + {file = "awscli-1.31.0-py3-none-any.whl", hash = "sha256:182499f95fd3a6bf7d6ebd72ee68609990008c64a3646161b80023d3c9e42e95"}, + {file = "awscli-1.31.0.tar.gz", hash = "sha256:6e8d396a8fb95fcdb8d2713153596ce0d8d4a1f62ab9e365e832e10f78f4237e"}, ] [package.dependencies] -botocore = "1.29.132" +botocore = "1.33.0" colorama = ">=0.2.5,<0.4.5" docutils = ">=0.10,<0.17" -PyYAML = ">=3.10,<5.5" +PyYAML = ">=3.10,<6.1" rsa = ">=3.1.2,<4.8" -s3transfer = ">=0.6.0,<0.7.0" +s3transfer = ">=0.8.0,<0.9.0" [[package]] name = "bitmath" version = "1.3.3.1" description = "Pythonic module for representing and manipulating file sizes with different prefix notations (file size unit conversion)" -category = "main" optional = true python-versions = "*" files = [ @@ -201,49 +150,49 @@ files = [ [[package]] name = "boto3" -version = "1.26.132" +version = "1.33.0" description = "The AWS SDK for Python" -category = "main" optional = true python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.132-py3-none-any.whl", hash = "sha256:e579b70028cdc4194fe92c745256b04880e7db39259a4c8a61b71117713d3c17"}, - {file = "boto3-1.26.132.tar.gz", hash = "sha256:d45672571da9bf4ba130d525832013aef95aee83b1711e847ef7cdb54cc5ac41"}, + {file = 
"boto3-1.33.0-py3-none-any.whl", hash = "sha256:799fe8399ea132aa5aa868caf78c47ef9ed675d5ef61be97cb7131081bb8a861"}, + {file = "boto3-1.33.0.tar.gz", hash = "sha256:ebf6d86217c37986f965dbe35a3bbd0318127d23a65737ab6486667496decb54"}, ] [package.dependencies] -botocore = ">=1.29.132,<1.30.0" +botocore = ">=1.33.0,<1.34.0" jmespath = ">=0.7.1,<2.0.0" -s3transfer = ">=0.6.0,<0.7.0" +s3transfer = ">=0.8.0,<0.9.0" [package.extras] crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.132" +version = "1.33.0" description = "Low-level, data-driven core of boto 3." -category = "main" optional = true python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.132-py3-none-any.whl", hash = "sha256:422186c13406a2c2668e4b2d9070097b4b024a9290a6af2a8e21eb2bd17322d6"}, - {file = "botocore-1.29.132.tar.gz", hash = "sha256:9b6d2b60325b815ff9123f172af83b7b866c8813088d969eeb9030fa189417f6"}, + {file = "botocore-1.33.0-py3-none-any.whl", hash = "sha256:ccf3d67fd046265ae73bc9862d1618c6e774a61a96beac832edb63d9a21fe1ba"}, + {file = "botocore-1.33.0.tar.gz", hash = "sha256:e35526421fe8ee180b6aed3102929594aa51e4d60e3f29366a603707c37c0d52"}, ] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" -urllib3 = ">=1.25.4,<1.27" +urllib3 = [ + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}, +] [package.extras] -crt = ["awscrt (==0.16.9)"] +crt = ["awscrt (==0.19.17)"] [[package]] name = "bottle" version = "0.12.25" description = "Fast and simple WSGI-framework for small web-applications." -category = "main" optional = true python-versions = "*" files = [ @@ -255,7 +204,6 @@ files = [ name = "cached-property" version = "1.5.2" description = "A decorator for caching properties in classes." 
-category = "main" optional = true python-versions = "*" files = [ @@ -267,7 +215,6 @@ files = [ name = "cachetools" version = "5.3.0" description = "Extensible memoizing collections and decorators" -category = "main" optional = false python-versions = "~=3.7" files = [ @@ -279,7 +226,6 @@ files = [ name = "certifi" version = "2023.5.7" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -291,7 +237,6 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = true python-versions = "*" files = [ @@ -368,7 +313,6 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." -category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -380,7 +324,6 @@ files = [ name = "chardet" version = "4.0.0" description = "Universal encoding detector for Python 2 and 3" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -392,7 +335,6 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -477,7 +419,6 @@ files = [ name = "chex" version = "0.1.5" description = "Chex: Testing made fun, in JAX!" 
-category = "main" optional = true python-versions = ">=3.7" files = [ @@ -497,7 +438,6 @@ toolz = ">=0.9.0" name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -507,13 +447,11 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "cloudpickle" version = "2.2.1" description = "Extended pickling support for Python objects" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -523,14 +461,13 @@ files = [ [[package]] name = "cmaes" -version = "0.9.1" +version = "0.10.0" description = "Lightweight Covariance Matrix Adaptation Evolution Strategy (CMA-ES) implementation for Python 3." -category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "cmaes-0.9.1-py3-none-any.whl", hash = "sha256:6e2930b6a99dd94621bf62966c13d29e6a7f90a909b4e4266010d5f3a7fb74b8"}, - {file = "cmaes-0.9.1.tar.gz", hash = "sha256:d122f8d46377f643a150c85ffc81c4e33909a34cfdcb522ee7a6fb17ea4f232c"}, + {file = "cmaes-0.10.0-py3-none-any.whl", hash = "sha256:72cea747ad37b1780b0eb6f3c098cee33907fafbf6690c0c02db1e010cab72f6"}, + {file = "cmaes-0.10.0.tar.gz", hash = "sha256:48afc70df027114739872b50489ae6b32461c307b92d084a63c7090a9742faf9"}, ] [package.dependencies] @@ -543,7 +480,6 @@ cmawm = ["scipy"] name = "colorama" version = "0.4.4" description = "Cross-platform colored terminal text." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -555,7 +491,6 @@ files = [ name = "colorlog" version = "6.7.0" description = "Add colours to the output of Python's logging module." 
-category = "main" optional = true python-versions = ">=3.6" files = [ @@ -573,7 +508,6 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] name = "commonmark" version = "0.9.1" description = "Python parser for the CommonMark Markdown spec" -category = "main" optional = false python-versions = "*" files = [ @@ -588,7 +522,6 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] name = "cycler" version = "0.11.0" description = "Composable style cycles" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -596,61 +529,10 @@ files = [ {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, ] -[[package]] -name = "cython" -version = "0.29.34" -description = "The Cython compiler for writing C extensions for the Python language." -category = "main" -optional = true -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "Cython-0.29.34-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:742544024ddb74314e2d597accdb747ed76bd126e61fcf49940a5b5be0a8f381"}, - {file = "Cython-0.29.34-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03daae07f8cbf797506446adae512c3dd86e7f27a62a541fa1ee254baf43e32c"}, - {file = "Cython-0.29.34-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5a8de3e793a576e40ca9b4f5518610cd416273c7dc5e254115656b6e4ec70663"}, - {file = "Cython-0.29.34-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:60969d38e6a456a67e7ef8ae20668eff54e32ba439d4068ccf2854a44275a30f"}, - {file = "Cython-0.29.34-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:21b88200620d80cfe193d199b259cdad2b9af56f916f0f7f474b5a3631ca0caa"}, - {file = "Cython-0.29.34-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:308c8f1e58bf5e6e8a1c4dcf8abbd2d13d0f9b1e582f4d9ae8b89857342d8bb5"}, - {file = 
"Cython-0.29.34-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:d8f822fb6ecd5d88c42136561f82960612421154fc5bf23c57103a367bb91356"}, - {file = "Cython-0.29.34-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56866323f1660cecb4d5ff3a1fba92a56b91b7cfae0a8253777aa4bdb3bdf9a8"}, - {file = "Cython-0.29.34-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e971db8aeb12e7c0697cefafe65eefcc33ff1224ae3d8c7f83346cbc42c6c270"}, - {file = "Cython-0.29.34-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4401270b0dc464c23671e2e9d52a60985f988318febaf51b047190e855bbe7d"}, - {file = "Cython-0.29.34-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:dce0a36d163c05ae8b21200059511217d79b47baf2b7b0f926e8367bd7a3cc24"}, - {file = "Cython-0.29.34-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dbd79221869ee9a6ccc4953b2c8838bb6ae08ab4d50ea4b60d7894f03739417b"}, - {file = "Cython-0.29.34-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a0f4229df10bc4545ebbeaaf96ebb706011d8b333e54ed202beb03f2bee0a50e"}, - {file = "Cython-0.29.34-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fd1ea21f1cebf33ae288caa0f3e9b5563a709f4df8925d53bad99be693fc0d9b"}, - {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d7ef5f68f4c5baa93349ea54a352f8716d18bee9a37f3e93eff38a5d4e9b7262"}, - {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:459994d1de0f99bb18fad9f2325f760c4b392b1324aef37bcc1cd94922dfce41"}, - {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:1d6c809e2f9ce5950bbc52a1d2352ef3d4fc56186b64cb0d50c8c5a3c1d17661"}, - {file = 
"Cython-0.29.34-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f674ceb5f722d364395f180fbac273072fc1a266aab924acc9cfd5afc645aae1"}, - {file = "Cython-0.29.34-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9489de5b2044dcdfd9d6ca8242a02d560137b3c41b1f5ae1c4f6707d66d6e44d"}, - {file = "Cython-0.29.34-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:5c121dc185040f4333bfded68963b4529698e1b6d994da56be32c97a90c896b6"}, - {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:b6149f7cc5b31bccb158c5b968e5a8d374fdc629792e7b928a9b66e08b03fca5"}, - {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0ab3cbf3d62b0354631a45dc93cfcdf79098663b1c65a6033af4a452b52217a7"}, - {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:4a2723447d1334484681d5aede34184f2da66317891f94b80e693a2f96a8f1a7"}, - {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e40cf86aadc29ecd1cb6de67b0d9488705865deea4fc185c7ad56d7a6fc78703"}, - {file = "Cython-0.29.34-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8c3cd8bb8e880a3346f5685601004d96e0a2221e73edcaeea57ea848618b4ac6"}, - {file = "Cython-0.29.34-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0e9032cd650b0cb1d2c2ef2623f5714c14d14c28d7647d589c3eeed0baf7428e"}, - {file = "Cython-0.29.34-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bdb3285660e3068438791ace7dd7b1efd6b442a10b5c8d7a4f0c9d184d08c8ed"}, - {file = "Cython-0.29.34-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a8ad755f9364e720f10a36734a1c7a5ced5c679446718b589259261438a517c9"}, - {file = "Cython-0.29.34-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = 
"sha256:7595d29eaee95633dd8060f50f0e54b27472d01587659557ebcfe39da3ea946b"}, - {file = "Cython-0.29.34-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e6ef7879668214d80ea3914c17e7d4e1ebf4242e0dd4dabe95ca5ccbe75589a5"}, - {file = "Cython-0.29.34-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ccb223b5f0fd95d8d27561efc0c14502c0945f1a32274835831efa5d5baddfc1"}, - {file = "Cython-0.29.34-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:11b1b278b8edef215caaa5250ad65a10023bfa0b5a93c776552248fc6f60098d"}, - {file = "Cython-0.29.34-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5718319a01489688fdd22ddebb8e2fcbbd60be5f30de4336ea7063c3ae29fbe5"}, - {file = "Cython-0.29.34-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:cfb2302ef617d647ee590a4c0a00ba3c2da05f301dcefe7721125565d2e51351"}, - {file = "Cython-0.29.34-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:67b850cf46b861bc27226d31e1d87c0e69869a02f8d3cc5d5bef549764029879"}, - {file = "Cython-0.29.34-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0963266dad685812c1dbb758fcd4de78290e3adc7db271c8664dcde27380b13e"}, - {file = "Cython-0.29.34-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7879992487d9060a61393eeefe00d299210256928dce44d887b6be313d342bac"}, - {file = "Cython-0.29.34-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:44733366f1604b0c327613b6918469284878d2f5084297d10d26072fc6948d51"}, - {file = "Cython-0.29.34-py2.py3-none-any.whl", hash = "sha256:be4f6b7be75a201c290c8611c0978549c60353890204573078e865423dbe3c83"}, - {file = "Cython-0.29.34.tar.gz", hash = "sha256:1909688f5d7b521a60c396d20bba9e47a1b2d2784bfb085401e1e1e7d29a29a8"}, -] - [[package]] name = "dataclasses" version = "0.6" description = "A backport of the dataclasses module for Python 3.6" -category = "main" optional = true 
python-versions = "*" files = [ @@ -662,7 +544,6 @@ files = [ name = "decorator" version = "4.4.2" description = "Decorators for Humans" -category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*" files = [ @@ -674,7 +555,6 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -689,7 +569,6 @@ graph = ["objgraph (>=1.7.2)"] name = "distlib" version = "0.3.6" description = "Distribution utilities" -category = "dev" optional = false python-versions = "*" files = [ @@ -701,7 +580,6 @@ files = [ name = "dm-control" version = "1.0.11" description = "Continuous control environments and MuJoCo Python bindings." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -733,7 +611,6 @@ hdf5 = ["h5py"] name = "dm-env" version = "1.6" description = "A Python interface for Reinforcement Learning environments." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -750,7 +627,6 @@ numpy = "*" name = "dm-tree" version = "0.1.8" description = "Tree is a library for working with nested data structures." 
-category = "main" optional = true python-versions = "*" files = [ @@ -799,7 +675,6 @@ files = [ name = "docker-pycreds" version = "0.4.0" description = "Python bindings for the docker credentials store API" -category = "main" optional = false python-versions = "*" files = [ @@ -810,11 +685,21 @@ files = [ [package.dependencies] six = ">=1.4.0" +[[package]] +name = "docstring-parser" +version = "0.15" +description = "Parse Python docstrings in reST, Google and Numpydoc format" +optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = "docstring_parser-0.15-py3-none-any.whl", hash = "sha256:d1679b86250d269d06a99670924d6bce45adc00b08069dae8c47d98e89b667a9"}, + {file = "docstring_parser-0.15.tar.gz", hash = "sha256:48ddc093e8b1865899956fcc03b03e66bb7240c310fac5af81814580c55bf682"}, +] + [[package]] name = "docutils" version = "0.16" description = "Docutils -- Python Documentation Utilities" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -826,7 +711,6 @@ files = [ name = "enum-tools" version = "0.9.0.post1" description = "Tools to expand Python's enum module." 
-category = "main" optional = true python-versions = ">=3.6" files = [ @@ -846,7 +730,6 @@ sphinx = ["sphinx (>=3.2.0)", "sphinx-toolbox (>=2.16.0)"] name = "envpool" version = "0.6.6" description = "\"C++-based high-performance parallel environment execution engine (vectorized env) for general RL environments.\"" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -869,7 +752,6 @@ typing-extensions = "*" name = "etils" version = "0.9.0" description = "Collection of common python utils" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -898,7 +780,6 @@ lazy-imports = ["etils[ecolab]"] name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -913,7 +794,6 @@ test = ["pytest (>=6)"] name = "expt" version = "0.4.1" description = "EXperiment. Plot. Tabulate." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -936,7 +816,6 @@ test = ["mock (>=2.0.0)", "pytest (>=5.0)", "pytest-asyncio", "pytest-cov", "ten name = "farama-notifications" version = "0.0.4" description = "Notifications for all Farama Foundation maintained libraries." -category = "main" optional = false python-versions = "*" files = [ @@ -944,27 +823,10 @@ files = [ {file = "Farama_Notifications-0.0.4-py3-none-any.whl", hash = "sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae"}, ] -[[package]] -name = "fasteners" -version = "0.15" -description = "A python package that provides useful locks." 
-category = "main" -optional = true -python-versions = "*" -files = [ - {file = "fasteners-0.15-py2.py3-none-any.whl", hash = "sha256:007e4d2b2d4a10093f67e932e5166722d2eab83b77724156e92ad013c6226574"}, - {file = "fasteners-0.15.tar.gz", hash = "sha256:3a176da6b70df9bb88498e1a18a9e4a8579ed5b9141207762368a1017bf8f5ef"}, -] - -[package.dependencies] -monotonic = ">=0.1" -six = "*" - [[package]] name = "filelock" version = "3.12.0" description = "A platform independent file lock." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -978,19 +840,17 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "p [[package]] name = "flax" -version = "0.6.4" +version = "0.6.8" description = "Flax: A neural network library for JAX designed for flexibility" -category = "main" optional = true python-versions = "*" files = [ - {file = "flax-0.6.4-py3-none-any.whl", hash = "sha256:fe5010525202241fdc960920033d2e4c0b35f06090c1ad9e280b1f4415ae308f"}, - {file = "flax-0.6.4.tar.gz", hash = "sha256:d06465a3e6636c3c23c29f651a13f5367d06c41373b441dc8ec1bfaa4db06a48"}, + {file = "flax-0.6.8-py3-none-any.whl", hash = "sha256:221225804c263e39fe3cc8f754dc4192597cb0f063926b2338ea6563604747ed"}, + {file = "flax-0.6.8.tar.gz", hash = "sha256:bf1f81dd5dfbb10c603490531a86b1174ebbc38e5c5e8116a98115c135194c10"}, ] [package.dependencies] -jax = ">=0.3.16" -matplotlib = "*" +jax = ">=0.4.2" msgpack = "*" numpy = ">=1.12" optax = "*" @@ -1001,13 +861,13 @@ tensorstore = "*" typing-extensions = ">=4.1.1" [package.extras] -testing = ["atari-py (==0.2.5)", "clu", "gym (==0.18.3)", "jaxlib", "jraph (>=0.0.6dev0)", "ml-collections", "mypy", "opencv-python", "pytest", "pytest-cov", "pytest-custom-exit-code", "pytest-xdist (==1.34.0)", "pytype", "sentencepiece", "tensorflow", "tensorflow-datasets", "tensorflow-text (>=2.4.0)", "torch"] +all = ["matplotlib"] +testing = ["atari-py (==0.2.5)", "clu", "einops", "gym (==0.18.3)", "jaxlib", "jraph (>=0.0.6dev0)", 
"ml-collections", "mypy", "nbstripout", "opencv-python", "pytest", "pytest-cov", "pytest-custom-exit-code", "pytest-xdist (==1.34.0)", "pytype", "sentencepiece", "tensorflow", "tensorflow-datasets", "tensorflow-text (>=2.11.0)", "torch"] [[package]] name = "fonttools" version = "4.38.0" description = "Tools to manipulate font files" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1029,115 +889,10 @@ ufo = ["fs (>=2.2.0,<3)"] unicode = ["unicodedata2 (>=14.0.0)"] woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] -[[package]] -name = "free-mujoco-py" -version = "2.1.6" -description = "" -category = "main" -optional = true -python-versions = ">=3.7.1,<3.11" -files = [ - {file = "free-mujoco-py-2.1.6.tar.gz", hash = "sha256:77e18302e21979bbd77a7c1584070815843cab1b1249f8a17667e15aba528a9a"}, - {file = "free_mujoco_py-2.1.6-py3-none-any.whl", hash = "sha256:f541d84b6bd87919ccf28f5a708681ca90560a945d104aca393d89275790efb8"}, -] - -[package.dependencies] -cffi = ">=1.15.0,<2.0.0" -Cython = ">=0.29.24,<0.30.0" -fasteners = "0.15" -glfw = ">=1.4.0,<2.0.0" -imageio = ">=2.9.0,<3.0.0" -numpy = ">=1.21.3,<2.0.0" - -[[package]] -name = "frozenlist" -version = "1.3.3" -description = "A list-like structure which implements collections.abc.MutableSequence" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"}, - {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"}, - {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"}, - {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"}, - {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"}, - {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"}, - {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"}, - {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"}, - {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"}, - {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"}, - {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"}, - {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"}, - {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"}, - {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"}, - {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"}, - {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"}, - {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"}, - {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"}, - {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"}, - {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"}, - {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"}, - {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"}, - {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"}, - {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"}, - {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"}, - {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"}, - {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"}, - {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"}, - {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"}, - {file = 
"frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"}, - {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"}, - {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"}, - {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"}, - {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"}, - {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, -] - [[package]] name = "ghp-import" version = "2.1.0" description = "Copy your docs directly to the gh-pages branch." 
-category = "main" optional = true python-versions = "*" files = [ @@ -1155,7 +910,6 @@ dev = ["flake8", "markdown", "twine", "wheel"] name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1170,7 +924,6 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.31" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1180,13 +933,11 @@ files = [ [package.dependencies] gitdb = ">=4.0.1,<5" -typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""} [[package]] name = "glcontext" version = "2.3.7" description = "Portable OpenGL Context" -category = "main" optional = true python-versions = "*" files = [ @@ -1246,7 +997,6 @@ files = [ name = "glfw" version = "1.12.0" description = "A ctypes-based wrapper for GLFW3." -category = "main" optional = true python-versions = "*" files = [ @@ -1263,7 +1013,6 @@ files = [ name = "google-auth" version = "2.18.0" description = "Google Authentication Library" -category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" files = [ @@ -1289,7 +1038,6 @@ requests = ["requests (>=2.20.0,<3.0.0dev)"] name = "google-auth-oauthlib" version = "0.4.6" description = "Google Authentication Library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1308,7 +1056,6 @@ tool = ["click (>=6.0.0)"] name = "graphviz" version = "0.20.1" description = "Simple Python interface for Graphviz" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1325,7 +1072,6 @@ test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>= name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" -category = "main" optional = true python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files 
= [ @@ -1403,7 +1149,6 @@ test = ["objgraph", "psutil"] name = "grpcio" version = "1.54.0" description = "HTTP/2-based RPC framework" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1461,7 +1206,6 @@ protobuf = ["grpcio-tools (>=1.54.0)"] name = "gym" version = "0.23.1" description = "Gym: A universal API for reinforcement learning environments" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1489,7 +1233,6 @@ toy-text = ["pygame (==2.1.0)", "scipy (>=1.4.1)"] name = "gym-notices" version = "0.0.8" description = "Notices for gym" -category = "main" optional = false python-versions = "*" files = [ @@ -1501,7 +1244,6 @@ files = [ name = "gym3" version = "0.3.3" description = "Vectorized Reinforcement Learning Environment Interface" -category = "main" optional = true python-versions = ">=3.6.0" files = [ @@ -1523,7 +1265,6 @@ test = ["gym (==0.17.2)", "gym-retro (==0.8.0)", "mpi4py (==3.0.3)", "pytest (== name = "gymnasium" version = "0.28.1" description = "A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1541,9 +1282,9 @@ typing-extensions = ">=4.3.0" [package.extras] accept-rom-license = ["autorom[accept-rom-license] (>=0.4.2,<0.5.0)"] -all = ["box2d-py (==2.3.5)", "imageio (>=2.14.1)", "jax (==0.3.24)", "jaxlib (==0.3.24)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.3.2)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.3)", "shimmy[atari] (>=0.1.0,<1.0)", "swig (>=4.0.0,<5.0.0)", "torch (>=1.0.0)"] +all = ["box2d-py (==2.3.5)", "imageio (>=2.14.1)", "jax (==0.3.24)", "jaxlib (==0.3.24)", "lz4 (>=3.1.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.3.2)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (==2.1.3)", "shimmy[atari] (>=0.1.0,<1.0)", "swig (==4.*)", "torch (>=1.0.0)"] atari = ["shimmy[atari] (>=0.1.0,<1.0)"] -box2d = ["box2d-py (==2.3.5)", "pygame (==2.1.3)", "swig (>=4.0.0,<5.0.0)"] +box2d = ["box2d-py (==2.3.5)", "pygame (==2.1.3)", "swig (==4.*)"] classic-control = ["pygame (==2.1.3)", "pygame (==2.1.3)"] jax = ["jax (==0.3.24)", "jaxlib (==0.3.24)"] mujoco = ["imageio (>=2.14.1)", "mujoco (>=2.3.2)"] @@ -1556,7 +1297,6 @@ toy-text = ["pygame (==2.1.3)", "pygame (==2.1.3)"] name = "h5py" version = "3.8.0" description = "Read and write HDF5 files from Python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1594,7 +1334,6 @@ numpy = ">=1.14.5" name = "hbutils" version = "0.8.6" description = "Some useful functions and classes in Python infrastructure development." 
-category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1617,7 +1356,6 @@ test = ["click (>=7.0.0)", "coverage (>=5)", "easydict (>=1.7,<2)", "faker", "fl name = "huggingface-hub" version = "0.11.1" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -1627,7 +1365,6 @@ files = [ [package.dependencies] filelock = "*" -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} packaging = ">=20.9" pyyaml = ">=5.1" requests = "*" @@ -1645,29 +1382,10 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "isort (>=5.5.4)", "jedi", "pytest" torch = ["torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] -[[package]] -name = "hydra-core" -version = "1.3.2" -description = "A framework for elegantly configuring complex applications" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"}, - {file = "hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"}, -] - -[package.dependencies] -antlr4-python3-runtime = ">=4.9.0,<4.10.0" -importlib-resources = {version = "*", markers = "python_version < \"3.9\""} -omegaconf = ">=2.2,<2.4" -packaging = "*" - [[package]] name = "identify" version = "2.5.24" description = "File identification library for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1682,7 +1400,6 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1694,7 +1411,6 @@ files = [ name = "imageio" version = "2.28.1" description = "Library for reading and writing a wide range 
of image, video, scientific, and volumetric data formats." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1726,7 +1442,6 @@ tifffile = ["tifffile"] name = "imageio-ffmpeg" version = "0.3.0" description = "FFMPEG wrapper for Python" -category = "main" optional = false python-versions = "*" files = [ @@ -1741,7 +1456,6 @@ files = [ name = "importlib-metadata" version = "5.2.0" description = "Read metadata from Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1750,7 +1464,6 @@ files = [ ] [package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} zipp = ">=0.5" [package.extras] @@ -1762,8 +1475,7 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag name = "importlib-resources" version = "5.12.0" description = "Read resources from Python packages" -category = "main" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, @@ -1781,7 +1493,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1789,88 +1500,40 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "isaacgym" -version = "1.0.preview4" -description = "" -category = "dev" -optional = false -python-versions = ">=3.7.1" -files = [] -develop = true - -[package.dependencies] -gym = "0.23.1" -imageio = "^2.19.5" -ninja = "^1.10.2" -numpy = ">=1.16.4" -Pillow = "^9.2.0" -PyYAML = ">=5.3.1" -scipy = ">=1.5.0" -torch = "^1.12.0" -torchvision = "^0.13.0" - -[package.source] -type = "directory" -url = 
"cleanrl/ppo_continuous_action_isaacgym/isaacgym" - -[[package]] -name = "isaacgymenvs" -version = "0.1.0" -description = "" -category = "dev" -optional = false -python-versions = ">=3.7.1,<3.10" -files = [] -develop = false - -[package.dependencies] -gym = "0.23.1" -hydra-core = "^1.2.0" -numpy = ">=1.16.4" -omegaconf = "^2.2.2" -PyVirtualDisplay = "^3.0" -rl-games = "1.5.2" -termcolor = "^1.1.0" - -[package.source] -type = "git" -url = "https://github.com/vwxyzjn/IsaacGymEnvs.git" -reference = "poetry" -resolved_reference = "27cc130a811b2305056c2f03f5f4cc0819b7867c" - [[package]] name = "jax" -version = "0.3.25" +version = "0.4.8" description = "Differentiate, compile, and transform Numpy code." -category = "main" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "jax-0.3.25.tar.gz", hash = "sha256:18bea69321cb95ea5ea913adfe5e2c1d453cade9d4cfd0dc814ecba9fc0cb6e3"}, + {file = "jax-0.4.8.tar.gz", hash = "sha256:08116481f7336db16c24812bfb5e6f9786915f4c2f6ff4028331fa69e7535202"}, ] [package.dependencies] -numpy = ">=1.20" +ml_dtypes = ">=0.0.3" +numpy = ">=1.21" opt_einsum = "*" -scipy = ">=1.5" -typing_extensions = "*" +scipy = ">=1.7" [package.extras] australis = ["protobuf (>=3.13,<4)"] -ci = ["jaxlib (==0.3.24)"] -cpu = ["jaxlib (==0.3.25)"] -cuda = ["jaxlib (==0.3.25+cuda11.cudnn82)"] -cuda11-cudnn805 = ["jaxlib (==0.3.25+cuda11.cudnn805)"] -cuda11-cudnn82 = ["jaxlib (==0.3.25+cuda11.cudnn82)"] -minimum-jaxlib = ["jaxlib (==0.3.22)"] -tpu = ["jaxlib (==0.3.25)", "libtpu-nightly (==0.1.dev20221109)", "requests"] +ci = ["jaxlib (==0.4.7)"] +cpu = ["jaxlib (==0.4.7)"] +cuda = ["jaxlib (==0.4.7+cuda11.cudnn86)"] +cuda11-cudnn82 = ["jaxlib (==0.4.7+cuda11.cudnn82)"] +cuda11-cudnn86 = ["jaxlib (==0.4.7+cuda11.cudnn86)"] +cuda11-local = ["jaxlib (==0.4.7+cuda11.cudnn86)"] +cuda11-pip = ["jaxlib (==0.4.7+cuda11.cudnn86)", "nvidia-cublas-cu11 (>=11.11)", "nvidia-cuda-nvcc-cu11 (>=11.8)", "nvidia-cuda-runtime-cu11 (>=11.8)", 
"nvidia-cudnn-cu11 (>=8.6)", "nvidia-cufft-cu11 (>=10.9)", "nvidia-cusolver-cu11 (>=11.4)", "nvidia-cusparse-cu11 (>=11.7)"] +cuda12-local = ["jaxlib (==0.4.7+cuda12.cudnn88)"] +cuda12-pip = ["jaxlib (==0.4.7+cuda12.cudnn88)", "nvidia-cublas-cu12", "nvidia-cuda-nvcc-cu12", "nvidia-cuda-runtime-cu12", "nvidia-cudnn-cu12", "nvidia-cufft-cu12", "nvidia-cusolver-cu12", "nvidia-cusparse-cu12"] +minimum-jaxlib = ["jaxlib (==0.4.7)"] +tpu = ["jaxlib (==0.4.7)", "libtpu-nightly (==0.1.dev20230327)", "requests"] [[package]] name = "jax-jumpy" version = "1.0.0" description = "Common backend for Jax or Numpy." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1887,37 +1550,34 @@ testing = ["pytest (==7.1.3)"] [[package]] name = "jaxlib" -version = "0.3.25" +version = "0.4.7" description = "XLA library for JAX" -category = "main" optional = true -python-versions = ">=3.7" -files = [ - {file = "jaxlib-0.3.25-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:09508f7000c0fa958fba29267338e8de75b31d7ea29bd79719a568c38f0f8d31"}, - {file = "jaxlib-0.3.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c75c8efd3702687968820446e3fb9ff997f8a2a07ab92e33b80e2f12eab3d9a"}, - {file = "jaxlib-0.3.25-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:6e2f4e51041b8371aa3976b5a3a9cdcdccb1bd7b040c9b1345cbf24bd28a8d19"}, - {file = "jaxlib-0.3.25-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:f2d517635fd77e2729c0ab7863be0d290927d01b2abb2f5dc955c821f8b0d53e"}, - {file = "jaxlib-0.3.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:13446a8382aa9ed944c16af636ca111d0afbbead91eed5cc2dc71195045e71b3"}, - {file = "jaxlib-0.3.25-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:71866aeaafbc9a12b59dcbe443353772ef235b40c53f8bd7403d39311822c276"}, - {file = "jaxlib-0.3.25-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:1e59ba58c9e93c1e1cef243f2609ec0b0c0a81160c20b9555aecdea32ccd6a78"}, - {file = "jaxlib-0.3.25-cp37-cp37m-manylinux2014_x86_64.whl", hash = 
"sha256:5295354ed5db111e6f3e88cdfa4010d11c33dd926ac61735b9096b4e0746aa7b"}, - {file = "jaxlib-0.3.25-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:9f3116389ee834b3cdeb30001b085f4b55d7741366034f041c1d377154aa5afa"}, - {file = "jaxlib-0.3.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:78b29c72d0680829db9377ed9be326875849258a60b8173b4a388b34ad18bc78"}, - {file = "jaxlib-0.3.25-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:2e008e0a6c10aa7e949555e98dc0471e0d550d5d7c109771e38a971b49480538"}, - {file = "jaxlib-0.3.25-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:fec043cdd55f3257d02e9d8880b33860eacadcae1bd5e26f43fdd08ada23614d"}, - {file = "jaxlib-0.3.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50193ba0cbf879021c9d73d7bcfa7eafb9138895d057b774c301aac3701f9a5"}, - {file = "jaxlib-0.3.25-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:1f1448f102a9d05186f579b6931fa0c607783ecc915fdfaa482c19538affa180"}, +python-versions = ">=3.8" +files = [ + {file = "jaxlib-0.4.7-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:63c2890978e8646516db3d8a680b43d2bed8b63543a70556391f589a261bd85f"}, + {file = "jaxlib-0.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c16f922507277d5630e81d9c1a4974366a27aad5230d645d063bc2011564d01"}, + {file = "jaxlib-0.4.7-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:da88382e6487805974cea6facc61ba92b5828a7a1f2dd80f762c487d873a2b47"}, + {file = "jaxlib-0.4.7-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:022b216036c009989d4c0683538820c19247215bb99fdd35c7bf32838d596be6"}, + {file = "jaxlib-0.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d0f1d3b6ef6c68013898cca958ab1507d6809b523275037efbdb9aaaaab158ba"}, + {file = "jaxlib-0.4.7-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:0ae7178c33460822d9d8d03718cba395e02e6bac2402709c35826c94f0c9cc7b"}, + {file = "jaxlib-0.4.7-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ea07605e37d2b4e25f3c639e0d22ab4605fbc1a10ea918fd14ce09077bdaffb6"}, + {file = 
"jaxlib-0.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:48b85d3c8923b1619ddf8cbf14c4e4daf6919796d8aa9d006ce2a085e8202930"}, + {file = "jaxlib-0.4.7-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a860f2990c97bee5ffcdbb5111751591e5e7a66d5e32b4f6d9e6aa14ac82bf27"}, + {file = "jaxlib-0.4.7-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c78dc2b6fa1c92ead137a23d1bd3e10d04c58b268e77eca811502abac05b2b19"}, + {file = "jaxlib-0.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f1f3726e374d0d6fcc14da540b71b758d37356c6726f0f4b48e2f5530a5f8769"}, + {file = "jaxlib-0.4.7-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:d4629205dbe342153941db5f69c4a1bfe35fd8d2947aebe34f4dff3771d3fff7"}, ] [package.dependencies] -numpy = ">=1.20" -scipy = ">=1.5" +ml-dtypes = ">=0.0.3" +numpy = ">=1.21" +scipy = ">=1.7" [[package]] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1935,7 +1595,6 @@ i18n = ["Babel (>=2.7)"] name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1947,7 +1606,6 @@ files = [ name = "joblib" version = "1.2.0" description = "Lightweight pipelining with Python functions" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1955,35 +1613,10 @@ files = [ {file = "joblib-1.2.0.tar.gz", hash = "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"}, ] -[[package]] -name = "jsonschema" -version = "4.17.3" -description = "An implementation of JSON Schema validation for Python" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"}, - {file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"}, -] - 
-[package.dependencies] -attrs = ">=17.4.0" -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} -importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} -pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} -pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" -typing-extensions = {version = "*", markers = "python_version < \"3.8\""} - -[package.extras] -format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] - [[package]] name = "kiwisolver" version = "1.4.4" description = "A fast implementation of the Cassowary constraint solver" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2057,14 +1690,10 @@ files = [ {file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"}, ] -[package.dependencies] -typing-extensions = {version = "*", markers = "python_version < \"3.8\""} - [[package]] name = "labmaze" version = "1.0.6" description = "LabMaze: DeepMind Lab's text maze generator." 
-category = "main" optional = true python-versions = "*" files = [ @@ -2078,11 +1707,6 @@ files = [ {file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70635d1cdb0147a02efb6b3f607a52cdc51723bc3dcc42717a0d4ef55fa0a987"}, {file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff472793238bd9b6dabea8094594d6074ad3c111455de3afcae72f6c40c6817e"}, {file = "labmaze-1.0.6-cp311-cp311-win_amd64.whl", hash = "sha256:2317e65e12fa3d1abecda7e0488dab15456cee8a2e717a586bfc8f02a91579e7"}, - {file = "labmaze-1.0.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e36b6fadcd78f22057b597c1c77823e806a0987b3bdfbf850e14b6b5b502075e"}, - {file = "labmaze-1.0.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d1a4f8de29c2c3d7f14163759b69cd3f237093b85334c983619c1db5403a223b"}, - {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a394f8bb857fcaa2884b809d63e750841c2662a106cfe8c045f2112d201ac7d5"}, - {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d17abb69d4dfc56183afb5c317e8b2eaca0587abb3aabd2326efd3143c81f4e"}, - {file = "labmaze-1.0.6-cp312-cp312-win_amd64.whl", hash = "sha256:5af997598cc46b1929d1c5a1febc32fd56c75874fe481a2a5982c65cee8450c9"}, {file = "labmaze-1.0.6-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:a4c5bc6e56baa55ce63b97569afec2f80cab0f6b952752a131e1f83eed190a53"}, {file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3955f24fe5f708e1e97495b4cfe284b70ae4fd51be5e17b75a6fc04ffbd67bca"}, {file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed96ddc0bb8d66df36428c94db83949fd84a15867e8250763a4c5e3d82104c54"}, @@ -2107,102 +1731,115 @@ setuptools = "!=50.0.0" [[package]] name = "lxml" -version = "4.9.2" +version = "4.9.3" description = "Powerful and Pythonic XML processing library combining 
libxml2/libxslt with the ElementTree API." -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ - {file = "lxml-4.9.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:76cf573e5a365e790396a5cc2b909812633409306c6531a6877c59061e42c4f2"}, - {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b1f42b6921d0e81b1bcb5e395bc091a70f41c4d4e55ba99c6da2b31626c44892"}, - {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f102706d0ca011de571de32c3247c6476b55bb6bc65a20f682f000b07a4852a"}, - {file = "lxml-4.9.2-cp27-cp27m-win32.whl", hash = "sha256:8d0b4612b66ff5d62d03bcaa043bb018f74dfea51184e53f067e6fdcba4bd8de"}, - {file = "lxml-4.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:4c8f293f14abc8fd3e8e01c5bd86e6ed0b6ef71936ded5bf10fe7a5efefbaca3"}, - {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2899456259589aa38bfb018c364d6ae7b53c5c22d8e27d0ec7609c2a1ff78b50"}, - {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6749649eecd6a9871cae297bffa4ee76f90b4504a2a2ab528d9ebe912b101975"}, - {file = "lxml-4.9.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a08cff61517ee26cb56f1e949cca38caabe9ea9fbb4b1e10a805dc39844b7d5c"}, - {file = "lxml-4.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:85cabf64adec449132e55616e7ca3e1000ab449d1d0f9d7f83146ed5bdcb6d8a"}, - {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8340225bd5e7a701c0fa98284c849c9b9fc9238abf53a0ebd90900f25d39a4e4"}, - {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1ab8f1f932e8f82355e75dda5413a57612c6ea448069d4fb2e217e9a4bed13d4"}, - {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:699a9af7dffaf67deeae27b2112aa06b41c370d5e7633e0ee0aea2e0b6c211f7"}, - {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9cc34af337a97d470040f99ba4282f6e6bac88407d021688a5d585e44a23184"}, - {file = "lxml-4.9.2-cp310-cp310-win32.whl", hash = "sha256:d02a5399126a53492415d4906ab0ad0375a5456cc05c3fc0fc4ca11771745cda"}, - {file = "lxml-4.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:a38486985ca49cfa574a507e7a2215c0c780fd1778bb6290c21193b7211702ab"}, - {file = "lxml-4.9.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c83203addf554215463b59f6399835201999b5e48019dc17f182ed5ad87205c9"}, - {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:2a87fa548561d2f4643c99cd13131acb607ddabb70682dcf1dff5f71f781a4bf"}, - {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:d6b430a9938a5a5d85fc107d852262ddcd48602c120e3dbb02137c83d212b380"}, - {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3efea981d956a6f7173b4659849f55081867cf897e719f57383698af6f618a92"}, - {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:df0623dcf9668ad0445e0558a21211d4e9a149ea8f5666917c8eeec515f0a6d1"}, - {file = "lxml-4.9.2-cp311-cp311-win32.whl", hash = "sha256:da248f93f0418a9e9d94b0080d7ebc407a9a5e6d0b57bb30db9b5cc28de1ad33"}, - {file = "lxml-4.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:3818b8e2c4b5148567e1b09ce739006acfaa44ce3156f8cbbc11062994b8e8dd"}, - {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0"}, - {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e"}, - {file = "lxml-4.9.2-cp35-cp35m-win32.whl", hash = 
"sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df"}, - {file = "lxml-4.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5"}, - {file = "lxml-4.9.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53"}, - {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7"}, - {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe"}, - {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f49e52d174375a7def9915c9f06ec4e569d235ad428f70751765f48d5926678c"}, - {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36c3c175d34652a35475a73762b545f4527aec044910a651d2bf50de9c3352b1"}, - {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a35f8b7fa99f90dd2f5dc5a9fa12332642f087a7641289ca6c40d6e1a2637d8e"}, - {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74"}, - {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38"}, - {file = "lxml-4.9.2-cp36-cp36m-win32.whl", hash = "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5"}, - {file = "lxml-4.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3"}, - {file = "lxml-4.9.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03"}, - {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = 
"sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941"}, - {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726"}, - {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c9ec3eaf616d67db0764b3bb983962b4f385a1f08304fd30c7283954e6a7869b"}, - {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a29ba94d065945944016b6b74e538bdb1751a1db6ffb80c9d3c2e40d6fa9894"}, - {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a82d05da00a58b8e4c0008edbc8a4b6ec5a4bc1e2ee0fb6ed157cf634ed7fa45"}, - {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:223f4232855ade399bd409331e6ca70fb5578efef22cf4069a6090acc0f53c0e"}, - {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d17bc7c2ccf49c478c5bdd447594e82692c74222698cfc9b5daae7ae7e90743b"}, - {file = "lxml-4.9.2-cp37-cp37m-win32.whl", hash = "sha256:b64d891da92e232c36976c80ed7ebb383e3f148489796d8d31a5b6a677825efe"}, - {file = "lxml-4.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a0a336d6d3e8b234a3aae3c674873d8f0e720b76bc1d9416866c41cd9500ffb9"}, - {file = "lxml-4.9.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:da4dd7c9c50c059aba52b3524f84d7de956f7fef88f0bafcf4ad7dde94a064e8"}, - {file = "lxml-4.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:821b7f59b99551c69c85a6039c65b75f5683bdc63270fec660f75da67469ca24"}, - {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e5168986b90a8d1f2f9dc1b841467c74221bd752537b99761a93d2d981e04889"}, - {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:8e20cb5a47247e383cf4ff523205060991021233ebd6f924bca927fcf25cf86f"}, - {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13598ecfbd2e86ea7ae45ec28a2a54fb87ee9b9fdb0f6d343297d8e548392c03"}, - {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:880bbbcbe2fca64e2f4d8e04db47bcdf504936fa2b33933efd945e1b429bea8c"}, - {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d2278d59425777cfcb19735018d897ca8303abe67cc735f9f97177ceff8027f"}, - {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5344a43228767f53a9df6e5b253f8cdca7dfc7b7aeae52551958192f56d98457"}, - {file = "lxml-4.9.2-cp38-cp38-win32.whl", hash = "sha256:925073b2fe14ab9b87e73f9a5fde6ce6392da430f3004d8b72cc86f746f5163b"}, - {file = "lxml-4.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:9b22c5c66f67ae00c0199f6055705bc3eb3fcb08d03d2ec4059a2b1b25ed48d7"}, - {file = "lxml-4.9.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:5f50a1c177e2fa3ee0667a5ab79fdc6b23086bc8b589d90b93b4bd17eb0e64d1"}, - {file = "lxml-4.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:090c6543d3696cbe15b4ac6e175e576bcc3f1ccfbba970061b7300b0c15a2140"}, - {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:63da2ccc0857c311d764e7d3d90f429c252e83b52d1f8f1d1fe55be26827d1f4"}, - {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:5b4545b8a40478183ac06c073e81a5ce4cf01bf1734962577cf2bb569a5b3bbf"}, - {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2e430cd2824f05f2d4f687701144556646bae8f249fd60aa1e4c768ba7018947"}, - {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6804daeb7ef69e7b36f76caddb85cccd63d0c56dedb47555d2fc969e2af6a1a5"}, - {file = 
"lxml-4.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a6e441a86553c310258aca15d1c05903aaf4965b23f3bc2d55f200804e005ee5"}, - {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca34efc80a29351897e18888c71c6aca4a359247c87e0b1c7ada14f0ab0c0fb2"}, - {file = "lxml-4.9.2-cp39-cp39-win32.whl", hash = "sha256:6b418afe5df18233fc6b6093deb82a32895b6bb0b1155c2cdb05203f583053f1"}, - {file = "lxml-4.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f1496ea22ca2c830cbcbd473de8f114a320da308438ae65abad6bab7867fe38f"}, - {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b264171e3143d842ded311b7dccd46ff9ef34247129ff5bf5066123c55c2431c"}, - {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0dc313ef231edf866912e9d8f5a042ddab56c752619e92dfd3a2c277e6a7299a"}, - {file = "lxml-4.9.2-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:16efd54337136e8cd72fb9485c368d91d77a47ee2d42b057564aae201257d419"}, - {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0f2b1e0d79180f344ff9f321327b005ca043a50ece8713de61d1cb383fb8ac05"}, - {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:7b770ed79542ed52c519119473898198761d78beb24b107acf3ad65deae61f1f"}, - {file = "lxml-4.9.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efa29c2fe6b4fdd32e8ef81c1528506895eca86e1d8c4657fda04c9b3786ddf9"}, - {file = "lxml-4.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7e91ee82f4199af8c43d8158024cbdff3d931df350252288f0d4ce656df7f3b5"}, - {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b23e19989c355ca854276178a0463951a653309fb8e57ce674497f2d9f208746"}, - {file = 
"lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7"}, - {file = "lxml-4.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7b515674acfdcadb0eb5d00d8a709868173acece5cb0be3dd165950cbfdf5409"}, - {file = "lxml-4.9.2.tar.gz", hash = "sha256:2455cfaeb7ac70338b3257f41e21f0724f4b5b0c0e7702da67ee6c3640835b67"}, + {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, + {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, + {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, + {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, + {file = 
"lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, + {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, + {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, + {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, + {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = 
"sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, + {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, + {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, + {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, + {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, + {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, + {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, + {file = 
"lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, + {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, + {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = 
"sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, + {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, + {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, + {file = 
"lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, + {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, + {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, + {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, + {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = 
"sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, + {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, + {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = 
"sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, + {file = "lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, ] [package.extras] cssselect = ["cssselect (>=0.7)"] html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] -source = ["Cython (>=0.29.7)"] +source = ["Cython (>=0.29.35)"] [[package]] name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2211,7 +1848,6 @@ files = [ ] [package.dependencies] -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} MarkupSafe = ">=0.9.2" [package.extras] @@ -2223,7 +1859,6 @@ testing = ["pytest"] name = "markdown" version = "3.3.7" description = "Python implementation of Markdown." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2241,7 +1876,6 @@ testing = ["coverage", "pyyaml"] name = "markdown-include" version = "0.7.2" description = "A Python-Markdown extension which provides an 'include' function" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2256,7 +1890,6 @@ markdown = ">=3.0" name = "markupsafe" version = "2.1.2" description = "Safely add untrusted strings to HTML/XML markup." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2316,7 +1949,6 @@ files = [ name = "matplotlib" version = "3.5.3" description = "Python plotting package" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2371,7 +2003,6 @@ python-dateutil = ">=2.7" name = "mergedeep" version = "1.3.4" description = "A deep merge function for 🐍." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2383,7 +2014,6 @@ files = [ name = "mkdocs" version = "1.4.3" description = "Project documentation with Markdown." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2402,7 +2032,6 @@ mergedeep = ">=1.3.4" packaging = ">=20.5" pyyaml = ">=5.1" pyyaml-env-tag = ">=0.1" -typing-extensions = {version = ">=3.10", markers = "python_version < \"3.8\""} watchdog = ">=2.0" [package.extras] @@ -2413,7 +2042,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp name = "mkdocs-material" version = "8.5.11" description = "Documentation that simply works" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2434,7 +2062,6 @@ requests = ">=2.26" name = "mkdocs-material-extensions" version = "1.1.1" description = "Extension pack for Python Markdown and MkDocs Material." 
-category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2442,11 +2069,45 @@ files = [ {file = "mkdocs_material_extensions-1.1.1.tar.gz", hash = "sha256:9c003da71e2cc2493d910237448c672e00cefc800d3d6ae93d2fc69979e3bd93"}, ] +[[package]] +name = "ml-dtypes" +version = "0.2.0" +description = "" +optional = true +python-versions = ">=3.7" +files = [ + {file = "ml_dtypes-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df6a76e1c8adf484feb138ed323f9f40a7b6c21788f120f7c78bec20ac37ee81"}, + {file = "ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc29a0524ef5e23a7fbb8d881bdecabeb3fc1d19d9db61785d077a86cb94fab2"}, + {file = "ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08c391c2794f2aad358e6f4c70785a9a7b1df980ef4c232b3ccd4f6fe39f719"}, + {file = "ml_dtypes-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:75015818a7fccf99a5e8ed18720cb430f3e71a8838388840f4cdf225c036c983"}, + {file = "ml_dtypes-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e70047ec2c83eaee01afdfdabee2c5b0c133804d90d0f7db4dd903360fcc537c"}, + {file = "ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36d28b8861a8931695e5a31176cad5ae85f6504906650dea5598fbec06c94606"}, + {file = "ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e85ba8e24cf48d456e564688e981cf379d4c8e644db0a2f719b78de281bac2ca"}, + {file = "ml_dtypes-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:832a019a1b6db5c4422032ca9940a990fa104eee420f643713241b3a518977fa"}, + {file = "ml_dtypes-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8faaf0897942c8253dd126662776ba45f0a5861968cf0f06d6d465f8a7bc298a"}, + {file = "ml_dtypes-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35b984cddbe8173b545a0e3334fe56ea1a5c3eb67c507f60d0cfde1d3fa8f8c2"}, + {file = 
"ml_dtypes-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:022d5a4ee6be14569c2a9d1549e16f1ec87ca949681d0dca59995445d5fcdd5b"}, + {file = "ml_dtypes-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:50845af3e9a601810751b55091dee6c2562403fa1cb4e0123675cf3a4fc2c17a"}, + {file = "ml_dtypes-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f00c71c8c63e03aff313bc6a7aeaac9a4f1483a921a6ffefa6d4404efd1af3d0"}, + {file = "ml_dtypes-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80d304c836d73f10605c58ccf7789c171cc229bfb678748adfb7cea2510dfd0e"}, + {file = "ml_dtypes-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32107e7fa9f62db9a5281de923861325211dfff87bd23faefb27b303314635ab"}, + {file = "ml_dtypes-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:1749b60348da71fd3c2ab303fdbc1965958dc50775ead41f5669c932a341cafd"}, + {file = "ml_dtypes-0.2.0.tar.gz", hash = "sha256:6488eb642acaaf08d8020f6de0a38acee7ac324c1e6e92ee0c0fea42422cb797"}, +] + +[package.dependencies] +numpy = [ + {version = ">1.20", markers = "python_version <= \"3.9\""}, + {version = ">=1.21.2", markers = "python_version > \"3.9\""}, +] + +[package.extras] +dev = ["absl-py", "pyink", "pylint (>=2.6.0)", "pytest", "pytest-xdist"] + [[package]] name = "moderngl" version = "5.8.2" description = "ModernGL: High performance rendering for Python 3" -category = "main" optional = true python-versions = "*" files = [ @@ -2505,23 +2166,10 @@ files = [ [package.dependencies] glcontext = ">=2.3.6,<3" -[[package]] -name = "monotonic" -version = "1.6" -description = "An implementation of time.monotonic() for Python 2 & < 3.3" -category = "main" -optional = true -python-versions = "*" -files = [ - {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, - {file = "monotonic-1.6.tar.gz", hash = 
"sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, -] - [[package]] name = "moviepy" version = "1.0.3" description = "Video editing with Python" -category = "main" optional = false python-versions = "*" files = [ @@ -2532,10 +2180,7 @@ files = [ decorator = ">=4.0.2,<5.0" imageio = {version = ">=2.5,<3.0", markers = "python_version >= \"3.4\""} imageio_ffmpeg = {version = ">=0.2.0", markers = "python_version >= \"3.4\""} -numpy = [ - {version = ">=1.17.3", markers = "python_version != \"2.7\""}, - {version = "*", markers = "python_version >= \"2.7\""}, -] +numpy = {version = ">=1.17.3", markers = "python_version > \"2.7\""} proglog = "<=1.0.0" requests = ">=2.8.1,<3.0" tqdm = ">=4.11.2,<5.0" @@ -2549,8 +2194,7 @@ test = ["coverage (<5.0)", "coveralls (>=1.1,<2.0)", "pytest (>=3.0.0,<4.0)", "p name = "msgpack" version = "1.0.5" description = "MessagePack serializer" -category = "main" -optional = false +optional = true python-versions = "*" files = [ {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9"}, @@ -2622,7 +2266,6 @@ files = [ name = "mujoco" version = "2.3.3" description = "MuJoCo Physics Simulator" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2663,7 +2306,6 @@ pyopengl = "*" name = "multi-agent-ale-py" version = "0.1.11" description = "Multi-Agent Arcade Learning Environment Python Interface" -category = "main" optional = true python-versions = "*" files = [ @@ -2686,7 +2328,6 @@ numpy = "*" name = "multiprocess" version = "0.70.14" description = "better multiprocessing and multithreading in python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2709,41 +2350,10 @@ files = [ [package.dependencies] dill = ">=0.3.6" -[[package]] -name = "ninja" -version = "1.11.1" -description = "Ninja is a small build system with a focus on speed" -category = "dev" -optional = false -python-versions = 
"*" -files = [ - {file = "ninja-1.11.1-py2.py3-none-macosx_10_9_universal2.macosx_10_9_x86_64.macosx_11_0_arm64.macosx_11_0_universal2.whl", hash = "sha256:f48c3c6eea204062f6bbf089dfc63e1ad41a08640e1da46ef2b30fa426f7ce23"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:edec1053e141253076b2df7ec03a246ff581e9270aa1ca9759397b21e2760e57"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:817e2aee2a4d28a708a67bcfba1817ae502c32c6d8ef80e50d63b0f23adf3a08"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df11b8afea0501883e33faeb1c43d2ef67f466d5f4bd85f9c376e9a93a43a277"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a564fe755ddfbdbccb07b0b758e3f8460e5f8ba1adaab40a5eaa2f8c01ce68"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c474326e11fba3f8c2582715d79216292e327d3335367c0e87e9647a002cc4a"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f6465a7efe6473a2a34edab83633594de19d59406a727316e1367ebcc528908"}, - {file = "ninja-1.11.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:642cb64d859276998f14972724850e0c5b7febbc1bce3d2065b7e0cb7d3a0b79"}, - {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:60179bb4f22c88279c53a5402bb5fe81c97c627a28d93c737d1fa067d892115d"}, - {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_i686.whl", hash = "sha256:34753459493543782d87267e4cad63dd4639b07f8394ffe6d4417e9eda05c8a8"}, - {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:779f228e407c54a8b6e4cbf8f835489998dd250f67bf1b9bd7b8a8ab6bdcdc7b"}, - {file = "ninja-1.11.1-py2.py3-none-musllinux_1_1_s390x.whl", hash = "sha256:ba50a32424912e5f3ee40d791b506a160dc0eeda7de5ad8faebe7aa8006244dc"}, - {file = 
"ninja-1.11.1-py2.py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:3b28b595ed580752240ade7821b6cb7a5a4c6a604c865dc474bd38f06e2eb7f5"}, - {file = "ninja-1.11.1-py2.py3-none-win32.whl", hash = "sha256:3329b4b7c1694730772522a3ba0ba40fd15c012476ed3e1c9f0fd9e76190394e"}, - {file = "ninja-1.11.1-py2.py3-none-win_amd64.whl", hash = "sha256:4e547bc759c570773d83d110c41fd5ca9a94c0a9a8388f5a3ea37bdf97d002b0"}, - {file = "ninja-1.11.1-py2.py3-none-win_arm64.whl", hash = "sha256:8cf96f92ccc851c600cb3e1251c34db06f1dd682de79188ad490c33cddc66981"}, - {file = "ninja-1.11.1.tar.gz", hash = "sha256:c833a47d39b2d1eee3f9ca886fa1581efd5be6068b82734ac229961ee8748f90"}, -] - -[package.extras] -test = ["codecov (>=2.0.5)", "coverage (>=4.2)", "flake8 (>=3.0.4)", "pytest (>=4.5.0)", "pytest-cov (>=2.7.1)", "pytest-runner (>=5.1)", "pytest-virtualenv (>=1.7.0)", "virtualenv (>=15.0.3)"] - [[package]] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" -category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2756,50 +2366,45 @@ setuptools = "*" [[package]] name = "numpy" -version = "1.21.6" -description = "NumPy is the fundamental package for array computing with Python." 
-category = "main" +version = "1.24.4" +description = "Fundamental package for array computing in Python" optional = false -python-versions = ">=3.7,<3.11" -files = [ - {file = "numpy-1.21.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25"}, - {file = "numpy-1.21.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e"}, - {file = "numpy-1.21.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6"}, - {file = "numpy-1.21.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb"}, - {file = "numpy-1.21.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1"}, - {file = "numpy-1.21.6-cp310-cp310-win32.whl", hash = "sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c"}, - {file = "numpy-1.21.6-cp310-cp310-win_amd64.whl", hash = "sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f"}, - {file = "numpy-1.21.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7"}, - {file = "numpy-1.21.6-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46"}, - {file = "numpy-1.21.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2"}, - {file = "numpy-1.21.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db"}, - {file = "numpy-1.21.6-cp37-cp37m-win32.whl", hash = "sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e"}, - {file = 
"numpy-1.21.6-cp37-cp37m-win_amd64.whl", hash = "sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a"}, - {file = "numpy-1.21.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552"}, - {file = "numpy-1.21.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab"}, - {file = "numpy-1.21.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3"}, - {file = "numpy-1.21.6-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6"}, - {file = "numpy-1.21.6-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a"}, - {file = "numpy-1.21.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4"}, - {file = "numpy-1.21.6-cp38-cp38-win32.whl", hash = "sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470"}, - {file = "numpy-1.21.6-cp38-cp38-win_amd64.whl", hash = "sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf"}, - {file = "numpy-1.21.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1"}, - {file = "numpy-1.21.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673"}, - {file = "numpy-1.21.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0"}, - {file = "numpy-1.21.6-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac"}, - {file = "numpy-1.21.6-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b"}, - {file = "numpy-1.21.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b"}, - {file = "numpy-1.21.6-cp39-cp39-win32.whl", hash = "sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786"}, - {file = "numpy-1.21.6-cp39-cp39-win_amd64.whl", hash = "sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3"}, - {file = "numpy-1.21.6-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0"}, - {file = "numpy-1.21.6.zip", hash = "sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656"}, +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, ] [[package]] name = "oauthlib" version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2812,27 +2417,10 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] -[[package]] -name = "omegaconf" -version = "2.3.0" -description = "A flexible configuration library" -category = "dev" -optional = false -python-versions = ">=3.6" -files = [ - {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"}, - {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, -] - -[package.dependencies] -antlr4-python3-runtime = ">=4.9.0,<4.10.0" -PyYAML = ">=5.1.0" - [[package]] name = 
"opencv-python" version = "4.7.0.72" description = "Wrapper package for OpenCV python bindings." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2847,19 +2435,17 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.21.0", markers = "python_version <= \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, - {version = ">=1.21.2", markers = "python_version >= \"3.10\""}, + {version = ">=1.21.0", markers = "python_version <= \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\" and python_version >= \"3.8\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, - {version = ">=1.19.3", markers = "python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\" or python_version >= \"3.9\""}, - {version = ">=1.17.0", markers = "python_version >= \"3.7\""}, - {version = ">=1.17.3", markers = "python_version >= \"3.8\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, + {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.17.3", markers = "(platform_system != \"Darwin\" and platform_system != \"Linux\") and python_version >= \"3.8\" and python_version < \"3.9\" or platform_system != \"Darwin\" and python_version >= \"3.8\" and python_version < \"3.9\" and platform_machine != \"aarch64\" or platform_machine != \"arm64\" and python_version >= \"3.8\" and python_version < \"3.9\" and platform_system != \"Linux\" or (platform_machine != \"arm64\" and platform_machine != \"aarch64\") and python_version 
>= \"3.8\" and python_version < \"3.9\""}, ] [[package]] name = "openrlbenchmark" version = "0.1.1b4" description = "" -category = "main" optional = true python-versions = ">=3.7.1,<4.0.0" files = [ @@ -2883,7 +2469,6 @@ wandb = ">=0.13.7,<0.14.0" name = "opt-einsum" version = "3.3.0" description = "Optimizing numpys einsum function" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -2902,7 +2487,6 @@ tests = ["pytest", "pytest-cov", "pytest-pep8"] name = "optax" version = "0.1.4" description = "A gradient processing and optimisation library in JAX." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2920,19 +2504,18 @@ typing-extensions = ">=3.10.0" [[package]] name = "optuna" -version = "3.1.1" +version = "3.3.0" description = "A hyperparameter optimization framework" -category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "optuna-3.1.1-py3-none-any.whl", hash = "sha256:4d641b4bdd896f48a766803a5b64286281fa3e5dbbeedc549f8e1ee6c6e3eea8"}, - {file = "optuna-3.1.1.tar.gz", hash = "sha256:f3c8fcdb7ed4dd473485f6f61de2f2ceb9d4ad56afbae42e663e15c3f00a4c10"}, + {file = "optuna-3.3.0-py3-none-any.whl", hash = "sha256:3eefaed56a5fabf442036d51ef2001dbabfbe1a8ce33bc0c6b61ff5d15b30c82"}, + {file = "optuna-3.3.0.tar.gz", hash = "sha256:76126c6e52354892488e75fc7743eaddcd397c5aba8fa1f964a1c5b5f942eaf6"}, ] [package.dependencies] alembic = ">=1.5.0" -cmaes = ">=0.9.1" +cmaes = ">=0.10.0" colorlog = "*" numpy = "*" packaging = ">=20.0" @@ -2942,17 +2525,16 @@ tqdm = "*" [package.extras] benchmark = ["asv (>=0.5.0)", "botorch", "cma", "scikit-optimize", "virtualenv"] -checking = ["black", "blackdoc", "hacking", "isort", "mypy", "types-PyYAML", "types-redis", "types-setuptools", "typing-extensions (>=3.10.0.0)"] -document = ["cma", "distributed", "fvcore", "lightgbm", "matplotlib (!=3.6.0)", "mlflow", "pandas", "pillow", "plotly (>=4.9.0)", "scikit-learn", "scikit-optimize", "sphinx (<6)", "sphinx-copybutton", 
"sphinx-gallery", "sphinx-plotly-directive", "sphinx-rtd-theme", "torch (==1.11.0)", "torchaudio (==0.11.0)", "torchvision (==0.12.0)"] -integration = ["allennlp (>=2.2.0)", "botorch (>=0.4.0,<0.8.0)", "cached-path (<=1.1.2)", "catalyst (>=21.3)", "catboost (>=0.26)", "chainer (>=5.0.0)", "cma", "distributed", "fastai", "lightgbm", "mlflow", "mpi4py", "mxnet", "pandas", "pytorch-ignite", "pytorch-lightning (>=1.5.0)", "scikit-learn (>=0.24.2)", "scikit-optimize", "shap", "skorch", "tensorflow", "tensorflow-datasets", "torch (==1.11.0)", "torchaudio (==0.11.0)", "torchvision (==0.12.0)", "wandb", "xgboost"] -optional = ["matplotlib (!=3.6.0)", "pandas", "plotly (>=4.9.0)", "redis", "scikit-learn (>=0.24.2)"] -test = ["codecov", "fakeredis[lua]", "kaleido", "pytest", "scipy (>=1.9.2)"] +checking = ["black", "blackdoc", "flake8", "isort", "mypy", "mypy-boto3-s3", "types-PyYAML", "types-redis", "types-setuptools", "types-tqdm", "typing-extensions (>=3.10.0.0)"] +document = ["botorch", "cma", "distributed", "fvcore", "lightgbm", "matplotlib (!=3.6.0)", "mlflow", "pandas", "pillow", "plotly (>=4.9.0)", "scikit-learn", "scikit-optimize", "sphinx", "sphinx-copybutton", "sphinx-gallery", "sphinx-plotly-directive", "sphinx-rtd-theme (>=1.2.0)", "torch", "torchaudio", "torchvision"] +integration = ["botorch (>=0.4.0)", "catboost (>=0.26)", "catboost (>=0.26,<1.2)", "cma", "distributed", "fastai", "lightgbm", "mlflow", "pandas", "pytorch-ignite", "pytorch-lightning (>=1.6.0)", "scikit-learn (>=0.24.2)", "scikit-optimize", "shap", "tensorflow", "torch", "torchaudio", "torchvision", "wandb", "xgboost"] +optional = ["boto3", "botorch", "matplotlib (!=3.6.0)", "pandas", "plotly (>=4.9.0)", "redis", "scikit-learn (>=0.24.2)"] +test = ["coverage", "fakeredis[lua]", "kaleido", "moto", "pytest", "scipy (>=1.9.2)"] [[package]] name = "optuna-dashboard" version = "0.7.3" description = "Real-time dashboard for Optuna" -category = "main" optional = true python-versions = ">=3.6" files = [ 
@@ -2965,13 +2547,11 @@ bottle = "*" optuna = ">=2.4.0" packaging = "*" scikit-learn = "*" -typing-extensions = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "orbax" version = "0.1.0" description = "Orbax" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2999,7 +2579,6 @@ dev = ["pytest-xdist"] name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3011,7 +2590,6 @@ files = [ name = "pandas" version = "1.3.5" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -3047,7 +2625,7 @@ numpy = [ {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, - {version = ">=1.17.3", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.17.3", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""}, ] python-dateutil = ">=2.7.3" pytz = ">=2017.3" @@ -3059,7 +2637,6 @@ test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] name = "pathtools" version = "0.1.2" description = "File system general utilities" -category = "main" optional = false python-versions = "*" files = [ @@ -3070,7 +2647,6 @@ files = [ name = "pettingzoo" version = "1.18.1" description = "Gym for multi-agent reinforcement learning" -category = "main" optional = true python-versions = ">=3.7, <3.11" files = [ @@ -3097,7 +2673,6 @@ tests = ["codespell", "flake8", "isort", "pynput", "pytest"] name = "pillow" version = "9.5.0" description = "Python Imaging Library (Fork)" -category = "main" optional = false 
python-versions = ">=3.7" files = [ @@ -3177,7 +2752,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "pip" version = "22.3.1" description = "The PyPA recommended tool for installing Python packages." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3185,23 +2759,10 @@ files = [ {file = "pip-22.3.1.tar.gz", hash = "sha256:65fd48317359f3af8e593943e6ae1506b66325085ea64b706a998c6e83eeaf38"}, ] -[[package]] -name = "pkgutil-resolve-name" -version = "1.3.10" -description = "Resolve a name to an object." -category = "dev" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, - {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, -] - [[package]] name = "platformdirs" version = "3.5.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3209,9 +2770,6 @@ files = [ {file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"}, ] -[package.dependencies] -typing-extensions = {version = ">=4.5", markers = "python_version < \"3.8\""} - [package.extras] docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] @@ -3220,7 +2778,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3228,9 +2785,6 @@ files = [ {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, ] -[package.dependencies] -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} - [package.extras] dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] @@ -3239,7 +2793,6 @@ testing = ["pytest", "pytest-benchmark"] name = "pre-commit" version = "2.21.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3250,7 +2803,6 @@ files = [ [package.dependencies] cfgv = ">=2.0.0" identify = ">=1.0.0" -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" @@ -3259,7 +2811,6 @@ virtualenv = ">=20.10.0" name = "procgen" version = "0.10.7" description = "Procedurally Generated Game-Like RL Environments" -category = "main" optional = true python-versions = ">=3.6.0" files = [ @@ -3290,7 +2841,6 @@ test = ["pytest (==6.2.5)", "pytest-benchmark (==3.4.1)"] name = "proglog" version = "0.1.10" description = "Log and progress bar manager for console, notebooks, web..." -category = "main" optional = false python-versions = "*" files = [ @@ -3305,7 +2855,6 @@ tqdm = "*" name = "protobuf" version = "3.20.3" description = "Protocol Buffers" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3337,7 +2886,6 @@ files = [ name = "psutil" version = "5.9.5" description = "Cross-platform lib for process and system monitoring in Python." 
-category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3364,7 +2912,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "pyasn1" version = "0.5.0" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3376,7 +2923,6 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3391,7 +2937,6 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3403,7 +2948,6 @@ files = [ name = "pygame" version = "2.1.0" description = "Python Game Development" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3471,7 +3015,6 @@ files = [ name = "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3486,7 +3029,6 @@ plugins = ["importlib-metadata"] name = "pymdown-extensions" version = "9.11" description = "Extension pack for Python Markdown." 
-category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3502,7 +3044,6 @@ pyyaml = "*" name = "pyopengl" version = "3.1.6" description = "Standard OpenGL bindings for Python" -category = "main" optional = true python-versions = "*" files = [ @@ -3515,7 +3056,6 @@ files = [ name = "pyparsing" version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -3526,48 +3066,10 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] -[[package]] -name = "pyrsistent" -version = "0.19.3" -description = "Persistent/Functional/Immutable data structures" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pyrsistent-0.19.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:20460ac0ea439a3e79caa1dbd560344b64ed75e85d8703943e0b66c2a6150e4a"}, - {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c18264cb84b5e68e7085a43723f9e4c1fd1d935ab240ce02c0324a8e01ccb64"}, - {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b774f9288dda8d425adb6544e5903f1fb6c273ab3128a355c6b972b7df39dcf"}, - {file = "pyrsistent-0.19.3-cp310-cp310-win32.whl", hash = "sha256:5a474fb80f5e0d6c9394d8db0fc19e90fa540b82ee52dba7d246a7791712f74a"}, - {file = "pyrsistent-0.19.3-cp310-cp310-win_amd64.whl", hash = "sha256:49c32f216c17148695ca0e02a5c521e28a4ee6c5089f97e34fe24163113722da"}, - {file = "pyrsistent-0.19.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f0774bf48631f3a20471dd7c5989657b639fd2d285b861237ea9e82c36a415a9"}, - {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab2204234c0ecd8b9368dbd6a53e83c3d4f3cab10ecaf6d0e772f456c442393"}, - {file = 
"pyrsistent-0.19.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e42296a09e83028b3476f7073fcb69ffebac0e66dbbfd1bd847d61f74db30f19"}, - {file = "pyrsistent-0.19.3-cp311-cp311-win32.whl", hash = "sha256:64220c429e42a7150f4bfd280f6f4bb2850f95956bde93c6fda1b70507af6ef3"}, - {file = "pyrsistent-0.19.3-cp311-cp311-win_amd64.whl", hash = "sha256:016ad1afadf318eb7911baa24b049909f7f3bb2c5b1ed7b6a8f21db21ea3faa8"}, - {file = "pyrsistent-0.19.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4db1bd596fefd66b296a3d5d943c94f4fac5bcd13e99bffe2ba6a759d959a28"}, - {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aeda827381f5e5d65cced3024126529ddc4289d944f75e090572c77ceb19adbf"}, - {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42ac0b2f44607eb92ae88609eda931a4f0dfa03038c44c772e07f43e738bcac9"}, - {file = "pyrsistent-0.19.3-cp37-cp37m-win32.whl", hash = "sha256:e8f2b814a3dc6225964fa03d8582c6e0b6650d68a232df41e3cc1b66a5d2f8d1"}, - {file = "pyrsistent-0.19.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9bb60a40a0ab9aba40a59f68214eed5a29c6274c83b2cc206a359c4a89fa41b"}, - {file = "pyrsistent-0.19.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a2471f3f8693101975b1ff85ffd19bb7ca7dd7c38f8a81701f67d6b4f97b87d8"}, - {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc5d149f31706762c1f8bda2e8c4f8fead6e80312e3692619a75301d3dbb819a"}, - {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3311cb4237a341aa52ab8448c27e3a9931e2ee09561ad150ba94e4cfd3fc888c"}, - {file = "pyrsistent-0.19.3-cp38-cp38-win32.whl", hash = "sha256:f0e7c4b2f77593871e918be000b96c8107da48444d57005b6a6bc61fb4331b2c"}, - {file = "pyrsistent-0.19.3-cp38-cp38-win_amd64.whl", hash = 
"sha256:c147257a92374fde8498491f53ffa8f4822cd70c0d85037e09028e478cababb7"}, - {file = "pyrsistent-0.19.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b735e538f74ec31378f5a1e3886a26d2ca6351106b4dfde376a26fc32a044edc"}, - {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99abb85579e2165bd8522f0c0138864da97847875ecbd45f3e7e2af569bfc6f2"}, - {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a8cb235fa6d3fd7aae6a4f1429bbb1fec1577d978098da1252f0489937786f3"}, - {file = "pyrsistent-0.19.3-cp39-cp39-win32.whl", hash = "sha256:c74bed51f9b41c48366a286395c67f4e894374306b197e62810e0fdaf2364da2"}, - {file = "pyrsistent-0.19.3-cp39-cp39-win_amd64.whl", hash = "sha256:878433581fc23e906d947a6814336eee031a00e6defba224234169ae3d3d6a98"}, - {file = "pyrsistent-0.19.3-py3-none-any.whl", hash = "sha256:ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64"}, - {file = "pyrsistent-0.19.3.tar.gz", hash = "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440"}, -] - [[package]] name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3578,7 +3080,6 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" @@ -3591,7 +3092,6 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3606,7 +3106,6 @@ six = ">=1.5" name = 
"pytimeparse" version = "1.1.8" description = "Time expression parser" -category = "main" optional = true python-versions = "*" files = [ @@ -3618,7 +3117,6 @@ files = [ name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -3626,62 +3124,69 @@ files = [ {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] -[[package]] -name = "pyvirtualdisplay" -version = "3.0" -description = "python wrapper for Xvfb, Xephyr and Xvnc" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "PyVirtualDisplay-3.0-py3-none-any.whl", hash = "sha256:40d4b8dfe4b8de8552e28eb367647f311f88a130bf837fe910e7f180d5477f0e"}, - {file = "PyVirtualDisplay-3.0.tar.gz", hash = "sha256:09755bc3ceb6eb725fb07eca5425f43f2358d3bf08e00d2a9b792a1aedd16159"}, -] - [[package]] name = "pyyaml" -version = "5.4.1" +version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.6" files = [ - {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, - {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"}, - {file = "PyYAML-5.4.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8"}, - {file = "PyYAML-5.4.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185"}, - {file = "PyYAML-5.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = 
"sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347"}, - {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541"}, - {file = "PyYAML-5.4.1-cp36-cp36m-win32.whl", hash = "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5"}, - {file = "PyYAML-5.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df"}, - {file = "PyYAML-5.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa"}, - {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"}, - {file = "PyYAML-5.4.1-cp37-cp37m-win32.whl", hash = "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b"}, - {file = "PyYAML-5.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf"}, - {file = "PyYAML-5.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247"}, - {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_s390x.whl", hash = 
"sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc"}, - {file = "PyYAML-5.4.1-cp38-cp38-win32.whl", hash = "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc"}, - {file = "PyYAML-5.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696"}, - {file = "PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122"}, - {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6"}, - {file = "PyYAML-5.4.1-cp39-cp39-win32.whl", hash = "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10"}, - {file = "PyYAML-5.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db"}, - {file = "PyYAML-5.4.1.tar.gz", hash = "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash 
= "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = 
"PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = 
"PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] [[package]] name = "pyyaml-env-tag" version = "0.1" description = "A custom YAML tag for referencing environment variables in YAML files. " -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3692,76 +3197,10 @@ files = [ [package.dependencies] pyyaml = "*" -[[package]] -name = "ray" -version = "2.7.0" -description = "Ray provides a simple, universal API for building distributed applications." 
-category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "ray-2.7.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:bc911655908b61b2e9f59b8df158fcc62cd32080c468b484b539ebf0a4111d04"}, - {file = "ray-2.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0ee8c14e1521559cd5802bfad3f0aba4a77afdfba57dd446162a7449c6e8ff68"}, - {file = "ray-2.7.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:ebde44af7d479ede21d1c2e68b5ccd8264e18df6e4f3c216d9e99c31e819bde6"}, - {file = "ray-2.7.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:b83621f5d2d4079e6ae624c3bf30046a4fefa0ea7ea5e4a4dfe4b50c580b3768"}, - {file = "ray-2.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:8e1b06abba6e227b8dde1ad861c587fb2608a6970d270e4755cd24a6f37ed565"}, - {file = "ray-2.7.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5442d48719f033831a324f05b332d6e7181970d721e9504be2091cc9d9735394"}, - {file = "ray-2.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca8225878cce7b9e2d0ca9668d9370893a7cee35629d11a3889a1b66a0007218"}, - {file = "ray-2.7.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:a3f59dbb0780f9fa11f5bf96bef853b4cb95245456d4400e1c7bf2e514d12ab2"}, - {file = "ray-2.7.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:8384b3f30bc1446ef810e9e894afa03238c5ac40d3c40c0740d82f347112015d"}, - {file = "ray-2.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d4530e7024375505552dabd3f4441fc9ac7a5562365a81ba9afa14185433879"}, - {file = "ray-2.7.0-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:c491b8051eef82b77d136c48a23d16485c0e54233303ccf68e9fe69a06c517e6"}, - {file = "ray-2.7.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:1684c434886cb7b263cdf98ed39d75dec343e949f7b14f3385d83bfe70ee8c80"}, - {file = "ray-2.7.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:856a9ae164b9b0aeaad54f3e78986eb19900ed3c74e26f51b02a7d8826c97e59"}, - {file = "ray-2.7.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:34925a90b6239de42592bb4524dcbdc59a9c65f1f74ad4d9f97f636bd59c73d7"}, - {file = "ray-2.7.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:60db240f37d80a80492e09a8d1e29b79d034431c6fcb651401e9e2d24d850793"}, - {file = "ray-2.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:835155fdeb8698eae426f3d9416e6b8165197fe5c1c74e1b02a429fc7f4ddcd2"}, - {file = "ray-2.7.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:442b7568946081d38c8addbc528e7b09fc1ee25453b4800c86b7e5ba4bce9dd3"}, - {file = "ray-2.7.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:3825292b777b423e2cd34bf66e8e1e7701b04c6a5308f9f291ad5929b289dc47"}, - {file = "ray-2.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:ce700322662946ad5c62a39b78e81feebcb855d378c49f5df6477c22f0ac1e5a"}, - {file = "ray-2.7.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:327c23aac5dd26ee4abe6cee70320322d63fdf97c6028fbb9555724b46a8f3e3"}, - {file = "ray-2.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a09021d45312ab7a44109b251984718b65fbff77df0b55e30e651193cdf42bff"}, - {file = "ray-2.7.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f05fcb609962d14f4d23cc88a9d07cafa7077ce3c5d5ee99cd08a19067b7eecf"}, - {file = "ray-2.7.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:0e0f7dbeb4444940c72b64fdecd6f331593466914b2dffeed03ce97225acec14"}, - {file = "ray-2.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:29a0866316756ae18e232dd074adbf408dcdabe95d135a9a96b9a8c24393c983"}, -] - -[package.dependencies] -aiosignal = "*" -click = ">=7.0" -filelock = "*" -frozenlist = "*" -jsonschema = "*" -msgpack = ">=1.0.0,<2.0.0" -numpy = [ - {version = ">=1.16", markers = "python_version < \"3.9\""}, - {version = ">=1.19.3", markers = "python_version >= \"3.9\""}, -] -packaging = "*" -protobuf = ">=3.15.3,<3.19.5 || >3.19.5" -pyyaml = "*" -requests = "*" -typing-extensions = {version = "*", markers = "python_version < \"3.8\""} - -[package.extras] -air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", 
"fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] -all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.7.0)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] -client = ["grpcio (!=1.56.0)"] -cpp = ["ray-cpp (==2.7.0)"] -data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"] -default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"] -observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] -rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"] -serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] -serve-grpc 
= ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] -train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] -tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] - [[package]] name = "requests" version = "2.30.0" description = "Python HTTP for Humans." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3783,7 +3222,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-oauthlib" version = "1.3.1" description = "OAuthlib authentication support for Requests." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3802,7 +3240,6 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"] name = "rich" version = "11.2.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -3814,39 +3251,14 @@ files = [ colorama = ">=0.4.0,<0.5.0" commonmark = ">=0.9.0,<0.10.0" pygments = ">=2.6.0,<3.0.0" -typing-extensions = {version = ">=3.7.4,<5.0", markers = "python_version < \"3.8\""} [package.extras] jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] -[[package]] -name = "rl-games" -version = "1.5.2" -description = "" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "rl-games-1.5.2.tar.gz", hash = "sha256:6d4f5513c917115eed8ebdcab89d0086ea035ce1d0c992dbfba0401c64c63547"}, - {file = "rl_games-1.5.2-py3-none-any.whl", hash = "sha256:104cf667c02c90e4604221bf6d4ea58f231cfb3d7678d62d453308d69d58e4e5"}, -] - -[package.dependencies] -gym = ">=0.17.2" -numpy = ">=1.16.0" -psutil = "*" -pyyaml = "*" -ray = ">=1.1.0" 
-setproctitle = "*" -tensorboard = ">=1.14.0" -tensorboardX = ">=1.6" -torch = ">=1.7.0" - [[package]] name = "rsa" version = "4.7.2" description = "Pure-Python RSA implementation" -category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -3859,27 +3271,25 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3transfer" -version = "0.6.1" +version = "0.8.0" description = "An Amazon S3 Transfer Manager" -category = "main" optional = true python-versions = ">= 3.7" files = [ - {file = "s3transfer-0.6.1-py3-none-any.whl", hash = "sha256:3c0da2d074bf35d6870ef157158641178a4204a6e689e82546083e31e0311346"}, - {file = "s3transfer-0.6.1.tar.gz", hash = "sha256:640bb492711f4c0c0905e1f62b6aaeb771881935ad27884852411f8e9cacbca9"}, + {file = "s3transfer-0.8.0-py3-none-any.whl", hash = "sha256:baa479dc2e63e5c2ed51611b4d46cdf0295e2070d8d0b86b22f335ee5b954986"}, + {file = "s3transfer-0.8.0.tar.gz", hash = "sha256:e8d6bd52ffd99841e3a57b34370a54841f12d3aab072af862cdcc50955288002"}, ] [package.dependencies] -botocore = ">=1.12.36,<2.0a.0" +botocore = ">=1.32.7,<2.0a.0" [package.extras] -crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +crt = ["botocore[crt] (>=1.32.7,<2.0a.0)"] [[package]] name = "scikit-learn" version = "1.0.2" description = "A set of python modules for machine learning and data mining" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3931,51 +3341,46 @@ tests = ["black (>=21.6b0)", "flake8 (>=3.8.2)", "matplotlib (>=2.2.3)", "mypy ( [[package]] name = "scipy" -version = "1.7.3" -description = "SciPy: Scientific Library for Python" -category = "main" -optional = false -python-versions = ">=3.7,<3.11" -files = [ - {file = "scipy-1.7.3-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c9e04d7e9b03a8a6ac2045f7c5ef741be86727d8f49c45db45f244bdd2bcff17"}, - {file = "scipy-1.7.3-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b0e0aeb061a1d7dcd2ed59ea57ee56c9b23dd60100825f98238c06ee5cc4467e"}, - {file = 
"scipy-1.7.3-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b78a35c5c74d336f42f44106174b9851c783184a85a3fe3e68857259b37b9ffb"}, - {file = "scipy-1.7.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:173308efba2270dcd61cd45a30dfded6ec0085b4b6eb33b5eb11ab443005e088"}, - {file = "scipy-1.7.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:21b66200cf44b1c3e86495e3a436fc7a26608f92b8d43d344457c54f1c024cbc"}, - {file = "scipy-1.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceebc3c4f6a109777c0053dfa0282fddb8893eddfb0d598574acfb734a926168"}, - {file = "scipy-1.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7eaea089345a35130bc9a39b89ec1ff69c208efa97b3f8b25ea5d4c41d88094"}, - {file = "scipy-1.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:304dfaa7146cffdb75fbf6bb7c190fd7688795389ad060b970269c8576d038e9"}, - {file = "scipy-1.7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:033ce76ed4e9f62923e1f8124f7e2b0800db533828c853b402c7eec6e9465d80"}, - {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4d242d13206ca4302d83d8a6388c9dfce49fc48fdd3c20efad89ba12f785bf9e"}, - {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8499d9dd1459dc0d0fe68db0832c3d5fc1361ae8e13d05e6849b358dc3f2c279"}, - {file = "scipy-1.7.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca36e7d9430f7481fc7d11e015ae16fbd5575615a8e9060538104778be84addf"}, - {file = "scipy-1.7.3-cp37-cp37m-win32.whl", hash = "sha256:e2c036492e673aad1b7b0d0ccdc0cb30a968353d2c4bf92ac8e73509e1bf212c"}, - {file = "scipy-1.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:866ada14a95b083dd727a845a764cf95dd13ba3dc69a16b99038001b05439709"}, - {file = "scipy-1.7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:65bd52bf55f9a1071398557394203d881384d27b9c2cad7df9a027170aeaef93"}, - {file = "scipy-1.7.3-cp38-cp38-macosx_12_0_arm64.whl", hash = 
"sha256:f99d206db1f1ae735a8192ab93bd6028f3a42f6fa08467d37a14eb96c9dd34a3"}, - {file = "scipy-1.7.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5f2cfc359379c56b3a41b17ebd024109b2049f878badc1e454f31418c3a18436"}, - {file = "scipy-1.7.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb7ae2c4dbdb3c9247e07acc532f91077ae6dbc40ad5bd5dca0bb5a176ee9bda"}, - {file = "scipy-1.7.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c2d250074cfa76715d58830579c64dff7354484b284c2b8b87e5a38321672c"}, - {file = "scipy-1.7.3-cp38-cp38-win32.whl", hash = "sha256:87069cf875f0262a6e3187ab0f419f5b4280d3dcf4811ef9613c605f6e4dca95"}, - {file = "scipy-1.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:7edd9a311299a61e9919ea4192dd477395b50c014cdc1a1ac572d7c27e2207fa"}, - {file = "scipy-1.7.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eef93a446114ac0193a7b714ce67659db80caf940f3232bad63f4c7a81bc18df"}, - {file = "scipy-1.7.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb326658f9b73c07081300daba90a8746543b5ea177184daed26528273157294"}, - {file = "scipy-1.7.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:93378f3d14fff07572392ce6a6a2ceb3a1f237733bd6dcb9eb6a2b29b0d19085"}, - {file = "scipy-1.7.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edad1cf5b2ce1912c4d8ddad20e11d333165552aba262c882e28c78bbc09dbf6"}, - {file = "scipy-1.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d1cc2c19afe3b5a546ede7e6a44ce1ff52e443d12b231823268019f608b9b12"}, - {file = "scipy-1.7.3-cp39-cp39-win32.whl", hash = "sha256:2c56b820d304dffcadbbb6cbfbc2e2c79ee46ea291db17e288e73cd3c64fefa9"}, - {file = "scipy-1.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:3f78181a153fa21c018d346f595edd648344751d7f03ab94b398be2ad083ed3e"}, - {file = "scipy-1.7.3.tar.gz", hash = "sha256:ab5875facfdef77e0a47d5fd39ea178b58e60e454a4c85aa1e52fcb80db7babf"}, +version = "1.10.1" 
+description = "Fundamental algorithms for scientific computing in Python" +optional = true +python-versions = "<3.12,>=3.8" +files = [ + {file = "scipy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019"}, + {file = "scipy-1.10.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e"}, + {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f"}, + {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2"}, + {file = "scipy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1"}, + {file = "scipy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd"}, + {file = "scipy-1.10.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5"}, + {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35"}, + {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d"}, + {file = "scipy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f"}, + {file = "scipy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35"}, + {file = "scipy-1.10.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88"}, + {file = 
"scipy-1.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1"}, + {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f"}, + {file = "scipy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415"}, + {file = "scipy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9"}, + {file = "scipy-1.10.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6"}, + {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353"}, + {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601"}, + {file = "scipy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea"}, + {file = "scipy-1.10.1.tar.gz", hash = "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5"}, ] [package.dependencies] -numpy = ">=1.16.5,<1.23.0" +numpy = ">=1.19.5,<1.27.0" + +[package.extras] +dev = ["click", "doit (>=0.36.0)", "flake8", "mypy", "pycodestyle", "pydevtool", "rich-click", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "seaborn" version = "0.12.2" description = "Statistical data visualization" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3987,7 +3392,6 @@ files = [ 
matplotlib = ">=3.1,<3.6.1 || >3.6.1" numpy = ">=1.17,<1.24.0 || >1.24.0" pandas = ">=0.25" -typing_extensions = {version = "*", markers = "python_version < \"3.8\""} [package.extras] dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"] @@ -3998,7 +3402,6 @@ stats = ["scipy (>=1.3)", "statsmodels (>=0.10)"] name = "sentry-sdk" version = "1.22.2" description = "Python client for Sentry (https://sentry.io)" -category = "main" optional = false python-versions = "*" files = [ @@ -4040,7 +3443,6 @@ tornado = ["tornado (>=5)"] name = "setproctitle" version = "1.3.2" description = "A Python module to customize the process title" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4125,7 +3527,6 @@ test = ["pytest"] name = "setuptools" version = "67.7.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4140,41 +3541,50 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( [[package]] name = "shimmy" -version = "1.0.0" +version = "1.1.0" description = "An API conversion tool providing Gymnasium and PettingZoo bindings for popular external reinforcement learning environments." 
-category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "Shimmy-1.0.0-py3-none-any.whl", hash = "sha256:f26540d595ad56c9d0e99462d6388dc0dbb7976a97095337365ec79668cdf836"}, - {file = "Shimmy-1.0.0.tar.gz", hash = "sha256:30b9473402e846149137d5d71a0fbe47787d309c7e3a0c1aca97c95375de5f26"}, + {file = "Shimmy-1.1.0-py3-none-any.whl", hash = "sha256:0d2f44cdc3384b792336eb54002d23eb8c0ddb67580760e9c4e234fdf6077a69"}, + {file = "Shimmy-1.1.0.tar.gz", hash = "sha256:028ff42861fd8fa168927631f8f8cb2bda4ffef67e65633c51bf3116792e1f88"}, ] [package.dependencies] -dm-control = {version = ">=1.0.10", optional = true, markers = "extra == \"dm-control\""} gymnasium = ">=0.27.0" -h5py = {version = ">=3.7.0", optional = true, markers = "extra == \"dm-control\""} -imageio = {version = "*", optional = true, markers = "extra == \"dm-control\""} numpy = ">=1.18.0" [package.extras] -all = ["ale-py (>=0.8.1,<0.9.0)", "bsuite (>=0.3.5)", "dm-control (>=1.0.10)", "dm-env (>=1.6)", "gym (>=0.21.0)", "gym (>=0.26.2)", "h5py (>=3.7.0)", "imageio", "open-spiel (>=1.2)", "pettingzoo (>=1.22.3)", "pyglet (==1.5.11)"] +all = ["ale-py (>=0.8.1,<0.9.0)", "bsuite (>=0.3.5)", "dm-control (>=1.0.10)", "dm-env (>=1.6)", "gym (>=0.26.2)", "h5py (>=3.7.0)", "imageio", "open-spiel (>=1.2)", "pettingzoo (>=1.23)"] atari = ["ale-py (>=0.8.1,<0.9.0)"] bsuite = ["bsuite (>=0.3.5)"] dm-control = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio"] -dm-control-multi-agent = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio", "pettingzoo (>=1.22.3)"] +dm-control-multi-agent = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio", "pettingzoo (>=1.23)"] dm-lab = ["dm-env (>=1.6)"] -gym-v21 = ["gym (>=0.21.0)", "pyglet (==1.5.11)"] +gym-v21 = ["gym (>=0.21.0,<0.26)", "pyglet (==1.5.11)"] gym-v26 = ["gym (>=0.26.2)"] -meltingpot = ["pettingzoo (>=1.22.3)"] -openspiel = ["open-spiel (>=1.2)", "pettingzoo (>=1.22.3)"] +meltingpot = ["pettingzoo (>=1.23)"] +openspiel = ["open-spiel (>=1.2)", 
"pettingzoo (>=1.23)"] testing = ["autorom[accept-rom-license] (>=0.6.0,<0.7.0)", "pillow (>=9.3.0)", "pytest (==7.1.3)"] +[[package]] +name = "shtab" +version = "1.6.4" +description = "Automagic shell tab completion for Python CLI applications" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shtab-1.6.4-py3-none-any.whl", hash = "sha256:4be38887a912091a1640e06f5ccbcbd24e176cf2fcb9ef0c2e011ee22d63834f"}, + {file = "shtab-1.6.4.tar.gz", hash = "sha256:aba9e049bed54ffdb650cb2e02657282d8c0148024b0f500277052df124d47de"}, +] + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout"] + [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -4186,7 +3596,6 @@ files = [ name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4198,7 +3607,6 @@ files = [ name = "sqlalchemy" version = "2.0.13" description = "Database Abstraction Library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4247,7 +3655,6 @@ files = [ [package.dependencies] greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} typing-extensions = ">=4.2.0" [package.extras] @@ -4275,34 +3682,33 @@ sqlcipher = ["sqlcipher3-binary"] [[package]] name = "stable-baselines3" -version = "1.2.0" +version = "2.0.0" description = "Pytorch version of Stable Baselines, implementations of reinforcement learning algorithms." 
-category = "main" optional = false -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "stable_baselines3-1.2.0-py3-none-any.whl", hash = "sha256:15769fe983fd0c14067c87294a1f5cd081fdfdb02092bc1277ec3fad45e9bd13"}, - {file = "stable_baselines3-1.2.0.tar.gz", hash = "sha256:170842f30c00adff0dcccef5be74921cfa0dd2650b3eb8600c62b5d43ff78c67"}, + {file = "stable_baselines3-2.0.0-py3-none-any.whl", hash = "sha256:54a011a049d5cd923471e1e8c2a4c275de0a0d9257548cfa287ebf70fffa56dd"}, + {file = "stable_baselines3-2.0.0.tar.gz", hash = "sha256:0b62b5148e8045c5d67f9f34d8de15c6248b325e15e5dcd931c88f236c735193"}, ] [package.dependencies] cloudpickle = "*" -gym = ">=0.17" +gymnasium = "0.28.1" matplotlib = "*" -numpy = "*" +numpy = ">=1.20" pandas = "*" -torch = ">=1.8.1" +torch = ">=1.11" [package.extras] -docs = ["sphinx", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib.spelling"] -extra = ["atari-py (>=0.2.0,<0.3.0)", "opencv-python", "pillow", "psutil", "tensorboard (>=2.2.0)"] -tests = ["black", "flake8 (>=3.8)", "flake8-bugbear", "isort (>=5.0)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "pytype", "scipy (>=1.4.1)"] +docs = ["sphinx (>=5.3,<7.0)", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx-copybutton", "sphinx-rtd-theme", "sphinxcontrib.spelling"] +extra = ["autorom[accept-rom-license] (>=0.6.0,<0.7.0)", "opencv-python", "pillow", "psutil", "pygame", "pygame (>=2.0,<2.1.3)", "rich", "shimmy[atari] (>=0.2.1,<0.3.0)", "tensorboard (>=2.9.1)", "tqdm"] +extra-no-roms = ["opencv-python", "pillow", "psutil", "pygame", "pygame (>=2.0,<2.1.3)", "rich", "shimmy[atari] (>=0.2.1,<0.3.0)", "tensorboard (>=2.9.1)", "tqdm"] +tests = ["black", "isort (>=5.0)", "mypy", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "pytype", "ruff"] [[package]] name = "supersuit" version = "3.4.0" description = "Wrappers for Gym and PettingZoo" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ 
-4319,7 +3725,6 @@ tinyscaler = ">=1.0.4" name = "tabulate" version = "0.9.0" description = "Pretty-print tabular data" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4334,7 +3739,6 @@ widechars = ["wcwidth"] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4349,7 +3753,6 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tensorboard" version = "2.11.2" description = "TensorBoard lets you watch Tensors Flow" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4375,7 +3778,6 @@ wheel = ">=0.26" name = "tensorboard-data-server" version = "0.6.1" description = "Fast data loading for TensorBoard" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4388,35 +3790,16 @@ files = [ name = "tensorboard-plugin-wit" version = "1.8.1" description = "What-If Tool TensorBoard plugin." -category = "main" optional = false python-versions = "*" files = [ {file = "tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe"}, ] -[[package]] -name = "tensorboardx" -version = "2.6" -description = "TensorBoardX lets you watch Tensors Flow without Tensorflow" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "tensorboardX-2.6-py2.py3-none-any.whl", hash = "sha256:24a7cd076488de1e9d15ef25371b8ebf90c4f8f622af2477c611198f03f4a606"}, - {file = "tensorboardX-2.6.tar.gz", hash = "sha256:d4c036964dd2deb075a1909832b276daa383eab3f9db519ad90b99f5aea06b0c"}, -] - -[package.dependencies] -numpy = "*" -packaging = "*" -protobuf = ">=3.8.0,<4" - [[package]] name = "tensorstore" version = "0.1.28" description = "Read and write large, multi-dimensional arrays" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4441,22 +3824,10 @@ files = [ [package.dependencies] numpy = ">=1.16.0" -[[package]] -name 
= "termcolor" -version = "1.1.0" -description = "ANSII Color formatting for output in terminal." -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "termcolor-1.1.0.tar.gz", hash = "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"}, -] - [[package]] name = "threadpoolctl" version = "3.1.0" description = "threadpoolctl" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -4468,7 +3839,6 @@ files = [ name = "tinyscaler" version = "1.2.5" description = "A tiny, simple image scaler" -category = "main" optional = true python-versions = ">=3.7, <3.11" files = [ @@ -4486,7 +3856,6 @@ numpy = "*" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4498,7 +3867,6 @@ files = [ name = "toolz" version = "0.12.0" description = "List processing tools and functional utilities" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -4510,7 +3878,6 @@ files = [ name = "torch" version = "1.12.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4539,50 +3906,10 @@ files = [ [package.dependencies] typing-extensions = "*" -[[package]] -name = "torchvision" -version = "0.13.1" -description = "image and video datasets and models for torch deep learning" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "torchvision-0.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:19286a733c69dcbd417b86793df807bd227db5786ed787c17297741a9b0d0fc7"}, - {file = "torchvision-0.13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:08f592ea61836ebeceb5c97f4d7a813b9d7dc651bbf7ce4401563ccfae6a21fc"}, - {file = "torchvision-0.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:ef5fe3ec1848123cd0ec74c07658192b3147dcd38e507308c790d5943e87b88c"}, - {file = 
"torchvision-0.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:099874088df104d54d8008f2a28539ca0117b512daed8bf3c2bbfa2b7ccb187a"}, - {file = "torchvision-0.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:8e4d02e4d8a203e0c09c10dfb478214c224d080d31efc0dbf36d9c4051f7f3c6"}, - {file = "torchvision-0.13.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5e631241bee3661de64f83616656224af2e3512eb2580da7c08e08b8c965a8ac"}, - {file = "torchvision-0.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:899eec0b9f3b99b96d6f85b9aa58c002db41c672437677b553015b9135b3be7e"}, - {file = "torchvision-0.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:83e9e2457f23110fd53b0177e1bc621518d6ea2108f570e853b768ce36b7c679"}, - {file = "torchvision-0.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7552e80fa222252b8b217a951c85e172a710ea4cad0ae0c06fbb67addece7871"}, - {file = "torchvision-0.13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f230a1a40ed70d51e463ce43df243ec520902f8725de2502e485efc5eea9d864"}, - {file = "torchvision-0.13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e9a563894f9fa40692e24d1aa58c3ef040450017cfed3598ff9637f404f3fe3b"}, - {file = "torchvision-0.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7cb789ceefe6dcd0dc8eeda37bfc45efb7cf34770eac9533861d51ca508eb5b3"}, - {file = "torchvision-0.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:87c137f343197769a51333076e66bfcd576301d2cd8614b06657187c71b06c4f"}, - {file = "torchvision-0.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:4d8bf321c4380854ef04613935fdd415dce29d1088a7ff99e06e113f0efe9203"}, - {file = "torchvision-0.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0298bae3b09ac361866088434008d82b99d6458fe8888c8df90720ef4b347d44"}, - {file = "torchvision-0.13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c5ed609c8bc88c575226400b2232e0309094477c82af38952e0373edef0003fd"}, - {file = "torchvision-0.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = 
"sha256:3567fb3def829229ec217c1e38f08c5128ff7fb65854cac17ebac358ff7aa309"}, - {file = "torchvision-0.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b167934a5943242da7b1e59318f911d2d253feeca0d13ad5d832b58eed943401"}, - {file = "torchvision-0.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:0e77706cc90462653620e336bb90daf03d7bf1b88c3a9a3037df8d111823a56e"}, -] - -[package.dependencies] -numpy = "*" -pillow = ">=5.3.0,<8.3.0 || >=8.4.0" -requests = "*" -torch = "1.12.1" -typing-extensions = "*" - -[package.extras] -scipy = ["scipy"] - [[package]] name = "tqdm" version = "4.65.0" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4603,7 +3930,6 @@ telegram = ["requests"] name = "treevalue" version = "1.4.10" description = "A flexible, generalized tree-based data structure." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4657,7 +3983,6 @@ test = ["coverage (>=5)", "easydict (>=1.7,<2)", "flake8 (>=3.5,<4.0)", "hbutils name = "tueplots" version = "0.0.4" description = "Scientific plotting made easy" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4676,7 +4001,6 @@ examples = ["jupyter"] name = "typeguard" version = "2.13.3" description = "Run-time type checker for Python" -category = "main" optional = true python-versions = ">=3.5.3" files = [ @@ -4692,7 +4016,6 @@ test = ["mypy", "pytest", "typing-extensions"] name = "types-protobuf" version = "4.23.0.1" description = "Typing stubs for protobuf" -category = "main" optional = true python-versions = "*" files = [ @@ -4704,7 +4027,6 @@ files = [ name = "typing-extensions" version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4712,11 +4034,31 @@ files = [ {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, ] 
+[[package]] +name = "tyro" +version = "0.5.10" +description = "Strongly typed, zero-effort CLI interfaces" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tyro-0.5.10-py3-none-any.whl", hash = "sha256:ebe0b71761dfaef4053d85201866874ec80d6d5df9b4ce27c93d09063bbc85f0"}, + {file = "tyro-0.5.10.tar.gz", hash = "sha256:8d3cfecdf8a51151cc8c93c80677b91782c34cadcf3308838bb1152a81dfd074"}, +] + +[package.dependencies] +colorama = {version = ">=0.4.0", markers = "platform_system == \"Windows\""} +docstring-parser = ">=0.14.1" +rich = ">=11.1.0" +shtab = ">=1.5.6" +typing-extensions = ">=4.3.0" + +[package.extras] +dev = ["PyYAML (>=6.0)", "attrs (>=21.4.0)", "coverage[toml] (>=6.5.0)", "flax (>=0.6.9)", "frozendict (>=2.3.4)", "mypy (>=1.4.1)", "numpy (>=1.20.0)", "omegaconf (>=2.2.2)", "pydantic (>=2.3.0)", "pyright (>=1.1.264)", "pytest (>=7.1.2)", "pytest-cov (>=3.0.0)", "torch (>=1.10.0)"] + [[package]] name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4733,7 +4075,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "virtualenv" version = "20.21.0" description = "Virtual Python Environment builder" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4744,7 +4085,6 @@ files = [ [package.dependencies] distlib = ">=0.3.6,<1" filelock = ">=3.4.1,<4" -importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.8\""} platformdirs = ">=2.4,<4" [package.extras] @@ -4755,7 +4095,6 @@ test = ["covdefaults (>=2.2.2)", "coverage (>=7.1)", "coverage-enable-subprocess name = "wandb" version = "0.13.11" description = "A CLI and library for interacting with the Weights and Biases API." 
-category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4797,7 +4136,6 @@ sweeps = ["sweeps (>=0.2.0)"] name = "watchdog" version = "3.0.0" description = "Filesystem events monitoring" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4837,7 +4175,6 @@ watchmedo = ["PyYAML (>=3.10)"] name = "werkzeug" version = "2.2.3" description = "The comprehensive WSGI web application library." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4855,7 +4192,6 @@ watchdog = ["watchdog"] name = "wheel" version = "0.40.0" description = "A built-package format for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4870,7 +4206,6 @@ test = ["pytest (>=6.0.0)"] name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4883,13 +4218,13 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [extras] -atari = ["AutoROM", "ale-py", "opencv-python"] +atari = ["AutoROM", "ale-py", "opencv-python", "shimmy"] c51 = [] c51-atari = ["AutoROM", "ale-py", "opencv-python"] c51-atari-jax = ["AutoROM", "ale-py", "flax", "jax", "jaxlib", "opencv-python"] c51-jax = ["flax", "jax", "jaxlib"] cloud = ["awscli", "boto3"] -dm-control = ["mujoco", "shimmy"] +dm-control = ["dm-control", "h5py", "mujoco", "shimmy"] docs = ["markdown-include", "mkdocs-material", "openrlbenchmark"] dqn = [] dqn-atari = ["AutoROM", "ale-py", "opencv-python"] @@ -4898,7 +4233,6 @@ dqn-jax = ["flax", "jax", "jaxlib"] envpool = ["envpool"] jax = ["flax", "jax", "jaxlib"] mujoco = ["imageio", "mujoco"] -mujoco-py = ["free-mujoco-py"] 
optuna = ["optuna", "optuna-dashboard"] pettingzoo = ["PettingZoo", "SuperSuit", "multi-agent-ale-py"] plot = [] @@ -4910,5 +4244,5 @@ qdagger-dqn-atari-jax-impalacnn = ["AutoROM", "ale-py", "flax", "jax", "jaxlib", [metadata] lock-version = "2.0" -python-versions = ">=3.7.1,<3.11" -content-hash = "83763cefd7c948380a16349ea5ec80fd36816adace1f8101bc5a50fd686e5a81" +python-versions = ">=3.8,<3.11" +content-hash = "ce1dd6a428e94e30643d2fb0a3fd13f0132d176185a91f7685392d4ec0e7892b" diff --git a/pyproject.toml b/pyproject.toml index ef1652a6c..49c7fabca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cleanrl" -version = "1.1.0" +version = "2.0.0b1" description = "High-quality single file implementation of Deep Reinforcement Learning algorithms with research-friendly features" authors = ["Costa Huang "] packages = [ @@ -12,33 +12,34 @@ license="MIT" readme = "README.md" [tool.poetry.dependencies] -python = ">=3.7.1,<3.11" +python = ">=3.8,<3.11" tensorboard = "^2.10.0" wandb = "^0.13.11" gym = "0.23.1" torch = ">=1.12.1" -stable-baselines3 = "1.2.0" +stable-baselines3 = "2.0.0" gymnasium = ">=0.28.1" moviepy = "^1.0.3" pygame = "2.1.0" huggingface-hub = "^0.11.1" rich = "<12.0" tenacity = "^8.2.2" +tyro = "^0.5.10" +pyyaml = "^6.0.1" -ale-py = {version = "0.7.4", optional = true} -AutoROM = {extras = ["accept-rom-license"], version = "^0.4.2", optional = true} +ale-py = {version = "0.8.1", optional = true} +AutoROM = {extras = ["accept-rom-license"], version = "~0.4.2", optional = true} opencv-python = {version = "^4.6.0.66", optional = true} procgen = {version = "^0.10.7", optional = true} pytest = {version = "^7.1.3", optional = true} mujoco = {version = "<=2.3.3", optional = true} imageio = {version = "^2.14.1", optional = true} -free-mujoco-py = {version = "^2.1.6", optional = true} mkdocs-material = {version = "^8.4.3", optional = true} markdown-include = {version = "^0.7.0", optional = true} openrlbenchmark = {version = 
"^0.1.1b4", optional = true} -jax = {version = "^0.3.17", optional = true} -jaxlib = {version = "^0.3.15", optional = true} -flax = {version = "^0.6.0", optional = true} +jax = {version = "0.4.8", optional = true} +jaxlib = {version = "0.4.7", optional = true} +flax = {version = "0.6.8", optional = true} optuna = {version = "^3.0.1", optional = true} optuna-dashboard = {version = "^0.7.2", optional = true} envpool = {version = "^0.6.4", optional = true} @@ -46,38 +47,34 @@ PettingZoo = {version = "1.18.1", optional = true} SuperSuit = {version = "3.4.0", optional = true} multi-agent-ale-py = {version = "0.1.11", optional = true} boto3 = {version = "^1.24.70", optional = true} -awscli = {version = "^1.25.71", optional = true} -shimmy = {version = ">=1.0.0", extras = ["dm-control"], optional = true} +awscli = {version = "^1.31.0", optional = true} +shimmy = {version = ">=1.1.0", optional = true} +dm-control = {version = ">=1.0.10", optional = true} +h5py = {version = ">=3.7.0", optional = true} +optax = {version = "0.1.4", optional = true} +chex = {version = "0.1.5", optional = true} +numpy = ">=1.21.6" [tool.poetry.group.dev.dependencies] pre-commit = "^2.20.0" - -[tool.poetry.group.isaacgym] -optional = true -[tool.poetry.group.isaacgym.dependencies] -isaacgymenvs = {git = "https://github.com/vwxyzjn/IsaacGymEnvs.git", rev = "poetry", python = ">=3.7.1,<3.10"} -isaacgym = {path = "cleanrl/ppo_continuous_action_isaacgym/isaacgym", develop = true} - - [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.poetry.extras] -atari = ["ale-py", "AutoROM", "opencv-python"] +atari = ["ale-py", "AutoROM", "opencv-python", "shimmy"] procgen = ["procgen"] plot = ["pandas", "seaborn"] pytest = ["pytest"] mujoco = ["mujoco", "imageio"] -mujoco_py = ["free-mujoco-py"] jax = ["jax", "jaxlib", "flax"] docs = ["mkdocs-material", "markdown-include", "openrlbenchmark"] envpool = ["envpool"] optuna = ["optuna", "optuna-dashboard"] pettingzoo = 
["PettingZoo", "SuperSuit", "multi-agent-ale-py"] cloud = ["boto3", "awscli"] -dm_control = ["shimmy", "mujoco"] +dm_control = ["shimmy", "mujoco", "dm-control", "h5py"] # dependencies for algorithm variant (useful when you want to run a specific algorithm) dqn = [] diff --git a/requirements/requirements-atari.txt b/requirements/requirements-atari.txt index c4a8008f8..a3a54a949 100644 --- a/requirements/requirements-atari.txt +++ b/requirements/requirements-atari.txt @@ -1,78 +1,82 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -ale-py==0.7.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -autorom-accept-rom-license==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -autorom[accept-rom-license]==0.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -importlib-resources==5.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -opencv-python==4.7.0.72 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +ale-py==0.8.1 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +autorom-accept-rom-license==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +autorom[accept-rom-license]==0.4.2 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" 
+farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.11" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +opencv-python==4.7.0.72 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" 
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shimmy==1.1.0 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and 
python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-cloud.txt b/requirements/requirements-cloud.txt index 02e73896f..4c8e0292d 100644 --- a/requirements/requirements-cloud.txt +++ b/requirements/requirements-cloud.txt @@ -1,79 +1,82 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -awscli==1.27.132 ; python_full_version >= "3.7.1" and python_version < "3.11" -boto3==1.26.132 ; python_full_version >= "3.7.1" and python_version < "3.11" -botocore==1.29.132 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -docutils==0.16 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -jmespath==1.0.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version 
< "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -s3transfer==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +awscli==1.31.0 ; python_version >= "3.8" and python_version < "3.11" +boto3==1.33.0 ; python_version >= "3.8" and python_version < "3.11" +botocore==1.33.0 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" 
+cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +docutils==0.16 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +jmespath==1.0.1 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" 
+markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +s3transfer==0.8.0 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < 
"3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version < "3.11" and python_version >= "3.8" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-dm_control.txt b/requirements/requirements-dm_control.txt index 6c1974702..4653ae249 100644 --- a/requirements/requirements-dm_control.txt +++ b/requirements/requirements-dm_control.txt @@ -1,84 +1,87 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= 
"3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -dm-control==1.0.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -dm-env==1.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -dm-tree==0.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -h5py==3.8.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" 
and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -labmaze==1.0.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -lxml==4.9.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -mujoco==2.3.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyopengl==3.1.6 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -shimmy[dm-control]==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +dm-control==1.0.11 ; python_version >= "3.8" and python_version < "3.11" +dm-env==1.6 ; python_version >= "3.8" and python_version < "3.11" +dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +glfw==1.12.0 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and 
python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +h5py==3.8.0 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +labmaze==1.0.6 ; python_version >= "3.8" and python_version < "3.11" +lxml==4.9.3 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +mujoco==2.3.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and 
python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyopengl==3.1.6 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shimmy==1.1.0 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 
; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt index 2a7fc6941..fe65f4879 100644 --- a/requirements/requirements-docs.txt +++ b/requirements/requirements-docs.txt @@ -1,94 +1,97 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -dataclasses==0.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -dill==0.3.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -expt==0.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -ghp-import==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -jinja2==3.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown-include==0.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -mergedeep==1.3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-mkdocs-material-extensions==1.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -mkdocs-material==8.5.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -mkdocs==1.4.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -multiprocess==0.70.14 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -openrlbenchmark==0.1.1b4 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pip==22.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pymdown-extensions==9.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml-env-tag==0.1 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -seaborn==0.12.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tabulate==0.9.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tueplots==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -typeguard==2.13.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-watchdog==3.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.11" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +dataclasses==0.6 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +dill==0.3.6 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +expt==0.4.1 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +ghp-import==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" 
+google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.11" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +jinja2==3.1.2 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown-include==0.7.2 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +mergedeep==1.3.4 ; python_version >= "3.8" and python_version < "3.11" +mkdocs-material-extensions==1.1.1 ; python_version >= "3.8" and python_version < "3.11" +mkdocs-material==8.5.11 ; python_version >= "3.8" and python_version < "3.11" +mkdocs==1.4.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +multiprocess==0.70.14 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +openrlbenchmark==0.1.1b4 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" 
+pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +pip==22.3.1 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pymdown-extensions==9.11 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml-env-tag==0.1 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11" +seaborn==0.12.2 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < 
"3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tabulate==0.9.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +tueplots==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +typeguard==2.13.3 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +watchdog==3.0.0 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.11" diff --git a/requirements/requirements-envpool.txt b/requirements/requirements-envpool.txt index 03e3a5909..1d491f4dd 100644 --- a/requirements/requirements-envpool.txt +++ b/requirements/requirements-envpool.txt @@ -1,85 +1,88 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -bitmath==1.3.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -chardet==4.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -dill==0.3.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -dm-env==1.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -dm-tree==0.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -enum-tools==0.9.0.post1 ; python_full_version >= "3.7.1" and python_version < "3.11" -envpool==0.6.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -graphviz==0.20.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -hbutils==0.8.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytimeparse==1.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -treevalue==1.4.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -types-protobuf==4.23.0.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +bitmath==1.3.3.1 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +chardet==4.0.0 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +dill==0.3.6 ; python_version >= "3.8" and python_version < "3.11" +dm-env==1.6 ; python_version >= "3.8" and python_version < "3.11" +dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +enum-tools==0.9.0.post1 ; python_version >= "3.8" and python_version < "3.11" +envpool==0.6.6 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version 
< "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +graphviz==0.20.1 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +hbutils==0.8.6 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= 
"3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytimeparse==1.1.8 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < 
"3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +treevalue==1.4.10 ; python_version >= "3.8" and python_version < "3.11" +types-protobuf==4.23.0.1 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-jax.txt b/requirements/requirements-jax.txt index 30f4a1223..12a3d0861 100644 --- a/requirements/requirements-jax.txt +++ b/requirements/requirements-jax.txt @@ -1,93 +1,97 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cached-property==1.5.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -chex==0.1.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -dm-tree==0.1.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -etils==0.9.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -exceptiongroup==1.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -flax==0.6.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -importlib-resources==5.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -iniconfig==2.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-jax==0.3.25 ; python_full_version >= "3.7.1" and python_version < "3.11" -jaxlib==0.3.25 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -msgpack==1.0.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -opt-einsum==3.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -optax==0.1.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -orbax==0.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pluggy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -pytest==7.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorstore==0.1.28 ; python_full_version >= "3.7.1" and python_version < "3.11" -tomli==2.0.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -toolz==0.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +cached-property==1.5.2 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +chex==0.1.5 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +dm-tree==0.1.8 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +etils==0.9.0 ; python_version >= "3.8" and python_version < "3.11" +exceptiongroup==1.1.1 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +flax==0.6.8 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and 
python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.11" +iniconfig==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +jax==0.4.8 ; python_version >= "3.8" and python_version < "3.11" +jaxlib==0.4.7 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +ml-dtypes==0.2.0 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +msgpack==1.0.5 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version < "3.11" and python_version >= "3.8" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" 
+opt-einsum==3.3.0 ; python_version >= "3.8" and python_version < "3.11" +optax==0.1.4 ; python_version >= "3.8" and python_version < "3.11" +orbax==0.1.0 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +pluggy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +pytest==7.3.1 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" 
+shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +tensorstore==0.1.28 ; python_version >= "3.8" and python_version < "3.11" +tomli==2.0.1 ; python_version >= "3.8" and python_version < "3.11" +toolz==0.12.0 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-mujoco.txt b/requirements/requirements-mujoco.txt index e14987e34..ac8aa1a23 100644 --- a/requirements/requirements-mujoco.txt +++ b/requirements/requirements-mujoco.txt @@ -1,76 +1,79 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and 
python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -mujoco==2.3.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyopengl==3.1.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; 
python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +glfw==1.12.0 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; 
python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +mujoco==2.3.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyopengl==3.1.6 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; 
python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-mujoco_py.txt b/requirements/requirements-mujoco_py.txt deleted file mode 100644 index 60b2edbef..000000000 --- a/requirements/requirements-mujoco_py.txt +++ /dev/null @@ -1,80 +0,0 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -cffi==1.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -cython==0.29.34 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -fasteners==0.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -free-mujoco-py==2.1.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -monotonic==1.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pycparser==2.21 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 
; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" diff --git a/requirements/requirements-optuna.txt b/requirements/requirements-optuna.txt index 5e0840344..3b9f3a2e4 100644 --- a/requirements/requirements-optuna.txt +++ b/requirements/requirements-optuna.txt @@ -1,87 +1,90 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -alembic==1.10.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -bottle==0.12.25 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cmaes==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorlog==6.7.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= 
"3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -greenlet==2.0.2 ; python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "aarch64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "ppc64le" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "x86_64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "amd64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "AMD64" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "win32" or python_full_version >= "3.7.1" and python_version < "3.11" and platform_machine == "WIN32" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -importlib-resources==5.12.0 ; python_full_version >= "3.7.1" and python_version < "3.9" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -joblib==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -mako==1.2.4 ; python_full_version >= "3.7.1" and python_version < 
"3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -optuna-dashboard==0.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -optuna==3.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -scikit-learn==1.0.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -scipy==1.7.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -sqlalchemy==2.0.13 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -threadpoolctl==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +alembic==1.10.4 ; 
python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +bottle==0.12.25 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +cmaes==0.10.0 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +colorlog==6.7.0 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +greenlet==2.0.2 ; python_version >= "3.8" and python_version < "3.11" and platform_machine == "aarch64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "ppc64le" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "x86_64" or python_version >= "3.8" and python_version 
< "3.11" and platform_machine == "amd64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "AMD64" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "win32" or python_version >= "3.8" and python_version < "3.11" and platform_machine == "WIN32" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +importlib-resources==5.12.0 ; python_version >= "3.8" and python_version < "3.9" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +joblib==1.2.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +mako==1.2.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +optuna-dashboard==0.7.3 ; python_version >= "3.8" and python_version < "3.11" +optuna==3.3.0 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version 
>= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +scikit-learn==1.0.2 ; python_version >= "3.8" and python_version < "3.11" +scipy==1.10.1 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +sqlalchemy==2.0.13 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; 
python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +threadpoolctl==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-pettingzoo.txt b/requirements/requirements-pettingzoo.txt index 1127adcd2..461c6023f 100644 --- a/requirements/requirements-pettingzoo.txt +++ b/requirements/requirements-pettingzoo.txt @@ -1,77 +1,80 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-multi-agent-ale-py==0.1.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pettingzoo==1.18.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -supersuit==3.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -tinyscaler==1.2.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < 
"3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" 
+multi-agent-ale-py==0.1.11 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pettingzoo==1.18.1 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < 
"3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +supersuit==3.4.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +tinyscaler==1.2.5 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements-procgen.txt b/requirements/requirements-procgen.txt index da2a0dc91..f62fa2026 100644 --- a/requirements/requirements-procgen.txt +++ b/requirements/requirements-procgen.txt @@ -1,80 +1,83 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -cffi==1.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= 
"3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -glcontext==2.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -glfw==1.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym3==0.3.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; 
python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moderngl==5.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -procgen==0.10.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pycparser==2.21 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2023.3 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 ; python_version >= "3.8" and python_version < 
"3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +cffi==1.15.1 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +glcontext==2.3.7 ; python_version >= "3.8" and python_version < "3.11" +glfw==1.12.0 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym3==0.3.3 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; 
python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moderngl==5.8.2 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +procgen==0.10.7 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pycparser==2.21 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" 
+pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 94b0b02bd..5cdc73d10 100644 --- 
a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,73 +1,76 @@ -absl-py==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -appdirs==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -cachetools==5.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2023.5.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==3.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -cloudpickle==2.2.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -commonmark==0.9.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -cycler==0.11.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -decorator==4.4.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -docker-pycreds==0.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -farama-notifications==0.0.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -filelock==3.12.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -fonttools==4.38.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitdb==4.0.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -gitpython==3.1.31 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth==2.18.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.54.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym-notices==0.0.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -gym==0.23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -gymnasium==0.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -huggingface-hub==0.11.1 ; python_full_version >= "3.7.1" and 
python_version < "3.11" -idna==3.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio-ffmpeg==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -imageio==2.28.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==5.2.0 ; python_full_version >= "3.7.1" and python_version < "3.10" -jax-jumpy==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -kiwisolver==1.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -markdown==3.3.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -matplotlib==3.5.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -moviepy==1.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==23.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pathtools==0.1.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -pillow==9.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proglog==0.1.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.20.3 ; python_version < "3.11" and python_full_version >= "3.7.1" -psutil==5.9.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygame==2.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pygments==2.15.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-pytz==2023.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==5.4.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.30.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rich==11.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -sentry-sdk==1.22.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setproctitle==1.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==67.7.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -smmap==5.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -stable-baselines3==1.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tenacity==8.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-data-server==0.6.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard-plugin-wit==1.8.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tensorboard==2.11.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -torch==1.12.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.65.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.5.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -urllib3==1.26.15 ; python_full_version >= "3.7.1" and python_version < "3.11" -wandb==0.13.11 ; python_full_version >= "3.7.1" and python_version < "3.11" -werkzeug==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -wheel==0.40.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -zipp==3.15.0 ; python_full_version >= "3.7.1" and python_version < "3.10" +absl-py==1.4.0 ; python_version >= "3.8" and python_version < "3.11" +appdirs==1.4.4 
; python_version >= "3.8" and python_version < "3.11" +cachetools==5.3.0 ; python_version >= "3.8" and python_version < "3.11" +certifi==2023.5.7 ; python_version >= "3.8" and python_version < "3.11" +charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "3.11" +click==8.1.3 ; python_version >= "3.8" and python_version < "3.11" +cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "3.11" +colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.11" +commonmark==0.9.1 ; python_version >= "3.8" and python_version < "3.11" +cycler==0.11.0 ; python_version >= "3.8" and python_version < "3.11" +decorator==4.4.2 ; python_version >= "3.8" and python_version < "3.11" +docker-pycreds==0.4.0 ; python_version >= "3.8" and python_version < "3.11" +docstring-parser==0.15 ; python_version >= "3.8" and python_version < "3.11" +farama-notifications==0.0.4 ; python_version >= "3.8" and python_version < "3.11" +filelock==3.12.0 ; python_version >= "3.8" and python_version < "3.11" +fonttools==4.38.0 ; python_version >= "3.8" and python_version < "3.11" +gitdb==4.0.10 ; python_version >= "3.8" and python_version < "3.11" +gitpython==3.1.31 ; python_version >= "3.8" and python_version < "3.11" +google-auth-oauthlib==0.4.6 ; python_version >= "3.8" and python_version < "3.11" +google-auth==2.18.0 ; python_version >= "3.8" and python_version < "3.11" +grpcio==1.54.0 ; python_version >= "3.8" and python_version < "3.11" +gym-notices==0.0.8 ; python_version >= "3.8" and python_version < "3.11" +gym==0.23.1 ; python_version >= "3.8" and python_version < "3.11" +gymnasium==0.28.1 ; python_version >= "3.8" and python_version < "3.11" +huggingface-hub==0.11.1 ; python_version >= "3.8" and python_version < "3.11" +idna==3.4 ; python_version >= "3.8" and python_version < "3.11" +imageio-ffmpeg==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +imageio==2.28.1 ; python_version >= "3.8" and python_version < "3.11" +importlib-metadata==5.2.0 ; 
python_version >= "3.8" and python_version < "3.10" +jax-jumpy==1.0.0 ; python_version >= "3.8" and python_version < "3.11" +kiwisolver==1.4.4 ; python_version >= "3.8" and python_version < "3.11" +markdown==3.3.7 ; python_version >= "3.8" and python_version < "3.11" +markupsafe==2.1.2 ; python_version >= "3.8" and python_version < "3.11" +matplotlib==3.5.3 ; python_version >= "3.8" and python_version < "3.11" +moviepy==1.0.3 ; python_version >= "3.8" and python_version < "3.11" +numpy==1.24.4 ; python_version >= "3.8" and python_version < "3.11" +oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" +packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" +pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" +pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" +pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" +proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" +protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" +psutil==5.9.5 ; python_version >= "3.8" and python_version < "3.11" +pyasn1-modules==0.3.0 ; python_version >= "3.8" and python_version < "3.11" +pyasn1==0.5.0 ; python_version >= "3.8" and python_version < "3.11" +pygame==2.1.0 ; python_version >= "3.8" and python_version < "3.11" +pygments==2.15.1 ; python_version >= "3.8" and python_version < "3.11" +pyparsing==3.0.9 ; python_version >= "3.8" and python_version < "3.11" +python-dateutil==2.8.2 ; python_version >= "3.8" and python_version < "3.11" +pytz==2023.3 ; python_version >= "3.8" and python_version < "3.11" +pyyaml==6.0.1 ; python_version >= "3.8" and python_version < "3.11" +requests-oauthlib==1.3.1 ; python_version >= "3.8" and python_version < "3.11" +requests==2.30.0 ; python_version >= "3.8" and python_version < "3.11" +rich==11.2.0 ; python_version >= "3.8" and python_version < "3.11" +rsa==4.7.2 ; python_version >= "3.8" and python_version < "3.11" +sentry-sdk==1.22.2 ; 
python_version >= "3.8" and python_version < "3.11" +setproctitle==1.3.2 ; python_version >= "3.8" and python_version < "3.11" +setuptools==67.7.2 ; python_version >= "3.8" and python_version < "3.11" +shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" +six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" +smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" +stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" +tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" +tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" +torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" +tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" +typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11" +tyro==0.5.10 ; python_version >= "3.8" and python_version < "3.11" +urllib3==1.26.15 ; python_version >= "3.8" and python_version < "3.11" +wandb==0.13.11 ; python_version >= "3.8" and python_version < "3.11" +werkzeug==2.2.3 ; python_version >= "3.8" and python_version < "3.11" +wheel==0.40.0 ; python_version >= "3.8" and python_version < "3.11" +zipp==3.15.0 ; python_version >= "3.8" and python_version < "3.10" diff --git a/tests/test_atari_gymnasium.py b/tests/test_atari_gymnasium.py index e3eb6f966..06f95a838 100644 --- a/tests/test_atari_gymnasium.py +++ b/tests/test_atari_gymnasium.py @@ -11,7 +11,7 @@ def test_dqn(): def test_dqn_eval(): subprocess.run( - "python cleanrl/dqn_atari.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", + "python cleanrl/dqn_atari.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", shell=True, check=True, ) @@ -27,7 +27,7 @@ def 
test_qdagger_dqn_atari_impalacnn(): def test_qdagger_dqn_atari_impalacnn_eval(): subprocess.run( - "python cleanrl/qdagger_dqn_atari_impalacnn.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1", + "python cleanrl/qdagger_dqn_atari_impalacnn.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1", shell=True, check=True, ) @@ -43,7 +43,7 @@ def test_c51_atari(): def test_c51_atari_eval(): subprocess.run( - "python cleanrl/c51_atari.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", + "python cleanrl/c51_atari.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", shell=True, check=True, ) diff --git a/tests/test_atari_jax_gymnasium.py b/tests/test_atari_jax_gymnasium.py index a9e91a781..aa51ac0cb 100644 --- a/tests/test_atari_jax_gymnasium.py +++ b/tests/test_atari_jax_gymnasium.py @@ -11,7 +11,7 @@ def test_dqn_jax(): def test_dqn_jax_eval(): subprocess.run( - "python cleanrl/dqn_atari_jax.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", + "python cleanrl/dqn_atari_jax.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", shell=True, check=True, ) @@ -27,7 +27,7 @@ def test_qdagger_dqn_atari_jax_impalacnn(): def test_qdagger_dqn_atari_jax_impalacnn_eval(): subprocess.run( - "python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1", + "python cleanrl/qdagger_dqn_atari_jax_impalacnn.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4 --teacher-steps 16 --offline-steps 16 --teacher-eval-episodes 1", shell=True, 
check=True, ) @@ -43,7 +43,7 @@ def test_c51_atari_jax(): def test_c51_atari_jax_eval(): subprocess.run( - "python cleanrl/c51_atari_jax.py --save-model True --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", + "python cleanrl/c51_atari_jax.py --save-model --learning-starts 10 --total-timesteps 16 --buffer-size 10 --batch-size 4", shell=True, check=True, ) diff --git a/tests/test_classic_control_gymnasium.py b/tests/test_classic_control_gymnasium.py index 438143af7..ff639ea0d 100644 --- a/tests/test_classic_control_gymnasium.py +++ b/tests/test_classic_control_gymnasium.py @@ -19,7 +19,7 @@ def test_c51(): def test_c51_eval(): subprocess.run( - "python cleanrl/c51.py --save-model True --learning-starts 200 --total-timesteps 205", + "python cleanrl/c51.py --save-model --learning-starts 200 --total-timesteps 205", shell=True, check=True, ) diff --git a/tests/test_classic_control_jax_gymnasium.py b/tests/test_classic_control_jax_gymnasium.py index e413e3588..638bb2215 100644 --- a/tests/test_classic_control_jax_gymnasium.py +++ b/tests/test_classic_control_jax_gymnasium.py @@ -19,7 +19,7 @@ def test_c51_jax(): def test_c51_jax_eval(): subprocess.run( - "python cleanrl/c51_jax.py --save-model True --learning-starts 200 --total-timesteps 205", + "python cleanrl/c51_jax.py --save-model --learning-starts 200 --total-timesteps 205", shell=True, check=True, ) diff --git a/tests/test_envpool.py b/tests/test_envpool.py index d16325ea0..cbf90e230 100644 --- a/tests/test_envpool.py +++ b/tests/test_envpool.py @@ -35,7 +35,7 @@ def test_ppo_atari_envpool_xla_jax_scan(): def test_ppo_atari_envpool_xla_jax_scan_eval(): subprocess.run( - "python cleanrl/ppo_atari_envpool_xla_jax_scan.py --save-model True --num-envs 8 --num-steps 6 --update-epochs 1 --num-minibatches 1 --total-timesteps 256", + "python cleanrl/ppo_atari_envpool_xla_jax_scan.py --save-model --num-envs 8 --num-steps 6 --update-epochs 1 --num-minibatches 1 --total-timesteps 256", shell=True, 
check=True, ) diff --git a/tests/test_mujoco.py b/tests/test_mujoco.py index bf0b5204b..77e91540f 100644 --- a/tests/test_mujoco.py +++ b/tests/test_mujoco.py @@ -57,12 +57,12 @@ def test_mujoco_eval(): Test mujoco_eval """ subprocess.run( - "python cleanrl/ddpg_continuous_action.py --save-model True --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105", + "python cleanrl/ddpg_continuous_action.py --save-model --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105", shell=True, check=True, ) subprocess.run( - "python cleanrl/ddpg_continuous_action_jax.py --save-model True --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105", + "python cleanrl/ddpg_continuous_action_jax.py --save-model --env-id Hopper-v4 --learning-starts 100 --batch-size 32 --total-timesteps 105", shell=True, check=True, ) diff --git a/tests/test_mujoco_py.py b/tests/test_mujoco_py.py deleted file mode 100644 index f97654f8f..000000000 --- a/tests/test_mujoco_py.py +++ /dev/null @@ -1,53 +0,0 @@ -import subprocess - - -def test_mujoco_py(): - """ - Test mujoco_py - """ - subprocess.run( - "python cleanrl/ddpg_continuous_action.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105", - shell=True, - check=True, - ) - subprocess.run( - "python cleanrl/ddpg_continuous_action_jax.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105", - shell=True, - check=True, - ) - subprocess.run( - "python cleanrl/td3_continuous_action_jax.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105", - shell=True, - check=True, - ) - subprocess.run( - "python cleanrl/td3_continuous_action.py --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105", - shell=True, - check=True, - ) - subprocess.run( - "python cleanrl/ppo_continuous_action.py --env-id Hopper-v2 --num-envs 1 --num-steps 64 --total-timesteps 256", - shell=True, - check=True, - ) 
- subprocess.run( - "python cleanrl/sac_continuous_action.py --env-id Hopper-v2 --batch-size 128 --total-timesteps 135", - shell=True, - check=True, - ) - - -def test_mujoco_py_eval(): - """ - Test mujoco_py_eval - """ - subprocess.run( - "python cleanrl/ddpg_continuous_action.py --save-model True --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105", - shell=True, - check=True, - ) - subprocess.run( - "python cleanrl/ddpg_continuous_action_jax.py --save-model True --env-id Hopper-v2 --learning-starts 100 --batch-size 32 --total-timesteps 105", - shell=True, - check=True, - ) diff --git a/tests/test_tuner.py b/tests/test_tuner.py index c4c0d1d32..d6f1933e0 100644 --- a/tests/test_tuner.py +++ b/tests/test_tuner.py @@ -14,12 +14,12 @@ def test_tuner(): "Acrobot-v1": [-500, 0], }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4]), "num-steps": trial.suggest_categorical("num-steps", [1200]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 1200, "num-envs": 1, }, diff --git a/tuner_example.py b/tuner_example.py index 9e01a6048..5db4b2f9e 100644 --- a/tuner_example.py +++ b/tuner_example.py @@ -13,12 +13,12 @@ "Acrobot-v1": [-500, 0], }, params_fn=lambda trial: { - "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003), + "learning-rate": trial.suggest_float("learning-rate", 0.0003, 0.003, log=True), "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]), "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), 
"num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]), - "vf-coef": trial.suggest_uniform("vf-coef", 0, 5), - "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5), + "vf-coef": trial.suggest_float("vf-coef", 0, 5), + "max-grad-norm": trial.suggest_float("max-grad-norm", 0, 5), "total-timesteps": 100000, "num-envs": 16, },