diff --git a/cleanrl/ppo_pettingzoo_ma_atari.py b/cleanrl/ppo_pettingzoo_ma_atari.py index 87b2b3123..d92ce6c58 100644 --- a/cleanrl/ppo_pettingzoo_ma_atari.py +++ b/cleanrl/ppo_pettingzoo_ma_atari.py @@ -1,81 +1,83 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_pettingzoo_ma_ataripy -import argparse import importlib import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass -import gym +import gymnasium as gym import numpy as np import supersuit as ss import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture_video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="pong_v3", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=20000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=16, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor 
gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "pong_v3" + """the id of the environment""" + total_timesteps: int = 20000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 16 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" def layer_init(layer, std=np.sqrt(2), bias_const=0.0): @@ -118,7 +120,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -156,11 +161,10 @@ def get_action_and_value(self, x, action=None): env = ss.frame_stack_v1(env, 4) env = 
ss.agent_indicator_v0(env, type_only=False) env = ss.pettingzoo_env_to_vec_env_v1(env) - envs = ss.concat_vec_envs_v1(env, args.num_envs // 2, num_cpus=0, base_class="gym") + envs = ss.concat_vec_envs_v1(env, args.num_envs // 2, num_cpus=0, base_class="gymnasium") envs.single_observation_space = envs.observation_space envs.single_action_space = envs.action_space envs.is_vector_env = True - envs = gym.wrappers.RecordEpisodeStatistics(envs) if args.capture_video: envs = gym.wrappers.RecordVideo(envs, f"videos/{run_name}") assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" @@ -173,27 +177,31 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminations = torch.zeros((args.num_steps, args.num_envs)).to(device) + truncations = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs, info = envs.reset(seed=args.seed) + next_obs = torch.Tensor(next_obs).to(device) + next_termination = torch.zeros(args.num_envs).to(device) + next_truncation = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs - dones[step] = next_done + terminations[step] = next_termination + truncations[step] = next_truncation # ALGO LOGIC: action logic with torch.no_grad(): @@ -203,10 +211,15 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, termination, truncation, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_termination, next_truncation = ( + torch.Tensor(next_obs).to(device), + torch.Tensor(termination).to(device), + torch.Tensor(truncation).to(device), + ) + # TODO: fix this for idx, item in enumerate(info): player_idx = idx % 2 if "episode" in item.keys(): @@ -219,6 +232,8 @@ def get_action_and_value(self, x, action=None): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 + next_done = np.logical_or(next_termination, next_truncation) + dones = np.logical_or(terminations, truncations) for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: nextnonterminal = 1.0 - next_done @@ -289,9 +304,8 @@ def get_action_and_value(self, x, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/docs/rl-algorithms/ppo.md b/docs/rl-algorithms/ppo.md index e83b38e63..c2cc19181 100644 --- a/docs/rl-algorithms/ppo.md +++ b/docs/rl-algorithms/ppo.md @@ -1029,7 +1029,7 @@ Tracked experiments and game play videos: ## `ppo_pettingzoo_ma_atari.py` -[ppo_pettingzoo_ma_atari.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_pettingzoo_ma_atari.py) trains an agent to learn playing Atari games via selfplay. The selfplay environment is implemented as a vectorized environment from [PettingZoo.ml](https://www.pettingzoo.ml/atari). The basic idea is to create vectorized environment $E$ with `num_envs = N`, where $N$ is the number of players in the game. Say $N = 2$, then the 0-th sub environment of $E$ will return the observation for player 0 and 1-th sub environment will return the observation of player 1. Then the two environments takes a batch of 2 actions and execute them for player 0 and player 1, respectively. See "Vectorized architecture" in [The 37 Implementation Details of Proximal Policy Optimization](https://iclr-blog-track.github.io/2022/03/25/ppo-implementation-details/) for more detail. +[ppo_pettingzoo_ma_atari.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_pettingzoo_ma_atari.py) trains an agent to play Atari games via selfplay. The selfplay environment is implemented as a vectorized environment from [PettingZoo](https://pettingzoo.farama.org/environments/atari/). The basic idea is to create a vectorized environment $E$ with `num_envs = N`, where $N$ is the number of players in the game. Say $N = 2$; then the 0-th sub-environment of $E$ will return the observation for player 0 and the 1-st sub-environment will return the observation for player 1. The vectorized environment then takes a batch of 2 actions and executes them for player 0 and player 1, respectively. See "Vectorized architecture" in [The 37 Implementation Details of Proximal Policy Optimization](https://iclr-blog-track.github.io/2022/03/25/ppo-implementation-details/) for more details. 
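To make the vectorized selfplay layout concrete, here is a minimal sketch assembled from the SuperSuit wrappers that appear in this diff. It is a sketch rather than code lifted from the repository: the `pong_v3.parallel_env()` call and the random-action step are illustrative assumptions, and the image preprocessing wrappers used by the real script are omitted for brevity.

```python
# Minimal sketch (not verbatim from ppo_pettingzoo_ma_atari.py): expose a
# 2-player PettingZoo Atari game as a Gymnasium-style vectorized environment
# in which sub-env 0 observes player 0 and sub-env 1 observes player 1.
import importlib

import numpy as np
import supersuit as ss

env = importlib.import_module("pettingzoo.atari.pong_v3").parallel_env()  # assumed constructor call
env = ss.frame_stack_v1(env, 4)
env = ss.agent_indicator_v0(env, type_only=False)  # add agent-indicator channels so one policy can play both sides
env = ss.pettingzoo_env_to_vec_env_v1(env)
# 8 copies of the 2-player game -> 16 sub-environments (num_envs // 2 game copies in the script)
envs = ss.concat_vec_envs_v1(env, 8, num_cpus=0, base_class="gymnasium")

obs, info = envs.reset(seed=1)
n_subenvs = 2 * 8  # players per game x game copies
actions = np.array([envs.action_space.sample() for _ in range(n_subenvs)])
# Gymnasium step API: separate termination and truncation flags per sub-environment
obs, rewards, terminations, truncations, info = envs.step(actions)
```

Even-indexed sub-environments correspond to player 0 and odd-indexed ones to player 1, which is why the episode logging in the hunk above computes `player_idx = idx % 2`.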
`ppo_pettingzoo_ma_atari.py` has the following features: @@ -1064,7 +1064,7 @@ Tracked experiments and game play videos: python cleanrl/ppo_pettingzoo_ma_atari.py --env-id surround_v2 ``` -See [https://www.pettingzoo.ml/atari](https://www.pettingzoo.ml/atari) for a full-list of supported environments such as `basketball_pong_v3`. Notice pettingzoo sometimes introduces breaking changes, so make sure to install the pinned dependencies via `poetry`. +See [https://pettingzoo.farama.org/environments/atari/](https://pettingzoo.farama.org/environments/atari/) for a full-list of supported environments such as `basketball_pong_v3`. Notice pettingzoo sometimes introduces breaking changes, so make sure to install the pinned dependencies via `poetry`. ### Explanation of the logged metrics diff --git a/poetry.lock b/poetry.lock index f30baf28c..4cc2106a0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "absl-py" @@ -1707,6 +1707,11 @@ files = [ {file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70635d1cdb0147a02efb6b3f607a52cdc51723bc3dcc42717a0d4ef55fa0a987"}, {file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff472793238bd9b6dabea8094594d6074ad3c111455de3afcae72f6c40c6817e"}, {file = "labmaze-1.0.6-cp311-cp311-win_amd64.whl", hash = "sha256:2317e65e12fa3d1abecda7e0488dab15456cee8a2e717a586bfc8f02a91579e7"}, + {file = "labmaze-1.0.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e36b6fadcd78f22057b597c1c77823e806a0987b3bdfbf850e14b6b5b502075e"}, + {file = "labmaze-1.0.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d1a4f8de29c2c3d7f14163759b69cd3f237093b85334c983619c1db5403a223b"}, + {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a394f8bb857fcaa2884b809d63e750841c2662a106cfe8c045f2112d201ac7d5"}, + {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d17abb69d4dfc56183afb5c317e8b2eaca0587abb3aabd2326efd3143c81f4e"}, + {file = "labmaze-1.0.6-cp312-cp312-win_amd64.whl", hash = "sha256:5af997598cc46b1929d1c5a1febc32fd56c75874fe481a2a5982c65cee8450c9"}, {file = "labmaze-1.0.6-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:a4c5bc6e56baa55ce63b97569afec2f80cab0f6b952752a131e1f83eed190a53"}, {file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3955f24fe5f708e1e97495b4cfe284b70ae4fd51be5e17b75a6fc04ffbd67bca"}, {file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed96ddc0bb8d66df36428c94db83949fd84a15867e8250763a4c5e3d82104c54"}, @@ -2645,29 +2650,28 @@ files = [ [[package]] name = "pettingzoo" -version = "1.18.1" -description = "Gym for multi-agent reinforcement learning" +version = "1.24.3" +description = "Gymnasium for multi-agent reinforcement learning." 
optional = true -python-versions = ">=3.7, <3.11" +python-versions = ">=3.8" files = [ - {file = "PettingZoo-1.18.1-py3-none-any.whl", hash = "sha256:25ae45fcfa2c623800e1f81b98ae50f5f5a1af6caabc5946764248de71a0371d"}, - {file = "PettingZoo-1.18.1.tar.gz", hash = "sha256:7e6a3231dc3fc3801af83fe880f199f570d46a9acdcb990f2a223f121b6e5038"}, + {file = "pettingzoo-1.24.3-py3-none-any.whl", hash = "sha256:23ed90517d2e8a7098bdaf5e31234b3a7f7b73ca578d70d1ca7b9d0cb0e37982"}, + {file = "pettingzoo-1.24.3.tar.gz", hash = "sha256:91f9094f18e06fb74b98f4099cd22e8ae4396125e51719d50b30c9f1c7ab07e6"}, ] [package.dependencies] -gym = ">=0.21.0" -numpy = ">=1.18.0" +gymnasium = ">=0.28.0" +numpy = ">=1.21.0" [package.extras] -all = ["box2d-py (==2.3.5)", "chess (==1.7.0)", "hanabi-learning-environment (==0.0.1)", "magent (==0.2.2)", "multi-agent-ale-py (==0.1.11)", "pillow (>=8.0.1)", "pygame (==2.1.0)", "pyglet (>=1.4.0)", "pymunk (==6.2.0)", "rlcard (==1.0.4)", "scipy (>=1.4.1)"] -atari = ["multi-agent-ale-py (==0.1.11)", "pygame (==2.1.0)"] -butterfly = ["pygame (==2.1.0)", "pymunk (==6.2.0)"] -classic = ["chess (==1.7.0)", "hanabi-learning-environment (==0.0.1)", "pygame (==2.1.0)", "rlcard (==1.0.4)"] -magent = ["magent (==0.2.2)"] -mpe = ["pyglet (>=1.4.0)"] +all = ["box2d-py (==2.3.5)", "chess (==1.9.4)", "multi-agent-ale-py (==0.1.11)", "pillow (>=8.0.1)", "pygame (==2.3.0)", "pymunk (==6.2.0)", "rlcard (==1.0.5)", "scipy (>=1.4.1)", "shimmy[openspiel] (>=1.2.0)"] +atari = ["multi-agent-ale-py (==0.1.11)", "pygame (==2.3.0)"] +butterfly = ["pygame (==2.3.0)", "pymunk (==6.2.0)"] +classic = ["chess (==1.9.4)", "pygame (==2.3.0)", "rlcard (==1.0.5)", "shimmy[openspiel] (>=1.2.0)"] +mpe = ["pygame (==2.3.0)"] other = ["pillow (>=8.0.1)"] -sisl = ["box2d-py (==2.3.5)", "pygame (==2.1.0)", "scipy (>=1.4.1)"] -tests = ["codespell", "flake8", "isort", "pynput", "pytest"] +sisl = ["box2d-py (==2.3.5)", "pygame (==2.3.0)", "pymunk (==6.2.0)", "scipy (>=1.4.1)"] +testing = ["AutoROM", "pre-commit", "pynput", "pytest", "pytest-cov", "pytest-markdown-docs", "pytest-xdist"] [[package]] name = "pillow" @@ -3707,19 +3711,22 @@ tests = ["black", "isort (>=5.0)", "mypy", "pytest", "pytest-cov", "pytest-env", [[package]] name = "supersuit" -version = "3.4.0" -description = "Wrappers for Gym and PettingZoo" +version = "3.9.2" +description = "Wrappers for Gymnasium and PettingZoo" optional = true -python-versions = ">=3.7" +python-versions = "<3.12,>=3.8" files = [ - {file = "SuperSuit-3.4.0-py3-none-any.whl", hash = "sha256:45b541b2b29faffd6494b53d649c8d94889966f407fd380b3e3211f9e68a49e9"}, - {file = "SuperSuit-3.4.0.tar.gz", hash = "sha256:5999beec8d7923c11c9511eaa9dec8a38269cb0d7af029e17903c79234233409"}, + {file = "SuperSuit-3.9.2-py3-none-any.whl", hash = "sha256:1dcecd419100eeed19c51444a341dd7ab14deaf3cd775ba475de4e63eba6159c"}, + {file = "SuperSuit-3.9.2.tar.gz", hash = "sha256:60e384fe63ab6752acbfc34f991f48d6346592b1dd3475138e3599ab41eaaf24"}, ] [package.dependencies] -gym = ">=0.22.0" -pettingzoo = ">=1.15.0" -tinyscaler = ">=1.0.4" +gymnasium = ">=0.28.1" +numpy = ">=1.19.0" +tinyscaler = ">=1.2.6" + +[package.extras] +testing = ["moviepy (>=1.0.0)", "pettingzoo[butterfly,classic] (>=1.23.1)", "pytest", "stable-baselines3 (>=2.0.0)"] [[package]] name = "tabulate" @@ -3837,20 +3844,31 @@ files = [ [[package]] name = "tinyscaler" -version = "1.2.5" -description = "A tiny, simple image scaler" +version = "1.2.7" +description = "A tiny, simple image scaler." 
optional = true -python-versions = ">=3.7, <3.11" -files = [ - {file = "tinyscaler-1.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f32d794fc2b9c5e4aa3b435d040f9e75b11f55ab41b32580f2c8e8dfb350f25"}, - {file = "tinyscaler-1.2.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4af0a9502e9ef118c84de80b09544407c8dbbe815af215b1abb8eb170271ab71"}, - {file = "tinyscaler-1.2.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0bde14fb15027d73f4cc5ac837e849feb1cbedbfc0a0c0928f11756f08f6626"}, - {file = "tinyscaler-1.2.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46c75799068330ff7c28fd01f10409d4f12c22f1adbe732f1699228449a4d712"}, - {file = "tinyscaler-1.2.5.tar.gz", hash = "sha256:deb47df1a53a55b53f0ae15b89b4814af184d149a8149385e54e11afc57364a5"}, +python-versions = "<3.12,>=3.7" +files = [ + {file = "tinyscaler-1.2.7-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bbb98ced396d4829a41aa9c7c895df4bcb3801a3bbe963978c90d12b07110731"}, + {file = "tinyscaler-1.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d062e0e33f6104d625fff9b57aa53511c39d2dc3bb711686f6992a7fbfe41336"}, + {file = "tinyscaler-1.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:a96f008975d4d167102a2671fb54fb6ace6ff2580fede3b79daeca99a01e5d6e"}, + {file = "tinyscaler-1.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bf63243a08e214e3db149435741b779db357c376636e17ddf153bf9f6ada041c"}, + {file = "tinyscaler-1.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d2d129f71c518d9c0c25f5e9f3a7f7a31af62e7e7e6f8750ddf0154ed76a58a"}, + {file = "tinyscaler-1.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:c14d302cd609d8c8e53ddf15b3ab43fa3c975d648ffcf16276c8b131ab849f85"}, + {file = "tinyscaler-1.2.7-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3ef723fbe119614dfdd8a7bd40d73c17defaac6765f60c44693858bd5cd70fbc"}, + {file = "tinyscaler-1.2.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6700a37bd42615944099994f2aa473be215e25d79803fcac9de849205c7b149"}, + {file = "tinyscaler-1.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:26d488778686392a0441e598df7ebc45ad014663e60384ef6170dd793f80d275"}, + {file = "tinyscaler-1.2.7-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:f80203589d883896c86fe94165967be453fbb0fe47c9bc64521aee15e125f202"}, + {file = "tinyscaler-1.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef4843aaa2647d7ae7a26ba66dbd1d1b31d161ca558f2c385bc6b02277d27fdb"}, + {file = "tinyscaler-1.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:8e6b605ef00fc65a27f294742514f67d9b4c37d41bfe586e2609ab03a41f2e74"}, + {file = "tinyscaler-1.2.7-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:2847420c81064c8bd3397bdcd83e2706cd914cdb9cbde5300ed968c14954b9d3"}, + {file = "tinyscaler-1.2.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d86fe85fa37cfaedb521c9eb3a804b6ab202924be221049b784220c5ca49546"}, + {file = "tinyscaler-1.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:ce1e10fc54d02bb49ea1f72f76d320c50739eb4ff3e6cbb82148b4f84272220b"}, + {file = "tinyscaler-1.2.7.tar.gz", hash = "sha256:1c0b34b41cca3ae9b09c20fee27499833345b9264617bdd23c896733676d82d8"}, ] [package.dependencies] -numpy = "*" +numpy = ">=1.21.0" [[package]] name = "tomli" @@ -4245,4 +4263,4 @@ qdagger-dqn-atari-jax-impalacnn = ["AutoROM", "ale-py", "flax", "jax", "jaxlib", [metadata] lock-version = "2.0" python-versions = 
">=3.8,<3.11" -content-hash = "ce1dd6a428e94e30643d2fb0a3fd13f0132d176185a91f7685392d4ec0e7892b" +content-hash = "aaa9d84a456774e5f9ecf02beade2b4f42d71980872f1f929305b05c57d73958" diff --git a/pyproject.toml b/pyproject.toml index 49c7fabca..e5b4282c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,8 +43,8 @@ flax = {version = "0.6.8", optional = true} optuna = {version = "^3.0.1", optional = true} optuna-dashboard = {version = "^0.7.2", optional = true} envpool = {version = "^0.6.4", optional = true} -PettingZoo = {version = "1.18.1", optional = true} -SuperSuit = {version = "3.4.0", optional = true} +PettingZoo = {version = "^1.24.3", optional = true} +SuperSuit = {version = ">=3.9.2", optional = true} multi-agent-ale-py = {version = "0.1.11", optional = true} boto3 = {version = "^1.24.70", optional = true} awscli = {version = "^1.31.0", optional = true} diff --git a/requirements/requirements-pettingzoo.txt b/requirements/requirements-pettingzoo.txt index 461c6023f..f997ba402 100644 --- a/requirements/requirements-pettingzoo.txt +++ b/requirements/requirements-pettingzoo.txt @@ -39,7 +39,7 @@ oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" -pettingzoo==1.18.1 ; python_version >= "3.8" and python_version < "3.11" +pettingzoo==1.24.3 ; python_version >= "3.8" and python_version < "3.11" pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" @@ -63,12 +63,12 @@ shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" -supersuit==3.4.0 ; python_version >= "3.8" and python_version < "3.11" +supersuit==3.9.2 ; python_version >= "3.8" and python_version < "3.11" tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" -tinyscaler==1.2.5 ; python_version >= "3.8" and python_version < "3.11" +tinyscaler==1.2.7 ; python_version >= "3.8" and python_version < "3.11" torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"