diff --git a/cleanrl/ppo_pettingzoo_ma_atari.py b/cleanrl/ppo_pettingzoo_ma_atari.py index 87b2b3123..d92ce6c58 100644 --- a/cleanrl/ppo_pettingzoo_ma_atari.py +++ b/cleanrl/ppo_pettingzoo_ma_atari.py @@ -1,81 +1,83 @@ # docs and experiment results can be found at https://docs.cleanrl.dev/rl-algorithms/ppo/#ppo_pettingzoo_ma_ataripy -import argparse import importlib import os import random import time -from distutils.util import strtobool +from dataclasses import dataclass -import gym +import gymnasium as gym import numpy as np import supersuit as ss import torch import torch.nn as nn import torch.optim as optim +import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter -def parse_args(): - # fmt: off - parser = argparse.ArgumentParser() - parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"), - help="the name of this experiment") - parser.add_argument("--seed", type=int, default=1, - help="seed of the experiment") - parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, `torch.backends.cudnn.deterministic=False`") - parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="if toggled, cuda will be enabled by default") - parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="if toggled, this experiment will be tracked with Weights and Biases") - parser.add_argument("--wandb-project-name", type=str, default="cleanRL", - help="the wandb's project name") - parser.add_argument("--wandb-entity", type=str, default=None, - help="the entity (team) of wandb's project") - parser.add_argument("--capture_video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True, - help="whether to capture videos of the agent performances (check out `videos` folder)") +@dataclass +class Args: + exp_name: str = os.path.basename(__file__)[: -len(".py")] + """the name of this experiment""" + seed: int = 1 + """seed of the experiment""" + torch_deterministic: bool = True + """if toggled, `torch.backends.cudnn.deterministic=False`""" + cuda: bool = True + """if toggled, cuda will be enabled by default""" + track: bool = False + """if toggled, this experiment will be tracked with Weights and Biases""" + wandb_project_name: str = "cleanRL" + """the wandb's project name""" + wandb_entity: str = None + """the entity (team) of wandb's project""" + capture_video: bool = False + """whether to capture videos of the agent performances (check out `videos` folder)""" # Algorithm specific arguments - parser.add_argument("--env-id", type=str, default="pong_v3", - help="the id of the environment") - parser.add_argument("--total-timesteps", type=int, default=20000000, - help="total timesteps of the experiments") - parser.add_argument("--learning-rate", type=float, default=2.5e-4, - help="the learning rate of the optimizer") - parser.add_argument("--num-envs", type=int, default=16, - help="the number of parallel game environments") - parser.add_argument("--num-steps", type=int, default=128, - help="the number of steps to run in each environment per policy rollout") - parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggle learning rate annealing for policy and value networks") - parser.add_argument("--gamma", type=float, default=0.99, - help="the discount factor 
gamma") - parser.add_argument("--gae-lambda", type=float, default=0.95, - help="the lambda for the general advantage estimation") - parser.add_argument("--num-minibatches", type=int, default=4, - help="the number of mini-batches") - parser.add_argument("--update-epochs", type=int, default=4, - help="the K epochs to update the policy") - parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles advantages normalization") - parser.add_argument("--clip-coef", type=float, default=0.1, - help="the surrogate clipping coefficient") - parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True, - help="Toggles whether or not to use a clipped loss for the value function, as per the paper.") - parser.add_argument("--ent-coef", type=float, default=0.01, - help="coefficient of the entropy") - parser.add_argument("--vf-coef", type=float, default=0.5, - help="coefficient of the value function") - parser.add_argument("--max-grad-norm", type=float, default=0.5, - help="the maximum norm for the gradient clipping") - parser.add_argument("--target-kl", type=float, default=None, - help="the target KL divergence threshold") - args = parser.parse_args() - args.batch_size = int(args.num_envs * args.num_steps) - args.minibatch_size = int(args.batch_size // args.num_minibatches) - # fmt: on - return args + env_id: str = "pong_v3" + """the id of the environment""" + total_timesteps: int = 20000000 + """total timesteps of the experiments""" + learning_rate: float = 2.5e-4 + """the learning rate of the optimizer""" + num_envs: int = 16 + """the number of parallel game environments""" + num_steps: int = 128 + """the number of steps to run in each environment per policy rollout""" + anneal_lr: bool = True + """Toggle learning rate annealing for policy and value networks""" + gamma: float = 0.99 + """the discount factor gamma""" + gae_lambda: float = 0.95 + """the lambda for the general advantage estimation""" + num_minibatches: int = 4 + """the number of mini-batches""" + update_epochs: int = 4 + """the K epochs to update the policy""" + norm_adv: bool = True + """Toggles advantages normalization""" + clip_coef: float = 0.1 + """the surrogate clipping coefficient""" + clip_vloss: bool = True + """Toggles whether or not to use a clipped loss for the value function, as per the paper.""" + ent_coef: float = 0.01 + """coefficient of the entropy""" + vf_coef: float = 0.5 + """coefficient of the value function""" + max_grad_norm: float = 0.5 + """the maximum norm for the gradient clipping""" + target_kl: float = None + """the target KL divergence threshold""" + + # to be filled in runtime + batch_size: int = 0 + """the batch size (computed in runtime)""" + minibatch_size: int = 0 + """the mini-batch size (computed in runtime)""" + num_iterations: int = 0 + """the number of iterations (computed in runtime)""" def layer_init(layer, std=np.sqrt(2), bias_const=0.0): @@ -118,7 +120,10 @@ def get_action_and_value(self, x, action=None): if __name__ == "__main__": - args = parse_args() + args = tyro.cli(Args) + args.batch_size = int(args.num_envs * args.num_steps) + args.minibatch_size = int(args.batch_size // args.num_minibatches) + args.num_iterations = args.total_timesteps // args.batch_size run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}" if args.track: import wandb @@ -156,11 +161,10 @@ def get_action_and_value(self, x, action=None): env = ss.frame_stack_v1(env, 4) env = 
ss.agent_indicator_v0(env, type_only=False) env = ss.pettingzoo_env_to_vec_env_v1(env) - envs = ss.concat_vec_envs_v1(env, args.num_envs // 2, num_cpus=0, base_class="gym") + envs = ss.concat_vec_envs_v1(env, args.num_envs // 2, num_cpus=0, base_class="gymnasium") envs.single_observation_space = envs.observation_space envs.single_action_space = envs.action_space envs.is_vector_env = True - envs = gym.wrappers.RecordEpisodeStatistics(envs) if args.capture_video: envs = gym.wrappers.RecordVideo(envs, f"videos/{run_name}") assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported" @@ -173,27 +177,31 @@ def get_action_and_value(self, x, action=None): actions = torch.zeros((args.num_steps, args.num_envs) + envs.single_action_space.shape).to(device) logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device) rewards = torch.zeros((args.num_steps, args.num_envs)).to(device) - dones = torch.zeros((args.num_steps, args.num_envs)).to(device) + terminations = torch.zeros((args.num_steps, args.num_envs)).to(device) + truncations = torch.zeros((args.num_steps, args.num_envs)).to(device) values = torch.zeros((args.num_steps, args.num_envs)).to(device) # TRY NOT TO MODIFY: start the game global_step = 0 start_time = time.time() - next_obs = torch.Tensor(envs.reset()).to(device) - next_done = torch.zeros(args.num_envs).to(device) + next_obs, info = envs.reset(seed=args.seed) + next_obs = torch.Tensor(next_obs).to(device) + next_termination = torch.zeros(args.num_envs).to(device) + next_truncation = torch.zeros(args.num_envs).to(device) num_updates = args.total_timesteps // args.batch_size - for update in range(1, num_updates + 1): + for iteration in range(1, args.num_iterations + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: - frac = 1.0 - (update - 1.0) / num_updates + frac = 1.0 - (iteration - 1.0) / args.num_iterations lrnow = frac * args.learning_rate optimizer.param_groups[0]["lr"] = lrnow for step in range(0, args.num_steps): - global_step += 1 * args.num_envs + global_step += args.num_envs obs[step] = next_obs - dones[step] = next_done + terminations[step] = next_termination + truncations[step] = next_truncation # ALGO LOGIC: action logic with torch.no_grad(): @@ -203,10 +211,15 @@ def get_action_and_value(self, x, action=None): logprobs[step] = logprob # TRY NOT TO MODIFY: execute the game and log data. 
- next_obs, reward, done, info = envs.step(action.cpu().numpy()) + next_obs, reward, termination, truncation, info = envs.step(action.cpu().numpy()) rewards[step] = torch.tensor(reward).to(device).view(-1) - next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device) + next_obs, next_termination, next_truncation = ( + torch.Tensor(next_obs).to(device), + torch.Tensor(termination).to(device), + torch.Tensor(truncation).to(device), + ) + # TODO: fix this for idx, item in enumerate(info): player_idx = idx % 2 if "episode" in item.keys(): @@ -219,6 +232,8 @@ def get_action_and_value(self, x, action=None): next_value = agent.get_value(next_obs).reshape(1, -1) advantages = torch.zeros_like(rewards).to(device) lastgaelam = 0 + next_done = np.logical_or(next_termination, next_truncation) + dones = np.logical_or(terminations, truncations) for t in reversed(range(args.num_steps)): if t == args.num_steps - 1: nextnonterminal = 1.0 - next_done @@ -289,9 +304,8 @@ def get_action_and_value(self, x, action=None): nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm) optimizer.step() - if args.target_kl is not None: - if approx_kl > args.target_kl: - break + if args.target_kl is not None and approx_kl > args.target_kl: + break y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() var_y = np.var(y_true) diff --git a/docs/rl-algorithms/ppo.md b/docs/rl-algorithms/ppo.md index e83b38e63..c2cc19181 100644 --- a/docs/rl-algorithms/ppo.md +++ b/docs/rl-algorithms/ppo.md @@ -1029,7 +1029,7 @@ Tracked experiments and game play videos: ## `ppo_pettingzoo_ma_atari.py` -[ppo_pettingzoo_ma_atari.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_pettingzoo_ma_atari.py) trains an agent to learn playing Atari games via selfplay. The selfplay environment is implemented as a vectorized environment from [PettingZoo.ml](https://www.pettingzoo.ml/atari). The basic idea is to create vectorized environment $E$ with `num_envs = N`, where $N$ is the number of players in the game. Say $N = 2$, then the 0-th sub environment of $E$ will return the observation for player 0 and 1-th sub environment will return the observation of player 1. Then the two environments takes a batch of 2 actions and execute them for player 0 and player 1, respectively. See "Vectorized architecture" in [The 37 Implementation Details of Proximal Policy Optimization](https://iclr-blog-track.github.io/2022/03/25/ppo-implementation-details/) for more detail. +[ppo_pettingzoo_ma_atari.py](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_pettingzoo_ma_atari.py) trains an agent to play Atari games via selfplay. The selfplay environment is implemented as a vectorized environment from [PettingZoo](https://pettingzoo.farama.org/environments/atari/). The basic idea is to create a vectorized environment $E$ with `num_envs = N`, where $N$ is the number of players in the game. Say $N = 2$; then the 0-th sub-environment of $E$ will return the observation for player 0 and the 1-st sub-environment will return the observation for player 1. The vectorized environment then takes a batch of 2 actions and executes them for player 0 and player 1, respectively. See "Vectorized architecture" in [The 37 Implementation Details of Proximal Policy Optimization](https://iclr-blog-track.github.io/2022/03/25/ppo-implementation-details/) for more details. 
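To make the vectorized selfplay layout concrete, here is a minimal sketch assembled from the SuperSuit wrappers that appear in this diff. It is a sketch rather than code lifted from the repository: the `pong_v3.parallel_env()` call and the random-action step are illustrative assumptions, and the image preprocessing wrappers used by the real script are omitted for brevity.

```python
# Minimal sketch (not verbatim from ppo_pettingzoo_ma_atari.py): expose a
# 2-player PettingZoo Atari game as a Gymnasium-style vectorized environment
# in which sub-env 0 observes player 0 and sub-env 1 observes player 1.
import importlib

import numpy as np
import supersuit as ss

env = importlib.import_module("pettingzoo.atari.pong_v3").parallel_env()  # assumed constructor call
env = ss.frame_stack_v1(env, 4)
env = ss.agent_indicator_v0(env, type_only=False)  # add agent-indicator channels so one policy can play both sides
env = ss.pettingzoo_env_to_vec_env_v1(env)
# 8 copies of the 2-player game -> 16 sub-environments (num_envs // 2 game copies in the script)
envs = ss.concat_vec_envs_v1(env, 8, num_cpus=0, base_class="gymnasium")

obs, info = envs.reset(seed=1)
n_subenvs = 2 * 8  # players per game x game copies
actions = np.array([envs.action_space.sample() for _ in range(n_subenvs)])
# Gymnasium step API: separate termination and truncation flags per sub-environment
obs, rewards, terminations, truncations, info = envs.step(actions)
```

Even-indexed sub-environments correspond to player 0 and odd-indexed ones to player 1, which is why the episode logging in the hunk above computes `player_idx = idx % 2`.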
`ppo_pettingzoo_ma_atari.py` has the following features: @@ -1064,7 +1064,7 @@ Tracked experiments and game play videos: python cleanrl/ppo_pettingzoo_ma_atari.py --env-id surround_v2 ``` -See [https://www.pettingzoo.ml/atari](https://www.pettingzoo.ml/atari) for a full-list of supported environments such as `basketball_pong_v3`. Notice pettingzoo sometimes introduces breaking changes, so make sure to install the pinned dependencies via `poetry`. +See [https://pettingzoo.farama.org/environments/atari/](https://pettingzoo.farama.org/environments/atari/) for a full-list of supported environments such as `basketball_pong_v3`. Notice pettingzoo sometimes introduces breaking changes, so make sure to install the pinned dependencies via `poetry`. ### Explanation of the logged metrics diff --git a/poetry.lock b/poetry.lock index f30baf28c..4cc2106a0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "absl-py" @@ -1707,6 +1707,11 @@ files = [ {file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70635d1cdb0147a02efb6b3f607a52cdc51723bc3dcc42717a0d4ef55fa0a987"}, {file = "labmaze-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff472793238bd9b6dabea8094594d6074ad3c111455de3afcae72f6c40c6817e"}, {file = "labmaze-1.0.6-cp311-cp311-win_amd64.whl", hash = "sha256:2317e65e12fa3d1abecda7e0488dab15456cee8a2e717a586bfc8f02a91579e7"}, + {file = "labmaze-1.0.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e36b6fadcd78f22057b597c1c77823e806a0987b3bdfbf850e14b6b5b502075e"}, + {file = "labmaze-1.0.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d1a4f8de29c2c3d7f14163759b69cd3f237093b85334c983619c1db5403a223b"}, + {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a394f8bb857fcaa2884b809d63e750841c2662a106cfe8c045f2112d201ac7d5"}, + {file = "labmaze-1.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d17abb69d4dfc56183afb5c317e8b2eaca0587abb3aabd2326efd3143c81f4e"}, + {file = "labmaze-1.0.6-cp312-cp312-win_amd64.whl", hash = "sha256:5af997598cc46b1929d1c5a1febc32fd56c75874fe481a2a5982c65cee8450c9"}, {file = "labmaze-1.0.6-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:a4c5bc6e56baa55ce63b97569afec2f80cab0f6b952752a131e1f83eed190a53"}, {file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3955f24fe5f708e1e97495b4cfe284b70ae4fd51be5e17b75a6fc04ffbd67bca"}, {file = "labmaze-1.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed96ddc0bb8d66df36428c94db83949fd84a15867e8250763a4c5e3d82104c54"}, @@ -2645,29 +2650,28 @@ files = [ [[package]] name = "pettingzoo" -version = "1.18.1" -description = "Gym for multi-agent reinforcement learning" +version = "1.24.3" +description = "Gymnasium for multi-agent reinforcement learning." 
optional = true -python-versions = ">=3.7, <3.11" +python-versions = ">=3.8" files = [ - {file = "PettingZoo-1.18.1-py3-none-any.whl", hash = "sha256:25ae45fcfa2c623800e1f81b98ae50f5f5a1af6caabc5946764248de71a0371d"}, - {file = "PettingZoo-1.18.1.tar.gz", hash = "sha256:7e6a3231dc3fc3801af83fe880f199f570d46a9acdcb990f2a223f121b6e5038"}, + {file = "pettingzoo-1.24.3-py3-none-any.whl", hash = "sha256:23ed90517d2e8a7098bdaf5e31234b3a7f7b73ca578d70d1ca7b9d0cb0e37982"}, + {file = "pettingzoo-1.24.3.tar.gz", hash = "sha256:91f9094f18e06fb74b98f4099cd22e8ae4396125e51719d50b30c9f1c7ab07e6"}, ] [package.dependencies] -gym = ">=0.21.0" -numpy = ">=1.18.0" +gymnasium = ">=0.28.0" +numpy = ">=1.21.0" [package.extras] -all = ["box2d-py (==2.3.5)", "chess (==1.7.0)", "hanabi-learning-environment (==0.0.1)", "magent (==0.2.2)", "multi-agent-ale-py (==0.1.11)", "pillow (>=8.0.1)", "pygame (==2.1.0)", "pyglet (>=1.4.0)", "pymunk (==6.2.0)", "rlcard (==1.0.4)", "scipy (>=1.4.1)"] -atari = ["multi-agent-ale-py (==0.1.11)", "pygame (==2.1.0)"] -butterfly = ["pygame (==2.1.0)", "pymunk (==6.2.0)"] -classic = ["chess (==1.7.0)", "hanabi-learning-environment (==0.0.1)", "pygame (==2.1.0)", "rlcard (==1.0.4)"] -magent = ["magent (==0.2.2)"] -mpe = ["pyglet (>=1.4.0)"] +all = ["box2d-py (==2.3.5)", "chess (==1.9.4)", "multi-agent-ale-py (==0.1.11)", "pillow (>=8.0.1)", "pygame (==2.3.0)", "pymunk (==6.2.0)", "rlcard (==1.0.5)", "scipy (>=1.4.1)", "shimmy[openspiel] (>=1.2.0)"] +atari = ["multi-agent-ale-py (==0.1.11)", "pygame (==2.3.0)"] +butterfly = ["pygame (==2.3.0)", "pymunk (==6.2.0)"] +classic = ["chess (==1.9.4)", "pygame (==2.3.0)", "rlcard (==1.0.5)", "shimmy[openspiel] (>=1.2.0)"] +mpe = ["pygame (==2.3.0)"] other = ["pillow (>=8.0.1)"] -sisl = ["box2d-py (==2.3.5)", "pygame (==2.1.0)", "scipy (>=1.4.1)"] -tests = ["codespell", "flake8", "isort", "pynput", "pytest"] +sisl = ["box2d-py (==2.3.5)", "pygame (==2.3.0)", "pymunk (==6.2.0)", "scipy (>=1.4.1)"] +testing = ["AutoROM", "pre-commit", "pynput", "pytest", "pytest-cov", "pytest-markdown-docs", "pytest-xdist"] [[package]] name = "pillow" @@ -3707,19 +3711,22 @@ tests = ["black", "isort (>=5.0)", "mypy", "pytest", "pytest-cov", "pytest-env", [[package]] name = "supersuit" -version = "3.4.0" -description = "Wrappers for Gym and PettingZoo" +version = "3.9.2" +description = "Wrappers for Gymnasium and PettingZoo" optional = true -python-versions = ">=3.7" +python-versions = "<3.12,>=3.8" files = [ - {file = "SuperSuit-3.4.0-py3-none-any.whl", hash = "sha256:45b541b2b29faffd6494b53d649c8d94889966f407fd380b3e3211f9e68a49e9"}, - {file = "SuperSuit-3.4.0.tar.gz", hash = "sha256:5999beec8d7923c11c9511eaa9dec8a38269cb0d7af029e17903c79234233409"}, + {file = "SuperSuit-3.9.2-py3-none-any.whl", hash = "sha256:1dcecd419100eeed19c51444a341dd7ab14deaf3cd775ba475de4e63eba6159c"}, + {file = "SuperSuit-3.9.2.tar.gz", hash = "sha256:60e384fe63ab6752acbfc34f991f48d6346592b1dd3475138e3599ab41eaaf24"}, ] [package.dependencies] -gym = ">=0.22.0" -pettingzoo = ">=1.15.0" -tinyscaler = ">=1.0.4" +gymnasium = ">=0.28.1" +numpy = ">=1.19.0" +tinyscaler = ">=1.2.6" + +[package.extras] +testing = ["moviepy (>=1.0.0)", "pettingzoo[butterfly,classic] (>=1.23.1)", "pytest", "stable-baselines3 (>=2.0.0)"] [[package]] name = "tabulate" @@ -3837,20 +3844,31 @@ files = [ [[package]] name = "tinyscaler" -version = "1.2.5" -description = "A tiny, simple image scaler" +version = "1.2.7" +description = "A tiny, simple image scaler." 
optional = true -python-versions = ">=3.7, <3.11" -files = [ - {file = "tinyscaler-1.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f32d794fc2b9c5e4aa3b435d040f9e75b11f55ab41b32580f2c8e8dfb350f25"}, - {file = "tinyscaler-1.2.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4af0a9502e9ef118c84de80b09544407c8dbbe815af215b1abb8eb170271ab71"}, - {file = "tinyscaler-1.2.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0bde14fb15027d73f4cc5ac837e849feb1cbedbfc0a0c0928f11756f08f6626"}, - {file = "tinyscaler-1.2.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46c75799068330ff7c28fd01f10409d4f12c22f1adbe732f1699228449a4d712"}, - {file = "tinyscaler-1.2.5.tar.gz", hash = "sha256:deb47df1a53a55b53f0ae15b89b4814af184d149a8149385e54e11afc57364a5"}, +python-versions = "<3.12,>=3.7" +files = [ + {file = "tinyscaler-1.2.7-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bbb98ced396d4829a41aa9c7c895df4bcb3801a3bbe963978c90d12b07110731"}, + {file = "tinyscaler-1.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d062e0e33f6104d625fff9b57aa53511c39d2dc3bb711686f6992a7fbfe41336"}, + {file = "tinyscaler-1.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:a96f008975d4d167102a2671fb54fb6ace6ff2580fede3b79daeca99a01e5d6e"}, + {file = "tinyscaler-1.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bf63243a08e214e3db149435741b779db357c376636e17ddf153bf9f6ada041c"}, + {file = "tinyscaler-1.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d2d129f71c518d9c0c25f5e9f3a7f7a31af62e7e7e6f8750ddf0154ed76a58a"}, + {file = "tinyscaler-1.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:c14d302cd609d8c8e53ddf15b3ab43fa3c975d648ffcf16276c8b131ab849f85"}, + {file = "tinyscaler-1.2.7-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3ef723fbe119614dfdd8a7bd40d73c17defaac6765f60c44693858bd5cd70fbc"}, + {file = "tinyscaler-1.2.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6700a37bd42615944099994f2aa473be215e25d79803fcac9de849205c7b149"}, + {file = "tinyscaler-1.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:26d488778686392a0441e598df7ebc45ad014663e60384ef6170dd793f80d275"}, + {file = "tinyscaler-1.2.7-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:f80203589d883896c86fe94165967be453fbb0fe47c9bc64521aee15e125f202"}, + {file = "tinyscaler-1.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef4843aaa2647d7ae7a26ba66dbd1d1b31d161ca558f2c385bc6b02277d27fdb"}, + {file = "tinyscaler-1.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:8e6b605ef00fc65a27f294742514f67d9b4c37d41bfe586e2609ab03a41f2e74"}, + {file = "tinyscaler-1.2.7-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:2847420c81064c8bd3397bdcd83e2706cd914cdb9cbde5300ed968c14954b9d3"}, + {file = "tinyscaler-1.2.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d86fe85fa37cfaedb521c9eb3a804b6ab202924be221049b784220c5ca49546"}, + {file = "tinyscaler-1.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:ce1e10fc54d02bb49ea1f72f76d320c50739eb4ff3e6cbb82148b4f84272220b"}, + {file = "tinyscaler-1.2.7.tar.gz", hash = "sha256:1c0b34b41cca3ae9b09c20fee27499833345b9264617bdd23c896733676d82d8"}, ] [package.dependencies] -numpy = "*" +numpy = ">=1.21.0" [[package]] name = "tomli" @@ -4245,4 +4263,4 @@ qdagger-dqn-atari-jax-impalacnn = ["AutoROM", "ale-py", "flax", "jax", "jaxlib", [metadata] lock-version = "2.0" python-versions = 
">=3.8,<3.11" -content-hash = "ce1dd6a428e94e30643d2fb0a3fd13f0132d176185a91f7685392d4ec0e7892b" +content-hash = "aaa9d84a456774e5f9ecf02beade2b4f42d71980872f1f929305b05c57d73958" diff --git a/pyproject.toml b/pyproject.toml index 49c7fabca..e5b4282c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,8 +43,8 @@ flax = {version = "0.6.8", optional = true} optuna = {version = "^3.0.1", optional = true} optuna-dashboard = {version = "^0.7.2", optional = true} envpool = {version = "^0.6.4", optional = true} -PettingZoo = {version = "1.18.1", optional = true} -SuperSuit = {version = "3.4.0", optional = true} +PettingZoo = {version = "^1.24.3", optional = true} +SuperSuit = {version = ">=3.9.2", optional = true} multi-agent-ale-py = {version = "0.1.11", optional = true} boto3 = {version = "^1.24.70", optional = true} awscli = {version = "^1.31.0", optional = true} diff --git a/requirements/requirements-pettingzoo.txt b/requirements/requirements-pettingzoo.txt index 461c6023f..f997ba402 100644 --- a/requirements/requirements-pettingzoo.txt +++ b/requirements/requirements-pettingzoo.txt @@ -39,7 +39,7 @@ oauthlib==3.2.2 ; python_version >= "3.8" and python_version < "3.11" packaging==23.1 ; python_version >= "3.8" and python_version < "3.11" pandas==1.3.5 ; python_version >= "3.8" and python_version < "3.11" pathtools==0.1.2 ; python_version >= "3.8" and python_version < "3.11" -pettingzoo==1.18.1 ; python_version >= "3.8" and python_version < "3.11" +pettingzoo==1.24.3 ; python_version >= "3.8" and python_version < "3.11" pillow==9.5.0 ; python_version >= "3.8" and python_version < "3.11" proglog==0.1.10 ; python_version >= "3.8" and python_version < "3.11" protobuf==3.20.3 ; python_version < "3.11" and python_version >= "3.8" @@ -63,12 +63,12 @@ shtab==1.6.4 ; python_version >= "3.8" and python_version < "3.11" six==1.16.0 ; python_version >= "3.8" and python_version < "3.11" smmap==5.0.0 ; python_version >= "3.8" and python_version < "3.11" stable-baselines3==2.0.0 ; python_version >= "3.8" and python_version < "3.11" -supersuit==3.4.0 ; python_version >= "3.8" and python_version < "3.11" +supersuit==3.9.2 ; python_version >= "3.8" and python_version < "3.11" tenacity==8.2.3 ; python_version >= "3.8" and python_version < "3.11" tensorboard-data-server==0.6.1 ; python_version >= "3.8" and python_version < "3.11" tensorboard-plugin-wit==1.8.1 ; python_version >= "3.8" and python_version < "3.11" tensorboard==2.11.2 ; python_version >= "3.8" and python_version < "3.11" -tinyscaler==1.2.5 ; python_version >= "3.8" and python_version < "3.11" +tinyscaler==1.2.7 ; python_version >= "3.8" and python_version < "3.11" torch==1.12.1 ; python_version >= "3.8" and python_version < "3.11" tqdm==4.65.0 ; python_version >= "3.8" and python_version < "3.11" typing-extensions==4.5.0 ; python_version >= "3.8" and python_version < "3.11"