From b5e3515821c097c0db3dab09bb7db3e13b88c4aa Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Sun, 23 Apr 2023 16:02:04 -0400 Subject: [PATCH 1/7] Update usage scripts to use break, fix bugs --- docs/api/aec.md | 58 ++++++++++++++++++++++++++++++---- docs/api/parallel.md | 11 ++++--- docs/environments/atari.md | 17 ++++++---- docs/environments/butterfly.md | 22 ++++++++----- docs/environments/classic.md | 16 ++++++---- docs/environments/mpe.md | 14 ++++---- docs/environments/sisl.md | 12 ++++--- docs/index.md | 19 +++++++---- 8 files changed, 120 insertions(+), 49 deletions(-) diff --git a/docs/api/aec.md b/docs/api/aec.md index 70b07962f..abb03c569 100644 --- a/docs/api/aec.md +++ b/docs/api/aec.md @@ -2,25 +2,71 @@ By default, PettingZoo models games as [*Agent Environment Cycle*](https://arxiv.org/abs/2009.13051) (AEC) environments. This allows it to support any type of game multi-agent RL can consider. -## Example Usage +## Usage AEC environments can be interacted with as follows: -``` python +```python +from pettingzoo.classic import rps_v2 + +env = rps_v2.env(render_mode="human") +env.reset(seed=42) + +for agent in env.agent_iter(): + observation, reward, termination, truncation, info = env.last() + + if termination or truncation: + break + + action = env.action_space(agent).sample() # this is where you would insert your policy + + env.step(action) # execute the action in the environment +env.close() +``` + +### Action Masking +AEC environments often include action masks, in order to mark valid & invalid actions for the agent. + +[//]: # (For details about action masking, see [A Closer Look at Invalid Action Masking in Policy Gradient Algorithms](https://arxiv.org/abs/2006.14171) (Huang, 2022).) 
+ +To sample actions using invalid action masking: +```python from pettingzoo.classic import chess_v5 + env = chess_v5.env(render_mode="human") +env.reset(seed=42) -env.reset() for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() + if termination or truncation: - action = None + break + + # invalid action masking is optional and environment-dependent + if "action_mask" in info: + mask = info["action_mask"] # used in Shimmy OpenSpiel compatibility wrapper + elif isinstance(observation, dict) and "action_mask" in observation: + mask = observation["action_mask"] # used in PettingZoo Classic environments else: - action = env.action_space(agent).sample(observation["action_mask"]) # this is where you would insert your policy - env.step(action) + mask = None + + action = env.action_space(agent).sample(mask) # this is where you would insert your policy + + env.step(action) # execute the action in the environment env.close() ``` +Note: invalid action masking is optional, and can be stored either in `observation` or `info`. 
For example: +* [PettingZoo Classic](https://pettingzoo.farama.org/environments/classic/) environments store illegal action mask in the `observation` dict: + * `mask = observation["action_mask"]` +* [Shimmy](https://shimmy.farama.org/)'s [OpenSpiel environments](https://shimmy.farama.org/environments/open_spiel/) stores illegal action mask in the `info` dict: + * `mask = info["action_mask"` + +To implement action masking in a custom environment, see [Environment Creation: Action Masking](https://pettingzoo.farama.org/tutorials/environmentcreation/3-action-masking/) + +For more information on action masking, see [A Closer Look at Invalid Action Masking in Policy Gradient Algorithms](https://arxiv.org/abs/2006.14171) (Huang, 2022) + + ## AECEnv ```{eval-rst} diff --git a/docs/api/parallel.md b/docs/api/parallel.md index 31f20194c..912ba00b9 100644 --- a/docs/api/parallel.md +++ b/docs/api/parallel.md @@ -2,17 +2,20 @@ In addition to the main API, we have a secondary parallel API for environments where all agents have simultaneous actions and observations. An environment with parallel API support can be created via `.parallel_env()`. This API is based around the paradigm of *Partially Observable Stochastic Games* (POSGs) and the details are similar to [RLLib's MultiAgent environment specification](https://docs.ray.io/en/latest/rllib-env.html#multi-agent-and-hierarchical), except we allow for different observation and action spaces between the agents. 
-## Example Usage +## Usage Parallel environments can be interacted with as follows: ``` python from pettingzoo.butterfly import pistonball_v6 -parallel_env = pistonball_v6.parallel_env() -observations = parallel_env.reset() +parallel_env = pistonball_v6.parallel_env(render_mode="human") +observations = parallel_env.reset(seed=42) while env.agents: - actions = {agent: parallel_env.action_space(agent).sample() for agent in parallel_env.agents} # this is where you would insert your policy + # this is where you would insert your policy + actions = {agent: parallel_env.action_space(agent).sample() for agent in parallel_env.agents} + + # execute the actions in the environment observations, rewards, terminations, truncations, infos = parallel_env.step(actions) ``` diff --git a/docs/environments/atari.md b/docs/environments/atari.md index 34eed4dcc..c6ac4ba1b 100644 --- a/docs/environments/atari.md +++ b/docs/environments/atari.md @@ -52,19 +52,22 @@ Install ROMs using [AutoROM](https://github.com/Farama-Foundation/AutoROM), or s ### Usage -To launch a [Space Invaders](https://pettingzoo.farama.org/environments/atari/space_invaders/) environment with agents taking random actions: -``` python +To launch a [Space Invaders](https://pettingzoo.farama.org/environments/atari/space_invaders/) environment with random agents: +```python from pettingzoo.atari import space_invaders_v2 + env = space_invaders_v2.env(render_mode="human") +env.reset(seed=42) -env.reset() for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() + if termination or truncation: - action = None - else: - env.action_space(agent).sample() # this is where you would insert your policy - env.step(action) + break + + action = env.action_space(agent).sample() # this is where you would insert your policy + + env.step(action) # execute the action in the environment env.close() ``` diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md index 
e4f9e7431..010026265 100644 --- a/docs/environments/butterfly.md +++ b/docs/environments/butterfly.md @@ -34,38 +34,44 @@ pip install pettingzoo[butterfly] ### Usage -To launch a [Pistonball](https://pettingzoo.farama.org/environments/butterfly/pistonball/) environment with agents taking random actions: -``` python +To launch a [Pistonball](https://pettingzoo.farama.org/environments/butterfly/pistonball/) environment with random agents: +```python from pettingzoo.butterfly import pistonball_v6 -env = pistonball_v6.parallel_env(render_mode="human") +env = pistonball_v6.parallel_env(render_mode="human") observations = env.reset() + while env.agents: - actions = {agent: env.action_space(agent).sample() for agent in env.agents} # this is where you would insert your policy + # this is where you would insert your policy + actions = {agent: env.action_space(agent).sample() for agent in env.agents} + + # execute the actions in the environment observations, rewards, terminations, truncations, infos = env.step(actions) env.close() ``` -To launch a [Knights Archers Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) environment with interactive user input (see [manual_policy.py](https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/butterfly/knights_archers_zombies/manual_policy.py), controls are WASD and space): +To launch a [Knights Archers Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) environment with interactive user input (see [manual_policy.py](https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/butterfly/knights_archers_zombies/manual_policy.py)): ``` python import pygame from pettingzoo.butterfly import knights_archers_zombies_v10 env = knights_archers_zombies_v10.env(render_mode="human") -env.reset() +env.reset(seed=42) clock = pygame.time.Clock() manual_policy = knights_archers_zombies_v10.ManualPolicy(env) for agent in env.agent_iter(): 
clock.tick(env.metadata["render_fps"]) - observation, reward, termination, truncation, info = env.last() + if agent == manual_policy.agent: + # get user input (controls are WASD and space) action = manual_policy(observation, agent) else: + # this is where you would insert your policy (for non-player agents) action = env.action_space(agent).sample() - env.step(action) + env.step(action) # execute the action in the environment ``` diff --git a/docs/environments/classic.md b/docs/environments/classic.md index 1af3ff89f..0e9cd4819 100644 --- a/docs/environments/classic.md +++ b/docs/environments/classic.md @@ -36,19 +36,23 @@ pip install pettingzoo[classic] ### Usage -To launch a [Texas Holdem](https://pettingzoo.farama.org/environments/classic/texas_holdem/) environment with agents taking random actions: +To launch a [Texas Holdem](https://pettingzoo.farama.org/environments/classic/texas_holdem/) environment with random agents: ``` python from pettingzoo.classic import texas_holdem_v4 + env = texas_holdem_v4.env(render_mode="human") +env.reset(seed=42) -env.reset() for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() + if termination or truncation: - action = None - else: - action = env.action_space(agent).sample(observation["action_mask"]) # this is where you would insert your policy - env.step(action) + break + + mask = observation["action_mask"] + action = env.action_space(agent).sample(mask) # this is where you would insert your policy + + env.step(action) # execute the action in the environment env.close() ``` diff --git a/docs/environments/mpe.md b/docs/environments/mpe.md index 6460f5c23..245ae4f44 100644 --- a/docs/environments/mpe.md +++ b/docs/environments/mpe.md @@ -34,20 +34,22 @@ pip install pettingzoo[mpe] ```` ### Usage -To launch a [Simple Tag](https://pettingzoo.farama.org/environments/mpe/simple_tag/) environment with agents taking random actions: +To launch a [Simple 
Tag](https://pettingzoo.farama.org/environments/mpe/simple_tag/) environment with random agents: -``` python +```python from pettingzoo.mpe import simple_tag_v2 env = simple_tag_v2.env(render_mode='human') env.reset() for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() + if termination or truncation: - action = None - else: - action = env.action_space(agent).sample() - env.step(action) + break + + action = env.action_space(agent).sample() # this is where you would insert your policy + + env.step(action) # execute the action in the environment env.close() ``` diff --git a/docs/environments/sisl.md b/docs/environments/sisl.md index adb2de431..5b5def34b 100644 --- a/docs/environments/sisl.md +++ b/docs/environments/sisl.md @@ -27,7 +27,7 @@ pip install pettingzoo[sisl] ```` ### Usage -To launch a [Waterworld](https://pettingzoo.farama.org/environments/sisl/waterworld/) environment with agents taking random actions: +To launch a [Waterworld](https://pettingzoo.farama.org/environments/sisl/waterworld/) environment with random agents: ```python from pettingzoo.sisl import waterworld_v4 @@ -36,11 +36,13 @@ env = waterworld_v4.env(render_mode='human') env.reset() for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() + if termination or truncation: - action = None - else: - action = env.action_space(agent).sample() - env.step(action) + break + + action = env.action_space(agent).sample() # this is where you would insert your policy + + env.step(action) # execute the action in the environment env.close() ``` diff --git a/docs/index.md b/docs/index.md index 9e3945be2..f53321ff7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -69,11 +69,16 @@ Contribute to the Docs Date: Sun, 23 Apr 2023 16:42:40 -0400 Subject: [PATCH 2/7] Replace break with action=None in usage examples --- docs/api/aec.md | 43 ++++++++++++++++------------------ docs/environments/atari.md | 6 ++--- 
docs/environments/butterfly.md | 2 +- docs/environments/mpe.md | 8 +++---- docs/environments/sisl.md | 8 +++---- 5 files changed, 32 insertions(+), 35 deletions(-) diff --git a/docs/api/aec.md b/docs/api/aec.md index abb03c569..bfc4fa2e4 100644 --- a/docs/api/aec.md +++ b/docs/api/aec.md @@ -16,20 +16,18 @@ for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() if termination or truncation: - break - - action = env.action_space(agent).sample() # this is where you would insert your policy + action = None + else: + action = env.action_space(agent).sample() # this is where you would insert your policy env.step(action) # execute the action in the environment env.close() ``` ### Action Masking -AEC environments often include action masks, in order to mark valid & invalid actions for the agent. +AEC environments often include action masks, in order to mark valid/invalid actions for the agent. -[//]: # (For details about action masking, see [A Closer Look at Invalid Action Masking in Policy Gradient Algorithms](https://arxiv.org/abs/2006.14171) (Huang, 2022).) 
- -To sample actions using invalid action masking: +To sample actions using action masking: ```python from pettingzoo.classic import chess_v5 @@ -40,27 +38,26 @@ for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() if termination or truncation: - break - - # invalid action masking is optional and environment-dependent - if "action_mask" in info: - mask = info["action_mask"] # used in Shimmy OpenSpiel compatibility wrapper - elif isinstance(observation, dict) and "action_mask" in observation: - mask = observation["action_mask"] # used in PettingZoo Classic environments - else: - mask = None - - action = env.action_space(agent).sample(mask) # this is where you would insert your policy - + action = None + else: + # invalid action masking is optional and environment-dependent + if "action_mask" in info: + mask = info["action_mask"] + elif isinstance(observation, dict) and "action_mask" in observation: + mask = observation["action_mask"] + else: + mask = None + action = env.action_space(agent).sample(mask) # this is where you would insert your policy env.step(action) # execute the action in the environment env.close() ``` -Note: invalid action masking is optional, and can be stored either in `observation` or `info`. For example: -* [PettingZoo Classic](https://pettingzoo.farama.org/environments/classic/) environments store illegal action mask in the `observation` dict: +Note: action masking is optional, and can be implemented using either `observation` or `info`. 
+ +* [PettingZoo Classic](https://pettingzoo.farama.org/environments/classic/) environments store action masks in the `observation` dict: * `mask = observation["action_mask"]` -* [Shimmy](https://shimmy.farama.org/)'s [OpenSpiel environments](https://shimmy.farama.org/environments/open_spiel/) stores illegal action mask in the `info` dict: - * `mask = info["action_mask"` +* [Shimmy](https://shimmy.farama.org/)'s [OpenSpiel environments](https://shimmy.farama.org/environments/open_spiel/) store action masks in the `info` dict: + * `mask = info["action_mask"]` To implement action masking in a custom environment, see [Environment Creation: Action Masking](https://pettingzoo.farama.org/tutorials/environmentcreation/3-action-masking/) diff --git a/docs/environments/atari.md b/docs/environments/atari.md index c6ac4ba1b..061aba2c5 100644 --- a/docs/environments/atari.md +++ b/docs/environments/atari.md @@ -63,9 +63,9 @@ for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() if termination or truncation: - break - - action = env.action_space(agent).sample() # this is where you would insert your policy + action = None + else: + action = env.action_space(agent).sample() # this is where you would insert your policy env.step(action) # execute the action in the environment env.close() diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md index 010026265..34c08135e 100644 --- a/docs/environments/butterfly.md +++ b/docs/environments/butterfly.md @@ -51,7 +51,7 @@ env.close() ``` To launch a [Knights Archers Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) environment with interactive user input (see [manual_policy.py](https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/butterfly/knights_archers_zombies/manual_policy.py)): -``` python +```python import pygame from pettingzoo.butterfly import knights_archers_zombies_v10 diff --git a/docs/environments/mpe.md 
b/docs/environments/mpe.md index 245ae4f44..856c44cf9 100644 --- a/docs/environments/mpe.md +++ b/docs/environments/mpe.md @@ -45,10 +45,10 @@ for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() if termination or truncation: - break - - action = env.action_space(agent).sample() # this is where you would insert your policy - + action = None + else: + action = env.action_space(agent).sample() # this is where you would insert your policy + env.step(action) # execute the action in the environment env.close() ``` diff --git a/docs/environments/sisl.md b/docs/environments/sisl.md index 5b5def34b..858320b1b 100644 --- a/docs/environments/sisl.md +++ b/docs/environments/sisl.md @@ -38,10 +38,10 @@ for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() if termination or truncation: - break - - action = env.action_space(agent).sample() # this is where you would insert your policy - + action = None + else: + action = env.action_space(agent).sample() # this is where you would insert your policy + env.step(action) # execute the action in the environment env.close() ``` From f959669c61da2065bac92140937ac4b912925a1c Mon Sep 17 00:00:00 2001 From: elliottower Date: Wed, 26 Apr 2023 12:04:00 -0400 Subject: [PATCH 3/7] Remove 'execute the action in the environment' comment --- docs/api/aec.md | 4 ++-- docs/environments/atari.md | 2 +- docs/environments/butterfly.md | 2 +- docs/environments/classic.md | 2 +- docs/environments/mpe.md | 2 +- docs/environments/sisl.md | 2 +- docs/index.md | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/api/aec.md b/docs/api/aec.md index bfc4fa2e4..0dceef409 100644 --- a/docs/api/aec.md +++ b/docs/api/aec.md @@ -20,7 +20,7 @@ for agent in env.agent_iter(): else: action = env.action_space(agent).sample() # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` @@ 
-48,7 +48,7 @@ for agent in env.agent_iter(): else: mask = None action = env.action_space(agent).sample(mask) # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` diff --git a/docs/environments/atari.md b/docs/environments/atari.md index 061aba2c5..55351200f 100644 --- a/docs/environments/atari.md +++ b/docs/environments/atari.md @@ -67,7 +67,7 @@ for agent in env.agent_iter(): else: action = env.action_space(agent).sample() # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md index 34c08135e..206f6a7e2 100644 --- a/docs/environments/butterfly.md +++ b/docs/environments/butterfly.md @@ -72,6 +72,6 @@ for agent in env.agent_iter(): # this is where you would insert your policy (for non-player agents) action = env.action_space(agent).sample() - env.step(action) # execute the action in the environment + env.step(action) ``` diff --git a/docs/environments/classic.md b/docs/environments/classic.md index 0e9cd4819..4905e9264 100644 --- a/docs/environments/classic.md +++ b/docs/environments/classic.md @@ -52,7 +52,7 @@ for agent in env.agent_iter(): mask = observation["action_mask"] action = env.action_space(agent).sample(mask) # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` diff --git a/docs/environments/mpe.md b/docs/environments/mpe.md index 856c44cf9..2ad185e05 100644 --- a/docs/environments/mpe.md +++ b/docs/environments/mpe.md @@ -49,7 +49,7 @@ for agent in env.agent_iter(): else: action = env.action_space(agent).sample() # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` diff --git a/docs/environments/sisl.md b/docs/environments/sisl.md index 
858320b1b..f73d610bb 100644 --- a/docs/environments/sisl.md +++ b/docs/environments/sisl.md @@ -42,7 +42,7 @@ for agent in env.agent_iter(): else: action = env.action_space(agent).sample() # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` diff --git a/docs/index.md b/docs/index.md index 443a7739e..4bb1bf936 100644 --- a/docs/index.md +++ b/docs/index.md @@ -79,7 +79,7 @@ env.reset() for agent in env.agent_iter(): observation, reward, termination, truncation, info = env.last() action = env.action_space(agent).sample() # this is where you would insert your policy - env.step(action) # execute the action in the environment + env.step(action) env.close() ``` From ca3c23e9c1c85e7125b2f97b73f4d2f8eced0348 Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Wed, 3 May 2023 00:20:33 -0400 Subject: [PATCH 4/7] Update butterfly.md --- docs/environments/butterfly.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md index 206f6a7e2..116cfeca4 100644 --- a/docs/environments/butterfly.md +++ b/docs/environments/butterfly.md @@ -44,8 +44,7 @@ observations = env.reset() while env.agents: # this is where you would insert your policy actions = {agent: env.action_space(agent).sample() for agent in env.agents} - - # execute the actions in the environment + observations, rewards, terminations, truncations, infos = env.step(actions) env.close() ``` From 0647e2afc0df6adcb3d6e4f7111b8d07c7e1879d Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Wed, 3 May 2023 00:21:10 -0400 Subject: [PATCH 5/7] Update butterfly.md --- docs/environments/butterfly.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md index 116cfeca4..443daf850 100644 --- a/docs/environments/butterfly.md +++ b/docs/environments/butterfly.md @@ -72,5 +72,6 @@ for agent in env.agent_iter(): action = 
env.action_space(agent).sample() env.step(action) +env.close() ``` From fb929107b03f98f1df2cca21021078eb65654a2f Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Wed, 3 May 2023 00:22:02 -0400 Subject: [PATCH 6/7] Update parallel.md --- docs/api/parallel.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/parallel.md b/docs/api/parallel.md index 912ba00b9..e67082a12 100644 --- a/docs/api/parallel.md +++ b/docs/api/parallel.md @@ -15,8 +15,8 @@ while env.agents: # this is where you would insert your policy actions = {agent: parallel_env.action_space(agent).sample() for agent in parallel_env.agents} - # execute the actions in the environment observations, rewards, terminations, truncations, infos = parallel_env.step(actions) +parallel_env.close() ``` ## ParallelEnv From e2fec98c87a52fb18b29014a4dee6ee932c4dc23 Mon Sep 17 00:00:00 2001 From: Elliot Tower Date: Wed, 3 May 2023 12:29:23 -0400 Subject: [PATCH 7/7] Finish merging AEC.md --- docs/api/aec.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/api/aec.md b/docs/api/aec.md index 0dceef409..6719d990d 100644 --- a/docs/api/aec.md +++ b/docs/api/aec.md @@ -1,3 +1,9 @@ +--- +title: AEC +--- + + + # AEC API By default, PettingZoo models games as [*Agent Environment Cycle*](https://arxiv.org/abs/2009.13051) (AEC) environments. This allows it to support any type of game multi-agent RL can consider. @@ -48,6 +54,7 @@ for agent in env.agent_iter(): else: mask = None action = env.action_space(agent).sample(mask) # this is where you would insert your policy + env.step(action) env.close() ```