From b5e3515821c097c0db3dab09bb7db3e13b88c4aa Mon Sep 17 00:00:00 2001
From: Elliot Tower <elliot@elliottower.com>
Date: Sun, 23 Apr 2023 16:02:04 -0400
Subject: [PATCH 1/7] Update usage scripts to use break, fix bugs

---
 docs/api/aec.md                | 58 ++++++++++++++++++++++++++++++----
 docs/api/parallel.md           | 11 ++++---
 docs/environments/atari.md     | 17 ++++++----
 docs/environments/butterfly.md | 22 ++++++++-----
 docs/environments/classic.md   | 16 ++++++----
 docs/environments/mpe.md       | 14 ++++----
 docs/environments/sisl.md      | 12 ++++---
 docs/index.md                  | 19 +++++++----
 8 files changed, 120 insertions(+), 49 deletions(-)

diff --git a/docs/api/aec.md b/docs/api/aec.md
index 70b07962f..abb03c569 100644
--- a/docs/api/aec.md
+++ b/docs/api/aec.md
@@ -2,25 +2,71 @@
 
 By default, PettingZoo models games as [*Agent Environment Cycle*](https://arxiv.org/abs/2009.13051) (AEC) environments. This allows it to support any type of game multi-agent RL can consider.
 
-## Example Usage
+## Usage
 
 AEC environments can be interacted with as follows:
 
-``` python
+```python
+from pettingzoo.classic import rps_v2
+
+env = rps_v2.env(render_mode="human")
+env.reset(seed=42)
+
+for agent in env.agent_iter():
+    observation, reward, termination, truncation, info = env.last()
+    
+    if termination or truncation:
+        break
+        
+    action = env.action_space(agent).sample() # this is where you would insert your policy
+    
+    env.step(action) # execute the action in the environment
+env.close()
+```
+
+### Action Masking
+AEC environments often include action masks, in order to mark valid & invalid actions for the agent. 
+
+[//]: # (For details about action masking, see [A Closer Look at Invalid Action Masking in Policy Gradient Algorithms]&#40;https://arxiv.org/abs/2006.14171&#41; &#40;Huang, 2022&#41;.)
+
+To sample actions using invalid action masking: 
+```python
 from pettingzoo.classic import chess_v5
+
 env = chess_v5.env(render_mode="human")
+env.reset(seed=42)
 
-env.reset()
 for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
+
     if termination or truncation:
-        action = None
+        break
+        
+    # invalid action masking is optional and environment-dependent
+    if "action_mask" in info:
+        mask = info["action_mask"] # used in Shimmy OpenSpiel compatibility wrapper
+    elif isinstance(observation, dict) and "action_mask" in observation:
+        mask = observation["action_mask"] # used in PettingZoo Classic environments
     else:
-        action = env.action_space(agent).sample(observation["action_mask"])  # this is where you would insert your policy
-    env.step(action)
+        mask = None 
+        
+    action = env.action_space(agent).sample(mask) # this is where you would insert your policy
+
+    env.step(action) # execute the action in the environment
 env.close()
 ```
 
+Note: invalid action masking is optional, and can be stored either in `observation` or `info`. For example:
+* [PettingZoo Classic](https://pettingzoo.farama.org/environments/classic/) environments store illegal action mask in the `observation` dict:
+  * `mask = observation["action_mask"]`
+* [Shimmy](https://shimmy.farama.org/)'s [OpenSpiel environments](https://shimmy.farama.org/environments/open_spiel/) stores illegal action mask in the `info` dict:
+  * `mask = info["action_mask"` 
+
+To implement action masking in a custom environment, see [Environment Creation: Action Masking](https://pettingzoo.farama.org/tutorials/environmentcreation/3-action-masking/)
+
+For more information on action masking, see [A Closer Look at Invalid Action Masking in Policy Gradient Algorithms](https://arxiv.org/abs/2006.14171) (Huang, 2022)
+
+
 ## AECEnv
 
 ```{eval-rst}
diff --git a/docs/api/parallel.md b/docs/api/parallel.md
index 31f20194c..912ba00b9 100644
--- a/docs/api/parallel.md
+++ b/docs/api/parallel.md
@@ -2,17 +2,20 @@
 
 In addition to the main API, we have a secondary parallel API for environments where all agents have simultaneous actions and observations. An environment with parallel API support can be created via `<game>.parallel_env()`. This API is based around the paradigm of *Partially Observable Stochastic Games* (POSGs) and the details are similar to [RLLib's MultiAgent environment specification](https://docs.ray.io/en/latest/rllib-env.html#multi-agent-and-hierarchical), except we allow for different observation and action spaces between the agents.
 
-## Example Usage
+## Usage
 
 Parallel environments can be interacted with as follows:
 
 ``` python
 from pettingzoo.butterfly import pistonball_v6
-parallel_env = pistonball_v6.parallel_env()
-observations = parallel_env.reset()
+parallel_env = pistonball_v6.parallel_env(render_mode="human")
+observations = parallel_env.reset(seed=42)
 
 while env.agents:
-    actions = {agent: parallel_env.action_space(agent).sample() for agent in parallel_env.agents}  # this is where you would insert your policy
+    # this is where you would insert your policy
+    actions = {agent: parallel_env.action_space(agent).sample() for agent in parallel_env.agents}  
+    
+    # execute the actions in the environment
     observations, rewards, terminations, truncations, infos = parallel_env.step(actions)
 ```
 
diff --git a/docs/environments/atari.md b/docs/environments/atari.md
index 34eed4dcc..c6ac4ba1b 100644
--- a/docs/environments/atari.md
+++ b/docs/environments/atari.md
@@ -52,19 +52,22 @@ Install ROMs using [AutoROM](https://github.com/Farama-Foundation/AutoROM), or s
 
 ### Usage
 
-To launch a [Space Invaders](https://pettingzoo.farama.org/environments/atari/space_invaders/) environment with agents taking random actions:
-``` python
+To launch a [Space Invaders](https://pettingzoo.farama.org/environments/atari/space_invaders/) environment with random agents:
+```python
 from pettingzoo.atari import space_invaders_v2
+
 env = space_invaders_v2.env(render_mode="human")
+env.reset(seed=42)
 
-env.reset()
 for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
+
     if termination or truncation:
-        action = None
-    else:
-        env.action_space(agent).sample()  # this is where you would insert your policy
-    env.step(action)
+        break
+    
+    action = env.action_space(agent).sample() # this is where you would insert your policy
+        
+    env.step(action) # execute the action in the environment
 env.close()
 ```
 
diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md
index e4f9e7431..010026265 100644
--- a/docs/environments/butterfly.md
+++ b/docs/environments/butterfly.md
@@ -34,38 +34,44 @@ pip install pettingzoo[butterfly]
 
 ### Usage
 
-To launch a [Pistonball](https://pettingzoo.farama.org/environments/butterfly/pistonball/) environment with agents taking random actions:
-``` python
+To launch a [Pistonball](https://pettingzoo.farama.org/environments/butterfly/pistonball/) environment with random agents:
+```python
 from pettingzoo.butterfly import pistonball_v6
-env = pistonball_v6.parallel_env(render_mode="human")
 
+env = pistonball_v6.parallel_env(render_mode="human")
 observations = env.reset()
+
 while env.agents:
-    actions = {agent: env.action_space(agent).sample() for agent in env.agents}  # this is where you would insert your policy
+    # this is where you would insert your policy
+    actions = {agent: env.action_space(agent).sample() for agent in env.agents}  
+    
+    # execute the actions in the environment
     observations, rewards, terminations, truncations, infos = env.step(actions)
 env.close()
 ```
 
-To launch a [Knights Archers Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) environment with interactive user input (see [manual_policy.py](https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/butterfly/knights_archers_zombies/manual_policy.py), controls are WASD and space):
+To launch a [Knights Archers Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) environment with interactive user input (see [manual_policy.py](https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/butterfly/knights_archers_zombies/manual_policy.py)):
 ``` python
 import pygame
 from pettingzoo.butterfly import knights_archers_zombies_v10
 
 env = knights_archers_zombies_v10.env(render_mode="human")
-env.reset()
+env.reset(seed=42)
 
 clock = pygame.time.Clock()
 manual_policy = knights_archers_zombies_v10.ManualPolicy(env)
 
 for agent in env.agent_iter():
     clock.tick(env.metadata["render_fps"])
-
     observation, reward, termination, truncation, info = env.last()
+    
     if agent == manual_policy.agent:
+        # get user input (controls are WASD and space)
         action = manual_policy(observation, agent)
     else:
+        # this is where you would insert your policy (for non-player agents)
         action = env.action_space(agent).sample()
 
-    env.step(action)
+    env.step(action) # execute the action in the environment
 ```
 
diff --git a/docs/environments/classic.md b/docs/environments/classic.md
index 1af3ff89f..0e9cd4819 100644
--- a/docs/environments/classic.md
+++ b/docs/environments/classic.md
@@ -36,19 +36,23 @@ pip install pettingzoo[classic]
 
 ### Usage
 
-To launch a [Texas Holdem](https://pettingzoo.farama.org/environments/classic/texas_holdem/) environment with agents taking random actions:
+To launch a [Texas Holdem](https://pettingzoo.farama.org/environments/classic/texas_holdem/) environment with random agents:
 ``` python
 from pettingzoo.classic import texas_holdem_v4
+
 env = texas_holdem_v4.env(render_mode="human")
+env.reset(seed=42)
 
-env.reset()
 for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
+    
     if termination or truncation:
-        action = None
-    else:
-        action = env.action_space(agent).sample(observation["action_mask"])  # this is where you would insert your policy
-    env.step(action)
+        break
+    
+    mask = observation["action_mask"]
+    action = env.action_space(agent).sample(mask)  # this is where you would insert your policy
+        
+    env.step(action)  # execute the action in the environment
 env.close()
 ```
 
diff --git a/docs/environments/mpe.md b/docs/environments/mpe.md
index 6460f5c23..245ae4f44 100644
--- a/docs/environments/mpe.md
+++ b/docs/environments/mpe.md
@@ -34,20 +34,22 @@ pip install pettingzoo[mpe]
 ````
 
 ### Usage
-To launch a [Simple Tag](https://pettingzoo.farama.org/environments/mpe/simple_tag/) environment with agents taking random actions:
+To launch a [Simple Tag](https://pettingzoo.farama.org/environments/mpe/simple_tag/) environment with random agents:
 
-``` python
+```python
 from pettingzoo.mpe import simple_tag_v2
 env = simple_tag_v2.env(render_mode='human')
 
 env.reset()
 for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
+    
     if termination or truncation:
-        action = None
-    else:
-        action = env.action_space(agent).sample()
-    env.step(action)
+        break
+    
+    action = env.action_space(agent).sample() # this is where you would insert your policy
+
+    env.step(action) # execute the action in the environment
 env.close()
 ```
 
diff --git a/docs/environments/sisl.md b/docs/environments/sisl.md
index adb2de431..5b5def34b 100644
--- a/docs/environments/sisl.md
+++ b/docs/environments/sisl.md
@@ -27,7 +27,7 @@ pip install pettingzoo[sisl]
 ````
 
 ### Usage
-To launch a [Waterworld](https://pettingzoo.farama.org/environments/sisl/waterworld/) environment with agents taking random actions:
+To launch a [Waterworld](https://pettingzoo.farama.org/environments/sisl/waterworld/) environment with random agents:
 
 ```python
 from pettingzoo.sisl import waterworld_v4
@@ -36,11 +36,13 @@ env = waterworld_v4.env(render_mode='human')
 env.reset()
 for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
+    
     if termination or truncation:
-        action = None
-    else:
-        action = env.action_space(agent).sample()
-    env.step(action)
+        break
+    
+    action = env.action_space(agent).sample() # this is where you would insert your policy
+
+    env.step(action) # execute the action in the environment
 env.close()
 ```
 
diff --git a/docs/index.md b/docs/index.md
index 9e3945be2..f53321ff7 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -69,11 +69,16 @@ Contribute to the Docs <https://github.com/Farama-Foundation/PettingZoo/tree/mas
 Environments can be interacted with in a manner very similar to [Gymnasium](https://gymnasium.farama.org):
 
 ```python
-  from pettingzoo.butterfly import knights_archers_zombies_v10
-  env = knights_archers_zombies_v10.env()
-  env.reset()
-  for agent in env.agent_iter():
-      observation, reward, termination, truncation, info = env.last()
-      action = policy(observation, agent)
-      env.step(action)
+from pettingzoo.butterfly import knights_archers_zombies_v10
+
+env = knights_archers_zombies_v10.env()
+env.reset()
+
+for agent in env.agent_iter():
+    observation, reward, termination, truncation, info = env.last()
+    action = env.action_space(agent).sample() # this is where you would insert your policy
+    env.step(action) # execute the action in the environment
+env.close()
 ```
+
+For detailed usage information, see [AEC API](https://pettingzoo.farama.org/api/aec/) and [Parallel API](https://pettingzoo.farama.org/api/parallel/).

From a98918f305682e1538bc1459e827d6b07f76f034 Mon Sep 17 00:00:00 2001
From: Elliot Tower <elliot@elliottower.com>
Date: Sun, 23 Apr 2023 16:42:40 -0400
Subject: [PATCH 2/7] Replace break with action=None in usage examples

---
 docs/api/aec.md                | 43 ++++++++++++++++------------------
 docs/environments/atari.md     |  6 ++---
 docs/environments/butterfly.md |  2 +-
 docs/environments/mpe.md       |  8 +++----
 docs/environments/sisl.md      |  8 +++----
 5 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/docs/api/aec.md b/docs/api/aec.md
index abb03c569..bfc4fa2e4 100644
--- a/docs/api/aec.md
+++ b/docs/api/aec.md
@@ -16,20 +16,18 @@ for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
     
     if termination or truncation:
-        break
-        
-    action = env.action_space(agent).sample() # this is where you would insert your policy
+        action = None
+    else:    
+        action = env.action_space(agent).sample() # this is where you would insert your policy
     
     env.step(action) # execute the action in the environment
 env.close()
 ```
 
 ### Action Masking
-AEC environments often include action masks, in order to mark valid & invalid actions for the agent. 
+AEC environments often include action masks, in order to mark valid/invalid actions for the agent. 
 
-[//]: # (For details about action masking, see [A Closer Look at Invalid Action Masking in Policy Gradient Algorithms]&#40;https://arxiv.org/abs/2006.14171&#41; &#40;Huang, 2022&#41;.)
-
-To sample actions using invalid action masking: 
+To sample actions using action masking: 
 ```python
 from pettingzoo.classic import chess_v5
 
@@ -40,27 +38,26 @@ for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
 
     if termination or truncation:
-        break
-        
-    # invalid action masking is optional and environment-dependent
-    if "action_mask" in info:
-        mask = info["action_mask"] # used in Shimmy OpenSpiel compatibility wrapper
-    elif isinstance(observation, dict) and "action_mask" in observation:
-        mask = observation["action_mask"] # used in PettingZoo Classic environments
-    else:
-        mask = None 
-        
-    action = env.action_space(agent).sample(mask) # this is where you would insert your policy
-
+        action = None
+    else:  
+        # invalid action masking is optional and environment-dependent
+        if "action_mask" in info:
+            mask = info["action_mask"]
+        elif isinstance(observation, dict) and "action_mask" in observation:
+            mask = observation["action_mask"]
+        else:
+            mask = None 
+        action = env.action_space(agent).sample(mask) # this is where you would insert your policy
     env.step(action) # execute the action in the environment
 env.close()
 ```
 
-Note: invalid action masking is optional, and can be stored either in `observation` or `info`. For example:
-* [PettingZoo Classic](https://pettingzoo.farama.org/environments/classic/) environments store illegal action mask in the `observation` dict:
+Note: action masking is optional, and can be implemented using either `observation` or `info`.
+
+* [PettingZoo Classic](https://pettingzoo.farama.org/environments/classic/) environments store action masks in the `observation` dict:
   * `mask = observation["action_mask"]`
-* [Shimmy](https://shimmy.farama.org/)'s [OpenSpiel environments](https://shimmy.farama.org/environments/open_spiel/) stores illegal action mask in the `info` dict:
-  * `mask = info["action_mask"` 
+* [Shimmy](https://shimmy.farama.org/)'s [OpenSpiel environments](https://shimmy.farama.org/environments/open_spiel/) store action masks in the `info` dict:
+  * `mask = info["action_mask"]` 
 
 To implement action masking in a custom environment, see [Environment Creation: Action Masking](https://pettingzoo.farama.org/tutorials/environmentcreation/3-action-masking/)
 
diff --git a/docs/environments/atari.md b/docs/environments/atari.md
index c6ac4ba1b..061aba2c5 100644
--- a/docs/environments/atari.md
+++ b/docs/environments/atari.md
@@ -63,9 +63,9 @@ for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
 
     if termination or truncation:
-        break
-    
-    action = env.action_space(agent).sample() # this is where you would insert your policy
+        action = None
+    else:
+        action = env.action_space(agent).sample() # this is where you would insert your policy
         
     env.step(action) # execute the action in the environment
 env.close()
diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md
index 010026265..34c08135e 100644
--- a/docs/environments/butterfly.md
+++ b/docs/environments/butterfly.md
@@ -51,7 +51,7 @@ env.close()
 ```
 
 To launch a [Knights Archers Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) environment with interactive user input (see [manual_policy.py](https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/butterfly/knights_archers_zombies/manual_policy.py)):
-``` python
+```python
 import pygame
 from pettingzoo.butterfly import knights_archers_zombies_v10
 
diff --git a/docs/environments/mpe.md b/docs/environments/mpe.md
index 245ae4f44..856c44cf9 100644
--- a/docs/environments/mpe.md
+++ b/docs/environments/mpe.md
@@ -45,10 +45,10 @@ for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
     
     if termination or truncation:
-        break
-    
-    action = env.action_space(agent).sample() # this is where you would insert your policy
-
+        action = None
+    else:
+        action = env.action_space(agent).sample() # this is where you would insert your policy
+        
     env.step(action) # execute the action in the environment
 env.close()
 ```
diff --git a/docs/environments/sisl.md b/docs/environments/sisl.md
index 5b5def34b..858320b1b 100644
--- a/docs/environments/sisl.md
+++ b/docs/environments/sisl.md
@@ -38,10 +38,10 @@ for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
     
     if termination or truncation:
-        break
-    
-    action = env.action_space(agent).sample() # this is where you would insert your policy
-
+        action = None
+    else:
+        action = env.action_space(agent).sample() # this is where you would insert your policy
+        
     env.step(action) # execute the action in the environment
 env.close()
 ```

From f959669c61da2065bac92140937ac4b912925a1c Mon Sep 17 00:00:00 2001
From: elliottower <elliot@elliottower.com>
Date: Wed, 26 Apr 2023 12:04:00 -0400
Subject: [PATCH 3/7] Remove 'execute the action in the environment' comment

---
 docs/api/aec.md                | 4 ++--
 docs/environments/atari.md     | 2 +-
 docs/environments/butterfly.md | 2 +-
 docs/environments/classic.md   | 2 +-
 docs/environments/mpe.md       | 2 +-
 docs/environments/sisl.md      | 2 +-
 docs/index.md                  | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/api/aec.md b/docs/api/aec.md
index bfc4fa2e4..0dceef409 100644
--- a/docs/api/aec.md
+++ b/docs/api/aec.md
@@ -20,7 +20,7 @@ for agent in env.agent_iter():
     else:    
         action = env.action_space(agent).sample() # this is where you would insert your policy
     
-    env.step(action) # execute the action in the environment
+    env.step(action) 
 env.close()
 ```
 
@@ -48,7 +48,7 @@ for agent in env.agent_iter():
         else:
             mask = None 
         action = env.action_space(agent).sample(mask) # this is where you would insert your policy
-    env.step(action) # execute the action in the environment
+    env.step(action) 
 env.close()
 ```
 
diff --git a/docs/environments/atari.md b/docs/environments/atari.md
index 061aba2c5..55351200f 100644
--- a/docs/environments/atari.md
+++ b/docs/environments/atari.md
@@ -67,7 +67,7 @@ for agent in env.agent_iter():
     else:
         action = env.action_space(agent).sample() # this is where you would insert your policy
         
-    env.step(action) # execute the action in the environment
+    env.step(action) 
 env.close()
 ```
 
diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md
index 34c08135e..206f6a7e2 100644
--- a/docs/environments/butterfly.md
+++ b/docs/environments/butterfly.md
@@ -72,6 +72,6 @@ for agent in env.agent_iter():
         # this is where you would insert your policy (for non-player agents)
         action = env.action_space(agent).sample()
 
-    env.step(action) # execute the action in the environment
+    env.step(action) 
 ```
 
diff --git a/docs/environments/classic.md b/docs/environments/classic.md
index 0e9cd4819..4905e9264 100644
--- a/docs/environments/classic.md
+++ b/docs/environments/classic.md
@@ -52,7 +52,7 @@ for agent in env.agent_iter():
     mask = observation["action_mask"]
     action = env.action_space(agent).sample(mask)  # this is where you would insert your policy
         
-    env.step(action)  # execute the action in the environment
+    env.step(action)  
 env.close()
 ```
 
diff --git a/docs/environments/mpe.md b/docs/environments/mpe.md
index 856c44cf9..2ad185e05 100644
--- a/docs/environments/mpe.md
+++ b/docs/environments/mpe.md
@@ -49,7 +49,7 @@ for agent in env.agent_iter():
     else:
         action = env.action_space(agent).sample() # this is where you would insert your policy
         
-    env.step(action) # execute the action in the environment
+    env.step(action) 
 env.close()
 ```
 
diff --git a/docs/environments/sisl.md b/docs/environments/sisl.md
index 858320b1b..f73d610bb 100644
--- a/docs/environments/sisl.md
+++ b/docs/environments/sisl.md
@@ -42,7 +42,7 @@ for agent in env.agent_iter():
     else:
         action = env.action_space(agent).sample() # this is where you would insert your policy
         
-    env.step(action) # execute the action in the environment
+    env.step(action) 
 env.close()
 ```
 
diff --git a/docs/index.md b/docs/index.md
index 443a7739e..4bb1bf936 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -79,7 +79,7 @@ env.reset()
 for agent in env.agent_iter():
     observation, reward, termination, truncation, info = env.last()
     action = env.action_space(agent).sample() # this is where you would insert your policy
-    env.step(action) # execute the action in the environment
+    env.step(action)
 env.close()
 ```
 

From ca3c23e9c1c85e7125b2f97b73f4d2f8eced0348 Mon Sep 17 00:00:00 2001
From: Elliot Tower <elliot@elliottower.com>
Date: Wed, 3 May 2023 00:20:33 -0400
Subject: [PATCH 4/7] Update butterfly.md

---
 docs/environments/butterfly.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md
index 206f6a7e2..116cfeca4 100644
--- a/docs/environments/butterfly.md
+++ b/docs/environments/butterfly.md
@@ -44,8 +44,7 @@ observations = env.reset()
 while env.agents:
     # this is where you would insert your policy
     actions = {agent: env.action_space(agent).sample() for agent in env.agents}  
-    
-    # execute the actions in the environment
+
     observations, rewards, terminations, truncations, infos = env.step(actions)
 env.close()
 ```

From 0647e2afc0df6adcb3d6e4f7111b8d07c7e1879d Mon Sep 17 00:00:00 2001
From: Elliot Tower <elliot@elliottower.com>
Date: Wed, 3 May 2023 00:21:10 -0400
Subject: [PATCH 5/7] Update butterfly.md

---
 docs/environments/butterfly.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/environments/butterfly.md b/docs/environments/butterfly.md
index 116cfeca4..443daf850 100644
--- a/docs/environments/butterfly.md
+++ b/docs/environments/butterfly.md
@@ -72,5 +72,6 @@ for agent in env.agent_iter():
         action = env.action_space(agent).sample()
 
     env.step(action) 
+env.close()
 ```
 

From fb929107b03f98f1df2cca21021078eb65654a2f Mon Sep 17 00:00:00 2001
From: Elliot Tower <elliot@elliottower.com>
Date: Wed, 3 May 2023 00:22:02 -0400
Subject: [PATCH 6/7] Update parallel.md

---
 docs/api/parallel.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/api/parallel.md b/docs/api/parallel.md
index 912ba00b9..e67082a12 100644
--- a/docs/api/parallel.md
+++ b/docs/api/parallel.md
@@ -15,8 +15,8 @@ while env.agents:
     # this is where you would insert your policy
     actions = {agent: parallel_env.action_space(agent).sample() for agent in parallel_env.agents}  
     
-    # execute the actions in the environment
     observations, rewards, terminations, truncations, infos = parallel_env.step(actions)
+parallel_env.close()
 ```
 
 ## ParallelEnv

From e2fec98c87a52fb18b29014a4dee6ee932c4dc23 Mon Sep 17 00:00:00 2001
From: Elliot Tower <elliot@elliottower.com>
Date: Wed, 3 May 2023 12:29:23 -0400
Subject: [PATCH 7/7] Finish merging AEC.md

---
 docs/api/aec.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/api/aec.md b/docs/api/aec.md
index 0dceef409..6719d990d 100644
--- a/docs/api/aec.md
+++ b/docs/api/aec.md
@@ -1,3 +1,9 @@
+---
+title: AEC
+---
+
+
+
 # AEC API
 
 By default, PettingZoo models games as [*Agent Environment Cycle*](https://arxiv.org/abs/2009.13051) (AEC) environments. This allows it to support any type of game multi-agent RL can consider.
@@ -48,6 +54,7 @@ for agent in env.agent_iter():
         else:
             mask = None 
         action = env.action_space(agent).sample(mask) # this is where you would insert your policy
+        
     env.step(action) 
 env.close()
 ```