From 3770f6bd65003b95583fbe69b4f4011807d318ea Mon Sep 17 00:00:00 2001
From: giovanni <giovannijgrotto@gmail.com>
Date: Sun, 3 Sep 2023 12:40:23 +0200
Subject: [PATCH 1/3] Added comments to CustomEnvironment tutorial 2 and 3

---
 .../custom_environment/2-environment-logic.md |  4 +-
 .../tutorial2_adding_game_logic.py            | 58 ++++++++++++++++++
 .../tutorial3_action_masking.py               | 59 +++++++++++++++++++
 3 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/docs/tutorials/custom_environment/2-environment-logic.md b/docs/tutorials/custom_environment/2-environment-logic.md
index f917b6cd8..0fa50d3af 100644
--- a/docs/tutorials/custom_environment/2-environment-logic.md
+++ b/docs/tutorials/custom_environment/2-environment-logic.md
@@ -10,8 +10,8 @@ Now that we have a basic understanding of the structure of environment repositor
 
 For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:
 - The prisoner starts in the top left corner,
-- the guard starts in the bottom right corner,
-- the escape door is randomly placed in the middle of the grid, and
+- The guard starts in the bottom right corner,
+- The escape door is randomly placed in the middle of the grid, and
 - Both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).
 
 ## Code
diff --git a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
index a52222815..b82f2b461 100644
--- a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
+++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
@@ -9,11 +9,33 @@
 
 
 class CustomEnvironment(ParallelEnv):
+    """
+    The metadata holds environment constants.
+    The "name" metadata allows the environment to be pretty printed.
+    """
+
     metadata = {
         "name": "custom_environment_v0",
     }
 
     def __init__(self):
+        """
+        The init method takes in environment arguments.
+        Should define the following attributes:
+
+        - escape x and y coordinates
+        - guard x and y coordinates
+        - prisoner x and y coordinates
+        - timestep
+        - possible_agents
+
+        Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
+        Spaces should be defined in the action_space() and observation_space() methods.
+        If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
+
+        These attributes should not be changed after initialization.
+        """
+
         self.escape_y = None
         self.escape_x = None
         self.guard_y = None
@@ -24,6 +46,20 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]
 
     def reset(self, seed=None, options=None):
+        """
+        Reset needs to initialize the following attributes:
+
+        - agents
+        - timestamp
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - escape x and y coordinates
+        - observation
+        - infos
+
+        And must set up the environment so that render(), step(), and observe() can be called without issues.
+        """
+
         self.agents = copy(self.possible_agents)
         self.timestep = 0
 
@@ -51,6 +87,21 @@ def reset(self, seed=None, options=None):
         return observations, infos
 
     def step(self, actions):
+        """
+        Takes in an action for the current agent (specified by agent_selection).
+        Needs to update:
+
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - terminations
+        - truncations
+        - rewards
+        - timestep
+        - infos
+
+        And any internal state used by observe() or render()
+        """
+
         # Execute actions
         prisoner_action = actions["prisoner"]
         guard_action = actions["guard"]
@@ -110,16 +161,23 @@ def step(self, actions):
         return observations, rewards, terminations, truncations, infos
 
     def render(self):
+        """Renders the environment."""
         grid = np.full((7, 7), " ")
         grid[self.prisoner_y, self.prisoner_x] = "P"
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
         print(f"{grid} \n")
 
+    # Observation space should be defined here.
+    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def observation_space(self, agent):
+        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
         return MultiDiscrete([7 * 7] * 3)
 
+    # Action space should be defined here.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def action_space(self, agent):
         return Discrete(4)
diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py
index 3d70d4893..bc73c0d86 100644
--- a/tutorials/CustomEnvironment/tutorial3_action_masking.py
+++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py
@@ -9,11 +9,33 @@
 
 
 class CustomActionMaskedEnvironment(ParallelEnv):
+    """
+    The metadata holds environment constants.
+    The "name" metadata allows the environment to be pretty printed.
+    """
+
     metadata = {
         "name": "custom_environment_v0",
     }
 
     def __init__(self):
+        """
+        The init method takes in environment arguments.
+        Should define the following attributes:
+
+        - escape x and y coordinates
+        - guard x and y coordinates
+        - prisoner x and y coordinates
+        - timestep
+        - possible_agents
+
+        Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
+        Spaces should be defined in the action_space() and observation_space() methods.
+        If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
+
+        These attributes should not be changed after initialization.
+        """
+
         self.escape_y = None
         self.escape_x = None
         self.guard_y = None
@@ -24,6 +46,20 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]
 
     def reset(self, seed=None, options=None):
+        """
+        Reset needs to initialize the following attributes:
+
+        - agents
+        - timestamp
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - escape x and y coordinates
+        - observation
+        - infos
+
+        And must set up the environment so that render(), step(), and observe() can be called without issues.
+        """
+
         self.agents = copy(self.possible_agents)
         self.timestep = 0
 
@@ -52,6 +88,21 @@ def reset(self, seed=None, options=None):
         return observations, infos
 
     def step(self, actions):
+        """
+        Takes in an action for the current agent (specified by agent_selection).
+        Needs to update:
+
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - terminations
+        - truncations
+        - rewards
+        - timestep
+        - infos
+
+        And any internal state used by observe() or render()
+        """
+
         # Execute actions
         prisoner_action = actions["prisoner"]
         guard_action = actions["guard"]
@@ -95,6 +146,7 @@ def step(self, actions):
         elif self.guard_y == 6:
             guard_action_mask[3] = 0
 
+        # Action mask to prevent guard from going over escape cell
         if self.guard_x - 1 == self.escape_x:
             guard_action_mask[0] = 0
         elif self.guard_x + 1 == self.escape_x:
@@ -145,16 +197,23 @@ def step(self, actions):
         return observations, rewards, terminations, truncations, infos
 
     def render(self):
+        """Prints the 7x7 grid, marking the prisoner (P), guard (G) and escape (E) cells."""
-        grid = np.zeros((7, 7))
+        grid = np.full((7, 7), " ")  # string grid: a float np.zeros() array cannot store "P"/"G"/"E"
         grid[self.prisoner_y, self.prisoner_x] = "P"
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
         print(f"{grid} \n")
 
+    # Observation space should be defined here.
+    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def observation_space(self, agent):
+        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
         return MultiDiscrete([7 * 7 - 1] * 3)
 
+    # Action space should be defined here.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def action_space(self, agent):
         return Discrete(4)

From 308c10dc131ada845aec9238cb46dfb336ff507c Mon Sep 17 00:00:00 2001
From: giovanni <giovannijgrotto@gmail.com>
Date: Sun, 3 Sep 2023 17:10:28 +0200
Subject: [PATCH 2/3] Added comments to CustomEnvironment tutorial 2 and 3

---
 .../tutorial3_action_masking.py               | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py
index bc73c0d86..babfa0a4e 100644
--- a/tutorials/CustomEnvironment/tutorial3_action_masking.py
+++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py
@@ -9,8 +9,8 @@
 
 
 class CustomActionMaskedEnvironment(ParallelEnv):
-    """
-    The metadata holds environment constants.
+    """The metadata holds environment constants.
+
     The "name" metadata allows the environment to be pretty printed.
     """
 
@@ -19,10 +19,9 @@ class CustomActionMaskedEnvironment(ParallelEnv):
     }
 
     def __init__(self):
-        """
-        The init method takes in environment arguments.
-        Should define the following attributes:
+        """The init method takes in environment arguments.
 
+        Should define the following attributes:
         - escape x and y coordinates
         - guard x and y coordinates
         - prisoner x and y coordinates
@@ -35,7 +34,6 @@ def __init__(self):
 
         These attributes should not be changed after initialization.
         """
-
         self.escape_y = None
         self.escape_x = None
         self.guard_y = None
@@ -46,9 +44,9 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]
 
     def reset(self, seed=None, options=None):
-        """
-        Reset needs to initialize the following attributes:
+        """Reset make the environment go back to a starting point.
 
+        It needs to initialize the following attributes:
         - agents
-        - timestamp
+        - timestep
         - prisoner x and y coordinates
@@ -59,7 +57,6 @@ def reset(self, seed=None, options=None):
 
         And must set up the environment so that render(), step(), and observe() can be called without issues.
         """
-
         self.agents = copy(self.possible_agents)
         self.timestep = 0
 
@@ -88,10 +85,9 @@ def reset(self, seed=None, options=None):
         return observations, infos
 
     def step(self, actions):
-        """
-        Takes in an action for the current agent (specified by agent_selection).
-        Needs to update:
+        """Takes in a dict of actions, keyed by agent name, with one action per agent.
 
+        Needs to update:
         - prisoner x and y coordinates
         - guard x and y coordinates
         - terminations
@@ -102,7 +98,6 @@ def step(self, actions):
 
         And any internal state used by observe() or render()
         """
-
         # Execute actions
         prisoner_action = actions["prisoner"]
         guard_action = actions["guard"]

From ee10f0b18210c1e2737edd9d5f9d7a2a2d778f0e Mon Sep 17 00:00:00 2001
From: giovanni <giovannijgrotto@gmail.com>
Date: Sun, 3 Sep 2023 17:11:51 +0200
Subject: [PATCH 3/3] Added comments to CustomEnvironment tutorial 2 and 3

---
 .../tutorial2_adding_game_logic.py            | 21 +++++++------------
 .../tutorial3_action_masking.py               |  2 +-
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
index b82f2b461..6096e0ea8 100644
--- a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
+++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
@@ -9,8 +9,8 @@
 
 
 class CustomEnvironment(ParallelEnv):
-    """
-    The metadata holds environment constants.
+    """The metadata holds environment constants.
+
     The "name" metadata allows the environment to be pretty printed.
     """
 
@@ -19,10 +19,9 @@ class CustomEnvironment(ParallelEnv):
     }
 
     def __init__(self):
-        """
-        The init method takes in environment arguments.
-        Should define the following attributes:
+        """The init method takes in environment arguments.
 
+        Should define the following attributes:
         - escape x and y coordinates
         - guard x and y coordinates
         - prisoner x and y coordinates
@@ -35,7 +34,6 @@ def __init__(self):
 
         These attributes should not be changed after initialization.
         """
-
         self.escape_y = None
         self.escape_x = None
         self.guard_y = None
@@ -46,9 +44,9 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]
 
     def reset(self, seed=None, options=None):
-        """
-        Reset needs to initialize the following attributes:
+        """Reset sets the environment to a starting point.
 
+        It needs to initialize the following attributes:
         - agents
-        - timestamp
+        - timestep
         - prisoner x and y coordinates
@@ -59,7 +57,6 @@ def reset(self, seed=None, options=None):
 
         And must set up the environment so that render(), step(), and observe() can be called without issues.
         """
-
         self.agents = copy(self.possible_agents)
         self.timestep = 0
 
@@ -87,10 +84,9 @@ def reset(self, seed=None, options=None):
         return observations, infos
 
     def step(self, actions):
-        """
-        Takes in an action for the current agent (specified by agent_selection).
-        Needs to update:
+        """Takes in a dict of actions, keyed by agent name, with one action per agent.
 
+        Needs to update:
         - prisoner x and y coordinates
         - guard x and y coordinates
         - terminations
@@ -101,7 +97,6 @@ def step(self, actions):
 
         And any internal state used by observe() or render()
         """
-
         # Execute actions
         prisoner_action = actions["prisoner"]
         guard_action = actions["guard"]
diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py
index babfa0a4e..24676373f 100644
--- a/tutorials/CustomEnvironment/tutorial3_action_masking.py
+++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py
@@ -44,7 +44,7 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]
 
     def reset(self, seed=None, options=None):
-        """Reset make the environment go back to a starting point.
+        """Reset sets the environment to a starting point.
 
         It needs to initialize the following attributes:
         - agents