From 3770f6bd65003b95583fbe69b4f4011807d318ea Mon Sep 17 00:00:00 2001 From: giovanni Date: Sun, 3 Sep 2023 12:40:23 +0200 Subject: [PATCH 1/3] Added comments to CustomEnvironment tutorial 2 and 3 --- .../custom_environment/2-environment-logic.md | 4 +- .../tutorial2_adding_game_logic.py | 58 ++++++++++++++++++ .../tutorial3_action_masking.py | 59 +++++++++++++++++++ 3 files changed, 119 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/custom_environment/2-environment-logic.md b/docs/tutorials/custom_environment/2-environment-logic.md index f917b6cd8..0fa50d3af 100644 --- a/docs/tutorials/custom_environment/2-environment-logic.md +++ b/docs/tutorials/custom_environment/2-environment-logic.md @@ -10,8 +10,8 @@ Now that we have a basic understanding of the structure of environment repositor For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where: - The prisoner starts in the top left corner, -- the guard starts in the bottom right corner, -- the escape door is randomly placed in the middle of the grid, and +- The guard starts in the bottom right corner, +- The escape door is randomly placed in the middle of the grid - Both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right). ## Code diff --git a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py index a52222815..b82f2b461 100644 --- a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py +++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py @@ -9,11 +9,33 @@ class CustomEnvironment(ParallelEnv): + """ + The metadata holds environment constants. + The "name" metadata allows the environment to be pretty printed. + """ + metadata = { "name": "custom_environment_v0", } def __init__(self): + """ + The init method takes in environment arguments. 
+ Should define the following attributes: + + - escape x and y coordinates + - guard x and y coordinates + - prisoner x and y coordinates + - timestamp + - possible_agents + + Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated. + Spaces should be defined in the action_space() and observation_space() methods. + If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning. + + These attributes should not be changed after initialization. + """ + self.escape_y = None self.escape_x = None self.guard_y = None @@ -24,6 +46,20 @@ def __init__(self): self.possible_agents = ["prisoner", "guard"] def reset(self, seed=None, options=None): + """ + Reset needs to initialize the following attributes: + + - agents + - timestamp + - prisoner x and y coordinates + - guard x and y coordinates + - escape x and y coordinates + - observation + - infos + + And must set up the environment so that render(), step(), and observe() can be called without issues. + """ + self.agents = copy(self.possible_agents) self.timestep = 0 @@ -51,6 +87,21 @@ def reset(self, seed=None, options=None): return observations, infos def step(self, actions): + """ + Takes in an action for the current agent (specified by agent_selection). + Needs to update: + + - prisoner x and y coordinates + - guard x and y coordinates + - terminations + - truncations + - rewards + - timestamp + - infos + + And any internal state used by observe() or render() + """ + # Execute actions prisoner_action = actions["prisoner"] guard_action = actions["guard"] @@ -110,16 +161,23 @@ def step(self, actions): return observations, rewards, terminations, truncations, infos def render(self): + """Renders the environment.""" grid = np.full((7, 7), " ") grid[self.prisoner_y, self.prisoner_x] = "P" grid[self.guard_y, self.guard_x] = "G" grid[self.escape_y, self.escape_x] = "E" print(f"{grid} \n") + # Observation space should be defined here. 
+ # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space. + # If your spaces change over time, remove this line (disable caching). @functools.lru_cache(maxsize=None) def observation_space(self, agent): + # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/ return MultiDiscrete([7 * 7] * 3) + # Action space should be defined here. + # If your spaces change over time, remove this line (disable caching). @functools.lru_cache(maxsize=None) def action_space(self, agent): return Discrete(4) diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py index 3d70d4893..bc73c0d86 100644 --- a/tutorials/CustomEnvironment/tutorial3_action_masking.py +++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py @@ -9,11 +9,33 @@ class CustomActionMaskedEnvironment(ParallelEnv): + """ + The metadata holds environment constants. + The "name" metadata allows the environment to be pretty printed. + """ + metadata = { "name": "custom_environment_v0", } def __init__(self): + """ + The init method takes in environment arguments. + Should define the following attributes: + + - escape x and y coordinates + - guard x and y coordinates + - prisoner x and y coordinates + - timestamp + - possible_agents + + Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated. + Spaces should be defined in the action_space() and observation_space() methods. + If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning. + + These attributes should not be changed after initialization. 
+ """ + self.escape_y = None self.escape_x = None self.guard_y = None @@ -24,6 +46,20 @@ def __init__(self): self.possible_agents = ["prisoner", "guard"] def reset(self, seed=None, options=None): + """ + Reset needs to initialize the following attributes: + + - agents + - timestamp + - prisoner x and y coordinates + - guard x and y coordinates + - escape x and y coordinates + - observation + - infos + + And must set up the environment so that render(), step(), and observe() can be called without issues. + """ + self.agents = copy(self.possible_agents) self.timestep = 0 @@ -52,6 +88,21 @@ def reset(self, seed=None, options=None): return observations, infos def step(self, actions): + """ + Takes in an action for the current agent (specified by agent_selection). + Needs to update: + + - prisoner x and y coordinates + - guard x and y coordinates + - terminations + - truncations + - rewards + - timestamp + - infos + + And any internal state used by observe() or render() + """ + # Execute actions prisoner_action = actions["prisoner"] guard_action = actions["guard"] @@ -95,6 +146,7 @@ def step(self, actions): elif self.guard_y == 6: guard_action_mask[3] = 0 + # Action mask to prevent guard from going over escape cell if self.guard_x - 1 == self.escape_x: guard_action_mask[0] = 0 elif self.guard_x + 1 == self.escape_x: @@ -145,16 +197,23 @@ def step(self, actions): return observations, rewards, terminations, truncations, infos def render(self): + """Renders the environment.""" grid = np.zeros((7, 7)) grid[self.prisoner_y, self.prisoner_x] = "P" grid[self.guard_y, self.guard_x] = "G" grid[self.escape_y, self.escape_x] = "E" print(f"{grid} \n") + # Observation space should be defined here. + # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space. + # If your spaces change over time, remove this line (disable caching). 
@functools.lru_cache(maxsize=None) def observation_space(self, agent): + # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/ return MultiDiscrete([7 * 7 - 1] * 3) + # Action space should be defined here. + # If your spaces change over time, remove this line (disable caching). @functools.lru_cache(maxsize=None) def action_space(self, agent): return Discrete(4) From 308c10dc131ada845aec9238cb46dfb336ff507c Mon Sep 17 00:00:00 2001 From: giovanni Date: Sun, 3 Sep 2023 17:10:28 +0200 Subject: [PATCH 2/3] Added comments to CustomEnvironment tutorial 2 and 3 --- .../tutorial3_action_masking.py | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py index bc73c0d86..babfa0a4e 100644 --- a/tutorials/CustomEnvironment/tutorial3_action_masking.py +++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py @@ -9,8 +9,8 @@ class CustomActionMaskedEnvironment(ParallelEnv): - """ - The metadata holds environment constants. + """The metadata holds environment constants. + The "name" metadata allows the environment to be pretty printed. """ @@ -19,10 +19,9 @@ class CustomActionMaskedEnvironment(ParallelEnv): } def __init__(self): - """ - The init method takes in environment arguments. - Should define the following attributes: + """The init method takes in environment arguments. + Should define the following attributes: - escape x and y coordinates - guard x and y coordinates - prisoner x and y coordinates @@ -35,7 +34,6 @@ def __init__(self): These attributes should not be changed after initialization. 
""" - self.escape_y = None self.escape_x = None self.guard_y = None @@ -46,9 +44,9 @@ def __init__(self): self.possible_agents = ["prisoner", "guard"] def reset(self, seed=None, options=None): - """ - Reset needs to initialize the following attributes: + """Reset make the environment go back to a starting point. + It needs to initialize the following attributes: - agents - timestamp - prisoner x and y coordinates @@ -59,7 +57,6 @@ def reset(self, seed=None, options=None): And must set up the environment so that render(), step(), and observe() can be called without issues. """ - self.agents = copy(self.possible_agents) self.timestep = 0 @@ -88,10 +85,9 @@ def reset(self, seed=None, options=None): return observations, infos def step(self, actions): - """ - Takes in an action for the current agent (specified by agent_selection). - Needs to update: + """Takes in an action for the current agent (specified by agent_selection). + Needs to update: - prisoner x and y coordinates - guard x and y coordinates - terminations @@ -102,7 +98,6 @@ def step(self, actions): And any internal state used by observe() or render() """ - # Execute actions prisoner_action = actions["prisoner"] guard_action = actions["guard"] From ee10f0b18210c1e2737edd9d5f9d7a2a2d778f0e Mon Sep 17 00:00:00 2001 From: giovanni Date: Sun, 3 Sep 2023 17:11:51 +0200 Subject: [PATCH 3/3] Added comments to CustomEnvironment tutorial 2 and 3 --- .../tutorial2_adding_game_logic.py | 21 +++++++------------ .../tutorial3_action_masking.py | 2 +- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py index b82f2b461..6096e0ea8 100644 --- a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py +++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py @@ -9,8 +9,8 @@ class CustomEnvironment(ParallelEnv): - """ - The metadata holds environment constants. 
+ """The metadata holds environment constants. + The "name" metadata allows the environment to be pretty printed. """ @@ -19,10 +19,9 @@ class CustomEnvironment(ParallelEnv): } def __init__(self): - """ - The init method takes in environment arguments. - Should define the following attributes: + """The init method takes in environment arguments. + Should define the following attributes: - escape x and y coordinates - guard x and y coordinates - prisoner x and y coordinates @@ -35,7 +34,6 @@ def __init__(self): These attributes should not be changed after initialization. """ - self.escape_y = None self.escape_x = None self.guard_y = None @@ -46,9 +44,9 @@ def __init__(self): self.possible_agents = ["prisoner", "guard"] def reset(self, seed=None, options=None): - """ - Reset needs to initialize the following attributes: + """Reset sets the environment to a starting point. + It needs to initialize the following attributes: - agents - timestamp - prisoner x and y coordinates @@ -59,7 +57,6 @@ def reset(self, seed=None, options=None): And must set up the environment so that render(), step(), and observe() can be called without issues. """ - self.agents = copy(self.possible_agents) self.timestep = 0 @@ -87,10 +84,9 @@ def reset(self, seed=None, options=None): return observations, infos def step(self, actions): - """ - Takes in an action for the current agent (specified by agent_selection). - Needs to update: + """Takes in an action for the current agent (specified by agent_selection). 
+ Needs to update: - prisoner x and y coordinates - guard x and y coordinates - terminations @@ -101,7 +97,6 @@ def step(self, actions): And any internal state used by observe() or render() """ - # Execute actions prisoner_action = actions["prisoner"] guard_action = actions["guard"] diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py index babfa0a4e..24676373f 100644 --- a/tutorials/CustomEnvironment/tutorial3_action_masking.py +++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py @@ -44,7 +44,7 @@ def __init__(self): self.possible_agents = ["prisoner", "guard"] def reset(self, seed=None, options=None): - """Reset make the environment go back to a starting point. + """Reset sets the environment to a starting point. It needs to initialize the following attributes: - agents