diff --git a/docs/tutorials/custom_environment/2-environment-logic.md b/docs/tutorials/custom_environment/2-environment-logic.md
index f917b6cd8..0fa50d3af 100644
--- a/docs/tutorials/custom_environment/2-environment-logic.md
+++ b/docs/tutorials/custom_environment/2-environment-logic.md
@@ -10,8 +10,8 @@ Now that we have a basic understanding of the structure of environment repositor
 For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:
 - The prisoner starts in the top left corner,
-- the guard starts in the bottom right corner,
-- the escape door is randomly placed in the middle of the grid, and
+- The guard starts in the bottom right corner,
+- The escape door is randomly placed in the middle of the grid, and
 - Both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).

 ## Code
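The "randomly placed in the middle of the grid" rule can be made concrete with a short sketch. This is a hypothetical illustration, assuming "middle" means the interior cells away from the two starting corners; the bounds used by the actual tutorial code may differ:

```python
import random

# Hypothetical placement rule: sample the escape door uniformly from the
# interior of the 7x7 grid, away from the prisoner's corner (0, 0) and the
# guard's corner (6, 6).
escape_x = random.randint(2, 5)
escape_y = random.randint(2, 5)
```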
diff --git a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
index a52222815..6096e0ea8 100644
--- a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
+++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
@@ -9,11 +9,31 @@


 class CustomEnvironment(ParallelEnv):
+    """The metadata holds environment constants.
+
+    The "name" metadata allows the environment to be pretty printed.
+    """
+
     metadata = {
         "name": "custom_environment_v0",
     }

     def __init__(self):
+        """The init method takes in environment arguments.
+
+        Should define the following attributes:
+        - escape x and y coordinates
+        - guard x and y coordinates
+        - prisoner x and y coordinates
+        - timestep
+        - possible_agents
+
+        Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
+        Spaces should be defined in the action_space() and observation_space() methods.
+        If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
+
+        These attributes should not be changed after initialization.
+        """
         self.escape_y = None
         self.escape_x = None
         self.guard_y = None
@@ -24,6 +44,19 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]

     def reset(self, seed=None, options=None):
+        """Reset sets the environment to a starting point.
+
+        It needs to initialize the following attributes:
+        - agents
+        - timestep
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - escape x and y coordinates
+        - observation
+        - infos
+
+        And must set up the environment so that render(), step(), and observe() can be called without issues.
+        """
         self.agents = copy(self.possible_agents)
         self.timestep = 0
@@ -51,6 +84,19 @@ def reset(self, seed=None, options=None):
         return observations, infos

     def step(self, actions):
+        """Takes in a dictionary of actions, keyed by agent name, for every agent at once.
+
+        Needs to update:
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - terminations
+        - truncations
+        - rewards
+        - timestep
+        - infos
+
+        And any internal state used by observe() or render().
+        """
         # Execute actions
         prisoner_action = actions["prisoner"]
         guard_action = actions["guard"]
@@ -110,16 +156,23 @@ def step(self, actions):
         return observations, rewards, terminations, truncations, infos

     def render(self):
+        """Renders the environment."""
         grid = np.full((7, 7), " ")
         grid[self.prisoner_y, self.prisoner_x] = "P"
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
         print(f"{grid} \n")

+    # Observation space should be defined here.
+    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def observation_space(self, agent):
+        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
         return MultiDiscrete([7 * 7] * 3)

+    # Action space should be defined here.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def action_space(self, agent):
         return Discrete(4)
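The docstrings above spell out the Parallel API contract: reset() returns (observations, infos), and step() consumes one dictionary of actions covering every live agent. A minimal driver loop under those assumptions (the import path follows the diff's file name, and the random policy is purely illustrative):

```python
from tutorial2_adding_game_logic import CustomEnvironment

env = CustomEnvironment()
observations, infos = env.reset()

# Step all agents at once with a dict of actions, as the Parallel API expects.
# The loop ends once the environment clears self.agents on termination/truncation.
while env.agents:
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
    env.render()
```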
diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py
index 3d70d4893..24676373f 100644
--- a/tutorials/CustomEnvironment/tutorial3_action_masking.py
+++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py
@@ -9,11 +9,31 @@


 class CustomActionMaskedEnvironment(ParallelEnv):
+    """The metadata holds environment constants.
+
+    The "name" metadata allows the environment to be pretty printed.
+    """
+
     metadata = {
         "name": "custom_environment_v0",
     }

     def __init__(self):
+        """The init method takes in environment arguments.
+
+        Should define the following attributes:
+        - escape x and y coordinates
+        - guard x and y coordinates
+        - prisoner x and y coordinates
+        - timestep
+        - possible_agents
+
+        Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
+        Spaces should be defined in the action_space() and observation_space() methods.
+        If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
+
+        These attributes should not be changed after initialization.
+        """
         self.escape_y = None
         self.escape_x = None
         self.guard_y = None
@@ -24,6 +44,19 @@ def __init__(self):
         self.possible_agents = ["prisoner", "guard"]

     def reset(self, seed=None, options=None):
+        """Reset sets the environment to a starting point.
+
+        It needs to initialize the following attributes:
+        - agents
+        - timestep
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - escape x and y coordinates
+        - observation
+        - infos
+
+        And must set up the environment so that render(), step(), and observe() can be called without issues.
+        """
         self.agents = copy(self.possible_agents)
         self.timestep = 0
@@ -52,6 +85,19 @@ def reset(self, seed=None, options=None):
         return observations, infos

     def step(self, actions):
+        """Takes in a dictionary of actions, keyed by agent name, for every agent at once.
+
+        Needs to update:
+        - prisoner x and y coordinates
+        - guard x and y coordinates
+        - terminations
+        - truncations
+        - rewards
+        - timestep
+        - infos
+
+        And any internal state used by observe() or render().
+        """
         # Execute actions
         prisoner_action = actions["prisoner"]
         guard_action = actions["guard"]
@@ -95,6 +141,7 @@ def step(self, actions):
         elif self.guard_y == 6:
             guard_action_mask[3] = 0

+        # Action mask to prevent guard from going over escape cell
         if self.guard_x - 1 == self.escape_x:
             guard_action_mask[0] = 0
         elif self.guard_x + 1 == self.escape_x:
             guard_action_mask[1] = 0
@@ -145,16 +192,23 @@ def step(self, actions):
         return observations, rewards, terminations, truncations, infos

     def render(self):
+        """Renders the environment."""
         grid = np.full((7, 7), " ")
         grid[self.prisoner_y, self.prisoner_x] = "P"
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
         print(f"{grid} \n")

+    # Observation space should be defined here.
+    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def observation_space(self, agent):
+        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
         return MultiDiscrete([7 * 7 - 1] * 3)

+    # Action space should be defined here.
+    # If your spaces change over time, remove this line (disable caching).
     @functools.lru_cache(maxsize=None)
     def action_space(self, agent):
         return Discrete(4)
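The masked variant computes guard_action_mask inside step(), but the diff does not show how the mask reaches the agent. Assuming each per-agent observation is a dict with an "action_mask" entry (the common PettingZoo convention), a consumer would restrict sampling to legal actions along these lines:

```python
import numpy as np

def sample_legal_action(obs, rng):
    """Sample uniformly among actions whose mask entry is 1."""
    legal = np.flatnonzero(obs["action_mask"])  # indices of permitted actions
    return int(rng.choice(legal))

# Hypothetical usage inside the driver loop shown earlier:
# rng = np.random.default_rng(0)
# actions = {agent: sample_legal_action(observations[agent], rng) for agent in env.agents}
```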