Skip to content

Commit

Permalink
Updated CustomEnvironment tutorial comments (#1084)
Browse files Browse the repository at this point in the history
  • Loading branch information
GiovanniGrotto authored Sep 4, 2023
1 parent c3dc056 commit ace9b76
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 2 deletions.
4 changes: 2 additions & 2 deletions docs/tutorials/custom_environment/2-environment-logic.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Now that we have a basic understanding of the structure of environment repositor

For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:
- The prisoner starts in the top left corner,
- the guard starts in the bottom right corner,
- the escape door is randomly placed in the middle of the grid, and
- The guard starts in the bottom right corner,
- The escape door is randomly placed in the middle of the grid
- Both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).

## Code
Expand Down
53 changes: 53 additions & 0 deletions tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@


class CustomEnvironment(ParallelEnv):
"""The metadata holds environment constants.
The "name" metadata allows the environment to be pretty printed.
"""

metadata = {
"name": "custom_environment_v0",
}

def __init__(self):
"""The init method takes in environment arguments.
Should define the following attributes:
- escape x and y coordinates
- guard x and y coordinates
- prisoner x and y coordinates
- timestep
- possible_agents
Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
Spaces should be defined in the action_space() and observation_space() methods.
If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
These attributes should not be changed after initialization.
"""
self.escape_y = None
self.escape_x = None
self.guard_y = None
Expand All @@ -24,6 +44,19 @@ def __init__(self):
self.possible_agents = ["prisoner", "guard"]

def reset(self, seed=None, options=None):
"""Reset the environment to a starting point.
It needs to initialize the following attributes:
- agents
- timestep
- prisoner x and y coordinates
- guard x and y coordinates
- escape x and y coordinates
- observation
- infos
And must set up the environment so that render(), step(), and observe() can be called without issues.
"""
self.agents = copy(self.possible_agents)
self.timestep = 0

Expand Down Expand Up @@ -51,6 +84,19 @@ def reset(self, seed=None, options=None):
return observations, infos

def step(self, actions):
"""Takes in a dict of actions, one per agent (keyed by agent name).
Needs to update:
- prisoner x and y coordinates
- guard x and y coordinates
- terminations
- truncations
- rewards
- timestep
- infos
And any internal state used by observe() or render()
"""
# Execute actions
prisoner_action = actions["prisoner"]
guard_action = actions["guard"]
Expand Down Expand Up @@ -110,16 +156,23 @@ def step(self, actions):
return observations, rewards, terminations, truncations, infos

def render(self):
    """Print the 7x7 board to stdout with each entity's cell marked.

    Cells are blank except for "P" (prisoner), "G" (guard) and "E" (escape).
    Markers are written in that order, so a later marker wins on a shared cell.
    """
    board = np.full((7, 7), " ")
    markers = (
        (self.prisoner_y, self.prisoner_x, "P"),
        (self.guard_y, self.guard_x, "G"),
        (self.escape_y, self.escape_x, "E"),
    )
    # Rows are indexed by y and columns by x.
    for row, col, symbol in markers:
        board[row, col] = symbol
    print(f"{board} \n")

# Per-agent observation space.
# lru_cache memoizes the returned space, so repeated lookups with the same arguments skip rebuilding it.
# Drop the decorator (disable caching) if your spaces can change over time.
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
    """Return the observation space for `agent`: three components with 7 * 7 values each."""
    # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
    cell_count = 7 * 7
    return MultiDiscrete([cell_count, cell_count, cell_count])

# Per-agent action space.
# The result is memoized; drop the decorator (disable caching) if your spaces can change over time.
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
    """Return the action space for `agent`: four discrete actions."""
    action_count = 4
    return Discrete(action_count)
54 changes: 54 additions & 0 deletions tutorials/CustomEnvironment/tutorial3_action_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@


class CustomActionMaskedEnvironment(ParallelEnv):
"""The metadata holds environment constants.
The "name" metadata allows the environment to be pretty printed.
"""

metadata = {
"name": "custom_environment_v0",
}

def __init__(self):
"""The init method takes in environment arguments.
Should define the following attributes:
- escape x and y coordinates
- guard x and y coordinates
- prisoner x and y coordinates
- timestep
- possible_agents
Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
Spaces should be defined in the action_space() and observation_space() methods.
If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
These attributes should not be changed after initialization.
"""
self.escape_y = None
self.escape_x = None
self.guard_y = None
Expand All @@ -24,6 +44,19 @@ def __init__(self):
self.possible_agents = ["prisoner", "guard"]

def reset(self, seed=None, options=None):
"""Reset the environment to a starting point.
It needs to initialize the following attributes:
- agents
- timestep
- prisoner x and y coordinates
- guard x and y coordinates
- escape x and y coordinates
- observation
- infos
And must set up the environment so that render(), step(), and observe() can be called without issues.
"""
self.agents = copy(self.possible_agents)
self.timestep = 0

Expand Down Expand Up @@ -52,6 +85,19 @@ def reset(self, seed=None, options=None):
return observations, infos

def step(self, actions):
"""Takes in a dict of actions, one per agent (keyed by agent name).
Needs to update:
- prisoner x and y coordinates
- guard x and y coordinates
- terminations
- truncations
- rewards
- timestep
- infos
And any internal state used by observe() or render()
"""
# Execute actions
prisoner_action = actions["prisoner"]
guard_action = actions["guard"]
Expand Down Expand Up @@ -95,6 +141,7 @@ def step(self, actions):
elif self.guard_y == 6:
guard_action_mask[3] = 0

# Action mask to prevent guard from going over escape cell
if self.guard_x - 1 == self.escape_x:
guard_action_mask[0] = 0
elif self.guard_x + 1 == self.escape_x:
Expand Down Expand Up @@ -145,16 +192,23 @@ def step(self, actions):
return observations, rewards, terminations, truncations, infos

def render(self):
    """Print the 7x7 board to stdout with each entity's cell marked.

    Cells are blank except for "P" (prisoner), "G" (guard) and "E" (escape).
    Markers are written in that order, so a later marker wins on a shared cell.
    """
    # The grid must be a string array: np.zeros() creates a float array, and
    # assigning "P"/"G"/"E" into it raises ValueError.
    grid = np.full((7, 7), " ")
    grid[self.prisoner_y, self.prisoner_x] = "P"
    grid[self.guard_y, self.guard_x] = "G"
    grid[self.escape_y, self.escape_x] = "E"
    print(f"{grid} \n")

# Observation space should be defined here.
# lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
    """Return the observation space for `agent`: three components with 7 * 7 values each."""
    # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
    # A 7x7 grid has 49 cells, so each component needs 49 values (0..48).
    # The previous `7 * 7 - 1` allowed only 0..47 and left no value for the last cell.
    # NOTE(review): presumably each component is a flattened cell index produced in reset()/step() - confirm there.
    return MultiDiscrete([7 * 7] * 3)

# Per-agent action space.
# The result is memoized; drop the decorator (disable caching) if your spaces can change over time.
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
    """Return the action space for `agent`: four discrete actions."""
    action_count = 4
    return Discrete(action_count)

0 comments on commit ace9b76

Please sign in to comment.