Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated CustomEnvironment tutorial comments #1084

Merged
merged 3 commits into from
Sep 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/tutorials/custom_environment/2-environment-logic.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Now that we have a basic understanding of the structure of environment repositor

For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:
- The prisoner starts in the top left corner,
- the guard starts in the bottom right corner,
- the escape door is randomly placed in the middle of the grid, and
- The guard starts in the bottom right corner,
- The escape door is randomly placed in the middle of the grid
- Both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).

## Code
Expand Down
53 changes: 53 additions & 0 deletions tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@


class CustomEnvironment(ParallelEnv):
"""The metadata holds environment constants.

The "name" metadata allows the environment to be pretty printed.
"""

metadata = {
"name": "custom_environment_v0",
}

def __init__(self):
"""The init method takes in environment arguments.

Should define the following attributes:
- escape x and y coordinates
- guard x and y coordinates
- prisoner x and y coordinates
- timestep
- possible_agents

Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
Spaces should be defined in the action_space() and observation_space() methods.
If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.

These attributes should not be changed after initialization.
"""
self.escape_y = None
self.escape_x = None
self.guard_y = None
Expand All @@ -24,6 +44,19 @@ def __init__(self):
self.possible_agents = ["prisoner", "guard"]

def reset(self, seed=None, options=None):
"""Resets the environment to a starting point.

It needs to initialize the following attributes:
- agents
- timestep
- prisoner x and y coordinates
- guard x and y coordinates
- escape x and y coordinates
- observation
- infos

And must set up the environment so that render(), step(), and observe() can be called without issues.
"""
self.agents = copy(self.possible_agents)
self.timestep = 0

Expand Down Expand Up @@ -51,6 +84,19 @@ def reset(self, seed=None, options=None):
return observations, infos

def step(self, actions):
"""Takes in a dictionary of actions keyed by each agent's name.

Needs to update:
- prisoner x and y coordinates
- guard x and y coordinates
- terminations
- truncations
- rewards
- timestep
- infos

And any internal state used by observe() or render()
"""
# Execute actions
prisoner_action = actions["prisoner"]
guard_action = actions["guard"]
Expand Down Expand Up @@ -110,16 +156,23 @@ def step(self, actions):
return observations, rewards, terminations, truncations, infos

def render(self):
    """Print an ASCII view of the 7x7 grid to stdout.

    Markers: "P" = prisoner, "G" = guard, "E" = escape door.
    """
    board = np.full((7, 7), " ")
    # Draw markers in the same order as before, so a later marker
    # overwrites an earlier one if two entities share a cell.
    for marker, row, col in (
        ("P", self.prisoner_y, self.prisoner_x),
        ("G", self.guard_y, self.guard_x),
        ("E", self.escape_y, self.escape_x),
    ):
        board[row, col] = marker
    print(f"{board} \n")

# Observation space should be defined here.
# lru_cache memoizes the per-agent space object so it is built only once;
# if your spaces change over time, remove the decorator (disable caching).
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
    """Return the observation space: three positions flattened from the 7x7 grid."""
    # gymnasium spaces are defined and documented here:
    # https://gymnasium.farama.org/api/spaces/
    return MultiDiscrete([49, 49, 49])

# Action space should be defined here.
# lru_cache memoizes the space; if your spaces change over time,
# remove the decorator (disable caching).
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
    """Return the action space: four discrete moves (the cardinal directions)."""
    return Discrete(4)
54 changes: 54 additions & 0 deletions tutorials/CustomEnvironment/tutorial3_action_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@


class CustomActionMaskedEnvironment(ParallelEnv):
"""The metadata holds environment constants.

The "name" metadata allows the environment to be pretty printed.
"""

metadata = {
"name": "custom_environment_v0",
}

def __init__(self):
"""The init method takes in environment arguments.

Should define the following attributes:
- escape x and y coordinates
- guard x and y coordinates
- prisoner x and y coordinates
- timestep
- possible_agents

Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
Spaces should be defined in the action_space() and observation_space() methods.
If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.

These attributes should not be changed after initialization.
"""
self.escape_y = None
self.escape_x = None
self.guard_y = None
Expand All @@ -24,6 +44,19 @@ def __init__(self):
self.possible_agents = ["prisoner", "guard"]

def reset(self, seed=None, options=None):
"""Resets the environment to a starting point.

It needs to initialize the following attributes:
- agents
- timestep
- prisoner x and y coordinates
- guard x and y coordinates
- escape x and y coordinates
- observation
- infos

And must set up the environment so that render(), step(), and observe() can be called without issues.
"""
self.agents = copy(self.possible_agents)
self.timestep = 0

Expand Down Expand Up @@ -52,6 +85,19 @@ def reset(self, seed=None, options=None):
return observations, infos

def step(self, actions):
"""Takes in a dictionary of actions keyed by each agent's name.

Needs to update:
- prisoner x and y coordinates
- guard x and y coordinates
- terminations
- truncations
- rewards
- timestep
- infos

And any internal state used by observe() or render()
"""
# Execute actions
prisoner_action = actions["prisoner"]
guard_action = actions["guard"]
Expand Down Expand Up @@ -95,6 +141,7 @@ def step(self, actions):
elif self.guard_y == 6:
guard_action_mask[3] = 0

# Action mask to prevent guard from going over escape cell
if self.guard_x - 1 == self.escape_x:
guard_action_mask[0] = 0
elif self.guard_x + 1 == self.escape_x:
Expand Down Expand Up @@ -145,16 +192,23 @@ def step(self, actions):
return observations, rewards, terminations, truncations, infos

def render(self):
    """Print an ASCII view of the 7x7 grid to stdout.

    Markers: "P" = prisoner, "G" = guard, "E" = escape door.
    """
    # BUG FIX: np.zeros((7, 7)) creates a float64 array, and assigning the
    # string markers "P"/"G"/"E" into it raises ValueError. Use a
    # string-dtype grid of spaces instead, matching the companion tutorial.
    grid = np.full((7, 7), " ")
    grid[self.prisoner_y, self.prisoner_x] = "P"
    grid[self.guard_y, self.guard_x] = "G"
    grid[self.escape_y, self.escape_x] = "E"
    print(f"{grid} \n")

# Observation space should be defined here.
# lru_cache memoizes the per-agent space object so it is built only once;
# if your spaces change over time, remove the decorator (disable caching).
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
    """Return the observation space: three flattened grid positions.

    NOTE(review): each component is 7 * 7 - 1 = 48 here, while the
    companion tutorial uses 7 * 7 — confirm which bound is intended.
    """
    # gymnasium spaces are defined and documented here:
    # https://gymnasium.farama.org/api/spaces/
    return MultiDiscrete([48] * 3)

# Action space should be defined here.
# lru_cache memoizes the space; if your spaces change over time,
# remove the decorator (disable caching).
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
    """Return the action space: four discrete moves (the cardinal directions)."""
    return Discrete(4)