From d5d8b72845eb382798a312b8dd34d8bfe1b203a5 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 22 Jan 2023 22:31:26 -0500
Subject: [PATCH 01/32] added docstrings to babyai goto

---
 minigrid/envs/babyai/goto.py | 537 ++++++++++++++++++++++++++++++++++-
 1 file changed, 535 insertions(+), 2 deletions(-)

diff --git a/minigrid/envs/babyai/goto.py b/minigrid/envs/babyai/goto.py
index b771d1b8f..a99179cf9 100644
--- a/minigrid/envs/babyai/goto.py
+++ b/minigrid/envs/babyai/goto.py
@@ -11,9 +11,52 @@
 
 class GoToRedBallGrey(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the red ball, single room, with distractors.
     The distractors are all grey to reduce perceptual complexity.
     This level has distractors but doesn't make use of language.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+
     """
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -36,8 +79,50 @@ def gen_mission(self):
 
 class GoToRedBall(RoomGridLevel):
     """
+    ## Description
+
     Go to the red ball, single room, with distractors.
     This level has distractors but doesn't make use of language.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBall-v0`
+
     """
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -57,7 +142,50 @@ def gen_mission(self):
 
 class GoToRedBallNoDists(GoToRedBall):
     """
+
+    ## Description
+
     Go to the red ball. No distractors present.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallNoDists-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -66,7 +194,57 @@ def __init__(self, **kwargs):
 
 class GoToObj(RoomGridLevel):
     """
-    Go to an object, inside a single room with no doors, no distractors
+    ## Description
+
+    Go to an object, inside a single room with no doors, no distractors. The
+    naming convention `GoToObjS{X}` represents a room of size `X`.
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToObj-v0`
+    - `BabyAI-GoToObjS4-v0`
+    - `BabyAI-GoToObjS6-v0`
+
     """
 
     def __init__(self, room_size=8, **kwargs):
@@ -81,7 +259,68 @@ def gen_mission(self):
 
 class GoToLocal(RoomGridLevel):
     """
-    Go to an object, inside a single room with no doors, no distractors
+
+    ## Description
+
+    Go to an object, inside a single room with no doors, with distractors. The
+    naming convention `GoToLocalS{X}N{Y}` represents a room of size `X` with
+    `Y` distractors.
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToLocal-v0`
+    - `BabyAI-GoToLocalS5N2-v0`
+    - `BabyAI-GoToLocalS6N2-v0`
+    - `BabyAI-GoToLocalS6N3-v0`
+    - `BabyAI-GoToLocalS6N4-v0`
+    - `BabyAI-GoToLocalS7N4-v0`
+    - `BabyAI-GoToLocalS7N5-v0`
+    - `BabyAI-GoToLocalS8N2-v0`
+    - `BabyAI-GoToLocalS8N3-v0`
+    - `BabyAI-GoToLocalS8N4-v0`
+    - `BabyAI-GoToLocalS8N5-v0`
+    - `BabyAI-GoToLocalS8N6-v0`
+    - `BabyAI-GoToLocalS8N7-v0`
     """
 
     def __init__(self, room_size=8, num_dists=8, **kwargs):
@@ -98,7 +337,62 @@ def gen_mission(self):
 
 class GoTo(RoomGridLevel):
     """
+
+    ## Description
+
     Go to an object, the object may be in another room. Many distractors.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoTo-v0`
+    - `BabyAI-GoToOpen-v0`
+    - `BabyAI-GoToObjMaze-v0`
+    - `BabyAI-GoToObjMazeOpen-v0`
+    - `BabyAI-GoToObjMazeS4R2-v0`
+    - `BabyAI-GoToObjMazeS4-v0`
+    - `BabyAI-GoToObjMazeS5-v0`
+    - `BabyAI-GoToObjMazeS6-v0`
+    - `BabyAI-GoToObjMazeS7-v0`
     """
 
     def __init__(
@@ -131,9 +425,57 @@ def gen_mission(self):
 
 class GoToImpUnlock(RoomGridLevel):
     """
+
+    ## Description
+
     Go to an object, which may be in a locked room.
     Competencies: Maze, GoTo, ImpUnlock
     No unblocking.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToImpUnlock-v0`
+
     """
 
     def gen_mission(self):
@@ -182,12 +524,64 @@ def gen_mission(self):
 
 class GoToSeq(LevelGen):
     """
+
+    ## Description
+
     Sequencing of go-to-object commands.
 
     Competencies: Maze, GoTo, Seq
     No locked room.
     No locations.
     No unblocking.
+
+    ## Mission Space
+
+    "go to a/the {color} {type}" +
+    " and go to a/the {color} {type}" +
+    ", then go to a/the {color} {type}" +
+    " and go to a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the objects in the instructed order.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToSeq-v0`
+    - `BabyAI-GoToSeqS5R2-v0`
+
     """
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -206,10 +600,55 @@ def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
 
 class GoToRedBlueBall(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the red ball or to the blue ball.
     There is exactly one red or blue ball, and some distractors.
     The distractors are guaranteed not to be red or blue balls.
     Language is not required to solve this level.
+
+    ## Mission Space
+
+    "go to the {color} ball"
+
+    {color} is the color of the ball. Can be "red" or "blue".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBlueBall-v0`
+
     """
 
     def __init__(self, room_size=8, num_dists=7, **kwargs):
@@ -237,9 +676,55 @@ def gen_mission(self):
 
 class GoToDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to a door
     (of a given color, in the current room)
     No distractors, no language variation
+
+    ## Mission Space
+
+    "go to the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToDoor-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -258,8 +743,56 @@ def gen_mission(self):
 
 class GoToObjDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to an object or door
     (of a given type and color, in the current room)
+
+    ## Mission Space
+
+    "go to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box", "key" or "door".
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the object or door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToObjDoor-v0`
+
     """
 
     def __init__(self, **kwargs):

From 7467e0afc0d87ea37cec1c3d1428666f0e893d82 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 22 Jan 2023 22:38:20 -0500
Subject: [PATCH 02/32] added placeholder docstrings to all babyai envs

---
 minigrid/envs/babyai/open.py    | 210 ++++++++++++++++++++++
 minigrid/envs/babyai/other.py   | 210 ++++++++++++++++++++++
 minigrid/envs/babyai/pickup.py  | 210 ++++++++++++++++++++++
 minigrid/envs/babyai/putnext.py |  84 +++++++++
 minigrid/envs/babyai/synth.py   | 307 ++++++++++++++++++++++++++++++++
 minigrid/envs/babyai/unlock.py  | 252 ++++++++++++++++++++++++++
 6 files changed, 1273 insertions(+)

diff --git a/minigrid/envs/babyai/open.py b/minigrid/envs/babyai/open.py
index 640aff037..c0e1155da 100644
--- a/minigrid/envs/babyai/open.py
+++ b/minigrid/envs/babyai/open.py
@@ -17,7 +17,49 @@
 
 class Open(RoomGridLevel):
     """
+
+    ## Description
+
     Open a door, which may be in another room
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def gen_mission(self):
@@ -41,10 +83,52 @@ def gen_mission(self):
 
 class OpenRedDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the red door
     (always unlocked, in the current room)
     Note: this level is intentionally meant for debugging and is
     intentionally kept very simple.
+
+    ## Mission Space
+
+    "open the red door"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the red door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-OpenRedDoor-v0`
     """
 
     def __init__(self, **kwargs):
@@ -58,9 +142,51 @@ def gen_mission(self):
 
 class OpenDoor(RoomGridLevel):
     """
+
+    ## Description
+
     Go to the door
     The door to open is given by its color or by its location.
     (always unlocked, in the current room)
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the specified door.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, debug=False, select_by=None, **kwargs):
@@ -92,10 +218,52 @@ def gen_mission(self):
 
 class OpenTwoDoors(RoomGridLevel):
     """
+
+    ## Description
+
     Open door X, then open door Y
     The two doors are facing opposite directions, so that the agent
     Can't see whether the door behind him is open.
     This task requires memory (recurrent policy) to be solved effectively.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the two doors in the given order.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(
@@ -139,7 +307,49 @@ def gen_mission(self):
 
 class OpenDoorsOrder(RoomGridLevel):
     """
+
+    ## Description
+
     Open one or two doors in the order specified.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Open a door       |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent opens the door(s) in the specified order.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, num_doors, debug=False, max_steps: int | None = None, **kwargs):
diff --git a/minigrid/envs/babyai/other.py b/minigrid/envs/babyai/other.py
index d1d8fdc1b..0ac2a084d 100644
--- a/minigrid/envs/babyai/other.py
+++ b/minigrid/envs/babyai/other.py
@@ -17,10 +17,52 @@
 
 class ActionObjDoor(RoomGridLevel):
     """
+
+    ## Description
+
     [pick up an object] or
     [go to an object or door] or
     [open a door]
     (in the current room)
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent completes the instructed action (pick up, go to, or open).
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, **kwargs):
@@ -51,9 +93,51 @@ def gen_mission(self):
 
 class FindObjS5(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object (in a random room)
     Rooms have a size of 5
     This level requires potentially exhaustive exploration
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent picks up the object.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, room_size=5, max_steps: int | None = None, **kwargs):
@@ -76,8 +160,50 @@ def gen_mission(self):
 
 class KeyCorridor(RoomGridLevel):
     """
+
+    ## Description
+
     A ball is behind a locked door, the key is placed in a
     random room.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(
@@ -122,8 +248,50 @@ def gen_mission(self):
 
 class OneRoomS8(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up the ball
     Rooms have a size of 8
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, room_size=8, **kwargs):
@@ -137,9 +305,51 @@ def gen_mission(self):
 
 class MoveTwoAcross(RoomGridLevel):
     """
+
+    ## Description
+
     Task of the form: move the A next to the B and the C next to the D.
     This task is structured to have a very large number of possible
     instructions.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(
diff --git a/minigrid/envs/babyai/pickup.py b/minigrid/envs/babyai/pickup.py
index a9ff7629a..d0c74440f 100644
--- a/minigrid/envs/babyai/pickup.py
+++ b/minigrid/envs/babyai/pickup.py
@@ -11,7 +11,49 @@
 
 class Pickup(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object, the object may be in another room.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def gen_mission(self):
@@ -25,8 +67,50 @@ def gen_mission(self):
 
 class UnblockPickup(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object, the object may be in another room. The path may
     be blocked by one or more obstructors.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def gen_mission(self):
@@ -45,10 +129,52 @@ def gen_mission(self):
 
 class PickupLoc(LevelGen):
     """
+
+    ## Description
+
     Pick up an object which may be described using its location. This is a
     single room environment.
 
     Competencies: PickUp, Loc. No unblocking.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, **kwargs):
@@ -69,10 +195,52 @@ def __init__(self, **kwargs):
 
 class PickupDist(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object
     The object to pick up is given by its type only, or
     by its color, or by its type and color.
     (in the current room, with distractors)
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, debug=False, **kwargs):
@@ -98,8 +266,50 @@ def gen_mission(self):
 
 class PickupAbove(RoomGridLevel):
     """
+
+    ## Description
+
     Pick up an object (in the room above)
     This task requires to use the compass to be solved effectively.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):
diff --git a/minigrid/envs/babyai/putnext.py b/minigrid/envs/babyai/putnext.py
index 204c3645f..b1e4a37f7 100644
--- a/minigrid/envs/babyai/putnext.py
+++ b/minigrid/envs/babyai/putnext.py
@@ -10,8 +10,50 @@
 
 class PutNextLocal(RoomGridLevel):
     """
+
+    ## Description
+
     Put an object next to another object, inside a single room
     with no doors, no distractors
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, room_size=8, num_objs=8, **kwargs):
@@ -31,9 +73,51 @@ def gen_mission(self):
 
 class PutNext(RoomGridLevel):
     """
+
+    ## Description
+
     Task of the form: move the A next to the B and the C next to the D.
     This task is structured to have a very large number of possible
     instructions.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(
diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
index 181ab96a1..d7c662a6e 100644
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -11,11 +11,53 @@
 
 class Synth(LevelGen):
     """
+
+    ## Description
+
     Union of all instructions from PutNext, Open, Goto and PickUp. The agent
     may need to move objects around. The agent may have to unlock the door,
     but only if it is explicitly referred by the instruction.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -35,16 +77,102 @@ def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
 
 
 class SynthS5R2(Synth):
+    """
+
+    ## Description
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+    """
+
     def __init__(self, **kwargs):
         super().__init__(room_size=5, num_rows=2, num_cols=2, num_dists=7, **kwargs)
 
 
 class SynthLoc(LevelGen):
     """
+
+    ## Description
+
     Like Synth, but a significant share of object descriptions involves
     location language like in PickUpLoc. No implicit unlocking.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, **kwargs):
@@ -61,10 +189,52 @@ def __init__(self, **kwargs):
 
 class SynthSeq(LevelGen):
     """
+
+    ## Description
+
     Like SynthLoc, but now with multiple commands, combined just like in GoToSeq.
     No implicit unlocking.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, **kwargs):
@@ -76,6 +246,51 @@ def __init__(self, **kwargs):
 
 
 class MiniBossLevel(LevelGen):
+    """
+
+    ## Description
+
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+    """
+
     def __init__(self, **kwargs):
         super().__init__(
             num_cols=2,
@@ -88,10 +303,102 @@ def __init__(self, **kwargs):
 
 
 class BossLevel(LevelGen):
+    """
+
+    ## Description
+
+
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+    """
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
 
 class BossLevelNoUnlock(LevelGen):
+    """
+
+    ## Description
+
+
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
+    """
+
     def __init__(self, **kwargs):
         super().__init__(locked_room_prob=0, implicit_unlock=False, **kwargs)
diff --git a/minigrid/envs/babyai/unlock.py b/minigrid/envs/babyai/unlock.py
index e0f1bbd76..d2abba7d3 100644
--- a/minigrid/envs/babyai/unlock.py
+++ b/minigrid/envs/babyai/unlock.py
@@ -12,9 +12,51 @@
 
 class Unlock(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door.
 
     Competencies: Maze, Open, Unlock. No unblocking.
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def gen_mission(self):
@@ -66,8 +108,50 @@ def gen_mission(self):
 
 class UnlockLocal(RoomGridLevel):
     """
+
+    ## Description
+
     Fetch a key and unlock a door
     (in the current room)
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, distractors=False, **kwargs):
@@ -86,7 +170,49 @@ def gen_mission(self):
 
 class KeyInBox(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door. Key is in a box (in the current room).
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, **kwargs):
@@ -107,7 +233,49 @@ def gen_mission(self):
 
 class UnlockPickup(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door, then pick up a box in another room
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
@@ -137,8 +305,50 @@ def gen_mission(self):
 
 class BlockedUnlockPickup(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door blocked by a ball, then pick up a box
     in another room
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):
@@ -168,7 +378,49 @@ def gen_mission(self):
 
 class UnlockToUnlock(RoomGridLevel):
     """
+
+    ## Description
+
     Unlock a door A that requires to unlock a door B before
+
+    ## Mission Space
+
+    "go to the red ball"
+
+    ## Action Space
+
+    | Num | Name         | Action            |
+    |-----|--------------|-------------------|
+    | 0   | left         | Turn left         |
+    | 1   | right        | Turn right        |
+    | 2   | forward      | Move forward      |
+    | 3   | pickup       | Pick up an object |
+    | 4   | drop         | Unused            |
+    | 5   | toggle       | Unused            |
+    | 6   | done         | Unused            |
+
+    ## Observation Encoding
+
+    - Each tile is encoded as a 3 dimensional tuple:
+        `(OBJECT_IDX, COLOR_IDX, STATE)`
+    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
+        [minigrid/minigrid.py](minigrid/minigrid.py)
+    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
+
+    ## Rewards
+
+    A reward of '1' is given for success, and '0' for failure.
+
+    ## Termination
+
+    The episode ends if any one of the following conditions is met:
+
+    1. The agent goes to the red ball.
+    2. Timeout (see `max_steps`).
+
+    ## Registered Configurations
+
+    - `BabyAI-GoToRedBallGrey-v0`
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):

From f3c6acd0cb480ce4672c1866c8abba0fc6f122f6 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 22 Jan 2023 22:53:09 -0500
Subject: [PATCH 03/32] updated babyai registration

---
 minigrid/envs/babyai/open.py    | 22 ++++++++---
 minigrid/envs/babyai/other.py   | 27 +++++++++++---
 minigrid/envs/babyai/pickup.py  | 16 +++++---
 minigrid/envs/babyai/putnext.py | 15 +++++++-
 minigrid/envs/babyai/synth.py   | 65 +++++----------------------------
 minigrid/envs/babyai/unlock.py  | 20 +++++++---
 6 files changed, 87 insertions(+), 78 deletions(-)

diff --git a/minigrid/envs/babyai/open.py b/minigrid/envs/babyai/open.py
index c0e1155da..05b19f759 100644
--- a/minigrid/envs/babyai/open.py
+++ b/minigrid/envs/babyai/open.py
@@ -59,7 +59,8 @@ class Open(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-Open-v0`
+
     """
 
     def gen_mission(self):
@@ -128,7 +129,8 @@ class OpenRedDoor(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-OpenRedDoor-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -186,7 +188,11 @@ class OpenDoor(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-OpenDoor-v0`
+    - `BabyAI-OpenDoorDebug-v0`
+    - `BabyAI-OpenDoorColor-v0`
+    - `BabyAI-OpenDoorLoc-v0`
+
     """
 
     def __init__(self, debug=False, select_by=None, **kwargs):
@@ -263,7 +269,10 @@ class OpenTwoDoors(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-OpenTwoDoors-v0`
+    - `BabyAI-OpenRedBlueDoors-v0`
+    - `BabyAI-OpenRedBlueDoorsDebug-v0`
+
     """
 
     def __init__(
@@ -349,7 +358,10 @@ class OpenDoorsOrder(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-OpenDoorsOrderN2-v0`
+    - `BabyAI-OpenDoorsOrderN4-v0`
+    - `BabyAI-OpenDoorsOrderN2Debug-v0`
+    - `BabyAI-OpenDoorsOrderN4Debug-v0`
     """
 
     def __init__(self, num_doors, debug=False, max_steps: int | None = None, **kwargs):
diff --git a/minigrid/envs/babyai/other.py b/minigrid/envs/babyai/other.py
index 0ac2a084d..c72501792 100644
--- a/minigrid/envs/babyai/other.py
+++ b/minigrid/envs/babyai/other.py
@@ -62,7 +62,8 @@ class ActionObjDoor(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-ActionObjDoor-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -137,7 +138,10 @@ class FindObjS5(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-FindObjS5-v0`
+    - `BabyAI-FindObjS6-v0`
+    - `BabyAI-FindObjS7-v0`
+
     """
 
     def __init__(self, room_size=5, max_steps: int | None = None, **kwargs):
@@ -203,7 +207,14 @@ class KeyCorridor(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-KeyCorridor-v0`
+    - `BabyAI-KeyCorridorS3R1-v0`
+    - `BabyAI-KeyCorridorS3R2-v0`
+    - `BabyAI-KeyCorridorS3R3-v0`
+    - `BabyAI-KeyCorridorS4R3-v0`
+    - `BabyAI-KeyCorridorS5R3-v0`
+    - `BabyAI-KeyCorridorS6R3-v0`
+
     """
 
     def __init__(
@@ -291,7 +302,11 @@ class OneRoomS8(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-OneRoomS8-v0`
+    - `BabyAI-OneRoomS12-v0`
+    - `BabyAI-OneRoomS16-v0`
+    - `BabyAI-OneRoomS20-v0`
+
     """
 
     def __init__(self, room_size=8, **kwargs):
@@ -349,7 +364,9 @@ class MoveTwoAcross(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-MoveTwoAcrossS5N2-v0`
+    - `BabyAI-MoveTwoAcrossS8N9-v0`
+
     """
 
     def __init__(
diff --git a/minigrid/envs/babyai/pickup.py b/minigrid/envs/babyai/pickup.py
index d0c74440f..9d5cb25ac 100644
--- a/minigrid/envs/babyai/pickup.py
+++ b/minigrid/envs/babyai/pickup.py
@@ -53,7 +53,8 @@ class Pickup(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-Pickup-v0`
+
     """
 
     def gen_mission(self):
@@ -110,7 +111,8 @@ class UnblockPickup(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-UnblockPickup-v0`
+
     """
 
     def gen_mission(self):
@@ -174,7 +176,8 @@ class PickupLoc(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-PickupLoc-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -240,7 +243,9 @@ class PickupDist(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-PickupDist-v0`
+    - `BabyAI-PickupDistDebug-v0`
+
     """
 
     def __init__(self, debug=False, **kwargs):
@@ -309,7 +314,8 @@ class PickupAbove(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-PickupAbove-v0`
+
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):
diff --git a/minigrid/envs/babyai/putnext.py b/minigrid/envs/babyai/putnext.py
index b1e4a37f7..a66cce2b8 100644
--- a/minigrid/envs/babyai/putnext.py
+++ b/minigrid/envs/babyai/putnext.py
@@ -53,7 +53,10 @@ class PutNextLocal(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-PutNextLocal-v0`
+    - `BabyAI-PutNextLocalS5N3-v0`
+    - `BabyAI-PutNextLocalS6N4-v0`
+
     """
 
     def __init__(self, room_size=8, num_objs=8, **kwargs):
@@ -117,7 +120,15 @@ class PutNext(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-PutNextS4N1-v0
+    - `BabyAI-PutNextS5N2-v0`
+    - `BabyAI-PutNextS5N1-v0`
+    - `BabyAI-PutNextS6N3-v0`
+    - `BabyAI-PutNextS7N4-v0`
+    - `BabyAI-PutNextS5N2Carrying-v0`
+    - `BabyAI-PutNextS6N3Carrying-v0`
+    - `BabyAI-PutNextS7N4Carrying-v0`
+
     """
 
     def __init__(
diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
index d7c662a6e..22234c868 100644
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -57,7 +57,9 @@ class Synth(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-Synth-v0`
+    - `BabyAI-SynthS5R2-v0`
+
     """
 
     def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
@@ -76,55 +78,6 @@ def __init__(self, room_size=8, num_rows=3, num_cols=3, num_dists=18, **kwargs):
         )
 
 
-class SynthS5R2(Synth):
-    """
-
-    ## Description
-
-    ## Mission Space
-
-    "go to the red ball"
-
-    ## Action Space
-
-    | Num | Name         | Action            |
-    |-----|--------------|-------------------|
-    | 0   | left         | Turn left         |
-    | 1   | right        | Turn right        |
-    | 2   | forward      | Move forward      |
-    | 3   | pickup       | Pick up an object |
-    | 4   | drop         | Unused            |
-    | 5   | toggle       | Unused            |
-    | 6   | done         | Unused            |
-
-    ## Observation Encoding
-
-    - Each tile is encoded as a 3 dimensional tuple:
-        `(OBJECT_IDX, COLOR_IDX, STATE)`
-    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
-        [minigrid/minigrid.py](minigrid/minigrid.py)
-    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
-
-    ## Rewards
-
-    A reward of '1' is given for success, and '0' for failure.
-
-    ## Termination
-
-    The episode ends if any one of the following conditions is met:
-
-    1. The agent goes to the red ball.
-    2. Timeout (see `max_steps`).
-
-    ## Registered Configurations
-
-    - `BabyAI-GoToRedBallGrey-v0`
-    """
-
-    def __init__(self, **kwargs):
-        super().__init__(room_size=5, num_rows=2, num_cols=2, num_dists=7, **kwargs)
-
-
 class SynthLoc(LevelGen):
     """
 
@@ -172,7 +125,7 @@ class SynthLoc(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-SynthLoc-v0`
     """
 
     def __init__(self, **kwargs):
@@ -234,7 +187,8 @@ class SynthSeq(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-SynthSeq-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -288,7 +242,8 @@ class MiniBossLevel(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-MiniBossLevel-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -346,7 +301,7 @@ class BossLevel(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-BossLevel-v0`
     """
 
     def __init__(self, **kwargs):
@@ -397,7 +352,7 @@ class BossLevelNoUnlock(LevelGen):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-BossLevelNoUnlock-v0`
     """
 
     def __init__(self, **kwargs):
diff --git a/minigrid/envs/babyai/unlock.py b/minigrid/envs/babyai/unlock.py
index d2abba7d3..bc5ced954 100644
--- a/minigrid/envs/babyai/unlock.py
+++ b/minigrid/envs/babyai/unlock.py
@@ -56,7 +56,8 @@ class Unlock(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-Unlock-v0`
+
     """
 
     def gen_mission(self):
@@ -151,7 +152,9 @@ class UnlockLocal(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-UnlockLocal-v0`
+    - `BabyAI-UnlockLocalDist-v0`
+
     """
 
     def __init__(self, distractors=False, **kwargs):
@@ -212,7 +215,8 @@ class KeyInBox(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-KeyInBox-v0`
+
     """
 
     def __init__(self, **kwargs):
@@ -275,7 +279,9 @@ class UnlockPickup(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-UnlockPickup-v0`
+    - `BabyAI-UnlockPickupDist-v0`
+
     """
 
     def __init__(self, distractors=False, max_steps: int | None = None, **kwargs):
@@ -348,7 +354,8 @@ class BlockedUnlockPickup(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-BlockedUnlockPickup-v0`
+
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):
@@ -420,7 +427,8 @@ class UnlockToUnlock(RoomGridLevel):
 
     ## Registered Configurations
 
-    - `BabyAI-GoToRedBallGrey-v0`
+    - `BabyAI-UnlockToUnlock-v0`
+
     """
 
     def __init__(self, max_steps: int | None = None, **kwargs):

From ca7c4cae09660dc045cfd62db03f7f39d76a294a Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 23 Jan 2023 08:05:36 -0500
Subject: [PATCH 04/32] updated babyai docstrings for open, other, pickup, and
 putnext

---
 minigrid/envs/babyai/open.py    | 40 +++++++++++++++++++++--------
 minigrid/envs/babyai/other.py   | 43 ++++++++++++++++++++++---------
 minigrid/envs/babyai/pickup.py  | 45 +++++++++++++++++++++++++--------
 minigrid/envs/babyai/putnext.py | 20 +++++++++++----
 4 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/minigrid/envs/babyai/open.py b/minigrid/envs/babyai/open.py
index 05b19f759..eaafba60a 100644
--- a/minigrid/envs/babyai/open.py
+++ b/minigrid/envs/babyai/open.py
@@ -24,7 +24,10 @@ class Open(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open a {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
 
     ## Action Space
 
@@ -54,7 +57,7 @@ class Open(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -94,7 +97,7 @@ class OpenRedDoor(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the red door"
 
     ## Action Space
 
@@ -124,7 +127,7 @@ class OpenRedDoor(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -153,7 +156,10 @@ class OpenDoor(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
 
     ## Action Space
 
@@ -183,7 +189,7 @@ class OpenDoor(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -234,7 +240,10 @@ class OpenTwoDoors(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the {color} door, then open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
 
     ## Action Space
 
@@ -264,7 +273,7 @@ class OpenTwoDoors(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -323,7 +332,18 @@ class OpenDoorsOrder(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the {color} door, then open the {color} door"
+
+    or
+
+    "open the {color} door after you open the {color} door"
+
+    or
+
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
 
     ## Action Space
 
@@ -353,7 +373,7 @@ class OpenDoorsOrder(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
diff --git a/minigrid/envs/babyai/other.py b/minigrid/envs/babyai/other.py
index c72501792..389977100 100644
--- a/minigrid/envs/babyai/other.py
+++ b/minigrid/envs/babyai/other.py
@@ -27,7 +27,20 @@ class ActionObjDoor(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the {color} {type}"
+
+    or
+
+    "go to the {color} {type}"
+
+    or
+
+    "open a {color} door"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box", "door" or "key".
 
     ## Action Space
 
@@ -57,7 +70,7 @@ class ActionObjDoor(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent finishes the instruction.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -103,7 +116,9 @@ class FindObjS5(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the {type}"
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -133,7 +148,7 @@ class FindObjS5(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the object.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -172,7 +187,7 @@ class KeyCorridor(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the ball"
 
     ## Action Space
 
@@ -202,7 +217,7 @@ class KeyCorridor(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the ball.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -262,12 +277,11 @@ class OneRoomS8(RoomGridLevel):
 
     ## Description
 
-    Pick up the ball
-    Rooms have a size of 8
+    Pick up the ball. Rooms have a size of 8.
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the ball"
 
     ## Action Space
 
@@ -297,7 +311,7 @@ class OneRoomS8(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the ball.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -329,7 +343,12 @@ class MoveTwoAcross(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "put the {color} {type} next to the {color} {type}, then put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -359,7 +378,7 @@ class MoveTwoAcross(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent finishes the instruction.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
diff --git a/minigrid/envs/babyai/pickup.py b/minigrid/envs/babyai/pickup.py
index 9d5cb25ac..9ff019a40 100644
--- a/minigrid/envs/babyai/pickup.py
+++ b/minigrid/envs/babyai/pickup.py
@@ -18,7 +18,12 @@ class Pickup(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up a {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -48,7 +53,7 @@ class Pickup(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the object.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -76,7 +81,12 @@ class UnblockPickup(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up a/the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -106,7 +116,7 @@ class UnblockPickup(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the object.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -141,7 +151,12 @@ class PickupLoc(LevelGen):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -171,7 +186,7 @@ class PickupLoc(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the object.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -208,7 +223,12 @@ class PickupDist(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up a/the {color}/{type}/{color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -238,7 +258,7 @@ class PickupDist(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the object.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -279,7 +299,12 @@ class PickupAbove(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -309,7 +334,7 @@ class PickupAbove(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the object.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
diff --git a/minigrid/envs/babyai/putnext.py b/minigrid/envs/babyai/putnext.py
index a66cce2b8..1a81d300d 100644
--- a/minigrid/envs/babyai/putnext.py
+++ b/minigrid/envs/babyai/putnext.py
@@ -18,7 +18,12 @@ class PutNextLocal(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -48,7 +53,7 @@ class PutNextLocal(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent finishes the instructed task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -85,7 +90,12 @@ class PutNext(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -115,12 +125,12 @@ class PutNext(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent finishes the instructed task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
 
-    - `BabyAI-PutNextS4N1-v0
+    - `BabyAI-PutNextS4N1-v0`
     - `BabyAI-PutNextS5N2-v0`
     - `BabyAI-PutNextS5N1-v0`
     - `BabyAI-PutNextS6N3-v0`

From acfdbdb493c792c960e114cec272bbe9e97241e4 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 29 Jan 2023 21:57:06 -0500
Subject: [PATCH 05/32] updated docstrings for unlock

---
 minigrid/envs/babyai/synth.py  |  7 +++++--
 minigrid/envs/babyai/unlock.py | 30 ++++++++++++++++++------------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
index 22234c868..2d1ff0697 100644
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -262,7 +262,8 @@ class BossLevel(LevelGen):
 
     ## Description
 
-
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
 
     ## Mission Space
 
@@ -313,7 +314,9 @@ class BossLevelNoUnlock(LevelGen):
 
     ## Description
 
-
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
+    No implicit unlocking.
 
     ## Mission Space
 
diff --git a/minigrid/envs/babyai/unlock.py b/minigrid/envs/babyai/unlock.py
index bc5ced954..4a58a343e 100644
--- a/minigrid/envs/babyai/unlock.py
+++ b/minigrid/envs/babyai/unlock.py
@@ -21,7 +21,10 @@ class Unlock(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the {color} door"
+
+    {color} is the color of the door. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
 
     ## Action Space
 
@@ -51,7 +54,7 @@ class Unlock(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the correct door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -117,7 +120,7 @@ class UnlockLocal(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the door"
 
     ## Action Space
 
@@ -147,7 +150,7 @@ class UnlockLocal(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -180,7 +183,7 @@ class KeyInBox(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "open the door"
 
     ## Action Space
 
@@ -210,7 +213,7 @@ class KeyInBox(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent opens the door.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -244,7 +247,10 @@ class UnlockPickup(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the {color} box"
+
+    {color} is the color of the box. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
 
     ## Action Space
 
@@ -274,7 +280,7 @@ class UnlockPickup(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the correct box.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -319,7 +325,7 @@ class BlockedUnlockPickup(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the box"
 
     ## Action Space
 
@@ -349,7 +355,7 @@ class BlockedUnlockPickup(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the box.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -392,7 +398,7 @@ class UnlockToUnlock(RoomGridLevel):
 
     ## Mission Space
 
-    "go to the red ball"
+    "pick up the ball"
 
     ## Action Space
 
@@ -422,7 +428,7 @@ class UnlockToUnlock(RoomGridLevel):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent picks up the ball.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations

From adfe31ba8078f4df084d3377a7dc688bddf419a2 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 29 Jan 2023 22:12:08 -0500
Subject: [PATCH 06/32] added docstrings for synth

---
 minigrid/envs/babyai/synth.py | 47 ++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
index 2d1ff0697..daab6c875 100644
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -22,7 +22,24 @@ class Synth(LevelGen):
 
     ## Mission Space
 
-    "go to the red ball"
+    "go to the {color} {type}"
+
+    or
+
+    "pick up a/the {color} {type}"
+
+    or
+
+    "open the {color} door"
+
+    or
+
+    "put the {color} {type} next to the {color} {type}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
 
     ## Action Space
 
@@ -52,7 +69,7 @@ class Synth(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent achieves the task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -90,7 +107,27 @@ class SynthLoc(LevelGen):
 
     ## Mission Space
 
-    "go to the red ball"
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
 
     ## Action Space
 
@@ -120,7 +157,7 @@ class SynthLoc(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent achieves the task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -204,6 +241,8 @@ class MiniBossLevel(LevelGen):
 
     ## Description
 
+    Command can be any sentence drawn from the Baby Language grammar.
+    Union of all competencies. This level is a superset of all other levels.
 
     ## Mission Space
 

From bbd65042aa37f79c3b89ba9a85c69bfa9aa18f45 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 30 Jan 2023 07:41:47 -0500
Subject: [PATCH 07/32] updated docstrings for synth

---
 minigrid/envs/babyai/synth.py | 190 ++++++++++++++++++++++++++++++++--
 1 file changed, 182 insertions(+), 8 deletions(-)

diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
index daab6c875..44484cf1c 100644
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -189,7 +189,50 @@ class SynthSeq(LevelGen):
 
     ## Mission Space
 
-    "go to the red ball"
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each either an action or an "and" mission, concatenated with
+    ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
 
     ## Action Space
 
@@ -219,7 +262,7 @@ class SynthSeq(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent achieves the task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -243,10 +286,55 @@ class MiniBossLevel(LevelGen):
 
     Command can be any sentence drawn from the Baby Language grammar.
     Union of all competencies. This level is a superset of all other levels.
+    Compared to BossLevel this has a smaller room and a lower probability of
+    locked rooms.
 
     ## Mission Space
 
-    "go to the red ball"
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each either an action or an "and" mission, concatenated with
+    ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
 
     ## Action Space
 
@@ -276,7 +364,7 @@ class MiniBossLevel(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent achieves the task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -306,7 +394,50 @@ class BossLevel(LevelGen):
 
     ## Mission Space
 
-    "go to the red ball"
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue", "purple",
+    "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each either an action or an "and" mission, concatenated with
+    ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
 
     ## Action Space
 
@@ -336,7 +467,7 @@ class BossLevel(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent achieves the task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations
@@ -359,7 +490,50 @@ class BossLevelNoUnlock(LevelGen):
 
     ## Mission Space
 
-    "go to the red ball"
+    Action mission space:
+
+    "go to the {color} {type} {location}"
+
+    or
+
+    "pick up a/the {color} {type} {location}"
+
+    or
+
+    "open the {color} door {location}"
+
+    or
+
+    "put the {color} {type} {location} next to the {color} {type} {location}"
+
+    {color} is the color of the object. Can be "red", "green", "blue",
+    "purple", "yellow" or "grey".
+
+    {type} is the type of the object. Can be "ball", "box" or "key".
+
+    {location} can be " ", "in front of you", "behind you", "on your left"
+    or "on your right"
+
+    And mission space:
+
+    Two action missions concatenated with "and"
+
+    Example:
+
+    go to the green key
+    and
+    put the box next to the yellow ball
+
+    Sequence mission space:
+
+    Two missions, each either an action mission or an "and" mission,
+    concatenated with ", then" or "after you".
+
+    Example:
+
+    open a red door and go to the ball on your left
+    after you
+    put the grey ball next to a door
 
     ## Action Space
 
@@ -389,7 +563,7 @@ class BossLevelNoUnlock(LevelGen):
 
     The episode ends if any one of the following conditions is met:
 
-    1. The agent goes to the red ball.
+    1. The agent achieves the task.
     2. Timeout (see `max_steps`).
 
     ## Registered Configurations

From d1b81394458283079de32caef3611a23b0544a19 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 5 Feb 2023 17:50:47 -0500
Subject: [PATCH 08/32] updated docstring

---
 minigrid/envs/babyai/synth.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
index 44484cf1c..439d4bcb5 100644
--- a/minigrid/envs/babyai/synth.py
+++ b/minigrid/envs/babyai/synth.py
@@ -14,9 +14,10 @@ class Synth(LevelGen):
 
     ## Description
 
-    Union of all instructions from PutNext, Open, Goto and PickUp. The agent
-    may need to move objects around. The agent may have to unlock the door,
-    but only if it is explicitly referred by the instruction.
+    Union of all instructions from PutNext, Open, Goto and PickUp.
+    The agent may need to move objects around. The agent may have
+    to unlock the door, but only if it is explicitly referred by
+    the instruction.
 
     Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open
 

From 6e1d3b210988bf6831022b763d93cb9dd1f9ccd0 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 5 Feb 2023 17:54:13 -0500
Subject: [PATCH 09/32] updated test

---
 tests/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/utils.py b/tests/utils.py
index db0bd0725..12ddf6da0 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -7,7 +7,10 @@
 all_testing_env_specs = [
     env_spec
     for env_spec in gym.envs.registry.values()
-    if env_spec.entry_point.startswith("minigrid.envs")
+    if (
+        isinstance(env_spec.entry_point, str)
+        and env_spec.entry_point.startswith("minigrid.envs")
+    )
 ]
 
 minigrid_testing_env_specs = [

From bd8b313832e2de34a04fb55aa3fc46718f9b2d81 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 08:50:47 -0500
Subject: [PATCH 10/32] added documentation to some of the wrappers

---
 minigrid/wrappers.py | 136 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 3f3f2205c..4b629e0f8 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -18,19 +18,49 @@ class ReseedWrapper(Wrapper):
     Wrapper to always regenerate an environment with the same set of seeds.
     This can be used to force an environment to always keep the same
     configuration when reset.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ReseedWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [1, 9, 5, 8, 4, 3, 8, 8, 3, 1]
+        >>> env = ReseedWrapper(env, seeds=[0, 1], seed_idx=0)
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [8, 6, 5, 2, 3, 0, 0, 0, 1, 8]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [4, 5, 7, 9, 0, 1, 8, 9, 2, 3]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [8, 6, 5, 2, 3, 0, 0, 0, 1, 8]
+        >>> _, _ = env.reset()
+        >>> [env.np_random.integers(10) for i in range(10)]
+        [4, 5, 7, 9, 0, 1, 8, 9, 2, 3]
     """
 
     def __init__(self, env, seeds=[0], seed_idx=0):
+        """A wrapper that always regenerates an environment with the same set of seeds.
+
+        Args:
+            env: The environment to apply the wrapper to
+            seeds: A list of seeds to be applied to the env
+            seed_idx: Index of the initial seed in seeds
+        """
         self.seeds = list(seeds)
         self.seed_idx = seed_idx
         super().__init__(env)
 
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         seed = self.seeds[self.seed_idx]
         self.seed_idx = (self.seed_idx + 1) % len(self.seeds)
         return self.env.reset(seed=seed, **kwargs)
 
     def step(self, action):
+        """Steps through the environment with `action`."""
         return self.env.step(action)
 
 
@@ -39,13 +69,40 @@ class ActionBonus(gym.Wrapper):
     Wrapper which adds an exploration bonus.
     This is a reward to encourage exploration of less
     visited (state,action) pairs.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ActionBonus
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> _, _ = env.reset(seed=0)
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> env_bonus = ActionBonus(env)
+        >>> _, _ = env_bonus.reset(seed=0)
+        >>> _, reward, _, _, _ = env_bonus.step(1)
+        >>> print(reward)
+        1.0
+        >>> _, reward, _, _, _ = env_bonus.step(1)
+        >>> print(reward)
+        1.0
     """
 
     def __init__(self, env):
+        """A wrapper that adds an exploration bonus to less visited (state,action) pairs.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         self.counts = {}
 
     def step(self, action):
+        """Steps through the environment with `action`."""
         obs, reward, terminated, truncated, info = self.env.step(action)
 
         env = self.unwrapped
@@ -66,20 +123,49 @@ def step(self, action):
         return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         return self.env.reset(**kwargs)
 
 
+# Should be named PositionBonus
 class StateBonus(Wrapper):
     """
     Adds an exploration bonus based on which positions
     are visited on the grid.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import StateBonus
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> _, _ = env.reset(seed=0)
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> _, reward, _, _, _ = env.step(1)
+        >>> print(reward)
+        0
+        >>> env_bonus = StateBonus(env)
+        >>> obs, _ = env_bonus.reset(seed=0)
+        >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
+        >>> print(reward)
+        1.0
+        >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
+        >>> print(reward)
+        0.7071067811865475
     """
 
     def __init__(self, env):
+        """A wrapper that adds an exploration bonus to less visited positions.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         self.counts = {}
 
     def step(self, action):
+        """Steps through the environment with `action`."""
         obs, reward, terminated, truncated, info = self.env.step(action)
 
         # Tuple based on which we index the counts
@@ -102,15 +188,34 @@ def step(self, action):
         return obs, reward, terminated, truncated, info
 
     def reset(self, **kwargs):
+        """Resets the environment with `kwargs`."""
         return self.env.reset(**kwargs)
 
 
 class ImgObsWrapper(ObservationWrapper):
     """
     Use the image as the only observation output, no language/mission.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ImgObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset(seed=0)
+        >>> obs.keys()
+        dict_keys(['image', 'direction', 'mission'])
+        >>> env = ImgObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> obs.shape
+        (7, 7, 3)
     """
 
     def __init__(self, env):
+        """A wrapper that makes image the only observation.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
         self.observation_space = env.observation_space.spaces["image"]
 
@@ -122,9 +227,40 @@ class OneHotPartialObsWrapper(ObservationWrapper):
     """
     Wrapper to get a one-hot encoding of a partially observable
     agent view as observation.
+
+    Example:
+        >>> import miniworld
+        >>> import gymnasium as gym
+        >>> from minigrid.wrappers import ImgObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset(seed=0)
+        >>> obs["image"][0, :, :]
+        array([[2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0],
+               [2, 5, 0]], dtype=uint8)
+        >>> env = OneHotPartialObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> obs["image"][0, :, :]
+        array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
+               [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]],
+               dtype=uint8)
     """
 
     def __init__(self, env, tile_size=8):
+        """A wrapper that makes the image observation a one-hot encoding of a partially observable agent view.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
         super().__init__(env)
 
         self.tile_size = tile_size

From 6acfbb444a897e7790c300bd4b5264d7e2502a47 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 09:51:58 -0500
Subject: [PATCH 11/32] added documentation for the wrappers

---
 figures/lavacrossing_NoWrapper.png            | Bin 0 -> 4160 bytes
 figures/lavacrossing_RGBImgObsWrapper.png     | Bin 0 -> 7981 bytes
 .../lavacrossing_RGBImgPartialObsWrapper.png  | Bin 0 -> 6320 bytes
 minigrid/wrappers.py                          |  96 +++++++++++++++++-
 4 files changed, 92 insertions(+), 4 deletions(-)
 create mode 100644 figures/lavacrossing_NoWrapper.png
 create mode 100644 figures/lavacrossing_RGBImgObsWrapper.png
 create mode 100644 figures/lavacrossing_RGBImgPartialObsWrapper.png

diff --git a/figures/lavacrossing_NoWrapper.png b/figures/lavacrossing_NoWrapper.png
new file mode 100644
index 0000000000000000000000000000000000000000..1b4d0a4f058ce667f6554dbb73dd6e0d6f469b4e
GIT binary patch
literal 4160
zcmeAS@N?(olHy`uVBq!ia0y~yVCDv44mO|&<1+4DK+3YjHKHUqKdq!Zu_%?nH?gE3
zC%+^oGfAN=wWv5VKhIdtOwU+HA)}<Epx8=ZKNqe*FTW^V|NPf|Ux9{j7I;J!Gcd><
z0%69y3#E1p4E$?7T^vIy=DfWb=*8?P;2LP%GvV+0Fae$aPn|@ftR@tTeRwgSbLW*=
zGb@=H7=An~`^Ct>aG+QS=t2jY5WC9w*KIBHXJTO3Z(@9VTZ|Y3LxajFF&YM=>0mS)
zK#PKsw{m;-zK>&JVDKx51Qu+QB1VbPFkoP~w6(gxu3Z0<{JZ7y2b|@X%QG<Sv6;@w
zz@X4MN{og9FdfA1W`Dn%{h{m6X$kZDFY`Z8++X=&&S)EBw21|3VQxuFE91L&?Up4w
z1H+$#*N^T0u1B+GE=}6bqlVI`QvwdyU4Hfr#=x2d*#Eg)JlgCSZFRvKotI|KwEWA#
l!0_Pb4jrJFC(VeL(#y+bv>&WFV*|8?!PC{xWt~$(699vDm+Jrk

literal 0
HcmV?d00001

diff --git a/figures/lavacrossing_RGBImgObsWrapper.png b/figures/lavacrossing_RGBImgObsWrapper.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ce30c76df913c1aaf2ade7a49a4e55749193f67
GIT binary patch
literal 7981
zcmeHMc~nzpqQA%kTNQ1`DI!6ropBohQ3OSnw9l=gf`B4nQIL^9A_NE!AcUxl&<@bN
z9uNc*QWpedH^yLCqEdp21Q!UKNzkw(i4c;oCi5lMYJGiYE}iK)@0|DkNC=nk-Fv_9
z_x-l}?ID-F28)arK@eoH@9SOe5Tv_U^ZH019MQS9m<@i|MeX*A^1uZ}5dtDlLI(q)
z!cXC%PKBKKJmzF1J_HwLZE0(1ZT@+1R8%;A$EHo8KYzgz7m41~BCnH!K|T)u8ij`-
z(|ekiPT1pUC-5P6->$C?$5KZHmDppb_=P><9?WX-ANQO;b;06nrT^boTdD3;{8xyt
z%N0!OxzFcx_*Y#nt~GLR+HlKLyk@cWk-sDzHrGW}jD>9$ohb6!nA~8wwlDi(!j)!M
zrAbRITj?C1r!bM&?TC+Vpy(=VE4@oS^&qIHe73n20WPV3pBH%lB0vX%zWE#Y!{>i6
zgrL)3|Jx3REuFg~5C|@r#f6X*ooEl74pf#YQmd5UQxJ|!CRd<uqkh*b_&x6GfB%@5
zFM8m5{xb87t~yMUNd(oYqWcG)v!jdg&rv0Q1&r9~zKcaHE5vcNL|Tk*<;o8ir(=(-
zAgBU|Ego@`aCL<|J4eS=0qbP1`?8O_)Slb;pm_lVeeyBb7+?O`94y%P72s{xy<a<+
zd&BRCthIJz<+bXe$7ac0kJh}t7HQc2Eu$*WTeuP<j=$c<WnNFU_rUo(c^j8mWH)^p
zbMHuh5F8BB&wV@Rb8<HH<SeS*m=n^y=)lV(`S<CZGNky0AQ5}BicsxTaI3w`$*vd|
zdXuU&VqUULj*j)E<6mDUCJ>8naQDq5@)#W@Eko6=MoAapK%Qh6RHg_~(vs1w{SMez
zuI%BYnQ@-ZgHd`kg&bY3O6y{cr%ZlFYEn-6D4eV7m`}#-5m#(m8!Bv(MJ;hPgQm#q
z6K)-KqB=UAy=Q}*bkC#;KIT1a{rP#EvX&y7222R=h)j{9!Y2nqT<=m_??h=}YcktN
zbhk+|c10{1tA7}qnxT~2HJ-QIxiuxJ?)g7iDedwNF|6=R*V9OFXE^<ig}#|x+Xkfq
zFrmdnfw<)uQ##;aPj4S+t?Lv$Ek47nW4aZmx^s&RqBoahsq|8tC1bU_m6I9uiaF{`
z+FrRGGjXW}a-;9oZE&!ny@HU3ww2_vG<86x-<>E-Ium11>b#+Eh#(MKG~b>sb93IX
zOd3OBZM_g!3Kn&i$1T*Hyn!}N^=2??an+4xM{{p63roQlyM4ev!9M%6`GEswdV~CN
zgXzt69vs<JL;2mtqB&vX@bMpP`wRGV|1_1JtEu0tv6cROZYSnUn1F2~Hw<csE3r#8
zBU;P#gLe8+<#H*zMJ%W${Au*B@tDBTyW5T*9-!vseOHk&6ng5^fm*#X9ayKCHuwD@
z3wN3mCq~H3wx`;lBOK$<2`1>u7#i<#e#*Vb+a>|)vXdA;Y=Su)lo2n#MVy@)nYK0+
zj<WWo5?Ld746$@{In>Xc&(7D$V9zlmW95=Ix*L{lyUY4!7B|d-sCT;QM_9u+ExuFY
zNcN+9jGTG=^iP-iN0-eFEO{c;fw;Y}_(3aN2Rl#iYctLfRQr*+N0sbGdM!rO5*OoC
z4mJ@P8}dpY!oP$y8ePxdo$0iQDb&l)9G|F`jCq?S8!~MGI9~`PV_A8hLRRa-;0Wdf
z3JNOC-*(q%ZgGqgXVM1p4}-NgwS0SZL$@9~dW7HVbK$dr%6@Zxoyt#%sv#=FuF?*;
z0$5EnZS%kd*kZ`*+8>Zr^|%7zy;Ba8L4eD{4PY_NQ}l}k%mg!4GeY#K`VI<U7C<UX
zR+WcQyOtgCd!7@oD~pJa_ogv?E%$bOd^86ZZ)#1&)|0yaS;uNeP01=YarT8krR8X1
z!Z)NFG5;zpLl^S9kt3$+;bY3!{mag+jyGxL7ma367*qR5JTV|8w|vSy#hhYpw$;YD
zNV>fL4onO}zRoSj2^7`@DmcdjFm9rF=R|x@Nm8~zyth#xPB}kjxK28P|865bc~A+F
z=HXyGY@whc{-amIQSByak5ybJAqP6AYKS%b$x;PA(sMt9(GaAZ6kaCIJ_rC>MdDro
zJSSsi(Lb}yg*ERaLohMkX6L-sXA4Aa?ZHD0hdg`+G>GkLd4Ni_2NbQ+BZ^`u$g+um
zU^kyDneMk(>_(qN&#GWpXx7@Fr<n$)_k&wGRT6}@r&Ab9M={NhxWO@s-X{&^I)UMg
z|Ir`3GYwZ;?&-z9u_D&Iz~(o=`>J34fHE5~Z1AX&%jLHE^cce&YXNj;P1J8iY4w-o
zLjHgQS>UL89j3OyNCaiTBD5IpEZNMBd-THGo1asU%6zJJuxvtQZlQD;iKRcHsC~3b
zTduu$Q==Sh0(4#^95zKd6qUQp%V1na6t`h)dUXi|LQ2B(d`0;()$^H^OzAOJg0x0z
zytDLKyVs0~_)&^gN>{jHefhI?qnRi}GZxQ;cR4d|mbRpltFD_2m7-0KOs{tc9{m0>
zA$>J}s)6BJjL&L1$=PlBJ&i`w*u9yVnIy+1Serm=@3s{P??fwSqm{9xcMtA=b}5&I
zoS@6V{E#LbQ~7o_OnUTEwA(`m%$59@>Ac31cBxr*NBiDRObTJohJ2HnSEk4v5>sSJ
z8f$ypaXQ{Nhe6_~onT7SHcv1&_ZLbelBH8s`$tDwG*hDtBT$gUHsZJAZMD<X)pZ=f
z(NS~fl>-j_;fDE-uGmr=Q26bX%_<l0LFK~dZcpXRDse<)U_^mtQ%To66kc{Jz#&tP
ztJ(|N?3(&+h`z|a&*Sj`!=}&8&E3uGgC!3dv3X;(u^}HF#vYPsBqFQ-YnyixL1$i*
z0=)1v5OU7h8`=t%Of?XJ(yUx7*ND81_fo7y))opU<169H^U0X}){C@JQn#b3>e40e
zgQ4l}&Yb{O<u;V0&9KOvt;u*hox%Z}?l<~jX4I1dMJxz9+GIwymoE(x4{%gBcc5?k
z0@o4O(b<_3znD?U;UGI0uoOu^jn_TenM798hpEq=J^M6f%Q6H4K@;;%jP9T{C^>8=
z5mX-cx_L#L-5aMbzn)q|urgz+*I4}rEI0C#C5$=ao!e`kox7COW6q~G$a`sb<1oO@
zm#vd7;r75<AB}vc0KQ(hX}G0Bc)cXzX&qf=Zu`dBWH6@rdz#`XZiwL%UQG7-e02^H
zeFq-HzE!rh%&C3KN%8Kvm3z<YJ9jI)l`@a^_4%RYH%ELB797TA%uc^0#hxqjC&~?^
zOT6&#IB|g$cHt(VF5gu*F54)!a1pi-m>lh2GBz5+$GkJ&b#=@aG#xA~!UpzZ?U5P7
zsS^xU+6QJG$6zH8XuovzvSA;DaAgI&ME0}evXDek+H(!hyFV1N1Y!yz7cckwNm{VT
zjc9pkdX}hDo5$KBkCCwT_PTsnh#O)+kGz`*+{Q`6sf@iVIQnJ<3ZmHvLXHVc>_3DU
zK3jYbKBH-}sg%s5#xJ%OJMgJ`YC1uGzzy+dfME~Z87ypr0A)WO8VZT#MHcN{I0tK6
zYCP5|Q?Qg;op=w7(-KC*C)l(IQ~dVStY{5|O&}>vky<9>PTEHpqT|pd%`a$AZfFTV
z0;4+}2?t2pnEoWa4Z}1ZKT};@z0*nJK9kKMb<F^@a;=sb=hjX51r|n68)NB#)TvLv
zT=Gs#f?osa?~&4S^yXJPII#-Nz21Jsy1(WIgJBJlO(6X4hHWR;FRT6)^K?t96zvr;
z)}N%xE&y=_s#-|fy<x6Je1p1sgRC(94TB3;V)3!)%$MYiOXg8Rd>|ZjPuCClAN`HK
z4XF+Aa0D;#jfmBws5U9GpM+idS71ld#|Yz1>S--qDtvnX(D~cw64kV=4lD8!&VYy&
zQBR;^G1yG^hYC++Y*UN(Gv)izg+aSHt7TQ@T2&29<19yyMT}^*qfzw0NSfw8i#xX(
zmf!qP1}?f52p2%%yXR)x>7E1Tn$l&<GJAW_!UWhVl*)y~ELi4JOMPK3wP%W7y%%5O
zIyyQAO$dhA_962+b(=iI=&+a;k=5l@+?LGN#EAX=AHig*sxK`>wZ|+;o2GTqAna)9
zp65|)5Uq*0^Go6S2Zmo)5odb<J$L(@w{huG;`17UNG_q}Oj4w$s4Lh``i`KH`1ufY
zQL`(%(G1AS_Uj4DNi&HviHXC;`8P+;$B?jB`UtjrLJLy^+J=GF!zbytuKEHqDO+y$
zVa^Di_z^Tdm}s|$+kpsj+gY|X*8a_~h!14ys~FK2$4~L;m8?Nia%4vx9Zw%CbShAU
zDzc<y3_k1Ufs^OW%?B{RRn;g~jRT+Z2H;A?jHa{ER548Yd9BP!Po;GoT2YU&E1!pI
zy_=A%Yr6usTE>qLzHILiw}RHo=B7JBW}O1Y^!*GruCyE<;7M)FWlUD~wIVJUM31$%
zau;I6jjas}htI;ZS8ob(k6Vj>iW(S8*%^ix-mpCbC`pkP?lso1o5j=nR+$ngW;KP#
z!1v3^qEY58S00dIivh=XSVBwYBlz!m6WnE-2q$WB1$um|_G#IBC7QQi=2H%QC}~qc
z?hPGA%<CIY1A}09RcsNy>?M(S5hr<{r00SCzNggEd1V1;KPRX8$prlpSY-7St#je<
zs84Td^R%_XP#|_)+O5P(tWDcM?bs|F7ew=cS$jKhg*OkByRq{(YgNx2k72zJJ8#g2
z->_g_8{w_Ho-3iYu|T1U))uOsO>y$l`8hTV(MmJ1Pp`HtOrUm2NhP?};`FJUouP$`
z%ND^hzM57qJv|)+wT+rEJlhcoGH^0`5{nV*GRFdkUncb_c`wrhMccw{V9n}(gKFXU
zFQE!q{v8(SlMH_+oc_GUTl?1m&$t4_=p)AKL;pQs)pUugYD!w{ei=Wle`Ix|{vW5H
z)c%b))dn<ySE*<5YO|{9YWrR>cw|VafS(~@Jw<xg68*F1P5(!}sdco&2SMi`6T^zX
g|Jp;`K{XWm$L)wgi>xi+l+eE2F1yP9)Bo&00CW0MU;qFB

literal 0
HcmV?d00001

diff --git a/figures/lavacrossing_RGBImgPartialObsWrapper.png b/figures/lavacrossing_RGBImgPartialObsWrapper.png
new file mode 100644
index 0000000000000000000000000000000000000000..33279f9363367786c39c188d6a8a5f6e65ee4605
GIT binary patch
literal 6320
zcmcIodr(tn9=&SORmASp+4=@&?bdNiz-?9>ACTRx77-LhmqMaKY?JUzkbp=C3GPmH
zU8U4&LnAhsRUxD_M3E36AxW1V*lkj>nuLfzz#>EvA-wV+A+X=QanyDkXU7}=xHmI5
zck;{kedl-1`JHnI_D2UTS-k{;prDvNyAmL1K_GnnVj*}kuPU$^d~D0xos@ToM$RiB
z=8~X&#Jrp=T3%NA@%1N3x%6}z6&n^2h7DbxmY0`9-yR;G{q%(}S}rAAYggF8k1WpF
zlT3%8;D5r`JnDb*qabMIx|m&mJbdcvNSB!UPCBRU0WI<L-@m%Cba}@c5vxD{c0A)!
zm@}x}eNyf`aJ?m<HSs~(Wz6lkfSBX>z$@7;wd2lBvz6YRGoJ+SepJH$k=CbV&m9A$
zX8ChTMe`siwc=m%A?TOq0w8GpTVPdxc;{^hD%leXUb&bAZoj?-Zo7_yH~;nuFIuW*
zzhlo%@(tK>HOaEYwRO~HtD(pRB(K{l&J?S5%uJiXmqu&8d$&s{Na;YHIXrzelpbAO
z&)rg&ZSL#G0%r0n{*z`Qdn~u<u=eVHfzX(`(6oH!5u+thdsS;*6>+-T%u6kL<)21&
zBR?}7g6M17++1_Ho7<{jRgt8L-dYw}XjWw>SvY&U9FdZqgiY$+Ln*k4cT>>sZL?$r
zRMIq}&}-Lv++5<f{B4h|6ujiIabm5jk<(n6&`=zYk$st4;e4n@bFkF}fR20D;AIhy
zkgE)?W{rlJf@^Iz<i=KJd3gBIYx&fXq6yL~A_KviUep3Xnrx<sKU`qPwkude)&n@5
zW&9s3@=CBO-c~75Ncb(O+o=adLkr?d%Xa#sps3HFUHY+Cqm#P1AE)4S-;k8>(G3>T
z*Z6-2SyGMWi{Ow>Q;D|P9@fB&=MQtxfez_axU)PSkC6Svs#EcsOJy>d<fHzh9hu>(
z9gPA)KPPGpI<WQXai^M&d_&nU(CITbFR(3U<+U8Ep>&uhmsY4o=<zor6{tAEhvOh{
z3X`#37+JbHLJjL9+-*I*KO!{WhcT+~^5CdY98!2G91S&HqmBeXsZ?1wf-x9c%RuiW
zVmy+ai=t{PL6z*jLuzVe&rsxa9U8iO2I{Ok@Uk|CVRaeIqvWBRV*Z1Hj3!@q7Tnn`
zb)c+d=>#M`6rt$UPRS4uPv%$i)9tPUIP3*G7Nf1`zNyE2SvOJy7v;zVZPCH#mgXzi
zX|qty++qz46TF&RVt%@zt`9!C33*dcY;VG*ZiRr;ySM2Ff6zkKK_!7qjwyRHlcu8R
z87l}1fi^D|CFp=0%+9hb{7--s#0`v&Qb&Gus(kW(;MlU!;kO$rwQc37b?_S15+Xkm
zX21$`>6R^Dop!!~Yd$76V9k?-XZH0wnl|1GM%kJ710hupd<xT*EErkoU7|lLYxv2Q
zZ8=Z*OM3YGT&LbxCjSqr6&;j1=0lsMy+8#3QNqwVTkMUi;dt@91tG=+P^G~pBuNvf
zlRR{sh6pKPY9;{`QETc~r<8MHVRJR&pLFNZ`VLWXJj&xow6j!f#{!8Sx@Xif*I6j|
z$S6Im265`ip`Q`Y_{C^+1)uW>Rv>V#Nw~==5mLE6qss;#U7@Mxj2{UI>C%SkJ`DEu
zjrOyWt^L@Hmh+fC9i{m<C_xA0fX&9)<KG8TP<5i9K<Zc`Z>1@XN`ckBz4hIh3q{QV
zX32_FBDx)WnPDSlm~`1=ywqdpN<2FhRrX56xi!pfhfS0t^N#KQ5T3VNO@CyKHu=Fn
zLDbe!AOp8(9x>w7x@T(=fzH(9&TsJp8<&(>rz^9mE6cg*zf**5Ex0H38V~!S4Z*gg
zSSjwK-RyNhCdD6gvkeSpS_SNKITVxY-y2Xu3Gf$`v&MZC=wpM|h?uZGM~Vc0^1aH-
zh!5k&qhl>(a6v>Lay7bp>*%qkm`O!{9dw}*_ZK>@bv)KKam#$$ShsC7-P3cIGJwu?
zSGX62m&e*Q!+BR2LFE3(@5*Y8nRd-IlSf?MSUkG8q1_*R4!~H*68c7mlbHq}>Kw+h
zXG8>yLr<jLzJcZsvz_e(Dh0R}Hr%jR0A`zGR<%o>*pn{N<!<>#bcjNxQ^ZhW7+_he
z+DK%4saUO9>#$>)M=(B>8VtjmI~6D^^&;BaW^{9bRiy*cP%N`!MZqD6EBeU*;S*rL
zG>fVS>6K3H&f*&WHn1iU-mwEFtmo#VI>IJtp+DpiV<}K;LWjqRBSEeUc-wKcbBas#
zOFIHvq*U_V8@a#|=^AA+LQ#NeQ0|y7Z`FmHmS>fswD>P^T~I0mon*XZSV<d<^pezw
z32N~2@TP;C)GY!9>)}3sRD9u>&w1#pm@maBVY}VsC}NyPxq0w>be@+2V0?nhO&qYk
zkNCXzY{Fu)(5avJ3@(yUXcEz5O^1F&#c)Wn^u$(<OITH46o-Qh@dv<Gwx2~34Pc5y
za)B~yI#MFPp73l}U-t|{D2!)!O@MVqgMcGa@;LGPhCUs^4}wb2+c@0hy92HF*TiXy
z?khUYErZUfJEl9@*Puwg3}6Py&}xm=r4?fwLCR|trb~}Q0=-&ry&2NoX<HnH(jPo!
z^VF!bpU#@gKAja9HUsigg2)SyA4Vpz052UIxro~OWB?iD>`fl!x!o7HqIXp#(`#3_
z`E$vS*!UNApZ`NPlUdn$tbZf^q{sM>5C<$B2)}@(+oA}=(t&P{oqz*Gc(RtIeNFKA
zVa`%{)*3;<&w{XXR~YB`KC_3+^V8e)+Ij}F8b~PcdgdQ~7GVx-C_~o`zT^r-7yY-V
zqX&AZcQI9lI)at%x=yU3CFOu%hNtlH0SIT-!H4}^kl_!f0P8n1g8lmrulWXH%r+$>
zkvUVYn~sId4@CvH28-NrQgpWe5fZQ~9B)C8%7L!C|DQD88!~rUy)%K4h5@k^6tjE(
KuG&AP{^ftlkNFq?

literal 0
HcmV?d00001

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 4b629e0f8..e477878f6 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -11,6 +11,7 @@
 
 from minigrid.core.constants import COLOR_TO_IDX, OBJECT_TO_IDX, STATE_TO_IDX
 from minigrid.core.world_object import Goal
+from pdb import set_trace
 
 
 class ReseedWrapper(Wrapper):
@@ -201,7 +202,7 @@ class ImgObsWrapper(ObservationWrapper):
         >>> import gymnasium as gym
         >>> from minigrid.wrappers import ImgObsWrapper
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
-        >>> obs, _ = env.reset(seed=0)
+        >>> obs, _ = env.reset()
         >>> obs.keys()
         dict_keys(['image', 'direction', 'mission'])
         >>> env = ImgObsWrapper(env)
@@ -231,9 +232,9 @@ class OneHotPartialObsWrapper(ObservationWrapper):
     Example:
         >>> import miniworld
         >>> import gymnasium as gym
-        >>> from minigrid.wrappers import ImgObsWrapper
+        >>> from minigrid.wrappers import OneHotPartialObsWrapper
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
-        >>> obs, _ = env.reset(seed=0)
+        >>> obs, _ = env.reset()
         >>> obs["image"][0, :, :]
         array([[2, 5, 0],
                [2, 5, 0],
@@ -298,6 +299,20 @@ class RGBImgObsWrapper(ObservationWrapper):
     """
     Wrapper to use fully observable RGB image as observation,
     This can be used to have the agent to solve the gridworld in pixel space.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import RGBImgObsWrapper
+        >>> env = gym.make("MiniGrid-Empty-5x5-v0")
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs['image'])
+        ![NoWrapper](../figures/lavacrossing_NoWrapper.png)
+        >>> env = RGBImgObsWrapper(env)
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs['image'])
+        ![RGBImgObsWrapper](../figures/lavacrossing_RGBImgObsWrapper.png)
     """
 
     def __init__(self, env, tile_size=8):
@@ -326,6 +341,24 @@ class RGBImgPartialObsWrapper(ObservationWrapper):
     """
     Wrapper to use partially observable RGB image as observation.
     This can be used to have the agent to solve the gridworld in pixel space.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import RGBImgObsWrapper, RGBImgPartialObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> plt.imshow(obs["image"])
+        ![NoWrapper](../figures/lavacrossing_NoWrapper.png)
+        >>> env_obs = RGBImgObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> plt.imshow(obs["image"])
+        ![RGBImgObsWrapper](../figures/lavacrossing_RGBImgObsWrapper.png)
+        >>> env_obs = RGBImgPartialObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> plt.imshow(obs["image"])
+        ![RGBImgPartialObsWrapper](../figures/lavacrossing_RGBImgPartialObsWrapper.png)
     """
 
     def __init__(self, env, tile_size=8):
@@ -354,7 +387,21 @@ def observation(self, obs):
 
 class FullyObsWrapper(ObservationWrapper):
     """
-    Fully observable gridworld using a compact grid encoding
+    Fully observable gridworld using a compact grid encoding instead of the agent view.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import FullyObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = FullyObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (11, 11, 3)
     """
 
     def __init__(self, env):
@@ -387,6 +434,20 @@ class DictObservationSpaceWrapper(ObservationWrapper):
     where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
 
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import DictObservationSpaceWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['mission']
+        'avoid the lava and get to the green goal square'
+        >>> env_obs = DictObservationSpaceWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['mission'][:10]
+        [19, 31, 17, 36, 20, 38, 31, 2, 15, 35]
     """
 
     def __init__(self, env, max_words_in_mission=50, word_dict=None):
@@ -507,6 +568,17 @@ class FlatObsWrapper(ObservationWrapper):
     and combine these with observed images into one flat array.
 
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import FlatObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> env_obs = FlatObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs.shape
+        (2835,)
     """
 
     def __init__(self, env, maxStrLen=96):
@@ -531,6 +603,8 @@ def observation(self, obs):
         image = obs["image"]
         mission = obs["mission"]
 
+        set_trace()
+
         # Cache the last-encoded mission string
         if mission != self.cachedStr:
             assert (
@@ -568,6 +642,20 @@ class ViewSizeWrapper(Wrapper):
     """
     Wrapper to customize the agent field of view size.
     This cannot be used with fully observable wrappers.
+
+    Example:
+        >>> import minigrid
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import ViewSizeWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = ViewSizeWrapper(env, agent_view_size=5)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (5, 5, 3)
     """
 
     def __init__(self, env, agent_view_size=7):

From fd6285e9924491b376934a4d5fcb753d54d477bd Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 09:52:39 -0500
Subject: [PATCH 12/32] passed pre-commit

---
 minigrid/wrappers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index e477878f6..7892f807b 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -3,6 +3,7 @@
 import math
 import operator
 from functools import reduce
+from pdb import set_trace
 
 import gymnasium as gym
 import numpy as np
@@ -11,7 +12,6 @@
 
 from minigrid.core.constants import COLOR_TO_IDX, OBJECT_TO_IDX, STATE_TO_IDX
 from minigrid.core.world_object import Goal
-from pdb import set_trace
 
 
 class ReseedWrapper(Wrapper):

From c1e82c76b7b7b9c283ccd1f794581d4f2bbcf81b Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 11:20:31 -0500
Subject: [PATCH 13/32] updated isort to 5.12.0

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ef8e5dbb5..93bb2434b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
           - --show-source
           - --statistics
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
         args: ["--profile", "black"]

From 798b2fbf5dead3884cb8572db82da1138acbde65 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 11:28:06 -0500
Subject: [PATCH 14/32] removed set_trace

---
 minigrid/wrappers.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 7892f807b..1dd588a05 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -3,7 +3,6 @@
 import math
 import operator
 from functools import reduce
-from pdb import set_trace
 
 import gymnasium as gym
 import numpy as np
@@ -603,8 +602,6 @@ def observation(self, obs):
         image = obs["image"]
         mission = obs["mission"]
 
-        set_trace()
-
         # Cache the last-encoded mission string
         if mission != self.cachedStr:
             assert (

From 6a141aff9ab3b45c3d7843c1793decc440fffa70 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 11:37:28 -0500
Subject: [PATCH 15/32] fixed generate website test error

---
 .github/workflows/gh-pages.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
index 26af48d70..de9b83875 100644
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@@ -14,6 +14,8 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
+        with:
+            python-version: '3.9'
 
       - name: Install dependencies
         run: pip install -r docs/requirements.txt

From 86f929886f6a0de9fe9063559eedf0271a0cbc32 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 11:40:56 -0500
Subject: [PATCH 16/32] fixed generate website test error for non-str entry
 points

---
 docs/scripts/gen_mds.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/docs/scripts/gen_mds.py b/docs/scripts/gen_mds.py
index 6ed91ed5b..55d0569f0 100644
--- a/docs/scripts/gen_mds.py
+++ b/docs/scripts/gen_mds.py
@@ -32,19 +32,20 @@
 
 # Obtain filtered list
 for env_spec in tqdm(all_envs):
-    # minigrid.envs:Env
-    split = env_spec.entry_point.split(".")
-    # ignore gymnasium.envs.env_type:Env
-    env_module = split[0]
-
-    if len(split) > 2 and "babyai" in split[2]:
-        curr_babyai_env = split[2]
-        babyai_env_name = curr_babyai_env.split(":")[1]
-        babyai_envs[babyai_env_name] = env_spec
-    elif env_module == "minigrid":
-        env_name = split[1]
-        filtered_envs_by_type[env_name] = env_spec
-    # if env_module != "minigrid":
+    if isinstance(env_spec.entry_point, str):
+        # minigrid.envs:Env
+        split = env_spec.entry_point.split(".")
+        # ignore gymnasium.envs.env_type:Env
+        env_module = split[0]
+
+        if len(split) > 2 and "babyai" in split[2]:
+            curr_babyai_env = split[2]
+            babyai_env_name = curr_babyai_env.split(":")[1]
+            babyai_envs[babyai_env_name] = env_spec
+        elif env_module == "minigrid":
+            env_name = split[1]
+            filtered_envs_by_type[env_name] = env_spec
+        # if env_module != "minigrid":
     else:
         continue
 

From 244430837da65668d241a6678b5c1d6cef3cc553 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 6 Feb 2023 11:43:02 -0500
Subject: [PATCH 17/32] fixed generate website test error for non-str entry
 points in gen_envs_display

---
 docs/scripts/gen_envs_display.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/docs/scripts/gen_envs_display.py b/docs/scripts/gen_envs_display.py
index 9d0c76067..3deacea06 100644
--- a/docs/scripts/gen_envs_display.py
+++ b/docs/scripts/gen_envs_display.py
@@ -56,19 +56,22 @@ def generate_page(env, limit=-1, base_path=""):
     type_dict = {}
 
     for env_spec in gymnasium.envs.registry.values():
-        # minigrid.envs:Env or minigrid.envs.babyai:Env
-        split = env_spec.entry_point.split(".")
-        # ignore minigrid.envs.env_type:Env
-        env_module = split[0]
-        env_name = split[-1].split(":")[-1]
-        env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
+        if isinstance(env_spec.entry_point, str):
+            # minigrid.envs:Env or minigrid.envs.babyai:Env
+            split = env_spec.entry_point.split(".")
+            # ignore minigrid.envs.env_type:Env
+            env_module = split[0]
+            env_name = split[-1].split(":")[-1]
+            env_type = env_module if len(split) == 2 else split[-1].split(":")[0]
 
-        if env_module == "minigrid":
-            if env_type not in type_dict.keys():
-                type_dict[env_type] = []
+            if env_module == "minigrid":
+                if env_type not in type_dict.keys():
+                    type_dict[env_type] = []
 
-            if env_name not in type_dict[env_type]:
-                type_dict[env_type].append(env_name)
+                if env_name not in type_dict[env_type]:
+                    type_dict[env_type].append(env_name)
+        else:
+            continue
 
     for key, value in type_dict.items():
         env_type = key

From 5e90c82f2d695122e3545229b692b56963846ef6 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 19 Feb 2023 16:16:08 -0500
Subject: [PATCH 18/32] fixed bug in reset() of DirectionObsWrapper and added
 documentation

---
 minigrid/wrappers.py | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 5f777820a..5d9a7f397 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -3,6 +3,7 @@
 import math
 import operator
 from functools import reduce
+from pdb import set_trace
 
 import gymnasium as gym
 import numpy as np
@@ -688,6 +689,17 @@ class DirectionObsWrapper(ObservationWrapper):
     """
     Provides the slope/angular direction to the goal with the observations as modeled by (y2 - y2 )/( x2 - x1)
     type = {slope , angle}
+
+    Example:
+        >>> import miniworld
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import DirectionObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> env_obs = DirectionObsWrapper(env, type="slope")
+        >>> obs, _ = env_obs.reset()
+        >>> obs['goal_direction']
+        1.0
     """
 
     def __init__(self, env, type="slope"):
@@ -696,7 +708,8 @@ def __init__(self, env, type="slope"):
         self.type = type
 
     def reset(self):
-        obs = self.env.reset()
+        obs, _ = self.env.reset()
+
         if not self.goal_position:
             self.goal_position = [
                 x for x, y in enumerate(self.grid.grid) if isinstance(y, Goal)
@@ -707,6 +720,7 @@ def reset(self):
                     int(self.goal_position[0] / self.height),
                     self.goal_position[0] % self.width,
                 )
+
         return obs
 
     def observation(self, obs):
@@ -714,7 +728,12 @@ def observation(self, obs):
             self.goal_position[1] - self.agent_pos[1],
             self.goal_position[0] - self.agent_pos[0],
         )
-        obs["goal_direction"] = np.arctan(slope) if self.type == "angle" else slope
+
+        if self.type == "angle":
+            obs["goal_direction"] = np.arctan(slope)
+        else:
+            obs["goal_direction"] = slope
+
         return obs
 
 
@@ -723,6 +742,20 @@ class SymbolicObsWrapper(ObservationWrapper):
     Fully observable grid with a symbolic state representation.
     The symbol is a triple of (X, Y, IDX), where X and Y are
     the coordinates on the grid, and IDX is the id of the object.
+
+    Example:
+        >>> import miniworld
+        >>> import gymnasium as gym
+        >>> import matplotlib.pyplot as plt
+        >>> from minigrid.wrappers import SymbolicObsWrapper
+        >>> env = gym.make("MiniGrid-LavaCrossingS11N5-v0")
+        >>> obs, _ = env.reset()
+        >>> obs['image'].shape
+        (7, 7, 3)
+        >>> env_obs = SymbolicObsWrapper(env)
+        >>> obs, _ = env_obs.reset()
+        >>> obs['image'].shape
+        (11, 11, 3)
     """
 
     def __init__(self, env):
@@ -749,4 +782,5 @@ def observation(self, obs):
         grid = np.transpose(grid, (1, 2, 0))
         grid[agent_pos[0], agent_pos[1], 2] = OBJECT_TO_IDX["agent"]
         obs["image"] = grid
+
         return obs

From a24b578fec327589f1150325fc49b2ed9bc47efe Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 19 Feb 2023 16:19:31 -0500
Subject: [PATCH 19/32] deleted unused import pdb

---
 minigrid/wrappers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 5d9a7f397..52ca133ba 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -3,7 +3,6 @@
 import math
 import operator
 from functools import reduce
-from pdb import set_trace
 
 import gymnasium as gym
 import numpy as np

From 6beb137266225907e0de84c5577e9540847fab7d Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 19 Feb 2023 16:24:00 -0500
Subject: [PATCH 20/32] renamed StateBonus to PositionBonus

---
 docs/api/wrappers.md   | 4 ++--
 minigrid/wrappers.py   | 7 +++----
 tests/test_wrappers.py | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md
index 0da2c749d..e188d86e7 100644
--- a/docs/api/wrappers.md
+++ b/docs/api/wrappers.md
@@ -58,10 +58,10 @@ lastpage:
 .. autoclass:: minigrid.wrappers.RGBImgObsWrapper
 ```
 
-# State Bonus
+# Position Bonus
 
 ```{eval-rst}
-.. autoclass:: minigrid.wrappers.StateBonus
+.. autoclass:: minigrid.wrappers.PositionBonus
 ```
 
 # Symbolic Obs
diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 52ca133ba..d4c9a550f 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -127,8 +127,7 @@ def reset(self, **kwargs):
         return self.env.reset(**kwargs)
 
 
-# Should be named PositionBonus
-class StateBonus(Wrapper):
+class PositionBonus(Wrapper):
     """
     Adds an exploration bonus based on which positions
     are visited on the grid.
@@ -136,7 +135,7 @@ class StateBonus(Wrapper):
     Example:
         >>> import miniworld
         >>> import gymnasium as gym
-        >>> from minigrid.wrappers import StateBonus
+        >>> from minigrid.wrappers import PositionBonus
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
         >>> _, _ = env.reset(seed=0)
         >>> _, reward, _, _, _ = env.step(1)
@@ -145,7 +144,7 @@ class StateBonus(Wrapper):
         >>> _, reward, _, _, _ = env.step(1)
         >>> print(reward)
         0
-        >>> env_bonus = StateBonus(env)
+        >>> env_bonus = PositionBonus(env)
         >>> obs, _ = env_bonus.reset(seed=0)
         >>> obs, reward, terminated, truncated, info = env_bonus.step(1)
         >>> print(reward)
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index b2183c4f4..0e36cd7f3 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -18,7 +18,7 @@
     ReseedWrapper,
     RGBImgObsWrapper,
     RGBImgPartialObsWrapper,
-    StateBonus,
+    PositionBonus,
     ViewSizeWrapper,
 )
 from tests.utils import all_testing_env_specs, assert_equals, minigrid_testing_env_specs
@@ -79,7 +79,7 @@ def test_reseed_wrapper(env_spec):
 @pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
 def test_state_bonus_wrapper(env_id):
     env = gym.make(env_id)
-    wrapped_env = StateBonus(gym.make(env_id))
+    wrapped_env = PositionBonus(gym.make(env_id))
 
     action_forward = Actions.forward
     action_left = Actions.left

From 2e8f4a364158359a714f9bca8adf9bc6117afd62 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 19 Feb 2023 16:29:01 -0500
Subject: [PATCH 21/32] renamed state bonus test to position bonus

---
 tests/test_wrappers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index 0e36cd7f3..8755e06bf 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -15,10 +15,10 @@
     FullyObsWrapper,
     ImgObsWrapper,
     OneHotPartialObsWrapper,
+    PositionBonus,
     ReseedWrapper,
     RGBImgObsWrapper,
     RGBImgPartialObsWrapper,
-    PositionBonus,
     ViewSizeWrapper,
 )
 from tests.utils import all_testing_env_specs, assert_equals, minigrid_testing_env_specs
@@ -77,7 +77,7 @@ def test_reseed_wrapper(env_spec):
 
 
 @pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
-def test_state_bonus_wrapper(env_id):
+def test_position_bonus_wrapper(env_id):
     env = gym.make(env_id)
     wrapped_env = PositionBonus(gym.make(env_id))
 

From abb1281d86a34f6e0128732ff7ecb9b9a66b9b6a Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 19 Feb 2023 16:55:18 -0500
Subject: [PATCH 22/32] added tests to directionobs and symbolicobs

---
 minigrid/wrappers.py   |  2 +-
 tests/test_wrappers.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index d4c9a550f..f7252c207 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -719,7 +719,7 @@ def reset(self):
                     self.goal_position[0] % self.width,
                 )
 
-        return obs
+        return self.observation(obs)
 
     def observation(self, obs):
         slope = np.divide(
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index 8755e06bf..839dc2684 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -11,6 +11,7 @@
 from minigrid.wrappers import (
     ActionBonus,
     DictObservationSpaceWrapper,
+    DirectionObsWrapper,
     FlatObsWrapper,
     FullyObsWrapper,
     ImgObsWrapper,
@@ -19,6 +20,7 @@
     ReseedWrapper,
     RGBImgObsWrapper,
     RGBImgPartialObsWrapper,
+    SymbolicObsWrapper,
     ViewSizeWrapper,
 )
 from tests.utils import all_testing_env_specs, assert_equals, minigrid_testing_env_specs
@@ -260,3 +262,43 @@ def test_viewsize_wrapper(view_size):
     obs, _, _, _, _ = env.step(0)
     assert obs["image"].shape == (view_size, view_size, 3)
     env.close()
+
+
+@pytest.mark.parametrize("env_id", ["MiniGrid-LavaCrossingS11N5-v0"])
+@pytest.mark.parametrize("type", ["slope", "angle"])
+def test_direction_obs_wrapper(env_id, type):
+    env = gym.make(env_id)
+    env = DirectionObsWrapper(env, type=type)
+    obs = env.reset()
+
+    slope = np.divide(
+        env.goal_position[1] - env.agent_pos[1],
+        env.goal_position[0] - env.agent_pos[0],
+    )
+    if type == "slope":
+        assert obs["goal_direction"] == slope
+    elif type == "angle":
+        assert obs["goal_direction"] == np.arctan(slope)
+
+    obs, _, _, _, _ = env.step(0)
+    slope = np.divide(
+        env.goal_position[1] - env.agent_pos[1],
+        env.goal_position[0] - env.agent_pos[0],
+    )
+    if type == "slope":
+        assert obs["goal_direction"] == slope
+    elif type == "angle":
+        assert obs["goal_direction"] == np.arctan(slope)
+
+    env.close()
+
+
+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
+def test_symbolic_obs_wrapper(env_id):
+    env = gym.make(env_id)
+    env = SymbolicObsWrapper(env)
+    obs, _ = env.reset()
+    assert obs["image"].shape == (env.width, env.height, 3)
+    obs, _, _, _, _ = env.step(0)
+    assert obs["image"].shape == (env.width, env.height, 3)
+    env.close()

From 912afc47db5eeafb130ba6129a91e43f61797857 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 20 Feb 2023 09:11:55 -0500
Subject: [PATCH 23/32] updated doc for PositionBonus

---
 minigrid/wrappers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index f7252c207..55686508c 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -130,7 +130,7 @@ def reset(self, **kwargs):
 class PositionBonus(Wrapper):
     """
     Adds an exploration bonus based on which positions
-    are visited on the grid.
+    are visited on the grid. Was previously called StateBonus.
 
     Example:
         >>> import miniworld

From 73575216395799f022206bf83354bb8222646130 Mon Sep 17 00:00:00 2001
From: Mark Towers <mark.m.towers@gmail.com>
Date: Mon, 20 Feb 2023 15:05:23 +0000
Subject: [PATCH 24/32] Update wrappers.py

---
 minigrid/wrappers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 55686508c..298fd75f4 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -130,7 +130,10 @@ def reset(self, **kwargs):
 class PositionBonus(Wrapper):
     """
     Adds an exploration bonus based on which positions
-    are visited on the grid. Was previously called StateBonus.
+    are visited on the grid. 
+    
+    Note: 
+        This wrapper was previously called ``StateBonus``.
 
     Example:
         >>> import miniworld

From 38b552e3815ca1ec9b05c431fb438345cebf26a0 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Fri, 24 Feb 2023 14:06:34 -0500
Subject: [PATCH 25/32] ran pre-commit

---
 minigrid/wrappers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 298fd75f4..137790fce 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -130,9 +130,9 @@ def reset(self, **kwargs):
 class PositionBonus(Wrapper):
     """
     Adds an exploration bonus based on which positions
-    are visited on the grid. 
-    
-    Note: 
+    are visited on the grid.
+
+    Note:
         This wrapper was previously called ``StateBonus``.
 
     Example:

From 7e30a98e038009dc9db2fefd7722b771c6ae21ef Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 12 Mar 2023 18:40:26 -0400
Subject: [PATCH 26/32] removed used actions

---
 minigrid/manual_control.py | 17 +++++++++--------
 minigrid/minigrid_env.py   | 34 ++++++++++++++++++----------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/minigrid/manual_control.py b/minigrid/manual_control.py
index 852952bcc..e07351b18 100755
--- a/minigrid/manual_control.py
+++ b/minigrid/manual_control.py
@@ -7,6 +7,7 @@
 from minigrid.minigrid_env import MiniGridEnv
 from minigrid.utils.window import Window
 from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
+from minigrid.core.actions import Actions
 
 
 class ManualControl:
@@ -31,7 +32,7 @@ def start(self):
         self.reset(self.seed)
         self.window.show(block=True)
 
-    def step(self, action: MiniGridEnv.Actions):
+    def step(self, action: Actions):
         _, reward, terminated, truncated, _ = self.env.step(action)
         print(f"step={self.env.step_count}, reward={reward:.2f}")
 
@@ -69,13 +70,13 @@ def key_handler(self, event):
             return
 
         key_to_action = {
-            "left": MiniGridEnv.Actions.left,
-            "right": MiniGridEnv.Actions.right,
-            "up": MiniGridEnv.Actions.forward,
-            " ": MiniGridEnv.Actions.toggle,
-            "pageup": MiniGridEnv.Actions.pickup,
-            "pagedown": MiniGridEnv.Actions.drop,
-            "enter": MiniGridEnv.Actions.done,
+            "left": Actions.left,
+            "right": Actions.right,
+            "up": Actions.forward,
+            " ": Actions.toggle,
+            "pageup": Actions.pickup,
+            "pagedown": Actions.drop,
+            "enter": Actions.done,
         }
 
         action = key_to_action[key]
diff --git a/minigrid/minigrid_env.py b/minigrid/minigrid_env.py
index 6014eb0d8..1b025841d 100755
--- a/minigrid/minigrid_env.py
+++ b/minigrid/minigrid_env.py
@@ -10,6 +10,7 @@
 import numpy as np
 from gymnasium import spaces
 
+from minigrid.core.actions import Actions
 from minigrid.core.constants import COLOR_NAMES, DIR_TO_VEC, TILE_PIXELS
 from minigrid.core.grid import Grid
 from minigrid.core.mission import MissionSpace
@@ -29,21 +30,21 @@ class MiniGridEnv(gym.Env):
         "render_fps": 10,
     }
 
-    # Enumeration of possible actions
-    class Actions(IntEnum):
-        # Turn left, turn right, move forward
-        left = 0
-        right = 1
-        forward = 2
-        # Pick up an object
-        pickup = 3
-        # Drop an object
-        drop = 4
-        # Toggle/activate an object
-        toggle = 5
-
-        # Done completing task
-        done = 6
+    # # Enumeration of possible actions
+    # class Actions(IntEnum):
+    #     # Turn left, turn right, move forward
+    #     left = 0
+    #     right = 1
+    #     forward = 2
+    #     # Pick up an object
+    #     pickup = 3
+    #     # Drop an object
+    #     drop = 4
+    #     # Toggle/activate an object
+    #     toggle = 5
+
+    #     # Done completing task
+    #     done = 6
 
     def __init__(
         self,
@@ -70,7 +71,8 @@ def __init__(
         assert width is not None and height is not None
 
         # Action enumeration for this environment
-        self.actions = MiniGridEnv.Actions
+        # self.actions = MiniGridEnv.Actions
+        self.actions = Actions
 
         # Actions are discrete integer values
         self.action_space = spaces.Discrete(len(self.actions))

From 3ce7b17c88e8c565bfa0d1df8ee7a45191256854 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sun, 12 Mar 2023 18:43:52 -0400
Subject: [PATCH 27/32] removed Actions in minigrid_env in favor of
 minigrid.core.actions.Actions

---
 minigrid/manual_control.py |  2 +-
 minigrid/minigrid_env.py   | 18 ------------------
 2 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/minigrid/manual_control.py b/minigrid/manual_control.py
index e07351b18..f77616001 100755
--- a/minigrid/manual_control.py
+++ b/minigrid/manual_control.py
@@ -4,10 +4,10 @@
 
 import gymnasium as gym
 
+from minigrid.core.actions import Actions
 from minigrid.minigrid_env import MiniGridEnv
 from minigrid.utils.window import Window
 from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
-from minigrid.core.actions import Actions
 
 
 class ManualControl:
diff --git a/minigrid/minigrid_env.py b/minigrid/minigrid_env.py
index 1b025841d..13b676fa0 100755
--- a/minigrid/minigrid_env.py
+++ b/minigrid/minigrid_env.py
@@ -3,7 +3,6 @@
 import hashlib
 import math
 from abc import abstractmethod
-from enum import IntEnum
 from typing import Iterable, TypeVar
 
 import gymnasium as gym
@@ -30,22 +29,6 @@ class MiniGridEnv(gym.Env):
         "render_fps": 10,
     }
 
-    # # Enumeration of possible actions
-    # class Actions(IntEnum):
-    #     # Turn left, turn right, move forward
-    #     left = 0
-    #     right = 1
-    #     forward = 2
-    #     # Pick up an object
-    #     pickup = 3
-    #     # Drop an object
-    #     drop = 4
-    #     # Toggle/activate an object
-    #     toggle = 5
-
-    #     # Done completing task
-    #     done = 6
-
     def __init__(
         self,
         mission_space: MissionSpace,
@@ -71,7 +54,6 @@ def __init__(
         assert width is not None and height is not None
 
         # Action enumeration for this environment
-        # self.actions = MiniGridEnv.Actions
         self.actions = Actions
 
         # Actions are discrete integer values

From c601b710f3765f75033e823df1b245c01f49c2e3 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 13 Mar 2023 11:51:32 -0400
Subject: [PATCH 28/32] fixed bug in SymbolicObsWrapper

---
 minigrid/wrappers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 137790fce..9d3286f99 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -777,9 +777,9 @@ def observation(self, obs):
             [OBJECT_TO_IDX[o.type] if o is not None else -1 for o in self.grid.grid]
         )
         agent_pos = self.env.agent_pos
-        w, h = self.width, self.height
-        grid = np.mgrid[:w, :h]
-        grid = np.concatenate([grid, objects.reshape(1, w, h)])
+        ncol, nrow = self.width, self.height
+        grid = np.mgrid[:nrow, :ncol]
+        grid = np.concatenate([grid, objects.reshape(1, nrow, ncol)])
         grid = np.transpose(grid, (1, 2, 0))
         grid[agent_pos[0], agent_pos[1], 2] = OBJECT_TO_IDX["agent"]
         obs["image"] = grid

From 0b0eec0b6b9a08d5e390aebbc62d1fe09e418ecc Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 13 Mar 2023 20:51:52 -0400
Subject: [PATCH 29/32] added test for SymbolicObsWrapper

---
 minigrid/wrappers.py   | 30 ++++++++++++++++--------------
 tests/test_wrappers.py | 32 +++++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
index 9d3286f99..321947918 100644
--- a/minigrid/wrappers.py
+++ b/minigrid/wrappers.py
@@ -71,7 +71,7 @@ class ActionBonus(gym.Wrapper):
     visited (state,action) pairs.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> from minigrid.wrappers import ActionBonus
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
@@ -136,7 +136,7 @@ class PositionBonus(Wrapper):
         This wrapper was previously called ``StateBonus``.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> from minigrid.wrappers import PositionBonus
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
@@ -199,7 +199,7 @@ class ImgObsWrapper(ObservationWrapper):
     Use the image as the only observation output, no language/mission.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> from minigrid.wrappers import ImgObsWrapper
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
@@ -231,7 +231,7 @@ class OneHotPartialObsWrapper(ObservationWrapper):
     agent view as observation.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> from minigrid.wrappers import OneHotPartialObsWrapper
         >>> env = gym.make("MiniGrid-Empty-5x5-v0")
@@ -302,7 +302,7 @@ class RGBImgObsWrapper(ObservationWrapper):
     This can be used to have the agent to solve the gridworld in pixel space.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import RGBImgObsWrapper
@@ -344,7 +344,7 @@ class RGBImgPartialObsWrapper(ObservationWrapper):
     This can be used to have the agent to solve the gridworld in pixel space.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import RGBImgObsWrapper, RGBImgPartialObsWrapper
@@ -391,7 +391,7 @@ class FullyObsWrapper(ObservationWrapper):
     Fully observable gridworld using a compact grid encoding instead of the agent view.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import FullyObsWrapper
@@ -437,7 +437,7 @@ class DictObservationSpaceWrapper(ObservationWrapper):
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import DictObservationSpaceWrapper
@@ -571,7 +571,7 @@ class FlatObsWrapper(ObservationWrapper):
     This wrapper is not applicable to BabyAI environments, given that these have their own language component.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import FlatObsWrapper
@@ -643,7 +643,7 @@ class ViewSizeWrapper(ObservationWrapper):
     This cannot be used with fully observable wrappers.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import ViewSizeWrapper
@@ -692,7 +692,7 @@ class DirectionObsWrapper(ObservationWrapper):
     type = {slope , angle}
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import DirectionObsWrapper
@@ -745,7 +745,7 @@ class SymbolicObsWrapper(ObservationWrapper):
     the coordinates on the grid, and IDX is the id of the object.
 
     Example:
-        >>> import miniworld
+        >>> import minigrid
         >>> import gymnasium as gym
         >>> import matplotlib.pyplot as plt
         >>> from minigrid.wrappers import SymbolicObsWrapper
@@ -778,8 +778,10 @@ def observation(self, obs):
         )
         agent_pos = self.env.agent_pos
         ncol, nrow = self.width, self.height
-        grid = np.mgrid[:nrow, :ncol]
-        grid = np.concatenate([grid, objects.reshape(1, nrow, ncol)])
+        grid = np.mgrid[:ncol, :nrow]
+        _objects = np.transpose(objects.reshape(1, nrow, ncol), (0, 2, 1))
+
+        grid = np.concatenate([grid, _objects])
         grid = np.transpose(grid, (1, 2, 0))
         grid[agent_pos[0], agent_pos[1], 2] = OBJECT_TO_IDX["agent"]
         obs["image"] = grid
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index 839dc2684..fd1ebd2fa 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -5,8 +5,10 @@
 import gymnasium as gym
 import numpy as np
 import pytest
+from pdb import set_trace
 
 from minigrid.core.actions import Actions
+from minigrid.core.constants import OBJECT_TO_IDX
 from minigrid.envs import EmptyEnv
 from minigrid.wrappers import (
     ActionBonus,
@@ -293,12 +295,36 @@ def test_direction_obs_wrapper(env_id, type):
     env.close()
 
 
-@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
+@pytest.mark.parametrize("env_id", ["MiniGrid-DistShift1-v0"])
 def test_symbolic_obs_wrapper(env_id):
     env = gym.make(env_id)
+
     env = SymbolicObsWrapper(env)
-    obs, _ = env.reset()
+    obs, _ = env.reset(seed=123)
+    agent_pos = env.agent_pos
+    goal_pos = env.goal_pos
+
     assert obs["image"].shape == (env.width, env.height, 3)
-    obs, _, _, _, _ = env.step(0)
+    assert np.alltrue(
+        obs["image"][agent_pos[0], agent_pos[1], :]
+        == np.array([agent_pos[0], agent_pos[1], OBJECT_TO_IDX["agent"]])
+    )
+    assert np.alltrue(
+        obs["image"][goal_pos[0], goal_pos[1], :]
+        == np.array([goal_pos[0], goal_pos[1], OBJECT_TO_IDX["goal"]])
+    )
+
+    obs, _, _, _, _ = env.step(2)
+    agent_pos = env.agent_pos
+    goal_pos = env.goal_pos
+
     assert obs["image"].shape == (env.width, env.height, 3)
+    assert np.alltrue(
+        obs["image"][agent_pos[0], agent_pos[1], :]
+        == np.array([agent_pos[0], agent_pos[1], OBJECT_TO_IDX["agent"]])
+    )
+    assert np.alltrue(
+        obs["image"][goal_pos[0], goal_pos[1], :]
+        == np.array([goal_pos[0], goal_pos[1], OBJECT_TO_IDX["goal"]])
+    )
     env.close()

From b643bffb1ee934f457c0b9d95fd1f84ae0a9cc66 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Mon, 13 Mar 2023 20:55:09 -0400
Subject: [PATCH 30/32] passed pre-commit

---
 tests/test_wrappers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index fd1ebd2fa..72d9bd6aa 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -5,7 +5,6 @@
 import gymnasium as gym
 import numpy as np
 import pytest
-from pdb import set_trace
 
 from minigrid.core.actions import Actions
 from minigrid.core.constants import OBJECT_TO_IDX

From 670fdbfad8784d1ae7662e44c82558fc56852e45 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Wed, 22 Mar 2023 09:17:13 -0400
Subject: [PATCH 31/32] updated version number to 2.2.0

---
 minigrid/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/minigrid/__init__.py b/minigrid/__init__.py
index 27759047d..22370084d 100644
--- a/minigrid/__init__.py
+++ b/minigrid/__init__.py
@@ -6,7 +6,7 @@
 from minigrid.core import roomgrid
 from minigrid.core.world_object import Wall
 
-__version__ = "2.1.1"
+__version__ = "2.2.0"
 
 
 try:

From 68f032cdddcc12512a40c78223f498d5116a1e56 Mon Sep 17 00:00:00 2001
From: Bolun <36321182+BolunDai0216@users.noreply.github.com>
Date: Sat, 25 Mar 2023 14:37:47 -0400
Subject: [PATCH 32/32] updated default screen size from 1 to 640

---
 minigrid/minigrid_env.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/minigrid/minigrid_env.py b/minigrid/minigrid_env.py
index 9786fb5e3..910435fc1 100755
--- a/minigrid/minigrid_env.py
+++ b/minigrid/minigrid_env.py
@@ -41,7 +41,7 @@ def __init__(
         see_through_walls: bool = False,
         agent_view_size: int = 7,
         render_mode: str | None = None,
-        screen_size: int | None = 1,
+        screen_size: int | None = 640,
         highlight: bool = True,
         tile_size: int = TILE_PIXELS,
         agent_pov: bool = False,
@@ -197,7 +197,6 @@ def __str__(self):
         output = ""
 
         for j in range(self.grid.height):
-
             for i in range(self.grid.width):
                 if i == self.agent_pos[0] and j == self.agent_pos[1]:
                     output += 2 * AGENT_DIR_TO_STR[self.agent_dir]
@@ -731,7 +730,6 @@ def get_frame(
             return self.get_full_render(highlight, tile_size)
 
     def render(self):
-
         img = self.get_frame(self.highlight, self.tile_size, self.agent_pov)
 
         if self.render_mode == "human":