Farama-Foundation · elliottower · Jul 6, 2023 · Jun 20, 2023 · Jun 21, 2023 · Jun 21, 2023
diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py
@@ -18,7 +18,7 @@
 | Agents             | 2                                  |
 | Action Shape       | Discrete(4672)                     |
 | Action Values      | Discrete(4672)                     |
-| Observation Shape  | (8,8,20)                           |
+| Observation Shape  | (8,8,111)                          |
 | Observation Values | [0,1]                              |
 
 
@@ -28,7 +28,7 @@
 
 The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described below, and an  `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section.
 
-Like AlphaZero, the main observation space is an 8x8 image representing the board. It has 20 channels representing:
+Like AlphaZero, the main observation space is an 8x8 image representing the board. It has 111 channels representing:
 
 * Channels 0 - 3: Castling rights:
   * Channel 0: All ones if white can castle queenside
@@ -41,10 +41,13 @@
 * Channel 7 - 18: One channel for each piece type and player color combination. For example, there is a specific channel that represents black knights. An index of this channel is set to 1 if a black knight is in the corresponding spot on the game board, otherwise, it is set to 0. En passant
 possibilities are represented by displaying the vulnerable pawn on the 8th row instead of the 5th.
 * Channel 19: represents whether a position has been seen before (whether a position is a 2-fold repetition)
+* Channels 20 - 110: represent the previous 7 boards, with each board represented by 13 channels. The latest board occupies the first 13 channels, followed by the second latest board, and so on. These 13 channels correspond to channels 7 - 19.
 
-Like AlphaZero, the board is always oriented towards the current agent (the currant agent's king starts on the 1st row). In other words, the two players are looking at mirror images of the board, not the same board.
+Similar to AlphaZero, our observation space stacks board observations: it contains the current board together with the previous 7 boards (8 boards in total).
 
-Unlike AlphaZero, the observation space does not stack the observations previous moves by default. This can be accomplished using the `frame_stacking` argument of our wrapper.
+Unlike AlphaZero, where the board is oriented towards the current player, `env.board_history` always keeps the board oriented towards the white agent, with the white agent's king on the 1st row. In other words, the stored history uses a single board layout for both players.
+
+Nevertheless, calling `env.observe('player_1')` returns the observation oriented towards the black agent: the board is mirrored and the white and black piece channels are swapped. This makes it possible to train agents that play proficiently as both black and white.
 
 #### Legal Actions Mask
 
@@ -202,10 +205,21 @@ def action_space(self, agent):
         return self.action_spaces[agent]
 
     def observe(self, agent):
-        observation = chess_utils.get_observation(
-            self.board, self.possible_agents.index(agent)
-        )
+        current_index = self.possible_agents.index(agent)
+
+        observation = chess_utils.get_observation(self.board, current_index)
         observation = np.dstack((observation[:, :, :7], self.board_history))
+        # We need to swap the white 6 channels with black 6 channels
+        if current_index == 1:
+            # 1. Mirror the board
+            observation = np.flip(observation, axis=0)
+            # 2. Swap the white 6 channels with the black 6 channels
+            for i in range(1, 9):
+                tmp = observation[..., 13 * i - 6 : 13 * i].copy()
+                observation[..., 13 * i - 6 : 13 * i] = observation[
+                    ..., 13 * i : 13 * i + 6
+                ]
+                observation[..., 13 * i : 13 * i + 6] = tmp
         legal_moves = (
             chess_utils.legal_moves(self.board) if agent == self.agent_selection else []
         )
@@ -272,7 +286,8 @@ def step(self, action):
         self._accumulate_rewards()
 
         # Update board after applying action
-        next_board = chess_utils.get_observation(self.board, current_agent)
+        # We always take the perspective of the white agent
+        next_board = chess_utils.get_observation(self.board, player=0)
         self.board_history = np.dstack(
             (next_board[:, :, 7:], self.board_history[:, :, :-13])
         )

diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py
@@ -8,7 +8,8 @@ def boards_to_ndarray(boards):
     bits = np.unpackbits(arr8)
     floats = bits.astype(bool)
     boardstack = floats.reshape([len(boards), 8, 8])
-    boardimage = np.transpose(boardstack, [1, 2, 0])
+    # Flip the transposed board along both board axes (rows and columns), because the board ends up rotated 180 degrees after the steps above.
+    boardimage = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=[0, 1])
     return boardimage
 
 
@@ -135,7 +136,7 @@ def get_move_plane(move):
 actions_to_moves = {}
 
 
-def action_to_move(board, action, player):
+def action_to_move(board: chess.Board, action, player: int):
     base_move = chess.Move.from_uci(actions_to_moves[action])
 
     base_coord = square_to_coord(base_move.from_square)
@@ -164,7 +165,7 @@ def make_move_mapping(uci_move):
     actions_to_moves[cur_action] = uci_move
 
 
-def legal_moves(orig_board):
+def legal_moves(orig_board: chess.Board):
     """Returns legal moves.
 
     action space is a 8x8x73 dimensional array
@@ -194,7 +195,7 @@ def legal_moves(orig_board):
     return legal_moves
 
 
-def get_observation(orig_board, player):
+def get_observation(orig_board: chess.Board, player: int):
     """Returns observation array.
 
     Observation is an 8x8x(P + L) dimensional array.
@@ -281,8 +282,9 @@ def get_observation(orig_board, player):
 
         """
     base = BASE
-    OURS = 0
-    THEIRS = 1
+    # In the module `chess`, the color is represented by 1 for white and 0 for black.
+    OURS = 1
+    THEIRS = 0
     result[base + 0] = board.pieces(chess.PAWN, OURS)
     result[base + 1] = board.pieces(chess.KNIGHT, OURS)
     result[base + 2] = board.pieces(chess.BISHOP, OURS)
@@ -321,17 +323,31 @@ def get_observation(orig_board, player):
       }
     """
     # from 0-63
-    square = board.ep_square
+    # If the en passant flag is set, move the vulnerable white pawn to the 1st row (and, vice versa, a vulnerable black pawn to the 8th row).
+    # For example:
+    # If white advances a pawn two squares, the opponent may respond with a special move called an en passant capture.
+    # To show this, we place the pawn at (row, col) = (1, `dest_square`) instead of (5, `dest_square`).
+    square = board.ep_square  # square where the en passant happened (int)
     if square:
-        ours = square > 32
+        ours = (
+            square < 32
+        )  # Less than 32 is a white square, otherwise it's a black square
         row = square % 8
-        dest_col_add = 8 * 7 if ours else 0
+        dest_col_add = 0 if ours else 8 * 7
         dest_square = dest_col_add + row
         if ours:
-            result[base + 0].remove(square - 8)
-            result[base + 0].add(dest_square)
+            result[base + 0].remove(
+                square + 8
+            )  # Set the `square + 8` position in channel `base` to 0
+            result[base + 0].add(
+                dest_square
+            )  # Set the `dest_square` position in channel `base` to 1
         else:
-            result[base + 6].remove(square + 8)
-            result[base + 6].add(dest_square)
+            result[base + 6].remove(
+                square - 8
+            )  # Set the `square - 8` position in channel `base + 6` to 0
+            result[base + 6].add(
+                dest_square
+            )  # Set the `dest_square` position in channel `base + 6` to 1
 
     return boards_to_ndarray(result)