From cbf69417ea7bbc80fd30ac9f5580d912b45c571a Mon Sep 17 00:00:00 2001 From: jacob975 Date: Tue, 20 Jun 2023 23:42:26 +0800 Subject: [PATCH 1/7] Minor fix on the comments of chess_v6 --- pettingzoo/classic/chess/chess.py | 30 ++++++++++++++++++------- pettingzoo/classic/chess/chess_utils.py | 24 +++++++++++--------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py index 763c037a5..c84a6468d 100644 --- a/pettingzoo/classic/chess/chess.py +++ b/pettingzoo/classic/chess/chess.py @@ -18,7 +18,7 @@ | Agents | 2 | | Action Shape | Discrete(4672) | | Action Values | Discrete(4672) | -| Observation Shape | (8,8,20) | +| Observation Shape | (8,8,111) | | Observation Values | [0,1] | @@ -28,7 +28,7 @@ The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section. -Like AlphaZero, the main observation space is an 8x8 image representing the board. It has 20 channels representing: +Like AlphaZero, the main observation space is an 8x8 image representing the board. It has 111 channels representing: * Channels 0 - 3: Castling rights: * Channel 0: All ones if white can castle queenside @@ -41,10 +41,13 @@ * Channel 7 - 18: One channel for each piece type and player color combination. For example, there is a specific channel that represents black knights. An index of this channel is set to 1 if a black knight is in the corresponding spot on the game board, otherwise, it is set to 0. En passant possibilities are represented by displaying the vulnerable pawn on the 8th row instead of the 5th. * Channel 19: represents whether a position has been seen before (whether a position is a 2-fold repetition) +* Channel 20 - 111 represents the previous 7 boards, with each board represented by 13 channels. 
The latest board occupies the first 13 channels, followed by the second latest board, and so on. These 13 channels correspond to channels 7 - 20. -Like AlphaZero, the board is always oriented towards the current agent (the currant agent's king starts on the 1st row). In other words, the two players are looking at mirror images of the board, not the same board. +Similar to AlphaZero, our observation space follows a stacking approach, where it accumulates the previous 8 board observations. -Unlike AlphaZero, the observation space does not stack the observations previous moves by default. This can be accomplished using the `frame_stacking` argument of our wrapper. +Unlike AlphaZero, where the board orientation may vary, in our system, the `env.board_history` always maintains the orientation towards the white agent, with the white agent's king consistently positioned on the 1st row. In simpler terms, both players are observing the same board layout. + +Nevertheless, we have incorporated a convenient feature, the env.observe('player_1') function, specifically for the black agent's orientation. This facilitates the training of agents capable of playing proficiently as both black and white. #### Legal Actions Mask @@ -202,10 +205,21 @@ def action_space(self, agent): return self.action_spaces[agent] def observe(self, agent): - observation = chess_utils.get_observation( - self.board, self.possible_agents.index(agent) - ) + agent_index = self.possible_agents.index(agent) + + observation = chess_utils.get_observation(self.board, agent_index) observation = np.dstack((observation[:, :, :7], self.board_history)) + # We need to swap the white 6 channels with black 6 channels + if agent_index == 1: + # 1. Mirror the board + observation = np.flip(observation, axis=0) + # 2. 
Swap the white 6 channels with the black 6 channels + for i in range(1, 9): + tmp = observation[..., 13 * i - 6 : 13 * i].copy() + observation[..., 13 * i - 6 : 13 * i] = observation[ + ..., 13 * i : 13 * i + 6 + ] + observation[..., 13 * i : 13 * i + 6] = tmp legal_moves = ( chess_utils.legal_moves(self.board) if agent == self.agent_selection else [] ) @@ -272,7 +286,7 @@ def step(self, action): self._accumulate_rewards() # Update board after applying action - next_board = chess_utils.get_observation(self.board, current_agent) + next_board = chess_utils.get_observation(self.board, 0) self.board_history = np.dstack( (next_board[:, :, 7:], self.board_history[:, :, :-13]) ) diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 49dc1e3ac..2f104819c 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -8,8 +8,8 @@ def boards_to_ndarray(boards): bits = np.unpackbits(arr8) floats = bits.astype(bool) boardstack = floats.reshape([len(boards), 8, 8]) - boardimage = np.transpose(boardstack, [1, 2, 0]) - return boardimage + boardstack = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=[0, 1]) + return boardstack def square_to_coord(s): @@ -194,7 +194,7 @@ def legal_moves(orig_board): return legal_moves -def get_observation(orig_board, player): +def get_observation(orig_board, player: int): """Returns observation array. Observation is an 8x8x(P + L) dimensional array. 
@@ -207,6 +207,9 @@ def get_observation(orig_board, player): else: board = board + # print("Chess black is: ", chess.BLACK) # 0 + # print("Chess white is: ", chess.WHITE) # 1 + all_squares = chess.SquareSet(chess.BB_ALL) HISTORY_LEN = 1 PLANES_PER_BOARD = 13 @@ -281,8 +284,8 @@ def get_observation(orig_board, player): """ base = BASE - OURS = 0 - THEIRS = 1 + OURS = 1 + THEIRS = 0 result[base + 0] = board.pieces(chess.PAWN, OURS) result[base + 1] = board.pieces(chess.KNIGHT, OURS) result[base + 2] = board.pieces(chess.BISHOP, OURS) @@ -321,17 +324,18 @@ def get_observation(orig_board, player): } """ # from 0-63 - square = board.ep_square + # Adjust the row number for the white pawn to the 8th if the en passant flag is set, and vice versa for black pawns. + square = board.ep_square # (int) where the en passant happened if square: - ours = square > 32 + ours = square < 32 row = square % 8 - dest_col_add = 8 * 7 if ours else 0 + dest_col_add = 0 if ours else 8 * 7 dest_square = dest_col_add + row if ours: - result[base + 0].remove(square - 8) + result[base + 0].remove(square + 8) result[base + 0].add(dest_square) else: - result[base + 6].remove(square + 8) + result[base + 6].remove(square - 8) result[base + 6].add(dest_square) return boards_to_ndarray(result) From cd2b699e7827b1edd075416128eeeac206613590 Mon Sep 17 00:00:00 2001 From: jacob975 Date: Wed, 21 Jun 2023 19:58:37 +0800 Subject: [PATCH 2/7] Minor fix --- pettingzoo/classic/chess/chess.py | 9 +++--- pettingzoo/classic/chess/chess_utils.py | 42 ++++++++++++++++--------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py index c84a6468d..b9a45b83a 100644 --- a/pettingzoo/classic/chess/chess.py +++ b/pettingzoo/classic/chess/chess.py @@ -205,12 +205,12 @@ def action_space(self, agent): return self.action_spaces[agent] def observe(self, agent): - agent_index = self.possible_agents.index(agent) + current_index = 
self.possible_agents.index(agent) - observation = chess_utils.get_observation(self.board, agent_index) + observation = chess_utils.get_observation(self.board, current_index) observation = np.dstack((observation[:, :, :7], self.board_history)) # We need to swap the white 6 channels with black 6 channels - if agent_index == 1: + if current_index == 1: # 1. Mirror the board observation = np.flip(observation, axis=0) # 2. Swap the white 6 channels with the black 6 channels @@ -286,7 +286,8 @@ def step(self, action): self._accumulate_rewards() # Update board after applying action - next_board = chess_utils.get_observation(self.board, 0) + # We always take the perspective of the white agent + next_board = chess_utils.get_observation(self.board, player=0) self.board_history = np.dstack( (next_board[:, :, 7:], self.board_history[:, :, :-13]) ) diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 2f104819c..229f6d8cf 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -8,8 +8,9 @@ def boards_to_ndarray(boards): bits = np.unpackbits(arr8) floats = bits.astype(bool) boardstack = floats.reshape([len(boards), 8, 8]) - boardstack = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=[0, 1]) - return boardstack + # We do np.flip() onto `boardstack` because the board is 180 degrees rotated after the process above. 
+ boardimage = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=[0, 1]) + return boardimage def square_to_coord(s): @@ -135,7 +136,7 @@ def get_move_plane(move): actions_to_moves = {} -def action_to_move(board, action, player): +def action_to_move(board: chess.Board, action, player: int): base_move = chess.Move.from_uci(actions_to_moves[action]) base_coord = square_to_coord(base_move.from_square) @@ -164,7 +165,7 @@ def make_move_mapping(uci_move): actions_to_moves[cur_action] = uci_move -def legal_moves(orig_board): +def legal_moves(orig_board: chess.Board): """Returns legal moves. action space is a 8x8x73 dimensional array @@ -194,7 +195,7 @@ def legal_moves(orig_board): return legal_moves -def get_observation(orig_board, player: int): +def get_observation(orig_board: chess.Board, player: int): """Returns observation array. Observation is an 8x8x(P + L) dimensional array. @@ -207,9 +208,6 @@ def get_observation(orig_board, player: int): else: board = board - # print("Chess black is: ", chess.BLACK) # 0 - # print("Chess white is: ", chess.WHITE) # 1 - all_squares = chess.SquareSet(chess.BB_ALL) HISTORY_LEN = 1 PLANES_PER_BOARD = 13 @@ -284,6 +282,7 @@ def get_observation(orig_board, player: int): """ base = BASE + # In the module `chess`, the color is represented by 1 for white and 0 for black. OURS = 1 THEIRS = 0 result[base + 0] = board.pieces(chess.PAWN, OURS) @@ -324,18 +323,31 @@ def get_observation(orig_board, player: int): } """ # from 0-63 - # Adjust the row number for the white pawn to the 8th if the en passant flag is set, and vice versa for black pawns. - square = board.ep_square # (int) where the en passant happened + # Adjust the row number for the white pawn to the 1st if the en passant flag is set, and vice versa for black pawns. + # For example + # If the white play an en passant move, the opponent can play a special move called en passant capture. 
+ # To show this, we denote the pawn at (row, col) = (1, `dest_square`) instead of (5, `dest_square`). + square = board.ep_square # square where the en passant happened (int) if square: - ours = square < 32 + ours = ( + square < 32 + ) # Less than 32 is a white square, otherwise it's a black square row = square % 8 dest_col_add = 0 if ours else 8 * 7 dest_square = dest_col_add + row if ours: - result[base + 0].remove(square + 8) - result[base + 0].add(dest_square) + result[base + 0].remove( + square + 8 + ) # Set the `square + 8` position in channel `base` to 0 + result[base + 0].add( + dest_square + ) # Set the `dest_square` position in channel `base` to 1 else: - result[base + 6].remove(square - 8) - result[base + 6].add(dest_square) + result[base + 6].remove( + square - 8 + ) # Set the `square + 8` position in channel `base` to 0 + result[base + 6].add( + dest_square + ) # Set the `dest_square` position in channel `base` to 1 return boards_to_ndarray(result) From f8fb12942f2fc274be4cb04f0c3ee4a9a329e782 Mon Sep 17 00:00:00 2001 From: jacob975 Date: Wed, 21 Jun 2023 20:04:28 +0800 Subject: [PATCH 3/7] Minor fix on the comment layout --- pettingzoo/classic/chess/chess_utils.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 229f6d8cf..88b39b9e2 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -336,18 +336,14 @@ def get_observation(orig_board: chess.Board, player: int): dest_col_add = 0 if ours else 8 * 7 dest_square = dest_col_add + row if ours: - result[base + 0].remove( - square + 8 - ) # Set the `square + 8` position in channel `base` to 0 - result[base + 0].add( - dest_square - ) # Set the `dest_square` position in channel `base` to 1 + # Set the `square + 8` position in channel `base` to False + result[base + 0].remove(square + 8) + # Set the `dest_square` position in channel 
`base` to True + result[base + 0].add(dest_square) else: - result[base + 6].remove( - square - 8 - ) # Set the `square + 8` position in channel `base` to 0 - result[base + 6].add( - dest_square - ) # Set the `dest_square` position in channel `base` to 1 + # Set the `square - 8` position in channel `base + 6` to False + result[base + 6].remove(square - 8) + # Set the `dest_square` position in channel `base + 6` to True + result[base + 6].add(dest_square) return boards_to_ndarray(result) From ee2028114887c61fd9f70e508370cb7be8f31580 Mon Sep 17 00:00:00 2001 From: jacob975 Date: Wed, 21 Jun 2023 20:34:29 +0800 Subject: [PATCH 4/7] Minor fix on comments --- pettingzoo/classic/chess/chess_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 88b39b9e2..4d989f2d4 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -329,9 +329,8 @@ def get_observation(orig_board: chess.Board, player: int): # To show this, we denote the pawn at (row, col) = (1, `dest_square`) instead of (5, `dest_square`). 
square = board.ep_square # square where the en passant happened (int) if square: - ours = ( - square < 32 - ) # Less than 32 is a white square, otherwise it's a black square + # Less than 32 is a white square, otherwise it's a black square + ours = square < 32 row = square % 8 dest_col_add = 0 if ours else 8 * 7 dest_square = dest_col_add + row From 7b00b3c977dc9704567d71ad6b785d8ecaa952c0 Mon Sep 17 00:00:00 2001 From: jacob975 Date: Fri, 23 Jun 2023 10:48:55 +0800 Subject: [PATCH 5/7] Fix the extra flipping in boards_to_ndarray and add more explanation on the en passant flag --- pettingzoo/classic/chess/chess_utils.py | 39 ++++++++++++++++++++----- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 4d989f2d4..07b2a8982 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -5,11 +5,12 @@ def boards_to_ndarray(boards): arr64 = np.array(boards, dtype=np.uint64) arr8 = arr64.view(dtype=np.uint8) - bits = np.unpackbits(arr8) + # Unpack each byte least-significant bit first, so bit positions increase from left to right + bits = np.unpackbits(arr8, bitorder="little") floats = bits.astype(bool) boardstack = floats.reshape([len(boards), 8, 8]) - # We do np.flip() onto `boardstack` because the board is 180 degrees rotated after the process above. - boardimage = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=[0, 1]) + # We do np.flip() onto `boardstack` because the 1st line of the boardimage is the 8th line of the ndarray. + boardimage = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=0) return boardimage @@ -322,12 +323,34 @@ def get_observation(orig_board: chess.Board, player: int): if (history_idx > 0) flip = !flip; } """ + # from 0-63 - # Adjust the row number for the white pawn to the 1st if the en passant flag is set, and vice versa for black pawns. - # For example - # If the white play an en passant move, the opponent can play a special move called en passant capture. 
- # To show this, we denote the pawn at (row, col) = (1, `dest_square`) instead of (5, `dest_square`). - square = board.ep_square # square where the en passant happened (int) + """ + Adjust the row number for the white pawn to the 1st if the en passant flag is set, and vice versa for black pawns. + E.g. A white pawn(e2) just made an initial two-square advance, `e2e4`. + A black pawn(f4) next to that white pawn(e4) can play en passant capture on it. + To show this chance, we denote the white pawn at `e1` instead of `e4` once that white pawn play two-square advance. + The en passant flag is set only for one turn, and it is reset after the next turn. + Note that the en passant flag has nothing to do with the opponent's pawn. + i.e. an en passant flag always set after an initial two-square advance. + + FEN: 4k3/8/8/8/4Pp2/8/8/4K3 b - e3 99 50 + The board The observation of the 7th channel(white pawn) + 8 · · · · ♚ · · · 8 · · · · · · · · + 7 · · · · · · · · 7 · · · · · · · · + 6 · · · · · · · · 6 · · · · · · · · + 5 · · · · · · · · 5 · · · · · · · · + 4 · · · · ♙ ♟ · · 4 · · · · · · · · + 3 · · · · · · · · 3 · · · · · · · · + 2 · · · · · · · · 2 · · · · · · · · + 1 · · · · ♔ · · · 1 · · · · 1 · · · + a b c d e f g h a b c d e f g h + + More details: pettingzoo/classic/chess/chess.py Line 41 + """ + + # square where the en passant happened (int) + square = board.ep_square if square: # Less than 32 is a white square, otherwise it's a black square ours = square < 32 From 53b2240b7f8989648399604ddfb518254627e239 Mon Sep 17 00:00:00 2001 From: jacob975 Date: Mon, 26 Jun 2023 23:06:01 +0800 Subject: [PATCH 6/7] Add more comments to the en passant flag --- pettingzoo/classic/chess/chess.py | 5 +++-- pettingzoo/classic/chess/chess_utils.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py index b9a45b83a..f1e6df669 100644 --- a/pettingzoo/classic/chess/chess.py +++ 
b/pettingzoo/classic/chess/chess.py @@ -38,11 +38,12 @@ * Channel 4: Is black or white * Channel 5: A move clock counting up to the 50 move rule. Represented by a single channel where the *n* th element in the flattened channel is set if there has been *n* moves * Channel 6: All ones to help neural networks find board edges in padded convolutions -* Channel 7 - 18: One channel for each piece type and player color combination. For example, there is a specific channel that represents black knights. An index of this channel is set to 1 if a black knight is in the corresponding spot on the game board, otherwise, it is set to 0. En passant -possibilities are represented by displaying the vulnerable pawn on the 8th row instead of the 5th. +* Channel 7 - 18: One channel for each piece type and player color combination. For example, there is a specific channel that represents black knights. An index of this channel is set to 1 if a black knight is in the corresponding spot on the game board, otherwise, it is set to 0. +Similar to LeelaChessZero, en passant possibilities are represented by displaying the vulnerable pawn on the 8th row instead of the 5th. * Channel 19: represents whether a position has been seen before (whether a position is a 2-fold repetition) * Channel 20 - 111 represents the previous 7 boards, with each board represented by 13 channels. The latest board occupies the first 13 channels, followed by the second latest board, and so on. These 13 channels correspond to channels 7 - 20. + Similar to AlphaZero, our observation space follows a stacking approach, where it accumulates the previous 8 board observations. Unlike AlphaZero, where the board orientation may vary, in our system, the `env.board_history` always maintains the orientation towards the white agent, with the white agent's king consistently positioned on the 1st row. In simpler terms, both players are observing the same board layout. 
diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 07b2a8982..2c5efadde 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -326,6 +326,7 @@ def get_observation(orig_board: chess.Board, player: int): # from 0-63 """ + The LeelaChessZero-style en passant flag. Adjust the row number for the white pawn to the 1st if the en passant flag is set, and vice versa for black pawns. E.g. A white pawn(e2) just made an initial two-square advance, `e2e4`. A black pawn(f4) next to that white pawn(e4) can play en passant capture on it. @@ -346,7 +347,9 @@ def get_observation(orig_board: chess.Board, player: int): 1 · · · · ♔ · · · 1 · · · · 1 · · · a b c d e f g h a b c d e f g h - More details: pettingzoo/classic/chess/chess.py Line 41 + More details: + https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/classic/chess/chess.py#L42 + https://github.com/LeelaChessZero/lc0/blob/master/src/chess/board.cc#L1114 """ # square where the en passant happened (int) From b010282679eb9de0ffa488de29481d98704a4e9e Mon Sep 17 00:00:00 2001 From: jacob975 Date: Tue, 27 Jun 2023 21:57:51 +0800 Subject: [PATCH 7/7] Revise comments to en passant flag. --- pettingzoo/classic/chess/chess_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py index 2c5efadde..e7a37c524 100644 --- a/pettingzoo/classic/chess/chess_utils.py +++ b/pettingzoo/classic/chess/chess_utils.py @@ -324,10 +324,11 @@ def get_observation(orig_board: chess.Board, player: int): } """ - # from 0-63 """ The LeelaChessZero-style en passant flag. - Adjust the row number for the white pawn to the 1st if the en passant flag is set, and vice versa for black pawns. + In FEN, the en passant flag is represented by the square that can be a possible target of an en passant, e.g. 
the `e3` in `4k3/8/8/8/4Pp2/8/8/4K3 b - e3 99 50`. + However, for a neural network, it is not easy to train the network to recognize sparse and unstructured data. + Therefore, we adhere to LeelaChessZero's convention, which adjusts the row number to the 1st for white pawns if the en passant flag is set, and vice versa for black pawns. E.g. A white pawn(e2) just made an initial two-square advance, `e2e4`. A black pawn(f4) next to that white pawn(e4) can play en passant capture on it. To show this chance, we denote the white pawn at `e1` instead of `e4` once that white pawn play two-square advance. @@ -335,7 +336,6 @@ def get_observation(orig_board: chess.Board, player: int): Note that the en passant flag has nothing to do with the opponent's pawn. i.e. an en passant flag always set after an initial two-square advance. - FEN: 4k3/8/8/8/4Pp2/8/8/4K3 b - e3 99 50 The board The observation of the 7th channel(white pawn) 8 · · · · ♚ · · · 8 · · · · · · · · 7 · · · · · · · · 7 · · · · · · · · @@ -346,13 +346,14 @@ def get_observation(orig_board: chess.Board, player: int): 2 · · · · · · · · 2 · · · · · · · · 1 · · · · ♔ · · · 1 · · · · 1 · · · a b c d e f g h a b c d e f g h + FEN: 4k3/8/8/8/4Pp2/8/8/4K3 b - e3 99 50 More details: https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/classic/chess/chess.py#L42 https://github.com/LeelaChessZero/lc0/blob/master/src/chess/board.cc#L1114 """ - # square where the en passant happened (int) + # potential en passant target square (int from 0 to 63), or None if there is none square = board.ep_square if square: # Less than 32 is a white square, otherwise it's a black square