-
-
Notifications
You must be signed in to change notification settings - Fork 422
/
tictactoe.py
329 lines (253 loc) · 11.4 KB
/
tictactoe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# noqa: D212, D415
"""
# Tic Tac Toe
```{figure} classic_tictactoe.gif
:width: 140px
:name: tictactoe
```
This environment is part of the <a href='..'>classic environments</a>. Please read that page first for general information.
| Import | `from pettingzoo.classic import tictactoe_v3` |
|--------------------|-----------------------------------------------|
| Actions | Discrete |
| Parallel API | Yes |
| Manual Control | No |
| Agents | `agents= ['player_1', 'player_2']` |
| Number of Agents   | 2                                             |
| Action Shape | (1) |
| Action Values | [0, 8] |
| Observation Shape | (3, 3, 2) |
| Observation Values | [0,1] |
Tic-tac-toe is a simple turn based strategy game where 2 players, X and O, take turns marking spaces on a 3 x 3 grid. The first player to place 3 of their marks in a horizontal, vertical, or diagonal line is the winner.
### Observation Space
The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described below, and an `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section.
The main observation is 2 planes of the 3x3 board. For player_1, the first plane represents the placement of Xs, and the second plane shows the placement of Os. The possible values for each cell are 0 or 1; in the first plane, 1 indicates that an X has been placed in that cell, and 0 indicates
that X is not in that cell. Similarly, in the second plane, 1 indicates that an O has been placed in that cell, while 0 indicates that an O has not been placed. For player_2, the observation is the same, but Xs and Os swap positions, so Os are encoded in plane 1 and Xs in plane 2. This allows for
self-play.
#### Legal Actions Mask
The legal moves available to the current agent are found in the `action_mask` element of the dictionary observation. The `action_mask` is a binary vector where each index of the vector represents whether the action is legal or not. The `action_mask` will be all zeros for any agent except the one
whose turn it is. Taking an illegal move ends the game with a reward of -1 for the illegally moving agent and a reward of 0 for all other agents.
### Action Space
Each action from 0 to 8 represents placing either an X or O in the corresponding cell. The cells are indexed as follows:
```
0 | 3 | 6
_________
1 | 4 | 7
_________
2 | 5 | 8
```
### Rewards
| Winner | Loser |
| :----: | :---: |
| +1 | -1 |
If the game ends in a draw, both players will receive a reward of 0.
### Version History
* v3: Fixed bug in arbitrary calls to observe() (1.8.0)
* v2: Legal action mask in observation replaced illegal move list in infos (1.5.0)
* v1: Bumped version of all environments due to adoption of new agent iteration scheme where all agents are iterated over after they are done (1.4.0)
* v0: Initial versions release (1.0.0)
"""
from __future__ import annotations
import os
import gymnasium
import numpy as np
import pygame
from gymnasium import spaces
from gymnasium.utils import EzPickle
from pettingzoo import AECEnv
from pettingzoo.classic.tictactoe.board import Board
from pettingzoo.utils import agent_selector, wrappers
def get_image(path):
    """Load an image that lives alongside this module.

    Args:
        path: Path of the image file, relative to this module's directory
            (e.g. ``"img/board.png"``).

    Returns:
        The loaded ``pygame`` image surface.
    """
    # os.path.join is portable; the original concatenated with a literal "/"
    # and redundantly re-imported os.path inside the function.
    cwd = os.path.dirname(__file__)
    return pygame.image.load(os.path.join(cwd, path))
def get_font(path, size):
    """Load a font that lives alongside this module.

    Args:
        path: Path of the font file, relative to this module's directory.
        size: Point size to load the font at.

    Returns:
        The loaded ``pygame.font.Font``.
    """
    # os.path.join is portable; the original concatenated with a literal "/"
    # and redundantly re-imported os.path inside the function.
    cwd = os.path.dirname(__file__)
    return pygame.font.Font(os.path.join(cwd, path), size)
def env(render_mode=None):
    """Build a tictactoe environment wrapped in the standard AEC wrappers.

    ``"ansi"`` rendering is implemented by creating a ``"human"``-mode raw
    environment and capturing its stdout.
    """
    if render_mode == "ansi":
        wrapped = raw_env(render_mode="human")
        wrapped = wrappers.CaptureStdoutWrapper(wrapped)
    else:
        wrapped = raw_env(render_mode=render_mode)
    wrapped = wrappers.TerminateIllegalWrapper(wrapped, illegal_reward=-1)
    wrapped = wrappers.AssertOutOfBoundsWrapper(wrapped)
    wrapped = wrappers.OrderEnforcingWrapper(wrapped)
    return wrapped
class raw_env(AECEnv, EzPickle):
    """Two-player Tic-Tac-Toe as a PettingZoo AEC environment.

    Players alternate placing marks on a 3x3 board held in ``self.board``.
    Observations stack two binary 3x3 planes (the observing player's marks
    first, the opponent's second, enabling symmetric self-play) plus a
    legal-action mask; the winner receives +1, the loser -1, and a draw
    gives both players 0 (see the module docstring).
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "name": "tictactoe_v3",
        "is_parallelizable": False,
        "render_fps": 1,
    }

    def __init__(
        self, render_mode: str | None = None, screen_height: int | None = 1000
    ):
        """Create the environment.

        Args:
            render_mode: ``None``, ``"human"`` or ``"rgb_array"``.
            screen_height: Pixel height (and width) of the square render
                surface.
        """
        super().__init__()
        EzPickle.__init__(self, render_mode, screen_height)
        self.board = Board()

        self.agents = ["player_1", "player_2"]
        self.possible_agents = self.agents[:]

        self.action_spaces = {i: spaces.Discrete(9) for i in self.agents}
        self.observation_spaces = {
            i: spaces.Dict(
                {
                    "observation": spaces.Box(
                        low=0, high=1, shape=(3, 3, 2), dtype=np.int8
                    ),
                    "action_mask": spaces.Box(low=0, high=1, shape=(9,), dtype=np.int8),
                }
            )
            for i in self.agents
        }

        self.rewards = {i: 0 for i in self.agents}
        self.terminations = {i: False for i in self.agents}
        self.truncations = {i: False for i in self.agents}
        # Since v2, legal moves are exposed through the observation's
        # action mask rather than infos; keep infos empty here for
        # consistency with reset() (the original seeded a stale
        # "legal_moves" list that reset() immediately discarded).
        self.infos = {i: {} for i in self.agents}

        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.reset()

        self.render_mode = render_mode
        self.screen_height = screen_height
        self.screen = None

        if self.render_mode == "human":
            self.clock = pygame.time.Clock()

    # Board cell encoding (flat list self.board.squares of 9 values):
    # blank space = 0, agent 0 = 1, agent 1 = 2.
    # Reshaped to 3x3 it reads row by row, e.g.
    #   [[0,0,2]
    #    [1,2,1]
    #    [2,1,0]]
    def observe(self, agent):
        """Return the dict observation for ``agent``.

        ``observation`` is a (3, 3, 2) int8 array: plane 0 holds the
        observing player's marks, plane 1 the opponent's.  ``action_mask``
        is a length-9 int8 vector flagging empty squares; it is all zeros
        for any agent other than the one currently selected to move.
        """
        board_vals = np.array(self.board.squares).reshape(3, 3)
        cur_player = self.possible_agents.index(agent)
        opp_player = (cur_player + 1) % 2

        # Marks are stored as player index + 1 (0 means empty).
        cur_p_board = np.equal(board_vals, cur_player + 1)
        opp_p_board = np.equal(board_vals, opp_player + 1)

        observation = np.stack([cur_p_board, opp_p_board], axis=2).astype(np.int8)
        # Only the agent whose turn it is receives a non-zero mask.
        legal_moves = self._legal_moves() if agent == self.agent_selection else []

        action_mask = np.zeros(9, "int8")
        for i in legal_moves:
            action_mask[i] = 1

        return {"observation": observation, "action_mask": action_mask}

    def observation_space(self, agent):
        """Return the (static) observation space of ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the (static) action space of ``agent``."""
        return self.action_spaces[agent]

    def _legal_moves(self):
        """Return the indices of the empty squares (0 == empty)."""
        return [i for i in range(len(self.board.squares)) if self.board.squares[i] == 0]

    # action in this case is a value from 0 to 8 indicating position to move on tictactoe board
    def step(self, action):
        """Play ``action`` (0-8) for the currently selected agent."""
        if (
            self.terminations[self.agent_selection]
            or self.truncations[self.agent_selection]
        ):
            return self._was_dead_step(action)
        # check if input action is a valid move (0 == empty spot)
        assert self.board.squares[action] == 0, "played illegal move"
        # play turn
        self.board.play_turn(self.agents.index(self.agent_selection), action)

        next_agent = self._agent_selector.next()

        if self.board.check_game_over():
            winner = self.board.check_for_winner()

            if winner == -1:
                # tie: both rewards stay 0
                pass
            elif winner == 1:
                # agent 0 won
                self.rewards[self.agents[0]] += 1
                self.rewards[self.agents[1]] -= 1
            else:
                # agent 1 won
                self.rewards[self.agents[1]] += 1
                self.rewards[self.agents[0]] -= 1

            # once either player wins or there is a draw, game over, both players are done
            self.terminations = {i: True for i in self.agents}

        # Switch selection to next agent
        self._cumulative_rewards[self.agent_selection] = 0
        self.agent_selection = next_agent

        self._accumulate_rewards()
        if self.render_mode == "human":
            self.render()

    def reset(self, seed=None, options=None):
        """Reset to an empty board; ``player_1`` moves first."""
        self.board = Board()

        self.agents = self.possible_agents[:]
        self.rewards = {i: 0 for i in self.agents}
        self._cumulative_rewards = {i: 0 for i in self.agents}
        self.terminations = {i: False for i in self.agents}
        self.truncations = {i: False for i in self.agents}
        self.infos = {i: {} for i in self.agents}
        # selects the first agent (the original called reset() twice in a
        # row; one call is sufficient and has the same effect)
        self._agent_selector.reinit(self.agents)
        self.agent_selection = self._agent_selector.reset()

        if self.screen is None:
            pygame.init()

        if self.render_mode == "human":
            self.screen = pygame.display.set_mode(
                (self.screen_height, self.screen_height)
            )
            pygame.display.set_caption("Tic-Tac-Toe")
        else:
            self.screen = pygame.Surface((self.screen_height, self.screen_height))

    def close(self):
        """No-op: pygame resources are left alive for reuse across resets."""
        pass

    def render(self):
        """Draw the current board; return an RGB array in ``rgb_array`` mode.

        Returns ``None`` in ``human`` mode (the frame is shown on screen)
        and a (H, W, 3) uint8-like array in ``rgb_array`` mode.
        """
        if self.render_mode is None:
            gymnasium.logger.warn(
                "You are calling render method without specifying any render mode."
            )
            return

        screen_height = self.screen_height
        screen_width = self.screen_height

        # Setup dimensions for 'x' and 'o' marks
        tile_size = int(screen_height / 4)

        # Load and blit the board image for the game
        board_img = get_image(os.path.join("img", "board.png"))
        board_img = pygame.transform.scale(
            board_img, (int(screen_width), int(screen_height))
        )

        self.screen.blit(board_img, (0, 0))

        # Load and blit actions for the game
        def getSymbol(value):
            # 0 = empty, 1 = agent 0 ("cross"), 2 = agent 1 ("circle").
            # (Parameter renamed from "input", which shadowed the builtin.)
            if value == 0:
                return None
            elif value == 1:
                return "cross"
            else:
                return "circle"

        board_state = list(map(getSymbol, self.board.squares))

        mark_pos = 0
        # Per the module docstring's cell-index diagram, consecutive square
        # indices run down a column, so x is the column and y the row.
        for x in range(3):
            for y in range(3):
                mark = board_state[mark_pos]
                mark_pos += 1

                if mark is None:
                    continue

                mark_img = get_image(os.path.join("img", mark + ".png"))
                mark_img = pygame.transform.scale(mark_img, (tile_size, tile_size))

                self.screen.blit(
                    mark_img,
                    (
                        # Empirically tuned offsets that center marks in cells.
                        (screen_width / 3.1) * x + (screen_width / 17),
                        (screen_width / 3.145) * y + (screen_height / 19),
                    ),
                )

        if self.render_mode == "human":
            pygame.display.update()
            self.clock.tick(self.metadata["render_fps"])

        observation = np.array(pygame.surfarray.pixels3d(self.screen))

        return (
            np.transpose(observation, axes=(1, 0, 2))
            if self.render_mode == "rgb_array"
            else None
        )