"""
An AI player for Othello.
"""
import random
import sys
import time
# You can use the functions in othello_shared to write your AI
from othello_shared import find_lines, get_possible_moves, get_score, play_move
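
# Transposition cache: maps (board, player to move) -> (best move, utility for
# that node). Entries are not depth-aware, and the cache is cleared before each
# top-level move selection.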
cache = {}

def eprint(*args, **kwargs): # you can use this for debugging, as it will print to stderr and not stdout
    print(*args, file=sys.stderr, **kwargs)

# Method to compute utility value of terminal state
def compute_utility(board, color):
    # get_score(board) returns the disk counts as (player 1, player 2)
    p1_score, p2_score = get_score(board)
    if color == 1:
        return p1_score - p2_score
    elif color == 2:
        return p2_score - p1_score
    return 0
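
# Example (illustrative numbers): if get_score(board) returns (34, 30), the
# utility is 34 - 30 = 4 for player 1 and 30 - 34 = -4 for player 2.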

# Better heuristic value of board
def compute_heuristic(board, color): # optional refinement; currently identical to compute_utility
    p1_score, p2_score = get_score(board)
    if color == 1:
        return p1_score - p2_score
    elif color == 2:
        return p2_score - p1_score
    return 0
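
# One possible refinement, sketched here for illustration only and not called
# anywhere by this player: add a bonus for corner ownership on top of the disk
# differential, since corner disks can never be flipped. The helper name and
# the per-corner weight of 3 are assumptions, not part of the handout.
def corner_weighted_heuristic(board, color):
    opponent = 2 if color == 1 else 1
    n = len(board)
    corners = [(0, 0), (0, n - 1), (n - 1, 0), (n - 1, n - 1)]
    bonus = 0
    for (i, j) in corners:
        if board[i][j] == color:
            bonus += 3
        elif board[i][j] == opponent:
            bonus -= 3
    return compute_utility(board, color) + bonus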

############ MINIMAX ###############################
def minimax_min_node(board, color, limit, caching = 0):
    # The minimizing player is the opponent of color
    if color == 1:
        opponent = 2
    else:
        opponent = 1

    # Check the cache, keyed by the board and the player to move so that MIN
    # and MAX evaluations of the same position are kept separate
    if caching == 1 and (board, opponent) in cache:
        return cache[(board, opponent)]

    possible_moves = get_possible_moves(board, opponent)

    # Initialize best move and utility
    minUtility = float('inf')
    best_move = None
    if len(possible_moves) == 0 or limit == 0:
        # Terminal state or depth limit reached: evaluate the board directly
        return best_move, compute_utility(board, color)

    for new_move in possible_moves:
        # Get the resulting board
        new_board = play_move(board, opponent, new_move[0], new_move[1])
        # Compute the utility of the successor
        max_node = minimax_max_node(new_board, color, limit - 1, caching)
        if max_node[1] < minUtility:
            best_move = new_move
            minUtility = max_node[1]

    # Cache the result computed for this node
    if caching == 1:
        cache[(board, opponent)] = (best_move, minUtility)
    return best_move, minUtility

def minimax_max_node(board, color, limit, caching = 0): # returns highest possible utility
    # Check the cache first; if the position is not cached, run the search
    if caching == 1 and (board, color) in cache:
        return cache[(board, color)]

    possible_moves = get_possible_moves(board, color)

    # Initialize best move and utility
    maxUtility = -float('inf')
    best_move = None
    if len(possible_moves) == 0 or limit == 0:
        # Terminal state or depth limit reached: evaluate the board directly
        return best_move, compute_utility(board, color)

    for new_move in possible_moves:
        # Get the resulting board
        new_board = play_move(board, color, new_move[0], new_move[1])
        # Compute the utility of the successor
        min_node = minimax_min_node(new_board, color, limit - 1, caching)
        if min_node[1] > maxUtility:
            best_move = new_move
            maxUtility = min_node[1]

    # Cache the result computed for this node
    if caching == 1:
        cache[(board, color)] = (best_move, maxUtility)
    return best_move, maxUtility

def select_move_minimax(board, color, limit, caching = 0):
    """
    Given a board and a player color, decide on a move.
    The return value is a tuple of integers (i,j), where
    i is the column and j is the row on the board.

    Note that other parameters are accepted by this function:
    If limit is a positive integer, your code should enforce a depth limit that is equal to the value of the parameter.
    Search only to nodes at a depth-limit equal to the limit. If nodes at this level are non-terminal, return a heuristic
    value (see compute_utility).
    If caching is ON (i.e. 1), use state caching to reduce the number of state evaluations.
    If caching is OFF (i.e. 0), do NOT use state caching to reduce the number of state evaluations.
    """
    cache.clear()
    minimax_res = minimax_max_node(board, color, limit, caching)
    return minimax_res[0]
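
# Illustrative usage (the depth limit and flag are example values; board is
# assumed to be a tuple of row tuples with 0 = empty, 1 = dark, 2 = light,
# as described in run_ai below):
#   move = select_move_minimax(board, 1, 5, caching=1)
#   # move is an (i, j) column/row pair, e.g. (2, 4)
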
############ ALPHA-BETA PRUNING #####################
def alphabeta_min_node(board, color, alpha, beta, limit, caching = 0, ordering = 0):
    # The minimizing player is the opponent of color
    if color == 1:
        opponent = 2
    else:
        opponent = 1

    # Check the cache, keyed by the board and the player to move
    if caching == 1 and (board, opponent) in cache:
        return cache[(board, opponent)]

    possible_moves = get_possible_moves(board, opponent)

    # Initialize best move and utility
    minUtility = float('inf')
    best_move = None
    if len(possible_moves) == 0 or limit == 0:
        # Terminal state or depth limit reached: evaluate the board directly
        return best_move, compute_utility(board, color)

    for new_move in possible_moves:
        # Get the resulting board
        new_board = play_move(board, opponent, new_move[0], new_move[1])
        # Compute the utility of the successor
        max_node = alphabeta_max_node(new_board, color, alpha, beta, limit - 1, caching, ordering)
        if max_node[1] < minUtility:
            best_move = new_move
            minUtility = max_node[1]
        # Pruning: once the value is no better than alpha, stop exploring
        beta = min(beta, max_node[1])
        if beta <= alpha:
            break

    # Cache the result computed for this node
    if caching == 1:
        cache[(board, opponent)] = (best_move, minUtility)
    return best_move, minUtility
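
# Illustrative pruning example (values are made up): if a MAX node has already
# secured a child worth 6 (alpha = 6) and a MIN child below it then finds a
# reply worth 4, beta drops to 4 <= alpha, so the MIN node's remaining replies
# cannot change the MAX node's choice and are skipped.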

def alphabeta_max_node(board, color, alpha, beta, limit, caching = 0, ordering = 0):
    # Check the cache, keyed by the board and the player to move
    if caching == 1 and (board, color) in cache:
        return cache[(board, color)]

    possible_moves = get_possible_moves(board, color)

    # Initialize best move and utility
    maxUtility = -float('inf')
    best_move = None
    if len(possible_moves) == 0 or limit == 0:
        # Terminal state or depth limit reached: evaluate the board directly
        return best_move, compute_utility(board, color)

    # Order the moves by the immediate utility of the board they produce,
    # highest first, so that promising moves are explored early and cutoffs
    # happen sooner
    if ordering == 1:
        possible_moves = sorted(
            possible_moves,
            key=lambda move: compute_utility(play_move(board, color, move[0], move[1]), color),
            reverse=True)

    for new_move in possible_moves:
        # Get the resulting board
        new_board = play_move(board, color, new_move[0], new_move[1])
        # Compute the utility of the successor
        min_node = alphabeta_min_node(new_board, color, alpha, beta, limit - 1, caching, ordering)
        if min_node[1] > maxUtility:
            best_move = new_move
            maxUtility = min_node[1]
        # Pruning: once the value is no worse than beta, stop exploring
        alpha = max(alpha, min_node[1])
        if beta <= alpha:
            break

    # Cache the result computed for this node
    if caching == 1:
        cache[(board, color)] = (best_move, maxUtility)
    return best_move, maxUtility

def select_move_alphabeta(board, color, limit, caching = 0, ordering = 0):
    """
    Given a board and a player color, decide on a move.
    The return value is a tuple of integers (i,j), where
    i is the column and j is the row on the board.

    Note that other parameters are accepted by this function:
    If limit is a positive integer, your code should enforce a depth limit that is equal to the value of the parameter.
    Search only to nodes at a depth-limit equal to the limit. If nodes at this level are non-terminal, return a heuristic
    value (see compute_utility).
    If caching is ON (i.e. 1), use state caching to reduce the number of state evaluations.
    If caching is OFF (i.e. 0), do NOT use state caching to reduce the number of state evaluations.
    If ordering is ON (i.e. 1), use node ordering to expedite pruning and reduce the number of state evaluations.
    If ordering is OFF (i.e. 0), do NOT use node ordering to expedite pruning and reduce the number of state evaluations.
    """
    cache.clear()
    alphabeta_res = alphabeta_max_node(board, color, -float('inf'), float('inf'), limit, caching, ordering)
    return alphabeta_res[0]
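
# Illustrative usage (same board format as above; the settings shown are
# example values, not prescribed ones):
#   move = select_move_alphabeta(board, 2, 6, caching=1, ordering=1)
#   # With ordering on, promising moves are searched first, which usually
#   # prunes more of the tree than the same search with ordering off.
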
####################################################
def run_ai():
"""
This function establishes communication with the game manager.
It first introduces itself and receives its color.
Then it repeatedly receives the current score and current board state
until the game is over.
"""
print("Othello AI") # First line is the name of this AI
arguments = input().split(",")
color = int(arguments[0]) #Player color: 1 for dark (goes first), 2 for light.
limit = int(arguments[1]) #Depth limit
minimax = int(arguments[2]) #Minimax or alpha beta
caching = int(arguments[3]) #Caching
ordering = int(arguments[4]) #Node-ordering (for alpha-beta only)
if (minimax == 1): eprint("Running MINIMAX")
else: eprint("Running ALPHA-BETA")
if (caching == 1): eprint("State Caching is ON")
else: eprint("State Caching is OFF")
if (ordering == 1): eprint("Node Ordering is ON")
else: eprint("Node Ordering is OFF")
if (limit == -1): eprint("Depth Limit is OFF")
else: eprint("Depth Limit is ", limit)
if (minimax == 1 and ordering == 1): eprint("Node Ordering should have no impact on Minimax")
while True: # This is the main loop
# Read in the current game status, for example:
# "SCORE 2 2" or "FINAL 33 31" if the game is over.
# The first number is the score for player 1 (dark), the second for player 2 (light)
next_input = input()
status, dark_score_s, light_score_s = next_input.strip().split()
dark_score = int(dark_score_s)
light_score = int(light_score_s)
if status == "FINAL": # Game is over.
print
else:
board = eval(input()) # Read in the input and turn it into a Python
# object. The format is a list of rows. The
# squares in each row are represented by
# 0 : empty square
# 1 : dark disk (player 1)
# 2 : light disk (player 2)
# Select the move and send it to the manager
if (minimax == 1): #run this if the minimax flag is given
movei, movej = select_move_minimax(board, color, limit, caching)
else: #else run alphabeta
movei, movej = select_move_alphabeta(board, color, limit, caching, ordering)
print("{} {}".format(movei, movej))

if __name__ == "__main__":
    run_ai()