-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMCTS.py
188 lines (163 loc) · 8.1 KB
/
MCTS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import math
import random
import Globals as g
import Classes as c
import AImodels as trial
import Functions as func
def MCTS_Move(board, combinations, values, playerCombinations, playerValues, curPlayer):
#Define the starting node. Set it's parent to itself so that we can verify
#later on whether or not the node we are currently on is the head
#(if node == node.parent) this is the head
head = c.Node(c.CoordinatePair(-1, -1), 5, 0, 0)
head.parent = head
#Variables for the simulation
simBoard = [[0 for i in range(g.width)] for j in range(g.width)]
simValues = [[0 for i in range(g.width)] for j in range(g.width)]
simPlayerValues = [[0 for i in range(g.width)] for j in range(g.width)]
simCombinations = []
simPlayerCombinations = []
simCurPlayer = c.Boolean(curPlayer)
#Create a list of all untaken spaces
untaken = []
for i in range(g.width):
for j in range(g.width):
if (board[i][j] == 0):
untaken.append(c.CoordinatePair(i, j))
#Run simulations according to the number of simulations required
for i in range(g.numberOfSimulations):
print(f"Simulation #{i}!")
#Copy simulated values
func.copy2DList(board, simBoard)
func.copy2DValue(values, simValues)
func.copy2DValue(playerValues, simPlayerValues)
simCombinations = []
simPlayerCombinations = []
func.copyCombination(combinations, simCombinations)
func.copyCombination(playerCombinations, simPlayerCombinations)
simCurPlayer.value = curPlayer
simUntaken = trial.getBestMovesInAnArrayFast(simBoard, simValues, simPlayerValues, simCombinations, simPlayerCombinations, simCurPlayer.value)
#Selection and Expansion of Node Tree
start = SelectAndExpand(head, simUntaken, simBoard, simValues, simPlayerValues, simCombinations, simPlayerCombinations, simCurPlayer)
#Simulate Game is Simulation
#downPropegate after calling upPropegate
upPropegate(start, simulateGame(simBoard, simCombinations, simPlayerCombinations, simValues, simPlayerValues, start.coordinates, simUntaken, simCurPlayer))
downPropegate(head)
#Prints out final values. Commend out if you don't want to see them
#for i in range(len(head.children)):
#print(f"Child ({head.children[i].coordinates.y}, {head.children[i].coordinates.x}) was visited {head.children[i].visited} times, had a score of {head.children[i].score} and had a uct of {head.children[i].uct}")
#Get the highest scoring node and return it back to main
highest = 0
for i in range(len(head.children)):
if (head.children[i].visited > head.children[highest].visited):
highest = i
return head.children[highest].coordinates
def simulateGame(simBoard, combinations, playerCombinations, values, playerValues, randoMove, simUntaken, curPlayer):
#Remember who was first to move; if it is the Human to move, we want the
#results to come back with the sign flipped
buff = 1
if (curPlayer.value):
buff *= -1
#While there is space on the board,
while (len(simUntaken) > 0):
#func.drawBoard(simBoard)
#input()
#If the current player is the human,
if (curPlayer.value):
#Set the board space to 1
simBoard[randoMove.x][randoMove.y] = 1
#Remove combinations to keep up with the WinCheck
func.removePotential(combinations, values, randoMove)
values[randoMove.x][randoMove.y].thirdPriority = -1
playerValues[randoMove.x][randoMove.y].thirdPriority = -1
#If there is a win with this player, return -1
if (func.checkWin(simBoard, playerCombinations, curPlayer.value)):
#print("This was a loss")
#func.drawBoard(simBoard)
return (-1 * buff)
else:
simBoard[randoMove.x][randoMove.y] = 2
#Do the same if it is the AI's turn to move
func.removePotential(playerCombinations, playerValues, randoMove)
values[randoMove.x][randoMove.y].thirdPriority = -1
playerValues[randoMove.x][randoMove.y].thirdPriority = -1
#Return 1 for a positive outcome
if (func.checkWin(simBoard, combinations, curPlayer.value)):
#print("This was a win")
#func.drawBoard(simBoard)
return (1 * buff)
#Remove the untaken space from the list so spaces are not repeated
#**This could be improved if we make RandoMove an index of untaken rather
#**than a Coordinate Pair itself, but it does look ugly
curPlayer.value = not curPlayer.value
if (curPlayer.value):
simUntaken = trial.getBestMovesInAnArrayFast(simBoard, playerValues, values, playerCombinations, combinations, curPlayer.value)
else:
simUntaken = trial.getBestMovesInAnArrayFast(simBoard, values, playerValues, combinations, playerCombinations, curPlayer.value)
#Get another random untaken space
if (len(simUntaken) > 0):
randoMove = random.choice(simUntaken)
#Return 0 if it's a catgame
#print("This was a tie")
#func.drawBoard(simBoard)
return 0
def SelectAndExpand(head, untaken, simBoard, simValues, simPlayerValues, simCombinations, simPlayerCombinations, simCurPlayer):
#If there is a child node that has not yet been visited at all,
if (len(head.children) < len(untaken)):
#**EXPAND
#Make the node (a bit hacky to use heads' children length as the index for
#Untaken, keep sight of this for further issues)
head.children.append(c.Node(untaken[len(head.children)], 0, 0, 0))
#Set the parent of the new node
head.children[len(head.children) - 1].parent = head
#Return the newly made node
return head.children[len(head.children) - 1]
#Otherwise, get the highest UCT node from the list.
#Return head if there is no option
if (len(untaken) == 0):
return head
temp = getHighestUCT(head.children)
#Remove the node recieved from the list of untaken nodes
for i in range(len(untaken)):
if (untaken[i] == temp.coordinates):
untaken.pop(i)
break
#Update boardstate
if (simCurPlayer.value):
simBoard[temp.coordinates.x][temp.coordinates.y] = 1
#Check if there is already a win on this new board state
if (func.checkWin(simBoard, simPlayerCombinations, simCurPlayer.value)):
return temp
func.removePotential(simCombinations, simValues, temp.coordinates)
simValues[temp.coordinates.x][temp.coordinates.y].thirdPriority = -1
simPlayerValues[temp.coordinates.x][temp.coordinates.y].thirdPriority = -1
else:
simBoard[temp.coordinates.x][temp.coordinates.y] = 2
#Do the same if it is the AI's turn to move
if (func.checkWin(simBoard, simCombinations, simCurPlayer.value)):
return temp
func.removePotential(simPlayerCombinations, simPlayerValues, temp.coordinates)
simValues[temp.coordinates.x][temp.coordinates.y].thirdPriority = -1
simPlayerValues[temp.coordinates.x][temp.coordinates.y].thirdPriority = -1
#Recursively call this function with the updated values
simCurPlayer.value = not simCurPlayer.value
return SelectAndExpand(temp, untaken, simBoard, simValues, simPlayerValues, simCombinations, simPlayerCombinations, simCurPlayer)
def upPropegate(start, simResult):
start.visited += 1
start.score += simResult
#Repeat if we are not at the head
if (start != start.parent):
upPropegate(start.parent, simResult * -1)
def downPropegate(start):
start.uct = (start.score / start.visited) + (1.414 * math.sqrt(math.log(start.parent.visited) / start.visited))
for i in range(len(start.children)):
downPropegate(start.children[i])
def getHighestUCT(nodes) :
curUCT = -1000
index = 0
#print(f"This list has {len(nodes)} nodes")
for i in range(len(nodes)):
#print(f"Node UCT: {nodes[i].uct}")
if (nodes[i].uct > curUCT):
curUCT = nodes[i].uct
index = i
return nodes[index]