-
Notifications
You must be signed in to change notification settings - Fork 0
/
tic-tac-toe.R
209 lines (177 loc) · 5.36 KB
/
tic-tac-toe.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# https://dratewka.wordpress.com/2013/03/15/ai-overkill-teaching-a-neural-network-to-play-tic-tac-toe/
#
# Such a network is actually a multilayer perceptron with the three layers of neurons (circles) connected by weighted links (arrows).
# It works by receiving some inputs delivered to the input layer, processing them using the neurons and returning output through the
# output layer.
# Initially such a network is pretty useless until it's trained. Training basically means trying to figure out how the weight values
# should be set. So how do we teach one to play tic-tac-toe?
#
# Let’s start from the beginning and define an empty board of a given size and write a function (in R) which evaluates whether
# somebody has won:
# Side length of the (square) game board; 3 gives the classic tic-tac-toe grid.
board.size <- 3
# The game board is a matrix with: 1 - tic, 0 - empty, -1 - tac
GenerateEmptyBoard <- function() {
  # A fresh board: a board.size x board.size matrix with every cell 0 (empty).
  matrix(0, nrow = board.size, ncol = board.size)
}
DisplayBoard <- function(board) {
  # Human-readable view of a board: -1 -> "X", 0 -> "*", 1 -> "O".
  # Returns a factor carrying the board's dimensions so it prints as a grid.
  symbols <- factor(board,
                    levels = c(-1, 0, 1),
                    labels = c("X", "*", "O"))
  dim(symbols) <- dim(board)
  symbols
}
FlipMatrix <- function(m) {
  # Reverse the row order of m (vertical flip); used so that diag() on the
  # flipped board yields the anti-diagonal.
  # Unlike apply(m, 2, rev), indexing with drop = FALSE always returns a
  # matrix, even for a single-row input.
  m[rev(seq_len(nrow(m))), , drop = FALSE]
}
# Checking whether somebody has won
EvaluateBoard <- function(board) {
  # Sum every winning line: all columns, all rows, and both diagonals.
  line.sums <- c(colSums(board),
                 rowSums(board),
                 sum(diag(board)),
                 sum(diag(FlipMatrix(board))))
  # A line summing to +board.size is all 1s: tic wins.
  if (max(line.sums) == board.size) {
    return(1)
  }
  # A line summing to -board.size is all -1s: tac wins.
  if (min(line.sums) == -board.size) {
    return(-1)
  }
  # No winner (yet)
  0
}
# Now let’s create functions for our neural network to let it play the game.
# A reasonable approach is to assume that the network will be taught to evaluate
# the board situation. In such a case, when the computer is making its move, it
# will use the neural network to evaluate each possible move and choose the best one.
# Number of hidden neurons (one row of the weight matrix per neuron).
neurons <- 1
# Weights per neuron: board.size^2 input weights, plus an input bias and an
# output weight (the "2 * neurons" term).
numWeights <- neurons * board.size ^ 2 + 2 * neurons
# Creating an empty neural network which we represent as a matrix of weights
InitNN <- function() {
  # Random weights uniformly drawn from [0, 1]; one row per hidden neuron.
  matrix(runif(numWeights), nrow = neurons)
}
# Calculating the network
RunNN <- function(nn, board) {
  # Forward pass of the perceptron. Weight-matrix layout per row (neuron):
  # column 1 = output weight, column 2 = input bias, columns 3.. = input
  # weights for each board cell. Returns a 1x1 matrix with the board score.
  out.weights <- nn[, 1]
  in.bias <- nn[, 2]
  in.weights <- nn[, 3:ncol(nn)]
  hidden <- tanh(in.weights %*% as.vector(board) + in.bias)
  t(out.weights) %*% hidden
}
# Evaluating every move possible using the network
RunAI <- function(ai, board, side) {
  # Score each cell by placing `side` there and letting the network evaluate
  # the resulting board. vapply (not sapply) guarantees a numeric vector.
  res <- vapply(seq_along(board), function(i) {
    b <- board
    b[[i]] <- side
    RunNN(ai, b)
  }, numeric(1))
  # We don't want the AI to cheat: occupied cells can never be chosen.
  res[board != 0] <- -Inf
  # We choose the best one (linear index of the highest score)
  which.max(res)
}
Move <- function(ai, board, side) {
  # Ask the network for the best cell for `side`, apply it, and return both
  # the updated board and the chosen cell index as list(board, move).
  move <- RunAI(ai, board, side)
  board[[move]] <- side
  # (The original had a stray bare `board` expression here; it was evaluated
  # and discarded — dead code — so it has been removed.)
  list(board, move)
}
IsMovePossible <- function(board) {
  # TRUE while at least one empty (0) cell remains.
  # any() is the idiomatic (and cheaper) form of length(which(...)) > 0.
  any(board == 0)
}
# So now we have all we need to make a neural network play tic-tac-toe,
# but it won’t be very good at it – until we teach it. One way to do that is to
# sit in front of the computer for a couple of weeks, playing game after game
# against our pet network until it is worthy… or: behold the Random Player!
NNvsRandomPlayer <- function(tic.ai) {
  # Plays one full game: the network moves as side 1, a uniformly random
  # player as side -1. Starting side is chosen at random. Returns the
  # EvaluateBoard result: 1 (network wins), -1 (random wins), 0 (tie).
  side <- sample(c(-1, 1), 1)
  board <- GenerateEmptyBoard()
  outcome <- 0
  while (outcome == 0 && IsMovePossible(board)) {
    if (side == -1) {
      # Make a valid move completely at random and see what happens
      board[[sample(which(board == 0), 1)]] <- side
    } else {
      board <- Move(tic.ai, board, side)[[1]]
    }
    # print(DisplayBoard(board))
    outcome <- EvaluateBoard(board)
    side <- -side
  }
  outcome
}
NNvsRandom <- function(num) {
  # Plays `num` games of the pre-trained network against the random player.
  # Returns c(wins, ties, losses) from the network's point of view.
  #
  # The weight matrix is loop-invariant, so build it once instead of on
  # every iteration as the original did.
  tic.ai <- matrix(
    c(-0.091560, 0.016083, 0.019564, 0.069865, 0.033497, 0.081025,
      -0.013193, 0.083981, -0.069655, -0.038064, -0.083860),
    nrow = 1,
    ncol = numWeights
  )
  results <- c(0, 0, 0)
  for (i in seq_len(num)) {
    winner <- NNvsRandomPlayer(tic.ai)
    # (A discarded sprintf("Winner %i", winner) used to sit here; its value
    # was never printed, so it was dead code and has been removed.)
    if (winner == 1) {
      results[1] <- results[1] + 1
    } else if (winner == 0) {
      results[2] <- results[2] + 1
    } else {
      results[3] <- results[3] + 1
    }
  }
  results
}
# So now we have a worthy opponent let’s see how our network does in a series of 10,000 games:
# Wins: 40.53
# Ties: 13.04
# Losses: 46.43
# Hmm… not very good, but let’s train it:
# install.packages('DEoptim')
library(DEoptim)
TrainAI <- function() {
  # Trains the network by optimising its weights with differential evolution.
  # Returns the best weight matrix found (via TrainAIEval).
  #
  # Fitness function: mean game outcome against the random player, negated
  # because DEoptim minimises.
  Eval <- function(w) {
    tic.ai <- matrix(w, nrow = neurons)
    # Playing against a random player is nondeterministic, so stabilise the
    # estimate: the median of 20 means of 20 games each.
    -median(vapply(1:20, function(j) {
      mean(vapply(1:20, function(i) NNvsRandomPlayer(tic.ai), numeric(1)))
    }, numeric(1)))
  }
  TrainAIEval(Eval, DEoptim::DEoptim.control(
    trace = 0,
    parallelType = 0,
    NP = 10,
    VTR = -1.0
  ))
}
TrainAIEval <- function(eval, ctrl) {
  # Optimise the flattened weight vector with DEoptim. This is a global
  # optimisation problem, so a differential evolution algorithm seems
  # sufficient. Each weight is searched within [-0.1, 0.1].
  # Returns the best weights reshaped into the network's matrix form.
  n.par <- length(InitNN())
  lower <- rep(-0.1, n.par)
  upper <- rep(0.1, n.par)
  fit <- DEoptim::DEoptim(eval, lower, upper, ctrl)
  matrix(fit$optim$bestmem, nrow = neurons)
}
# After the training our impressive single-hidden-neuron-network becomes better (again 10,000 games):
# Wins: 75.85
# Ties: 04.80
# Losses: 19.35