minesweeper_environment.py
import numpy as np

from minesweeper import MinesweeperCore


class MinesweeperEnvironment:
    """
    Reinforcement learning wrapper around the core game.
    """
    def __init__(self, height, width, num_bombs, win_threshold=1.0):
        """
        Initializes the RL wrapper.

        :param height: height of the board.
        :type height: int.
        :param width: width of the board.
        :type width: int.
        :param num_bombs: number of bombs on the board.
        :type num_bombs: int.
        :param win_threshold: fraction of the board that must be uncovered to count as a victory.
        :type win_threshold: float.
        """
        self.height = height
        self.width = width
        self.num_bombs = num_bombs
        self.game = MinesweeperCore(height, width, num_bombs, win_threshold)

    def reset(self):
        """
        Resets the game board.

        :return: reset board.
        :rtype: numpy matrix.
        """
        return self.game.reset()

    def reward_engineering(self):
        """
        Computes the reward for the current game state.

        :return: reward for the last action.
        :rtype: int.
        """
        # Reward scheme: +1 for every safe move while the game is running;
        # on victory, a bonus equal to the number of uncovered cells;
        # on defeat, 0.
        reward = 0
        if not self.is_finished():
            reward = 1
        elif self.is_victory():
            open_table = self.game.table != self.game.UNKNOWN_CELL
            reward = int(np.sum(open_table))
        return reward

    def step(self, x, y):
        """
        Performs the discovering action on the (x, y) tile and computes the reward it produces.

        :param x: x coordinate on the game board.
        :type x: int.
        :param y: y coordinate on the game board.
        :type y: int.
        :return: (game board after the action was executed,
                  reward produced by the action,
                  flag indicating whether the game has ended).
        :rtype: (numpy matrix, int, bool).
        """
        self.game.play(x, y)
        next_state = self.game.get_board()
        reward = self.reward_engineering()
        done = not self.game.still_playing
        return next_state, reward, done

    def get_state(self, xray=False):
        """
        Returns the board.

        :param xray: whether the board should reveal all bomb positions.
        :type xray: bool.
        :return: game board.
        :rtype: numpy matrix.
        """
        return self.game.get_board(xray)

    def is_finished(self):
        """
        Returns whether the game has finished.

        :return: finished flag.
        :rtype: bool.
        """
        return not self.game.still_playing

    def is_victory(self):
        """
        Returns whether the game ended in victory.

        :return: victory flag.
        :rtype: bool.
        """
        return self.game.isVictory()

    def get_open_percentage(self):
        """
        Computes the fraction of non-bomb cells that have been uncovered.

        :return: uncovered fraction of the board, in [0, 1].
        :rtype: float.
        """
        open_table = self.game.table != self.game.UNKNOWN_CELL
        return np.sum(open_table) / (self.height * self.width - self.num_bombs)

    def print_board(self, xray=False):
        """
        Prints the current state of the board.

        :param xray: whether the board should reveal all bomb positions.
        :type xray: bool.
        """
        print_table = self.game.get_board(xray)
        for i in range(self.height):
            for j in range(self.width):
                if print_table[i, j] == self.game.BOMB:
                    print("#", end=' ')
                elif print_table[i, j] == self.game.UNKNOWN_CELL:
                    print(".", end=' ')
                elif print_table[i, j] == self.game.CLEAR_CELL:
                    print("0", end=' ')
                else:
                    print(int(print_table[i, j]), end=' ')
            print("")

    def print_bomb_positions(self):
        """
        Prints all known bomb positions.
        """
        print("Bomb positions: " + str(self.game.bomb_positions))