# multi_evaluator.py
import numpy as np
import random
from minesweeper_environment import MinesweeperEnvironment
from minesweeper import MinesweeperCore
from agents.csp import MinesweeperAgent
import matplotlib.pyplot as plt
import os
from agents.L4MSAgent import L4MSAgent
# This script runs an agent on Minesweeper and evaluates it.
#
# On some PCs the following TensorFlow session configuration was needed in
# order to run on the GPU:
#     from tensorflow.compat.v1 import ConfigProto
#     from tensorflow.compat.v1 import InteractiveSession
#     config = ConfigProto()
#     config.gpu_options.allow_growth = True
#     session = InteractiveSession(config=config)
#
# Hide every CUDA device so the evaluation runs on the CPU.
# Comment out the next line to enable running on your GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
def random_actor(state, unknown_value=None):
    """Pick a uniformly random cell of the board that is still unknown.

    The candidate cells are enumerated once and one of them is drawn, so the
    call always terminates and works for non-square boards as well.

    Args:
        state: 2-D board array, indexed as ``state[x, y]``.
        unknown_value: Cell value that marks an unknown cell. Defaults to
            ``MinesweeperCore.UNKNOWN_CELL``.

    Returns:
        Tuple ``(x, y)`` of plain ints indexing an unknown cell of ``state``.

    Raises:
        ValueError: If ``state`` contains no unknown cell.
    """
    if unknown_value is None:
        unknown_value = MinesweeperCore.UNKNOWN_CELL
    # One row per unknown cell, each row holding that cell's (x, y) indices.
    candidates = np.argwhere(np.asarray(state) == unknown_value)
    if len(candidates) == 0:
        raise ValueError('No unknown cell left to play.')
    x, y = candidates[random.randrange(len(candidates))]
    # Convert NumPy integers to plain ints, matching the original return type.
    return int(x), int(y)
# Game configuration. The CSP agents can play on any board size; the
# remaining agents can only play on an 8x8 board.
size = 8
NUM_EPISODES = 10
bombs = [8, 10, 12]  # Bomb count of each evaluated game configuration.

# Choose an agent: 'h_csp' (heuristic CSP), 'nh_csp' (non-heuristic CSP)
# or 'l4ms'.
agent_name = 'l4ms'

# Fail fast on a typo in agent_name; otherwise `agent` is never created and
# the script only breaks later, inside the evaluation loop, with a NameError.
SUPPORTED_AGENTS = ('h_csp', 'nh_csp', 'l4ms')
if agent_name not in SUPPORTED_AGENTS:
    raise ValueError(
        'Unknown agent_name {!r}; expected one of {}.'.format(
            agent_name, ', '.join(SUPPORTED_AGENTS)))

# The L4MS agent is built once here. The CSP agents take the bomb count as a
# constructor argument, so they are created per game in the evaluation loop.
if agent_name == 'l4ms':
    agent = L4MSAgent(size)
    weights = 'results/best_model.hdf5'
    if os.path.exists(weights):
        print('Loading weights from previous learning session.')
        agent.load(weights)
    else:
        # Evaluation still proceeds, with whatever weights the agent starts with.
        print('No weights found from previous learning session.')
# Evaluation: for every bomb count in `bombs`, play NUM_EPISODES episodes and
# collect one list of per-episode statistics per game configuration.
num_games = len(bombs)  # One game configuration per entry of `bombs`.
is_csp_agent = agent_name in ('h_csp', 'nh_csp')

# Each list below receives one entry per game configuration.
games_open_percentage = []     # Open-board percentage at the end of every episode.
games_plays_to_die = []        # Plays made before the losing one (lost episodes only).
games_wrong_plays = []         # 100 * num_incorretas / num_plays (l4ms only).
games_victory_percentage = []  # Percentage of episodes won.
game_guesses = []              # Open percentage at the last flagged guess (CSP, won episodes).

for current_game in range(num_games):
    num_bombs = bombs[current_game]
    game = MinesweeperEnvironment(size, size, num_bombs)
    if is_csp_agent:
        # CSP agents receive the bomb count, so a new one is built per game.
        heuristic = agent_name == 'h_csp'
        agent = MinesweeperAgent(size, num_bombs, heuristic)
    elif agent_name == 'l4ms':
        # Restart the agent's play counters for this game configuration.
        agent.num_incorretas = 0
        agent.num_plays = 0

    victories = 0
    plays_to_die = []
    open_percentage = []
    guesses = []
    for episodes in range(1, NUM_EPISODES + 1):
        game.reset()
        agent.reset()
        plays = 0
        guess_percentage = 0
        while not game.is_finished():
            # action = random_actor(game.get_state())  # Use this to test random policy
            action = agent.act(game.get_state())
            game.step(action[0], action[1])
            if is_csp_agent and agent.guess_flag:
                # Remember how much of the board was open at the latest guess.
                guess_percentage = game.get_open_percentage() * 100
            plays += 1

        if game.is_victory():
            victories += 1
            if is_csp_agent:
                guesses.append(guess_percentage)
        else:
            # The final play is the losing one, so it is not counted.
            plays_to_die.append(plays - 1)
        episode_open_percentage = game.get_open_percentage() * 100
        open_percentage.append(episode_open_percentage)
        print('Game', current_game + 1, '/', num_games, 'Played',episodes,'/',NUM_EPISODES, 'Open percentage:', episode_open_percentage, '%')

    game_guesses.append(guesses)
    games_open_percentage.append(open_percentage)
    games_plays_to_die.append(plays_to_die)
    # Guard the ratios so a zero episode/play count reports 0 instead of crashing.
    victory_percentage = victories / NUM_EPISODES if NUM_EPISODES else 0.0
    games_victory_percentage.append(victory_percentage * 100)
    if agent_name == 'l4ms':
        wrong_percentage = (agent.num_incorretas / agent.num_plays
                            if agent.num_plays else 0.0)
        games_wrong_plays.append(wrong_percentage * 100)
# Reporting: print the per-configuration rates, then show the histograms.
# Everything iterates over `bombs`, so labels and counts follow the
# configuration instead of being hard-coded for three games.
HIST_COLORS = ('b', 'darkgreen', 'r')  # Reused cyclically if there are more games.


def _print_rates(title, rates, separator):
    """Print `title` followed by '<bombs> bombs - <rate>' for every game.

    Entries are joined by `separator`; the last entry is closed with '%'.
    """
    entries = list(zip(bombs, rates))
    fields = []
    for position, (bomb_count, rate) in enumerate(entries, start=1):
        terminator = '%' if position == len(entries) else separator
        fields.extend([bomb_count, 'bombs -', rate, terminator])
    print(title, *fields)


def _plot_histograms(samples_per_game, bins, legend_loc, xlabel, title):
    """Overlay one histogram per game configuration and show the figure."""
    for index, (bomb_count, samples) in enumerate(zip(bombs, samples_per_game)):
        plt.hist(samples, bins=bins, label='{} Bombs'.format(bomb_count),
                 alpha=0.6, color=HIST_COLORS[index % len(HIST_COLORS)])
    plt.legend(loc=legend_loc)
    plt.xlabel(xlabel)
    plt.ylabel('# episodes')
    plt.title(title)
    plt.show()


_print_rates('Win rate:\n', games_victory_percentage, '%\n')
if agent_name == 'l4ms':
    _print_rates('Wrong plays rate:', games_wrong_plays, '%//')

# Plots return history
# The per-game lists can differ in length (or be empty), so the maximum is
# taken over the flattened values rather than through a NumPy array.
longest_run = max(
    (plays for game_plays in games_plays_to_die for plays in game_plays),
    default=0)
# Integer bin edges 0..longest_run+1: every play count gets its own bin and
# the largest observed value is not left outside the last edge.
play_bins = list(range(0, longest_run + 2))
_plot_histograms(games_plays_to_die, play_bins, 'upper right', '# plays',
                 'Histogram of number of plays untill defeat')

_plot_histograms(games_open_percentage, 20, 'upper left', '% open',
                 'Histogram of open percentage')

if agent_name == 'h_csp' or agent_name == 'nh_csp':
    _plot_histograms(game_guesses, 20, 'upper right', '% open ',
                     'Open board percentage after last guessed move')