-
Notifications
You must be signed in to change notification settings - Fork 4
/
demo.py
46 lines (40 loc) · 1.51 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
Author: Ali Hajimirza ([email protected])
Copyright Ali Hajimirza, free for use under MIT license.
"""
import numpy as np
from algorithm.softmax import softmax
from algorithm.epsilon_greedy import epsilon_greedy
import matplotlib.pyplot as plt
def simulator(algorithm, arg, mean_arms, std=1, num_sim=2000, num_steps=1000):
    """Measure how often a bandit algorithm picks the best arm at each step.

    Runs ``num_sim`` independent simulations of ``num_steps`` pulls each and
    counts, per step, how many simulations chose the arm with the highest
    mean reward.

    Args:
        algorithm: Callable invoked as ``algorithm(arg, len(mean_arms))``.
            The object it returns must provide ``choose_arm()`` (returning an
            arm index) and ``update_rewards(reward, arm_indx)``.
        arg: Passed through unchanged as the first argument of ``algorithm``.
        mean_arms: Sequence holding the mean reward of each arm.
        std: Standard deviation of the normally distributed reward.
        num_sim: Number of independent simulations to run.
        num_steps: Number of arm pulls per simulation.

    Returns:
        A NumPy array of length ``num_steps``; entry ``t`` is the percentage
        (0-100) of simulations in which the arm chosen at step ``t`` was the
        arm with the highest mean.
    """
    correctness = np.zeros(num_steps)
    optimal_indx = np.argmax(mean_arms)
    # ``range`` instead of the Python 2-only ``xrange`` so the simulation
    # runs unchanged on both Python 2 and Python 3.
    for _ in range(num_sim):
        # A fresh algorithm instance per simulation keeps the runs independent.
        alg = algorithm(arg, len(mean_arms))
        for t in range(num_steps):
            arm_indx = alg.choose_arm()
            reward = np.random.normal(mean_arms[arm_indx], std)
            alg.update_rewards(reward, arm_indx)
            # Measuring correctness
            if optimal_indx == arm_indx:
                correctness[t] += 1
    return np.divide(correctness, num_sim) * 100
def line_plot(data_arrays, xlabel, ylabel, labels, title, f):
    """Draw one line per data series and save the figure as a PNG.

    Args:
        data_arrays: Iterable of series; each one is passed to ``plt.plot``.
        xlabel: Text for the x axis.
        ylabel: Text for the y axis.
        labels: Legend entries, paired in order with ``data_arrays``.
        title: Figure title.
        f: Destination handed to ``plt.savefig``.
    """
    plt.suptitle(title, fontsize=14)

    # Keep the line handles so the legend can pair them with ``labels``.
    handles = []
    for series in data_arrays:
        (handle,) = plt.plot(series)
        handles.append(handle)

    plt.legend(handles, labels, loc=2)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.savefig(f, format="png")
    # Clear the current figure after saving.
    plt.clf()
if __name__ == '__main__':
    # Mean reward of each of the five arms; the last arm is the optimal one.
    mean_arms = [-2,-1,0,1,2]

    # One curve per (algorithm, parameter) pair, in the same order as the
    # legend labels passed to line_plot below.
    data_arrays = []
    data_arrays.append(simulator(epsilon_greedy, 0.1, mean_arms))
    data_arrays.append(simulator(epsilon_greedy, 0.01, mean_arms))
    data_arrays.append(simulator(softmax, 1 ,mean_arms))
    data_arrays.append(simulator(softmax, 10 ,mean_arms))

    # Open the output inside a ``with`` block so the file handle is flushed
    # and closed once the plot is written, instead of being left open.
    with open('q3-2.png','wb') as out_file:
        line_plot(data_arrays, 'Steps', 'Average percent optimal action', ['Epsilon greedy: 0.1', 'Epsilon greedy: 0.01','Softmax: 1', 'Softmax: 10'], 'N-Armed Bandit', out_file)