# ambulance_environment_uniform.py
import pickle

import numpy as np

from agents import ambulance_agent
from eNet_Agent import eNet
from eNet_Agent import eNetAmbulancce  # imported but unused in this script
from data_Agent import dataUpdateAgent
from src import environment
from src import experiment
''' Parameters used in the experiment '''
epLen = 5        # horizon: steps per episode
nEps = 2000      # number of episodes
numIters = 50    # independent repetitions per configuration
loc = 0.8 + np.pi / 60   # unused in this uniform variant
scale = 2                # unused in this uniform variant


def arrivals():
    # Call locations are drawn i.i.d. from Uniform(0, 1).
    return np.random.uniform(0, 1)
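# Illustrative sanity check (kept commented out so it does not change the
# script's behavior): the empirical mean of many uniform arrivals should
# land near 0.5.
# sample = np.array([arrivals() for _ in range(10_000)])
# assert 0.45 < sample.mean() < 0.55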
alpha = 1            # cost trade-off parameter passed to the environment
starting_state = 0.5

env = environment.make_ambulanceEnvMDP(epLen, arrivals, alpha, starting_state)
##### PARAMETER TUNING FOR THE AMBULANCE ENVIRONMENT
# The scaling hyperparameter passed to each agent is tuned by grid search.
scaling_list = [0.01, 0.1, 0.25, 0.4, 0.5, 0.6, 0.75, 1, 1.5, 2, 5]
# scaling_list = [0.25, .01]  # alpha = 0.25
# scaling_list = [0.25, .1]   # alpha = 1

max_reward_adapt = 0
max_reward_e_net = 0
opt_adapt_scaling = 0.01
opt_e_net_scaling = 0.01

# TRYING OUT EACH SCALING FACTOR TO FIND THE OPTIMAL ONE
for scaling in scaling_list:
    # RUNNING EXPERIMENT FOR THE ADAPTIVE ALGORITHM
    agent_list_adap = []
    for _ in range(numIters):
        agent_list_adap.append(ambulance_agent.AmbulanceAgent(epLen, nEps, scaling))

    config = {'seed': 1, 'epFreq': 1, 'targetPath': './tmp.csv', 'deBug': False,
              'nEps': nEps, 'recFreq': 10, 'numIters': numIters}
    exp = experiment.Experiment(env, agent_list_adap, config)
    adap_fig = exp.run()
    dt_adapt_data = exp.save_data()

    # Score this scaling by the mean reward (across iterations) of the final
    # recorded episode, and keep the best-performing configuration.
    final_reward_adapt = dt_adapt_data.groupby(['episode']).mean().tail(1)['epReward'].iloc[0]
    if final_reward_adapt > max_reward_adapt:
        max_reward_adapt = final_reward_adapt
        opt_adapt_scaling = scaling
        dt_adapt = dt_adapt_data
        opt_adapt_agent_list = agent_list_adap
    # RUNNING EXPERIMENT FOR THE EPSILON-NET ALGORITHM
    epsilon = (nEps * epLen)**(-1 / 4)
    action_net = np.arange(start=0, stop=1, step=epsilon)
    state_net = np.arange(start=0, stop=1, step=epsilon)
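    # Worked check: with nEps = 2000 and epLen = 5, epsilon = 10000**(-1/4) = 0.1,
    # so each net discretizes [0, 1) into the 10 points 0.0, 0.1, ..., 0.9.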
    agent_list = []
    for _ in range(numIters):
        agent_list.append(eNet(action_net, state_net, epLen, scaling, (1, 1)))

    exp = experiment.Experiment(env, agent_list, config)
    exp.run()
    dt_net_data = exp.save_data()

    final_reward_net = dt_net_data.groupby(['episode']).mean().tail(1)['epReward'].iloc[0]
    if final_reward_net > max_reward_e_net:
        max_reward_e_net = final_reward_net
        opt_e_net_scaling = scaling
        dt_net = dt_net_data
print('optimal adaptive scaling:', opt_adapt_scaling)
print('optimal epsilon-net scaling:', opt_e_net_scaling)
# RUNNING THE MEDIAN HEURISTIC ALGORITHM
input('Press Enter to run the heuristic agents...')

# Median agent: relocate to the median of the observed call locations.
agent_list = []
for _ in range(numIters):
    agent_list.append(dataUpdateAgent(epLen, np.median, alpha))

exp = experiment.Experiment(env, agent_list, config)
exp.run()
dt_median = exp.save_data()
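# (The median is a natural heuristic here: in one dimension it minimizes the
# summed absolute distance to past calls. Worked check with calls
# [0.1, 0.4, 0.9]: np.median gives 0.4, with total distance
# 0.3 + 0.0 + 0.5 = 0.8, which no other point improves on.)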
# RUNNING THE NO-MOVEMENT HEURISTIC ALGORITHM
# "Don't move" agent: always stay at the most recent location.
def no_move(l):
    return l[-1]
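# Example: if the data list passed in is [0.3, 0.7, 0.2], no_move returns 0.2,
# so the ambulance stays wherever the last step left it.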
agent_list = []
for _ in range(numIters):
    agent_list.append(dataUpdateAgent(epLen, no_move, alpha))

exp = experiment.Experiment(env, agent_list, config)
exp.run()
dt_no_move = exp.save_data()
# SAVING DATA TO CSV
dt_adapt.to_csv('data/ambulance_uniform_adapt_1.csv')
dt_net.to_csv('data/ambulance_uniform_net_1.csv')
dt_median.to_csv('data/ambulance_uniform_median_1.csv')
dt_no_move.to_csv('data/ambulance_uniform_no_1.csv')

# Pickle the last agent from the best adaptive run; 'with' ensures the file
# handle is closed after the dump.
agent = opt_adapt_agent_list[-1]
with open('data/ambulance_uniform_agent_1.obj', 'wb') as filehandler:
    pickle.dump(agent, filehandler)
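# To reload the pickled agent later (usage sketch):
# with open('data/ambulance_uniform_agent_1.obj', 'rb') as f:
#     saved_agent = pickle.load(f)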