-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathtest_mdp.py
38 lines (32 loc) · 926 Bytes
/
test_mdp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from envs.mdp import StochasticMDPEnv
class Agent:
    """Fixed, non-learning policy that switches behavior once state 6 is seen.

    ``select_move`` returns action 1 for states below 6 until the first time
    it is handed state 6; from then on (and for any state that is not below
    6) it returns action 0.
    """

    def __init__(self):
        # Latch: flips to True the first time select_move() receives state 6
        # and is never reset afterwards.
        self.seen_6 = False

    def select_move(self, state):
        """Return the action (0 or 1) for ``state``, updating the latch.

        Args:
            state: Current state; compared against the integer 6.

        Returns:
            1 if ``state`` is below 6 and state 6 has not been seen yet,
            otherwise 0.
        """
        if state == 6:
            self.seen_6 = True
        pick_one = state < 6 and not self.seen_6
        return 1 if pick_one else 0

    def update(self, state, action, reward):
        """Accept a transition and do nothing; this policy does not learn."""
def main():
    """Drive an Agent through one StochasticMDPEnv run, logging to stdout.

    Prints the initial state, then the action, reward and resulting state of
    each step while the environment has not reported ``done``. Finishes by
    printing "DONE" followed by the reward of the last step taken.
    """
    env = StochasticMDPEnv()
    current = env.current_state
    print("State: %d" % current)

    agent = Agent()
    # The opening step is taken before the loop; its transition is not
    # handed to agent.update().
    chosen = agent.select_move(current)
    current, reward, done = env.step(chosen)

    while not done:
        # Report the step that was just taken before choosing the next one.
        # If the step ends the run, only its reward is printed (after the
        # loop), not its action or resulting state.
        for template, value in (
            ("Action: %d", chosen),
            ("Reward: %.2f", reward),
            ("State: %d", current),
        ):
            print(template % value)

        origin = current
        chosen = agent.select_move(origin)
        current, reward, done = env.step(chosen)
        agent.update(origin, chosen, reward)

    print("DONE")
    print(reward)
# Run the demo only when this file is executed as a script, so importing the
# module has no side effects.
if __name__ == "__main__":
    main()