-
Notifications
You must be signed in to change notification settings - Fork 1
/
bruteforcePolicy.py
47 lines (39 loc) · 1.49 KB
/
bruteforcePolicy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 16 12:08:05 2018
@author: Jonathan Scott, Jinyoung Lim, So Jin Oh
"""
import gym
import numpy as np
def _tileAheadOfMario(observation, lookAhead=2):
    """Locate Mario in the tile grid and read the tile ahead of him.

    Args:
        observation: 2-D array of tile codes. Cells equal to 3 are taken
            to be Mario (presumably the tile encoding of the "Tiles"
            environments -- confirm against the environment docs).
        lookAhead: How many columns to the right of Mario to inspect.

    Returns:
        ``None`` when no cell equals 3, otherwise a tuple
        ``(marioPosX, marioPosY, tile)``. ``tile`` is 0 when the inspected
        column lies outside the grid, so callers never index out of range.
    """
    marioRows, marioCols = np.where(observation == 3)
    if marioCols.size == 0:
        return None
    # Only the first match is used, as there should be a single Mario tile.
    marioPosX = marioCols.item(0)
    marioPosY = marioRows.item(0)
    aheadX = marioPosX + lookAhead
    if aheadX >= observation.shape[1]:
        # Mario is at the right edge of the visible grid: nothing to see.
        return marioPosX, marioPosY, 0
    return marioPosX, marioPosY, observation[marioPosY, aheadX]


def bruteForcePolicy(env, playTime=1000, maxSteps=100):
    """Walk Mario to the right, jumping whenever a tile blocks the way.

    Every step the tile two columns to the right of Mario is inspected; a
    non-zero tile switches the next input to "right + jump", a zero tile
    switches it back to plain "right". When Mario cannot be found in the
    observation the previous input is repeated.

    Args:
        env: Gym-style environment providing ``reset()``, ``render()``,
            ``step(action)`` and ``close()``. ``step`` must return
            ``(observation, reward, done, info)`` where ``info`` contains
            the key ``'distance'``.
        playTime: Currently unused; kept so existing callers keep working.
        maxSteps: Maximum number of ``env.step`` calls. Defaults to 100,
            the step count that used to be hard-coded.

    Returns:
        None. The environment is always closed before returning, even if
        a step raises.
    """
    # TODO: make this run based on time (playTime) and kill it after the
    # time is over.
    try:
        env.reset()
        # Action layout: [Up, L, Down, R, A(JUMP), B]
        action = [0, 0, 0, 1, 0, 0]
        print("START")
        for i in range(maxSteps):
            env.render()
            observation, _, done, info = env.step(action)
            print("Info has the distance: ", type(info['distance']))
            print("________observation________")
            located = _tileAheadOfMario(observation)
            if located is not None:
                marioPosX, marioPosY, twoRight = located
                print("i: ", i, " mario location index: X==", marioPosX,
                      " Y==", marioPosY)
                print("twoRight : ", twoRight)
                if twoRight != 0:
                    action = [0, 0, 0, 1, 1, 0]
                else:
                    action = [0, 0, 0, 1, 0, 0]
            if done:
                # Stepping a finished episode is undefined for Gym
                # environments, so stop as soon as the episode ends.
                break
        print("DONE")
    finally:
        env.close()  # closes game
if __name__ == "__main__":
    # Script entry point: the environment has to be built afresh for every
    # run before it is handed to the policy.
    LEVEL_ID = 'SuperMarioBros-1-1-Tiles-v0'
    env = gym.make(LEVEL_ID)
    bruteForcePolicy(env)