Skip to content

Commit

Permalink
dev Tworr env, openai#3
Browse files Browse the repository at this point in the history
  • Loading branch information
mahdinobar committed Mar 14, 2024
1 parent 91e6820 commit 3ee7753
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions spinup/backup_gym/gym/envs/classic_control/tworr.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,21 @@ def reset(self):
# The reset method should return a tuple of the initial observation and some auxiliary information.
# states=(xd,yd,q1,q2,dq1,dq2,qd1,qd2,dqd1,dqd2,tau1_hat,tau2_hat)
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(12,)) #TODO correct initialization
self.state_buffer= None
self.state_buffer=self.state #initialize episode state buffer
self.t = 0
return self._get_ob(), {}

def step(self, a):
self.state_buffer=np.vstack((self.state_buffer,self.state))
s_t = self.state[-1,:] #states at t
s_tm1 = self.state[-2, :] #states at t-1
if self.t==0:
s_t = self.state_buffer[:] # states at t
s_tm1 = s_t # TODO check. states at t-1
else:
s_t = self.state_buffer[-1, :] # states at t
s_tm1 = self.state_buffer[-2, :] #states at t-1
# TODO
# Add noise to the torque action
if self.self.torque_noise_max > 0:
torque += self.np_random.uniform(-self.self.torque_noise_max, self.self.torque_noise_max)
if self.torque_noise_max > 0:
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
# choose a sample target desired position to test IK
xd = self.xd[self.t]
yd = self.yd[self.t]
Expand All @@ -173,6 +176,7 @@ def step(self, a):
obs=np.array([xd,yd,q_FD[0,1],q_FD[2,1],q_FD[1,1],q_FD[3,1],q_d[0],q_d[1],dqd_t[0],dqd_t[1],tau1_hat,tau2_hat])
# update states
self.state=obs
self.state_buffer=np.vstack((self.state_buffer,self.state))
# update time index
self.t += 1
# check done episode
Expand All @@ -198,7 +202,7 @@ def render(self, mode='human'):

if self.viewer is None:
self.viewer = rendering.Viewer(500,500)
bound = self.self.LINK_LENGTH_1 + self.self.LINK_LENGTH_2 + 0.2 # 2.2 for default
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
self.viewer.set_bounds(-bound,bound,-bound,bound)

if s is None: return None
Expand All @@ -207,7 +211,7 @@ def render(self, mode='human'):

xys = np.array([[0,0], p1, p2])[:,::-1]
thetas = [s[2], s[3]] # TODO check compatible with rendering
link_lengths = [self.self.LINK_LENGTH_1, self.self.LINK_LENGTH_2]
link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]

self.viewer.draw_line((-2.2, 1), (2.2, 1))
for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
Expand Down

0 comments on commit 3ee7753

Please sign in to comment.