Skip to content

Commit

Permalink
adding tworr with PID and SAC environment, dev, openai#1
Browse files Browse the repository at this point in the history
  • Loading branch information
mahdinobar committed Mar 22, 2024
1 parent 46fe545 commit 42e0b5e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 8 deletions.
26 changes: 19 additions & 7 deletions spinup/backup_gym/gym/envs/classic_control/tworr.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def two_link_forward_kinematics(self,q):
"""
x = self.LINK_LENGTH_1 * np.cos(q[0]) + self.LINK_LENGTH_2 * np.cos(q[0] + q[1])
y = self.LINK_LENGTH_1 * np.sin(q[0]) + self.LINK_LENGTH_2 * np.sin(q[0] + q[1])
return x, y
return [x, y]

def two_link_inverse_kinematics(self,x, y):
"""Compute two inverse kinematics solutions for a target end position. The
Expand Down Expand Up @@ -151,6 +151,7 @@ def f(t, s):
sol = solve_ivp(lambda t, s: f(t, s),
[tspan[0], tspan[-1]], s_init, t_eval=tspan, rtol=1e-5)
return sol.y[:,1]

def two_link_jacobian(self, q, ld=0.1):
J=np.array([[-self.LINK_LENGTH_1*np.sin(q[0])-self.LINK_LENGTH_2*np.sin(q[0]+q[1]), -self.LINK_LENGTH_2*np.sin(q[0]+q[1])],
[self.LINK_LENGTH_1*np.cos(q[0])+self.LINK_LENGTH_2*np.cos(q[0]+q[1]), +self.LINK_LENGTH_2*np.cos(q[0]+q[1])]])
Expand All @@ -169,7 +170,19 @@ def pseudoInverseMat(A, ld):
# Compute the right pseudoinverse.
pinvA = np.linalg.lstsq((np.matmul(A, A.T) + ld * ld * np.eye(m, m)).T, A)[0].T
return pinvA
return pseudoInverseMat(J,ld)
return pseudoInverseMat(J,ld), J

def two_link_inverse_kinematics_joint_speeds(dxdt, dydt, q1, q2):
"""
given joint positions and end effector cartesian speed, using kinematics, returns joint speeds
"""
alpha = self.LINK_LENGTH_1 * np.sin(q1) + self.LINK_LENGTH_2 * np.sin(q1 + q2)
beta = self.LINK_LENGTH_2 * np.cos(q1 + q2)
gama = -self.LINK_LENGTH_1 * np.cos(q1) - self.LINK_LENGTH_2 * np.cos(q1 + q2)
dq1dt = (1 / alpha) * (-dxdt - self.LINK_LENGTH_2 * dydt / beta * np.sin(q1 + q2)) / (
1 + self.LINK_LENGTH_2 * gama * np.sin(q1 + q2) / (alpha * beta))
dq2dt = (dydt + gama * dq1dt) / beta
return np.array([dq1dt, dq2dt])

def q_command(r_ee, v_ee, Jpinv, rd, vd, e, dt):
"""
Expand Down Expand Up @@ -203,11 +216,10 @@ def reset(self):
rd_minus1 = np.array([xd[0], yd[0]])
v_minus1 = np.array([vxd, vyd])
ree_minus1 = rd_minus1
q_hat_soln1, q_hat_soln2 = two_link_inverse_kinematics(rd_minus1[0], rd_minus1[1], l1, l2)
q_hat_soln1, q_hat_soln2 = self.two_link_inverse_kinematics(rd_minus1[0], rd_minus1[1])
q_minus1 = q_hat_soln1
e_minus1 = rd_minus1 - ree_minus1
dq_t_minus1 = two_link_inverse_kinematics_joint_speeds(v_minus1[0], v_minus1[1], q_minus1[0], q_minus1[1],
l1=1.0, l2=1.0)
dq_t_minus1 = self.two_link_inverse_kinematics_joint_speeds(v_minus1[0], v_minus1[1], q_minus1[0], q_minus1[1])
# starting from t=-1
self.e = e_minus1.reshape(1, 2)
self.q = q_minus1.reshape(1, 2)
Expand All @@ -224,7 +236,7 @@ def reset(self):
# at time t=0
q_t=self.q[-1,:]
dq_t = self.dq[-1, :]
r_hat_t = self.two_link_forward_kinematics(q_t, l1, l2)
r_hat_t = self.two_link_forward_kinematics(q_t)
Jpinv_t, J_t = two_link_jacobian(q_t, ld=0.01)
v_hat_t = (r_hat_t - self.r_hat[-1, :]) / self.dt
self.r_hat=np.vstack((self.r_hat,r_hat_t))
Expand Down Expand Up @@ -289,7 +301,7 @@ def step(self,a):
vd = np.array([vxd, vyd])
q_t = self.q[-1, :]
dq_t = self.dq[-1, :]
r_hat_t = self.two_link_forward_kinematics(q_t, l1, l2)
r_hat_t = self.two_link_forward_kinematics(q_t)
Jpinv_t, J_t = self.two_link_jacobian(q_t, ld=0.01)
v_hat_t = (r_hat_t - r_hat[-1, :]) / self.dt
self.r_hat = np.vstack((self.r_hat, r_hat_t))
Expand Down
2 changes: 1 addition & 1 deletion spinup/examples/pytorch/inverted_pendulum_SAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

TRAIN=False
DEBUG=True
env_fn = lambda: gym.make('Tworr-v0')
env_fn = lambda: gym.make('Tworr-v1')
exp_name = "Tworrv0_0"
if __name__ == '__main__':
if TRAIN:
Expand Down

0 comments on commit 42e0b5e

Please sign in to comment.