From 42e0b5ebad32ad39e96aad4b138715a72cdccb8f Mon Sep 17 00:00:00 2001 From: mahdinobar Date: Fri, 22 Mar 2024 22:46:32 +0100 Subject: [PATCH] adding tworr with PID and SAC environment, dev, #1 --- .../gym/envs/classic_control/tworr.py | 26 ++++++++++++++----- .../examples/pytorch/inverted_pendulum_SAC.py | 2 +- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/spinup/backup_gym/gym/envs/classic_control/tworr.py b/spinup/backup_gym/gym/envs/classic_control/tworr.py index 16e77f8b0..0e040cc61 100644 --- a/spinup/backup_gym/gym/envs/classic_control/tworr.py +++ b/spinup/backup_gym/gym/envs/classic_control/tworr.py @@ -69,7 +69,7 @@ def two_link_forward_kinematics(self,q): """ x = self.LINK_LENGTH_1 * np.cos(q[0]) + self.LINK_LENGTH_2 * np.cos(q[0] + q[1]) y = self.LINK_LENGTH_1 * np.sin(q[0]) + self.LINK_LENGTH_2 * np.sin(q[0] + q[1]) - return x, y + return [x, y] def two_link_inverse_kinematics(self,x, y): """Compute two inverse kinematics solutions for a target end position. The @@ -151,6 +151,7 @@ def f(t, s): sol = solve_ivp(lambda t, s: f(t, s), [tspan[0], tspan[-1]], s_init, t_eval=tspan, rtol=1e-5) return sol.y[:,1] + def two_link_jacobian(self, q, ld=0.1): J=np.array([[-self.LINK_LENGTH_1*np.sin(q[0])-self.LINK_LENGTH_2*np.sin(q[0]+q[1]), -self.LINK_LENGTH_2*np.sin(q[0]+q[1])], [self.LINK_LENGTH_1*np.cos(q[0])+self.LINK_LENGTH_2*np.cos(q[0]+q[1]), +self.LINK_LENGTH_2*np.cos(q[0]+q[1])]]) @@ -169,7 +170,19 @@ def pseudoInverseMat(A, ld): # Compute the right pseudoinverse. pinvA = np.linalg.lstsq((np.matmul(A, A.T) + ld * ld * np.eye(m, m)).T, A)[0].T return pinvA - return pseudoInverseMat(J,ld) + return pseudoInverseMat(J,ld), J + + def two_link_inverse_kinematics_joint_speeds(dxdt, dydt, q1, q2): + """ + given joint positions and end effector cartesian speed, using kinematics, returns joint speeds + """ + alpha = self.LINK_LENGTH_1 * np.sin(q1) + self.LINK_LENGTH_2 * np.sin(q1 + q2) + beta = self.LINK_LENGTH_2 * np.cos(q1 + q2) + gama = -self.LINK_LENGTH_1 * np.cos(q1) - self.LINK_LENGTH_2 * np.cos(q1 + q2) + dq1dt = (1 / alpha) * (-dxdt - self.LINK_LENGTH_2 * dydt / beta * np.sin(q1 + q2)) / ( + 1 + self.LINK_LENGTH_2 * gama * np.sin(q1 + q2) / (alpha * beta)) + dq2dt = (dydt + gama * dq1dt) / beta + return np.array([dq1dt, dq2dt]) def q_command(r_ee, v_ee, Jpinv, rd, vd, e, dt): """ @@ -203,11 +216,10 @@ def reset(self): rd_minus1 = np.array([xd[0], yd[0]]) v_minus1 = np.array([vxd, vyd]) ree_minus1 = rd_minus1 - q_hat_soln1, q_hat_soln2 = two_link_inverse_kinematics(rd_minus1[0], rd_minus1[1], l1, l2) + q_hat_soln1, q_hat_soln2 = self.two_link_inverse_kinematics(rd_minus1[0], rd_minus1[1]) q_minus1 = q_hat_soln1 e_minus1 = rd_minus1 - ree_minus1 - dq_t_minus1 = two_link_inverse_kinematics_joint_speeds(v_minus1[0], v_minus1[1], q_minus1[0], q_minus1[1], - l1=1.0, l2=1.0) + dq_t_minus1 = self.two_link_inverse_kinematics_joint_speeds(v_minus1[0], v_minus1[1], q_minus1[0], q_minus1[1]) # starting from t=-1 self.e = e_minus1.reshape(1, 2) self.q = q_minus1.reshape(1, 2) @@ -224,7 +236,7 @@ def reset(self): # at time t=0 q_t=self.q[-1,:] dq_t = self.dq[-1, :] - r_hat_t = self.two_link_forward_kinematics(q_t, l1, l2) + r_hat_t = self.two_link_forward_kinematics(q_t) Jpinv_t, J_t = two_link_jacobian(q_t, ld=0.01) v_hat_t = (r_hat_t - self.r_hat[-1, :]) / self.dt self.r_hat=np.vstack((self.r_hat,r_hat_t)) @@ -289,7 +301,7 @@ def step(self,a): vd = np.array([vxd, vyd]) q_t = self.q[-1, :] dq_t = self.dq[-1, :] - r_hat_t = self.two_link_forward_kinematics(q_t, l1, l2) + r_hat_t = self.two_link_forward_kinematics(q_t) Jpinv_t, J_t = self.two_link_jacobian(q_t, ld=0.01) v_hat_t = (r_hat_t - r_hat[-1, :]) / self.dt self.r_hat = np.vstack((self.r_hat, r_hat_t)) diff --git a/spinup/examples/pytorch/inverted_pendulum_SAC.py b/spinup/examples/pytorch/inverted_pendulum_SAC.py index 954515606..1abeeeebd 100644 --- a/spinup/examples/pytorch/inverted_pendulum_SAC.py +++ b/spinup/examples/pytorch/inverted_pendulum_SAC.py @@ -11,7 +11,7 @@ TRAIN=False DEBUG=True -env_fn = lambda: gym.make('Tworr-v0') +env_fn = lambda: gym.make('Tworr-v1') exp_name = "Tworrv0_0" if __name__ == '__main__': if TRAIN: