From 6bdb4374ebb48666d1d008e2c7d38de43877c3af Mon Sep 17 00:00:00 2001 From: mahdinobar Date: Fri, 22 Mar 2024 22:37:02 +0100 Subject: [PATCH] adding tworr with PID and SAC environment, dev, #1 --- .../gym/envs/classic_control/tworr.py | 307 ++++++++++++++---- 1 file changed, 235 insertions(+), 72 deletions(-) diff --git a/spinup/backup_gym/gym/envs/classic_control/tworr.py b/spinup/backup_gym/gym/envs/classic_control/tworr.py index 65e14aee2..fc936981d 100644 --- a/spinup/backup_gym/gym/envs/classic_control/tworr.py +++ b/spinup/backup_gym/gym/envs/classic_control/tworr.py @@ -21,32 +21,41 @@ class TworrEnv(core.Env): } def __init__(self): - self.dt = 0.01 # [s] self.LINK_LENGTH_1 = 1. # [m] self.LINK_LENGTH_2 = 1. # [m] self.LINK_MASS_1 = 1. #: [kg] mass of link 1 - self.LINK_MASS_2 = 1. #: [kg] mass of link 2 - self.LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1 - self.LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2 + self.LINK_MASS_2 = 1. #: [kg] imperfect mass of link 2 + self.LINK_MASS_2_TRUE = 1.05 #: [kg] TRUE mass of link 2 + self.LINK_COM_POS_1 = 0.5 #r1: [m] distance of the center of mass of link 1 wrt joint + self.LINK_COM_POS_2 = 0.5 #r2: [m] distance of the center of mass of link 2 wrt joint self.torque_noise_max = 0. # TODO - self.MAX_TIMESTEPS = 100 # maximum timesteps per episode self.viewer = None self.state = None self.state_buffer= None self.t = 0 self.seed() - self.xd_init=1.2 - self.yd_init = 1.0 - self.xd = np.linspace(self.xd_init, self.xd_init+0.1, self.MAX_TIMESTEPS, endpoint=True) - self.yd = np.linspace(self.yd_init, self.yd_init+0.8, self.MAX_TIMESTEPS, endpoint=True) - # states=(xd,yd, q1,q2, dq1,dq2, qd1,qd2, dqd1,dqd2, tau1_hat,tau2_hat) - high_s = np.array([self.xd_init+1, self.yd_init+1, 1.4, 1.4, 0.2, 0.2, 1.4, 1.4, 0.2, 0.2, 1, 1]) - low_s = np.array([self.xd_init-1, self.yd_init-1, -1.4, -1.4, -0.2, -0.2, -1.4, -1.4, -0.2, -0.2, -1, -1]) + + self.xd_init = 1.5 + self.yd_init = 0.4 + self.dt = 1 / 10 # sec + self.MAX_TIMESTEPS = 100 # maximum timesteps per episode + self.vxd = -0.01 # m/s + self.vyd = 0.05 # m/s + deltax = vxd * dt * MAX_TIMESTEPS + deltay = vyd * dt * MAX_TIMESTEPS + self.xd = np.linspace(self.xd_init, self.xd_init + deltax, MAX_TIMESTEPS + 1, endpoint=True) + self.yd = np.linspace(self.yd_init, self.yd_init + deltay, MAX_TIMESTEPS + 1, endpoint=True) + + # TODO CHECK + high_s = np.array([self.xd_init+1, self.yd_init+1, 1.4, 1.4, 0.2, 0.2, 1.4, 1.4, 0.2, 0.2, 1, 1]) + low_s = np.array([self.xd_init-1, self.yd_init-1, -1.4, -1.4, -0.2, -0.2, -1.4, -1.4, -0.2, -0.2, -1, -1]) self.observation_space = spaces.Box(low=low_s, high=high_s, dtype=np.float32) high_a = np.array([0.1, 0.1]) low_a = np.array([-0.1, -0.1]) self.action_space = spaces.Box(low=low_a, high=high_a, dtype=np.float32) + + def two_link_forward_kinematics(self,q): """Compute the forward kinematics. Returns the base-coordinate Cartesian position of End-effector for a given joint angle vector. Optional @@ -112,18 +121,18 @@ def two_link_forward_dynamics(self, tau1, tau2, s_init): :param dth: :param ddth: :param self.LINK_MASS_1: - :param self.LINK_MASS_2: + :param self.LINK_MASS_2_TRUE: :param self.LINK_LENGTH_1: :param self.LINK_LENGTH_2: - :return: th1, dth1, th2, dth2 + :return: th1, dth1, th2, dth2 (second column) """ # Define derivative function def f(t, s): Iz1 = self.LINK_MASS_1 * self.LINK_LENGTH_1 ** 2 / 12 - Iz2 = self.LINK_MASS_2 * self.LINK_LENGTH_2 ** 2 / 12 - alpha = Iz1 + Iz2 + self.LINK_MASS_1 * self.LINK_COM_POS_1 ** 2 + self.LINK_MASS_2 * (self.LINK_LENGTH_1 ** 2 + self.LINK_LENGTH_2 ** 2) - beta = self.LINK_MASS_2 * self.LINK_LENGTH_1 * self.LINK_COM_POS_2 - sigma = Iz2 + self.LINK_MASS_2 * self.LINK_COM_POS_2 ** 2 + Iz2 = self.LINK_MASS_2_TRUE * self.LINK_LENGTH_2 ** 2 / 12 + alpha = Iz1 + Iz2 + self.LINK_MASS_1 * self.LINK_COM_POS_1 ** 2 + self.LINK_MASS_2_TRUE * (self.LINK_LENGTH_1 ** 2 + self.LINK_LENGTH_2 ** 2) + beta = self.LINK_MASS_2_TRUE * self.LINK_LENGTH_1 * self.LINK_COM_POS_2 + sigma = Iz2 + self.LINK_MASS_2_TRUE * self.LINK_COM_POS_2 ** 2 d11 = alpha + 2 * beta * np.cos(s[2]) d21 = sigma + beta * np.cos(s[2]) d12 = d21 @@ -141,79 +150,233 @@ def f(t, s): # Solve differential equation sol = solve_ivp(lambda t, s: f(t, s), [tspan[0], tspan[-1]], s_init, t_eval=tspan, rtol=1e-5) - return sol.y + return sol.y[:,1] + def two_link_jacobian(self, q, ld=0.1): + J=np.array([[-self.LINK_LENGTH_1*np.sin(q[0])-self.LINK_LENGTH_2*np.sin(q[0]+q[1]), -self.LINK_LENGTH_2*np.sin(q[0]+q[1])], + [self.LINK_LENGTH_1*np.cos(q[0])+self.LINK_LENGTH_2*np.cos(q[0]+q[1]), +self.LINK_LENGTH_2*np.cos(q[0]+q[1])]]) + def pseudoInverseMat(A, ld): + # Input: Any m-by-n matrix, and a damping factor. + # Output: An n-by-m pseudo-inverse of the input according to the Moore-Penrose formula + + # Get the number of rows (m) and columns (n) of A + [m, n] = np.shape(A) + + # Compute the pseudo inverse for both left and right cases + if (m > n): + # Compute the left pseudoinverse. + pinvA = np.linalg.lstsq((A.T * A + ld * ld * np.eye(n, n)), A.T)[0] + elif (m <= n): + # Compute the right pseudoinverse. + pinvA = np.linalg.lstsq((np.matmul(A, A.T) + ld * ld * np.eye(m, m)).T, A)[0].T + return pinvA + return pseudoInverseMat(J,ld) + + def q_command(r_ee, v_ee, Jpinv, rd, vd, e, dt): + """ + PID Traj Tracking Feedback Controller + Inputs: + r_ee : current end effector position + rd : desired end effector position + vd : desired end effector velocity + Jpinv : current pseudo inverse jacobian matrix + Output: joint-space velocity command of the robot. + """ + # TODO: User defined linear position gain + K_p = 40 + K_i = 10 + K_d = 10 + # TODO: User defined pseudo-inverse damping coefficient + # ld = 0.1 + # Jpinv=two_link_jacobian(q_hat_soln1, ld) + e_t = (rd - r_ee) + e = np.vstack((e, e_t.reshape(1, 2))) + v_command = vd + K_p * e_t + K_i * np.sum(e[1:, :], 0) * dt + K_d * (vd - v_ee) + qc = np.dot(Jpinv, v_command) + return qc, e def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): - # The reset method should return a tuple of the initial observation and some auxiliary information. - # states=(xd,yd,q1,q2,dq1,dq2,qd1,qd2,dqd1,dqd2,tau1_hat,tau2_hat) - self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(12,)) #TODO correct initialization - - xd_init_noisy= self.xd_init + np.random.normal(loc=0.0, scale=0.002, size=None) - yd_init_noisy= self.yd_init + np.random.normal(loc=0.0, scale=0.002, size=None) - q_init_noisy= self.two_link_inverse_kinematics(xd_init_noisy, yd_init_noisy)[0] + [np.random.normal(loc=0.0, scale=0.01, size=None), np.random.normal(loc=0.0, scale=0.01, size=None)] - dq_init=[0,0] - qd_init_noisy=self.two_link_inverse_kinematics(xd_init_noisy, yd_init_noisy)[0] - dqd_init=[0,0] - ddqd_init = [0,0] - tau1_hat_init_noisy, tau2_hat_init_noisy = self.two_link_inverse_dynamics(qd_init_noisy, dqd_init, ddqd_init) - # reset to initial states - self.state = [xd_init_noisy, - yd_init_noisy, - q_init_noisy[0], - q_init_noisy[1], - dq_init[0], - dq_init[1], - qd_init_noisy[0], - qd_init_noisy[1], - dqd_init[0], - dqd_init[1], - tau1_hat_init_noisy, - tau2_hat_init_noisy] - self.state_buffer=self.state #initialize episode state buffer + # initialize at t=-1 + rd_minus1 = np.array([xd[0], yd[0]]) + v_minus1 = np.array([vxd, vyd]) + ree_minus1 = rd_minus1 + q_hat_soln1, q_hat_soln2 = two_link_inverse_kinematics(rd_minus1[0], rd_minus1[1], l1, l2) + q_minus1 = q_hat_soln1 + e_minus1 = rd_minus1 - ree_minus1 + dq_t_minus1 = two_link_inverse_kinematics_joint_speeds(v_minus1[0], v_minus1[1], q_minus1[0], q_minus1[1], + l1=1.0, l2=1.0) + # starting from t=-1 + self.e = e_minus1.reshape(1, 2) + self.q = q_minus1.reshape(1, 2) + self.dq = dq_t_minus1.reshape(1, 2) + self.qc = np.zeros(2).reshape(1, 2) # attention on presumptions on qc + self.dqc = np.zeros(2).reshape(1, 2) + self.ddqc = np.zeros(2).reshape(1, 2) + self.r_hat = rd_minus1.reshape(1, 2) # attention: trivial assumption(?) + self.t = 0 + + rd_t = np.array([xd[self.t + 1], yd[self.t + 1]]) # attention: index desired starts from t=-1 + vd = np.array([vxd, vyd]) + # at time t=0 + q_t=self.q[-1,:] + dq_t = self.dq[-1, :] + r_hat_t = self.two_link_forward_kinematics(q_t, l1, l2) + Jpinv_t, J_t = two_link_jacobian(q_t, ld=0.01) + v_hat_t = (r_hat_t - self.r_hat[-1, :]) / self.dt + self.r_hat=np.vstack((self.r_hat,r_hat_t)) + dqc_t, e=self.q_command(r_ee=r_hat_t, v_ee=v_hat_t, Jpinv=Jpinv_t, rd=rd_t, vd=vd, e=e, dt=dt) + qc_t = dqc_t*self.dt+self.q[-2,:] #TODO is this observer(taking q(t-1) for integration) sufficient? + ddqc_t = (dqc_t-self.dq[-2,:])/self.dt + + self.qc = np.vstack((self.qc, qc_t)) + self.dqc = np.vstack((self.dqc, dqc_t)) + self.ddqc = np.vstack((self.ddqc, ddqc_t)) + + tau1_hat, tau2_hat = self.two_link_inverse_dynamics(q_t, dqc_t, ddqc_t) + + self.q = np.vstack((q, q_t)) + self.dq = np.vstack((dq, dq_t)) + + self.state = [r_hat_t[0]-rd_t[0], + r_hat_t[1]-rd_t[1], + q_t[0], + q_t[1], + dq_t[0], + dq_t[1], + tau1_hat, + tau2_hat] + self.state_buffer = self.state + + # + # + # + # + # # The reset method should return a tuple of the initial observation and some auxiliary information. + # # states=(xd,yd,q1,q2,dq1,dq2,qd1,qd2,dqd1,dqd2,tau1_hat,tau2_hat) + # self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(12,)) #TODO correct initialization + # xd_init_noisy= self.xd_init + np.random.normal(loc=0.0, scale=0.002, size=None) + # yd_init_noisy= self.yd_init + np.random.normal(loc=0.0, scale=0.002, size=None) + # q_init_noisy= self.two_link_inverse_kinematics(xd_init_noisy, yd_init_noisy)[0] + [np.random.normal(loc=0.0, scale=0.01, size=None), np.random.normal(loc=0.0, scale=0.01, size=None)] + # dq_init=[0,0] + # qd_init_noisy=self.two_link_inverse_kinematics(xd_init_noisy, yd_init_noisy)[0] + # dqd_init=[0,0] + # ddqd_init = [0,0] + # tau1_hat_init_noisy, tau2_hat_init_noisy = self.two_link_inverse_dynamics(qd_init_noisy, dqd_init, ddqd_init) + # # reset to initial states + # self.state = [xd_init_noisy, + # yd_init_noisy, + # q_init_noisy[0], + # q_init_noisy[1], + # dq_init[0], + # dq_init[1], + # qd_init_noisy[0], + # qd_init_noisy[1], + # dqd_init[0], + # dqd_init[1], + # tau1_hat_init_noisy, + # tau2_hat_init_noisy] + # self.state_buffer=self.state #initialize episode state buffer return self._get_ob() def step(self,a): - if self.t==0: - s_t = self.state_buffer[:] # states at t - s_tm1 = s_t # TODO check. states at t-1 - else: - s_t = self.state_buffer[-1, :] # states at t - s_tm1 = self.state_buffer[-2, :] #states at t-1 - # TOODO - # Add noise to the force action - if self.torque_noise_max > 0: - torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max) - # choose a sample target desired position to test IK - xd = self.xd[self.t] - yd = self.yd[self.t] - qd = self.two_link_inverse_kinematics(xd, yd)[0] #attention: only return and use first solution of IK - # TODO correct for observation: - dqd_t = np.array([(qd[0]-s_tm1[6])/self.dt, (qd[1]-s_tm1[7])/self.dt]) - ddqd_t = np.array([(dqd_t[0]-s_tm1[8])/self.dt, (dqd_t[1]-s_tm1[9])/self.dt]) - tau1_hat, tau2_hat = self.two_link_inverse_dynamics(qd, dqd_t, ddqd_t) - s_init = [s_t[2], s_t[4], s_t[3], s_t[5]] #TODO check concept - q_FD = self.two_link_forward_dynamics(tau1_hat+a[0], tau2_hat+a[1], s_init) + # update time index + self.t += 1 #Attention doublecheck + rd_t = np.array([xd[self.t + 1], yd[self.t + 1]]) # attention: index desired starts from t=-1 + vd = np.array([vxd, vyd]) + q_t = self.q[-1, :] + dq_t = self.dq[-1, :] + r_hat_t = self.two_link_forward_kinematics(q_t, l1, l2) + Jpinv_t, J_t = self.two_link_jacobian(q_t, ld=0.01) + v_hat_t = (r_hat_t - r_hat[-1, :]) / self.dt + self.r_hat = np.vstack((self.r_hat, r_hat_t)) + dqc_t, e = self.q_command(r_ee=r_hat_t, v_ee=v_hat_t, Jpinv=Jpinv_t, rd=rd_t, vd=vd, e=e, dt=dt) + qc_t = dqc_t * dt + self.q[-2, :] # TODO is this observer(taking q(t-1) for integration) sufficient? + ddqc_t = (dqc_t - self.dq[-2, :]) / self.dt + + self.qc = np.vstack((self.qc, qc_t)) + self.dqc = np.vstack((self.dqc, dqc_t)) + self.ddqc = np.vstack((self.ddqc, ddqc_t)) + + tau1_hat, tau2_hat = self.two_link_inverse_dynamics(q_t, dqc_t, ddqc_t) + + # s_init=[th1_init,dth1_init,th2_init,dth2_init] + s_init = np.array([q_t[0], dq_t[0], q_t[1], dq_t[1]]) + t = time.time() + # TODO HERE WHY TAKES LONG?? + q_FD = self.two_link_forward_dynamics(tau1_hat+a[0], tau2_hat+a[1], + s_init) # attention: forward dynamics robot has correct m2 value + print("FD elapsed time=", time.time() - t) + self.q = np.vstack((self.q, np.array([q_FD[0], q_FD[2]]))) + self.dq = np.vstack((self.dq, np.array([q_FD[1], q_FD[3]]))) + # collect observations - obs=np.array([xd,yd,q_FD[0,1],q_FD[2,1],q_FD[1,1],q_FD[3,1],qd[0],qd[1],dqd_t[0],dqd_t[1],tau1_hat,tau2_hat]) #TODO + obs = np.array([r_hat_t[0]-rd_t[0], + r_hat_t[1]-rd_t[1], + q_t[0], + q_t[1], + dq_t[0], + dq_t[1], + tau1_hat, + tau2_hat]) # update states - self.state=obs - self.state_buffer=np.vstack((self.state_buffer,self.state)) - # update time index - self.t += 1 + self.state = obs + self.state_buffer = np.vstack((self.state_buffer, self.state)) # check done episode terminal = self._terminal() # calculate reward - x_FK, y_FK = self.two_link_forward_kinematics(np.array([q_FD[0,1],q_FD[2,1]])) - reward = 1. if np.sqrt((x_FK-xd)**2+(y_FK-yd)**2)<0.001 else 0. + x_FK, y_FK = self.two_link_forward_kinematics(np.array([q_FD[0], q_FD[2]])) + reward = 1. if np.sqrt((x_FK - xd) ** 2 + (y_FK - yd) ** 2) < 0.01 else 0. # given action it returns 4-tuple (observation, reward, done, info) return (self._get_ob(), reward, terminal, {}) + + + # + # + # if self.t==0: + # s_t = self.state_buffer[:] # states at t + # s_tm1 = s_t # TODO check. states at t-1 + # else: + # s_t = self.state_buffer[-1, :] # states at t + # s_tm1 = self.state_buffer[-2, :] #states at t-1 + # # TOODO + # # Add noise to the force action + # if self.torque_noise_max > 0: + # torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max) + # # choose a sample target desired position to test IK + # xd = self.xd[self.t] + # yd = self.yd[self.t] + # qd = self.two_link_inverse_kinematics(xd, yd)[0] #attention: only return and use first solution of IK + # + # self.two_link_jacobian(self.q) + # # TODO correct for observation: + # dqd_t = np.array([(qd[0]-s_tm1[6])/self.dt, (qd[1]-s_tm1[7])/self.dt]) + # ddqd_t = np.array([(dqd_t[0]-s_tm1[8])/self.dt, (dqd_t[1]-s_tm1[9])/self.dt]) + # tau1_hat, tau2_hat = self.two_link_inverse_dynamics(qd, dqd_t, ddqd_t) + # s_init = [s_t[2], s_t[4], s_t[3], s_t[5]] #TODO check concept + # q_FD = self.two_link_forward_dynamics(tau1_hat+a[0], tau2_hat+a[1], s_init) + # + # # collect observations + # obs=np.array([xd,yd,q_FD[0,1],q_FD[2,1],q_FD[1,1],q_FD[3,1],qd[0],qd[1],dqd_t[0],dqd_t[1],tau1_hat,tau2_hat]) #TODO + # # update states + # self.state=obs + # self.state_buffer=np.vstack((self.state_buffer,self.state)) + # # update time index + # self.t += 1 + # # check done episode + # terminal = self._terminal() + # + # # calculate reward + # x_FK, y_FK = self.two_link_forward_kinematics(np.array([q_FD[0,1],q_FD[2,1]])) + # reward = 1. if np.sqrt((x_FK-xd)**2+(y_FK-yd)**2)<0.001 else 0. + # # given action it returns 4-tuple (observation, reward, done, info) + # return (self._get_ob(), reward, terminal, {}) + def _get_ob(self): #TODO is state=observation a reasonable assumption? s = self.state return s