From 886ce69cb4b0d731e8cbba455878f11b1b89f6e4 Mon Sep 17 00:00:00 2001 From: mahdinobar Date: Thu, 14 Mar 2024 12:08:20 +0100 Subject: [PATCH] dev Tworr env, #3 --- .../gym/envs/classic_control/tworr.py | 91 ++++++++++--------- .../examples/pytorch/inverted_pendulum_SAC.py | 14 +-- 2 files changed, 53 insertions(+), 52 deletions(-) diff --git a/spinup/backup_gym/gym/envs/classic_control/tworr.py b/spinup/backup_gym/gym/envs/classic_control/tworr.py index a84b40156..c0865fcd3 100644 --- a/spinup/backup_gym/gym/envs/classic_control/tworr.py +++ b/spinup/backup_gym/gym/envs/classic_control/tworr.py @@ -9,8 +9,8 @@ __credits__ = ["Mahdi Nobar"] __author__ = "Mahdi Nobar ETH Zurich " + class TworrEnv(core.Env): - # TODO """ Two-link planar arm with two revolut joints (based on simplified models at book "A Mathematical Introduction to Robotic Manipulation" by Murry et al. @@ -19,17 +19,18 @@ class TworrEnv(core.Env): 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second' : 15 } - dt = 0.01 # [s] - LINK_LENGTH_1 = 1. # [m] - LINK_LENGTH_2 = 1. # [m] - LINK_MASS_1 = 1. #: [kg] mass of link 1 - LINK_MASS_2 = 1. #: [kg] mass of link 2 - LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1 - LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2 - torque_noise_max = 0. #TODO - MAX_TIMESTEPS=100 #maximum timesteps per episode def __init__(self): + self.dt = 0.01 # [s] + self.LINK_LENGTH_1 = 1. # [m] + self.LINK_LENGTH_2 = 1. # [m] + self.LINK_MASS_1 = 1. #: [kg] mass of link 1 + self.LINK_MASS_2 = 1. #: [kg] mass of link 2 + self.LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1 + self.LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2 + self.torque_noise_max = 0. # TODO + self.MAX_TIMESTEPS = 100 # maximum timesteps per episode + self.viewer = None # states=[xt, yt, th1, th2, dth1, dth2, th1_hat, th2_hat, dth1_hat, dth2_hat] high_s = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) @@ -42,22 +43,22 @@ def __init__(self): self.state_buffer= None self.t = 0 self.seed() - self.xd = np.linspace(1.2,1.3, MAX_TIMESTEPS, endpoint=True) - self.yd = np.linspace(1, 1.8, MAX_TIMESTEPS, endpoint=True) + self.xd = np.linspace(1.2,1.3, self.MAX_TIMESTEPS, endpoint=True) + self.yd = np.linspace(1, 1.8, self.MAX_TIMESTEPS, endpoint=True) def two_link_forward_kinematics(self,q): """Compute the forward kinematics. Returns the base-coordinate Cartesian position of End-effector for a given joint angle vector. Optional - parameters LINK_LENGTH_1 and LINK_LENGTH_2 are the link lengths. The base is assumed to be at the + parameters self.LINK_LENGTH_1 and self.LINK_LENGTH_2 are the link lengths. The base is assumed to be at the origin. :param q: two-element list or ndarray with [q1, q2] joint angles - :param LINK_LENGTH_1: length of link 1 - :param LINK_LENGTH_2: length of link 2 + :param self.LINK_LENGTH_1: length of link 1 + :param self.LINK_LENGTH_2: length of link 2 :return: two-element ndarrays with [x,y] locations of End-Effector """ - x = LINK_LENGTH_1 * np.cos(q[0]) + LINK_LENGTH_2 * np.cos(q[0] + q[1]) - y = LINK_LENGTH_1 * np.sin(q[0]) + LINK_LENGTH_2 * np.sin(q[0] + q[1]) + x = self.LINK_LENGTH_1 * np.cos(q[0]) + self.LINK_LENGTH_2 * np.cos(q[0] + q[1]) + y = self.LINK_LENGTH_1 * np.sin(q[0]) + self.LINK_LENGTH_2 * np.sin(q[0] + q[1]) return x, y def two_link_inverse_kinematics(self,x, y): @@ -67,16 +68,16 @@ def two_link_inverse_kinematics(self,x, y): If the target is out of reach, returns the closest pose. :param target: two-element list or ndarray with [x1, y] target position - :param LINK_LENGTH_1: optional proximal link length - :param LINK_LENGTH_2: optional distal link length + :param self.LINK_LENGTH_1: optional proximal link length + :param self.LINK_LENGTH_2: optional distal link length :return: tuple (solution1, solution2) of two-element ndarrays with q1, q2 angles """ # find the position of the point in polar coordinates r = np.sqrt(x ** 2 + y ** 2) # phi is the angle of target point w.r.t. -Y axis, same origin as arm phi = np.arctan2(y, x) - alpha = np.arccos((LINK_LENGTH_1 ** 2 + LINK_LENGTH_2 ** 2 - r ** 2) / (2 * LINK_LENGTH_1 * LINK_LENGTH_2)) - beta = np.arccos((r ** 2 + LINK_LENGTH_1 ** 2 - LINK_LENGTH_2 ** 2) / (2 * LINK_LENGTH_1 * r)) + alpha = np.arccos((self.LINK_LENGTH_1 ** 2 + self.LINK_LENGTH_2 ** 2 - r ** 2) / (2 * self.LINK_LENGTH_1 * self.LINK_LENGTH_2)) + beta = np.arccos((r ** 2 + self.LINK_LENGTH_1 ** 2 - self.LINK_LENGTH_2 ** 2) / (2 * self.LINK_LENGTH_1 * r)) soln1 = np.array((phi - beta, np.pi - alpha)) soln2 = np.array((phi + beta, np.pi + alpha)) return soln1, soln2 @@ -88,35 +89,35 @@ def two_link_inverse_dynamics(self, th, dth, ddth): :param ddth: :return: tau1, tau2 """ - Iz1 = LINK_MASS_1 * LINK_LENGTH_1 ** 2 / 12 - Iz2 = LINK_MASS_2 * LINK_LENGTH_2 ** 2 / 12 - alpha = Iz1 + Iz2 + LINK_MASS_1 * LINK_COM_POS_1 ** 2 + LINK_MASS_2 * (LINK_LENGTH_1 ** 2 + LINK_LENGTH_2 ** 2) - beta = LINK_MASS_2 * LINK_LENGTH_1 * LINK_COM_POS_2 - sigma = Iz2 + LINK_MASS_2 * LINK_COM_POS_2 ** 2 + Iz1 = self.LINK_MASS_1 * self.LINK_LENGTH_1 ** 2 / 12 + Iz2 = self.LINK_MASS_2 * self.LINK_LENGTH_2 ** 2 / 12 + alpha = Iz1 + Iz2 + self.LINK_MASS_1 * self.LINK_COM_POS_1 ** 2 + self.LINK_MASS_2 * (self.LINK_LENGTH_1 ** 2 + self.LINK_LENGTH_2 ** 2) + beta = self.LINK_MASS_2 * self.LINK_LENGTH_1 * self.LINK_COM_POS_2 + sigma = Iz2 + self.LINK_MASS_2 * self.LINK_COM_POS_2 ** 2 tau1 = (alpha + 2 * beta * np.cos(th[1])) * ddth[0] + (sigma + beta * np.cos(th[1])) * ddth[1] + ( -beta * np.sin(th[1]) * dth[1]) * dth[1] + (-beta * np.sin(th[1]) * (dth[0] + dth[1]) * dth[1]) tau2 = (sigma + beta * np.cos(th[1])) * ddth[0] + sigma * ddth[1] + (beta * np.sin(th[1]) * dth[0] * dth[0]) return tau1, tau2 - def two_link_forward_dynamics(self, tau1, tau2, s_init, LINK_MASS_1=1, LINK_MASS_2=1, LINK_LENGTH_1=1, LINK_LENGTH_2=1): + def two_link_forward_dynamics(self, tau1, tau2, s_init, self.LINK_MASS_1=1, self.LINK_MASS_2=1, self.LINK_LENGTH_1=1, self.LINK_LENGTH_2=1): """Compute two inverse dynamics solutions for a target end position. :param s_init: [th1_init,dth1_init,th2_init,dth2_init] :param th: :param dth: :param ddth: - :param LINK_MASS_1: - :param LINK_MASS_2: - :param LINK_LENGTH_1: - :param LINK_LENGTH_2: + :param self.LINK_MASS_1: + :param self.LINK_MASS_2: + :param self.LINK_LENGTH_1: + :param self.LINK_LENGTH_2: :return: th1, dth1, th2, dth2 """ # Define derivative function def f(t, s): - Iz1 = LINK_MASS_1 * LINK_LENGTH_1 ** 2 / 12 - Iz2 = LINK_MASS_2 * LINK_LENGTH_2 ** 2 / 12 - alpha = Iz1 + Iz2 + LINK_MASS_1 * LINK_COM_POS_1 ** 2 + LINK_MASS_2 * (LINK_LENGTH_1 ** 2 + LINK_LENGTH_2 ** 2) - beta = LINK_MASS_2 * LINK_LENGTH_1 * LINK_COM_POS_2 - sigma = Iz2 + LINK_MASS_2 * LINK_COM_POS_2 ** 2 + Iz1 = self.LINK_MASS_1 * self.LINK_LENGTH_1 ** 2 / 12 + Iz2 = self.LINK_MASS_2 * self.LINK_LENGTH_2 ** 2 / 12 + alpha = Iz1 + Iz2 + self.LINK_MASS_1 * self.LINK_COM_POS_1 ** 2 + self.LINK_MASS_2 * (self.LINK_LENGTH_1 ** 2 + self.LINK_LENGTH_2 ** 2) + beta = self.LINK_MASS_2 * self.LINK_LENGTH_1 * self.LINK_COM_POS_2 + sigma = Iz2 + self.LINK_MASS_2 * self.LINK_COM_POS_2 ** 2 d11 = alpha + 2 * beta * np.cos(s[2]) d21 = sigma + beta * np.cos(s[2]) d12 = d21 @@ -156,15 +157,15 @@ def step(self, a): s_tm1 = self.state[-2, :] #states at t-1 # TOODO # Add noise to the force action - if self.torque_noise_max > 0: - torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max) + if self.self.torque_noise_max > 0: + torque += self.np_random.uniform(-self.self.torque_noise_max, self.self.torque_noise_max) # choose a sample target desired position to test IK xd = self.xd[self.t] yd = self.yd[self.t] q_d = self.two_link_inverse_kinematics(xd, yd)[0] #attention: only return and use first solution of IK # TODO correct for observation: - dqd_t = np.array([(q_d[0]-s_tm1[6])/dt, (q_d[1]-s_tm1[7])/dt]) - ddqd_t = np.array([(dqd_t[0]-s_tm1[8])/dt, (dqd_t[1]-s_tm1[9])/dt]) + dqd_t = np.array([(q_d[0]-s_tm1[6])/self.dt, (q_d[1]-s_tm1[7])/self.dt]) + ddqd_t = np.array([(dqd_t[0]-s_tm1[8])/self.dt, (dqd_t[1]-s_tm1[9])/self.dt]) tau1_hat, tau2_hat = two_link_inverse_dynamics(q_d, dqd_t, ddqd_t) s_init = [s[2], s[4], s[3], s[5]] q_FD = self.two_link_forward_dynamics(tau1_hat+a[0], tau2_hat+a[1], s_init) @@ -188,7 +189,7 @@ def _get_ob(self): #TODO is state=observation a reasonable assumption? return s def _terminal(self): - return bool(self.t >= MAX_TIMESTEPS) + return bool(self.t >= self.MAX_TIMESTEPS) def render(self, mode='human'): from gym.envs.classic_control import rendering @@ -197,16 +198,16 @@ def render(self, mode='human'): if self.viewer is None: self.viewer = rendering.Viewer(500,500) - bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default + bound = self.self.LINK_LENGTH_1 + self.self.LINK_LENGTH_2 + 0.2 # 2.2 for default self.viewer.set_bounds(-bound,bound,-bound,bound) if s is None: return None - p1 = [LINK_LENGTH_1 * np.cos(s[2]), LINK_LENGTH_1 * np.sin(s[2])] - p2 = [p1[0] + LINK_LENGTH_2 * np.cos(s[2] + s[3]), p1[1] + LINK_LENGTH_2 * np.sin(s[2] + q[3])] + p1 = [self.LINK_LENGTH_1 * np.cos(s[2]), self.LINK_LENGTH_1 * np.sin(s[2])] + p2 = [p1[0] + self.LINK_LENGTH_2 * np.cos(s[2] + s[3]), p1[1] + self.LINK_LENGTH_2 * np.sin(s[2] + q[3])] xys = np.array([[0,0], p1, p2])[:,::-1] thetas = [s[2], s[3]] # TODO check compatible with rendering - link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2] + link_lengths = [self.self.LINK_LENGTH_1, self.self.LINK_LENGTH_2] self.viewer.draw_line((-2.2, 1), (2.2, 1)) for ((x,y),th,llen) in zip(xys, thetas, link_lengths): diff --git a/spinup/examples/pytorch/inverted_pendulum_SAC.py b/spinup/examples/pytorch/inverted_pendulum_SAC.py index 52e429791..13f2dd254 100644 --- a/spinup/examples/pytorch/inverted_pendulum_SAC.py +++ b/spinup/examples/pytorch/inverted_pendulum_SAC.py @@ -9,23 +9,23 @@ from spinup.utils import plot import os -TRAIN=False -DEBUG=True -env_fn = lambda: gym.make('Pendulum-v0') +TRAIN=True +DEBUG=False +env_fn = lambda: gym.make('Tworr-v0') +exp_name = "Tworrv0_0" if __name__ == '__main__': if TRAIN: # train - exp_name="logs" - output_dir='/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/'+exp_name + output_dir='/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/'+exp_name if not os.path.exists(output_dir): os.makedirs(output_dir) logger_kwargs = dict(output_dir=output_dir, exp_name=exp_name) - sac(env_fn, ac_kwargs={}, seed=0, steps_per_epoch=1000, epochs=2, replay_size=100000, gamma=0.99, polyak=0.995, lr=0.001, alpha=0.2, batch_size=100, start_steps=1000, update_after=200, update_every=50, num_test_episodes=10, max_ep_len=1000, logger_kwargs=logger_kwargs, save_freq=1) + sac(env_fn, ac_kwargs={}, seed=0, steps_per_epoch=500, epochs=2, replay_size=10000, gamma=0.99, polyak=0.995, lr=0.001, alpha=0.2, batch_size=100, start_steps=1000, update_after=200, update_every=50, num_test_episodes=10, max_ep_len=100, logger_kwargs=logger_kwargs, save_freq=1) if DEBUG: env_fn = gym.make('Pendulum-v0') # from gym.wrappers.monitoring.video_recorder import VideoRecorder # VideoRecorder(env_fn,'/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/video.mp4', enabled=True) # visualize output - _, get_action = load_policy_and_env('/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs') + _, get_action = load_policy_and_env('/home/mahdi/ETHZ/codes/spinningup/spinup/examples/pytorch/logs/'+exp_name) run_policy(env_fn, get_action,num_episodes=3)