diff --git a/spinup/backup_gym/gym/envs/classic_control/tworr.py b/spinup/backup_gym/gym/envs/classic_control/tworr.py index d908a171f..944d0f683 100644 --- a/spinup/backup_gym/gym/envs/classic_control/tworr.py +++ b/spinup/backup_gym/gym/envs/classic_control/tworr.py @@ -5,6 +5,11 @@ from gym import core, spaces from gym.utils import seeding +from sympy import * +from sympy.physics.mechanics import LagrangesMethod, dynamicsymbols +import matplotlib.pyplot as plt +from scipy.integrate import solve_ivp + __copyright__ = "Copyright 2024, IfA https://control.ee.ethz.ch/" __credits__ = ["Mahdi Nobar"] __license__ = "" @@ -75,19 +80,123 @@ class TworrEnv(core.Env): def __init__(self): self.viewer = None - high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2]) - low = -high - self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) - self.action_space = spaces.Discrete(3) + # states=[xt, yt, th1, th2, dth1, dth2, th1_hat, th2_hat, dth1_hat, dth2_hat] + high_s = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) + low_s = -high_s + self.observation_space = spaces.Box(low=low_s, high=high_s, dtype=np.float32) + high_a = np.array([0.1, 0.1]) + low_a = -high_a + self.action_space = spaces.Box(low=low_a, high=high_a, dtype=np.float32) self.state = None self.seed() + def two_link_forward_kinematics(q, l1=1.0, l2=1.0): + """Compute the forward kinematics. Returns the base-coordinate Cartesian + position of End-effector for a given joint angle vector. Optional + parameters l1 and l2 are the link lengths. The base is assumed to be at the + origin. + + :param q: two-element list or ndarray with [q1, q2] joint angles + :param l1: length of link 1 + :param l2: length of link 2 + :return: two-element ndarrays with [x,y] locations of End-Effector + """ + x = l1 * np.cos(q[0]) + l2 * np.cos(q[0] + q[1]) + y = l1 * np.sin(q[0]) + l2 * np.sin(q[0] + q[1]) + return x, y + + def two_link_inverse_kinematics(x, y, l1=1.0, l2=1.0): + """Compute two inverse kinematics solutions for a target end position. The + target is a Cartesian position vector (two-element ndarray) in world + coordinates, and the result vectors are joint angles as ndarrays [q0, q1]. + If the target is out of reach, returns the closest pose. + + :param target: two-element list or ndarray with [x1, y] target position + :param l1: optional proximal link length + :param l2: optional distal link length + :return: tuple (solution1, solution2) of two-element ndarrays with q1, q2 angles + """ + # find the position of the point in polar coordinates + r = np.sqrt(x ** 2 + y ** 2) + # phi is the angle of target point w.r.t. -Y axis, same origin as arm + phi = np.arctan2(y, x) + alpha = np.arccos((l1 ** 2 + l2 ** 2 - r ** 2) / (2 * l1 * l2)) + beta = np.arccos((r ** 2 + l1 ** 2 - l2 ** 2) / (2 * l1 * r)) + soln1 = np.array((phi - beta, np.pi - alpha)) + soln2 = np.array((phi + beta, np.pi + alpha)) + return soln1, soln2 + + def two_link_inverse_dynamics(th, dth, ddth, m1=1, m2=1, l1=1, l2=1): + """Compute two inverse dynamics solutions for a target end position. + :param th: + :param dth: + :param ddth: + :param m1: + :param m2: + :param l1: + :param l2: + :return: tau1, tau2 + """ + r1 = l1 / 2 + r2 = l2 / 2 + Iz1 = m1 * l1 ** 2 / 12 + Iz2 = m2 * l2 ** 2 / 12 + alpha = Iz1 + Iz2 + m1 * r1 ** 2 + m2 * (l1 ** 2 + l2 ** 2) + beta = m2 * l1 * r2 + sigma = Iz2 + m2 * r2 ** 2 + tau1 = (alpha + 2 * beta * np.cos(th[1])) * ddth[0] + (sigma + beta * np.cos(th[1])) * ddth[1] + ( + -beta * np.sin(th[1]) * dth[1]) * dth[1] + (-beta * np.sin(th[1]) * (dth[0] + dth[1]) * dth[1]) + tau2 = (sigma + beta * np.cos(th[1])) * ddth[0] + sigma * ddth[1] + (beta * np.sin(th[1]) * dth[0] * dth[0]) + return tau1, tau2 + + def two_link_forward_dynamics(tau1, tau2, s_init, m1=1, m2=1, l1=1, l2=1): + """Compute two inverse dynamics solutions for a target end position. + :param th: + :param dth: + :param ddth: + :param m1: + :param m2: + :param l1: + :param l2: + :return: th1, dth1, th2, dth2 + """ + # Define derivative function + def f(t, s): + r1 = l1 / 2 + r2 = l2 / 2 + Iz1 = m1 * l1 ** 2 / 12 + Iz2 = m2 * l2 ** 2 / 12 + alpha = Iz1 + Iz2 + m1 * r1 ** 2 + m2 * (l1 ** 2 + l2 ** 2) + beta = m2 * l1 * r2 + sigma = Iz2 + m2 * r2 ** 2 + d11 = alpha + 2 * beta * np.cos(s[2]) + d21 = sigma + beta * np.cos(s[2]) + d12 = d21 + d22 = sigma + c = -beta * np.sin(s[2]) + dsdt = [s[1], d12 * d22 / (d12 * d21 - d11 * d22) * ( + tau2 / d22 - tau1 / d12 - c / d22 * s[1] ** 2 + (c + c) / d12 * s[1] * s[3] + c / d12 * s[ + 3] ** 2), + s[3], d11 * d21 / (d11 * d22 - d12 * d21) * ( + tau2 / d21 - tau1 / d11 - c / d21 * s[1] ** 2 + (c + c) / d11 * s[1] * s[3] + c / d11 * + s[3] ** 2)] + return dsdt + # Define time spans, initial values, and constants + tspan = np.linspace(0, 1, 2) + # Solve differential equation + sol = solve_ivp(lambda t, s: f(t, s), + [tspan[0], tspan[-1]], s_init, t_eval=tspan, rtol=1e-5) + # Plot states + state_plotter(sol.t, sol.y, 1) + return sol.y + def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): - self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,)) + # TODO check difference bw state and observation? + self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(12,)) return self._get_ob() def step(self, a): @@ -98,37 +207,47 @@ def step(self, a): if self.torque_noise_max > 0: torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max) - # Now, augment the state with our force action so it can be passed to - # _dsdt - s_augmented = np.append(s, torque) - - ns = rk4(self._dsdt, s_augmented, [0, self.dt]) - # only care about final timestep of integration returned by integrator - ns = ns[-1] - ns = ns[:4] # omit action - # ODEINT IS TOO SLOW! - # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt]) - # self.s_continuous = ns_continuous[-1] # We only care about the state - # at the ''final timestep'', self.dt - - ns[0] = wrap(ns[0], -pi, pi) - ns[1] = wrap(ns[1], -pi, pi) - ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1) - ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2) - self.state = ns - terminal = self._terminal() - reward = -1. if not terminal else 0. - return (self._get_ob(), reward, terminal, {}) + m1 = 1 + m2 = 1 + l1 = 1 + l2 = 1 + + # choose a sample target position to test IK + xt = 0.5 + yt = 0.6 + q_hat_soln1, q_hat_soln2 = two_link_inverse_kinematics(xt, yt, l1, l2) + print("The following solutions should reach endpoint position %s: %s OR %s" % ( + np.array((xt, yt)), q_hat_soln1, q_hat_soln2)) + # TODO correct for observation: + dth = np.array((0.1, 0.1)) # TODO + ddth = np.array((0.1, 0.1)) # TODO + tau1_hat, tau2_hat = two_link_inverse_dynamics(q_hat_soln1, dth, ddth) + print("ID torques for first IK solution:", np.array((tau1_hat, tau2_hat))) + + # s_init=[th1_init,dth1_init,th2_init,dth2_init] + s_init = [q_hat_soln1[0], dth[0], q_hat_soln1[1], dth[1]] + q_FD = self.two_link_forward_dynamics(tau1_hat+a[0], tau2_hat+a[1], s_init) + + obs=np.array([xt,yt,q_FD[0,1],q_FD[2,1],q_FD[1,1],q_FD[3,1],q_hat_soln1[0],q_hat_soln1[1],dth[0],dth[1],tau1_hat,tau2_hat]) + self.state=obs #TODO + + terminal = self._terminal() #TODO + reward = -1. if not terminal else 0. #TODO + + # given action it returns 4-tuple (observation, reward, done, info) + return (obs, reward, terminal, {}) + def _get_ob(self): - s = self.state - return np.array([cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]]) + #TODO + return 1 def _terminal(self): - s = self.state - return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.) + #TODO + return 1 def _dsdt(self, s_augmented, t): + # todo check purpose m1 = self.LINK_MASS_1 m2 = self.LINK_MASS_2 l1 = self.LINK_LENGTH_1 @@ -204,6 +323,7 @@ def close(self): self.viewer = None def wrap(x, m, M): + # TODO remove? """ :param x: a scalar :param m: minimum possible value in range @@ -233,6 +353,7 @@ def bound(x, m, M=None): def rk4(derivs, y0, t, *args, **kwargs): + # TODO remove? """ Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta. This is a toy implementation which may be useful if you find