From 42a5dcdef02029ea08947b43a10c706a014cdc9e Mon Sep 17 00:00:00 2001 From: Rick Staa Date: Thu, 1 Apr 2021 17:48:04 +0200 Subject: [PATCH] :bug: Fixes the cartpole cost formula The old reference tracking cost formula that was supplied by minghoa was wrong in the sense that it was not positive definite. The new cost function is positive definite. This commit also improves the render scaling. --- .../envs/biological/oscillator/oscillator.py | 6 +- .../cart_pole_cost/cart_pole_cost.py | 162 +++++++++++------- .../envs/classic_control/ex3_ekf/ex3_ekf.py | 4 +- 3 files changed, 107 insertions(+), 65 deletions(-) diff --git a/simzoo/envs/biological/oscillator/oscillator.py b/simzoo/envs/biological/oscillator/oscillator.py index 7e5baeb6..33444634 100644 --- a/simzoo/envs/biological/oscillator/oscillator.py +++ b/simzoo/envs/biological/oscillator/oscillator.py @@ -128,7 +128,7 @@ class Oscillator(gym.Env, Disturber): """ # noqa: E501 def __init__(self, reference_type="periodic", seed=None): - """Constructs all the necessary attributes for the oscillator object. + """Constructs all the necessary attributes for the oscillator instance. Args: reference_type (str, optional): The type of reference you want to use @@ -142,7 +142,7 @@ def __init__(self, reference_type="periodic", seed=None): self.t = 0 self.dt = 1.0 self.sigma = 0.0 - self.__init_state = np.array( + self._init_state = np.array( [0.1, 0.2, 0.3, 0.1, 0.2, 0.3] ) # Initial state when random is disabled @@ -299,7 +299,7 @@ def reset(self, random=True): self.state = ( self.np_random.uniform(low=0, high=1, size=(6,)) if random - else self.__init_state + else self._init_state ) self.t = 0 m1, m2, m3, p1, p2, p3 = self.state diff --git a/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py b/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py index 739e2096..64b60db7 100644 --- a/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py +++ b/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py @@ -1,5 +1,5 @@ """Modified version of the classic -`CartPole-v1 `_ Openai Gym environment. In +`CartPole-v1 `_ OpenAi Gym environment. In this version two things are different compared to the original: - In this version, the action space is continuous, wherein the OpenAi version @@ -32,7 +32,7 @@ class CartPoleCost(gym.Env): velocity. Source: - This environment corresponds to the version that is included in the openAi gym + This environment corresponds to the version that is included in the OpenAi gym package. It is different in the fact that: - In this version, the action space is continuous, wherein the OpenAi version @@ -78,7 +78,7 @@ class CartPoleCost(gym.Env): Episode Termination: - Pole Angle is more than 12 degrees. - - Cart Position is more than 2.4 (center of the cart reaches the edge of the + - Cart Position is more than 5 m (center of the cart reaches the edge of the display). - Episode length is greater than 200. @@ -92,9 +92,31 @@ class CartPoleCost(gym.Env): "video.frames_per_second": 50, } # Not used during training but in other gym utilities - def __init__(self, seed=None, kinematics_integrator="euler", cost_type="reference"): + def __init__( + self, + seed=None, + cost_type="stabilize", + reference_type="constant", + kinematics_integrator="euler", + ): + """Constructs all the necessary attributes for the CartPoleCost instance. + + Args: + seed (int, optional): A random seed for the environment. By default + ``None``. + cost_type (str, optional): The cost type you want to use (options are + "reference" and "stabilization"). When stabilization is used the cart is + not kept at a given reference. Defaults to "reference". + reference_type (str, optional): The type of reference you want to use + (``constant`` or ``periodic``), by default ``periodic``. + kinematics_integrator (str, optional): Solver used for the kinematics + intergration (options are "euler", "friction", "semi-implicit euler"). + Defaults to "euler". + """ super().__init__() # Setup disturber + self.t = 0 + self.reference_type = reference_type self.length = self._length_init = 0.5 # actually half the pole's length self.mass_cart = self._mass_cart_init = 1.0 self.mass_pole = self._mass_pole_init = 0.1 @@ -103,10 +125,14 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc self.pole_mass_length = self.mass_pole * self.length self.tau = self.dt = 0.02 # seconds between state updates self.kinematics_integrator = kinematics_integrator - self.__init_state = np.array( + self.force_mag = 20 + self._init_state = np.array( [0.1, 0.2, 0.3, 0.1] ) # Initial state when random is disabled - self.force_mag = 20 + self._init_state_range = { + "low": [-0.2, -0.05, -0.05, -0.05], # NOTE: OpenAi uses -0.05 + "high": [0.2, 0.05, 0.05, 0.05], # NOTE: OpenAi uses 0.05 + } # Initial state range when random is enabled # Set the lyapunov constraint and target positions self.cost_type = cost_type @@ -114,16 +140,15 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc self.target_pos = 0 # Thresholds - # self.theta_threshold_radians = 12 * 2 * math.pi / 360 # OpenAi value - # self.max_v = np.finfo(np.float32).max # OpenAi value - # self.max_w = np.finfo(np.float32).max # OpenAi value - # self.x_threshold = 2.4 # OpenAI value self.theta_threshold_radians = ( - 20 * 2 * math.pi / 360 + 12 * 2 * math.pi / 360 ) # Angle at which to fail the episode - self.x_threshold = 6 - self.max_v = 50 - self.max_w = 50 + self.x_threshold = 5 # NOTE: OpenAi Uses 2.4 + self.y_threshold = ( + 5 # NOTE: Defines real world window height (not used as threshold) + ) + self.max_v = 50 # NOTE: OpenAi uses np.finfo(np.float32).max + self.max_w = 50 # NOTE: OpenAi uses np.finfo(np.float32).max # Set angle limit set to 2 * theta_threshold_radians so failing observation # is still within bounds @@ -143,7 +168,7 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc np.array([0.0], dtype=np.float32), np.array([np.finfo(np.float32).max], dtype=np.float32), dtype=np.float32, - ) + ) # NOTE: Added! # Create random seed and set gym environment parameters self.seed(seed) @@ -185,6 +210,21 @@ def reset_params(self): self.total_mass = self.mass_pole + self.mass_cart self.pole_mass_length = self.mass_pole * self.length + def reference(self, t): + """Returns the current value of the periodic reference signal that is tracked by + the Synthetic oscillatory network. + + Args: + t (float): The current time step. + + Returns: + float: The current reference value. + """ + if self.reference_type == "periodic": + return self.target_pos + 7 * np.sin((2 * np.pi) * t / 200) + else: + return self.target_pos + def cost(self, x, theta): """Returns the cost for a given cart position (x) and a pole angle (theta). @@ -200,25 +240,19 @@ def cost(self, x, theta): - r_2 (float): The cart_pole angle reference. """ if self.cost_type.lower() == "reference": - # Calculate cost (reference tracking) - r_1 = ((self.x_threshold / 10 - abs(x - self.target_pos))) / ( - self.x_threshold / 10 - ) - r_2 = ((self.theta_threshold_radians / 4) - abs(theta)) / ( - self.theta_threshold_radians / 4 - ) - cost = 20 * np.sign(r_2) * (r_2 ** 2) + np.sign(r_1) * ( - r_1 ** 2 - ) # Reference tracking task + stab_cost = x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2 + r1 = self.reference(self.t) + ref_cost = abs(x - r1) + # ref_cost = np.square(x - r1) + cost = stab_cost + ref_cost else: - # Calculate cost (stabilization task) cost = ( x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2 ) # Stabilization task - return cost, r_1, r_2 + return cost def step(self, action): """Take step into the environment. @@ -234,9 +268,6 @@ def step(self, action): - done (:obj:`bool`): Whether the episode was done. - info_dict (:obj:`dict`): Dictionary with additional information. """ - err_msg = "%r (%s) invalid" % (action, type(action)) - assert self.action_space.contains(action), err_msg - force = np.clip(action, self.action_space.low, self.action_space.high) # For the interested reader: @@ -274,6 +305,7 @@ def step(self, action): theta_dot = theta_dot + self.tau * theta_acc theta = theta + self.tau * theta_dot self.state = np.array([x, x_dot[0], theta, theta_dot[0]]) + self.t = self.t + self.dt # Increment time step # Define stopping criteria done = bool( @@ -281,7 +313,8 @@ def step(self, action): ) # Calculate cost - cost, r_1, r_2 = self.cost(x, theta) + # cost, r_1, r_2 = self.cost(x, theta) + cost = self.cost(x, theta) # Define stopping criteria if ( @@ -308,7 +341,7 @@ def step(self, action): target=self.target_pos, violation_of_x_threshold=violation_of_x_threshold, violation_of_constraint=violation_of_constraint, - reference=np.array([r_1, r_2]), + # reference=np.array([r_1, r_2]), state_of_interest=theta, ), ) @@ -325,10 +358,11 @@ def reset(self, random=True): """ # Return random initial state self.state = ( - # self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) # OpenAi value - self.np_random.uniform(low=[-5, -0.2, -0.2, -0.2], high=[5, 0.2, 0.2, 0.2]) + self.np_random.uniform( + low=self._init_state_range["low"], high=self._init_state_range["high"] + ) if random - else self.__init_state + else self._init_state ) self.steps_beyond_done = None return np.array(self.state) @@ -343,13 +377,16 @@ def render(self, mode="human"): screen_width = 600 screen_height = 400 - world_width = self.x_threshold * 2 - scale = screen_width / world_width - cart_y = 100 # Top of cart - polewidth = 10.0 - polelen = scale * (2.0 * self.length) - cartwidth = 50.0 - cartheight = 30.0 + # Make sure scaling is correct + world_x_width = self.x_threshold * 2 + world_y_width = self.y_threshold * 2 + x_scale = screen_width / world_x_width + y_scale = screen_height / world_y_width + cart_y = y_scale * 1.0 # Top of cart + pole_width = x_scale * 0.1 + pole_len = x_scale * (2.0 * self.length) + cart_width = x_scale * 0.5 + cart_height = y_scale * 0.3 if self.viewer is None: from gym.envs.classic_control import rendering @@ -357,17 +394,22 @@ def render(self, mode="human"): self.viewer = rendering.Viewer(screen_width, screen_height) # Render CartPole - l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 - axleoffset = cartheight / 4.0 + l, r, t, b = ( + -cart_width / 2, + cart_width / 2, + cart_height / 2, + -cart_height / 2, + ) + axleoffset = cart_height / 4.0 cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) self.carttrans = rendering.Transform() cart.add_attr(self.carttrans) self.viewer.add_geom(cart) l, r, t, b = ( - -polewidth / 2, - polewidth / 2, - polelen - polewidth / 2, - -polewidth / 2, + -pole_width / 2, + pole_width / 2, + pole_len - pole_width / 2, + -pole_width / 2, ) pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) pole.set_color(0.8, 0.6, 0.4) @@ -375,7 +417,7 @@ def render(self, mode="human"): pole.add_attr(self.poletrans) pole.add_attr(self.carttrans) self.viewer.add_geom(pole) - self.axle = rendering.make_circle(polewidth / 2) + self.axle = rendering.make_circle(pole_width / 2) self.axle.add_attr(self.poletrans) self.axle.add_attr(self.carttrans) self.axle.set_color(0.5, 0.5, 0.8) @@ -386,20 +428,20 @@ def render(self, mode="human"): # Render the target position self.target = rendering.Line( - (self.target_pos * scale + screen_width / 2.0, 0), - (self.target_pos * scale + screen_width / 2.0, screen_height), + (self.target_pos * x_scale + screen_width / 2.0, 0), + (self.target_pos * x_scale + screen_width / 2.0, screen_height), ) self.target.set_color(1, 0, 0) self.viewer.add_geom(self.target) # Render the constrain position self.neg_cons = rendering.Line( - (-self.const_pos * scale + screen_width / 2.0, 0), - (-self.const_pos * scale + screen_width / 2.0, screen_height), + (-self.const_pos * x_scale + screen_width / 2.0, 0), + (-self.const_pos * x_scale + screen_width / 2.0, screen_height), ) self.cons = rendering.Line( - (self.const_pos * scale + screen_width / 2.0, 0), - (self.const_pos * scale + screen_width / 2.0, screen_height), + (self.const_pos * x_scale + screen_width / 2.0, 0), + (self.const_pos * x_scale + screen_width / 2.0, screen_height), ) self.neg_cons.set_color(0, 0, 1) self.cons.set_color(0, 0, 1) @@ -414,16 +456,16 @@ def render(self, mode="human"): # Edit the pole polygon vertex pole = self._pole_geom l, r, t, b = ( - -polewidth / 2, - polewidth / 2, - polelen - polewidth / 2, - -polewidth / 2, + -pole_width / 2, + pole_width / 2, + pole_len - pole_width / 2, + -pole_width / 2, ) pole.v = [(l, b), (l, t), (r, t), (r, b)] # Apply card movement x = self.state - cart_x = x[0] * scale + screen_width / 2.0 # Middle of cart + cart_x = x[0] * x_scale + screen_width / 2.0 # Middle of cart self.carttrans.set_translation(cart_x, cart_y) self.poletrans.set_rotation(-x[2]) return self.viewer.render(return_rgb_array=mode == "rgb_array") diff --git a/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py b/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py index 89d4f8eb..5ca9470c 100644 --- a/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py +++ b/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py @@ -130,8 +130,8 @@ class Ex3EKF(gym.Env, Disturber): sigma (float): The variance of the system noise. """ # noqa: E501 - def __init__(self, seed=400): - """Constructs all the necessary attributes for the oscillator object. + def __init__(self, seed=None): + """Constructs all the necessary attributes for the Ex3EKF instance. Args: seed (int, optional): A random seed for the environment. By default