diff --git a/simzoo/envs/biological/oscillator/oscillator.py b/simzoo/envs/biological/oscillator/oscillator.py index 7e5baeb6..33444634 100644 --- a/simzoo/envs/biological/oscillator/oscillator.py +++ b/simzoo/envs/biological/oscillator/oscillator.py @@ -128,7 +128,7 @@ class Oscillator(gym.Env, Disturber): """ # noqa: E501 def __init__(self, reference_type="periodic", seed=None): - """Constructs all the necessary attributes for the oscillator object. + """Constructs all the necessary attributes for the oscillator instance. Args: reference_type (str, optional): The type of reference you want to use @@ -142,7 +142,7 @@ def __init__(self, reference_type="periodic", seed=None): self.t = 0 self.dt = 1.0 self.sigma = 0.0 - self.__init_state = np.array( + self._init_state = np.array( [0.1, 0.2, 0.3, 0.1, 0.2, 0.3] ) # Initial state when random is disabled @@ -299,7 +299,7 @@ def reset(self, random=True): self.state = ( self.np_random.uniform(low=0, high=1, size=(6,)) if random - else self.__init_state + else self._init_state ) self.t = 0 m1, m2, m3, p1, p2, p3 = self.state diff --git a/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py b/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py index 739e2096..64b60db7 100644 --- a/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py +++ b/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py @@ -1,5 +1,5 @@ """Modified version of the classic -`CartPole-v1 `_ Openai Gym environment. In +`CartPole-v1 `_ OpenAi Gym environment. In this version two things are different compared to the original: - In this version, the action space is continuous, wherein the OpenAi version @@ -32,7 +32,7 @@ class CartPoleCost(gym.Env): velocity. Source: - This environment corresponds to the version that is included in the openAi gym + This environment corresponds to the version that is included in the OpenAi gym package. It is different in the fact that: - In this version, the action space is continuous, wherein the OpenAi version @@ -78,7 +78,7 @@ class CartPoleCost(gym.Env): Episode Termination: - Pole Angle is more than 12 degrees. - - Cart Position is more than 2.4 (center of the cart reaches the edge of the + - Cart Position is more than 5 m (center of the cart reaches the edge of the display). - Episode length is greater than 200. @@ -92,9 +92,31 @@ class CartPoleCost(gym.Env): "video.frames_per_second": 50, } # Not used during training but in other gym utilities - def __init__(self, seed=None, kinematics_integrator="euler", cost_type="reference"): + def __init__( + self, + seed=None, + cost_type="stabilize", + reference_type="constant", + kinematics_integrator="euler", + ): + """Constructs all the necessary attributes for the CartPoleCost instance. + + Args: + seed (int, optional): A random seed for the environment. By default + ``None``. + cost_type (str, optional): The cost type you want to use (options are + "reference" and "stabilization"). When stabilization is used the cart is + not kept at a given reference. Defaults to "reference". + reference_type (str, optional): The type of reference you want to use + (``constant`` or ``periodic``), by default ``periodic``. + kinematics_integrator (str, optional): Solver used for the kinematics + intergration (options are "euler", "friction", "semi-implicit euler"). + Defaults to "euler". + """ super().__init__() # Setup disturber + self.t = 0 + self.reference_type = reference_type self.length = self._length_init = 0.5 # actually half the pole's length self.mass_cart = self._mass_cart_init = 1.0 self.mass_pole = self._mass_pole_init = 0.1 @@ -103,10 +125,14 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc self.pole_mass_length = self.mass_pole * self.length self.tau = self.dt = 0.02 # seconds between state updates self.kinematics_integrator = kinematics_integrator - self.__init_state = np.array( + self.force_mag = 20 + self._init_state = np.array( [0.1, 0.2, 0.3, 0.1] ) # Initial state when random is disabled - self.force_mag = 20 + self._init_state_range = { + "low": [-0.2, -0.05, -0.05, -0.05], # NOTE: OpenAi uses -0.05 + "high": [0.2, 0.05, 0.05, 0.05], # NOTE: OpenAi uses 0.05 + } # Initial state range when random is enabled # Set the lyapunov constraint and target positions self.cost_type = cost_type @@ -114,16 +140,15 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc self.target_pos = 0 # Thresholds - # self.theta_threshold_radians = 12 * 2 * math.pi / 360 # OpenAi value - # self.max_v = np.finfo(np.float32).max # OpenAi value - # self.max_w = np.finfo(np.float32).max # OpenAi value - # self.x_threshold = 2.4 # OpenAI value self.theta_threshold_radians = ( - 20 * 2 * math.pi / 360 + 12 * 2 * math.pi / 360 ) # Angle at which to fail the episode - self.x_threshold = 6 - self.max_v = 50 - self.max_w = 50 + self.x_threshold = 5 # NOTE: OpenAi Uses 2.4 + self.y_threshold = ( + 5 # NOTE: Defines real world window height (not used as threshold) + ) + self.max_v = 50 # NOTE: OpenAi uses np.finfo(np.float32).max + self.max_w = 50 # NOTE: OpenAi uses np.finfo(np.float32).max # Set angle limit set to 2 * theta_threshold_radians so failing observation # is still within bounds @@ -143,7 +168,7 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc np.array([0.0], dtype=np.float32), np.array([np.finfo(np.float32).max], dtype=np.float32), dtype=np.float32, - ) + ) # NOTE: Added! # Create random seed and set gym environment parameters self.seed(seed) @@ -185,6 +210,21 @@ def reset_params(self): self.total_mass = self.mass_pole + self.mass_cart self.pole_mass_length = self.mass_pole * self.length + def reference(self, t): + """Returns the current value of the periodic reference signal that is tracked by + the Synthetic oscillatory network. + + Args: + t (float): The current time step. + + Returns: + float: The current reference value. + """ + if self.reference_type == "periodic": + return self.target_pos + 7 * np.sin((2 * np.pi) * t / 200) + else: + return self.target_pos + def cost(self, x, theta): """Returns the cost for a given cart position (x) and a pole angle (theta). @@ -200,25 +240,19 @@ def cost(self, x, theta): - r_2 (float): The cart_pole angle reference. """ if self.cost_type.lower() == "reference": - # Calculate cost (reference tracking) - r_1 = ((self.x_threshold / 10 - abs(x - self.target_pos))) / ( - self.x_threshold / 10 - ) - r_2 = ((self.theta_threshold_radians / 4) - abs(theta)) / ( - self.theta_threshold_radians / 4 - ) - cost = 20 * np.sign(r_2) * (r_2 ** 2) + np.sign(r_1) * ( - r_1 ** 2 - ) # Reference tracking task + stab_cost = x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2 + r1 = self.reference(self.t) + ref_cost = abs(x - r1) + # ref_cost = np.square(x - r1) + cost = stab_cost + ref_cost else: - # Calculate cost (stabilization task) cost = ( x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2 ) # Stabilization task - return cost, r_1, r_2 + return cost def step(self, action): """Take step into the environment. @@ -234,9 +268,6 @@ def step(self, action): - done (:obj:`bool`): Whether the episode was done. - info_dict (:obj:`dict`): Dictionary with additional information. """ - err_msg = "%r (%s) invalid" % (action, type(action)) - assert self.action_space.contains(action), err_msg - force = np.clip(action, self.action_space.low, self.action_space.high) # For the interested reader: @@ -274,6 +305,7 @@ def step(self, action): theta_dot = theta_dot + self.tau * theta_acc theta = theta + self.tau * theta_dot self.state = np.array([x, x_dot[0], theta, theta_dot[0]]) + self.t = self.t + self.dt # Increment time step # Define stopping criteria done = bool( @@ -281,7 +313,8 @@ def step(self, action): ) # Calculate cost - cost, r_1, r_2 = self.cost(x, theta) + # cost, r_1, r_2 = self.cost(x, theta) + cost = self.cost(x, theta) # Define stopping criteria if ( @@ -308,7 +341,7 @@ def step(self, action): target=self.target_pos, violation_of_x_threshold=violation_of_x_threshold, violation_of_constraint=violation_of_constraint, - reference=np.array([r_1, r_2]), + # reference=np.array([r_1, r_2]), state_of_interest=theta, ), ) @@ -325,10 +358,11 @@ def reset(self, random=True): """ # Return random initial state self.state = ( - # self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) # OpenAi value - self.np_random.uniform(low=[-5, -0.2, -0.2, -0.2], high=[5, 0.2, 0.2, 0.2]) + self.np_random.uniform( + low=self._init_state_range["low"], high=self._init_state_range["high"] + ) if random - else self.__init_state + else self._init_state ) self.steps_beyond_done = None return np.array(self.state) @@ -343,13 +377,16 @@ def render(self, mode="human"): screen_width = 600 screen_height = 400 - world_width = self.x_threshold * 2 - scale = screen_width / world_width - cart_y = 100 # Top of cart - polewidth = 10.0 - polelen = scale * (2.0 * self.length) - cartwidth = 50.0 - cartheight = 30.0 + # Make sure scaling is correct + world_x_width = self.x_threshold * 2 + world_y_width = self.y_threshold * 2 + x_scale = screen_width / world_x_width + y_scale = screen_height / world_y_width + cart_y = y_scale * 1.0 # Top of cart + pole_width = x_scale * 0.1 + pole_len = x_scale * (2.0 * self.length) + cart_width = x_scale * 0.5 + cart_height = y_scale * 0.3 if self.viewer is None: from gym.envs.classic_control import rendering @@ -357,17 +394,22 @@ def render(self, mode="human"): self.viewer = rendering.Viewer(screen_width, screen_height) # Render CartPole - l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 - axleoffset = cartheight / 4.0 + l, r, t, b = ( + -cart_width / 2, + cart_width / 2, + cart_height / 2, + -cart_height / 2, + ) + axleoffset = cart_height / 4.0 cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) self.carttrans = rendering.Transform() cart.add_attr(self.carttrans) self.viewer.add_geom(cart) l, r, t, b = ( - -polewidth / 2, - polewidth / 2, - polelen - polewidth / 2, - -polewidth / 2, + -pole_width / 2, + pole_width / 2, + pole_len - pole_width / 2, + -pole_width / 2, ) pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) pole.set_color(0.8, 0.6, 0.4) @@ -375,7 +417,7 @@ def render(self, mode="human"): pole.add_attr(self.poletrans) pole.add_attr(self.carttrans) self.viewer.add_geom(pole) - self.axle = rendering.make_circle(polewidth / 2) + self.axle = rendering.make_circle(pole_width / 2) self.axle.add_attr(self.poletrans) self.axle.add_attr(self.carttrans) self.axle.set_color(0.5, 0.5, 0.8) @@ -386,20 +428,20 @@ def render(self, mode="human"): # Render the target position self.target = rendering.Line( - (self.target_pos * scale + screen_width / 2.0, 0), - (self.target_pos * scale + screen_width / 2.0, screen_height), + (self.target_pos * x_scale + screen_width / 2.0, 0), + (self.target_pos * x_scale + screen_width / 2.0, screen_height), ) self.target.set_color(1, 0, 0) self.viewer.add_geom(self.target) # Render the constrain position self.neg_cons = rendering.Line( - (-self.const_pos * scale + screen_width / 2.0, 0), - (-self.const_pos * scale + screen_width / 2.0, screen_height), + (-self.const_pos * x_scale + screen_width / 2.0, 0), + (-self.const_pos * x_scale + screen_width / 2.0, screen_height), ) self.cons = rendering.Line( - (self.const_pos * scale + screen_width / 2.0, 0), - (self.const_pos * scale + screen_width / 2.0, screen_height), + (self.const_pos * x_scale + screen_width / 2.0, 0), + (self.const_pos * x_scale + screen_width / 2.0, screen_height), ) self.neg_cons.set_color(0, 0, 1) self.cons.set_color(0, 0, 1) @@ -414,16 +456,16 @@ def render(self, mode="human"): # Edit the pole polygon vertex pole = self._pole_geom l, r, t, b = ( - -polewidth / 2, - polewidth / 2, - polelen - polewidth / 2, - -polewidth / 2, + -pole_width / 2, + pole_width / 2, + pole_len - pole_width / 2, + -pole_width / 2, ) pole.v = [(l, b), (l, t), (r, t), (r, b)] # Apply card movement x = self.state - cart_x = x[0] * scale + screen_width / 2.0 # Middle of cart + cart_x = x[0] * x_scale + screen_width / 2.0 # Middle of cart self.carttrans.set_translation(cart_x, cart_y) self.poletrans.set_rotation(-x[2]) return self.viewer.render(return_rgb_array=mode == "rgb_array") diff --git a/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py b/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py index 89d4f8eb..5ca9470c 100644 --- a/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py +++ b/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py @@ -130,8 +130,8 @@ class Ex3EKF(gym.Env, Disturber): sigma (float): The variance of the system noise. """ # noqa: E501 - def __init__(self, seed=400): - """Constructs all the necessary attributes for the oscillator object. + def __init__(self, seed=None): + """Constructs all the necessary attributes for the Ex3EKF instance. Args: seed (int, optional): A random seed for the environment. By default