diff --git a/simzoo/envs/biological/oscillator/oscillator.py b/simzoo/envs/biological/oscillator/oscillator.py
index 7e5baeb6..33444634 100644
--- a/simzoo/envs/biological/oscillator/oscillator.py
+++ b/simzoo/envs/biological/oscillator/oscillator.py
@@ -128,7 +128,7 @@ class Oscillator(gym.Env, Disturber):
""" # noqa: E501
def __init__(self, reference_type="periodic", seed=None):
- """Constructs all the necessary attributes for the oscillator object.
+ """Constructs all the necessary attributes for the oscillator instance.
Args:
reference_type (str, optional): The type of reference you want to use
@@ -142,7 +142,7 @@ def __init__(self, reference_type="periodic", seed=None):
self.t = 0
self.dt = 1.0
self.sigma = 0.0
- self.__init_state = np.array(
+ self._init_state = np.array(
[0.1, 0.2, 0.3, 0.1, 0.2, 0.3]
) # Initial state when random is disabled
@@ -299,7 +299,7 @@ def reset(self, random=True):
self.state = (
self.np_random.uniform(low=0, high=1, size=(6,))
if random
- else self.__init_state
+ else self._init_state
)
self.t = 0
m1, m2, m3, p1, p2, p3 = self.state
diff --git a/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py b/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py
index 739e2096..64b60db7 100644
--- a/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py
+++ b/simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py
@@ -1,5 +1,5 @@
"""Modified version of the classic
-`CartPole-v1 `_ Openai Gym environment. In
+`CartPole-v1 `_ OpenAi Gym environment. In
this version two things are different compared to the original:
- In this version, the action space is continuous, wherein the OpenAi version
@@ -32,7 +32,7 @@ class CartPoleCost(gym.Env):
velocity.
Source:
- This environment corresponds to the version that is included in the openAi gym
+ This environment corresponds to the version that is included in the OpenAi gym
package. It is different in the fact that:
- In this version, the action space is continuous, wherein the OpenAi version
@@ -78,7 +78,7 @@ class CartPoleCost(gym.Env):
Episode Termination:
- Pole Angle is more than 12 degrees.
- - Cart Position is more than 2.4 (center of the cart reaches the edge of the
+ - Cart Position is more than 5 m (center of the cart reaches the edge of the
display).
- Episode length is greater than 200.
@@ -92,9 +92,31 @@ class CartPoleCost(gym.Env):
"video.frames_per_second": 50,
} # Not used during training but in other gym utilities
- def __init__(self, seed=None, kinematics_integrator="euler", cost_type="reference"):
+ def __init__(
+ self,
+ seed=None,
+ cost_type="stabilize",
+ reference_type="constant",
+ kinematics_integrator="euler",
+ ):
+ """Constructs all the necessary attributes for the CartPoleCost instance.
+
+ Args:
+ seed (int, optional): A random seed for the environment. By default
+ ``None``.
+ cost_type (str, optional): The cost type you want to use (options are
+ "reference" and "stabilization"). When stabilization is used the cart is
+ not kept at a given reference. Defaults to "reference".
+ reference_type (str, optional): The type of reference you want to use
+ (``constant`` or ``periodic``), by default ``periodic``.
+ kinematics_integrator (str, optional): Solver used for the kinematics
+ intergration (options are "euler", "friction", "semi-implicit euler").
+ Defaults to "euler".
+ """
super().__init__() # Setup disturber
+ self.t = 0
+ self.reference_type = reference_type
self.length = self._length_init = 0.5 # actually half the pole's length
self.mass_cart = self._mass_cart_init = 1.0
self.mass_pole = self._mass_pole_init = 0.1
@@ -103,10 +125,14 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc
self.pole_mass_length = self.mass_pole * self.length
self.tau = self.dt = 0.02 # seconds between state updates
self.kinematics_integrator = kinematics_integrator
- self.__init_state = np.array(
+ self.force_mag = 20
+ self._init_state = np.array(
[0.1, 0.2, 0.3, 0.1]
) # Initial state when random is disabled
- self.force_mag = 20
+ self._init_state_range = {
+ "low": [-0.2, -0.05, -0.05, -0.05], # NOTE: OpenAi uses -0.05
+ "high": [0.2, 0.05, 0.05, 0.05], # NOTE: OpenAi uses 0.05
+ } # Initial state range when random is enabled
# Set the lyapunov constraint and target positions
self.cost_type = cost_type
@@ -114,16 +140,15 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc
self.target_pos = 0
# Thresholds
- # self.theta_threshold_radians = 12 * 2 * math.pi / 360 # OpenAi value
- # self.max_v = np.finfo(np.float32).max # OpenAi value
- # self.max_w = np.finfo(np.float32).max # OpenAi value
- # self.x_threshold = 2.4 # OpenAI value
self.theta_threshold_radians = (
- 20 * 2 * math.pi / 360
+ 12 * 2 * math.pi / 360
) # Angle at which to fail the episode
- self.x_threshold = 6
- self.max_v = 50
- self.max_w = 50
+ self.x_threshold = 5 # NOTE: OpenAi Uses 2.4
+ self.y_threshold = (
+ 5 # NOTE: Defines real world window height (not used as threshold)
+ )
+ self.max_v = 50 # NOTE: OpenAi uses np.finfo(np.float32).max
+ self.max_w = 50 # NOTE: OpenAi uses np.finfo(np.float32).max
# Set angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds
@@ -143,7 +168,7 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc
np.array([0.0], dtype=np.float32),
np.array([np.finfo(np.float32).max], dtype=np.float32),
dtype=np.float32,
- )
+ ) # NOTE: Added!
# Create random seed and set gym environment parameters
self.seed(seed)
@@ -185,6 +210,21 @@ def reset_params(self):
self.total_mass = self.mass_pole + self.mass_cart
self.pole_mass_length = self.mass_pole * self.length
+ def reference(self, t):
+ """Returns the current value of the periodic reference signal that is tracked by
+ the Synthetic oscillatory network.
+
+ Args:
+ t (float): The current time step.
+
+ Returns:
+ float: The current reference value.
+ """
+ if self.reference_type == "periodic":
+ return self.target_pos + 7 * np.sin((2 * np.pi) * t / 200)
+ else:
+ return self.target_pos
+
def cost(self, x, theta):
"""Returns the cost for a given cart position (x) and a pole angle (theta).
@@ -200,25 +240,19 @@ def cost(self, x, theta):
- r_2 (float): The cart_pole angle reference.
"""
if self.cost_type.lower() == "reference":
-
# Calculate cost (reference tracking)
- r_1 = ((self.x_threshold / 10 - abs(x - self.target_pos))) / (
- self.x_threshold / 10
- )
- r_2 = ((self.theta_threshold_radians / 4) - abs(theta)) / (
- self.theta_threshold_radians / 4
- )
- cost = 20 * np.sign(r_2) * (r_2 ** 2) + np.sign(r_1) * (
- r_1 ** 2
- ) # Reference tracking task
+ stab_cost = x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2
+ r1 = self.reference(self.t)
+ ref_cost = abs(x - r1)
+ # ref_cost = np.square(x - r1)
+ cost = stab_cost + ref_cost
else:
-
# Calculate cost (stabilization task)
cost = (
x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2
) # Stabilization task
- return cost, r_1, r_2
+ return cost
def step(self, action):
"""Take step into the environment.
@@ -234,9 +268,6 @@ def step(self, action):
- done (:obj:`bool`): Whether the episode was done.
- info_dict (:obj:`dict`): Dictionary with additional information.
"""
- err_msg = "%r (%s) invalid" % (action, type(action))
- assert self.action_space.contains(action), err_msg
-
force = np.clip(action, self.action_space.low, self.action_space.high)
# For the interested reader:
@@ -274,6 +305,7 @@ def step(self, action):
theta_dot = theta_dot + self.tau * theta_acc
theta = theta + self.tau * theta_dot
self.state = np.array([x, x_dot[0], theta, theta_dot[0]])
+ self.t = self.t + self.dt # Increment time step
# Define stopping criteria
done = bool(
@@ -281,7 +313,8 @@ def step(self, action):
)
# Calculate cost
- cost, r_1, r_2 = self.cost(x, theta)
+ # cost, r_1, r_2 = self.cost(x, theta)
+ cost = self.cost(x, theta)
# Define stopping criteria
if (
@@ -308,7 +341,7 @@ def step(self, action):
target=self.target_pos,
violation_of_x_threshold=violation_of_x_threshold,
violation_of_constraint=violation_of_constraint,
- reference=np.array([r_1, r_2]),
+ # reference=np.array([r_1, r_2]),
state_of_interest=theta,
),
)
@@ -325,10 +358,11 @@ def reset(self, random=True):
"""
# Return random initial state
self.state = (
- # self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) # OpenAi value
- self.np_random.uniform(low=[-5, -0.2, -0.2, -0.2], high=[5, 0.2, 0.2, 0.2])
+ self.np_random.uniform(
+ low=self._init_state_range["low"], high=self._init_state_range["high"]
+ )
if random
- else self.__init_state
+ else self._init_state
)
self.steps_beyond_done = None
return np.array(self.state)
@@ -343,13 +377,16 @@ def render(self, mode="human"):
screen_width = 600
screen_height = 400
- world_width = self.x_threshold * 2
- scale = screen_width / world_width
- cart_y = 100 # Top of cart
- polewidth = 10.0
- polelen = scale * (2.0 * self.length)
- cartwidth = 50.0
- cartheight = 30.0
+ # Make sure scaling is correct
+ world_x_width = self.x_threshold * 2
+ world_y_width = self.y_threshold * 2
+ x_scale = screen_width / world_x_width
+ y_scale = screen_height / world_y_width
+ cart_y = y_scale * 1.0 # Top of cart
+ pole_width = x_scale * 0.1
+ pole_len = x_scale * (2.0 * self.length)
+ cart_width = x_scale * 0.5
+ cart_height = y_scale * 0.3
if self.viewer is None:
from gym.envs.classic_control import rendering
@@ -357,17 +394,22 @@ def render(self, mode="human"):
self.viewer = rendering.Viewer(screen_width, screen_height)
# Render CartPole
- l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
- axleoffset = cartheight / 4.0
+ l, r, t, b = (
+ -cart_width / 2,
+ cart_width / 2,
+ cart_height / 2,
+ -cart_height / 2,
+ )
+ axleoffset = cart_height / 4.0
cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
self.carttrans = rendering.Transform()
cart.add_attr(self.carttrans)
self.viewer.add_geom(cart)
l, r, t, b = (
- -polewidth / 2,
- polewidth / 2,
- polelen - polewidth / 2,
- -polewidth / 2,
+ -pole_width / 2,
+ pole_width / 2,
+ pole_len - pole_width / 2,
+ -pole_width / 2,
)
pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
pole.set_color(0.8, 0.6, 0.4)
@@ -375,7 +417,7 @@ def render(self, mode="human"):
pole.add_attr(self.poletrans)
pole.add_attr(self.carttrans)
self.viewer.add_geom(pole)
- self.axle = rendering.make_circle(polewidth / 2)
+ self.axle = rendering.make_circle(pole_width / 2)
self.axle.add_attr(self.poletrans)
self.axle.add_attr(self.carttrans)
self.axle.set_color(0.5, 0.5, 0.8)
@@ -386,20 +428,20 @@ def render(self, mode="human"):
# Render the target position
self.target = rendering.Line(
- (self.target_pos * scale + screen_width / 2.0, 0),
- (self.target_pos * scale + screen_width / 2.0, screen_height),
+ (self.target_pos * x_scale + screen_width / 2.0, 0),
+ (self.target_pos * x_scale + screen_width / 2.0, screen_height),
)
self.target.set_color(1, 0, 0)
self.viewer.add_geom(self.target)
# Render the constrain position
self.neg_cons = rendering.Line(
- (-self.const_pos * scale + screen_width / 2.0, 0),
- (-self.const_pos * scale + screen_width / 2.0, screen_height),
+ (-self.const_pos * x_scale + screen_width / 2.0, 0),
+ (-self.const_pos * x_scale + screen_width / 2.0, screen_height),
)
self.cons = rendering.Line(
- (self.const_pos * scale + screen_width / 2.0, 0),
- (self.const_pos * scale + screen_width / 2.0, screen_height),
+ (self.const_pos * x_scale + screen_width / 2.0, 0),
+ (self.const_pos * x_scale + screen_width / 2.0, screen_height),
)
self.neg_cons.set_color(0, 0, 1)
self.cons.set_color(0, 0, 1)
@@ -414,16 +456,16 @@ def render(self, mode="human"):
# Edit the pole polygon vertex
pole = self._pole_geom
l, r, t, b = (
- -polewidth / 2,
- polewidth / 2,
- polelen - polewidth / 2,
- -polewidth / 2,
+ -pole_width / 2,
+ pole_width / 2,
+ pole_len - pole_width / 2,
+ -pole_width / 2,
)
pole.v = [(l, b), (l, t), (r, t), (r, b)]
# Apply card movement
x = self.state
- cart_x = x[0] * scale + screen_width / 2.0 # Middle of cart
+ cart_x = x[0] * x_scale + screen_width / 2.0 # Middle of cart
self.carttrans.set_translation(cart_x, cart_y)
self.poletrans.set_rotation(-x[2])
return self.viewer.render(return_rgb_array=mode == "rgb_array")
diff --git a/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py b/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py
index 89d4f8eb..5ca9470c 100644
--- a/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py
+++ b/simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py
@@ -130,8 +130,8 @@ class Ex3EKF(gym.Env, Disturber):
sigma (float): The variance of the system noise.
""" # noqa: E501
- def __init__(self, seed=400):
- """Constructs all the necessary attributes for the oscillator object.
+ def __init__(self, seed=None):
+ """Constructs all the necessary attributes for the Ex3EKF instance.
Args:
seed (int, optional): A random seed for the environment. By default