Skip to content

Commit

Permalink
🐛 Fixes the cartpole cost formula
Browse files Browse the repository at this point in the history
The old reference tracking cost formula that was supplied by minghoa was
wrong in the sense that it was not positive definite. The new cost
function is positive definite. This commit also improves the render
scaling.
  • Loading branch information
rickstaa committed Apr 1, 2021
1 parent faf3984 commit 42a5dcd
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 65 deletions.
6 changes: 3 additions & 3 deletions simzoo/envs/biological/oscillator/oscillator.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class Oscillator(gym.Env, Disturber):
""" # noqa: E501

def __init__(self, reference_type="periodic", seed=None):
"""Constructs all the necessary attributes for the oscillator object.
"""Constructs all the necessary attributes for the oscillator instance.
Args:
reference_type (str, optional): The type of reference you want to use
Expand All @@ -142,7 +142,7 @@ def __init__(self, reference_type="periodic", seed=None):
self.t = 0
self.dt = 1.0
self.sigma = 0.0
self.__init_state = np.array(
self._init_state = np.array(
[0.1, 0.2, 0.3, 0.1, 0.2, 0.3]
) # Initial state when random is disabled

Expand Down Expand Up @@ -299,7 +299,7 @@ def reset(self, random=True):
self.state = (
self.np_random.uniform(low=0, high=1, size=(6,))
if random
else self.__init_state
else self._init_state
)
self.t = 0
m1, m2, m3, p1, p2, p3 = self.state
Expand Down
162 changes: 102 additions & 60 deletions simzoo/envs/classic_control/cart_pole_cost/cart_pole_cost.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Modified version of the classic
`CartPole-v1 <https://gym.openai.com/envs/CartPole-v1/>`_ Openai Gym environment. In
`CartPole-v1 <https://gym.OpenAi.com/envs/CartPole-v1/>`_ OpenAi Gym environment. In
this version two things are different compared to the original:
- In this version, the action space is continuous, wherein the OpenAi version
Expand Down Expand Up @@ -32,7 +32,7 @@ class CartPoleCost(gym.Env):
velocity.
Source:
This environment corresponds to the version that is included in the openAi gym
This environment corresponds to the version that is included in the OpenAi gym
package. It is different in the fact that:
- In this version, the action space is continuous, wherein the OpenAi version
Expand Down Expand Up @@ -78,7 +78,7 @@ class CartPoleCost(gym.Env):
Episode Termination:
- Pole Angle is more than 12 degrees.
- Cart Position is more than 2.4 (center of the cart reaches the edge of the
- Cart Position is more than 5 m (center of the cart reaches the edge of the
display).
- Episode length is greater than 200.
Expand All @@ -92,9 +92,31 @@ class CartPoleCost(gym.Env):
"video.frames_per_second": 50,
} # Not used during training but in other gym utilities

def __init__(self, seed=None, kinematics_integrator="euler", cost_type="reference"):
def __init__(
self,
seed=None,
cost_type="stabilize",
reference_type="constant",
kinematics_integrator="euler",
):
"""Constructs all the necessary attributes for the CartPoleCost instance.
Args:
seed (int, optional): A random seed for the environment. By default
``None``.
cost_type (str, optional): The cost type you want to use (options are
"reference" and "stabilization"). When stabilization is used the cart is
not kept at a given reference. Defaults to "reference".
reference_type (str, optional): The type of reference you want to use
(``constant`` or ``periodic``), by default ``periodic``.
kinematics_integrator (str, optional): Solver used for the kinematics
intergration (options are "euler", "friction", "semi-implicit euler").
Defaults to "euler".
"""
super().__init__() # Setup disturber

self.t = 0
self.reference_type = reference_type
self.length = self._length_init = 0.5 # actually half the pole's length
self.mass_cart = self._mass_cart_init = 1.0
self.mass_pole = self._mass_pole_init = 0.1
Expand All @@ -103,27 +125,30 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc
self.pole_mass_length = self.mass_pole * self.length
self.tau = self.dt = 0.02 # seconds between state updates
self.kinematics_integrator = kinematics_integrator
self.__init_state = np.array(
self.force_mag = 20
self._init_state = np.array(
[0.1, 0.2, 0.3, 0.1]
) # Initial state when random is disabled
self.force_mag = 20
self._init_state_range = {
"low": [-0.2, -0.05, -0.05, -0.05], # NOTE: OpenAi uses -0.05
"high": [0.2, 0.05, 0.05, 0.05], # NOTE: OpenAi uses 0.05
} # Initial state range when random is enabled

# Set the lyapunov constraint and target positions
self.cost_type = cost_type
self.const_pos = 4
self.target_pos = 0

# Thresholds
# self.theta_threshold_radians = 12 * 2 * math.pi / 360 # OpenAi value
# self.max_v = np.finfo(np.float32).max # OpenAi value
# self.max_w = np.finfo(np.float32).max # OpenAi value
# self.x_threshold = 2.4 # OpenAI value
self.theta_threshold_radians = (
20 * 2 * math.pi / 360
12 * 2 * math.pi / 360
) # Angle at which to fail the episode
self.x_threshold = 6
self.max_v = 50
self.max_w = 50
self.x_threshold = 5 # NOTE: OpenAi Uses 2.4
self.y_threshold = (
5 # NOTE: Defines real world window height (not used as threshold)
)
self.max_v = 50 # NOTE: OpenAi uses np.finfo(np.float32).max
self.max_w = 50 # NOTE: OpenAi uses np.finfo(np.float32).max

# Set angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds
Expand All @@ -143,7 +168,7 @@ def __init__(self, seed=None, kinematics_integrator="euler", cost_type="referenc
np.array([0.0], dtype=np.float32),
np.array([np.finfo(np.float32).max], dtype=np.float32),
dtype=np.float32,
)
) # NOTE: Added!

# Create random seed and set gym environment parameters
self.seed(seed)
Expand Down Expand Up @@ -185,6 +210,21 @@ def reset_params(self):
self.total_mass = self.mass_pole + self.mass_cart
self.pole_mass_length = self.mass_pole * self.length

def reference(self, t):
"""Returns the current value of the periodic reference signal that is tracked by
the Synthetic oscillatory network.
Args:
t (float): The current time step.
Returns:
float: The current reference value.
"""
if self.reference_type == "periodic":
return self.target_pos + 7 * np.sin((2 * np.pi) * t / 200)
else:
return self.target_pos

def cost(self, x, theta):
"""Returns the cost for a given cart position (x) and a pole angle (theta).
Expand All @@ -200,25 +240,19 @@ def cost(self, x, theta):
- r_2 (float): The cart_pole angle reference.
"""
if self.cost_type.lower() == "reference":

# Calculate cost (reference tracking)
r_1 = ((self.x_threshold / 10 - abs(x - self.target_pos))) / (
self.x_threshold / 10
)
r_2 = ((self.theta_threshold_radians / 4) - abs(theta)) / (
self.theta_threshold_radians / 4
)
cost = 20 * np.sign(r_2) * (r_2 ** 2) + np.sign(r_1) * (
r_1 ** 2
) # Reference tracking task
stab_cost = x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2
r1 = self.reference(self.t)
ref_cost = abs(x - r1)
# ref_cost = np.square(x - r1)
cost = stab_cost + ref_cost
else:

# Calculate cost (stabilization task)
cost = (
x ** 2 / 100 + 20 * (theta / self.theta_threshold_radians) ** 2
) # Stabilization task

return cost, r_1, r_2
return cost

def step(self, action):
"""Take step into the environment.
Expand All @@ -234,9 +268,6 @@ def step(self, action):
- done (:obj:`bool`): Whether the episode was done.
- info_dict (:obj:`dict`): Dictionary with additional information.
"""
err_msg = "%r (%s) invalid" % (action, type(action))
assert self.action_space.contains(action), err_msg

force = np.clip(action, self.action_space.low, self.action_space.high)

# For the interested reader:
Expand Down Expand Up @@ -274,14 +305,16 @@ def step(self, action):
theta_dot = theta_dot + self.tau * theta_acc
theta = theta + self.tau * theta_dot
self.state = np.array([x, x_dot[0], theta, theta_dot[0]])
self.t = self.t + self.dt # Increment time step

# Define stopping criteria
done = bool(
abs(x) > self.x_threshold or abs(theta) > self.theta_threshold_radians
)

# Calculate cost
cost, r_1, r_2 = self.cost(x, theta)
# cost, r_1, r_2 = self.cost(x, theta)
cost = self.cost(x, theta)

# Define stopping criteria
if (
Expand All @@ -308,7 +341,7 @@ def step(self, action):
target=self.target_pos,
violation_of_x_threshold=violation_of_x_threshold,
violation_of_constraint=violation_of_constraint,
reference=np.array([r_1, r_2]),
# reference=np.array([r_1, r_2]),
state_of_interest=theta,
),
)
Expand All @@ -325,10 +358,11 @@ def reset(self, random=True):
"""
# Return random initial state
self.state = (
# self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) # OpenAi value
self.np_random.uniform(low=[-5, -0.2, -0.2, -0.2], high=[5, 0.2, 0.2, 0.2])
self.np_random.uniform(
low=self._init_state_range["low"], high=self._init_state_range["high"]
)
if random
else self.__init_state
else self._init_state
)
self.steps_beyond_done = None
return np.array(self.state)
Expand All @@ -343,39 +377,47 @@ def render(self, mode="human"):
screen_width = 600
screen_height = 400

world_width = self.x_threshold * 2
scale = screen_width / world_width
cart_y = 100 # Top of cart
polewidth = 10.0
polelen = scale * (2.0 * self.length)
cartwidth = 50.0
cartheight = 30.0
# Make sure scaling is correct
world_x_width = self.x_threshold * 2
world_y_width = self.y_threshold * 2
x_scale = screen_width / world_x_width
y_scale = screen_height / world_y_width
cart_y = y_scale * 1.0 # Top of cart
pole_width = x_scale * 0.1
pole_len = x_scale * (2.0 * self.length)
cart_width = x_scale * 0.5
cart_height = y_scale * 0.3

if self.viewer is None:
from gym.envs.classic_control import rendering

self.viewer = rendering.Viewer(screen_width, screen_height)

# Render CartPole
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
l, r, t, b = (
-cart_width / 2,
cart_width / 2,
cart_height / 2,
-cart_height / 2,
)
axleoffset = cart_height / 4.0
cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
self.carttrans = rendering.Transform()
cart.add_attr(self.carttrans)
self.viewer.add_geom(cart)
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
-pole_width / 2,
pole_width / 2,
pole_len - pole_width / 2,
-pole_width / 2,
)
pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
pole.set_color(0.8, 0.6, 0.4)
self.poletrans = rendering.Transform(translation=(0, axleoffset))
pole.add_attr(self.poletrans)
pole.add_attr(self.carttrans)
self.viewer.add_geom(pole)
self.axle = rendering.make_circle(polewidth / 2)
self.axle = rendering.make_circle(pole_width / 2)
self.axle.add_attr(self.poletrans)
self.axle.add_attr(self.carttrans)
self.axle.set_color(0.5, 0.5, 0.8)
Expand All @@ -386,20 +428,20 @@ def render(self, mode="human"):

# Render the target position
self.target = rendering.Line(
(self.target_pos * scale + screen_width / 2.0, 0),
(self.target_pos * scale + screen_width / 2.0, screen_height),
(self.target_pos * x_scale + screen_width / 2.0, 0),
(self.target_pos * x_scale + screen_width / 2.0, screen_height),
)
self.target.set_color(1, 0, 0)
self.viewer.add_geom(self.target)

# Render the constrain position
self.neg_cons = rendering.Line(
(-self.const_pos * scale + screen_width / 2.0, 0),
(-self.const_pos * scale + screen_width / 2.0, screen_height),
(-self.const_pos * x_scale + screen_width / 2.0, 0),
(-self.const_pos * x_scale + screen_width / 2.0, screen_height),
)
self.cons = rendering.Line(
(self.const_pos * scale + screen_width / 2.0, 0),
(self.const_pos * scale + screen_width / 2.0, screen_height),
(self.const_pos * x_scale + screen_width / 2.0, 0),
(self.const_pos * x_scale + screen_width / 2.0, screen_height),
)
self.neg_cons.set_color(0, 0, 1)
self.cons.set_color(0, 0, 1)
Expand All @@ -414,16 +456,16 @@ def render(self, mode="human"):
# Edit the pole polygon vertex
pole = self._pole_geom
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
-pole_width / 2,
pole_width / 2,
pole_len - pole_width / 2,
-pole_width / 2,
)
pole.v = [(l, b), (l, t), (r, t), (r, b)]

# Apply card movement
x = self.state
cart_x = x[0] * scale + screen_width / 2.0 # Middle of cart
cart_x = x[0] * x_scale + screen_width / 2.0 # Middle of cart
self.carttrans.set_translation(cart_x, cart_y)
self.poletrans.set_rotation(-x[2])
return self.viewer.render(return_rgb_array=mode == "rgb_array")
Expand Down
4 changes: 2 additions & 2 deletions simzoo/envs/classic_control/ex3_ekf/ex3_ekf.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ class Ex3EKF(gym.Env, Disturber):
sigma (float): The variance of the system noise.
""" # noqa: E501

def __init__(self, seed=400):
"""Constructs all the necessary attributes for the oscillator object.
def __init__(self, seed=None):
"""Constructs all the necessary attributes for the Ex3EKF instance.
Args:
seed (int, optional): A random seed for the environment. By default
Expand Down

0 comments on commit 42a5dcd

Please sign in to comment.