rickstaa · rickstaa · Jul 26, 2023 · Jul 26, 2023
diff --git a/stable_gym/envs/classic_control/cartpole_cost/README.md b/stable_gym/envs/classic_control/cartpole_cost/README.md
@@ -59,7 +59,11 @@ In addition to the observations, the cost and a termination and truncation boole
 [observation, cost, termination, truncation, info_dict]
 ```
 
-The info dictionary currently is empty.
+The info dictionary contains the following keys:
+
+*   **reference**: The set cart position and angle reference (i.e. the zero position and angle).
+*   **state\_of\_interest**: The state that should track the reference (SOI).
+*   **reference\_error**: The error between SOI and the reference.
 
 ## How to use
 

diff --git a/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py b/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py
@@ -396,14 +396,19 @@ def step(self, action):
 
         # Create observation and info dict.
         obs = np.array(self.state)
+        info_dict = dict(
+            reference=np.array([0.0, 0.0]),
+            state_of_interest=np.array([x, theta]),
+            reference_error=np.array([-x, -theta]),
+        )
 
         # NOTE: The original returns an empty info dict.
         return (
             obs,
             cost,
             terminated,
             False,
-            {},
+            info_dict,
         )
 
     def reset(self, seed=None, options=None, random=True):
@@ -467,15 +472,21 @@ def reset(self, seed=None, options=None, random=True):
         self.steps_beyond_terminated = None
         self.t = 0.0
 
-        # Create info dict and observation.
+        # Retrieve observation and info_dict.
         obs = np.array(self.state)
+        x, _, theta, _ = self.state
+        info_dict = dict(
+            reference=np.array([0.0, 0.0]),
+            state_of_interest=np.array([x, theta]),
+            reference_error=np.array([-x, -theta]),
+        )
 
         # Render environment reset if requested.
         if self.render_mode == "human":
             self.render()
 
         # NOTE: The original returns an empty info dict.
-        return obs, {}
+        return obs, info_dict
 
     def render(self):
         """Render one frame of the environment."""

diff --git a/stable_gym/envs/classic_control/cartpole_tracking_cost/cartpole_tracking_cost.py b/stable_gym/envs/classic_control/cartpole_tracking_cost/cartpole_tracking_cost.py
@@ -348,11 +348,10 @@ def cost(self, x, theta):
 
                 -   cost (float): The current cost.
                 -   r_1 (float): The current position reference.
-                -   r_2 (float): The cart_pole angle reference.
         """
         # TODO: Fine-tune cost function. The current one is a initial test.
-        ref = [self.reference(self.t), 0.0]
-        ref_cost = np.square(x - ref[0])
+        ref = self.reference(self.t)
+        ref_cost = np.square(x - ref)
         stab_cost = np.square(theta / self.theta_threshold_radians)
 
         cost = stab_cost + ref_cost
@@ -465,15 +464,15 @@ def step(self, action):
         obs = np.append(
             np.array(self.state),
             np.array(
-                [ref[0]]
+                [ref]
                 if self._exclude_reference_error_from_observation
-                else [ref[0], x - ref[0]]
+                else [ref, x - ref]
             ),
         )
         info_dict = dict(
-            reference=ref[0],
+            reference=ref,
             state_of_interest=x,
-            reference_error=x - ref[0],
+            reference_error=x - ref,
         )
 
         # NOTE: The original returns an empty info dict.
@@ -546,20 +545,20 @@ def reset(self, seed=None, options=None, random=True):
         self.steps_beyond_terminated = None
         self.t = 0.0
 
-        # Create info dict and observation.
+        # Retrieve observation and info_dict.
         x, _, theta, _ = self.state
         _, ref = self.cost(x, theta)
         info_dict = dict(
-            reference=ref[0],
+            reference=ref,
             state_of_interest=x,
-            reference_error=x - ref[0],
+            reference_error=x - ref,
         )
         obs = np.append(
             np.array(self.state),
             np.array(
-                [ref[0]]
+                [ref]
                 if self._exclude_reference_error_from_observation
-                else [ref[0], x - ref[0]]
+                else [ref, x - ref]
             ),
         )