rickstaa · rickstaa · Jan 27, 2024 · Jan 27, 2024
diff --git a/stable_gym/envs/classic_control/cartpole_cost/README.md b/stable_gym/envs/classic_control/cartpole_cost/README.md
@@ -25,7 +25,7 @@ An unactuated joint attaches a pole to a cart, which moves along a frictionless
 
 Additional modifications in our implementation:
 
-* An extra termination criterion for cumulative costs over `100` is added to hasten training.
+* Unlike the original environment's fixed cost threshold of `100`, this version allows users to adjust the maximum cost threshold via the `max_cost` input, improving training adaptability.
 * The gravity constant is adjusted back from `10` to the real-world value of `9.8`, aligning it closer with the original CartPole environment.
 
 These modifications were first described in [Han et al. 2019](https://arxiv.org/abs/2004.14288) and further adapted in our version for enhanced training and exploration.

diff --git a/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py b/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py
@@ -54,10 +54,11 @@ class CartPoleCost(gym.Env):
 
         Additional modifications in our implementation:
 
-            - An extra termination criterion for cumulative costs over ``100`` is added to
-              hasten training.
-            - The gravity constant is adjusted back from ``10`` to the real-world value of
-              ``9.8``, aligning it closer with the original CartPole environment.
+            - Unlike the original environment's fixed cost threshold of ``100``, this
+              version allows users to adjust the maximum cost threshold via the
+              :obj:`max_cost` input, improving training adaptability.
+            - The gravity constant is adjusted back from ``10`` to the real-world value
+              of ``9.8``, aligning it closer with the original CartPole environment.
 
     Observation:
         **Type**: Box(4) or Box(6)
@@ -238,7 +239,7 @@ def __init__(
         # Clip the reward.
         # NOTE: Original does not do this. Here this is done because we want to decrease
         # the cost.
-        self.reward_range = (0.0, 100.0)
+        self.reward_range = (0.0, max_cost)
 
         self.screen_width = 600
         self.screen_height = 400

diff --git a/stable_gym/envs/classic_control/cartpole_tracking_cost/README.md b/stable_gym/envs/classic_control/cartpole_tracking_cost/README.md
@@ -21,6 +21,7 @@ An unactuated joint attaches a pole to a cart, which moves along a frictionless
 
 Additional modifications in our implementation:
 
+* Unlike the original environment's fixed cost threshold of `100`, this version allows users to adjust the maximum cost threshold via the `max_cost` input, improving training adaptability.
 * An extra termination criterion for cumulative costs over `100` is added to hasten training.
 * The gravity constant is adjusted back from `10` to the real-world value of `9.8`, aligning it closer with the original CartPole environment.
 * The stabilization objective is replaced with a **reference tracking task** for enhanced control.

diff --git a/stable_gym/envs/classic_control/cartpole_tracking_cost/cartpole_tracking_cost.py b/stable_gym/envs/classic_control/cartpole_tracking_cost/cartpole_tracking_cost.py
@@ -47,10 +47,11 @@ class CartPoleTrackingCost(gym.Env):
 
         Additional modifications in our implementation:
 
-            - An extra termination criterion for cumulative costs over ``100`` is added to
-              hasten training.
-            - The gravity constant is adjusted back from ``10`` to the real-world value of
-              ``9.8``, aligning it closer with the original CartPole environment.
+            - Unlike the original environment's fixed cost threshold of ``100``, this
+              version allows users to adjust the maximum cost threshold via the
+              :obj:`max_cost` input, improving training adaptability.
+            - The gravity constant is adjusted back from ``10`` to the real-world value
+              of ``9.8``, aligning it closer with the original CartPole environment.
             - The stabilization objective is replaced with a **reference tracking task**
               for enhanced control.
             - Two additional observations are introduced, facilitating
@@ -275,7 +276,7 @@ def __init__(
         # Clip the reward.
         # NOTE: Original does not do this. Here this is done because we want to decrease
         # the cost.
-        self.reward_range = (0.0, 100.0)
+        self.reward_range = (0.0, max_cost)
 
         self.screen_width = 600
         self.screen_height = 400