feat: add 'FetchReachCost' environment (#204)
This commit adds a modified version of the
[FetchReach environment](https://robotics.farama.org/envs/fetch/reach/)
found in the [gymnasium robotics](https://robotics.farama.org) package.
In this modified version, the reward was replaced by a cost. This cost
is the Euclidean distance between the achieved goal position and the desired goal.
rickstaa authored Jul 13, 2023
1 parent 4d84df7 commit 69d15e7
Showing 25 changed files with 326 additions and 22 deletions.
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -92,6 +92,7 @@
extlinks = {
"stable_gym": ("https://github.com/rickstaa/stable-gym/%s", None),
"gymnasium": ("https://gymnasium.farama.org/%s", None),
"gymnasium-robotics": ("https://robotics.farama.org/%s", None),
"stable_learning_control": (
"https://github.com/rickstaa/stable-learning-control/%s",
None,
5 changes: 5 additions & 0 deletions docs/source/envs/envs.rst
@@ -56,6 +56,11 @@ Environments that are based on the `Mujoco`_ or `Mujoco gymnasium`_ environments
Robotics environment
--------------------

.. toctree::
:maxdepth: 1

./robotics/fetch_reach_cost.rst

.. note::

The ROS robotics environments of the Stable Gym package were moved into a separate package called :ros_gazebo_gym:`Ros Gazebo Gym <>`.
2 changes: 2 additions & 0 deletions docs/source/envs/robotics/fetch_reach_cost.rst
@@ -0,0 +1,2 @@
.. include:: ../../../../stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md
:parser: myst_parser.sphinx_
1 change: 1 addition & 0 deletions examples/use_stable_gym.py
@@ -6,6 +6,7 @@
ENV_NAME = "Oscillator-v1"
# ENV_NAME = "CartPoleCost-v1"
# ENV_NAME = "SwimmerCost-v1"
# ENV_NAME = "FetchReachCost-v1"

if __name__ == "__main__":
env = gym.make(ENV_NAME, render_mode="human")
3 changes: 2 additions & 1 deletion examples/use_stable_gym_vectorized.py
@@ -10,8 +10,9 @@
import stable_gym # noqa: F401

ENV_NAME = "Oscillator-v1"
# ENV_NAME = "Ex3EKF-v1"
# ENV_NAME = "CartPoleCost-v1"
# ENV_NAME = "SwimmerCost-v1"
# ENV_NAME = "FetchReachCost-v1"

if __name__ == "__main__":
envs = gym.vector.make(ENV_NAME, render_mode="human", num_envs=3)
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -30,6 +30,7 @@ dependencies = [
"gymnasium>=0.28.1",
"gymnasium[classic_control]>=0.28.1",
"gymnasium[mujoco]>=0.28.1",
"gymnasium-robotics>=1.2.2",
"matplotlib>=3.7.1",
"iteration_utilities>=0.11.0",
"mujoco==2.3.3" # TODO: Remove when https://github.com/Farama-Foundation/Gymnasium/issues/597 is resolved.
5 changes: 5 additions & 0 deletions stable_gym/__init__.py
@@ -58,6 +58,11 @@
"max_step": 250,
"reward_threshold": 300,
},
"FetchReachCost-v1": {
"module": "stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost:FetchReachCost",
"max_step": 50,
"reward_threshold": 300,
},
}

for env, val in ENVS.items():
2 changes: 1 addition & 1 deletion stable_gym/envs/biological/oscillator/oscillator.py
@@ -112,7 +112,7 @@ class Oscillator(gym.Env, OscillatorDisturber):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
sigma (float): The variance of the system noise.
""" # noqa: E501

@@ -128,7 +128,7 @@ class is based on the :class:`~stable_gym.envs.biological.oscillator.oscillator.
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
sigma (float): The variance of the system noise.
""" # noqa: E501

@@ -477,7 +477,7 @@ def step(self, action):
"You are calling 'step()' even though this "
"environment has already returned terminated = True. You "
"should always call 'reset()' once you receive 'terminated = "
"True' -- any further steps are undefined behavior."
"True' -- any further steps are undefined behaviour."
)
self.steps_beyond_terminated += 1

@@ -607,7 +607,7 @@ def render(self):
"""Render one frame of the environment."""
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
2 changes: 1 addition & 1 deletion stable_gym/envs/classic_control/ex3_ekf/ex3_ekf.py
@@ -73,7 +73,7 @@ class Ex3EKF(gym.Env, Ex3EKFDisturber):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
sigma (float): The variance of the system noise.
""" # noqa: E501, W605

3 changes: 1 addition & 2 deletions stable_gym/envs/mujoco/ant_cost/ant_cost.py
@@ -55,8 +55,7 @@ class AntCost(AntEnv, utils.EzPickle):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
reference_forward_velocity (float): The forward velocity that the agent should
try to track.
""" # noqa: E501, W605
@@ -55,8 +55,7 @@ class HalfCheetahCost(HalfCheetahEnv, utils.EzPickle):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
reference_forward_velocity (float): The forward velocity that the agent should
try to track.
""" # noqa: E501, W605
3 changes: 1 addition & 2 deletions stable_gym/envs/mujoco/hopper_cost/hopper_cost.py
@@ -54,8 +54,7 @@ class HopperCost(HopperEnv, utils.EzPickle):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
reference_forward_velocity (float): The forward velocity that the agent should
try to track.
""" # noqa: E501, W605
5 changes: 2 additions & 3 deletions stable_gym/envs/mujoco/humanoid_cost/humanoid_cost.py
@@ -19,7 +19,7 @@ class HumanoidCost(HumanoidEnv, utils.EzPickle):
.. note::
Can also be used in a vectorized manner. See the
:gymnasium:`gym.vector <api/vector>`w documentation.
:gymnasium:`gym.vector <api/vector>` documentation.
Source:
This is a modified version of the Humanoid Mujoco environment in v0.28.1 of the
@@ -55,8 +55,7 @@ class HumanoidCost(HumanoidEnv, utils.EzPickle):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
reference_forward_velocity (float): The forward velocity that the agent should
try to track.
""" # noqa: E501, W605
3 changes: 1 addition & 2 deletions stable_gym/envs/mujoco/swimmer_cost/swimmer_cost.py
@@ -54,8 +54,7 @@ class SwimmerCost(SwimmerEnv, utils.EzPickle):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
reference_forward_velocity (float): The forward velocity that the agent should
try to track.
""" # noqa: E501, W605
3 changes: 1 addition & 2 deletions stable_gym/envs/mujoco/walker2d_cost/walker2d_cost.py
@@ -54,8 +54,7 @@ class Walker2dCost(Walker2dEnv, utils.EzPickle):
Attributes:
state (numpy.ndarray): The current system state.
t (float): The current time step.
dt (float): The environment step size.
dt (float): The environment step size. Also available as :attr:`.tau`.
reference_forward_velocity (float): The forward velocity that the agent should
try to track.
""" # noqa: E501, W605
17 changes: 17 additions & 0 deletions stable_gym/envs/robotics/__init__.py
@@ -0,0 +1,17 @@
"""Stable Gym gymnasium environments that are based on the environments found in the
:gymnasium-robotics:`Gymnasium Robotics <>` package.
.. note::
Some of these environments are based on the :class:`gym.GoalEnv` class. This means
that the ``step`` method returns a dictionary with the following keys:
- ``observation``: The observation of the environment.
- ``achieved_goal``: The goal that was achieved during execution.
- ``desired_goal``: The desired goal that we asked the agent to attempt to achieve.
If you want to use these environments with RL algorithms that expect the ``step``
method to return a :obj:`np.ndarray` instead of a dictionary, you can use the
:class:`gym.wrappers.FlattenObservation` wrapper to flatten the dictionary into a
single :obj:`np.ndarray`.
"""
5 changes: 5 additions & 0 deletions stable_gym/envs/robotics/fetch/README.md
@@ -0,0 +1,5 @@
# FetchCost gymnasium environments

The [stable-gym package](https://github.com/rickstaa/stable-gym) contains modified versions of the [fetch environments](https://robotics.farama.org/envs/fetch/) found in the [gymnasium robotics package](https://robotics.farama.org). These environments differ in that they return a (positive) cost instead of a (negative) reward, making them compatible with stable RL algorithms. Please check the [gymnasium robotics](https://robotics.farama.org/envs/fetch/) package for more information about these environments. The [stable-gym package](https://github.com/rickstaa/stable-gym) currently contains the following FetchCost environments:

* [FetchReachCost-v1](https://github.com/rickstaa/stable-gym/stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md): Fetch has to move its end-effector to the desired goal position.
21 changes: 21 additions & 0 deletions stable_gym/envs/robotics/fetch/__init__.py
@@ -0,0 +1,21 @@
"""Stable Gym gymnasium environments that are based on the
:gymnasium-robotics:`Fetch environments <envs/fetch/>` in the
:gymnasium-robotics:`Gymnasium Robotics <>` package.
.. note::
These environments are based on the :class:`gym.GoalEnv` class. This means
that the ``step`` method returns a dictionary with the following keys:
- ``observation``: The observation of the environment.
- ``achieved_goal``: The goal that was achieved during execution.
- ``desired_goal``: The desired goal that we asked the agent to attempt to achieve.
If you want to use these environments with RL algorithms that expect the ``step``
method to return a :obj:`np.ndarray` instead of a dictionary, you can use the
:class:`gym.wrappers.FlattenObservation` wrapper to flatten the dictionary into a
single :obj:`np.ndarray`.
"""
from stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost import (
FetchReachCost,
)
30 changes: 30 additions & 0 deletions stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md
@@ -0,0 +1,30 @@
# FetchReachCost gymnasium environment

<div align="center">
<img src="https://github.com/rickstaa/stable-gym/assets/17570430/d395ee04-a0e2-4320-9bd2-f248c207bf06" alt="Fetch Reach Cost environment" width="200px">
</div>
<br/>

An actuated 7-DOF [Fetch Mobile manipulator](https://fetchrobotics.com/). This environment corresponds to the [FetchReach-v2](https://robotics.farama.org/envs/fetch/reach/) environment included in the [gymnasium robotics package](https://robotics.farama.org/). It differs in that:

* The reward was replaced with a cost. This was done by taking the absolute value of the reward.

The rest of the environment is the same as the original FetchReach environment. Below, the modified cost is described. For more information about the environment (e.g. observation space, action space, episode termination, etc.), please refer to the [gymnasium robotics library](https://robotics.farama.org/envs/fetch/reach/).

## Cost function

The cost function of this environment penalizes the error between FetchReach's end-effector position and the desired goal position. It is defined as the Euclidean distance between the achieved goal position and the desired goal:

$$
cost = -reward_{original} = \left \| p - p_{goal} \right \|
$$

Where:

* $p$ is the achieved goal position (i.e. the end-effector position in Cartesian space).
* $p_{goal}$ is the desired goal position in Cartesian space.
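
In code, this comes down to a single Euclidean norm. A minimal sketch (the `compute_cost` helper name is illustrative, not part of the environment's API):

```python
import numpy as np

def compute_cost(achieved_goal: np.ndarray, desired_goal: np.ndarray) -> float:
    """Euclidean distance between the achieved and desired goal positions."""
    return float(np.linalg.norm(achieved_goal - desired_goal))

# Example: end-effector 5 cm from the goal along the x-axis.
print(compute_cost(np.array([1.30, 0.75, 0.53]), np.array([1.35, 0.75, 0.53])))  # ~0.05
```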

## How to use

This environment is part of the [Stable Gym package](https://github.com/rickstaa/stable-gym). It is therefore registered as the `stable_gym:FetchReachCost-v1` gymnasium environment when you import the Stable Gym package. If you want to use the environment in stand-alone mode, you can register it yourself.
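
A minimal sketch of such a stand-alone registration (the entry point and step limit mirror the values added to `stable_gym/__init__.py` in this commit; treat the exact keyword arguments as an assumption):

```python
import gymnasium as gym
from gymnasium.envs.registration import register

# Register the environment manually instead of importing stable_gym.
register(
    id="FetchReachCost-v1",
    entry_point="stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost:FetchReachCost",
    max_episode_steps=50,
)

env = gym.make("FetchReachCost-v1")
```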
11 changes: 11 additions & 0 deletions stable_gym/envs/robotics/fetch/fetch_reach_cost/__init__.py
@@ -0,0 +1,11 @@
"""Modified version of the FetchReach Mujoco environment in v1.2.2 of the
`Gymnasium Robotics library <https://robotics.farama.org/envs/fetch/>`_.
This modification was first described by `Han et al. 2020 <https://arxiv.org/abs/2004.14288>`_.
In this modified version:
- The reward was replaced with a cost. This was done by taking the absolute value of
the reward.
""" # noqa: E501
from stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost import (
FetchReachCost,
)