diff --git a/docs/source/conf.py b/docs/source/conf.py index 699b8993..67c7010b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -92,6 +92,7 @@ extlinks = { "stable_gym": ("https://github.com/rickstaa/stable-gym/%s", None), "gymnasium": ("https://gymnasium.farama.org/%s", None), + "gymnasium-robotics": ("https://robotics.farama.org/%s", None), "stable_learning_control": ( "https://github.com/rickstaa/stable-learning-control/%s", None, diff --git a/docs/source/envs/envs.rst b/docs/source/envs/envs.rst index b0c41eb3..f0e51ac8 100644 --- a/docs/source/envs/envs.rst +++ b/docs/source/envs/envs.rst @@ -56,6 +56,11 @@ Environments that are based on the on `Mujoco`_ or `Mujoco gymnasium`_ environme Robotics environment -------------------- +.. toctree:: + :maxdepth: 1 + + ./robotics/fetch_reach_cost.rst + .. note:: The ROS robotics environments of the Stable Gym package were moved into a separate package called :ros_gazebo_gym:`Ros Gazebo Gym <>`. diff --git a/docs/source/envs/robotics/fetch_reach_cost.rst b/docs/source/envs/robotics/fetch_reach_cost.rst new file mode 100644 index 00000000..dc52981c --- /dev/null +++ b/docs/source/envs/robotics/fetch_reach_cost.rst @@ -0,0 +1,2 @@ +.. include:: ../../../../stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md + :parser: myst_parser.sphinx_ diff --git a/examples/use_stable_gym.py b/examples/use_stable_gym.py index c04abbf0..668f1d49 100644 --- a/examples/use_stable_gym.py +++ b/examples/use_stable_gym.py @@ -6,6 +6,7 @@ ENV_NAME = "Oscillator-v1" # ENV_NAME = "CartPoleCost-v1" # ENV_NAME = "SwimmerCost-v1" +# ENV_NAME = "FetchReachCost-v1" if __name__ == "__main__": env = gym.make(ENV_NAME, render_mode="human") diff --git a/examples/use_stable_gym_vectorized.py b/examples/use_stable_gym_vectorized.py index 74999b37..5c030c7e 100644 --- a/examples/use_stable_gym_vectorized.py +++ b/examples/use_stable_gym_vectorized.py @@ -10,8 +10,9 @@ import stable_gym # noqa: F401 ENV_NAME = "Oscillator-v1" -# ENV_NAME = "Ex3EKF-v1" # ENV_NAME = "CartPoleCost-v1" +# ENV_NAME = "SwimmerCost-v1" +# ENV_NAME = "FetchReachCost-v1" if __name__ == "__main__": envs = gym.vector.make(ENV_NAME, render_mode="human", num_envs=3) diff --git a/package-lock.json b/package-lock.json index 120bc1ee..93f558ad 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "stable-gym", - "version": "0.10.0", + "version": "0.12.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "stable-gym", - "version": "0.10.0", + "version": "0.12.0", "license": "MIT", "devDependencies": { "@commitlint/cli": "17.6.5", @@ -15,7 +15,7 @@ "cz-conventional-changelog": "3.3.0", "husky": "8.0.3", "lint-staged": "13.2.2", - "release-please": "^15.11.1", + "release-please": "15.11.1", "remark": "14.0.3", "remark-cli": "11.0.0", "remark-lint": "9.1.2", diff --git a/pyproject.toml b/pyproject.toml index 435a1e07..4900900b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "gymnasium>=0.28.1", "gymnasium[classic_control]>=0.28.1", "gymnasium[mujoco]>=0.28.1", + "gymnasium-robotics>=1.2.2", "matplotlib>=3.7.1", "iteration_utilities>=0.11.0", "mujoco==2.3.3" # TODO: Remove when https://github.com/Farama-Foundation/Gymnasium/issues/597 is resolved. 
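The example hunks above only add `FetchReachCost-v1` as a selectable environment name; the full run loop of `examples/use_stable_gym.py` is not shown in this patch. As a point of reference, a minimal sketch of such a run loop under the standard gymnasium API (the loop body and step count are assumptions, not the actual example script):

```python
import gymnasium as gym

import stable_gym  # noqa: F401  -- importing the package registers the environments.

ENV_NAME = "FetchReachCost-v1"  # Newly added option in the example scripts.

env = gym.make(ENV_NAME, render_mode="human")
obs, info = env.reset(seed=0)
for _ in range(200):
    # The cost-based environments return a positive cost in place of a reward.
    obs, cost, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```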
diff --git a/stable_gym/__init__.py b/stable_gym/__init__.py index 55a74310..d8c3951f 100644 --- a/stable_gym/__init__.py +++ b/stable_gym/__init__.py @@ -58,6 +58,11 @@ "max_step": 250, "reward_threshold": 300, }, + "FetchReachCost-v1": { + "module": "stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost:FetchReachCost", + "max_step": 50, + "reward_threshold": 300, + }, } for env, val in ENVS.items(): diff --git a/stable_gym/envs/biological/oscillator/oscillator.py b/stable_gym/envs/biological/oscillator/oscillator.py index 2dea074a..c11b4b16 100644 --- a/stable_gym/envs/biological/oscillator/oscillator.py +++ b/stable_gym/envs/biological/oscillator/oscillator.py @@ -112,7 +112,7 @@ class Oscillator(gym.Env, OscillatorDisturber): Attributes: state (numpy.ndarray): The current system state. t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. sigma (float): The variance of the system noise. """ # noqa: E501 diff --git a/stable_gym/envs/biological/oscillator_complicated/oscillator_complicated.py b/stable_gym/envs/biological/oscillator_complicated/oscillator_complicated.py index ff7e00b2..9cdd9e1d 100644 --- a/stable_gym/envs/biological/oscillator_complicated/oscillator_complicated.py +++ b/stable_gym/envs/biological/oscillator_complicated/oscillator_complicated.py @@ -128,7 +128,7 @@ class is based on the :class:`~stable_gym.envs.biological.oscillator.oscillator. Attributes: state (numpy.ndarray): The current system state. t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. sigma (float): The variance of the system noise. """ # noqa: E501 diff --git a/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py b/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py index 583c6d5e..e4e298ea 100644 --- a/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py +++ b/stable_gym/envs/classic_control/cartpole_cost/cartpole_cost.py @@ -477,7 +477,7 @@ def step(self, action): "You are calling 'step()' even though this " "environment has already returned terminated = True. You " "should always call 'reset()' once you receive 'terminated = " - "True' -- any further steps are undefined behavior." + "True' -- any further steps are undefined behaviour." ) self.steps_beyond_terminated += 1 @@ -607,7 +607,7 @@ def render(self): """Render one frame of the environment.""" if self.render_mode is None: assert self.spec is not None - gym.logger.warn( + logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")' diff --git a/stable_gym/envs/classic_control/ex3_ekf/ex3_ekf.py b/stable_gym/envs/classic_control/ex3_ekf/ex3_ekf.py index 17c57d68..4b80dc14 100644 --- a/stable_gym/envs/classic_control/ex3_ekf/ex3_ekf.py +++ b/stable_gym/envs/classic_control/ex3_ekf/ex3_ekf.py @@ -73,7 +73,7 @@ class Ex3EKF(gym.Env, Ex3EKFDisturber): Attributes: state (numpy.ndarray): The current system state. t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. sigma (float): The variance of the system noise. 
""" # noqa: E501, W605 diff --git a/stable_gym/envs/mujoco/ant_cost/ant_cost.py b/stable_gym/envs/mujoco/ant_cost/ant_cost.py index 78c4f040..86e34ecd 100644 --- a/stable_gym/envs/mujoco/ant_cost/ant_cost.py +++ b/stable_gym/envs/mujoco/ant_cost/ant_cost.py @@ -55,8 +55,7 @@ class AntCost(AntEnv, utils.EzPickle): Attributes: state (numpy.ndarray): The current system state. - t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. reference_forward_velocity (float): The forward velocity that the agent should try to track. """ # noqa: E501, W605 diff --git a/stable_gym/envs/mujoco/half_cheetah_cost/half_cheetah_cost.py b/stable_gym/envs/mujoco/half_cheetah_cost/half_cheetah_cost.py index 12cd715b..7534bce2 100644 --- a/stable_gym/envs/mujoco/half_cheetah_cost/half_cheetah_cost.py +++ b/stable_gym/envs/mujoco/half_cheetah_cost/half_cheetah_cost.py @@ -55,8 +55,7 @@ class HalfCheetahCost(HalfCheetahEnv, utils.EzPickle): Attributes: state (numpy.ndarray): The current system state. - t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. reference_forward_velocity (float): The forward velocity that the agent should try to track. """ # noqa: E501, W605 diff --git a/stable_gym/envs/mujoco/hopper_cost/hopper_cost.py b/stable_gym/envs/mujoco/hopper_cost/hopper_cost.py index 93817528..7968f5f4 100644 --- a/stable_gym/envs/mujoco/hopper_cost/hopper_cost.py +++ b/stable_gym/envs/mujoco/hopper_cost/hopper_cost.py @@ -54,8 +54,7 @@ class HopperCost(HopperEnv, utils.EzPickle): Attributes: state (numpy.ndarray): The current system state. - t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. reference_forward_velocity (float): The forward velocity that the agent should try to track. """ # noqa: E501, W605 diff --git a/stable_gym/envs/mujoco/humanoid_cost/humanoid_cost.py b/stable_gym/envs/mujoco/humanoid_cost/humanoid_cost.py index bed7da2b..262280c0 100644 --- a/stable_gym/envs/mujoco/humanoid_cost/humanoid_cost.py +++ b/stable_gym/envs/mujoco/humanoid_cost/humanoid_cost.py @@ -19,7 +19,7 @@ class HumanoidCost(HumanoidEnv, utils.EzPickle): .. note:: Can also be used in a vectorized manner. See the - :gymnasium:`gym.vector `w documentation. + :gymnasium:`gym.vector ` documentation. Source: This is a modified version of the Humanoid Mujoco environment in v0.28.1 of the @@ -55,8 +55,7 @@ class HumanoidCost(HumanoidEnv, utils.EzPickle): Attributes: state (numpy.ndarray): The current system state. - t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. reference_forward_velocity (float): The forward velocity that the agent should try to track. """ # noqa: E501, W605 diff --git a/stable_gym/envs/mujoco/swimmer_cost/swimmer_cost.py b/stable_gym/envs/mujoco/swimmer_cost/swimmer_cost.py index 72d20010..f556153e 100644 --- a/stable_gym/envs/mujoco/swimmer_cost/swimmer_cost.py +++ b/stable_gym/envs/mujoco/swimmer_cost/swimmer_cost.py @@ -54,8 +54,7 @@ class SwimmerCost(SwimmerEnv, utils.EzPickle): Attributes: state (numpy.ndarray): The current system state. - t (float): The current time step. - dt (float): The environment step size. + dt (float): The environment step size. Also available as :attr:`.tau`. 
        reference_forward_velocity (float): The forward velocity that the agent should
            try to track.
    """  # noqa: E501, W605
diff --git a/stable_gym/envs/mujoco/walker2d_cost/walker2d_cost.py b/stable_gym/envs/mujoco/walker2d_cost/walker2d_cost.py
index 0de7c6a6..4e4e9511 100644
--- a/stable_gym/envs/mujoco/walker2d_cost/walker2d_cost.py
+++ b/stable_gym/envs/mujoco/walker2d_cost/walker2d_cost.py
@@ -54,8 +54,7 @@ class Walker2dCost(Walker2dEnv, utils.EzPickle):

    Attributes:
        state (numpy.ndarray): The current system state.
-        t (float): The current time step.
-        dt (float): The environment step size.
+        dt (float): The environment step size. Also available as :attr:`.tau`.
        reference_forward_velocity (float): The forward velocity that the agent should
            try to track.
    """  # noqa: E501, W605
diff --git a/stable_gym/envs/robotics/__init__.py b/stable_gym/envs/robotics/__init__.py
new file mode 100644
index 00000000..687cf6ff
--- /dev/null
+++ b/stable_gym/envs/robotics/__init__.py
@@ -0,0 +1,17 @@
+"""Stable Gym gymnasium environments that are based on the environments found in the
+:gymnasium-robotics:`Gymnasium Robotics <>` package.
+
+.. note::
+
+    Some of these environments are based on the :class:`gym.GoalEnv` class. This means
+    that their observation is a dictionary with the following keys:
+
+    - ``observation``: The observation of the environment.
+    - ``achieved_goal``: The goal that was achieved during execution.
+    - ``desired_goal``: The desired goal that we asked the agent to attempt to achieve.
+
+    If you want to use these environments with RL algorithms that expect a flat
+    :obj:`np.ndarray` observation instead of a dictionary, you can use the
+    :class:`gym.wrappers.FlattenObservation` wrapper to flatten the dictionary into a
+    single :obj:`np.ndarray`.
+"""
diff --git a/stable_gym/envs/robotics/fetch/README.md b/stable_gym/envs/robotics/fetch/README.md
new file mode 100644
index 00000000..e8338568
--- /dev/null
+++ b/stable_gym/envs/robotics/fetch/README.md
@@ -0,0 +1,5 @@
+# FetchCost gymnasium environments
+
+The [stable-gym package](https://github.com/rickstaa/stable-gym) contains modified versions of the [fetch environments](https://robotics.farama.org/envs/fetch/) found in the [gymnasium robotics package](https://robotics.farama.org). These environments are different because they return a (positive) cost instead of a (negative) reward, making them compatible with stable RL algorithms. Please check the [gymnasium robotics](https://robotics.farama.org/envs/fetch/) package for more information about these environments. The [stable-gym package](https://github.com/rickstaa/stable-gym) currently contains the following FetchCost environments:
+
+* [FetchReachCost-v1](https://github.com/rickstaa/stable-gym/stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md): Fetch has to move its end-effector to the desired goal position.
diff --git a/stable_gym/envs/robotics/fetch/__init__.py b/stable_gym/envs/robotics/fetch/__init__.py
new file mode 100644
index 00000000..9e5433cb
--- /dev/null
+++ b/stable_gym/envs/robotics/fetch/__init__.py
@@ -0,0 +1,21 @@
+"""Stable Gym gymnasium environments that are based on the
+:gymnasium-robotics:`Fetch environments ` in the
+:gymnasium-robotics:`Gymnasium Robotics <>` package.
+
+.. note::
+
+    These environments are based on the :class:`gym.GoalEnv` class. This means
+    that their observation is a dictionary with the following keys:
+
+    - ``observation``: The observation of the environment.
+    - ``achieved_goal``: The goal that was achieved during execution.
+    - ``desired_goal``: The desired goal that we asked the agent to attempt to achieve.
+
+    If you want to use these environments with RL algorithms that expect a flat
+    :obj:`np.ndarray` observation instead of a dictionary, you can use the
+    :class:`gym.wrappers.FlattenObservation` wrapper to flatten the dictionary into a
+    single :obj:`np.ndarray`.
+"""
+from stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost import (
+    FetchReachCost,
+)
diff --git a/stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md b/stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md
new file mode 100644
index 00000000..e4c23548
--- /dev/null
+++ b/stable_gym/envs/robotics/fetch/fetch_reach_cost/README.md
@@ -0,0 +1,30 @@
+# FetchReachCost gymnasium environment
+
+<!-- Preview image: "Fetch Reach Cost environment" -->
+
+An actuated 7-DOF [Fetch Mobile manipulator](https://fetchrobotics.com/). This environment corresponds to the [FetchReach-v2](https://robotics.farama.org/envs/fetch/reach/) environment included in the [gymnasium robotics package](https://robotics.farama.org/). It differs from the original in that:
+
+* The reward was replaced with a cost. This was done by taking the absolute value of the reward.
+
+The rest of the environment is the same as the original FetchReach environment. Below, the modified cost is described. For more information about the environment (e.g. observation space, action space, episode termination, etc.), please refer to the [gymnasium robotics library](https://robotics.farama.org/envs/fetch/reach/).
+
+## Cost function
+
+The cost function of this environment penalises the error between FetchReach's end-effector position and the desired goal position. It is defined as the Euclidean distance between the achieved goal position and the desired goal position:
+
+$$
+cost = -reward_{original} = \left \| p - p_{goal} \right \|
+$$
+
+Where:
+
+* $p$ is the achieved goal position (i.e. the end-effector position in Cartesian space).
+* $p_{goal}$ is the desired goal position in Cartesian space.
+
+## How to use
+
+This environment is part of the [Stable Gym package](https://github.com/rickstaa/stable-gym). It is therefore registered as the `stable_gym:FetchReachCost-v1` gymnasium environment when you import the Stable Gym package. If you want to use the environment in stand-alone mode, you can register it yourself.
diff --git a/stable_gym/envs/robotics/fetch/fetch_reach_cost/__init__.py b/stable_gym/envs/robotics/fetch/fetch_reach_cost/__init__.py
new file mode 100644
index 00000000..6da01471
--- /dev/null
+++ b/stable_gym/envs/robotics/fetch/fetch_reach_cost/__init__.py
@@ -0,0 +1,11 @@
+"""Modified version of the FetchReach Mujoco environment in v1.2.2 of the
+`Gymnasium Robotics library `_.
+This modification was first described by `Han et al. 2020 `_.
+In this modified version:
+
+- The reward was replaced with a cost. This was done by taking the absolute value of
+  the reward.
+"""  # noqa: E501
+from stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost import (
+    FetchReachCost,
+)
diff --git a/stable_gym/envs/robotics/fetch/fetch_reach_cost/fetch_reach_cost.py b/stable_gym/envs/robotics/fetch/fetch_reach_cost/fetch_reach_cost.py
new file mode 100644
index 00000000..4b0a8541
--- /dev/null
+++ b/stable_gym/envs/robotics/fetch/fetch_reach_cost/fetch_reach_cost.py
@@ -0,0 +1,208 @@
+"""The FetchReachCost gymnasium environment."""
+
+import gymnasium as gym
+import matplotlib.pyplot as plt
+import numpy as np
+from gymnasium import utils
+from gymnasium_robotics.envs.fetch.reach import MujocoFetchReachEnv
+
+import stable_gym  # NOTE: Required to register environments. # noqa: F401
+
+EPISODES = 10  # Number of env episodes to run when __main__ is called.
+RANDOM_STEP = True  # Use random action in __main__. Zero action otherwise.
+
+
+# TODO: Update solving criteria after training.
+class FetchReachCost(MujocoFetchReachEnv, utils.EzPickle):
+    """Custom FetchReach gymnasium robotics environment.
+
+    .. note::
+        Can also be used in a vectorized manner. See the
+        :gymnasium:`gym.vector ` documentation.
+
+    Source:
+        Modified version of the FetchReach Mujoco environment in v1.2.2 of the
+        `Gymnasium Robotics library `_.
+        This modification was first described by
+        `Han et al. 2020 `_.
+        In this modified version:
+
+        - The reward was replaced with a cost. This was done by taking the absolute
+          value of the reward.
+
+        The rest of the environment is the same as the original FetchReach environment.
+        Below, the modified cost is described. For more information about the
+        environment (e.g. observation space, action space, episode termination, etc.),
+        please refer to the
+        :gymnasium-robotics:`gymnasium robotics library `.
+
+    Modified cost:
+        .. math::
+
+            cost = |reward|
+
+    Solved Requirements:
+        Considered solved when the average cost is less than or equal to 50 over
+        100 consecutive trials.
+
+    How to use:
+        .. code-block:: python
+
+            import stable_gym
+            import gymnasium as gym
+            env = gym.make("FetchReachCost-v1")
+
+    Attributes:
+        state (numpy.ndarray): The current system state.
+        dt (float): The environment step size. Also available as :attr:`.tau`.
+    """
+
+    def __init__(
+        self,
+        **kwargs,
+    ):
+        """Constructs all necessary attributes for the FetchReachCost instance."""
+        assert "reward_type" not in kwargs, (
+            "'reward_type' should not be passed to the 'FetchReachCost' environment as "
+            "only 'dense' rewards are supported."
+        )
+        self.state = None
+
+        # Initialize the FetchReachEnv class.
+        super().__init__(
+            reward_type="dense",  # NOTE: DONT CHANGE! This is required for the cost.
+            **kwargs,
+        )
+
+        # Reinitialize the EzPickle class.
+        # NOTE: Done to ensure the args of the FetchReachCost class are also pickled.
+        # NOTE: Ensure that all args are passed to the EzPickle class!
+        utils.EzPickle.__init__(
+            self,
+            **kwargs,
+        )
+
+    def cost(self, reward):
+        """Calculate the cost.
+
+        Args:
+            reward (float): The reward returned from the FetchReach environment.
+
+        Returns:
+            float: The cost (i.e. the absolute value of the reward).
+        """
+        return np.abs(reward)
+
+    def step(self, action):
+        """Take a step in the environment.
+
+        .. note::
+            This method overrides the
+            :meth:`~gymnasium_robotics.envs.fetch.fetch_env.MujocoFetchEnv.step` method
+            such that the new cost function is used.
+
+        Args:
+            action (np.ndarray): Action to take in the environment.
+
+        Returns:
+            (tuple): tuple containing:
+
+                - obs (:obj:`dict`): Environment observation dictionary.
+                - cost (:obj:`float`): Cost of the action.
+                - terminated (:obj:`bool`): Whether the episode is terminated.
+                - truncated (:obj:`bool`): Whether the episode was truncated. This value
+                  is set by wrappers when for example a time limit is reached or the
+                  agent goes out of bounds.
+                - info (:obj:`dict`): Additional information about the environment.
+        """
+        obs, reward, terminated, truncated, info = super().step(action)
+
+        # Store a flat state array, consistent with the documented 'state' attribute
+        # and with the 'reset()' method below.
+        self.state = np.concatenate(
+            [
+                obs["observation"],
+                obs["desired_goal"],
+            ]
+        )
+
+        return obs, self.cost(reward), terminated, truncated, info
+
+    def reset(self, seed=None, options=None):
+        """Reset gymnasium environment.
+
+        Args:
+            seed (int, optional): A random seed for the environment. By default
+                ``None``.
+            options (dict, optional): A dictionary containing additional options for
+                resetting the environment. By default ``None``. Not used in this
+                environment.
+
+        Returns:
+            (tuple): tuple containing:
+
+                - observation (:obj:`dict`): Dictionary containing the current
+                  observation.
+                - info (:obj:`dict`): Dictionary containing additional information.
+        """
+        observation, info = super().reset(seed=seed, options=options)
+
+        self.state = np.concatenate(
+            [
+                observation["observation"],
+                observation["desired_goal"],
+            ]
+        )
+
+        return observation, info
+
+    @property
+    def tau(self):
+        """Alias for the environment step size.
Done for compatibility with the
+        other gymnasium environments.
+        """
+        return self.dt
+
+    @property
+    def t(self):
+        """Make simulation time available as a property."""
+        return self.unwrapped.data.time
+
+
+if __name__ == "__main__":
+    print("Setting up 'FetchReachCost' environment.")
+    env = gym.make("FetchReachCost", render_mode="human")
+    # NOTE: Flatten the goal-based dictionary observation so that the states can be
+    # stored and plotted as a single array below.
+    env = gym.wrappers.FlattenObservation(env)
+
+    # Run episodes.
+    episode = 0
+    path, paths = [], []
+    s, _ = env.reset()
+    path.append(s)
+    print(f"\nPerforming '{EPISODES}' episodes in the 'FetchReachCost' environment...\n")
+    print(f"Episode: {episode}")
+    while episode <= EPISODES:
+        action = (
+            env.action_space.sample()
+            if RANDOM_STEP
+            else np.zeros(env.action_space.shape)
+        )
+        s, r, terminated, truncated, _ = env.step(action)
+        path.append(s)
+        if terminated or truncated:
+            paths.append(path)
+            episode += 1
+            path = []
+            s, _ = env.reset()
+            path.append(s)
+            print(f"Episode: {episode}")
+    print("\nFinished 'FetchReachCost' environment simulation.")
+
+    # Plot results per episode.
+    print("\nPlotting episode data...")
+    for i in range(len(paths)):
+        path = paths[i]
+        fig, ax = plt.subplots()
+        print(f"\nEpisode: {i}")
+        path = np.array(path)
+        t = np.linspace(0, path.shape[0] * env.dt, path.shape[0])
+        for j in range(path.shape[1]):  # NOTE: Change if you want to plot fewer states.
+            ax.plot(t, path[:, j], label=f"State {j}")
+        ax.set_xlabel("Time (s)")
+        ax.set_title(f"FetchReachCost episode '{i}'")
+        ax.legend()
+        print("Close plot to see next episode...")
+        plt.show()
+
+    print("\nDone")
diff --git a/stable_gym/envs/robotics/fetch/fetch_reach_cost/requirements.txt b/stable_gym/envs/robotics/fetch/fetch_reach_cost/requirements.txt
new file mode 100644
index 00000000..adbb9a07
--- /dev/null
+++ b/stable_gym/envs/robotics/fetch/fetch_reach_cost/requirements.txt
@@ -0,0 +1,2 @@
+gymnasium-robotics==1.2.2
+matplotlib==3.7.0
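The robotics `__init__` docstrings added in this patch point users to the `FlattenObservation` wrapper for the goal-based dictionary observations, and the FetchReachCost README notes that the environment can also be registered manually in stand-alone mode. A minimal sketch of both, assuming the entry point and 50-step limit listed in `stable_gym/__init__.py` above; the flattened observation contents are indicative only:

```python
import gymnasium as gym
from gymnasium.wrappers import FlattenObservation

import stable_gym  # noqa: F401  -- importing the package registers FetchReachCost-v1.

# Stand-alone alternative (sketch, mirroring the entry in stable_gym/__init__.py):
# gym.register(
#     id="FetchReachCost-v1",
#     entry_point="stable_gym.envs.robotics.fetch.fetch_reach_cost.fetch_reach_cost:FetchReachCost",
#     max_episode_steps=50,
# )

# Goal-based environments return a Dict observation; flatten it into a single Box
# for algorithms that expect a flat numpy array.
env = FlattenObservation(gym.make("FetchReachCost-v1"))
obs, info = env.reset(seed=0)
print(obs.shape)  # Flat array combining observation, achieved goal and desired goal.

obs, cost, terminated, truncated, info = env.step(env.action_space.sample())
print(cost)  # Positive cost, i.e. the distance to the desired goal.
env.close()
```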