Farama-Foundation · pseudo-rnd-thoughts · May 31, 2023 · Mar 27, 2023 · Apr 6, 2023 · Apr 14, 2023
diff --git a/minigrid/wrappers.py b/minigrid/wrappers.py
@@ -8,7 +8,7 @@
 import gymnasium as gym
 import numpy as np
 from gymnasium import logger, spaces
-from gymnasium.core import ObservationWrapper, ObsType, Wrapper
+from gymnasium.core import ActionWrapper, ObservationWrapper, ObsType, Wrapper
 
 from minigrid.core.constants import COLOR_TO_IDX, OBJECT_TO_IDX, STATE_TO_IDX
 from minigrid.core.world_object import Goal
@@ -764,3 +764,27 @@ def observation(self, obs):
         obs["image"] = grid
 
         return obs
+
+
+class StochasticActionWrapper(ActionWrapper):
+    """
+    Add stochasticity to the actions
+
+    If a random action is provided, it is returned with probability `1 - prob`.
+    Else, a random action is sampled from the action space.
+    """
+
+    def __init__(self, env=None, prob=0.9, random_action=None):
+        super().__init__(env)
+        self.prob = prob
+        self.random_action = random_action
+
+    def action(self, action):
+        """ """
+        if np.random.uniform() < self.prob:
+            return action
+        else:
+            if self.random_action is None:
+                return self.np_random.integers(0, high=6)
+            else:
+                return self.random_action
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
@@ -21,6 +21,7 @@
     ReseedWrapper,
     RGBImgObsWrapper,
     RGBImgPartialObsWrapper,
+    StochasticActionWrapper,
     SymbolicObsWrapper,
     ViewSizeWrapper,
 )
@@ -329,6 +330,23 @@ def test_symbolic_obs_wrapper(env_id):
     env.close()
 
 
+@pytest.mark.parametrize("env_id", ["MiniGrid-Empty-16x16-v0"])
+def test_stochastic_action_wrapper(env_id):
+    env = gym.make(env_id)
+    env = StochasticActionWrapper(env, prob=0.2)
+    _, _ = env.reset()
+    for _ in range(20):
+        _, _, _, _, _ = env.step(0)
+    env.close()
+
+    env = gym.make(env_id)
+    env = StochasticActionWrapper(env, prob=0.2, random_action=1)
+    _, _ = env.reset()
+    for _ in range(20):
+        _, _, _, _, _ = env.step(0)
+    env.close()
+
+
 def test_dict_observation_space_doesnt_clash_with_one_hot():
     env = gym.make("MiniGrid-Empty-5x5-v0")
     env = OneHotPartialObsWrapper(env)