finishing the gym conversion for converters of type ToVect, further improving #16
Grid2op · Jul 31, 2020 · 1dec57c · 1dec57c
1 parent a6bb84e
commit 1dec57c
Show file tree

Hide file tree

Showing 7 changed files with 186 additions and 35 deletions.
diff --git a/grid2op/Converter/AnalogStateConverter.py b/grid2op/Converter/AnalogStateConverter.py
@@ -18,6 +18,10 @@ class AnalogStateConverter(Converter):
     
     The grid2op observation is converted into a 1d normalied array
     The grid2op action is created from a set of real valued arrays
+
+    It can not yet be converted to / from gym space. If this feature is interesting for you, you can
+    reply to the issue posted at https://github.com/rte-france/Grid2Op/issues/16
+    
     """
 
     def __init__(self, action_space, bias=0.0):

diff --git a/grid2op/Converter/ConnectivityConverter.py b/grid2op/Converter/ConnectivityConverter.py
@@ -21,6 +21,9 @@ class ConnectivityConverter(Converter):
     A and B", connect "B and C" but "**not connect** A and C" in this case you need an algorithm to disambuate your
     action.
 
+    It can not yet be converted to / from gym space. If this feature is interesting for you, you can
+    reply to the issue posted at https://github.com/rte-france/Grid2Op/issues/16
+
     **NB** compare to :class:`IdToAct` this converter allows for a smaller size. If you have N elements connected at
     a substation, you end up with `N*(N-1)/2` different action. Compare to IdToAct though, it is expected that your
     algorithm produces more than 1 output.

diff --git a/grid2op/Converter/Converters.py b/grid2op/Converter/Converters.py
@@ -14,7 +14,6 @@ class Converter(ActionSpace):
     """
     def __init__(self, action_space):
         ActionSpace.__init__(self, action_space, action_space.legal_action, action_space.subtype)
-        # self.__class__ = Converter.init_grid(action_space)
         self.space_prng = action_space.space_prng
         self.seed_used = action_space.seed_used
 

diff --git a/grid2op/Converter/GymConverter.py b/grid2op/Converter/GymConverter.py
@@ -15,6 +15,9 @@
 
 
 class BaseGymConverter:
+    """
+    Internal class, do not use.
+    """
     def __init__(self):
         pass
 
@@ -63,7 +66,11 @@ class GymObservationSpace(spaces.Dict, BaseGymConverter):
     """
     This class allows to transform the observation space into a gym space.
 
-    Gym space will be a :class:`gym.spaces.Dict`. By default all
+    Gym space will be a :class:`gym.spaces.Dict` with the keys being the different attributes
+    of the grid2op observation. All attributes are used.
+
+    Note that gym space converted with this class should be seeded independently. It is NOT seeded
+    when calling :func:`grid2op.Environment.Environment.seed`.
     """
     def __init__(self, env):
         self.initial_obs_space = env.observation_space
@@ -144,6 +151,17 @@ def _fill_dict_obs_space(self, dict_, observation_space, env_params, opponent_sp
             dict_[attr_nm] = my_type
 
     def from_gym(self, gymlike_observation: spaces.dict.OrderedDict) -> BaseObservation:
+        """
+        This function converts the gym-like representation of an observation to a grid2op observation.
+
+        Parameters
+        ----------
+        gymlike_observation
+
+        Returns
+        -------
+
+        """
         res = self.initial_obs_space.get_empty_observation()
         for k, v in gymlike_observation.items():
             res._assign_attr_from_name(k, v)
@@ -164,6 +182,8 @@ class GymActionSpace(spaces.Dict, BaseGymConverter):
     if availabe) of the original action space instead [if not available this means there is no
     implemented way to generate reliable random action]
 
+    Note that gym space converted with this class should be seeded independently. It is NOT seeded
+    when calling :func:`grid2op.Environment.Environment.seed`.
 
     """
     # deals with the action space (it depends how it's encoded...)
@@ -275,18 +295,21 @@ def to_gym(self, action: object) -> spaces.dict.OrderedDict:
 
         Parameters
         ----------
-        action
+        action:
+            The action (coming from grid2op or understandable by the converter)
 
         Returns
         -------
-
+        gym_action:
+            The same action converted as a OrderedDict (default used by gym in case of action space
+            being Dict)
         """
         if self.__is_converter:
-            res = self.initial_act_space.convert_action_to_gym(action)
+            gym_action = self.initial_act_space.convert_action_to_gym(action)
         else:
             # in that case action should be an instance of grid2op BaseAction
             assert isinstance(action, BaseAction), "impossible to convert an action not coming from grid2op"
-            res = self._base_to_gym(self.spaces.keys(), action,
-                                    dtypes={k: self.spaces[k].dtype for k in self.spaces},
-                                    converter=self.keys_human_2_grid2op)
-        return res
+            gym_action = self._base_to_gym(self.spaces.keys(), action,
+                                           dtypes={k: self.spaces[k].dtype for k in self.spaces},
+                                           converter=self.keys_human_2_grid2op)
+        return gym_action
diff --git a/grid2op/Converter/IdToAct.py b/grid2op/Converter/IdToAct.py
@@ -345,14 +345,18 @@ def convert_act(self, encoded_act):
 
     def get_gym_dict(self):
         """
-        Transform this converter into a dictionnary that can be used to initialized a gym.spaces.Dict
+        Transform this converter into a dictionary that can be used to initialize a :class:`gym.spaces.Dict`.
+        The converter is modeled as a "Discrete" gym space with as many elements as the number
+        of different actions handled by this converter.
+
+        This is available as the "action" key of the spaces.Dict gym action space built from it.
 
         This function should not be used "as is", but rather through :class:`grid2op.Converter.GymConverter`
 
         Returns
         -------
         res: :class:`gym.spaces.Dict`
-            The
+            The dict
         """
         # lazy import gym
         from gym import spaces
@@ -399,8 +403,6 @@ def convert_action_from_gym(self, gymlike_action):
         """
         res = gymlike_action["action"]
         if not isinstance(res, (int, dt_int, np.int, np.int64)):
-            import pdb
-            pdb.set_trace()
             raise RuntimeError("TODO")
         return int(res)
 
@@ -443,5 +445,3 @@ def convert_action_to_gym(self, action):
         from gym import spaces
         res = spaces.dict.OrderedDict({"action": int(action)})
         return res
-
-
diff --git a/grid2op/Converter/ToVect.py b/grid2op/Converter/ToVect.py
@@ -6,7 +6,9 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 
+import numpy as np
 from grid2op.Converter.Converters import Converter
+from grid2op.dtypes import dt_float, dt_int
 
 
 class ToVect(Converter):
@@ -17,13 +19,31 @@ class ToVect(Converter):
 
     - `encoded_act` are numpy ndarray
     - `transformed_obs` are numpy ndarray
+    (read more about these concepts by looking at the documentation of :class:`grid2op.Converter.Converters`)
 
+    It is convertible to a gym representation (like the original action space) in the form of a spaces.Box
+    representing a continuous action space (even though most components are probably discrete).
+    Note that if converted to a gym space, it is unlikely that the method "sample" will yield valid results.
+    Most of the time it should generate ambiguous actions that will not be handled by grid2op.
+
+    **NB** the conversion to a gym space should be done thanks to the :class:`grid2op.Converter.GymActionSpace`.
     """
     def __init__(self, action_space):
+        """
+        Build the converter from a grid2op action space.
+
+        Parameters
+        ----------
+        action_space:
+            The action space this converter is built from. It is also stored in
+            ``init_action_space``.
+        """
         Converter.__init__(self, action_space)
+        # kept because the gym conversion (see "_init_gym_converter") is built from it
+        self.init_action_space = action_space
         self.__class__ = ToVect.init_grid(action_space)
+        # vector representation of the action built from an empty dictionary
         self.do_nothing_vect = action_space({}).to_vect()
 
+        # for gym conversion: all these attributes stay None until "_init_gym_converter" fills them
+        self.__gym_action_space = None
+        self.__dict_space = None
+        self.__order_gym = None
+        self.__dtypes_gym = None
+        self.__shapes_gym = None
+
+        # index arrays used to re-order a vector between the gym ordering and the ordering of this converter
+        self.__order_gym_2_me = None
+        self.__order_me_2_gym = None
+
     def convert_obs(self, obs):
         """
         This converter will match the observation to a vector, using the
@@ -59,5 +79,88 @@ def convert_act(self, encoded_act):
 
         """
         res = self.__call__({})
-        res.from_vect(encoded_act)
+        res.from_vect(encoded_act, check_legit=False)
         return res
+
+    def _init_gym_converter(self):
+        """
+        Lazily build everything needed to expose this converter as a gym space.
+
+        Nothing is done if the converter has already been initialized. Otherwise a
+        :class:`grid2op.Converter.GymConverter.GymActionSpace` is built from the original action
+        space (so that the code of the low / high bounds is not duplicated here), its sub spaces
+        are concatenated into a single ``spaces.Box`` and two index arrays are computed:
+
+        - ``__order_gym_2_me``: applied to a vector in the gym ordering (``vect[order]``) it gives
+          the vector in the ordering of this converter (the one of ``attr_list_vect``)
+        - ``__order_me_2_gym``: applied to a vector in the ordering of this converter it gives the
+          vector in the gym ordering
+
+        Raises
+        ------
+        RuntimeError
+            If a sub space of the gym action space is neither a ``MultiBinary`` nor a ``Box``.
+        """
+        if self.__gym_action_space is not None:
+            # already initialized, nothing to do
+            return
+
+        # lazy import
+        from grid2op.Converter.GymConverter import GymActionSpace
+        from gym import spaces
+
+        # i do that not to duplicate the code of the low / high bounds
+        gym_action_space = GymActionSpace(self.init_action_space)
+        low = []
+        high = []
+        order_gym = []
+        dtypes = []
+        shapes = []
+        # for each gym component, the positions it occupies in the flat gym vector
+        gym_indexes = []
+        prev = 0
+        for k, v in gym_action_space.spaces.items():
+            if isinstance(v, spaces.MultiBinary):
+                my_size = v.n
+                low.extend(0 for _ in range(my_size))
+                high.extend(1 for _ in range(my_size))
+            elif isinstance(v, spaces.Box):
+                my_size = v.low.shape[0]
+                low.extend(v.low)
+                high.extend(v.high)
+            else:
+                msg = ("Impossible to convert this converter to gym. Type {} of data "
+                       "encountered while only MultiBinary and Box are supported for now.")
+                raise RuntimeError(msg.format(type(v)))
+            order_gym.append(k)
+            dtypes.append(v.dtype)
+            shapes.append(my_size)
+            gym_indexes.append(np.arange(my_size) + prev)
+            prev += my_size
+
+        my_type = spaces.Box(low=np.array(low),
+                             high=np.array(high),
+                             dtype=dt_float)
+
+        # names (with the gym convention) of the components, in the order used by this converter
+        order_me = [GymActionSpace.keys_grid2op_2_human[el]
+                    for el in self.init_action_space.attr_list_vect]
+
+        nb_el = my_type.shape[0]
+        # -1 marks a position that is not covered by any attribute of "attr_list_vect"
+        _order_gym_2_me = np.full(nb_el, -1, dtype=dt_int)
+        _order_me_2_gym = np.full(nb_el, -1, dtype=dt_int)
+        prev = 0
+        for nm_attr in order_me:
+            id_gym = order_gym.index(nm_attr)
+            index_me = np.arange(shapes[id_gym]) + prev
+            index_gym = gym_indexes[id_gym]
+            # these arrays are used as "out = vect[order]": each one is therefore indexed by the
+            # position in the *output* vector and stores the position to read in the *input* one
+            _order_gym_2_me[index_me] = index_gym
+            _order_me_2_gym[index_gym] = index_me
+            prev += shapes[id_gym]
+
+        self.__order_gym_2_me = _order_gym_2_me
+        self.__order_me_2_gym = _order_me_2_gym
+        self.__dict_space = {"action": my_type}
+        self.__order_gym = order_gym
+        self.__dtypes_gym = dtypes
+        self.__shapes_gym = shapes
+        # set last: it is the "already initialized" flag, so an error raised above does not leave
+        # the converter flagged as initialized while the other attributes are still None
+        self.__gym_action_space = gym_action_space
+
+    def get_gym_dict(self):
+        """
+        Give the dictionary that represents this converter as a "gym" action space (spaces.Dict).
+
+        The gym conversion is initialized on the first call (see "_init_gym_converter").
+
+        Returns
+        -------
+        res: ``dict``
+            The dictionary stored by "_init_gym_converter". It has a single key, "action", whose
+            value is the gym space built for this converter.
+        """
+        self._init_gym_converter()
+        gym_dict = self.__dict_space
+        return gym_dict
+
+    def convert_action_from_gym(self, gymlike_action):
+        """
+        Convert a gym-like action (ie an ordered dictionary with "action" as its only key) to an
+        action compatible with this converter (in this case a vectorized action).
+
+        Parameters
+        ----------
+        gymlike_action:
+            The gym-like action. Only its "action" entry is read.
+
+        Returns
+        -------
+        res:
+            The content of ``gymlike_action["action"]`` re-ordered with the index array computed
+            by "_init_gym_converter".
+        """
+        # make sure the index arrays exist: if this method were called before "get_gym_dict" the
+        # vector would otherwise be indexed with None (which silently adds a dimension)
+        self._init_gym_converter()
+        vect = gymlike_action["action"]
+        return vect[self.__order_gym_2_me]
+
+    def convert_action_to_gym(self, action):
+        """
+        Convert an action of this converter (ie a numpy array) into an action that is usable with
+        open ai gym (ie an ordered dictionary with "action" as its only key).
+
+        Parameters
+        ----------
+        action:
+            The action of this converter (a vector).
+
+        Returns
+        -------
+        res:
+            An ordered dictionary whose only key, "action", holds ``action`` re-ordered with the
+            index array computed by "_init_gym_converter".
+        """
+        from gym import spaces
+        # make sure the index arrays exist: if this method were called before "get_gym_dict" the
+        # vector would otherwise be indexed with None (which silently adds a dimension)
+        self._init_gym_converter()
+        res = spaces.dict.OrderedDict({"action": action[self.__order_me_2_gym]})
+        return res
diff --git a/grid2op/tests/test_GymConverter.py b/grid2op/tests/test_GymConverter.py
@@ -15,7 +15,7 @@
 from grid2op.dtypes import dt_float, dt_bool, dt_int
 from grid2op.tests.helper_path_test import *
 from grid2op.MakeEnv import make
-from grid2op.Converter import GymActionSpace, GymObservationSpace, IdToAct
+from grid2op.Converter import GymActionSpace, GymObservationSpace, IdToAct, ToVect
 
 import pdb
 
@@ -119,44 +119,46 @@ def test_to_from_gym_act(self):
                     assert act == act2
 
 
-class TestIdToAct(unittest.TestCase, BaseTestGymConverter):
-    def setUp(self) -> None:
-        BaseTestGymConverter.__init__(self)
-        
+class BaseTestConverter(BaseTestGymConverter):
+    def init_converter(self, env):
+        raise NotImplementedError()
+
     def test_creation(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
             with make("l2rpn_wcci_2020", test=True) as env:
                 # test i can create
-                idtoact = IdToAct(env.action_space)
-                act_space = GymActionSpace(idtoact)
+                converter = self.init_converter(env)
+                act_space = GymActionSpace(converter)
                 act_space.sample()
 
     def test_json(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
             with make("l2rpn_wcci_2020", test=True) as env:
                 # test i can create
-                idtoact = IdToAct(env.action_space)
-                act_space = GymActionSpace(idtoact)
+                converter = self.init_converter(env)
+                act_space = GymActionSpace(converter)
                 act_space.seed(0)
                 self._aux_test_json(act_space)
 
     def test_to_from_gym_act(self):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
             with make("l2rpn_wcci_2020", test=True) as env:
-                idtoact = IdToAct(env.action_space)
-                act_space = GymActionSpace(idtoact)
+                converter = self.init_converter(env)
+                act_space = GymActionSpace(converter)
                 act_space.seed(0)
-                idtoact.seed(0)
+                converter.seed(0)
 
-                act = idtoact.sample()
-                gym_act = act_space.to_gym(act)
+                gym_act = act_space.sample()
+                act = act_space.from_gym(gym_act)
                 self._aux_test_json(act_space, gym_act)
-                assert act_space.contains(gym_act)
-                act2 = act_space.from_gym(gym_act)
-                assert act == act2
+                gym_act2 = act_space.to_gym(act)
+                act2 = act_space.from_gym(gym_act2)
+                g2op_act = converter.convert_act(act)
+                g2op_act2 = converter.convert_act(act2)
+                assert g2op_act == g2op_act2
 
                 act_space.seed(0)
                 for i in range(10):
@@ -165,5 +167,22 @@ def test_to_from_gym_act(self):
                     self._aux_test_json(act_space, gym_act)
                     gym_act2 = act_space.to_gym(act)
                     act2 = act_space.from_gym(gym_act2)
-                    assert act == act2
-                    assert gym_act == gym_act2
+                    g2op_act = converter.convert_act(act)
+                    g2op_act2 = converter.convert_act(act2)
+                    assert g2op_act == g2op_act2
+
+
+class TestIdToAct(unittest.TestCase, BaseTestConverter):
+    """Run the tests defined in BaseTestConverter with an IdToAct converter."""
+
+    def setUp(self) -> None:
+        BaseTestGymConverter.__init__(self)
+
+    def init_converter(self, env):
+        """Build the converter under test from the action space of `env`."""
+        action_space = env.action_space
+        return IdToAct(action_space)
+
+
+class TestToVect(unittest.TestCase, BaseTestConverter):
+    """Run the tests defined in BaseTestConverter with a ToVect converter."""
+
+    def setUp(self) -> None:
+        BaseTestGymConverter.__init__(self)
+
+    def init_converter(self, env):
+        """Build the converter under test from the action space of `env`."""
+        action_space = env.action_space
+        return ToVect(action_space)