improve support for gym see issue #16

Grid2op · Jul 30, 2020 · c351875 · c351875
1 parent 1e56b36
commit c351875
Show file tree

Hide file tree

Showing 8 changed files with 430 additions and 5 deletions.
diff --git a/grid2op/Converter/Converters.py b/grid2op/Converter/Converters.py
@@ -60,3 +60,27 @@ def convert_act(self, encoded_act):
         """
         regular_act = encoded_act
         return regular_act
+
+    def get_gym_dict(self):
+        """
+        Describe this converter as the content of an OpenAI gym space.
+
+        This implementation does not know how to perform the conversion and always
+        raises; presumably converters supporting gym are expected to override it
+        (to be confirmed against the subclasses).
+
+        Raises
+        ------
+        NotImplementedError
+            Always. The message embeds the string representation of this converter.
+
+        """
+        msg_template = ("Impossible to convert the converter \"{}\" automatically "
+                        "into a gym space (or gym is not installed on your machine).")
+        raise NotImplementedError(msg_template.format(self))
+
+    def convert_action_from_gym(self, gymlike_action):
+        """
+        Convert a gym-like action into the representation used by this converter.
+
+        This implementation does not know how to perform the conversion and always raises.
+
+        Parameters
+        ----------
+        gymlike_action:
+            The gym-like action to convert (not used here).
+
+        Raises
+        ------
+        NotImplementedError
+            Always. The message embeds the string representation of this converter.
+
+        """
+        error_msg = ("Impossible to convert the gym-like action automatically "
+                     "into the converter representation for \"{}\" ").format(self)
+        raise NotImplementedError(error_msg)
+
+    def convert_action_to_gym(self, gymlike_action):
+        """
+        Convert an action expressed in the representation of this converter into a gym-like action.
+
+        This implementation does not know how to perform the conversion and always raises.
+
+        Parameters
+        ----------
+        gymlike_action:
+            Despite its name (kept for backward compatibility), this is the action to convert
+            *to* the gym format, i.e. an action in the representation of this converter.
+
+        Raises
+        ------
+        NotImplementedError
+            Always. The message embeds the string representation of this converter.
+
+        """
+        # the message describes the "converter -> gym" direction (it used to be a copy
+        # of the message of the reverse conversion)
+        raise NotImplementedError("Impossible to convert the action automatically from the "
+                                  "converter representation for \"{}\" into a gym-like action."
+                                  "".format(self))
diff --git a/grid2op/Converter/GymConverter.py b/grid2op/Converter/GymConverter.py
@@ -0,0 +1,273 @@
+# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
+# See AUTHORS.txt
+# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
+# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
+# you can obtain one at http://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
+
+import numpy as np
+from grid2op.Converter.Converters import Converter
+from grid2op.Action import BaseAction
+from grid2op.Observation import BaseObservation
+from grid2op.dtypes import dt_int, dt_bool, dt_float
+from gym import spaces
+
+
+class BaseGymConverter:
+    """
+    Helpers shared by the gym spaces defined in this module.
+
+    They build elementary gym spaces and extract the attributes of grid2op objects
+    so that they can be stored in a gym-like ordered dictionary.
+    """
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def _generic_gym_space(dt, sh, low=None, high=None):
+        """
+        Build a one dimensional ``Box`` made of ``sh`` components of type ``dt``.
+
+        Parameters
+        ----------
+        dt:
+            Numpy dtype of the space (its ``type`` attribute is used to cast the bounds).
+        sh:
+            Number of components of the space.
+        low:
+            Lower bound. When ``None``, the smallest representable value is used for integer
+            types and ``-inf`` for floating point types.
+        high:
+            Upper bound. When ``None``, the largest representable value is used for integer
+            types and ``+inf`` for floating point types.
+
+        Returns
+        -------
+        The ``spaces.Box`` of shape ``(sh,)`` and dtype ``dt``.
+
+        """
+        if low is None or high is None:
+            # default bounds are only computed when they are needed
+            if np.issubdtype(dt, np.floating):
+                # np.iinfo rejects floating point types: such spaces are left unbounded,
+                # like the continuous spaces built elsewhere in this module
+                default_low, default_high = float("-inf"), float("inf")
+            else:
+                int_info = np.iinfo(dt)
+                default_low, default_high = int_info.min, int_info.max
+            if low is None:
+                low = default_low
+            if high is None:
+                high = default_high
+        shape = (sh,)
+        my_type = spaces.Box(low=dt.type(low), high=dt.type(high), shape=shape, dtype=dt)
+        return my_type
+
+    @staticmethod
+    def _boolean_type(sh):
+        """Return the ``MultiBinary`` space made of ``sh`` binary components."""
+        return spaces.MultiBinary(n=sh)
+
+    @staticmethod
+    def _extract_obj_grid2op(grid2op_obj, attr_nm, dtype):
+        """
+        Read the attribute ``attr_nm`` of ``grid2op_obj``.
+
+        Parameters
+        ----------
+        grid2op_obj:
+            Object exposing ``_get_array_from_attr_name``.
+        attr_nm:
+            Name of the attribute to read.
+        dtype:
+            Type declared by the gym space for this attribute, used to select the native
+            python type of attributes made of a single element.
+
+        Returns
+        -------
+        The value returned by ``_get_array_from_attr_name``, except when it has exactly one
+        element: that element is then returned, converted to a native python scalar when possible.
+
+        """
+        res = grid2op_obj._get_array_from_attr_name(attr_nm)
+
+        if len(res) == 1:
+            res = res[0]
+            # convert the types for json serializable
+            # this is not automatically done by gym...
+            if dtype == dt_int or dtype == np.int64 or dtype == np.int32:
+                res = int(res)
+            elif dtype == dt_float or dtype == np.float64 or dtype == np.float32:
+                res = float(res)
+            elif dtype == dt_bool:
+                res = bool(res)
+            elif isinstance(res, np.generic):
+                # the dtype declared by the gym space does not always match the one of the data
+                # NOTE(review): gym "MultiBinary" spaces declare an int8 dtype, so booleans
+                # end up here -- confirm against the installed gym version
+                res = res.item()
+        return res
+
+    def _base_to_gym(self, keys, obj, dtypes, converter=None):
+        """
+        Build the gym-like ordered dictionary representing ``obj``.
+
+        Parameters
+        ----------
+        keys:
+            Keys of the resulting dictionary.
+        obj:
+            The grid2op object to represent.
+        dtypes:
+            Mapping giving, for each key, the type declared by the gym space.
+        converter:
+            Optional mapping giving, for each key, the name of the attribute to read in ``obj``.
+            When ``None`` the key itself is used as attribute name.
+
+        Returns
+        -------
+        An ordered dictionary with one entry per element of ``keys``.
+
+        """
+        res = spaces.dict.OrderedDict()
+        for k in keys:
+            conv_k = k
+            if converter is not None:
+                conv_k = converter[k]
+            res[k] = self._extract_obj_grid2op(obj, conv_k, dtypes[k])
+        return res
+
+
+class GymObservationSpace(spaces.Dict, BaseGymConverter):
+    """
+    Gym ``Dict`` space describing the observations of a grid2op environment.
+
+    One entry is created per attribute listed in ``env.observation_space.attr_list_vect``
+    and it is keyed by the name of this attribute.
+
+    Parameters
+    ----------
+    env:
+        The environment. Its ``observation_space`` and ``parameters`` attributes are read.
+
+    """
+    def __init__(self, env):
+        # kept to be able to build empty grid2op observations in "from_gym"
+        self.initial_obs_space = env.observation_space
+        dict_ = {}
+        self._fill_dict_obs_space(dict_, env.observation_space, env.parameters)
+        spaces.Dict.__init__(self, dict_)
+
+    def _fill_dict_obs_space(self, dict_, observation_space, env_params):
+        """
+        Fill ``dict_`` (in place) with one gym space per attribute of the observation.
+
+        Parameters
+        ----------
+        dict_:
+            The dictionary to fill, keys are the attribute names.
+        observation_space:
+            The grid2op observation space; ``attr_list_vect``, ``shape`` and ``dtype`` are read,
+            as well as the generator limits ``gen_pmin`` and ``gen_pmax``.
+        env_params:
+            The parameters of the environment, used to bound the cooldown related attributes.
+
+        """
+        for attr_nm, sh, dt in zip(observation_space.attr_list_vect,
+                                   observation_space.shape,
+                                   observation_space.dtype):
+            my_type = None
+            shape = (sh,)
+            if dt == dt_int:
+                # discrete observation space
+                if attr_nm == "year":
+                    my_type = spaces.Discrete(n=2100)
+                elif attr_nm == "month":
+                    my_type = spaces.Discrete(n=13)
+                elif attr_nm == "day":
+                    my_type = spaces.Discrete(n=32)
+                elif attr_nm == "hour_of_day":
+                    my_type = spaces.Discrete(n=24)
+                elif attr_nm == "minute_of_hour":
+                    my_type = spaces.Discrete(n=60)
+                elif attr_nm == "day_of_week":
+                    my_type = spaces.Discrete(n=8)
+                elif attr_nm == "topo_vect":
+                    # NOTE(review): grid2op reports disconnected elements with -1 in the
+                    # topology vector, so the lower bound has to include it
+                    my_type = spaces.Box(low=-1, high=2, shape=shape, dtype=dt)
+                elif attr_nm == "time_before_cooldown_line":
+                    my_type = spaces.Box(low=0,
+                                         high=max(env_params.NB_TIMESTEP_COOLDOWN_LINE,
+                                                  env_params.NB_TIMESTEP_RECONNECTION),
+                                         shape=shape,
+                                         dtype=dt)
+                elif attr_nm == "time_before_cooldown_sub":
+                    my_type = spaces.Box(low=0,
+                                         high=env_params.NB_TIMESTEP_COOLDOWN_SUB,
+                                         shape=shape,
+                                         dtype=dt)
+                elif attr_nm == "duration_next_maintenance" or attr_nm == "time_next_maintenance":
+                    # can be -1 if no maintenance, otherwise always positive
+                    my_type = self._generic_gym_space(dt, sh, low=-1)
+
+            elif dt == dt_bool:
+                # boolean observation space
+                my_type = self._boolean_type(sh)
+            else:
+                # continuous observation space, unbounded unless stated otherwise below
+                low = float("-inf")
+                high = float("inf")
+                shape = (sh,)
+                SpaceType = spaces.Box
+                if attr_nm == "prod_p":
+                    # the bounds are given per generator, the shape is deduced from them
+                    low = observation_space.gen_pmin
+                    high = observation_space.gen_pmax
+                    shape = None
+                elif attr_nm == "prod_v" or attr_nm == "load_v" or attr_nm == "v_or" or attr_nm == "v_ex":
+                    # voltages can't be negative
+                    low = 0.
+                elif attr_nm == "a_or" or attr_nm == "a_ex":
+                    # amps can't be negative
+                    low = 0.
+                elif attr_nm == "target_dispatch" or attr_nm == "actual_dispatch":
+                    # a single (scalar) symmetric bound is used for all the generators
+                    low = np.min([observation_space.gen_pmin,
+                                  -observation_space.gen_pmax])
+                    high = np.max([-observation_space.gen_pmin,
+                                   +observation_space.gen_pmax])
+                my_type = SpaceType(low=low, high=high, shape=shape, dtype=dt)
+
+            if my_type is None:
+                # if nothing has been found in the specific cases above
+                my_type = self._generic_gym_space(dt, sh)
+
+            dict_[attr_nm] = my_type
+
+    def from_gym(self, gymlike_observation: spaces.dict.OrderedDict) -> BaseObservation:
+        """
+        Convert a gym-like observation into a grid2op observation.
+
+        Parameters
+        ----------
+        gymlike_observation:
+            Mapping from attribute names to their values.
+
+        Returns
+        -------
+        An observation obtained from ``get_empty_observation`` in which each item of
+        ``gymlike_observation`` has been assigned to the attribute of the same name.
+
+        """
+        res = self.initial_obs_space.get_empty_observation()
+        for k, v in gymlike_observation.items():
+            res._assign_attr_from_name(k, v)
+        return res
+
+    def to_gym(self, grid2op_observation: BaseObservation) -> spaces.dict.OrderedDict:
+        """
+        Convert a grid2op observation into a gym-like observation.
+
+        Parameters
+        ----------
+        grid2op_observation:
+            The observation to convert.
+
+        Returns
+        -------
+        An ordered dictionary with one entry per key of this space.
+
+        """
+        return self._base_to_gym(self.spaces.keys(), grid2op_observation,
+                                 dtypes={k: self.spaces[k].dtype for k in self.spaces})
+
+
+class GymActionSpace(spaces.Dict, BaseGymConverter):
+    """
+    Gym ``Dict`` space describing the actions of a grid2op action space.
+
+    When the action space is a ``Converter``, the content of the gym space and the
+    conversions are delegated to it. Otherwise one entry is created per attribute listed
+    in ``action_space.attr_list_vect``, keyed by its "human readable" name.
+
+    Parameters
+    ----------
+    action_space:
+        Either a grid2op action space or a ``Converter``.
+
+    """
+    # name of the attribute in the grid2op action -> key used in the gym space
+    keys_grid2op_2_human = {"prod_p": "prod_p",
+                            "prod_v": "prod_v",
+                            "load_p": "load_p",
+                            "load_q": "load_q",
+                            "_redispatch": "redispatch",
+                            "_set_line_status": "set_line_status",
+                            "_switch_line_status": "change_line_status",
+                            "_set_topo_vect": "set_bus",
+                            "_change_bus_vect": "change_bus",
+                            "_hazards": "hazards",
+                            "_maintenance": "maintenance",
+                            }
+    # key used in the gym space -> name of the attribute in the grid2op action
+    keys_human_2_grid2op = {human: g2op for g2op, human in keys_grid2op_2_human.items()}
+
+    def __init__(self, action_space):
+        self.initial_act_space = action_space
+        self.__is_converter = isinstance(action_space, Converter)
+        if self.__is_converter:
+            # a converter describes by itself the gym space it corresponds to
+            gym_dict = action_space.get_gym_dict()
+        else:
+            gym_dict = {}
+            self._fill_dict_act_space(gym_dict, action_space)
+            gym_dict = self._fix_dict_keys(gym_dict)
+
+        spaces.Dict.__init__(self, gym_dict)
+
+    def _fill_dict_act_space(self, dict_, action_space):
+        """
+        Fill ``dict_`` (in place) with one gym space per attribute of the action.
+
+        Parameters
+        ----------
+        dict_:
+            The dictionary to fill, keys are the grid2op attribute names.
+        action_space:
+            The grid2op action space; ``attr_list_vect``, ``shape`` and ``dtype`` are read,
+            as well as the generator limits used to bound some continuous attributes.
+
+        """
+        # upper bound of the integer vectors having a dedicated space (lower bound is -1)
+        int_upper_bounds = {"_set_line_status": 1, "_set_topo_vect": 2}
+
+        described_attrs = zip(action_space.attr_list_vect,
+                              action_space.shape,
+                              action_space.dtype)
+        for attr_nm, sh, dt in described_attrs:
+            gym_space = None
+            if dt == dt_int:
+                # discrete part of the action
+                if attr_nm in int_upper_bounds:
+                    gym_space = spaces.Box(low=-1,
+                                           high=int_upper_bounds[attr_nm],
+                                           shape=(sh,),
+                                           dtype=dt)
+            elif dt == dt_bool:
+                # boolean part of the action
+                gym_space = self._boolean_type(sh)
+            else:
+                # continuous part of the action, unbounded unless stated otherwise below
+                box_low, box_high, box_shape = float("-inf"), float("inf"), (sh,)
+                if attr_nm == "prod_p":
+                    # the bounds are given per generator, the shape is deduced from them
+                    box_low = action_space.gen_pmin
+                    box_high = action_space.gen_pmax
+                    box_shape = None
+                elif attr_nm == "prod_v":
+                    # a voltage is never negative
+                    box_low = 0.
+                elif attr_nm == "_redispatch":
+                    # bounded by the ramps of the generators
+                    box_low = -action_space.gen_max_ramp_down
+                    box_high = action_space.gen_max_ramp_up
+                gym_space = spaces.Box(low=box_low, high=box_high, shape=box_shape, dtype=dt)
+
+            if gym_space is None:
+                # no dedicated space has been built above
+                gym_space = self._generic_gym_space(dt, sh)
+
+            dict_[attr_nm] = gym_space
+
+    def _fix_dict_keys(self, dict_: dict) -> dict:
+        """Return a copy of ``dict_`` whose grid2op keys are replaced by their human readable names."""
+        return {self.keys_grid2op_2_human[g2op_key]: space for g2op_key, space in dict_.items()}
+
+    def from_gym(self, gymlike_action: spaces.dict.OrderedDict) -> object:
+        """
+        Transform a gym-like action (such as the output of "sample()") into a grid2op action
+
+        Parameters
+        ----------
+        gymlike_action:
+            Mapping from the keys of this space to their values.
+
+        Returns
+        -------
+        The result of ``convert_action_from_gym`` when the initial action space is a converter,
+        otherwise an action built by the initial action space in which each item has been assigned.
+
+        """
+        if self.__is_converter:
+            return self.initial_act_space.convert_action_from_gym(gymlike_action)
+
+        grid2op_action = self.initial_act_space()
+        for human_key, value in gymlike_action.items():
+            grid2op_action._assign_attr_from_name(self.keys_human_2_grid2op[human_key], value)
+        return grid2op_action
+
+    def to_gym(self, action: object) -> spaces.dict.OrderedDict:
+        """
+        Transform an action (non gym) into an action compatible with the gym Space.
+
+        Parameters
+        ----------
+        action:
+            The action to convert: a grid2op ``BaseAction``, unless the initial action space is
+            a converter (it is then handed to ``convert_action_to_gym`` as is).
+
+        Returns
+        -------
+        The gym-like representation of the action.
+
+        """
+        if self.__is_converter:
+            return self.initial_act_space.convert_action_to_gym(action)
+
+        # in that case action should be an instance of grid2op BaseAction
+        assert isinstance(action, BaseAction), "impossible to convert an action not coming from grid2op"
+        space_dtypes = {k: self.spaces[k].dtype for k in self.spaces}
+        return self._base_to_gym(self.spaces.keys(), action,
+                                 dtypes=space_dtypes,
+                                 converter=self.keys_human_2_grid2op)
diff --git a/grid2op/Converter/__init__.py b/grid2op/Converter/__init__.py
@@ -11,3 +11,11 @@
 from grid2op.Converter.IdToAct import IdToAct
 from grid2op.Converter.AnalogStateConverter import AnalogStateConverter
 from grid2op.Converter.ConnectivityConverter import ConnectivityConverter
+
+# the gym spaces are optional: they are exported only if their module can be imported
+try:
+    from grid2op.Converter.GymConverter import GymObservationSpace, GymActionSpace
+except ImportError:
+    # open ai gym must be installed to benefit from these classes,
+    # otherwise they are simply not exported
+    pass
+else:
+    __all__.extend(["GymObservationSpace", "GymActionSpace"])
diff --git a/grid2op/Observation/CompleteObservation.py b/grid2op/Observation/CompleteObservation.py
@@ -142,9 +142,9 @@ def update(self, env, with_forecast=True):
         self.day_of_week = dt_int(env.time_stamp.weekday())
 
         # get the values related to topology
-        self.timestep_overflow = copy.copy(env.timestep_overflow)
-        self.line_status = copy.copy(env.backend.get_line_status())
-        self.topo_vect = copy.copy(env.backend.get_topo_vect())
+        self.timestep_overflow[:] = env.timestep_overflow
+        self.line_status[:] = env.backend.get_line_status()
+        self.topo_vect[:] = env.backend.get_topo_vect()
 
         # get the values related to continuous values
         self.prod_p[:], self.prod_q[:], self.prod_v[:] = env.backend.generators_info()
@@ -167,7 +167,7 @@ def update(self, env, with_forecast=True):
             self._forecasted_inj += env.chronics_handler.forecasts()
             self._forecasted_grid = [None for _ in self._forecasted_inj]
 
-        self.rho = env.backend.get_relative_flow().astype(dt_float)
+        self.rho[:] = env.backend.get_relative_flow().astype(dt_float)
 
         # cool down and reconnection time after hard overflow, soft overflow or cascading failure
         self.time_before_cooldown_line[:] = env.times_before_line_status_actionable

diff --git a/grid2op/Observation/ObservationSpace.py b/grid2op/Observation/ObservationSpace.py
@@ -130,3 +130,7 @@ def size_obs(self):
         :return:
         """
         return self.n
+
+    def get_empty_observation(self):
+        """
+        Return a new empty observation, for internal use only.
+
+        Returns
+        -------
+        A deep copy of ``self._empty_obs``.
+
+        """
+        template_obs = self._empty_obs
+        return copy.deepcopy(template_obs)
diff --git a/grid2op/Space/SerializableSpace.py b/grid2op/Space/SerializableSpace.py
@@ -83,6 +83,7 @@ def __init__(self,
 
         self.shape = self._template_obj.shape()
         self.dtype = self._template_obj.dtype()
+        self.attr_list_vect = self._template_obj.attr_list_vect
 
         self._to_extract_vect = {}  # key: attr name, value: tuple: (beg_, end_, dtype)
         beg_ = 0