Upgrade to version 1.2.0 #139

Merged · 50 commits · Aug 3, 2020

Changes from 1 commit (of 50 commits)
bc4f01d
Create RandomRedispatchAgent.py
marota Jul 15, 2020
9373619
Adds randomredispatchAgent to init for export and fix gen selected
Tezirg Jul 15, 2020
48fde7a
Cleanup RandomRedispatchAgent
Tezirg Jul 16, 2020
f87daa1
RandomRedispatchAgent docstrings and typing
Tezirg Jul 16, 2020
0422968
RandomRedispatchAgent: Forgot to import numpy
Tezirg Jul 16, 2020
8eda8ea
Changelog update for randomredip agent
Jul 17, 2020
f9acf4c
Removes a docstring: Issue #127
Jul 17, 2020
d489d7e
EpisodeData #129: Removes one to game over count of environment actions
Jul 17, 2020
8ceeb89
fixing issue #125
BDonnot Jul 17, 2020
6eb21c0
Faster redispatchable gen selection
Tezirg Jul 19, 2020
1239536
Merge branch 'marota-randomRedispatchAgent' of github.com:BDonnot/Gri…
Tezirg Jul 19, 2020
b13e8da
Renames RandopmRedispatchAgen to DeltaRedispatchRandomAgent
Tezirg Jul 19, 2020
480d3f0
CHANGELOG: Updated for DeltaRedispatchRandomAgent
Tezirg Jul 19, 2020
74d7118
Add space_prng argument for sampling actions, as it is a missing posi…
Tezirg Jul 20, 2020
43c4555
Merge branch 'marota-randomRedispatchAgent' into zrg-1.1.2
Tezirg Jul 20, 2020
d89b8aa
CHANGELOG: Log fixed issues
Tezirg Jul 20, 2020
b32d1c5
Observation get forecast from dict instread of action to fix issue #131
Jul 22, 2020
355a0d0
Fixed backend iadd actions: Fix for issue #134
Jul 22, 2020
2e70500
Adds unittest for issue #131
Jul 22, 2020
2ba5af0
refactoring the test to reflect the github issue when one is associat…
BDonnot Jul 29, 2020
49cbbf1
Merge pull request #98 from BDonnot/bd_dev
Tezirg Jul 30, 2020
d5d2372
Changelog merge conflicts
Tezirg Jul 30, 2020
06bb0d4
Moves issue134 tests to BaseBackendTest
Tezirg Jul 30, 2020
33410f0
grid2op.Action: Removes action space sample logic from action class, …
Tezirg Jul 30, 2020
a4fef65
Changelog: Reference solved issue #134
Tezirg Jul 30, 2020
f091a42
Merge pull request #97 from BDonnot/zrg-1.1.2
BDonnot Jul 30, 2020
0b6bcef
Fix issue #126: No longer check env actions ambiguity on episode load…
Tezirg Jul 30, 2020
3121f2b
Changelog issue fixed update
Tezirg Jul 30, 2020
4b67208
Followup to #126: Do not set internal env modifications action for re…
Tezirg Jul 30, 2020
c351875
improve support for gym see issue #16
BDonnot Jul 30, 2020
44374ac
Merge branch 'master' into gym_space
BDonnot Jul 30, 2020
ca291e8
Merge pull request #99 from BDonnot/zrg-1.1.2
BDonnot Jul 30, 2020
42dad21
Merge branch 'master' into gym_space
BDonnot Jul 30, 2020
b27cb84
adding the possibility to converter the idtoact as a gym space
BDonnot Jul 30, 2020
3417245
refactoring the test for the gym converter
BDonnot Jul 30, 2020
a6bb84e
adding documentation for #16
BDonnot Jul 30, 2020
ff7ad99
SerializableActionSpace.sample()
Tezirg Jul 31, 2020
26cd95b
Docstring for actionspace.sample + tests fixed seed
Tezirg Jul 31, 2020
2b79dc3
Merge pull request #100 from BDonnot/zrg-1.1.2
BDonnot Jul 31, 2020
1dec57c
finishing the gym conversion for converters of type ToVect, further i…
BDonnot Jul 31, 2020
d98bb86
Merge branch 'master' into gym_space
BDonnot Jul 31, 2020
19968b2
Merge pull request #101 from BDonnot/gym_space
Tezirg Jul 31, 2020
77fe54e
new method to connect / disconnect powerline based on setting the bus…
BDonnot Jul 31, 2020
00c1c3f
rationalizing the API after the bug rte-france#134 and robustify back…
BDonnot Jul 31, 2020
3d3c1c0
fixing a bug caused by the wrong reading of the powerline status in s…
BDonnot Jul 31, 2020
78871fc
adding documentation for combinescaled reward
BDonnot Aug 3, 2020
bb3b8a9
adding test for the behaviour of action that impacts line status + no…
BDonnot Aug 3, 2020
9d83788
improving the help for the action to reflect new change
BDonnot Aug 3, 2020
b8979e9
improving documentation
BDonnot Aug 3, 2020
e057b90
Merge pull request #102 from BDonnot/co_disco
BDonnot Aug 3, 2020
adding documentation for combinescaled reward
BDonnot committed Aug 3, 2020

Verified: this commit was created on GitHub.com and signed with GitHub's verified signature. The key has expired.
commit 78871fccc933fd1951d7fcaad554d89377463867
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -43,6 +43,7 @@ Change Log
otherwise it affects the substations but not the powerline). Changing the bus of an extremity of
a powerline if this powerline is connected has no impact on its status and therefore it considers
it only affects the corresponding substation.
- [IMPROVED] added documentation and usage example for `CombinedReward` and `CombinedScaledReward`

[1.1.1] - 2020-07-07
---------------------
6 changes: 6 additions & 0 deletions grid2op/Environment/BaseEnv.py
@@ -1022,6 +1022,12 @@ def _get_reward(self, action, has_error, is_done, is_illegal, is_ambiguous):
}
return res, other_rewards

def get_reward_instance(self):
"""
Returns the instance of the object that is used to compute the reward.
"""
return self.reward_helper.template_reward

def _is_done(self, has_error, is_done):
no_more_data = self.chronics_handler.done()
return has_error or is_done or no_more_data
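In practice the new `get_reward_instance` is called right after the environment is built, so that sub rewards can be registered on the combined reward before the first reset. A minimal usage sketch (the dataset name "rte_case14_realistic" is an assumption; any installed dataset works):

import grid2op
from grid2op.Reward import CombinedReward, GameplayReward, FlatReward

# build the environment with a combined reward, then fetch the reward
# instance the environment will actually evaluate at each step
env = grid2op.make("rte_case14_realistic", reward_class=CombinedReward)
cr = env.get_reward_instance()

# register the sub rewards and initialize before the first reset
cr.addReward("Gameplay", GameplayReward(), 1.0)
cr.addReward("Flat", FlatReward(), 1.0)
cr.initialize(env)

obs = env.reset()
obs, reward, done, info = env.step(env.action_space())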
29 changes: 28 additions & 1 deletion grid2op/Reward/CombinedReward.py
@@ -9,9 +9,36 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float


class CombinedReward(BaseReward):
"""
This class allows to combine multiple rewards, by summing them for example.
This class allows combining multiple predefined rewards. The reward it computes will
be the sum of all the sub rewards it is made of.

Each sub reward is identified by a key.

It is used a bit differently than the other rewards. See the Examples section for more information.

Examples
--------

.. code-block:: python

import grid2op
from grid2op.Reward import GameplayReward, FlatReward, CombinedReward

env = grid2op.make(..., reward_class=CombinedReward)
cr = env.get_reward_instance()
cr.addReward("Gameplay", GameplayReward(), 1.0)
cr.addReward("Flat", FlatReward(), 1.0)
cr.initialize(env)

obs = env.reset()
obs, reward, done, info = env.step(env.action_space())

# the reward here is the sum of what `GameplayReward` and
# `FlatReward` would each have given on their own

"""
def __init__(self):
BaseReward.__init__(self)
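The weighted summation described in the docstring can be pictured with a short sketch; this is illustrative only (the class and attribute names such as `SketchCombinedReward` are made up for the example), not the library's internal code:

# Illustrative sketch of the weighted-sum behaviour described above,
# not grid2op's actual implementation.
class SketchCombinedReward:
    def __init__(self):
        self._rewards = {}  # key -> (sub_reward, weight)

    def addReward(self, key, sub_reward, weight=1.0):
        # each sub reward is identified by a key
        self._rewards[key] = (sub_reward, weight)

    def combine(self, *call_args):
        # evaluate every registered sub reward with the same arguments
        # and return the weighted sum of the results
        return sum(weight * sub_reward(*call_args)
                   for sub_reward, weight in self._rewards.values())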
25 changes: 24 additions & 1 deletion grid2op/Reward/CombinedScaledReward.py
@@ -11,6 +11,7 @@
from grid2op.Reward.CombinedReward import CombinedReward
from grid2op.dtypes import dt_float


class CombinedScaledReward(CombinedReward):
"""
This class allows to combine multiple rewards.
@@ -19,7 +20,29 @@ class CombinedScaledReward(CombinedReward):
from the range [min_sum; max_sum] to [reward_min; reward_max]

min_sum and max_sum are computed from the weights and ranges of registered rewards.
See `Reward.BaseReward` for setting the output range.
See :class:`Reward.BaseReward` for setting the output range.

Examples
--------

.. code-block:: python

import grid2op
from grid2op.Reward import GameplayReward, FlatReward, CombinedScaledReward

env = grid2op.make(..., reward_class=CombinedScaledReward)
cr = env.get_reward_instance()
cr.addReward("Gameplay", GameplayReward(), 1.0)
cr.addReward("Flat", FlatReward(), 1.0)
cr.initialize(env)

obs = env.reset()
obs, reward, done, info = env.step(env.action_space())

# the reward here is the sum of what `GameplayReward` and `FlatReward`
# would have given, rescaled into the configured output range


"""

def __init__(self):
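The scaling mentioned in the docstring is an affine map from the range of the raw sum, [min_sum; max_sum], onto the configured output range [reward_min; reward_max]. A small sketch of that arithmetic (the helper name is hypothetical, not part of the library):

def scale_combined_sum(raw_sum, min_sum, max_sum, reward_min, reward_max):
    # map raw_sum linearly from [min_sum, max_sum] onto [reward_min, reward_max]
    return reward_min + (raw_sum - min_sum) * (reward_max - reward_min) / (max_sum - min_sum)

# e.g. two sub rewards with weight 1.0 whose ranges add up to [-2.0, 2.0]:
# a raw sum of 1.0 scaled onto [0.0, 1.0] gives 0.75
print(scale_combined_sum(1.0, -2.0, 2.0, 0.0, 1.0))  # 0.75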
2 changes: 2 additions & 0 deletions grid2op/tests/test_Reward.py
@@ -78,10 +78,12 @@ class TestLoadingL2RPNSandBoxScore(TestLoadingReward, unittest.TestCase):
def _reward_type(self):
return L2RPNSandBoxScore


class TestLoadingLinesCapacityReward(TestLoadingReward, unittest.TestCase):
def _reward_type(self):
return LinesCapacityReward


class TestDistanceReward(TestLoadingReward, unittest.TestCase):
def _reward_type(self):
return DistanceReward