Grid2op · BDonnot · Apr 15, 2021 · Apr 12, 2021 · Apr 12, 2021 · Apr 14, 2021
diff --git a/.gitignore b/.gitignore
@@ -302,6 +302,7 @@ invert_idto_act.py
 test_bug_discord1.py
 test_networkx.py
 test_issue185.py
+test_can_make_opponent.py
 
 # profiling files
 **.prof
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -24,17 +24,24 @@ Change Log
 
 [1.5.1] - 2021-xx-yy
 -----------------------
+- [FIXED] `Issue #187 <https://github.com/rte-france/Grid2Op/issues/187>`_ : improve the computation and the
+  documentation of the `RedispReward`. This has an impact on the `env.reward_range` of all environments using this
+  reward, because the old "reward_max" was not correct.
 - [FIXED] `Issue #181 <https://github.com/rte-france/Grid2Op/issues/181>`_ : now environment can be created with
   a layout and a warning is issued in this case.
 - [FIXED] `Issue #180 <https://github.com/rte-france/Grid2Op/issues/180>`_ : it is now possible to set the thermal
   limit with a dictionary
+- [FIXED] a typo that would cause the attack to be discarded in the runner in some cases (cases for now not used)
 - [FIXED] an issue linked to the transformation into gym box space for some environments,
   this **might** be linked to `Issue #185 <https://github.com/rte-france/Grid2Op/issues/185>`_
 - [ADDED] a feature to retrieve the voltage angle (theta) in the backend (`backend.get_theta`) and in the observation.
 - [ADDED] support for multimix in the GymEnv (lack of support spotted thanks to
   `Issue #185 <https://github.com/rte-france/Grid2Op/issues/185>`_ )
+- [ADDED] basic documentation of the environment available.
 - [IMPROVED] extra layer of security preventing modification of `observation_space` and `action_space` of environment
 - [IMPROVED] better handling of dynamically generated classes
+- [IMPROVED] the documentation of the opponent
+
 
 [1.5.0] - 2021-03-31
 -------------------------

diff --git a/docs/available_envs.rst b/docs/available_envs.rst
diff --git a/docs/img/R2_full_grid.png b/docs/img/R2_full_grid.png
diff --git a/docs/img/l2rpn_case14_sandbox_layout.png b/docs/img/l2rpn_case14_sandbox_layout.png
diff --git a/docs/img/l2rpn_neurips_2020_track1_layout.png b/docs/img/l2rpn_neurips_2020_track1_layout.png
diff --git a/docs/img/l2rpn_neurips_2020_track2_layout.png b/docs/img/l2rpn_neurips_2020_track2_layout.png
diff --git a/docs/opponent.rst b/docs/opponent.rst
@@ -25,10 +25,143 @@ The class :class:`OpponentSpace` has the delicate role to:
 - compute the cost of such attack
 - make sure this cost is not too high for the opponent budget.
 
+How to create an opponent in any environment
+---------------------------------------------
+
+This section is a work in progress, it will only cover how to set up one type of opponent, and supposes
+that you already know which lines you want to attack, at which frequency etc.
+
+More detailed information about the opponent will be provided in the future.
+
+The set up for the opponent in the "l2rpn_neurips_track1" has the following configuration.
+
+.. code-block:: python
+
+    lines_attacked = ["62_58_180", "62_63_160", "48_50_136", "48_53_141", "41_48_131", "39_41_121",
+                  "43_44_125", "44_45_126", "34_35_110", "54_58_154"]
+    rho_normalization = [0.45, 0.45, 0.6, 0.35, 0.3, 0.2,
+                         0.55, 0.3, 0.45, 0.55]
+    opponent_attack_cooldown = 12*24  # 24 hours, 1 hour being 12 time steps
+    opponent_attack_duration = 12*4  # 4 hours
+    opponent_budget_per_ts = 0.16667  # opponent_attack_duration / opponent_attack_cooldown + epsilon
+    opponent_init_budget = 144.  # no need to attack right away, it can attack starting at midday the first day
+    config = {
+        "opponent_attack_cooldown": opponent_attack_cooldown,
+        "opponent_attack_duration": opponent_attack_duration,
+        "opponent_budget_per_ts": opponent_budget_per_ts,
+        "opponent_init_budget": opponent_init_budget,
+        "opponent_action_class": PowerlineSetAction,
+        "opponent_class": WeightedRandomOpponent,
+        "opponent_budget_class": BaseActionBudget,
+        'kwargs_opponent': {"lines_attacked": lines_attacked,
+                            "rho_normalization": rho_normalization,
+                            "attack_period": opponent_attack_cooldown}
+    }
+
+To create the same type of opponent on the **case14** grid you can do:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Action import PowerlineSetAction
+    from grid2op.Opponent import RandomLineOpponent, BaseActionBudget
+    env_name = "l2rpn_case14_sandbox"
+
+    env_with_opponent = grid2op.make(env_name,
+                                     opponent_attack_cooldown=12*24,
+                                     opponent_attack_duration=12*4,
+                                     opponent_budget_per_ts=0.5,
+                                     opponent_init_budget=0.,
+                                     opponent_action_class=PowerlineSetAction,
+                                     opponent_class=RandomLineOpponent,
+                                     opponent_budget_class=BaseActionBudget,
+                                     kwargs_opponent={"lines_attacked":
+                                          ["1_3_3", "1_4_4", "3_6_15", "9_10_12", "11_12_13", "12_13_14"]}
+                                     )
+    # and now you have an opponent on the l2rpn_case14_sandbox
+    # you can for example
+    obs = env_with_opponent.reset()
+
+    act = ...  # choose an action here
+    obs, reward, done, info = env_with_opponent.step(act)
+
+
+And for the track2 of neurips, if you want to make it even more complicated, you can add an opponent
+in the same fashion:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Action import PowerlineSetAction
+    from grid2op.Opponent import RandomLineOpponent, BaseActionBudget
+    env_name = "l2rpn_neurips_2020_track2_small"
+
+    env_with_opponent = grid2op.make(env_name,
+                                     opponent_attack_cooldown=12*24,
+                                     opponent_attack_duration=12*4,
+                                     opponent_budget_per_ts=0.5,
+                                     opponent_init_budget=0.,
+                                     opponent_action_class=PowerlineSetAction,
+                                     opponent_class=RandomLineOpponent,
+                                     opponent_budget_class=BaseActionBudget,
+                                     kwargs_opponent={"lines_attacked":
+                                                         ["26_31_106",
+                                                          "21_22_93",
+                                                          "17_18_88",
+                                                          "4_10_162",
+                                                          "12_14_68",
+                                                          "14_32_108",
+                                                          "62_58_180",
+                                                          "62_63_160",
+                                                          "48_50_136",
+                                                          "48_53_141",
+                                                          "41_48_131",
+                                                          "39_41_121",
+                                                          "43_44_125",
+                                                          "44_45_126",
+                                                          "34_35_110",
+                                                          "54_58_154",
+                                                          "74_117_81",
+                                                          "80_79_175",
+                                                          "93_95_43",
+                                                          "88_91_33",
+                                                          "91_92_37",
+                                                          "99_105_62",
+                                                          "102_104_61"]}
+                                     )
+    # and now you have an opponent on the l2rpn_neurips_2020_track2_small
+    # you can for example
+    obs = env_with_opponent.reset()
+
+    act = ...  # choose an action here
+    obs, reward, done, info = env_with_opponent.step(act)
+
+To summarize what is going on here:
+
+- `opponent_attack_cooldown`: gives the minimum number of time steps between two attacks (here 1 attack per day)
+- `opponent_attack_duration`: duration for each attack (when a line is attacked, it will not be possible to reconnect
+  it for that many steps). In the example it's 4h (so 48 steps)
+- `opponent_action_class`: type of the action the opponent will perform (in this case `PowerlineSetAction`)
+- `opponent_class`: type of the opponent. Change it at your own risk.
+- `opponent_budget_class`: Each attack will cost some budget to the opponent. If no budget, the opponent cannot
+  attack. This specifies how the budget is computed. Do not change it.
+- `opponent_budget_per_ts`: increase of the budget of the opponent per step. The higher this number, the faster
+  the opponent will regenerate its budget.
+- `opponent_init_budget`: initial opponent budget. It is set to 0 to "give" the agent a bit of time before the opponent
+  is triggered.
+- `kwargs_opponent`: additional information for the opponent. In this case we provide, for each grid, the powerlines
+  it can attack.
+
+.. note::
+
+    This is only valid for the `RandomLineOpponent` that disconnects powerlines randomly (but not uniformly!). For other
+    types of opponent, we don't provide any information in the documentation at this stage. Feel free to submit
+    a github issue if this is an issue for you.
 
 Detailed Documentation by class
 --------------------------------
 .. automodule:: grid2op.Opponent
-   :members:
+    :members:
+    :autosummary:
 
 .. include:: final.rst
diff --git a/grid2op/Chronics/GridStateFromFile.py b/grid2op/Chronics/GridStateFromFile.py
@@ -53,14 +53,17 @@ class GridStateFromFile(GridValue):
     - "prod_p.csv": for each time steps, this file contains the value for the active production of
       each generators of the grid (it counts as many rows as the number of time steps - and its header)
       and as many columns as the number of generators on the grid. The header must contains the names of
-      the generators used to map their value on the grid. Values must be convertible to floating point.
+      the generators used to map their value on the grid. Values must be convertible to floating point and the
+      column separator of this file should be semi-colon `;` (unless you specify a "sep" when loading this class)
     - "prod_v.csv": same as "prod_p.csv" but for the production voltage setpoint.
     - "load_p.csv": same as "prod_p.csv" but for the load active value (number of columns = number of loads)
     - "load_q.csv": same as "prod_p.csv" but for the load reactive value (number of columns = number of loads)
     - "maintenance.csv": that contains whether or not there is a maintenance for a given powerline (column) at
       each time step (row).
     - "hazards.csv": that contains whether or not there is a hazard for a given powerline (column) at
       each time step (row).
+    - "start_datetime.info": the time stamp (date and time) at which the chronic is starting.
+    - "time_interval.info": the amount of time between two consecutive steps (*e.g.* 5 mins, or 1h)
 
     If a file is missing, it is understood as "this value will not be modified". For example, if the file
     "prod_v.csv" is not present, it will be equivalent as not modifying the production voltage setpoint, never.

diff --git a/grid2op/Chronics/GridValue.py b/grid2op/Chronics/GridValue.py
@@ -439,7 +439,7 @@ def load_next(self):
             about the grid state (load p and load q, prod p and prod v as well as some maintenance
             or hazards information)
 
-        Generate the next values, either by reading from a file, or by generating on the fly and return a dictionnary
+        Generate the next values, either by reading from a file, or by generating on the fly and return a dictionary
         compatible with the :class:`grid2op.BaseAction` class allowed for the :class:`Environment`.
 
         More information about this dictionary can be found at :func:`grid2op.BaseAction.update`.

diff --git a/grid2op/Episode/EpisodeData.py b/grid2op/Episode/EpisodeData.py
@@ -572,7 +572,8 @@ def incr_store(self, efficient_storing, time_step, time_step_duration,
                 if efficient_storing:
                     self.attacks.collection[time_step - 1, :] = 0.
                 else:
-                    self.attack = np.concatenate((self.attack, self.attack_templ))
+                    # might not work !
+                    self.attacks = np.concatenate((self.attacks, self.attack_templ))
 
             if efficient_storing:
                 # efficient way of writing
@@ -585,6 +586,7 @@ def incr_store(self, efficient_storing, time_step, time_step_duration,
                     else:
                         self.disc_lines[time_step - 1, :] = self.disc_lines_templ
             else:
+                # might not work !
                 # completely inefficient way of writing
                 self.times = np.concatenate(
                     (self.times, (time_step_duration,)))