diff --git a/.gitignore b/.gitignore
index 578a3199f..5fe0ed1cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -217,7 +217,15 @@ test_sac
 test_if_bug.py
 getting_started/saved_agent_DDDQN_10000/
 getting_started/saved_agent_DDDQN_10000_results/
-
+test_reboot.py
+test_multimixrunner/
+test_N1reward.py
+i_saved_the_runner_here/
+output_pdf/
+test_make_plot_injj.ipynb
+plot_inj_prez_gdrive.ipynb
+where_i_want_to_save_it/
+test_issue147.py
 # profiling files
 **.prof
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a5a1e28e7..bc8bc1cff 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,7 +7,6 @@ Change Log
 - [???] Extensive tests for DistanceReward
 - [???] better logging
 - [???] add a "plot action" method
-- [???] rationalize the public and private part of the API. Some members now are public but should be private.
 - [???] simulate in MultiEnv
 - [???] in MultiEnv, when some converter of the observations are used, have each child process to compute
   it in parrallel and transfer the resulting data.
@@ -16,6 +15,29 @@
 - [???] model batteries / pumped storage in grid2op (generator but that can be charged / discharged)
 - [???] model dumps (as in dump storage) in grid2op (stuff that have a given energy max, and cannot produce
   more than the available energy)
+
+[1.2.3] - 2020-09-25
+----------------------
+- [ADDED] `l2rpn-baselines` package dependency in the "binder" environment.
+- [FIXED] binder integration that was broken momentarily
+- [FIXED] an issue in the sampling of redispatching actions (ramp up and ramp down were inverted)
+- [FIXED] an issue causing errors when using `action_space.change_bus` and `action_space.set_bus`
+- [FIXED] an issue in the sampling: redispatching and "change_bus" were always performed at the
+  same time
+- [FIXED] `Issue #144 `_: typo that could prevent some error messages from
+  being displayed in some cases.
+- [FIXED] `Issue #146 `_: awkward behaviour that led to the reward function
+  not being called when the episode was over.
+- [FIXED] `Issue #147 `_: inconsistency between `step` and `simulate`
+  when cooldowns were applied (rule checking was not using the right method).
+- [FIXED] an error preventing the loading of an Ambiguous Action (in case an agent took such an action, the
+  `EpisodeData` would not load it properly).
+- [IMPROVED] overall documentation of `BaseEnv` and `Environment`
+- [IMPROVED] rationalize the public and private parts of the API for `Environment` and `BaseEnv`.
+  Some members have been moved to private attributes (their modification would largely alter the
+  behaviour of grid2op).
+- [IMPROVED] internal functions are tagged as "Internal, do not use" in the documentation.
+- [IMPROVED] documentation for the `Environment` and `MultiMixEnvironment`.
+
 [1.2.2] - 2020-08-19
 ---------------------
 - [FIXED] `LightSim Issue #10`_: tests were
diff --git a/binder/environment.yml b/binder/environment.yml
index 324174ce2..1c4e9fcbd 100644
--- a/binder/environment.yml
+++ b/binder/environment.yml
@@ -5,8 +5,10 @@ dependencies:
   - python
   - numpy
   - keras
+  - pip
   - pip:
     - grid2op[challenge]
+    - l2rpn-baselines
     - jyquickhelper
     - numpy
     - numba
diff --git a/docs/action.rst b/docs/action.rst
index b68068b55..f60c2f853 100644
--- a/docs/action.rst
+++ b/docs/action.rst
@@ -1,4 +1,5 @@
 .. currentmodule:: grid2op.Action
+.. _action-module:
 
 Action
 ===================================
@@ -39,6 +40,20 @@ instances of BaseAction is assessed both when calling :func:`BaseAction.update`
 :func:`BaseAction._check_for_ambiguity` performed for example by the Backend when it must implement its effect on
 the powergrid through a call to :func:`BaseAction.__call__`
 
+Constructing an action in grid2op is done in the following manner:
+
+.. code-block:: python
+
+    import grid2op
+    env = grid2op.make()
+    dictionary_describing_the_action = {}
+    my_action = env.action_space(dictionary_describing_the_action)
+    print(my_action)
+
+In the above code, `dictionary_describing_the_action` should be a dictionary that describes what action
+you want to perform on the grid. For more information you can consult the help of :func:`BaseAction.update`.
+
+
 .. _Illegal-vs-Ambiguous:
 
 Illegal vs Ambiguous
@@ -137,6 +152,8 @@ action original status final status
 \* means that this bus is affected: if it was on bus 1 it moves on bus 2 and vice versa.
 
+.. _action-module-converter:
+
 Easier actions manipulation
 ----------------------------
 The action class presented here can be quite complex to apprehend, especially for a machine learning algorithm.
diff --git a/docs/agent.rst b/docs/agent.rst
index a2d69fb80..1cef6913a 100644
--- a/docs/agent.rst
+++ b/docs/agent.rst
@@ -1,4 +1,5 @@
 .. currentmodule:: grid2op.Agent
+.. _agent-module:
 
 Agent
 ============
@@ -20,6 +21,36 @@ To perform their actions, agent receive two main signals from the :class:`grid2o
 Both these signals can be use to determine what is the best action to perform on the grid. This is actually the
 main objective of an :class:`BaseAgent`, and this is done in the :func:`BaseAgent.act` method.
 
+To get started coding your agent we encourage you to read the description of the :ref:`action-module` to know how
+to implement your action. Don't hesitate to have a look at the :ref:`action-module-converter` for
+an easier / higher level action manipulation.
+
+Once you know how to manipulate a powergrid in the grid2op framework, you can easily implement an agent
+following this example:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Agent import BaseAgent
+
+    class MyCustomAgent(BaseAgent):
+        def __init__(self, action_space, something_else, and_another_something):
+            # define here the constructor of your agent
+            # here we say our agent needs "something_else" and "and_another_something"
+            # to be built, just to demonstrate it does not cause any problem to extend the
+            # construction of the base class BaseAgent, which only takes "action_space" as a constructor argument
+            BaseAgent.__init__(self, action_space)
+            self.something_else = something_else
+            self.and_another_something = and_another_something
+
+        def act(self, obs, reward, done=False):
+            # this is the only method you need to implement
+            # it takes an observation obs (and a reward and a flag)
+            # and should return a valid action
+            dictionary_describing_the_action = {}  # this can be anything you want that grid2op understands
+            my_action = self.action_space(dictionary_describing_the_action)
+            return my_action
+
 Detailed Documentation by class
 -------------------------------
diff --git a/docs/backend.rst b/docs/backend.rst
index 15f4b78f2..253d3b12c 100644
--- a/docs/backend.rst
+++ b/docs/backend.rst
@@ -5,6 +5,16 @@ Backend
 
 Objectives
 -----------
+
+.. warning:: Backends are internal to grid2op. You should not have to recode any backend if you are "simply"
+    using grid2op, for example to develop new controllers.
+
+    Backend is an abstraction that represents the physical system (the powergrid). In theory every powerflow
+    solver can be used as a backend. For now we only provide a Backend that uses `Pandapower `_ and
+    a port in c++ of a subset of pandapower called `LightSim2Grid `_ .
+
+    Both can serve as examples if you want to code a new backend.
+
 This Module defines the template of a backend class.
 Backend instances are responsible to translate action (performed either by an BaseAgent or by the Environment) into
 comprehensive powergrid modifications.
@@ -34,7 +44,6 @@ The order of the values returned are always the same and determined when the bac
 '\*_names'. For example, when the ith element of the results of a call to :func:`Backend.get_line_flow` is the
 flow on the powerline with name `lines_names[i]`.
 
-
 Detailed Documentation by class
 -------------------------------
 .. automodule:: grid2op.Backend
diff --git a/docs/conf.py b/docs/conf.py
index 58ca2e759..a3c5de573 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -48,7 +48,10 @@
     # 'sphinx.ext.autosectionlabel',
     # 'details',
-    #'exception_hierarchy'
+    #'exception_hierarchy',
+
+    # for pdf
+    # 'rst2pdf.pdfbuilder'
 ]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = [] #'_templates']
@@ -73,6 +76,9 @@
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 
+# for pdf
+pdf_documents = [('index', u'rst2pdf', u'Sample rst2pdf doc', u'Your Name'),]
+
 def setup(app):
     app.add_javascript('custom.js')
diff --git a/docs/environment.rst b/docs/environment.rst
index 1bceb9d8d..fe01b0dd4 100644
--- a/docs/environment.rst
+++ b/docs/environment.rst
@@ -1,4 +1,5 @@
 .. currentmodule:: grid2op.Environment
+.. _environment-module:
 
 Environment
 ===================================
@@ -16,27 +17,128 @@ and returns an
 An environment is better used inside a :class:`grid2op.Runner.Runner`, mainly because runners abstract the interaction
 between environment and agent, and ensure the environments are properly reset after each episode.
 
-It is however totally possible to use as any gym Environment.
+.. _environment-module-usage:
 
-Example (adapted from gym documentation available at
+Usage
+------
+
+In this section we present some ways to use the :class:`Environment` class.
+
+Basic Usage
+++++++++++++
+This example is adapted from the gym documentation available at
 `gym random_agent.py `_ ):
 
 .. code-block:: python
 
     import grid2op
-    from grid2op.BaseAgent import DoNothingAgent
+    from grid2op.Agent import RandomAgent
     env = grid2op.make()
-    agent = DoNothingAgent(env.action_space)
-    env.seed(0)
-    episode_count = 100
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+    episode_count = 100  # I want to make 100 episodes
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # and now the loop starts
+    for i in range(episode_count):
+        ob = env.reset()
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+    # Close the env and write monitor result info to disk
+    env.close()
+    print("The total reward was {:.2f}".format(total_reward))
+
+
+What happens here is the following:
+
+- `ob = env.reset()` will reset the environment to be usable again. It will load, by default,
+  the next "chronics" (you can imagine chronics as the graphics of a video game: they tell where
+  the enemies are located, where the walls are, the ground etc. - each chronics can be
+  thought of as a different "game level").
+- `action = agent.act(ob, reward, done)` will choose an action given the observation `ob`.
+  This action should be of type :class:`grid2op.Action.BaseAction` (or one of its derived classes).
+  In the case of a video game, that would be you receiving an observation (usually displayed on the screen)
+  and acting on a controller. For example you could choose to go "left" / "right" / "up" or "down".
+  Of course in the case of the powergrid the actions are more complicated than that.
+- `ob, reward, done, info = env.step(action)` is the call to go to the next step. You can imagine
+  it as being the next "frame". To continue the parallel with video games: at the previous line
+  you asked "pacman" to go left (for example) and then the next frame is displayed (here returned
+  as a new observation `ob`).
+
+You might want to customize this general behaviour in multiple ways:
+
+- you might want to study only one chronics (equivalent to only one level of a video game);
+  see `Study always the same chronics`_
+- you might want to loop through the chronics, but not always in the same order. If that is the case
+  you might want to consult the section `Shuffle the chronics order`_
+- you might also have spotted some chronics that have bad properties. In this case, you can
+  "remove" them from the environment (they will be ignored). This is explained in `Skipping some chronics`_
+- you might also want to select the next chronics you will use at random. This allows some compromise
+  between all the above solutions: instead of ignoring some chronics you might want to select them less
+  frequently, instead of always using the same one you can sample it more often and, of course, because
+  the sampling is done randomly, it's unlikely that the order will remain the same. To use that
+  you can check the `Sampling the chronics`_ section
+
+In different scenarios, you might also want to skip the first time steps of the chronics, which would
+be equivalent to starting in the "middle" of a video game. If that is the case, the subsection
+`Skipping some time steps`_ is made for you.
+
+Finally, you might have noticed that each call to "env.reset" might take a while. This can dramatically
+increase the training time, especially at the beginning. This is due to the fact that each time
+`env.reset` is called, the whole chronics is read from the hard drive. If you want to lower this
+impact then you might consult the `Optimize the data pipeline`_ section.
+
+Study always the same chronics
+++++++++++++++++++++++++++++++
+If you spotted a particularly interesting chronics, or if you want, for some reason,
+your agent to see only one chronics, you can do this rather easily with grid2op.
+
+All chronics
+are given a unique persistent ID (it means that as long as the data is not modified the same
+chronics will always have the same ID each time you load the environment). The environment has a
+"set_id" method that allows you to use it. Just add "env.set_id(THE\\_ID\\_YOU\\_WANT)" before
+the call to "env.reset". This gives the following code:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    env = grid2op.make()
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+    episode_count = 100  # I want to make 100 episodes
+
+    ###################################
+    THE_CHRONIC_ID = 42
+    ###################################
+
+    # I initialize some useful variables
     reward = 0
     done = False
     total_reward = 0
+
+    # and now the loop starts
     for i in range(episode_count):
+        ###################################
+        env.set_id(THE_CHRONIC_ID)
+        ###################################
+        ob = env.reset()
+
+        # now play the episode as usual
         while True:
             action = agent.act(ob, reward, done)
-            ob, reward, done, _ = env.step(action)
+            ob, reward, done, info = env.step(action)
             total_reward += reward
             if done:
                 # in this case the episode is over
                 break
@@ -46,6 +148,387 @@ Example (adapted from gym documentation available at
     env.close()
     print("The total reward was {:.2f}".format(total_reward))
 
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+Shuffle the chronics order
++++++++++++++++++++++++++++
+In some other use case, you might want to go through the whole set of chronics, and then loop again
+through them, but in a different order (remember that by default it will always loop in the same
+order 0, 1, 2, 3, ..., 0, 1, 2, 3, ..., 0, 1, 2, 3, ...).
+
+Again, doing so with grid2op is rather easy. To that end you can use the `chronics_handler.shuffle` function
+that will do exactly that. You can use it like this:
+
+.. code-block:: python
+
+    import numpy as np
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    env = grid2op.make()
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+    episode_count = 10000  # I want to make lots of episodes
+
+    # total number of episodes
+    total_episode = len(env.chronics_handler.subpaths)
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # and now the loop starts
+    for i in range(episode_count):
+
+        ###################################
+        if i % total_episode == 0:
+            # I shuffle each time I need to
+            env.chronics_handler.shuffle()
+        ###################################
+
+        ob = env.reset()
+        # now play the episode as usual
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+Skipping some chronics
++++++++++++++++++++++++
+Some chronics might be too hard to start a training ("learn to walk before running") and conversely some chronics might
+be too easy after a while (you can solve them by doing basically nothing). This is why grid2op allows you to
+have some control over which chronics will be used by the environment.
+
+For this purpose you can use the `chronics_handler.set_filter` function. This function takes a
+"filtering function" as argument. This "filtering function" takes as argument the full path of the
+chronics and should return ``True`` / ``False`` depending on whether or not you want to keep the chronics.
+Here is an example:
+
+.. code-block:: python
+
+    import numpy as np
+    import re
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    env = grid2op.make()
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+
+
+    ###################################
+    # this is the only line of code to add
+    # here I select only the chronics whose path contains "00" followed by a digit
+    env.chronics_handler.set_filter(lambda path: re.match(".*00[0-9].*", path) is not None)
+    kept = env.chronics_handler.reset()  # if you don't do that it will not have any effect
+    print(kept)  # I print the chronics kept
+    ###################################
+
+    episode_count = 10000  # I want to make lots of episodes
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # and now the loop starts
+    # it will only use the chronics selected
+    for i in range(episode_count):
+        ob = env.reset()
+        # now play the episode as usual
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+Sampling the chronics
++++++++++++++++++++++++
+
+Finally, for even more flexibility, you can choose to sample what will be the next used chronics. To achieve
+that you can call the `chronics_handler.sample_next_chronics` function. This function takes a vector of
+probabilities as input (if not provided it assumes all probabilities are equal) and will select an id based
+on this probability vector.
+
+In the following example we assume that the vector of probabilities is always the same and that we want, for some
+reason, to oversample the first 10 chronics and undersample the last 10:
+
+.. code-block:: python
+
+    import numpy as np
+    import re
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    env = grid2op.make()
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+
+    episode_count = 10000  # I want to make lots of episodes
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    ###################################
+    # total number of episodes
+    total_episode = len(env.chronics_handler.subpaths)
+    probas = np.ones(total_episode)
+    # oversample the first 10 episodes
+    probas[:10] *= 5
+    # undersample the last 10 episodes
+    probas[-10:] /= 5
+    ###################################
+
+    # and now the loop starts
+    # it will only use the chronics selected
+    for i in range(episode_count):
+
+        ###################################
+        _ = env.chronics_handler.sample_next_chronics(probas)  # this is added
+        ###################################
+        ob = env.reset()
+
+        # now play the episode as usual
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+**NB** here we have a constant vector of probabilities, but you might imagine adapting it during the training,
+for example to oversample scenarios your agent is having trouble solving.
+
+Skipping some time steps
++++++++++++++++++++++++++
+
+Another way to customize which data your agent will face is to make it as if the chronics started at a different
+date and time. This might be handy in case a scenario is hard at the beginning but less hard at the end, or if you
+want your agent to learn to start controlling the grid at any date and time (in grid2op most of the chronics data
+provided start at midnight for example).
+
+To achieve this goal, you can use the :func:`BaseEnv.fast_forward_chronics` function. This function skips a given
+number of steps. In the following example, we always skip the first 42 time steps before starting the
+episode:
+
+.. code-block:: python
+
+    import numpy as np
+    import re
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    env = grid2op.make()
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+
+    episode_count = 10000  # I want to make lots of episodes
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # and now the loop starts
+    for i in range(episode_count):
+        ob = env.reset()
+
+        ###################################
+        # below are the two lines added
+        env.fast_forward_chronics(42)
+        ob = env.get_obs()
+        ###################################
+
+        # now play the episode as usual
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+Optimize the data pipeline
+++++++++++++++++++++++++++
+Optimizing the data pipeline can be crucial if you want to learn fast, especially at the beginning of the training.
+There exist multiple ways to perform this task.
+
+First, let's start with a summary of the timing. For this test, I ran, on my personal computer, the following
+code to compare the different methods.
+
+.. code-block:: python
+
+    import time
+    import re
+    import grid2op
+    from grid2op.Chronics import MultifolderWithCache
+
+
+    ##############################
+    # this part changes depending on the method
+    env = grid2op.make("l2rpn_neurips_2020_track1_small")
+    env.chronics_handler.set_filter(lambda path: re.match(".*37.*", path) is not None)
+    kept = env.chronics_handler.reset()  # if you don't do that it will not have any effect
+    ##############################
+
+    episode_count = 100
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # only the time of the following loop is measured (%%time is an IPython cell magic)
+    %%time
+    for i in range(episode_count):
+        ob = env.reset()
+        if i % 10 == 0:
+            print("10 more")
+        while True:
+            action = env.action_space.sample()
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+Results are reported in the table below:
+
+============================== ================ ===================
+Method used                    memory footprint time to perform (s)
+============================== ================ ===================
+Nothing (see `Basic Usage`_)   low              44.6
+set_chunk (see `Chunk size`_)  ultra low        26.8
+`MultifolderWithCache`_        high             11.0
+============================== ================ ===================
+
+As you can see, the default usage uses relatively little memory but takes a while to compute (almost 45s to perform
+the 100 episodes). On the contrary, the `Chunk size`_ method uses even less memory and is about 40% faster. Storing
+all data in memory using the `MultifolderWithCache`_ leads to a large memory footprint, but is also significantly
+faster. On this benchmark, it is 75% faster (it takes only 25% of the initial time) than the original method.
+
+Chunk size
+^^^^^^^^^^^
+The first thing you can do, without changing anything in the code, is to ask grid2op to read the input grid data
+in "chunks". This means that, when you call "env.reset", instead of reading all the data representing a full month,
+you will read only a subset of it, thus speeding up the I/O time by a large amount. In the following example we
+read data in "chunks" of 100: the hard drive is accessed to read data 100 time steps at a time
+(instead of reading the full dataset at once). Note that this "technique" can also be used to reduce the memory
+footprint (less RAM taken).
+
+.. code-block:: python
+
+    import numpy as np
+    import re
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    env = grid2op.make()
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+
+    ###################################
+    env.chronics_handler.set_chunk_size(100)
+    ###################################
+
+    episode_count = 10000  # I want to make lots of episodes
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # and now the loop starts
+    for i in range(episode_count):
+        ob = env.reset()
+
+        # now play the episode as usual
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+
+MultifolderWithCache
+^^^^^^^^^^^^^^^^^^^^^
+Another way is to use a dedicated class that stores the data in memory. This is particularly useful
+to avoid long and inefficient I/O: it is replaced by reading the complete dataset once and storing it
+into memory.
+
+This can be achieved with:
+
+.. code-block:: python
+
+    import numpy as np
+    import re
+    import grid2op
+    from grid2op.Agent import RandomAgent
+    from grid2op.Chronics import MultifolderWithCache
+
+    ###################################
+    env = grid2op.make(chronics_class=MultifolderWithCache)
+    # I select only part of the data, it's unlikely the whole dataset can fit into memory...
+    env.chronics_handler.set_filter(lambda path: re.match(".*00[0-9].*", path) is not None)
+    # you need to do that
+    kept = env.chronics_handler.real_data.reset()
+    ###################################
+
+    agent = RandomAgent(env.action_space)
+    env.seed(0)  # for reproducible experiments
+
+    episode_count = 10000  # I want to make lots of episodes
+
+    # I initialize some useful variables
+    reward = 0
+    done = False
+    total_reward = 0
+
+    # and now the loop starts
+    # it will only use the chronics selected
+    for i in range(episode_count):
+        ob = env.reset()
+
+        # now play the episode as usual
+        while True:
+            action = agent.act(ob, reward, done)
+            ob, reward, done, info = env.step(action)
+            total_reward += reward
+            if done:
+                # in this case the episode is over
+                break
+
+(as always, the lines added compared to the base code are highlighted: they are "circled" with `#####`)
+
+Note that by default the `MultifolderWithCache` class will only load the **first** chronics it sees. You need
+to filter it and call `env.chronics_handler.real_data.reset()` for it to work properly.
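+
+If your machine has enough memory, a possible variation (a minimal sketch, assuming the whole dataset
+fits into RAM) is to keep every chronics in the cache by using a filter that accepts everything:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Chronics import MultifolderWithCache
+
+    env = grid2op.make(chronics_class=MultifolderWithCache)
+    # keep every chronics in the cache (only do this if the dataset fits into memory!)
+    env.chronics_handler.set_filter(lambda path: True)
+    kept = env.chronics_handler.real_data.reset()  # mandatory, otherwise the cache is not loaded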
+
+Customization
+-------------
+
+Environments can be customized in three major ways:
+
+- `Backend`: you change the solver that computes the state of the powergrid, to be faster or more realistic
+- `Parameters`: you change the behaviour of the Environment. For example you can prevent powerlines from being
+  disconnected when too much current flows on them, etc.
+- `Rules`: you change the operational constraints that your agent must meet. For example you can allow acting on
+  more or fewer powerlines in the same action, etc.
+
+TODO
 
 Detailed Documentation by class
 --------------------------------
diff --git a/docs/grid2op.rst b/docs/grid2op.rst
index 65d92dd22..b13ef9e32 100644
--- a/docs/grid2op.rst
+++ b/docs/grid2op.rst
@@ -1,24 +1,166 @@
-Grid2Op module
-===================================
 .. module:: grid2op
+.. _grid2op-module:
 
-The grid2op module allows to perform sequential action on a powergrid.
+Grid2Op module
+===================================
+The grid2op module allows modeling of sequential decision making on a powergrid.
 
-It is modular in the sens that it allows to use different powerflow solver. It proposes an internal representation
-of the data that can be feed to powergrids and multiple class to specify how it's done.
+It is modular in the sense that it allows the use of different powerflow solvers. It proposes an internal
+representation of the data that can be fed to powergrids and multiple classes to specify how it's done.
 
 For example, it is possible to use an "action" to set the production value of some powerplant. But we also know
 that it's not possible to do this for every powerplant (for example, asking a windfarm to produce more energy is
 not possible: the only way would be to increase the speed of the wind). It is possible to implement these kind of
 restrictions in this "game like" environment.
 
-Today, the main usage of this plateform is to serve as a computation engine for the `L2RPN `_
-cpompetitions.
+Today, the main usage of this platform is to serve as a computation engine for the `L2RPN `_
+competitions.
 
-This plateform is still under development. If you notice a bug, let us know with a github issue at
+This platform is still under development. If you notice a bug, let us know with a github issue at
 `Grid2Op `_
 
-####################
-Glossary
-####################
-TODO Coming soon
\ No newline at end of file
+Objectives
+-----------
+The primary goal of grid2op is to model the decision making process in power systems. Indeed, we believe that
+developing new flexibilities on the grid would make the
+"energy transition" an easier, less costly process.
+
+It allows fast development of new "methods" that are able to "take decisions" on a powergrid and assess how
+well these controllers perform (see section `Controlling the grid`_ for more information about the "controls").
+
+This is made possible by a careful separation between:
+
+- the data used to represent the powergrid
+- the solver that is able to compute the state of the grid
+- the controller / agent that takes action on the powergrid,
+
+all bound together thanks to the :class:`grid2op.Environment` module.
+
+Grid2op also attempts to make the development of new control methods as easy as possible: it is relatively simple
+to generate fake data, train agents on them and use a fast (but less precise) powerflow while trying
+to develop new state of the art methods, while still being usable in a "closer to reality" setting where data
+can come from real grid states that happened in the past and the solver is as accurate as possible. You can switch
+from one to the other almost effortlessly.
+
+For a more detailed description, one can consult the
+`Reinforcement Learning for Electricity Network Operation `_
+paper.
+
+Controlling the grid
+--------------------
+Modeling everything that happens in the powergrid would be an extremely difficult task. Grid2op focuses on controls
+that could be done today by a human (happening with **a frequency of approximately one minute**). It does not
+aim at simulating the really high frequency controls that are often automatic today. That being said, such controls
+can be taken into account by grid2op if the :class:`grid2op.Backend` allows it.
+
+The main focus of grid2op is to make it easy to use **the topology** to control the flows of the grid.
+In real time, it is possible to reconfigure the "topology" of the grid (you can think of it
+as acting on the graph of the power network). Such modifications are highly non linear,
+can have a really counter intuitive impact, and we believe they are under used by industry and under studied
+by academics at the moment
+(feel free to visit the notebooks `0_Introduction.ipynb`,
+`0_SmallExample.ipynb` or the `IEEE BDA Tutorial Series.ipynb` of the official
+`grid2op github repository `_ for more information)
+
+Along with the topology, grid2op allows you to easily manipulate (and thus control):
+
+- the voltages: by manipulating shunts, or by changing the setpoint value of the generators
+- the active generation: by the use of the "redispatching" action.
+
+Other "flexibilities" (ways to act on the grid) are coming soon (-:
+
+
+What is modeled in a grid2op environment
+-----------------------------------------
+The simulator is able to emulate a power grid (of any size or characteristics) subject to a set of
+temporal injections (productions and consumptions) or maintenance / hazards for discretized
+time-steps.
+
+More concretely, a grid2op environment models "out of the box":
+
+- the mechanism to "implement" a control on the grid, load the next chronics data (*e.g.* new loads and productions)
+  and compute the appropriate state of the power network
+- the disconnection of powerlines if they are on overflow for too long (known as "time overcurrent (TOC)", see
+  this article for more information
+  `overcurrent `_ ).
+  Conceptually this means the environment remembers for how long a powerline is in "overflow" and disconnects it
+  if needed.
+- the disconnection of powerlines if the overflow is too high (known as "instantaneous overcurrent", see the same
+  wikipedia article). This means that, from one step to another, a given powerline can be disconnected if too much
+  flow goes through it
+- the maintenance operations: if there is a planned maintenance, the environment is able to disconnect a powerline
+  for a given number of steps and prevent its reconnection. Information about such planned events
+  is given to the controller
+- hazards / unplanned outages / attacks: another issue in power systems is the fact that sometimes some powerlines
+  get disconnected in an unplanned manner. For example, a tree can fall on a powerline, the grid might suffer
+  a cyber attack etc. This can also be modeled by grid2op.
+- prevent the action on some powerlines: whether it is to model the fact that in reality it is not possible to
+  always act on the same equipment, or because some powerlines are out of service (because of an attack, a
+  maintenance or because they need to be repaired), grid2op can model the impossibility
+  of acting on a given powerline
+- prevent the action on some substations: for the same reasons, sometimes you cannot act on a given part of
+  the network, preventing you from doing some topological actions.
+- voltage control: though it is not the main focus of the current platform, grid2op can model automatons that
+  can take voltage corrective measures (in the near future we think of adding some protections monitoring
+  voltage violations too).
+- non violation of generator physical constraints: in real life, generators cannot produce too little nor too much
+  (we speak about `gen_pmin` and `gen_pmax`), nor can their production vary too much between consecutive
+  steps (this is called `gen_max_ramp_down` and `gen_max_ramp_up`)
+- stopping the game if the grid is in a too bad shape. This can happen if a load or a generator has been
+  disconnected, or if some part of the grid is "islanded" (the graph representing the power network is not
+  connected) or if there is no feasible solution to the power system equations
+
+Here is a summary of the main modules:
+
+============================= =========================================================================================
+Module Name                   Main usage
+============================= =========================================================================================
+:class:`grid2op.Environment`  Implements all the mechanisms described above
+:class:`grid2op.Chronics`     In charge of feeding the data (loads, generations, planned maintenance, etc.) to the Environment
+:class:`grid2op.Backend`      Carries out the computation of the powergrid state
+:class:`grid2op.Agent`        The controller, in charge of managing the safety of the grid
+:class:`grid2op.Action`       The control sent by the Agent to the Environment
+:class:`grid2op.Observation`  The information sent by the Environment to the Agent; it represents the powergrid state as seen by the Agent
+:class:`grid2op.Opponent`     Is present to model the unplanned disconnections of powerlines
+:class:`grid2op.Rules`        Computes whether or not an action is "legal" at a given time step
+:class:`grid2op.Parameters`   Stores the parameters that define, for example, in which cases an action is legal, or how long a powerline can stay on overflow, etc.
+============================= =========================================================================================
+
+Properties of these environments
+---------------------------------
+The grid2op environments have multiple shared properties:
+
+- highly constrained environments: these environments obey physical laws. You cannot directly choose how much
+  power flows on a given powerline; what you can do is choose the "graph" of the power network and (under some
+  constraints) the production of each generator, knowing that at any time step the powergrid state
+  must satisfy `Kirchhoff's circuit laws `_ .
+- stochastic environments: in all environments, you don't fully know the future, which makes them "Partially
+  Observable" environments (if you were in a maze, you would not see "from above" but rather "in the first
+  person"). Environments can be "even more stochastic" if there are hazards / attacks on the powergrid.
+- with both **continuous and discrete observation spaces**: some parts of the observation are continuous (for
+  example the amount of flow on a given powerline, or the production of a generator) and some are discrete (
+  for example the status - connected / disconnected - of a powerline, or how long this powerline
+  has been in overflow etc.)
+- with **both continuous and discrete action spaces**: the preferred type of action is the topology, which is
+  represented as a discrete type of action (you can either connect / disconnect a powerline), but there also exist
+  some continuous actions (for example you can adjust in real time the production of a set of generators)
+- dynamic graph manipulation: power networks can be modeled as graphs. In these environments both the observation
+  **and the action** are focused on graphs. The observation contains the complete state of the grid, including
+  the "topology" (you can think of it as its graph) and actions are focused on adapting this graph to be as
+  robust as possible
+- strong emphasis on **safety** and **security**: power systems are highly critical systems (who would want to
+  short circuit a powerplant? Or cause a blackout preventing a hospital from treating its patients?) and as such
+  it is critical that the controls keep the powergrid safe in all circumstances.
+
+Disclaimer
+-----------
+Grid2op is a research testbed platform; it shall not be used in "production" for any kind of application.
+
+
+Going further
+--------------
+To get started into the grid2op ecosystem, we made a set of notebooks
+that are available, without any installation thanks to
+`Binder `_ . Feel free to visit the "getting_started" page for
+more information and a detailed tour about the issues that grid2op tries to address.
+
diff --git a/docs/gym.rst b/docs/gym.rst
new file mode 100644
index 000000000..47b37c376
--- /dev/null
+++ b/docs/gym.rst
@@ -0,0 +1,61 @@
+.. _openai-gym:
+
+Compatibility with openAI gym
+===================================
+
+The gym framework in reinforcement learning is widely used. Starting from version 1.2.0 we improved the
+compatibility with this framework.
+
+Before grid2op 1.2.0 only some classes fully implemented the openAI gym interface:
+
+- the :class:`grid2op.Environment` (with methods such as `env.reset`, `env.step` etc.)
+- the :class:`grid2op.Agent` (with the `agent.act` etc.)
+- the creation of pre defined environments (with `grid2op.make`)
+
+
+Starting from 1.2.0 we implemented some automatic converters that are able to automatically map
+the grid2op representations of the action space and the observation space into openAI gym "spaces". More precisely
+these are represented as gym.spaces.Dict.
+
+If you are interested in this feature, we recommend proceeding like this:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Converters import GymActionSpace, GymObservationSpace
+    from grid2op.Agent import BaseAgent
+
+    class MyAgent(BaseAgent):
+        def __init__(self, action_space, observation_space):
+            BaseAgent.__init__(self, action_space)
+            self.gym_obs_space = GymObservationSpace(observation_space)
+            self.gym_action_space = GymActionSpace(action_space)
+
+        def act(self, obs, reward, done=False):
+            # convert the observation to a gym like one:
+            gym_obs = self.gym_obs_space.to_gym(obs)
+
+            # do whatever you want, as long as you retrieve a gym-like action
+            gym_action = ...
+            grid2op_action = self.gym_action_space.from_gym(gym_action)
+            # NB advanced usage: if action_space is a grid2op.converter (for example coming from IdToAct)
+            # then what's called "grid2op_action" is in fact an action that can be understood by the converter.
+            # to convert it back to a grid2op action you need to convert it. See the documentation of GymActionSpace
+            # for such purpose.
+            return grid2op_action
+
+    env = grid2op.make(...)
+    my_agent = MyAgent(env.action_space, env.observation_space)
+
+    # and now do anything you like
+    # for example
+    done = False
+    reward = env.reward_range[0]
+    obs = env.reset()
+    while not done:
+        action = my_agent.act(obs, reward, done)
+        obs, reward, done, info = env.step(action)
+
+We also implemented some "converters" that allow the conversion of some action spaces into more convenient
+`gym.spaces` (this is only available if gym is installed of course). Please check
+:class:`grid2op.Converter.GymActionSpace` for more information and examples.
\ No newline at end of file
diff --git a/docs/img/14bus_1.png b/docs/img/14bus_1.png
new file mode 100644
index 000000000..05cfedd9f
Binary files /dev/null and b/docs/img/14bus_1.png differ
diff --git a/docs/img/14bus_2.png b/docs/img/14bus_2.png
new file mode 100644
index 000000000..0a378ea3b
Binary files /dev/null and b/docs/img/14bus_2.png differ
diff --git a/docs/img/14bus_th_lim.png b/docs/img/14bus_th_lim.png
new file mode 100644
index 000000000..388f9b40e
Binary files /dev/null and b/docs/img/14bus_th_lim.png differ
diff --git a/docs/img/grid2op_action.jpg b/docs/img/grid2op_action.jpg
new file mode 100644
index 000000000..02e91c228
Binary files /dev/null and b/docs/img/grid2op_action.jpg differ
diff --git a/docs/img/grid2op_action_raw.jpg b/docs/img/grid2op_action_raw.jpg
new file mode 100644
index 000000000..bc05a460d
Binary files /dev/null and b/docs/img/grid2op_action_raw.jpg differ
diff --git a/docs/index.rst b/docs/index.rst
index 4bd0a3454..d2779ac83 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,11 +3,14 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
+.. |episode_example| image:: ./img/grid2op_action.jpg
+
+===============================================
 Welcome to Grid2Op's technical documentation!
-=============================================
+===============================================
 
 Grid2Op is a pythonic, easy to use framework, to be able to develop, train or evaluate performances of "agent" or
-"controler" that acts on a power grid in different ways.
+"controller" that acts on a powergrid in different ways.
 
 It is modular and can be used to train reinforcement learning agents or to assess the performance of optimal control
 algorithms.
 
 for example.
 
 **Features**
 
-    - abstract the computation of the cascading failures
-    - ability to have the same code running with multiple powerflow calculator
-    - parallel execution of one agent on multiple independent scenarios (multiprocessing)
+    - abstract the computation of the "cascading failures"
+    - ability to have the same code running with multiple powerflows
+    - parallel execution of one agent / controller on multiple independent scenarios (multiprocessing)
     - fully customisable: this software has been built to be fully customizable to serve different purposes and
       not only reinforcement learning, or the L2RPN competition.
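+
+As a minimal illustration of the parallel execution feature above (a sketch only: it assumes the
+`DoNothingAgent` baseline and the `nb_process` argument of the runner match your grid2op version), one
+agent can be evaluated on several scenarios at once with the :class:`grid2op.Runner.Runner`:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Agent import DoNothingAgent
+    from grid2op.Runner import Runner
+
+    env = grid2op.make()
+    # evaluate the "do nothing" agent on 4 scenarios, using 2 processes
+    runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent)
+    res = runner.run(nb_episode=4, nb_process=2)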
+Grid2Op philosophy +-------------------- +Grid2Op is a python module that aims to make easier the research on sequential decision making applied to power systems. + +This package adopt the "reinforcement learning" point of view and is compatible with the openAI gym programming +interface (see section :ref:`openai-gym` for more information). + +Applied to power system, the "reinforcement learning" framework ask: + +- a "controller" (named Agent) to take an "action" on the powergrid (for example for L2RPN competitions in 2019 + and 2020 these actions consist in modifying the connectivity of the powergrid). +- the "environment" (*a.k.a* the "real world") applies that action on the powergrid, applies some other modifications + and return the next state. + +The goal of grid2op is to model "sequential decision making" that could be made by human operators, for example +changing the configuration of some "substations" as demonstrate in the figure below: + +|episode_example| + +Any kind of "controller" can be implemented using this framework even though it has been inspired by the +"reinforcement learning" community. You can implement some heuristic "controllers" (some examples are available in the +:ref:`agent-module` module description), "controllers" that comes from the Optimization community +(for example "Optimal Power Flow") or +"Model Predictive Control". One of the goal of Grid2Op is to allow everyone to contribute to closing the gap +between all these research communities. + Main module content --------------------- .. toctree:: :maxdepth: 2 + :caption: Quickstart - intro quickstart grid2op makeenv + gym Plotting capabilities ---------------------- .. toctree:: :maxdepth: 2 + :caption: Plot plot -Compatibility with openAI gym ------------------------------ -The gym framework in reinforcement learning is widely used. Starting from version 1.2.0 we improved the -compatibility with this framework. - -Before grid2op 1.2.0 only some classes fully implemented the open AI gym interface: - -- the :class:`grid2op.Environment` (with methods such as `env.reset`, `env.step` etc.) -- the :class:`grid2op.Agent` (with the `agent.act` etc.) -- the creation of pre defined environments (with `grid2op.make`) - - -Starting from 1.2.0 we implemented some automatic converters that are able to automatically map -grid2op representation for the action space and the observation space into open AI gym "spaces". More precisely these -are represented as gym.spaces.Dict. - -If you are interested by this feature, we recommend you to proceed like this: - -.. code-block:: python - - import grid2op - from grid2op.Converters import GymActionSpace, GymObservationSpace - from grid2op.Agent import BaseAgent - - class MyAgent(BaseAgent): - def __init__(self, action_space, observation_space): - BaseAgent.__init__(self, action_space) - self.gym_obs_space = GymObservationSpace(observation_space) - self.gym_action_space = GymActionSpace(observation_space) - - def act(self, obs, reward, done=False): - # convert the observation to gym like one: - gym_obs = self.gym_obs_space.to_gym(obs) - - # do whatever you want, as long as you retrieve a gym-like action - gym_action = ... - grid2op_action = self.gym_action_space.from_gym(gym_action) - # NB advanced usage: if action_space is a grid2op.converter (for example coming from IdToAct) - # then what's called "grid2op_action" is in fact an action that can be understood by the converter. - # to convert it back to grid2op action you need to convert it. 
See the documentation of GymActionSpace - # for such purpose. - return grid2op_action - - env = grid2op.make(...) - my_agent = MyAgent(env.action_space, env.observation_space) - - # and now do anything you like - # for example - done = False - reward = env.reward_range[0] - obs = env.reset() - while not done: - action = my_agent.act(obs, reward, done) - obs, reward, done, info = env.step(action) - -We also implemented some "converter" that allow the conversion of some action space into more convenient -`gym.spaces` (this is only available if gym is installed of course). Please check -:class:`grid2op.Converter.GymActionSpace` for more information and examples. - Technical Documentation ---------------------------- .. toctree:: :maxdepth: 2 + :caption: Technical Documentation action agent @@ -127,6 +100,7 @@ Main Exceptions ----------------------- .. toctree:: :maxdepth: 2 + :caption: Miscellaneous exception diff --git a/docs/intro.rst b/docs/intro.rst deleted file mode 100644 index 3bca2d570..000000000 --- a/docs/intro.rst +++ /dev/null @@ -1,18 +0,0 @@ -Introduction -=================================== -Grid2Op is a tool that allows to perform Reinforcement Learning (abbreviated RL) or any -other time dependant simulation of steady state powerflow. - -The simulator is able to emulate a power grid (of any size or characteristics) subject to a set of -temporal injections (productions and consumptions) or maintenance / hazards for discretized -timesteps. - -Loadflow computation are carried out using any Backend you wish. A default backend, relying -on the open source `pandapower `_ -library is available as an example. - -Any other tools that is able to perform power flow computation can be used as a "backend" to -play the game or to accelerate the training. Instructions and method to implement -a new backend are available in the :class:`Grid2Op.Backend.Backend` documentation. - -TO be continued (package still under development) diff --git a/docs/makeenv.rst b/docs/makeenv.rst index d348cfec7..af56f0b51 100644 --- a/docs/makeenv.rst +++ b/docs/makeenv.rst @@ -1,4 +1,5 @@ .. currentmodule:: grid2op.MakeEnv +.. _make-env-module: Make: Using pre defined Environments ==================================== @@ -57,7 +58,7 @@ And on windows based machine this will look like: .. code-block:: python import grid2op - env = grid2op.make("C:\\the\\full\\path\\where\\the\\env\\is\\located\\rte_case14_realistic") + env = grid2op.make("C:\\where\\the\\env\\is\\located\\rte_case14_realistic") In bot cases it will load the environment named "rte_case14_realistic" (provided that you found a way to get it on your machine) located at the path "/full/path/where/the/env/is/located/rte_case14_realistic" (or @@ -71,7 +72,7 @@ renamed :func:`make_old`, merges the behaviour of "grid2op.download" script and It has the following behavior: 1) if you specify a full path to a local environment (containing the chronics and the default parameters), - it will be used (see section :sec:`usage`) + it will be used (see section :ref:`usage`) 2) if you specify the name of an environment that you have already downloaded, it will use this environment (NB currently no checks are implemented if the environment has been updated remotely, which can happen if we realize there were some issues with it.) @@ -104,10 +105,12 @@ an internet connection) .. 
code-block:: python import grid2op - print("The current local directory where the environment are downloaded is {}" - "".format(grid2op.get_current_local_dir())) - print("The environments available without necessary download are: {}".format(grid2op.list_available_local_env())) - print("I can download these environments from the internet: {}".format(grid2op.list_available_remote_env())) + print("The current local directory where the environment are downloaded is \n{}" + "".format(grid2op.get_current_local_dir())) + print("The environments available without necessary download are: \n{}" + "".format(grid2op.list_available_local_env())) + print("I can download these environments from the internet: \n{}" + "".format(grid2op.list_available_remote_env())) **NB** if you change the cache directory, all previously downloaded environments will not be visible by grid2op and they will not be removed from your local hard drive. This is why we don't recommend to change this folder unless you have a @@ -137,6 +140,8 @@ context of the L2RPN competition, we don't recommend to modify them. - \* `opponent_action_class`, `opponent_class`, `opponent_init_budget`, `opponent_budget_per_ts`, `opponent_budget_class`: all configuration for the opponent. +More information about the "customization" of the environment, especially to optimize the I/O or to manipulate +which data you interact with are available in the :ref:`environment-module` module (:ref:`environment-module-usage` section). Detailed Documentation by class -------------------------------- diff --git a/docs/plot.rst b/docs/plot.rst index b6b549e02..099a28607 100644 --- a/docs/plot.rst +++ b/docs/plot.rst @@ -1,6 +1,9 @@ .. currentmodule:: grid2op.PlotGrid - .. |replaygif| image:: ../getting_started/path_agents/awesome_agent_logs/000/episode.gif +.. |14bus_1| image:: ./img/14bus_1.png +.. |14bus_2| image:: ./img/14bus_2.png +.. |14bus_th_lim| image:: ./img/14bus_th_lim.png + Grid2Op Plotting capabilities (beta) ===================================== @@ -18,7 +21,7 @@ different purposes: The class :class:`PlotMatplot` is also used by :class:`EpisodeReplay` that allows to look at the action taken by -the agent pretty easily, and allows easy saving into gif format. +the agent pretty easily, and allows easy saving into gif format (see below for more information) .. code-block:: python3 @@ -54,16 +57,158 @@ the agent pretty easily, and allows easy saving into gif format. # execute this agent on 1 scenario, saving the results runner = Runner(**env.get_params_for_runner(), agentClass=CustomRandom) path_agent = os.path.join(path_agents, "RandomAgent") - res = runner.run(nb_episode=1, path_save=path_agent, pbar=tqdm) + res = runner.run(nb_episode=1, path_save=path_agent, pbar=tqdm, agent_seeds=[0, 1]) # and now reload it and display the "movie" of this scenario plot_epi = EpisodeReplay(path_agent) - plot_epi.replay_episode(res[0][1], max_fps=2, gif_name="episode") + plot_epi.replay_episode(res[0][1], gif_name="episode") An possible output will look like this: |replaygif| +Render the state of the grid +----------------------------- + +During the gym loop +++++++++++++++++++++ +In Grid2Op we also made available the possibility to render the state of the grid that your agent sees before taking +an action. This can be done with the provided environments following openAI gym interface like this: + +.. 
code-block:: python + + import grid2op + from grid2op.Agent import RandomAgent + from grid2op.Episode import EpisodeReplay + + env = grid2op.make() + agent = RandomAgent(env.action_space) + nb_episode = 1 + for i in range(nb_episode): + obs = env.reset() + done = False + reward = env.reward_range[0] + while not done: + _ = env.render() + act = agent.act(obs, reward, done) + obs, reward, done, info = env.step(act) + +**NB** we don't recommend to use the renderer during the training of an Agent as it might slow down the training +significantly. + +Offline, after the scenarios were played +++++++++++++++++++++++++++++++++++++++++ +In Grid2Op, you can execute a :ref:`runner-module` to perform the "gym loops" and store the results +in a standardized manner. Once stored, the results can be loaded back and "replayed" using the appropriate +class. Here is how you can do this: + +.. code-block:: python + + import grid2op + from grid2op.Agent import RandomAgent + from grid2op.Runner import Runner + from grid2op.Episode import EpisodeReplay + + path_saved_data = "where_i_want_to_save_it" + # create an environment and an agent + env = grid2op.make() + agent = RandomAgent(env.action_space) + + # create a runner + runner = Runner(**env.get_params_for_runner(), agentClass=None, agentInstance=agent) + + # run and save the results + res = runner.run(nb_episode=1, path_save=path_saved_data) + + # and now load it and play the "movie" + plot_epi = EpisodeReplay(path_saved_data) + plot_epi.replay_episode(res[0][1], gif_name="this_episode.gif") + + # and in `os.path.join(path_saved_data, res[0][1])` (or example "where_i_want_to_save_it/000") + # a file named "this_episode.gif" has been created + +Plot a given observation +++++++++++++++++++++++++ +We also included some module to plot a given observation that can be customize depending on what you want to plot. + +You can use them as follow: + +.. code-block:: python + + import grid2op + from grid2op.PlotGrid import PlotMatplot + + env = grid2op.make() + plot_helper = PlotMatplot(env.observation_space) + + obs = env.reset() + + # if you want to plot all the observation + fig = plot_helper.plot_obs(obs) + fig.show() + + # you can also chose what to plot for each "object type" + fig_custom = plot_helper.plot_obs(obs + line_info="rho", + load_info=None, # i don't plot anything concerning the load + gen_info="v" # i draw the voltage setpoint of the generators + ) + fig_custom.show() + +See definition of :func:`BasePlot.plot_obs` for more information. The results of the above code is: + +|14bus_1| + +And the second image is + +|14bus_2| + +Plot data on the grid +++++++++++++++++++++++ +For convenience, we also included a set of function that are able to plot some custom information and +"project" them into the graph of the grid. This can be interesting if you want to represent some properties +of the objects on the grid. The function showed here accept anything that can be converted to ``float`` that +have the same size of the number of objects (for example if you want to display something on the powerlines +you need this "something" to be vector that counts as many elements as the number of powerlines in the +powergrid). + +In the example bellow, we plot the thermal limits (maximum current allowed on a powerline) +of the each powerline: + +.. 
+
+    import grid2op
+    from grid2op.PlotGrid import PlotMatplot
+
+    env = grid2op.make()
+    plot_helper = PlotMatplot(env.observation_space)
+
+    # plot the thermal limit of each powerline
+    fig_info = plot_helper.plot_info(line_values=env.get_thermal_limit())
+    fig_info.show()
+
+The above code will output this image:
+
+|14bus_th_lim|
+
+Of course you can also "project" onto the grid all kinds of variables, including for generators and loads,
+for example with:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.PlotGrid import PlotMatplot
+
+    env = grid2op.make()
+    plot_helper = PlotMatplot(env.observation_space)
+    obs = env.reset()  # an observation is needed to access "obs.load_v" below
+
+    # plot the thermal limit of each powerline and the voltage magnitude of each load
+    fig_info = plot_helper.plot_info(line_values=env.get_thermal_limit(), load_values=obs.load_v)
+    fig_info.show()
+
+    # plot only the generator pmax
+    fig_info2 = plot_helper.plot_info(gen_values=env.gen_pmax)
+    fig_info2.show()
+
+More information is available in the description of the function :func:`BasePlot.plot_info`.

Detailed Documentation by class
--------------------------------
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index ae46d3cac..cf156e4a2 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -1,7 +1,7 @@
 Getting started
 ===================================

-TODO lots of modifications have been made, this section needs to be re written.
+In this chapter we present how to install grid2op.

 ############
 Installation
 ############
@@ -10,13 +10,32 @@ Installation
 *************
 Requirements
 *************
-This software uses python (at tested with version >= 3.5).
+This software uses python (tested with version >= 3.6). To install it, it's also recommended to have `git`.

 *************
 Installation
 *************
+
+Using pip (recommended)
+++++++++++++++++++++++++
+Grid2op is hosted on pypi and can be installed like most python packages with:
+
+.. code-block:: bash
+
+    pip install grid2op
+
+It should now be installed. Don't hesitate to visit the section `Start Using grid2op`_ for more information on its
+usage, or the :ref:`grid2op-module` for a more in depth presentation of this package. If you
+would rather start interacting with a powergrid directly, you can visit the :ref:`make-env-module`.
+
+From source (advanced user)
++++++++++++++++++++++++++++
+If you want to develop new grid2op modules (for example a new type of Backend, or a new kind of Chronics to
+read new types of data) this section is made for you.
+
+
 First, it is recommended (but optional) to make a virtual environment:

 .. code-block:: bash
@@ -50,33 +69,40 @@ After this, this simulator is available under the name grid2op (from a python co

    import grid2op

 ####################
-Getting started
+Start Using grid2op
 ####################

-Some Jupyter notebook are provided as example of the use of the Grid2Op package. They are located in the
-[getting_start](getting_started) directories.
-
-These notebooks will help you in understanding how this framework is used and cover the most
-interesting part of this framework:
-
-* 0_basic_functionalities covers the basics
-  of reinforcement learning (only the main concepts), how they are implemented in the
-  Grid2Op framework. It also covers how to create a valid environment and how to use the
-  `grid2op.main` function to assess how well an agent is performing.
-* 1_Observation_Agents details how to create
-  an "expert agent" that will take pre defined actions based on the observation it gets from
-  the environment. This Notebook also covers the functioning of the BaseObservation class.
-* 2_Action_GridManipulation demonstrates
-  how to use the BaseAction class and how to manipulate the powergrid.
-* 3_TrainingAnAgent shows how to get started with
-  reinforcement learning in the Grid2Op framework. It will use the code provided by Abhinav Sagar
-  available on `his blog `_
-  or on `this github repository `_ . This code will
-  be adapted (only minor changes, most of them to fit the shape of the data)
-  and a (D)DQN will be trained on this problem.
-* 4_StudyYourAgent shows how to study an BaseAgent, for example
-  the methods to reload a saved experiment, or to plot the powergrid given an observation for
-  example. This is an introductory notebook. More user friendly graphical interface should
-  come soon.
-
-These notebooks are available without any installation thanks to
-`mybinder `_
\ No newline at end of file
+To get started into the grid2op ecosystem, we made a set of notebooks
+that are available, without any installation, thanks to
+`Binder `_ . Feel free to visit the "getting_started" page for
+more information and a detailed tour of the issues that grid2op tries to address.
+
+The most basic code, for those familiar with openAI gym (a well-known framework in reinforcement learning) is:
+
+.. code-block:: python
+
+    import grid2op
+    # create an environment
+    env_name = "rte_case14_realistic"  # for example, other environments might be usable
+    env = grid2op.make(env_name)
+
+    # create an agent
+    from grid2op.Agent import RandomAgent
+    my_agent = RandomAgent(env.action_space)
+
+    # proceed as you would with any open ai gym loop
+    nb_episode = 10
+    for _ in range(nb_episode):
+        # you perform in this case 10 different episodes
+        obs = env.reset()
+        reward = env.reward_range[0]
+        done = False
+        while not done:
+            # here you loop on the time steps: at each step your agent receives an observation,
+            # takes an action,
+            # and the environment computes the next observation that will be used at the next step.
+            act = my_agent.act(obs, reward, done)
+            obs, reward, done, info = env.step(act)
+
+**NB** Grid2Op environments implement the interface of any openAI gym environment, but they don't
+inherit from them. You can use a Grid2Op environment as you would any Gym environment, but they are
+not, strictly speaking, gym environments. More information about that in the section :ref:`openai-gym`.
diff --git a/docs/runner.rst b/docs/runner.rst
index 7865aea10..60468d4fa 100644
--- a/docs/runner.rst
+++ b/docs/runner.rst
@@ -1,3 +1,5 @@
+.. _runner-module:
+
 Runner
 ===================================
@@ -10,17 +12,19 @@ i) facilitate the evaluation of the performance of :class:`grid2op.Agent` by per
 ii) define a format to store the results of the evaluation of such agent in a standardized manner
 iii) this "agent logs" can then be re read by third party applications, such as `grid2viz `_
     or by internal class to ease the study of the behaviour of
-    such agent, for example with the :class:`grid2op.Plot.EpisodeReplay`
+    such agent, for example with the classes :class:`grid2op.Episode.EpisodeData` or
+    :class:`grid2op.Episode.EpisodeReplay`
 iv) allow easy use of parallelization of this assessment.

-Basically, the runner simplifies the assment of the performance of some agent. This is the "usual" gym code to run
+Basically, the runner simplifies the assessment of the performance of some agent. This is the "usual" gym code to run
 an agent:

.. code-block:: python

    import grid2op
+    from grid2op.Agent import RandomAgent
    env = grid2op.make()
-    agent = grid2op.Agent.RandomAgent(env.action_space)
+    agent = RandomAgent(env.action_space)
    NB_EPISODE = 10  # assess the performance for 10 episodes, for example
    for i in range(NB_EPISODE):
        reward = env.reward_range[0]
@@ -30,7 +34,7 @@ an agent:
        act = agent.act(obs, reward, done)
        obs, reward, done, info = env.step(act)

-The above code does not store anything, cannot be run easily in parrallel and is already pretty verbose. To have a shorter code, that saves most of
+The above code does not store anything, cannot be run easily in parallel and is already pretty verbose. To have a shorter code, that saves most of
the data (and make it easier to integrate it with other applications) we can use the runner the following way:

    import grid2op
    from grid2op.Runner import Runner
+    from grid2op.Agent import RandomAgent
    env = grid2op.make()
    NB_EPISODE = 10  # assess the performance for 10 episodes, for example
    NB_CORE = 2  # do it on 2 cores, for example
    PATH_SAVE = "agents_log"  # and store the results in the "agents_log" folder
-    runner = Runner(**env.get_params_for_runner(), agentClass=grid2op.Agent.RandomAgent)
+    runner = Runner(**env.get_params_for_runner(), agentClass=RandomAgent)
    runner.run(nb_episode=NB_EPISODE, nb_process=NB_CORE, path_save=PATH_SAVE)

-As we can see, with less lines of code, we could execute parrallel assessment of our agent, on 10 episode
+As we can see, with fewer lines of code, we could execute a parallel assessment of our agent on 10 episodes
and save the results (observations, actions, rewards, etc.) into a dedicated folder.

+If your agent is initialized with a custom `__init__` method that takes more than the action space to be built,
+you can also use the Runner pretty easily by passing it an instance of your agent, for example:
+
+.. code-block:: python
+
+    import grid2op
+    from grid2op.Runner import Runner
+    env = grid2op.make()
+    NB_EPISODE = 10  # assess the performance for 10 episodes, for example
+    NB_CORE = 2  # do it on 2 cores, for example
+    PATH_SAVE = "agents_log"  # and store the results in the "agents_log" folder
+
+    # initialize your agent
+    my_agent = FancyAgentWithCustomInitialization(env.action_space,
+                                                  env.observation_space,
+                                                  "whatever else you want"
+                                                  )
+
+    # and proceed as follows for the runner
+    runner = Runner(**env.get_params_for_runner(), agentClass=None, agentInstance=my_agent)
+    runner.run(nb_episode=NB_EPISODE, nb_process=NB_CORE, path_save=PATH_SAVE)
+
Other tools are available for this runner class, for example the easy integration of progress bars. See below for
more information.
diff --git a/getting_started/5_StudyYourAgent.ipynb b/getting_started/5_StudyYourAgent.ipynb
index d96e9e772..27ec8a227 100644
--- a/getting_started/5_StudyYourAgent.ipynb
+++ b/getting_started/5_StudyYourAgent.ipynb
@@ -37,7 +37,7 @@
 },
 {
 "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -67,9 +67,82 @@
 },
 {
 "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/benjamin/Documents/grid2op_dev/getting_started/grid2op/MakeEnv/Make.py:267: UserWarning: You are using a development environment. This environment is not intended for training agents.
It might not be up to date and its primary use if for tests (hence the \"test=True\" you passed as argument). Use at your own risk.\n", + " warnings.warn(_MAKE_DEV_ENV_WARN)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d32a3fb777f848efb6536728d8dab0fc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='episode', max=2.0, style=ProgressStyle(description_width=…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d6c47f251ab84f2da56345b38a579817", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='episode', max=30.0, style=ProgressStyle(description_width…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e5699ef8bdfb4756b19709661f2063e6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='episode', max=30.0, style=ProgressStyle(description_width…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The results for the evaluated agent are:\n", + "\tFor chronics with id 000\n", + "\t\t - cumulative reward: 497.740265\n", + "\t\t - number of time steps completed: 30 / 30\n", + "\tFor chronics with id 001\n", + "\t\t - cumulative reward: 515.659302\n", + "\t\t - number of time steps completed: 30 / 30\n" + ] + } + ], "source": [ "scoring_function = L2RPNReward\n", "env = grid2op.make(reward_class=L2RPNReward, test=True)\n", @@ -112,9 +185,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['000',\n", + " 'dict_env_modification_space.json',\n", + " '001',\n", + " 'dict_action_space.json',\n", + " 'dict_observation_space.json',\n", + " 'dict_attack_space.json']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "os.listdir(path_agent)" ] @@ -128,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -137,6 +226,53 @@ "this_episode = EpisodeData.from_disk(path_agent, episode_studied)" ] }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2019, 1, 6, 0, 0)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "episode_data = this_episode\n", + "this_episode.observations[0].hour_of_day\n", + "datetime(year=episode_data.observations[0].year,\n", + " month=episode_data.observations[0].month,\n", + " day=episode_data.observations[0].day,\n", + " hour=episode_data.observations[0].hour_of_day,\n", + " minute=episode_data.observations[0].minute_of_hour)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"next(this_episode.observations).minute_of_hour" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/getting_started/8_PlottingCapabilities.ipynb b/getting_started/8_PlottingCapabilities.ipynb index 536d6409d..7f5f4c189 100644 --- a/getting_started/8_PlottingCapabilities.ipynb +++ b/getting_started/8_PlottingCapabilities.ipynb @@ -60,7 +60,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## II - Plot.Plotting utility\n", + "## II - Plot.PlotGrid utility\n", "\n", "Tthe \"Plot.Plotting\" module can help render a powergrid using 2 different methods: matplotlib or plotly. The display method is defined when you create a \"plotting\" object as shown below.\n", "\n", diff --git a/getting_started/test_episodereplay.py b/getting_started/test_episodereplay.py index 0fb8e7128..44e4b2045 100644 --- a/getting_started/test_episodereplay.py +++ b/getting_started/test_episodereplay.py @@ -7,9 +7,12 @@ from tqdm import tqdm path_agents = "getting_started/study_agent_getting_started" + +# if i start from grid2op/getting started (eg cd ~/Documents/grid2op/getting_started) +path_agents = "path_agents/" with warnings.catch_warnings(): warnings.filterwarnings("ignore") - env = grid2op.make("case14_realistic") + env = grid2op.make("rte_case14_realistic") class CustomRandom(RandomAgent): @@ -27,13 +30,12 @@ def my_act(self, transformed_observation, reward, done=False): runner = Runner(**env.get_params_for_runner(), agentClass=CustomRandom) -path_agent = os.path.join(path_agents, "RandomAgent") -res = runner.run(nb_episode=2, path_save=path_agent, pbar=tqdm) +path_agent = os.path.join(path_agents, "awesome_agent_logs") +res = runner.run(nb_episode=2, path_save=path_agent, pbar=tqdm, agent_seeds=[0, 1]) ep_replay = EpisodeReplay(agent_path=path_agent) for _, chron_name, cum_reward, nb_time_step, max_ts in res: ep_replay.replay_episode(chron_name, - video_name=os.path.join(path_agent, chron_name, "epidose.gif"), display=False) if False: plot_epi = EpisodeReplay(path_agent) diff --git a/grid2op/Action/ActionSpace.py b/grid2op/Action/ActionSpace.py index b2864d4be..06cd9b129 100644 --- a/grid2op/Action/ActionSpace.py +++ b/grid2op/Action/ActionSpace.py @@ -31,14 +31,17 @@ class ActionSpace(SerializableActionSpace): Attributes ---------- - game_rules: :class:`grid2op.RulesChecker.RulesChecker` + legal_action: :class:`grid2op.RulesChecker.BaseRules` Class specifying the rules of the game used to check the legality of the actions. - """ def __init__(self, gridobj, legal_action, actionClass=BaseAction): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + The actions space is created by the environment. Do not attempt to create one yourself. + All parameters (name_gen, name_load, name_line, sub_info, etc.) are used to fill the attributes having the same name. See :class:`ActionSpace` for more information. @@ -62,9 +65,9 @@ def __init__(self, gridobj, legal_action, actionClass=BaseAction): def __call__(self, dict_=None, check_legal=False, env=None): """ This utility allows you to build a valid action, with the proper sizes if you provide it with a valid - dictionnary. + dictionary. - More information about this dictionnary can be found in the :func:`Action.update` help. This dictionnary + More information about this dictionary can be found in the :func:`Action.update` help. This dictionary is not changed in this method. 
**NB** This is the only recommended way to make a valid, with proper dimension :class:`Action` object: @@ -83,16 +86,16 @@ def __call__(self, dict_=None, check_legal=False, env=None): Parameters ---------- - dict_ : :class:`dict` + dict_ : ``dict`` see :func:`Action.__call__` documentation for an extensive help about this parameter - check_legal: :class:`bool` + check_legal: ``bool`` is there a test performed on the legality of the action. **NB** When an object of class :class:`Action` is used, it is automatically tested for ambiguity. If this parameter is set to ``True`` then a legality test is performed. An action can be illegal if the environment doesn't allow it, for example if an agent tries to reconnect a powerline during a maintenance. - env: :class:`grid2op.Environment`, optional + env: :class:`grid2op.Environment.Environment`, optional An environment used to perform a legality check. Returns @@ -115,11 +118,18 @@ def __call__(self, dict_=None, check_legal=False, env=None): def _is_legal(self, action, env): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Whether an action is legal or not is checked by the environment at each call + to `env.step` Parameters ---------- - action - env + action: :class:`BaseAction` + The action to test + + env: :class:`grid2op.Environment.Environment` + The current environment Returns ------- diff --git a/grid2op/Action/BaseAction.py b/grid2op/Action/BaseAction.py index 54ba4355a..7374ac380 100644 --- a/grid2op/Action/BaseAction.py +++ b/grid2op/Action/BaseAction.py @@ -59,7 +59,8 @@ class BaseAction(GridObjects): - +1 force line reconnection - 0 do nothing to this line - - the third element is the switch line status vector. It is made of a vector of size :attr:`BaseAction._n_lines` and is + - the third element is the switch line status vector. It is made of a vector of size :attr:`BaseAction._n_lines` + and is interpreted as: - ``True``: change the line status @@ -82,7 +83,8 @@ class BaseAction(GridObjects): - the sixth element is a vector, representing the redispatching. Component of this vector is added to the generators active setpoint value (if set) of the first elements. - **NB** the difference between :attr:`BaseAction._set_topo_vect` and :attr:`BaseAction._change_bus_vect` is the following: + **NB** the difference between :attr:`BaseAction._set_topo_vect` and :attr:`BaseAction._change_bus_vect` is the + following: - If a component of :attr:`BaseAction._set_topo_vect` is 1, then the object (load, generator or powerline) will be moved to bus 1 of the substation to which it is connected. If it is already to bus 1 nothing will be @@ -134,7 +136,8 @@ class BaseAction(GridObjects): _set_topo_vect: :class:`numpy.ndarray`, dtype:int Similar to :attr:`BaseAction._set_line_status` but instead of affecting the status of powerlines, it affects the - bus connectivity at a substation. It has the same size as the full topological vector (:attr:`BaseAction._dim_topo`) + bus connectivity at a substation. It has the same size as the full topological vector + (:attr:`BaseAction._dim_topo`) and for each element it should be understood as: - 0 -> don't change @@ -143,8 +146,8 @@ class BaseAction(GridObjects): - -1 -> disconnect the object. 
_change_bus_vect: :class:`numpy.ndarray`, dtype:bool
-        Similar to :attr:`BaseAction._switch_line_status` but it affects the topology at substations instead of the status
-        of
+        Similar to :attr:`BaseAction._switch_line_status` but it affects the topology at substations instead of the
+        status of
        the powerline. It has the same size as the full topological vector (:attr:`BaseAction._dim_topo`)
        and each component should mean:
@@ -194,16 +197,16 @@ class BaseAction(GridObjects):

    def __init__(self):
        """
+
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+        **It is NOT recommended** to create an action with this method, use the action space
+        of the environment :attr:`grid2op.Environment.Environment.action_space` instead.
+
        This is used to create a BaseAction instance. Preferably, :class:`BaseAction` should be created with
        :class:`ActionSpace`.

-        **It is NOT recommended** to create an action with this method. Please use :func:`ActionSpace.__call__` or
-        :func:`ActionSpace.sample` to create a valid action.
-
-        Parameters
-        ----------
-        gridobj: :class:`grid2op.Space.GridObjects`
-            Representation of the objects present in the powergrid
+        IMPORTANT: Use :func:`ActionSpace.__call__` or :func:`ActionSpace.sample` to generate a valid action.
        """
        GridObjects.__init__(self)
@@ -447,10 +450,19 @@ def get_topological_impact(self, powerline_status=None):
        is legal or not.
        **NB** The impacted elements are the elements that can potentially be impacted by the action. This does not
        mean they will be impacted. For example:
-        * If an action from an :class:`grid2op.BaseAgent` reconnect a powerline, but this powerline is being disconnected by a hazard at the same time step, then this action will not be implemented on the grid.
-        However, it this powerline couldn't be reconnected for some reason (for example it was already out of order) the action will still be declared illegal, even if it has NOT impacted the powergrid.
-        * If an action tries to disconnect a powerline already disconnected, it will "impact" this powergrid. This means that even if the action will do nothing, it disconnecting this powerline is against the rules, then the action will be illegal.
-        * If an action tries to change the topology of a substation, but this substation is already at the target topology, the same mechanism applies. The action will "impact" the substation, even if, in the end, it consists of doing nothing.
+
+        * If an action from a :class:`grid2op.BaseAgent` reconnects a powerline, but this powerline is being
+          disconnected by a hazard at the same time step, then this action will not be implemented on the grid.
+
+          However, if this powerline couldn't be reconnected for some reason (for example it was already out of order)
+          the action will still be declared illegal, even if it has NOT impacted the powergrid.
+
+        * If an action tries to disconnect a powerline already disconnected, it will "impact" this powergrid.
+          This means that even if the action does nothing in practice, if disconnecting this powerline is against
+          the rules, then the action will be illegal.
+        * If an action tries to change the topology of a substation, but this substation is already at the target
+          topology, the same mechanism applies. The action will "impact" the substation, even if, in the end, it
+          consists of doing nothing.

        Any such "change" that would be illegal is declared as "illegal" regardless of the real impact of this action
        on the powergrid.
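To make the mechanism above concrete, here is a minimal sketch (assuming a default environment created with
`grid2op.make()`; the load id used is purely illustrative) of how the potential impact of an action can be inspected:

.. code-block:: python

    import grid2op
    env = grid2op.make()

    # an action that changes the bus of the load with id 0 (illustrative)
    action = env.action_space({"change_bus": {"loads_id": [0]}})

    # two boolean vectors: which powerline statuses and which substations
    # can potentially be impacted by this action
    lines_impacted, subs_impacted = action.get_topological_impact()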
@@ -458,8 +470,8 @@ def get_topological_impact(self, powerline_status=None): Returns ------- lines_impacted: :class:`numpy.array`, dtype:dt_bool - A vector with the same size as the number of powerlines in the grid (:attr:`BaseAction.n_line`) with for each - component ``True`` if the line STATUS is impacted by the action, and ``False`` otherwise. See + A vector with the same size as the number of powerlines in the grid (:attr:`BaseAction.n_line`) with for + each component ``True`` if the line STATUS is impacted by the action, and ``False`` otherwise. See :attr:`BaseAction._lines_impacted` for more information. subs_impacted: :class:`numpy.array`, dtype:dt_bool @@ -516,10 +528,10 @@ def get_topological_impact(self, powerline_status=None): def reset(self): """ - Reset the action to the "do nothing" state. - Returns - ------- + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Reset the action to the "do nothing" state. """ # False(line is disconnected) / True(line is connected) @@ -565,21 +577,16 @@ def _assign_iadd_or_warn(self, attr_name, new_value): def __iadd__(self, other): """ - Add an action to this one. - Adding an action to myself is equivalent to perform myself, and then perform other. - Add will have the following properties: + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ - - it erase the previous changes to injections - - + Add an action to this one. + Adding an action to myself is equivalent to perform myself, and then perform other. Parameters ---------- other: :class:`BaseAction` - Returns - ------- - """ # deal with injections @@ -692,6 +699,9 @@ def __iadd__(self, other): def __call__(self): """ + + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This method is used to return the effect of the current action in a format understandable by the backend. This format is detailed below. @@ -796,7 +806,8 @@ def _digest_injection(self, dict_): if k in self.attr_list_set: self._dict_inj[k] = np.array(tmp_d[k]).astype(dt_float) else: - warn = "The key {} is not recognized by BaseAction when trying to modify the injections.".format(k) + warn = "The key {} is not recognized by BaseAction when trying to modify the injections." \ + "".format(k) warnings.warn(warn) def _digest_setbus(self, dict_): @@ -812,7 +823,7 @@ def _digest_setbus(self, dict_): tmp = ddict_["loads_id"] handled = True for (c_id, bus) in tmp: - if c_id >= self.n_line: + if c_id >= self.n_load: raise AmbiguousAction("Load {} doesn't exist".format(c_id)) self._set_topo_vect[self.load_pos_topo_vect[c_id]] = bus # print("self.load_pos_topo_vect[l_id] {}".format(self.load_pos_topo_vect[l_id])) @@ -855,8 +866,8 @@ def _digest_setbus(self, dict_): msg += " as keys. None where found. Current used keys are: " msg += "{}".format(sorted(ddict_.keys())) raise AmbiguousAction(msg) - elif dict_["set_bus"] is None: - pass + else: + pass else: raise AmbiguousAction( "Invalid way to set the topology. 
dict_[\"set_bus\"] should be a numpy array or a dictionnary.") @@ -894,7 +905,7 @@ def _digest_change_bus(self, dict_): s_id = int(s_id) beg_ = int(np.sum(self.sub_info[:s_id])) end_ = int(beg_ + self.sub_info[s_id]) - self._change_bus_vect[beg_:end_][arr] = ~self._change_bus_vect[beg_:end_][arr] + self._change_bus_vect[beg_:end_][arr] = True elif dict_["change_bus"] is None: pass else: @@ -931,7 +942,8 @@ def _digest_hazards(self, dict_): tmp = np.array(tmp) except: raise AmbiguousAction( - "You ask to perform hazard on powerlines, this can only be done if \"hazards\" is castable into a numpy ndarray") + "You ask to perform hazard on powerlines, this can only be done if \"hazards\" can be casted " + "into a numpy ndarray") if np.issubdtype(tmp.dtype, np.dtype(bool).type): if len(tmp) != self.n_line: raise InvalidNumberOfLines( @@ -956,7 +968,8 @@ def _digest_maintenance(self, dict_): tmp = np.array(tmp) except: raise AmbiguousAction( - "You ask to perform maintenance on powerlines, this can only be done if \"maintenance\" is castable into a numpy ndarray") + "You ask to perform maintenance on powerlines, this can only be done if \"maintenance\" can " + "be casted into a numpy ndarray") if np.issubdtype(tmp.dtype, np.dtype(bool).type): if len(tmp) != self.n_line: raise InvalidNumberOfLines( @@ -982,12 +995,13 @@ def _digest_change_status(self, dict_): tmp = np.array(tmp) except: raise AmbiguousAction( - "You ask to change the bus status, this can only be done if \"change_status\" is castable into a numpy ndarray") + "You ask to change the bus status, this can only be done if \"change_status\" can be casted " + "into a numpy ndarray") if np.issubdtype(tmp.dtype, np.dtype(bool).type): if len(tmp) != self.n_line: raise InvalidNumberOfLines( - "This \"change_line_status\" action acts on {} lines while there are {} in the _grid".format( - len(tmp), self.n_line)) + "This \"change_line_status\" action acts on {} lines while there are {} in the _grid" + "".format(len(tmp), self.n_line)) elif not np.issubdtype(tmp.dtype, np.dtype(int).type): raise AmbiguousAction("You can only change line status with int or boolean numpy array vector.") self._switch_line_status[dict_["change_line_status"]] = True @@ -1056,10 +1070,9 @@ def _digest_redispatching(self, dict_): def _reset_vect(self): """ - Need to be called when update is called ! + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ - Returns - ------- + Need to be called when update is called ! 
""" self._vectorized = None @@ -1419,11 +1432,12 @@ def _check_for_ambiguity(self): raise InvalidLineStatus("You ask to reconnect powerline {} yet didn't tell on" " which bus.".format(q_id)) - - if np.any(self._set_topo_vect[self.line_ex_pos_topo_vect][self._set_topo_vect[self.line_or_pos_topo_vect] == -1] > 0): + disco_or = self._set_topo_vect[self.line_or_pos_topo_vect] == -1 + if np.any(self._set_topo_vect[self.line_ex_pos_topo_vect][disco_or] > 0): raise InvalidLineStatus("A powerline is connected (set to a bus at extremity end) and " "disconnected (set to bus -1 at origin end)") - if np.any(self._set_topo_vect[self.line_or_pos_topo_vect][self._set_topo_vect[self.line_ex_pos_topo_vect] == -1] > 0): + disco_ex = self._set_topo_vect[self.line_ex_pos_topo_vect] == -1 + if np.any(self._set_topo_vect[self.line_or_pos_topo_vect][disco_ex] > 0): raise InvalidLineStatus("A powerline is connected (set to a bus at origin end) and " "disconnected (set to bus -1 at extremity end)") @@ -1717,8 +1731,8 @@ def impact_on_objects(self): def as_dict(self): """ Represent an action "as a" dictionary. This dictionary is useful to further inspect on which elements - the actions had an impact. It is not recommended to use it as a way to serialize actions. The "do nothing" action - should always be represented by an empty dictionary. + the actions had an impact. It is not recommended to use it as a way to serialize actions. The "do nothing" + action should always be represented by an empty dictionary. The following keys (all optional) are present in the results: @@ -1773,7 +1787,6 @@ def as_dict(self): res: ``dict`` The action represented as a dictionary. See above for a description of it. - """ res = {} @@ -1953,7 +1966,8 @@ def effect_on(self, _sentinel=None, load_id=None, gen_id=None, line_id=None, sub - If "set_bus" is 1, then the object (load, generator or powerline) will be moved to bus 1 of the substation to which it is connected. If it is already to bus 1 nothing will be done. If it's on another bus it will connect it to bus 1. It's disconnected, it will reconnect it and connect it to bus 1. - - If "change_bus" is True, then the object will be moved from one bus to another. If the object were on bus 1 + - If "change_bus" is True, then the object will be moved from one bus to another. If the object were on + bus 1 then it will be moved on bus 2, and if it were on bus 2, it will be moved on bus 1. If the object were disconnected, then it will be connected to the affected bus. diff --git a/grid2op/Action/DontAct.py b/grid2op/Action/DontAct.py index 1a3dca491..11e97e3f1 100644 --- a/grid2op/Action/DontAct.py +++ b/grid2op/Action/DontAct.py @@ -11,6 +11,10 @@ class DontAct(PlayableAction): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This type of action is only compatible with "do nothing"... + This class is model the action where you force someone to do absolutely nothing. It is not the "do nothing" action. @@ -23,6 +27,8 @@ class DontAct(PlayableAction): def __init__(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + See the definition of :func:`BaseAction.__init__` and of :class:`BaseAction` for more information. Nothing more is done in this constructor. 
diff --git a/grid2op/Action/PlayableAction.py b/grid2op/Action/PlayableAction.py index ea6207c7a..c6e5fbdb7 100644 --- a/grid2op/Action/PlayableAction.py +++ b/grid2op/Action/PlayableAction.py @@ -45,9 +45,10 @@ def __init__(self): "redispatch": self._digest_redispatching } - def __call__(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Compare to the ancestor :func:`BaseAction.__call__` this type of BaseAction doesn't allow internal actions The returned tuple is same, but with empty dictionaries for internal actions @@ -85,6 +86,8 @@ def __call__(self): def update(self, dict_): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Similar to :class:`BaseAction`, except that the allowed entries are limited to the playable action set Parameters diff --git a/grid2op/Action/SerializableActionSpace.py b/grid2op/Action/SerializableActionSpace.py index 41877e786..21ae757ef 100644 --- a/grid2op/Action/SerializableActionSpace.py +++ b/grid2op/Action/SerializableActionSpace.py @@ -33,8 +33,18 @@ class SerializableActionSpace(SerializableSpace): action (see :func:`Action.size`) or to sample a new Action (see :func:`grid2op.Action.Action.sample`) """ + SET_STATUS = 0 + CHANGE_STATUS = 1 + SET_BUS = 2 + CHANGE_BUS = 3 + REDISPATCHING = 4 + def __init__(self, gridobj, actionClass=BaseAction): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + The :class:`grid2op.Environment.Environment` is responsible for the creation of the + action space. Do not attempt to make one yourself. Parameters ---------- @@ -54,6 +64,8 @@ def __init__(self, gridobj, actionClass=BaseAction): @staticmethod def from_dict(dict_): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Allows the de-serialization of an object stored as a dictionary (for example in the case of JSON saving). Parameters @@ -66,84 +78,142 @@ def from_dict(dict_): res: :class:``SerializableActionSpace`` An instance of an action space matching the dictionary. 
- """ tmp = SerializableSpace.from_dict(dict_) res = SerializableActionSpace(gridobj=tmp, actionClass=tmp.subtype) return res + def _get_possible_action_types(self): + rnd_types = [] + if "set_line_status" in self.actionClass.authorized_keys: + rnd_types.append(self.SET_STATUS) + if "change_line_status" in self.actionClass.authorized_keys: + rnd_types.append(self.CHANGE_STATUS) + if "set_bus" in self.actionClass.authorized_keys: + rnd_types.append(self.SET_BUS) + if "change_bus" in self.actionClass.authorized_keys: + rnd_types.append(self.CHANGE_BUS) + if "redispatch" in self.actionClass.authorized_keys: + rnd_types.append(self.REDISPATCHING) + return rnd_types + + def _sample_set_line_status(self, rnd_update=None): + if rnd_update is None: + rnd_update = {} + rnd_line = self.space_prng.randint(self.n_line) + rnd_status = self.space_prng.choice([1, -1]) + rnd_update["set_line_status"] = [(rnd_line, rnd_status)] + return rnd_update + + def _sample_change_line_status(self, rnd_update=None): + if rnd_update is None: + rnd_update = {} + rnd_line = self.space_prng.randint(self.n_line) + rnd_update["change_line_status"] = [rnd_line] + return rnd_update + + def _sample_set_bus(self, rnd_update=None): + if rnd_update is None: + rnd_update = {} + rnd_sub = self.space_prng.randint(self.n_sub) + sub_size = self.sub_info[rnd_sub] + rnd_topo = self.space_prng.choice([-1, 0, 1, 2], sub_size) + rnd_update["set_bus"] = {"substations_id": [(rnd_sub, rnd_topo)] } + return rnd_update + + def _sample_change_bus(self, rnd_update=None): + if rnd_update is None: + rnd_update = {} + rnd_sub = self.space_prng.randint(self.n_sub) + sub_size = self.sub_info[rnd_sub] + rnd_topo = self.space_prng.choice([0, 1], sub_size) + rnd_update["change_bus"] = {"substations_id": [(rnd_sub, rnd_topo)] } + return rnd_update + + def _sample_redispatch(self, rnd_update=None): + if rnd_update is None: + rnd_update = {} + gens = np.arange(self.n_gen)[self.gen_redispatchable] + rnd_gen = self.space_prng.choice(gens) + rd = -self.gen_max_ramp_down[rnd_gen] + ru = self.gen_max_ramp_up[rnd_gen] + rnd_gen_disp = (ru - rd) * self.space_prng.random() + rd + rnd_disp = np.zeros(self.n_gen) + rnd_disp[rnd_gen] = rnd_gen_disp + rnd_update["redispatch"] = rnd_disp + return rnd_update + def sample(self): """ - A utility used to sample a new random :class:`Action`. + A utility used to sample a new random :class:`BaseAction`. + + The sampled action is unitary: It has an impact on a single line/substation/generator. - The sampled action is unitary: - It has an impact on a single line/substation/generator. + There is no guarantee concerning the "legality" of the action (see the description of the + Action module for more information about illegal action). + + It will only act by doing action supported by the action space. For example, if the action space + does not support "redispatching" then this method will NOT sample any redispatching action. Returns ------- res: :class:`BaseAction` A random action sampled from the :attr:`ActionSpace.actionClass` + Examples + --------- + The first usage is to sample uniform **unary** actions, you can do this with the + following: + + .. 
code-block:: python

+            import grid2op
+            env = grid2op.make()
+
+            # and now you can sample from the action space
+            random_action = env.action_space.sample()
+
+        *Note* that the random action can be illegal depending on the game rules defined in the
+        :class:`grid2op.Rules` module.
+
+        If for some reason you want to sample more complex actions, you can do it the following way:
+
+        .. code-block:: python
+
+            import grid2op
+            env = grid2op.make()
+
+            # start from the "do nothing" action
+            random_action = env.action_space()
+            for i in range(5):
+                # my resulting action will be a complex action
+                # that will be the result of applying 5 random actions
+                random_action += env.action_space.sample()
+            print(random_action)
+
        """
        rnd_act = self.actionClass()
-
-        rnd_types = []
-        if "set_line_status" in self.actionClass.authorized_keys:
-            rnd_types.append(0)
-        if "change_line_status" in self.actionClass.authorized_keys:
-            rnd_types.append(1)
-        if "set_bus" in self.actionClass.authorized_keys:
-            rnd_types.append(2)
-        if "change_bus" in self.actionClass.authorized_keys:
-            rnd_types.append(3)
-        if "redispatch" in self.actionClass.authorized_keys:
-            rnd_types.append(4)
        # Cannot sample this space, return do nothing
+        rnd_types = self._get_possible_action_types()
        if not len(rnd_types):
            return rnd_act

+        # sample the type of action that will be performed
        rnd_type = self.space_prng.choice(rnd_types)
-        if rnd_type == 0:
-            rnd_line = self.space_prng.randint(self.n_line)
-            rnd_status = self.space_prng.choice([1, -1])
-            rnd_update = {
-                "set_line_status": [(rnd_line, rnd_status)]
-            }
-        elif rnd_type == 1:
-            rnd_line = self.space_prng.randint(self.n_line)
-            rnd_update = {
-                "change_line_status": [rnd_line]
-            }
-        elif rnd_type == 2:
-            rnd_sub = self.space_prng.randint(self.n_sub)
-            sub_size = self.sub_info[rnd_sub]
-            rnd_topo = self.space_prng.choice([-1, 0, 1, 2], sub_size)
-            rnd_update = {
-                "set_bus": {
-                    "substations_id": [(rnd_sub, rnd_topo)]
-                }
-            }
-        elif rnd_type == 3:
-            rnd_sub = self.space_prng.randint(self.n_sub)
-            sub_size = self.sub_info[rnd_sub]
-            rnd_topo = self.space_prng.choice([0, 1], sub_size)
-            rnd_update = {
-                "change_bus": {
-                    "substations_id": [(rnd_sub, rnd_topo)]
-                }
-            }
+
+        if rnd_type == self.SET_STATUS:
+            rnd_update = self._sample_set_line_status()
+        elif rnd_type == self.CHANGE_STATUS:
+            rnd_update = self._sample_change_line_status()
+        elif rnd_type == self.SET_BUS:
+            rnd_update = self._sample_set_bus()
+        elif rnd_type == self.CHANGE_BUS:
+            rnd_update = self._sample_change_bus()
+        elif rnd_type == self.REDISPATCHING:
+            rnd_update = self._sample_redispatch()
        else:
-            gens = np.arange(self.n_gen)[self.gen_redispatchable]
-            rnd_gen = self.space_prng.choice(gens)
-            ru = -self.gen_max_ramp_down[rnd_gen]
-            rd = self.gen_max_ramp_up[rnd_gen]
-            rnd_gen_disp = (ru - rd) * self.space_prng.random() + rd
-            rnd_disp = np.zeros(self.n_gen)
-            rnd_disp[rnd_gen] = rnd_gen_disp
-            rnd_update = {
-                "redispatch" : rnd_disp
-            }
+            raise Grid2OpException("Impossible to sample action of type {}".format(rnd_type))

        rnd_act.update(rnd_update)
        return rnd_act
@@ -161,13 +231,41 @@ def disconnect_powerline(self, line_id=None, line_name=None, previous_action=Non
        Name of the powerline. Note that either line_id or line_name should be provided. If both are provided, it is
        an error, if none are provided it is an error.

-        previous_action
+        previous_action: :class:`BaseAction`
+            If you want to stack up multiple actions.

        Returns
        -------
        res: :class:`BaseAction`
            The action that will disconnect the powerline.
+ Notes + ------ + If you use `previous_action` it will modify the action **in place** which means that + `previous_action` will be modified by this method. + + Examples + --------- + You can use it this way: + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # and now you can disconnect line 0 + disco_line_0 = env.action_space.disconnect_powerline(line_id=0) + + # or line with name "0_4_1" + disco_line_1 = env.action_space.disconnect_powerline(line_name="0_4_1") + + # and you can disconnect both line 2 and 3 with: + disco_line_2 = env.action_space.disconnect_powerline(line_id=2) + disco_line_2_and_3 = env.action_space.disconnect_powerline(line_id=3, previous_action=disco_line_2) + print(disco_line_2_and_3) + # be careful, "disco_line_2" is affected and is in fact equal to "disco_line_2_and_3" + # after the last call! + """ if line_id is None and line_name is None: raise AmbiguousAction("You need to provide either the \"line_id\" or the \"line_name\" of the powerline " @@ -202,6 +300,18 @@ def reconnect_powerline(self, bus_or, bus_ex, line_id=None, line_name=None, prev Note that in case "bus_or" or "bus_ex" are not the current bus to which the powerline is connected, they will be affected by this action. + Notes + ------ + This utility requires you to specify on which bus you want to connect each end + ("*origin*" or "*extremity*") of the powerline you want to reconnect. + + If you don't want to specify them, you can set them to ``0`` and it will reconnect them + to the last known buses to which they were connected (this is automatically done by the + Environment since version `0.8.0`). + + If you use `previous_action` it will modify the action **in place** which means that + `previous_action` will be modified by this method. + Parameters ---------- line_id: ``int`` @@ -219,6 +329,30 @@ def reconnect_powerline(self, bus_or, bus_ex, line_id=None, line_name=None, prev res: :class:`BaseAction` The action that will reconnect the powerline. + Examples + --------- + You can use it this way: + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # and now you can reconnect line 0 + reco_line_0 = env.action_space.reconnect_powerline(line_id=0, bus_or=1, bus_ex=0) + + # or line with name "0_4_1" to bus 1 on its "origin" end and bus 2 on its "extremity" end + reco_line_1 = env.action_space.reconnect_powerline(line_name="0_4_1", bus_or=1, bus_ex=2) + + # and you can reconnect both line 2 and 3 with: + reco_line_2 = env.action_space.reconnect_powerline(line_id=2, bus_or=1, bus_ex=2) + reco_line_2_and_3 = env.action_space.reconnect_powerline(line_id=3, + bus_or=0, bus_ex=1, + previous_action=reco_line_2) + print(reco_line_2_and_3) + # be careful, "reco_line_2" is affected and is in fact equal to "reco_line_2_and_3" + # after the last call! + """ if line_id is None and line_name is None: raise AmbiguousAction("You need to provide either the \"line_id\" or the \"line_name\" of the powerline " @@ -263,11 +397,16 @@ def change_bus(self, name_element, extremity=None, substation=None, type_element "or" or "ex" for origin or extremity, ignored if an element is not a powerline. substation: ``int``, optional Its substation ID, if you know it will increase the performance. Otherwise, the method will search for it. - type_element: ``int``, optional + type_element: ``str``, optional Type of the element to look for. It is here to speed up the computation. One of "line", "gen" or "load" previous_action: :class:`Action`, optional The (optional) action to update. 
It should be of the same type as :attr:`ActionSpace.actionClass` + Notes + ------ + If you use `previous_action` it will modify the action **in place** which means that + `previous_action` will be modified by this method. + Returns ------- res: :class:`BaseAction` @@ -278,6 +417,34 @@ def change_bus(self, name_element, extremity=None, substation=None, type_element res :class:`grid2op.Exception.AmbiguousAction` If *previous_action* has not the same type as :attr:`ActionSpace.actionClass`. + Examples + --------- + You can use it this way: + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # change bus of element named 'gen_1_0' + change_gen_0 = env.action_space.change_bus('gen_1_0', type_element="gen") + + # you are not forced to specify the element types + change_load_1 = env.action_space.change_bus('load_2_1') + + # dealing with powerline, you can affect one of its extremity + # (handy when you don't know on which substation it is located) + change_line_8_or = env.action_space.change_bus('5_11_8', extremity="or") + + # and you can combine the action with + change_line_14_ex = env.action_space.change_bus('12_13_14', extremity="ex") + change_line_14_ex_load_2 = env.action_space.change_bus("load_3_2", + previous_action=change_line_14_ex) + print(change_line_14_ex_load_2) + # be careful, "change_line_14_ex" is affected and is in fact equal to + # "change_line_14_ex_load_2" + # after the last call! + """ if previous_action is None: res = self.actionClass() @@ -290,9 +457,10 @@ def change_bus(self, name_element, extremity=None, substation=None, type_element dict_, to_sub_pos, my_id, my_sub_id = self._extract_dict_action(name_element, extremity, substation, type_element, res) - dict_["change_bus"][to_sub_pos[my_id]] = True - res.update({"change_bus": {"substations_id": [(my_sub_id, dict_["change_bus"])]}}) - # res.update(dict_) + arr_ = dict_["change_bus"] + me_id_ = to_sub_pos[my_id] + arr_[me_id_] = True + res.update({"change_bus": {"substations_id": [(my_sub_id, arr_)]}}) return res def _extract_database_powerline(self, extremity): @@ -316,17 +484,7 @@ def _extract_dict_action(self, name_element, extremity=None, substation=None, ty to_sub_pos = None to_name = None - if type_element == "line": - to_subid, to_sub_pos, to_name = self._extract_database_powerline(extremity) - elif type_element[:3] == "gen" or type_element[:4] == "prod": - to_subid = self.gen_to_subid - to_sub_pos = self.gen_to_sub_pos - to_name = self.name_gen - elif type_element == "load": - to_subid = self.load_to_subid - to_sub_pos = self.load_to_sub_pos - to_name = self.name_load - elif type_element is None: + if type_element is None: # i have to look through all the objects to find it if name_element in self.name_load: to_subid = self.load_to_subid @@ -342,6 +500,16 @@ def _extract_dict_action(self, name_element, extremity=None, substation=None, ty AmbiguousAction( "Element \"{}\" not found in the powergrid".format( name_element)) + elif type_element == "line": + to_subid, to_sub_pos, to_name = self._extract_database_powerline(extremity) + elif type_element[:3] == "gen" or type_element[:4] == "prod": + to_subid = self.gen_to_subid + to_sub_pos = self.gen_to_sub_pos + to_name = self.name_gen + elif type_element == "load": + to_subid = self.load_to_subid + to_sub_pos = self.load_to_sub_pos + to_name = self.name_load else: raise AmbiguousAction("unknown type_element specifier \"{}\". 
type_element should be \"line\" or \"load\" "
                                   "or \"gen\"".format(type_element))
@@ -398,6 +566,34 @@ def set_bus(self, name_element, new_bus, extremity=None, substation=None, type_e
        AmbiguousAction
            If *previous_action* has not the same type as :attr:`ActionSpace.actionClass`.

+        Examples
+        ---------
+        You can use it this way:
+
+        .. code-block:: python
+
+            import grid2op
+            env = grid2op.make()
+
+            # set bus of element named 'gen_1_0' to bus 2
+            setbus_gen_0 = env.action_space.set_bus('gen_1_0', new_bus=2, type_element="gen")
+
+            # you are not forced to specify the element types (example with load set to bus 1)
+            setbus_load_1 = env.action_space.set_bus('load_2_1', new_bus=1)
+
+            # when dealing with a powerline, you can affect one of its extremities
+            # (handy when you don't know on which substation it is located)
+            setbus_line_8_or = env.action_space.set_bus('5_11_8', new_bus=1, extremity="or")
+
+            # and you can combine the actions with:
+            setbus_line_14_ex = env.action_space.set_bus('12_13_14', new_bus=2, extremity="ex")
+            setbus_line_14_ex_load_2 = env.action_space.set_bus("load_3_2", new_bus=1,
+                                                                previous_action=setbus_line_14_ex)
+            print(setbus_line_14_ex_load_2)
+            # be careful, "setbus_line_14_ex" is affected and is in fact equal to
+            # "setbus_line_14_ex_load_2"
+            # after the last call!
+
        """
        if previous_action is None:
            res = self.actionClass()
@@ -436,6 +632,28 @@ def get_change_line_status_vect(self):

    @staticmethod
    def get_all_unitary_line_set(action_space):
+        """
+        Return all unitary actions that "set" powerline status.
+
+        For each powerline, there are 5 such actions:
+
+        - disconnect it
+        - connect its origin at bus 1 and its extremity at bus 1
+        - connect its origin at bus 1 and its extremity at bus 2
+        - connect its origin at bus 2 and its extremity at bus 1
+        - connect its origin at bus 2 and its extremity at bus 2
+
+        Parameters
+        ----------
+        action_space: :class:`grid2op.BaseAction.ActionSpace`
+            The action space used.
+
+        Returns
+        -------
+        res: ``list``
+            The list of all "set" actions acting on powerline status
+
+        """
        res = []

        # powerline switch: disconnection
@@ -453,6 +671,22 @@ def get_all_unitary_line_set(action_space):

    @staticmethod
    def get_all_unitary_line_change(action_space):
+        """
+        Return all unitary actions that "change" powerline status.
+
+        For each powerline, there is only one such action, which consists in changing its status.
+
+        Parameters
+        ----------
+        action_space: :class:`grid2op.BaseAction.ActionSpace`
+            The action space used.
+
+        Returns
+        -------
+        res: ``list``
+            The list of all "change" actions acting on powerline status
+
+        """
        res = []

        for i in range(action_space.n_line):
@@ -463,7 +697,7 @@ def get_all_unitary_line_change(action_space):
        return res

    @staticmethod
-    def get_all_unitary_topologies_change(action_space):
+    def get_all_unitary_topologies_change(action_space, sub_id=None):
        """
        This method allows to compute and return all the unitary topological changes that can be performed on a
        powergrid.
@@ -472,18 +706,43 @@

        Parameters
        ----------
-        action_space: :class:`grid2op.BaseAction.ActionHelper`
+        action_space: :class:`grid2op.BaseAction.ActionSpace`
            The action space used.

+        sub_id: ``int``, optional
+            The substation ID. If ``None`` it is done for all substations.
+
+        Notes
+        -----
+        This might take a long time on large grids (possibly 10-15 minutes for the IEEE 118 for example).
+
        Returns
        -------
        res: ``list``
            The list of all the topological actions that can be performed.
+ Examples + --------- + You can use it this way: + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # all "change bus" action for all the substations + all_change_actions = env.action_space.get_all_unitary_topologies_change(env.action_space) + + # you can only study "change_bus" action for a given substation (can dramatically improve the computation time) + all_change_actions_sub4 = env.action_space.get_all_unitary_topologies_change(env.action_space, sub_id=4) + """ res = [] S = [0, 1] - for sub_id, num_el in enumerate(action_space.sub_info): + for sub_id_, num_el in enumerate(action_space.sub_info): + if sub_id is not None: + if sub_id_ != sub_id: + continue already_set = set() # remove the "do nothing" action, which is either equivalent to not change anything # or to change everything @@ -495,7 +754,7 @@ def get_all_unitary_topologies_change(action_space): # tup = np.array((0, *tup)).astype(dt_bool) # add a zero to first element -> break symmetry tup = np.array(tup_).astype(dt_bool) # add a zero to first element -> break symmetry indx[tup] = True - action = action_space({"change_bus": {"substations_id": [(sub_id, indx)]}}) + action = action_space({"change_bus": {"substations_id": [(sub_id_, indx)]}}) already_set.add(tup_) already_set.add(tuple([1-el for el in tup_])) res.append(action) @@ -504,7 +763,7 @@ def get_all_unitary_topologies_change(action_space): return res @staticmethod - def get_all_unitary_topologies_set(action_space): + def get_all_unitary_topologies_set(action_space, sub_id=None): """ This methods allows to compute and return all the unitary topological changes that can be performed on a powergrid. @@ -517,23 +776,49 @@ def get_all_unitary_topologies_set(action_space): action_space: :class:`grid2op.BaseAction.ActionHelper` The action space used. + sub_id: ``int``, optional + The substation ID. If ``None`` it is done for all substations. + + Notes + ----- + This might take a long time on large grid (possibly 10-15 mins for the IEEE 118 for example) + Returns ------- res: ``list`` The list of all the topological actions that can be performed. + Examples + --------- + You can use it this way: + + .. 
code-block:: python

+            import grid2op
+            env = grid2op.make()
+
+            # all "set_bus" actions
+            all_change_actions = env.action_space.get_all_unitary_topologies_set(env.action_space)
+
+            # you can only study the "set_bus" actions for a given substation (this can dramatically improve the computation time)
+            all_change_actions_sub4 = env.action_space.get_all_unitary_topologies_set(env.action_space, sub_id=4)
+
        """
        res = []
        S = [0, 1]
-        for sub_id, num_el in enumerate(action_space.sub_info):
+        for sub_id_, num_el in enumerate(action_space.sub_info):
            tmp = []
+            if sub_id is not None:
+                if sub_id_ != sub_id:
+                    continue
+
            new_topo = np.full(shape=num_el, fill_value=1, dtype=dt_int)
            # perform the action "set everything on bus 1"
-            action = action_space({"set_bus": {"substations_id": [(sub_id, new_topo)]}})
+            action = action_space({"set_bus": {"substations_id": [(sub_id_, new_topo)]}})
            tmp.append(action)

-            powerlines_or_id = action_space.line_or_to_sub_pos[action_space.line_or_to_subid == sub_id]
-            powerlines_ex_id = action_space.line_ex_to_sub_pos[action_space.line_ex_to_subid == sub_id]
+            powerlines_or_id = action_space.line_or_to_sub_pos[action_space.line_or_to_subid == sub_id_]
+            powerlines_ex_id = action_space.line_ex_to_sub_pos[action_space.line_ex_to_subid == sub_id_]
            powerlines_id = np.concatenate((powerlines_or_id, powerlines_ex_id))

            # computes all the topologies at 2 buses for this substation
@@ -551,14 +836,14 @@
                        # if there is a "node" without a powerline, the topology is not valid
                        continue

-                    action = action_space({"set_bus": {"substations_id": [(sub_id, new_topo)]}})
+                    action = action_space({"set_bus": {"substations_id": [(sub_id_, new_topo)]}})
                    tmp.append(action)
                else:
                    # i need to take into account the case where 1 powerline is alone on a bus too
                    if np.sum(indx[powerlines_id]) >= 1 and np.sum(~indx[powerlines_id]) >= 1:
                        new_topo = np.full(shape=num_el, fill_value=1, dtype=dt_int)
                        new_topo[~indx] = 2
-                        action = action_space({"set_bus": {"substations_id": [(sub_id, new_topo)]}})
+                        action = action_space({"set_bus": {"substations_id": [(sub_id_, new_topo)]}})
                        tmp.append(action)

            if len(tmp) >= 2:
@@ -569,7 +854,33 @@
        return res

    @staticmethod
-    def get_all_unitary_redispatch(action_space):
+    def get_all_unitary_redispatch(action_space, num_down=5, num_up=5):
+        """
+        Redispatching actions are continuous actions. This method is a helper to convert these continuous
+        actions into discrete actions (by rounding).
+
+        The number of actions is equal to num_down + num_up (by default 10) per dispatchable generator.
+
+        This method acts as follows:
+
+        - it will divide the interval [-gen_max_ramp_down, 0] into `num_down` evenly spaced values, each making
+          a distinct action (counting `num_down` different actions, because 0.0 is removed)
+        - it will do the same for [0, gen_max_ramp_up]
+
+        Parameters
+        ----------
+        action_space: :class:`grid2op.BaseAction.ActionHelper`
+            The action space used.
+
+        Returns
+        -------
+        res: ``list``
+            The list of all discretized redispatching actions.
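+
+        Examples
+        ---------
+        A possible usage, mirroring the other `get_all_unitary_*` helpers (this is a sketch, assuming
+        a default environment):
+
+        .. code-block:: python
+
+            import grid2op
+            env = grid2op.make()
+
+            # all discretized redispatching actions (num_down + num_up actions per dispatchable generator)
+            all_redisp_actions = env.action_space.get_all_unitary_redispatch(env.action_space)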
+ + """ + res = [] n_gen = len(action_space.gen_redispatchable) @@ -579,11 +890,11 @@ def get_all_unitary_redispatch(action_space): continue # Create evenly spaced positive interval - ramps_up = np.linspace(0.0, action_space.gen_max_ramp_up[gen_idx], num=5) - ramps_up = ramps_up[1:] # Exclude redispatch of 0MW + ramps_up = np.linspace(0.0, action_space.gen_max_ramp_up[gen_idx], num=num_up) + ramps_up = ramps_up[1:] # Exclude redispatch of 0MW # Create evenly spaced negative interval - ramps_down = np.linspace(-action_space.gen_max_ramp_down[gen_idx], 0.0, num=5) + ramps_down = np.linspace(-action_space.gen_max_ramp_down[gen_idx], 0.0, num=num_down) ramps_down = ramps_down[:-1] # Exclude redispatch of 0MW # Merge intervals diff --git a/grid2op/Action/VoltageOnlyAction.py b/grid2op/Action/VoltageOnlyAction.py index ec1030aaf..a8e768593 100644 --- a/grid2op/Action/VoltageOnlyAction.py +++ b/grid2op/Action/VoltageOnlyAction.py @@ -14,7 +14,11 @@ class VoltageOnlyAction(BaseAction): """ - This class is here to serve as a base class for the controler of the voltages (if any). It allows to perform + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is the main class used by the voltage controller. + + This class is here to serve as a base class for the controller of the voltages (if any). It allows to perform only modification of the generator voltage set point. Only action of type "injection" are supported, and only setting "prod_v" keyword. @@ -45,6 +49,8 @@ def __init__(self): def _check_dict(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Check that nothing, beside prod_v has been updated with this action. Returns @@ -59,6 +65,8 @@ def _check_dict(self): def update(self, dict_): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + As its original implementation, this method allows modifying the way a dictionary can be mapped to a valid :class:`BaseAction`. diff --git a/grid2op/Action/_BackendAction.py b/grid2op/Action/_BackendAction.py index 3c6a7bb19..54feffbe5 100644 --- a/grid2op/Action/_BackendAction.py +++ b/grid2op/Action/_BackendAction.py @@ -15,6 +15,10 @@ # TODO see if it can be done in c++ easily class ValueStore: + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + """ def __init__(self, size, dtype): ## TODO at the init it's mandatory to have everything at "1" here # if topo is not "fully connected" it will not work @@ -161,6 +165,8 @@ def reorder(self, new_order): class _BackendAction(GridObjects): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Internal class, use at your own risk. This class "digest" the players / environment / opponent / voltage controlers "action", @@ -195,7 +201,10 @@ def __init__(self): self._status_ex = np.ones(self.n_line, dtype=dt_int) def reorder(self, no_load, no_gen, no_topo, no_shunt): - """reorder the element modified, this is use when converting backends only and should not be use + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + reorder the element modified, this is use when converting backends only and should not be use outside of this usecase no_* stands for "new order" @@ -255,6 +264,8 @@ def set_redispatch(self, new_redispatching): def __iadd__(self, other): """ + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + other: a grid2op action standard Parameters @@ -372,6 +383,8 @@ def _get_active_bus(self): def update_state(self, powerline_disconnected): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Update the internal state. Should be called after the cascading failures """ diff --git a/grid2op/Agent/DeltaRedispatchRandomAgent.py b/grid2op/Agent/DeltaRedispatchRandomAgent.py index c869fc0e5..906354a99 100644 --- a/grid2op/Agent/DeltaRedispatchRandomAgent.py +++ b/grid2op/Agent/DeltaRedispatchRandomAgent.py @@ -1,24 +1,40 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. + import numpy as np from grid2op.Agent import BaseAgent -class DeltaRedispatchRandomAgent(BaseAgent): - def __init__(self, action_space, - n_gens_to_redispatch=2, - redispatching_delta=1.0): - """ - Agent constructor - Parameters - ---------- - :action_space: :class:`grid2op.Action.ActionSpace` - the Grid2Op action space +class DeltaRedispatchRandomAgent(BaseAgent): + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Used for test. Prefer using a random agent by selecting only the redispatching action + that you want. + + This agent will perform some redispatch of a given amount among randomly selected dispatchable + generators. - :n_gens_to_redispatch: `int` - The maximum number of dispatchable generators to play with + Parameters + ---------- + action_space: :class:`grid2op.Action.ActionSpace` + the Grid2Op action space - :redispatching_delta: `float` - The redispatching MW value used in both directions - """ + n_gens_to_redispatch: `int` + The maximum number of dispatchable generators to play with + + redispatching_delta: `float` + The redispatching MW value used in both directions + + """ + def __init__(self, action_space, + n_gens_to_redispatch=2, + redispatching_delta=1.0): super().__init__(action_space) self.desired_actions = [] @@ -54,24 +70,6 @@ def __init__(self, action_space, self.desired_actions.append(act1) self.desired_actions.append(act2) - def act(self, observation, reward, done=False): - """ - Parameters - ---------- - observation: :class:`grid2op.Observation.Observation` - The current observation of the - :class:`grid2op.Environment.Environment` - reward: ``float`` - The current reward. - This is the reward obtained by the previous action - done: ``bool`` - Whether the episode has ended or not. - Used to maintain gym compatibility - Returns - ------- - res: :class:`grid2op.Action.Action` - The action chosen by agent. - """ - - return self.space_prng.choice(self.desired_actions) + act = self.space_prng.choice(self.desired_actions) + return act diff --git a/grid2op/Agent/DoNothing.py b/grid2op/Agent/DoNothing.py index e13e1eb6b..8776be937 100644 --- a/grid2op/Agent/DoNothing.py +++ b/grid2op/Agent/DoNothing.py @@ -12,6 +12,10 @@ class DoNothingAgent(BaseAgent): """ This is the most basic BaseAgent. It is purely passive, and does absolutely nothing. 
+ + As opposed to most reinforcement learning environments, in grid2op, doing nothing is often + the best solution. + """ def __init__(self, action_space): BaseAgent.__init__(self, action_space) @@ -21,10 +25,9 @@ def act(self, observation, reward, done=False): As better explained in the document of :func:`grid2op.BaseAction.update` or :func:`grid2op.BaseAction.ActionSpace.__call__`. - The preferred way to make an object of type action is to call :func:`grid2op.BaseAction.ActionSpace.__call__` with - the - dictionnary representing the action. In this case, the action is "do nothing" and it is represented by the - empty dictionnary. + The preferred way to make an object of type action is to call :func:`grid2op.BaseAction.ActionSpace.__call__` + with the dictionary representing the action. In this case, the action is "do nothing" and it is represented by + the empty dictionary. Parameters ---------- diff --git a/grid2op/Agent/GreedyAgent.py b/grid2op/Agent/GreedyAgent.py index 6cc75c0e6..20f8e16fe 100644 --- a/grid2op/Agent/GreedyAgent.py +++ b/grid2op/Agent/GreedyAgent.py @@ -71,7 +71,6 @@ def _get_tested_action(self, observation): From this list, the one that achieve the best "simulated reward" is used. - Parameters ---------- observation: :class:`grid2op.Observation.Observation` diff --git a/grid2op/Agent/OneChangeThenNothing.py b/grid2op/Agent/OneChangeThenNothing.py index db82447d0..ce90db53e 100644 --- a/grid2op/Agent/OneChangeThenNothing.py +++ b/grid2op/Agent/OneChangeThenNothing.py @@ -25,15 +25,14 @@ class OneChangeThenNothing(BaseAgent): Examples --------- - We advise to use this class as following .. code-block:: python - import grid2op from grid2op.Agent import OneChangeThenNothing - acts_dict_ = [{}, {"set_line_status": [(0,-1)]}] # list of dictionnaries. Each dictionnaries representing a valid action + acts_dict_ = [{}, {"set_line_status": [(0,-1)]}] # list of dictionaries. Each dictionary + # represents a valid action env = grid2op.make() # create an environment for act_as_dict in zip(acts_dict_): @@ -55,21 +54,6 @@ def __init__(self, action_space): self.do_nothing_action = self.action_space({}) def act(self, observation, reward, done=False): - """ - If this agent had not acted, then it does (first time step). - - Afterwards it does nothing. - - Parameters - ---------- - observation - reward - done - - Returns - ------- - - """ if self.has_changed: res = self.do_nothing_action else: @@ -82,7 +66,7 @@ def reset(self, obs): def _get_dict_act(self): """ - Function that need to be overridden to indicate which action to perfom. + Function that need to be overridden to indicate which action to perform. Returns ------- @@ -92,12 +76,13 @@ def _get_dict_act(self): """ return self.my_dict - @classmethod def gen_next(cls, dict_): """ This function allows to change the dictionnary of the action that the agent will perform. + See the class level documentation for an example on how to use this. + Parameters ---------- dict_: ``dict`` diff --git a/grid2op/Agent/PowerlineSwitch.py b/grid2op/Agent/PowerlineSwitch.py index 125a7887c..738de668e 100644 --- a/grid2op/Agent/PowerlineSwitch.py +++ b/grid2op/Agent/PowerlineSwitch.py @@ -18,9 +18,11 @@ class PowerLineSwitch(GreedyAgent): It will choose among: - doing nothing - - disconnecting one powerline + - changing the status of one powerline - which action that will maximize the reward. All powerlines are tested. + which action that will maximize the simulated reward. All powerlines are tested at each steps. 
This means + that if `n` is the number of powerline on the grid, at each steps this actions will perform `n` +1 + calls to "simulate" (one to do nothing and one that change the status of each powerline) """ diff --git a/grid2op/Agent/RandomAgent.py b/grid2op/Agent/RandomAgent.py index bba3092f1..6ad254849 100644 --- a/grid2op/Agent/RandomAgent.py +++ b/grid2op/Agent/RandomAgent.py @@ -12,13 +12,20 @@ class RandomAgent(AgentWithConverter): """ - This agent acts randomnly on the powergrid. It uses the :class:`grid2op.Converters.IdToAct` to compute all the + This agent acts randomly on the powergrid. It uses the :class:`grid2op.Converters.IdToAct` to compute all the possible actions available for the environment. And then chooses a random one among all these. - **NB** Action are taken randomly among unary actions. For example, if a game rules allows to take actions that + Notes + ------ + Actions are taken uniformly at random among unary actions. For example, if a game rules allows to take actions that can disconnect a powerline AND modify the topology of a substation an action that do both will not be sampled by this class. + This agent is not equivalent to calling `env.action_space.sample()` because the sampling is not + done the same manner. This agent sample uniformly among all unary actions whereas + `env.action_space.sample()` (see :func:`grid2op.Action.SerializableActionSpace.sample` for more + information about the later). + """ def __init__(self, action_space, action_space_converter=IdToAct, **kwargs_converter): AgentWithConverter.__init__(self, action_space, action_space_converter, **kwargs_converter) diff --git a/grid2op/Agent/RecoPowerlineAgent.py b/grid2op/Agent/RecoPowerlineAgent.py index 505149fde..b32b51c3b 100644 --- a/grid2op/Agent/RecoPowerlineAgent.py +++ b/grid2op/Agent/RecoPowerlineAgent.py @@ -27,4 +27,3 @@ def _get_tested_action(self, observation): if np.any(can_be_reco): res = [self.action_space({"set_line_status": [(id_, +1)]}) for id_ in np.where(can_be_reco)[0]] return res - diff --git a/grid2op/Backend/Backend.py b/grid2op/Backend/Backend.py index c669cadd7..f129a68bc 100644 --- a/grid2op/Backend/Backend.py +++ b/grid2op/Backend/Backend.py @@ -16,13 +16,25 @@ import pandas as pd from grid2op.dtypes import dt_int, dt_float, dt_bool -from grid2op.Exceptions import * +from grid2op.Exceptions import EnvError, DivergingPowerFlow, IncorrectNumberOfElements, IncorrectNumberOfLoads +from grid2op.Exceptions import IncorrectNumberOfGenerators, BackendError, IncorrectNumberOfLines from grid2op.Space import GridObjects from grid2op.Action import CompleteAction class Backend(GridObjects, ABC): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Unless if you want to code yourself a backend this is not recommend to alter it + or use it directly in any way. + + If you want to code a backend, an example is given in :class:`PandaPowerBackend` ( + or in the repository lightsim2grid on github) + + This documentation is present mainly for exhaustivity. It is not recommended to manipulate a Backend + directly. Prefer using an :class:`grid2op.Environment.Environment` + This is a base class for each :class:`Backend` object. It allows to run power flow smoothly, and abstract the method of computing cascading failures. 
This class allow the user or the agent to interact with an power flow calculator, while relying on dedicated @@ -51,7 +63,6 @@ class Backend(GridObjects, ABC): And, if the flag :attr:Backend.shunts_data_available` is set to ``True`` the method :func:`Backend.shunt_info` should also be implemented. - In order to be valid and carry out some computations, you should call :func:`Backend.load_grid` and later :func:`grid2op.Spaces.GridObjects.assert_grid_correct`. It is also more than recommended to call :func:`Backend.assert_grid_correct_after_powerflow` after the first powerflow. This is all carried ou in the @@ -65,6 +76,7 @@ class Backend(GridObjects, ABC): thermal_limit_a: :class:`numpy.array`, dtype:float Thermal limit of the powerline in amps for each powerline. Thie thermal limit is relevant on only one side of the powerline: the same side returned by :func:`Backend.get_line_overflow` + """ env_name = "unknown" @@ -91,6 +103,10 @@ def __init__(self, detailed_infos_for_cascading_failures=False): def assert_grid_correct_after_powerflow(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is done as it should be by the Environment + This method is called by the environment. It ensure that the backend remains consistent even after a powerflow has be run with :func:`Backend.runpf` method. @@ -154,6 +170,10 @@ def assert_grid_correct_after_powerflow(self): def reset(self, grid_path, grid_filename=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is done in the `env.reset()` method and should be performed otherwise. + Reload the power grid. For backwards compatibility this method calls `Backend.load_grid`. But it is encouraged to overload it in the subclasses. @@ -163,6 +183,10 @@ def reset(self, grid_path, grid_filename=None): @abstractmethod def load_grid(self, path, filename=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is called once at the loading of the powergrid. + Load the powergrid. It should first define self._grid. @@ -185,6 +209,10 @@ def load_grid(self, path, filename=None): @abstractmethod def close(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is called by `env.close()` do not attempt to use it otherwise. + This function is called when the environment is over. After calling this function, the backend might not behave properly, and in any case should not be used before another call to :func:`Backend.load_grid` is performed @@ -197,6 +225,13 @@ def close(self): @abstractmethod def apply_action(self, action): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Don't attempt to apply an action directly to a backend. This function will modify + the powergrid state given the action in input. + + This is one of the core function if you want to code a backend. + Modify the powergrid with the action given by an agent or by the envir. For the L2RPN project, this action is mainly for topology if it has been sent by the agent. Or it can also affect production and loads, if the action is made by the environment. @@ -205,7 +240,7 @@ def apply_action(self, action): the implementation of this method. :param action: the action to be implemented on the powergrid. 
- :type action: :class:`grid2op.Action.Action` + :type action: :class:`grid2op.Action._BackendAction._BackendAction` :return: ``None`` """ @@ -214,6 +249,15 @@ def apply_action(self, action): @abstractmethod def runpf(self, is_dc=False): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is called by :func:`Backend.next_grid_state` (that computes some kind of + cascading failures). + + This is one of the core function if you want to code a backend. It will carry out + a powerflow. + + Run a power flow on the underlying _grid. Powerflow can be AC (is_dc = False) or DC (is_dc = True) @@ -229,6 +273,8 @@ def runpf(self, is_dc=False): @abstractmethod def copy(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Performs a deep copy of the backend. :return: An instance of Backend equal to :attr:`.self`, but deep copied. @@ -238,6 +284,8 @@ def copy(self): def save_file(self, full_path): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Save the current power _grid in a human readable format supported by the backend. The format is not modified by this wrapper. @@ -253,6 +301,10 @@ def save_file(self, full_path): @abstractmethod def get_line_status(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.line_status` instead + Return the status of each lines (connected : True / disconnected: False ) It is assume that the order of the powerline is fixed: if the status of powerline "l1" is put at the 42nd element @@ -271,6 +323,11 @@ def get_line_status(self): @abstractmethod def get_line_flow(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.a_or` or + :attr:`grid2op.Observation.BaseObservation.a_ex` for example + Return the current flow in each lines of the powergrid. Only one value per powerline is returned. If the AC mod is used, this shall return the current flow on the end of the powerline where there is a protection. @@ -292,6 +349,10 @@ def get_line_flow(self): def set_thermal_limit(self, limits): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + You can set the thermal limit directly in the environment. + This function is used as a convenience function to set the thermal limits :attr:`Backend.thermal_limit_a` in amperes. @@ -313,11 +374,6 @@ def set_thermal_limit(self, limits): in this dictionnary will be modified) - as value the new thermal limit (should be a strictly positive float). - - Returns - ------- - ``None`` - """ if isinstance(limits, np.ndarray): if limits.shape[0] == self.n_line: @@ -341,7 +397,13 @@ def set_thermal_limit(self, limits): def update_thermal_limit(self, env): """ - Upade the new thermal limit in case of DLR for example. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is done in a call to `env.step` in case of DLR for example. + + If you don't want this feature, do not implement it. + + Update the new thermal limit in case of DLR for example. By default it does nothing. @@ -352,21 +414,22 @@ def update_thermal_limit(self, env): weather condition are accessible by the backend. Our methodology doesn't make any assumption on the method used to get these thermal limits. 
- Parameters ---------- env: :class:`grid2op.Environment.Environment` The environment used to compute the thermal limit - Returns - ------- - ``None`` """ pass def get_thermal_limit(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Retrieve the thermal limit directly from the environment instead (with a call + to :func:`grid2op.Environment.BaseEnc.get_thermal_limit` for example) + Gives the thermal limit (in amps) for each powerline of the _grid. Only one value per powerline is returned. It is assumed that both :func:`Backend.get_line_flow` and *_get_thermal_limit* gives the value of the same @@ -384,6 +447,10 @@ def get_thermal_limit(self): def get_relative_flow(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.rho` + This method return the relative flows, *eg.* the current flow divided by the thermal limits. It has a pretty straightforward default implementation, but it can be overriden for example for transformer if the limits are on the lower voltage side or on the upper voltage level. @@ -400,6 +467,15 @@ def get_relative_flow(self): def get_line_overflow(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.rho` and + check whether or not the flow is higher tha 1. or have a look at + :attr:`grid2op.Observation.BaseObservation.timestep_overflow` and check the + non zero index. + + Prefer using the attribute of the :class:`grid2op.Observation.BaseObservation` + faster accessor to the line that are on overflow. For assumption about the order of the powerline flows return in this vector, see the help of the @@ -415,6 +491,10 @@ def get_line_overflow(self): @abstractmethod def get_topo_vect(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.topo_vect` + Get the topology vector from the :attr:`Backend._grid`. The topology vector defines, for each object, on which bus it is connected. It returns -1 if the object is not connected. @@ -442,38 +522,59 @@ def get_topo_vect(self): @abstractmethod def generators_info(self): """ - This method is used to retrieve informations about the generators. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.prod_p`, + :attr:`grid2op.Observation.BaseObservation.prod_q` and + :attr:`grid2op.Observation.BaseObservation.prod_v` instead. + + This method is used to retrieve information about the generators (active, reactive production + and voltage magnitude of the bus to which it is connected). Returns ------- prod_p ``numpy.ndarray`` - The active power production for each generator + The active power production for each generator (in MW) prod_q ``numpy.ndarray`` - The reactive power production for each generator + The reactive power production for each generator (in MVAr) prod_v ``numpy.ndarray`` - The voltage magnitude of the bus to which each generators is connected + The voltage magnitude of the bus to which each generators is connected (in kV) """ pass @abstractmethod def loads_info(self): """ - This method is used to retrieve informations about the loads. + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.load_p`, + :attr:`grid2op.Observation.BaseObservation.load_q` and + :attr:`grid2op.Observation.BaseObservation.load_v` instead. + + This method is used to retrieve information about the loads (active, reactive consumption + and voltage magnitude of the bus to which it is connected). Returns ------- load_p ``numpy.ndarray`` - The active power consumption for each load + The active power consumption for each load (in MW) load_q ``numpy.ndarray`` - The reactive power consumption for each load + The reactive power consumption for each load (in MVAr) load_v ``numpy.ndarray`` - The voltage magnitude of the bus to which each load is connected + The voltage magnitude of the bus to which each load is connected (in kV) """ pass @abstractmethod def lines_or_info(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.p_or`, + :attr:`grid2op.Observation.BaseObservation.q_or`, + :attr:`grid2op.Observation.BaseObservation.a_or` and, + :attr:`grid2op.Observation.BaseObservation.v_or` instead + It returns the information extracted from the _grid at the origin end of each powerline. For assumption about the order of the powerline flows return in this vector, see the help of the @@ -482,19 +583,26 @@ def lines_or_info(self): Returns ------- p_or ``numpy.ndarray`` - the origin active power flowing on the lines + the origin active power flowing on the lines (in MW) q_or ``numpy.ndarray`` - the origin reactive power flowing on the lines + the origin reactive power flowing on the lines (in MVAr) v_or ``numpy.ndarray`` - the voltage magnitude at the origin of each powerlines + the voltage magnitude at the origin of each powerlines (in kV) a_or ``numpy.ndarray`` - the current flow at the origin of each powerlines + the current flow at the origin of each powerlines (in A) """ pass @abstractmethod def lines_ex_info(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :attr:`grid2op.Observation.BaseObservation.p_ex`, + :attr:`grid2op.Observation.BaseObservation.q_ex`, + :attr:`grid2op.Observation.BaseObservation.a_ex` and, + :attr:`grid2op.Observation.BaseObservation.v_ex` instead + It returns the information extracted from the _grid at the extremity end of each powerline. For assumption about the order of the powerline flows return in this vector, see the help of the @@ -503,18 +611,20 @@ def lines_ex_info(self): Returns ------- p_ex ``numpy.ndarray`` - the extremity active power flowing on the lines + the extremity active power flowing on the lines (in MW) q_ex ``numpy.ndarray`` - the extremity reactive power flowing on the lines + the extremity reactive power flowing on the lines (in MVAr) v_ex ``numpy.ndarray`` - the voltage magnitude at the extremity of each powerlines + the voltage magnitude at the extremity of each powerlines (in kV) a_ex ``numpy.ndarray`` - the current flow at the extremity of each powerlines + the current flow at the extremity of each powerlines (in A) """ pass def shunt_info(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This method is optional. If implemented, it should return the proper information about the shunt in the powergrid. @@ -541,6 +651,8 @@ def shunt_info(self): def sub_from_bus_id(self, bus_id): """ + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Optionnal method that allows to get the substation if the bus id is provided. :param bus_id: @@ -551,30 +663,41 @@ def sub_from_bus_id(self, bus_id): @abstractmethod def _disconnect_line(self, id_): """ - Disconnect the line of id "id" in the backend. - In this scenario, the *id* of a powerline is its position (counted starting from O) in the vector returned by + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using the action space to disconnect a powerline. + + + Disconnect the line of id "id\\_ " in the backend. + In this scenario, the *id\\_* of a powerline is its position (counted starting from O) in the vector returned by :func:`Backend.get_line_status` or :func:`Backend.get_line_flow` for example. For example, if the current flow on powerline "l1" is the 42nd element of the vector returned by :func:`Backend.get_line_flow` then :func:`Backend._disconnect_line(42)` will disconnect this same powerline "l1". For assumption about the order of the powerline flows return in this vector, see the help of the :func:`Backend.get_line_status` method. - :param id: id of the powerline to be disconnected - :type id: int + :param id_: id of the powerline to be disconnected + :type id_: int - :return: ``None`` """ pass def _runpf_with_diverging_exception(self, is_dc): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Computes a power flow on the _grid and raises an exception in case of diverging power flow, or any other exception that can be thrown by the backend. :param is_dc: mode of the power flow. If *is_dc* is True, then the powerlow is run using the DC approximation otherwise it uses the AC powerflow. :type is_dc: bool - :return: ``None`` + Raises + ------ + exc_: :class:`grid2op.Exceptions.DivergingPowerFlow` + In case of divergence of the powerflow + """ conv = False try: @@ -590,7 +713,11 @@ def _runpf_with_diverging_exception(self, is_dc): def next_grid_state(self, env, is_dc=False): """ - This method is called by the environment to compute the next _grid states. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is called by `env.step` + + This method is called by the environment to compute the next\\_grid\\_states. It allows to compute the powerline and approximate the "cascading failures" if there are some overflows. 
Attributes @@ -616,12 +743,12 @@ def next_grid_state(self, env, is_dc=False): infos = [] disconnected_during_cf = np.full(self.n_line, fill_value=False, dtype=dt_bool) conv_ = self._runpf_with_diverging_exception(is_dc) - if env.no_overflow_disconnection or conv_ is not None: + if env._no_overflow_disconnection or conv_ is not None: return disconnected_during_cf, infos, conv_ # the environment disconnect some - init_time_step_overflow = copy.deepcopy(env.timestep_overflow) + init_time_step_overflow = copy.deepcopy(env._timestep_overflow) while True: # simulate the cascading failure lines_flows = self.get_line_flow() @@ -629,11 +756,11 @@ def next_grid_state(self, env, is_dc=False): lines_status = self.get_line_status() # a) disconnect lines on hard overflow - to_disc = lines_flows > env.hard_overflow_threshold * thermal_limits + to_disc = lines_flows > env._hard_overflow_threshold * thermal_limits # b) deals with soft overflow init_time_step_overflow[ (lines_flows >= thermal_limits) & (lines_status)] += 1 - to_disc[init_time_step_overflow > env.nb_timestep_overflow_allowed] = True + to_disc[init_time_step_overflow > env._nb_timestep_overflow_allowed] = True # disconnect the current power lines if np.sum(to_disc[lines_status]) == 0: @@ -654,6 +781,8 @@ def next_grid_state(self, env, is_dc=False): def check_kirchoff(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Check that the powergrid respects kirchhoff's law. This function can be called at any moment to make sure a powergrid is in a consistent state, or to perform some tests for example. @@ -665,15 +794,15 @@ def check_kirchoff(self): Returns ------- p_subs ``numpy.ndarray`` - sum of injected active power at each substations + sum of injected active power at each substations (MW) q_subs ``numpy.ndarray`` - sum of injected reactive power at each substations + sum of injected reactive power at each substations (MVAr) p_bus ``numpy.ndarray`` sum of injected active power at each buses. It is given in form of a matrix, with number of substations as - row, and number of columns equal to the maximum number of buses for a substation + row, and number of columns equal to the maximum number of buses for a substation (MW) q_bus ``numpy.ndarray`` sum of injected reactive power at each buses. It is given in form of a matrix, with number of substations as - row, and number of columns equal to the maximum number of buses for a substation + row, and number of columns equal to the maximum number of buses for a substation (MVAr) """ p_or, q_or, v_or, *_ = self.lines_or_info() @@ -741,16 +870,17 @@ def check_kirchoff(self): def load_redispacthing_data(self, path, name='prods_charac.csv'): """ - This method will load everything needed for the redispatching and unit commitment problem. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This method will load everything needed for the redispatching and unit commitment problem. Parameters ---------- - path - name + path: ``str`` + Location of the datafram containing the redispatching data. 
- Returns - ------- + name: ``str`` + Name of the dataframe containing the redispatching data """ # for redispatching @@ -850,9 +980,4 @@ def get_action_to_set(self): set_me = complete_action_class(self) set_me.update({"set_line_status": line_status, "set_bus": topo_vect}) - - #injs = {"prod_p": prod_p, "prod_v": prod_v, - # "load_p": load_p, "load_q": load_q}} - - # set_me.update({"injection": injs}) return set_me diff --git a/grid2op/Backend/PandaPowerBackend.py b/grid2op/Backend/PandaPowerBackend.py index e8809c0b7..8cc020134 100644 --- a/grid2op/Backend/PandaPowerBackend.py +++ b/grid2op/Backend/PandaPowerBackend.py @@ -6,14 +6,6 @@ # SPDX-License-Identifier: MPL-2.0 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. -""" -This module presents an example of an implementation of a `grid2op.Backend` when using the powerflow -implementation "pandapower" available at `PandaPower `_ for more details about -this backend. This file is provided as an example of a proper :class:`grid2op.Backend.Backend` implementation. - -This backend currently does not work with 3 winding transformers and other exotic object. -""" - import os # load the python os default module import sys # laod the python sys default module import copy @@ -41,6 +33,18 @@ class PandaPowerBackend(Backend): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + If you want to code a backend to use grid2op with another powerflow, you can get inspired + from this class. Note However that implies knowning the behaviour + of PandaPower. + + This module presents an example of an implementation of a `grid2op.Backend` when using the powerflow + implementation "pandapower" available at `PandaPower `_ for more details about + this backend. This file is provided as an example of a proper :class:`grid2op.Backend.Backend` implementation. + + This backend currently does not work with 3 winding transformers and other exotic object. + As explained in the `grid2op.Backend` module, every module must inherit the `grid2op.Backend` class. This class have more attributes that are used internally for faster information retrieval. @@ -83,6 +87,20 @@ class PandaPowerBackend(Backend): v_ex: :class:`numpy.array`, dtype:float The voltage magnitude at the extremity bus of the powerline + Examples + --------- + The only recommended way to use this class is by passing an instance of a Backend into the "make" + function of grid2op. Do not attempt to use a backend outside of this specific usage. + + .. code-block:: python + + import grid2op + from grid2op.Backend import PandaPowerBackend + backend = PandaPowerBackend() + + env = grid2op.make(backend=backend) + # and use "env" as any open ai gym environment. + """ def __init__(self, detailed_infos_for_cascading_failures=False): Backend.__init__(self, detailed_infos_for_cascading_failures=detailed_infos_for_cascading_failures) @@ -141,6 +159,8 @@ def __init__(self, detailed_infos_for_cascading_failures=False): def get_nb_active_bus(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Compute the amount of buses "in service" eg with at least a powerline connected to it. Returns @@ -168,6 +188,8 @@ def _load_grid_gen_vm_pu(grid): def reset(self, path=None, grid_filename=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Reload the grid. 
For pandapower, it is a bit faster to store of a copy of itself at the end of load_grid and deep_copy it to itself instead of calling load_grid again @@ -179,6 +201,8 @@ def reset(self, path=None, grid_filename=None): def load_grid(self, path=None, filename=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Load the _grid, and initialize all the member of the class. Note that in order to perform topological modification of the substation of the underlying powergrid, some buses are added to the test case loaded. They are set as "out of service" unless a topological action acts on these specific substations. @@ -456,6 +480,8 @@ def load_grid(self, path=None, filename=None): def _convert_id_topo(self, id_big_topo): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + convert an id of the big topo vector into: - the id of the object in its "only object" (eg if id_big_topo represents load 2, then it will be 2) @@ -466,6 +492,8 @@ def _convert_id_topo(self, id_big_topo): def apply_action(self, backendAction=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Specific implementation of the method to apply an action modifying a powergrid in the pandapower format. """ if backendAction is None: @@ -603,6 +631,8 @@ def _aux_get_line_info(self, colname1, colname2): def runpf(self, is_dc=False): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Run a power flow on the underlying _grid. This implements an optimization of the powerflow computation: if the number of buses has not changed between two calls, the previous results are re used. This speeds up the computation @@ -699,6 +729,8 @@ def runpf(self, is_dc=False): def copy(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Performs a deep copy of the power :attr:`_grid`. As pandapower is pure python, the deep copy operator is perfectly suited for the task. """ @@ -707,6 +739,8 @@ def copy(self): def close(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Called when the :class:`grid2op;Environment` has terminated, this function only reset the grid to a state where it has not been loaded. """ @@ -715,6 +749,10 @@ def close(self): def save_file(self, full_path): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + You might want to use it for debugging purpose only, and only if you develop yourself a backend. + Save the file to json. :param full_path: :return: @@ -723,6 +761,8 @@ def save_file(self, full_path): def get_line_status(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + As all the functions related to powerline, pandapower split them into multiple dataframe (some for transformers, some for 3 winding transformers etc.). We make sure to get them all here. """ @@ -732,10 +772,6 @@ def _get_line_status(self): return np.concatenate((self._grid.line["in_service"].values, self._grid.trafo["in_service"].values)).astype(dt_bool) def get_line_flow(self): - """ - return the powerflow in amps in all powerlines. 
- :return: - """ return self.a_or def _disconnect_line(self, id_): diff --git a/grid2op/Chronics/ChangeNothing.py b/grid2op/Chronics/ChangeNothing.py index 537824fc6..1ab60924a 100644 --- a/grid2op/Chronics/ChangeNothing.py +++ b/grid2op/Chronics/ChangeNothing.py @@ -15,8 +15,18 @@ class ChangeNothing(GridValue): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Do not attempt to create an object of this class. This is initialized by the environment + at its creation. + + This set of class is mainly internal. + + We don't recommend you, unless you want to code a custom "chroncis class" to change anything + on these classes. + This class is the most basic class to modify a powergrid values. - It does nothing exceptie increasing :attr:`GridValue.max_iter` and the :attr:`GridValue.current_datetime`. + It does nothing aside from increasing :attr:`GridValue.max_iter` and the :attr:`GridValue.current_datetime`. """ def __init__(self, time_interval=timedelta(minutes=5), max_iter=-1, start_datetime=datetime(year=2019, month=1, day=1), @@ -36,64 +46,13 @@ def initialize(self, order_backend_loads, order_backend_prods, order_backend_lin self.hazard_duration = np.zeros(shape=(self.n_line, ), dtype=dt_int) def load_next(self): - """ - This function does nothing but the two requirements of load_next ie: - - - increasing :attr:`GridValue.curr_iter` of 1 - - increasing :attr:`GridValue.current_datetime` - - Returns - ------- - timestamp: ``datetime.datetime`` - The current timestamp for which the modifications have been generated. - - dict_: ``dict`` - Always empty, indicating i do nothing. - - maintenance_time: ``numpy.ndarray``, dtype:``int`` - Information about the next planned maintenance. See :attr:`GridValue.maintenance_time` for more information. - - maintenance_duration: ``numpy.ndarray``, dtype:``int`` - Information about the duration of next planned maintenance. See :attr:`GridValue.maintenance_duration` - for more information. - - hazard_duration: ``numpy.ndarray``, dtype:``int`` - Information about the current hazard. See :attr:`GridValue.hazard_duration` - for more information. - - prod_v: ``numpy.ndarray``, dtype:``float`` - the (stored) value of the generator voltage setpoint - - """ self.current_datetime += self.time_interval self.curr_iter += 1 return self.current_datetime, {}, self.maintenance_time, self.maintenance_duration, self.hazard_duration, None def check_validity(self, backend): - """ - - Parameters - ---------- - backend: :class:`grid2op.Backend` - The backend, not used here. - - Returns - ------- - res: ``bool`` - Always ``True``. As this doesn't change the powergird, there is no way to make invalid changed. - """ return True def next_chronics(self): - """ - Restarts: - - - :attr:`GridValue.current_datetime` to its origin value ( 2019 / 01 / 01) - - :attr:`GridValue.curr_iter` to 0 - - Returns - ------- - - """ self.current_datetime = self.start_datetime self.curr_iter = 0 diff --git a/grid2op/Chronics/GridStateFromFile.py b/grid2op/Chronics/GridStateFromFile.py index 0b17014de..893ab5844 100644 --- a/grid2op/Chronics/GridStateFromFile.py +++ b/grid2op/Chronics/GridStateFromFile.py @@ -22,6 +22,11 @@ class GridStateFromFile(GridValue): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Do not attempt to create an object of this class. This is initialized by the environment + at its creation. + Read the injections values from a file stored on hard drive. 
More detailed about the files is provided in the :func:`GridStateFromFile.initialize` method. @@ -41,6 +46,26 @@ class GridStateFromFile(GridValue): In this values, only 1 episode is stored. If the end of the episode is reached and another one should start, then it will loop from the beginning. + It reads the following files from the "path" location specified: + + - "prod_p.csv": for each time steps, this file contains the value for the active production of + each generators of the grid (it counts as many rows as the number of time steps - and its header) + and as many columns as the number of generators on the grid. The header must contains the names of + the generators used to map their value on the grid. Values must be convertible to floating point. + - "prod_v.csv": same as "prod_p.csv" but for the production voltage setpoint. + - "load_p.csv": same as "prod_p.csv" but for the load active value (number of columns = number of loads) + - "load_q.csv": same as "prod_p.csv" but for the load reactive value (number of columns = number of loads) + - "maintenance.csv": that contains whether or not there is a maintenance for a given powerline (column) at + each time step (row). + - "hazards.csv": that contains whether or not there is a hazard for a given powerline (column) at + each time step (row). + + If a file is missing, it is understood as "this value will not be modified". For example, if the file + "prod_v.csv" is not present, it will be equivalent as not modifying the production voltage setpoint, never. + + Except if the attribute :attr:`GridStateFromFile.sep` is modified, the above tables should be "semi colon" (;) + separated. + Attributes ---------- path: ``str`` @@ -85,6 +110,12 @@ def __init__(self, start_datetime=datetime(year=2019, month=1, day=1), chunk_size=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Do not attempt to create an object of this class. This is initialized by the environment + at its creation. + + Build an instance of GridStateFromFile. Such an instance should be built before an :class:`grid2op.Environment` is created. @@ -290,6 +321,10 @@ def _get_next_chunk(self): def initialize(self, order_backend_loads, order_backend_prods, order_backend_lines, order_backend_subs, names_chronics_to_backend=None): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Called at the creation of the environment. + In this function, the numpy arrays are read from the csv using the panda.dataframe engine. In order to be valid, the folder located at :attr:`GridStateFromFile.path` can contain: @@ -325,10 +360,6 @@ def initialize(self, order_backend_loads, order_backend_prods, order_backend_lin ---------- See help of :func:`GridValue.initialize` for a detailed help about the parameters. - Returns - ------- - ``None`` - """ self.n_gen = len(order_backend_prods) self.n_load = len(order_backend_loads) @@ -463,13 +494,6 @@ def initialize(self, order_backend_loads, order_backend_prods, order_backend_lin @staticmethod def _file_len(fname, ext_): - # i = -1 - # reading_mode = "r" - # if ext_ != ".csv": - # reading_mode += "b" - # with open(fname, reading_mode) as f: - # for i, l in enumerate(f): - # pass res = pd.read_csv(fname, sep="@", dtype=str).shape[0] return res @@ -520,6 +544,8 @@ def _init_attrs(self, load_p, load_q, prod_p, prod_v, hazards=None, maintenance= def done(self): """ + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Compare to :func:`GridValue.done` an episode can be over for 2 main reasons: - :attr:`GridValue.max_iter` has been reached @@ -617,20 +643,6 @@ def load_next(self): return self.current_datetime, res, maintenance_time, maintenance_duration, hazard_duration, prod_v def check_validity(self, backend): - """ - A call to this method ensure that the action that will be sent to the current :class:`grid2op.Environment` - can be properly implemented by its :class:`grid2op.Backend`. - This specific method check that the dimension of all vectors are consistent - - Parameters - ---------- - backend: :class:`grid2op.Backend.Backend` - The backend used by the :class:`grid2op.Environment.Environment` - - Returns - ------- - ``None`` - """ at_least_one = False if self.load_p is not None: if self.load_p.shape[1] != backend.n_load: @@ -805,6 +817,28 @@ def _get_colorder_arrays_for_saving(self): self._order_backend_lines, self._order_backend_lines] def split_and_save(self, datetime_beg, datetime_end, path_out): + """ + You can use this function to save the values of the chronics in a format that will be loadable + by :class:`GridStateFromFile` + + Notes + ----- + Prefer using the :func:`Multifolder.split_and_save` that handles different chronics + + Parameters + ---------- + datetime_beg: ``str`` + Time stamp of the beginning of the data you want to save (time stamp in "%Y-%m-%d %H:%M" + format) + + datetime_end: ``str`` + Time stamp of the end of the data you want to save (time stamp in "%Y-%m-%d %H:%M" + format) + + path_out: ``str`` + Location where to save the data + + """ # work on a copy of myself tmp = copy.deepcopy(self) datetime_beg = self._convert_datetime(datetime_beg) @@ -857,4 +891,3 @@ def split_and_save(self, datetime_beg, datetime_end, path_out): tmp_for_time_delta = datetime(year=2018, month=1, day=1, hour=0, minute=0, second=0) + self.time_interval with open(os.path.join(path_out, "time_interval.info"), "w") as f: f.write("{:%H:%M}\n".format(tmp_for_time_delta)) - diff --git a/grid2op/Chronics/GridStateFromFileWithForecasts.py b/grid2op/Chronics/GridStateFromFileWithForecasts.py index a7b234759..876e318bb 100644 --- a/grid2op/Chronics/GridStateFromFileWithForecasts.py +++ b/grid2op/Chronics/GridStateFromFileWithForecasts.py @@ -97,10 +97,6 @@ def initialize(self, order_backend_loads, order_backend_prods, order_backend_lin ---------- See help of :func:`GridValue.initialize` for a detailed help about the _parameters. - Returns - ------- - ``None`` - """ super().initialize(order_backend_loads, order_backend_prods, order_backend_lines, order_backend_subs, names_chronics_to_backend) diff --git a/grid2op/Chronics/GridValue.py b/grid2op/Chronics/GridValue.py index b45a8c668..5f7234778 100644 --- a/grid2op/Chronics/GridValue.py +++ b/grid2op/Chronics/GridValue.py @@ -37,6 +37,10 @@ class GridValue(RandomObject, ABC): data to be generated from this object, then :func:`GridValue.load_next` should raise a :class:`StopIteration` exception and a call to :func:`GridValue.done` should return True. + In grid2op, the production and loads (and hazards or maintenance) can be stored in this type of + of "GridValue". This class will map things generated (or read from a file) and assign the given element + of the powergrid with its proper value at each time steps. 
+ Attributes ---------- time_interval: :class:`.datetime.timedelta` @@ -48,15 +52,6 @@ class GridValue(RandomObject, ABC): current_datetime: :class:`datetime.datetime` The timestamp of the current scenario. - n_gen: ``int`` - Number of generators in the powergrid - - n_load: ``int`` - Number of loads in the powergrid - - n_line: ``int`` - Number of powerline in the powergrid - max_iter: ``int`` Number maximum of data to generate for one episode. @@ -99,9 +94,6 @@ def __init__(self, self.time_interval = time_interval self.current_datetime = start_datetime self.start_datetime = start_datetime - self.n_gen = None - self.n_load = None - self.n_line = None self.max_iter = max_iter self.curr_iter = 0 @@ -158,10 +150,6 @@ def initialize(self, order_backend_loads, order_backend_prods, order_backend_lin names_chronics_to_backend: ``dict`` See in the description of the method for more information about its format. - Returns - ------- - ``None`` - Examples -------- For example, suppose we have a :class:`grid2op.Backend` with: @@ -382,14 +370,14 @@ def get_hazard_duration_1d(hazard): Parameters ---------- - maintenance: ``numpy.ndarray`` - 1 dimensional array representing the time series of the maintenance (0 there is no maintenance, 1 there - is a maintenance at this time step) + hazard: ``numpy.ndarray`` + 1 dimensional array representing the time series of the hazards (0 there is no hazard, 1 there + is a hazard at this time step) Returns ------- - maintenance_duration: ``numpy.ndarray`` - Array representing the time series of the duration of the next maintenance forseeable. + hazard_duration: ``numpy.ndarray`` + Array representing the time series of the duration of the next hazard forseeable. Examples -------- @@ -443,14 +431,20 @@ def get_hazard_duration_1d(hazard): @abstractmethod def load_next(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is automatically called by the "env.step" function. It loads the next information + about the grid state (load p and load q, prod p and prod v as well as some maintenance + or hazards information) + Generate the next values, either by reading from a file, or by generating on the fly and return a dictionnary compatible with the :class:`grid2op.BaseAction` class allowed for the :class:`Environment`. - More information about this dictionnary can be found at :func:`grid2op.BaseAction.update`. + More information about this dictionary can be found at :func:`grid2op.BaseAction.update`. - As a (quick) reminder: this dictionnary has for keys: + As a (quick) reminder: this dictionary has for keys: - - "injection" (optional): a dictionnary with keys (optional) "load_p", "load_q", "prod_p", "prod_v" + - "injection" (optional): a dictionary with keys (optional) "load_p", "load_q", "prod_p", "prod_v" - "hazards" (optional) : the outage suffered from the _grid - "maintenance" (optional) : the maintenance operations planned on the grid for the current time step. @@ -460,8 +454,7 @@ def load_next(self): The current timestamp for which the modifications have been generated. dict_: ``dict`` - A dictionnary understandable by the ::func:`grid2op.BaseAction.update` method. **NB** this function should - return the dictionnary that will be converted, is should not, in any case, return an action. + Always empty, indicating i do nothing. maintenance_time: ``numpy.ndarray``, dtype:``int`` Information about the next planned maintenance. See :attr:`GridValue.maintenance_time` for more information. 
@@ -474,36 +467,43 @@ def load_next(self): Information about the current hazard. See :attr:`GridValue.hazard_duration` for more information. + prod_v: ``numpy.ndarray``, dtype:``float`` + the (stored) value of the generator voltage setpoint + Raises ------ StopIteration if the chronics is over + """ self.current_datetime += self.time_interval - return self.current_datetime, {}, self.maintenance_time, self.maintenance_duration, self.hazard_duration + return self.current_datetime, {}, self.maintenance_time, self.maintenance_duration, self.hazard_duration, None @abstractmethod def check_validity(self, backend): """ - To make sure that the data returned by this class are of the proper dimension, a call to this method - must be performed before actually using the data generated by this class. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is called at the creation of the environment to ensure the Backend and the chronics + are consistent with one another. - In the grid2op framework, this is ensure because the :class:`grid2op.Environment` calls this method - in its initialization. + A call to this method ensure that the action that will be sent to the current :class:`grid2op.Environment` + can be properly implemented by its :class:`grid2op.Backend`. + This specific method check that the dimension of all vectors are consistent Parameters ---------- - backend: :class:`grid2op.Backend` - The backend used by the :class;`Environment`. - - Returns - ------- - + backend: :class:`grid2op.Backend.Backend` + The backend used by the :class:`grid2op.Environment.Environment` """ raise EnvError("check_validity not implemented") def done(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Use the :class:`ChroncisHandler` for such purpose + Whether the episode is over or not. Returns @@ -520,6 +520,10 @@ def done(self): def forecasts(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Use the :class:`ChroncisHandler` for such purpose + This method is used to generate the forecasts that are made available to the :class:`grid2op.BaseAgent`. This forecasts are behaving the same way than a list of tuple as the one returned by :func:`GridValue.load_next` method. @@ -531,19 +535,23 @@ def forecasts(self): ------- res: ``list`` Each element of this list having the same type as what is returned by :func:`GridValue.load_next`. + """ return [] @abstractmethod def next_chronics(self): """ - Load the next batch of chronics. This function is called after an episode has finished by the - :class:`grid2op.Environment` or the :class:`grid2op.Runner`. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Move to the next "chronics", representing the next "level" if we make the parallel + with video games. + + A call to this function should at least restart: + + - :attr:`GridValue.current_datetime` to its origin value + - :attr:`GridValue.curr_iter` - A call to this function should also reset :attr:`GridValue.curr_iter` to 0. - Returns - ------- - ``None`` """ pass @@ -559,9 +567,6 @@ def tell_id(self, id_num): By default it does nothing. - Returns - ------- - ``None`` """ warnings.warn("Class {} doesn't handle different input folder. \"tell_id\" method has no impact." 
"".format(type(self).__name__)) @@ -592,28 +597,44 @@ def max_timestep(self): Returns ------- - res: int + res: ``int`` -1 if possibly infinite length of a positive integer representing the maximum duration of this episode """ # warnings.warn("Class {} has possibly and infinite duration.".format(type(self).__name__)) return self.max_iter - def shuffle(self, shuffler): + def shuffle(self, shuffler=None): """ - This method can be overiden if the data that are represented by this object need to be shuffle. + This method can be overridden if the data that are represented by this object need to be shuffle. By default it does nothing. + Parameters ---------- shuffler: ``object`` Any function that can be used to shuffle the data. + """ + pass + + def sample_next_chronics(self, probabilities=None): + """ + this is used to sample the next chronics used with given probabilities + + Parameters + ----------- + probabilities: ``np.ndarray`` + Array of integer with the same size as the number of chronics in the cache. + If it does not sum to one, it is rescaled such that it sums to one. + Returns ------- + selected: ``int`` + The integer that was selected. """ - pass + return -1 def set_chunk_size(self, new_chunk_size): """ @@ -630,9 +651,13 @@ def set_chunk_size(self, new_chunk_size): def fast_forward(self, nb_timestep): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using :func:`grid2op.Environment.BaseEnv.fast_forward_chronics` + This method allows you to skip some time step at the beginning of the chronics. - This is usefull at the beginning of the training, if you want your agent to learn on more diverse scenarios. + This is useful at the beginning of the training, if you want your agent to learn on more diverse scenarios. Indeed, the data provided in the chronics usually starts always at the same date time (for example Jan 1st at 00:00). This can lead to suboptimal exploration, as during this phase, only a few time steps are managed by the agent, so in general these few time steps will correspond to grid state around Jan 1st at 00:00. diff --git a/grid2op/Chronics/MultiFolder.py b/grid2op/Chronics/MultiFolder.py index e83cfeb54..9768cdf47 100644 --- a/grid2op/Chronics/MultiFolder.py +++ b/grid2op/Chronics/MultiFolder.py @@ -50,9 +50,6 @@ class Multifolder(GridValue): :attr:`Multifolder.path`. Each one should contain data in a format that is readable by :attr:`MultiFolder.gridvalueClass`. - id_chron_folder_current: ``int`` - Id (in :attr:`MultiFolder.subpaths`) for which data are generated in the current episode. - """ def __init__(self, path, time_interval=timedelta(minutes=5), @@ -150,7 +147,7 @@ def next_chronics(self): # self.space_prng.shuffle(self._order) self._prev_cache_id %= len(self._order) - def sample_next_chronics(self, probabilities): + def sample_next_chronics(self, probabilities=None): """ This function should be called before "next_chronics". It can be used to sample non uniformly for the next next chronics. 
@@ -195,6 +192,9 @@ def sample_next_chronics(self, probabilities):
         """
         self._prev_cache_id = -1
+        if probabilities is None:
+            probabilities = np.ones(self._order.shape[0])
+
         # make sure it sums to 1
         probabilities /= np.sum(probabilities)
         # take one at "random" among these
@@ -348,7 +348,7 @@ def get_id(self) -> str:
     def max_timestep(self):
         return self.data.max_timestep()

-    def shuffle(self, shuffler):
+    def shuffle(self, shuffler=None):
         """
         This method is used to have a better control on the order in which the subfolder containing the episode
         are processed.
@@ -385,6 +385,10 @@ def shuffle(self, shuffler):
             data.shuffle(shuffler=lambda x: x[1, 5, 6])

         """
+        if shuffler is None:
+            def shuffler(x):
+                return x[np.random.choice(len(x), size=len(x), replace=False)]
+
         self._order = shuffler(self._order)
         return self.subpaths[self._order]

@@ -397,17 +401,53 @@ def split_and_save(self, datetime_beg, datetime_end, path_out):
         save it on your local machine. This is espacially handy if you want to extract only a piece of the
         dataset we provide for example.

-        # TODO add an example somewhere
-
         Parameters
         ----------
-        datetime_beg:
-            See example (coming soon)
-        datetime_end:
-            See example (coming soon)
+        datetime_beg: ``dict``
+            Keys are the names (ids) of the scenarios you want to save. Values
+            are the corresponding starting dates and times (in "%Y-%m-%d %H:%M"
+            format). See example for more information.
+        datetime_end: ``dict``
+            Keys must be the same as in the "datetime_beg" argument.
+
+            See example for more information
+
         path_out: ``str``
             The path were the data will be stored.

+        Examples
+        ---------
+
+        Here is a short example on how to use it:
+
+        .. code-block:: python
+
+            import grid2op
+            env = grid2op.make()
+
+            env.chronics_handler.real_data.split_and_save({"004": "2019-01-08 02:00",
+                                                           "005": "2019-01-30 08:00",
+                                                           "006": "2019-01-17 00:00",
+                                                           "007": "2019-01-17 01:00",
+                                                           "008": "2019-01-21 09:00",
+                                                           "009": "2019-01-22 12:00",
+                                                           "010": "2019-01-27 19:00",
+                                                           "011": "2019-01-15 12:00",
+                                                           "012": "2019-01-08 13:00",
+                                                           "013": "2019-01-22 00:00"},
+                                                          {"004": "2019-01-11 02:00",
+                                                           "005": "2019-02-01 08:00",
+                                                           "006": "2019-01-18 00:00",
+                                                           "007": "2019-01-18 01:00",
+                                                           "008": "2019-01-22 09:00",
+                                                           "009": "2019-01-24 12:00",
+                                                           "010": "2019-01-29 19:00",
+                                                           "011": "2019-01-17 12:00",
+                                                           "012": "2019-01-10 13:00",
+                                                           "013": "2019-01-24 00:00"},
+                                                          path_out="/tmp")
+
         """
         if not isinstance(datetime_beg, dict):
             datetime_beg_orig = datetime_beg
@@ -459,22 +499,3 @@ def split_and_save(self, datetime_beg, datetime_end, path_out):
             except Exception as exc_:
                 warnings.warn("Impossible to save the \"metadata\" for the chronics with error:\n\"{}\""
                               "".format(exc_))
-
-    def fast_forward(self, nb_timestep):
-        """
-        This method allows you to skip some time step at the beginning of the chronics.
-
-        This is usefull at the beginning of the training, if you want your agent to learn on more diverse scenarios.
-        Indeed, the data provided in the chronics usually starts always at the same date time (for example Jan 1st at
-        00:00). This can lead to suboptimal exploration, as during this phase, only a few time steps are managed by
-        the agent, so in general these few time steps will correspond to grid state around Jan 1st at 00:00.
- - - Parameters - ---------- - nb_timestep: ``int`` - Number of time step to "fast forward" - - """ - for _ in range(nb_timestep): - self.data.load_next() diff --git a/grid2op/Chronics/MultifolderWithCache.py b/grid2op/Chronics/MultifolderWithCache.py index 9a95d3093..db428ea92 100644 --- a/grid2op/Chronics/MultifolderWithCache.py +++ b/grid2op/Chronics/MultifolderWithCache.py @@ -123,6 +123,8 @@ def reset(self): if self.cache_size == 0: raise RuntimeError("Impossible to initialize the new cache.") + return self.subpaths[self._order] + def initialize(self, order_backend_loads, order_backend_prods, order_backend_lines, order_backend_subs, names_chronics_to_backend=None): self._order_backend_loads = order_backend_loads diff --git a/grid2op/Converter/AnalogStateConverter.py b/grid2op/Converter/AnalogStateConverter.py index 259350d7a..9463f9022 100644 --- a/grid2op/Converter/AnalogStateConverter.py +++ b/grid2op/Converter/AnalogStateConverter.py @@ -30,7 +30,7 @@ def __init__(self, action_space, bias=0.0): self.__bias = 0.0 @staticmethod - def to_norm_vect(inputv, pad_v = 0.0, scale_v = 1.0): + def to_norm_vect(inputv, pad_v=0.0, scale_v=1.0): v = np.asarray(inputv) v = v / scale_v vsafe = np.nan_to_num(v, nan=pad_v, posinf=pad_v, neginf=pad_v) @@ -46,15 +46,12 @@ def convert_obs(self, obs): obs: :class:`grid2op.Observation.Observation` The input observation. - bias: ``float`` - Bias to add to the vector, defaults to 0.0 - Returns ------- ``np.array`` 1D array of np.float32 normalized values """ - # Store the obs for action convertion + # Store the obs for action conversion self.__obs = obs # Store some shortcuts topo = obs.topo_vect @@ -145,7 +142,7 @@ def convert_act(self, netstate): Parameters ---------- netstate: ``tuple`` - A tuple containning the following (3) elements: + A tuple containning the following (3) elements: netbus: ``np.array`` A numpy array of dimension n_bus(2) x dim_topo and range [0.0; 1.0]. 
@@ -288,6 +285,3 @@ def netdisp_rnd(obs): disp_rnd[rnd_gen] = rnd_ramp return disp_rnd - - - diff --git a/grid2op/Converter/BackendConverter.py b/grid2op/Converter/BackendConverter.py index 588f00c85..142a899e0 100644 --- a/grid2op/Converter/BackendConverter.py +++ b/grid2op/Converter/BackendConverter.py @@ -198,8 +198,6 @@ def _init_myself(self): # grid layout data were available super().load_grid_layout(self.path_grid_layout, self.name_grid_layout) - - def _get_possible_target_ids(self, id_source, source_2_id_sub, target_2_id_sub, nm): id_sub_source = source_2_id_sub[id_source] id_sub_target = self._sub_tg2sr[id_sub_source] @@ -370,7 +368,8 @@ def get_line_flow(self): def set_thermal_limit(self, limits): super().set_thermal_limit(limits=limits) self.source_backend.set_thermal_limit(limits=limits) - self.target_backend.set_thermal_limit(limits=limits[self._line_sr2tg]) + if limits is not None: + self.target_backend.set_thermal_limit(limits=limits[self._line_sr2tg]) def get_thermal_limit(self): tmp = self.target_backend.get_thermal_limit() @@ -425,11 +424,6 @@ def _transform_action(self, source_action): no_gen=self._gen_sr2tg, no_topo=self._topo_sr2tg, no_shunt=self._shunt_sr2tg) - """ if False: - target_action.reorder(no_load=self._load_tg2sr, - no_gen=self._gen_tg2sr, - no_topo=self._topo_tg2sr, - no_shunt=self._shunt_tg2sr)""" return target_action def load_redispacthing_data(self, path, name='prods_charac.csv'): diff --git a/grid2op/Converter/ConnectivityConverter.py b/grid2op/Converter/ConnectivityConverter.py index 0978ef1f6..1a935ec5f 100644 --- a/grid2op/Converter/ConnectivityConverter.py +++ b/grid2op/Converter/ConnectivityConverter.py @@ -173,6 +173,9 @@ def convert_act(self, encoded_act): Returns ------- + act: :class:`grid2op.Action.BaseAction` + The action that is usable by grid2op (after conversion) [the action space must be compatible with + the "set_bus" key word] """ argsort = np.argsort(np.minimum(encoded_act, 1-encoded_act)) @@ -215,6 +218,8 @@ def convert_act(self, encoded_act): def _compute_disagreement(self, encoded_act, topo_vect): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + computes the disagreement between the encoded act and the proposed topo_vect **NB** if encoded act is random uniform, and topo_vect is full of 1, then disagreement is, on average 0.5. @@ -231,4 +236,3 @@ def _compute_disagreement(self, encoded_act, topo_vect): def sample(self): coded_act = self.space_prng.rand(self.n) return self.convert_act(coded_act) - diff --git a/grid2op/Converter/GymConverter.py b/grid2op/Converter/GymConverter.py index 9e9776659..d9469141b 100644 --- a/grid2op/Converter/GymConverter.py +++ b/grid2op/Converter/GymConverter.py @@ -16,7 +16,10 @@ class BaseGymConverter: """ - Internal class, do not use. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Used as a base class to convert grid2op state to gym state (wrapper for some useful function + for both the action space and the observation space). 
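+
+    The concrete subclasses are the ones meant to be used. For instance (a minimal sketch, assuming
+    the ``gym`` package is installed and that ``GymObservationSpace`` is exported by
+    ``grid2op.Converter``):
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Converter import GymObservationSpace
+
+        env = grid2op.make()
+        gym_obs_space = GymObservationSpace(env)
+
+        obs = env.reset()
+        gym_obs = gym_obs_space.to_gym(obs)  # a gym "OrderedDict" representation of the observation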
+ """ def __init__(self): pass @@ -98,7 +101,7 @@ class GymObservationSpace(spaces.Dict, BaseGymConverter): def __init__(self, env): self.initial_obs_space = env.observation_space dict_ = {} - self._fill_dict_obs_space(dict_, env.observation_space, env.parameters, env.oppSpace) + self._fill_dict_obs_space(dict_, env.observation_space, env.parameters, env._oppSpace) spaces.Dict.__init__(self, dict_) def _fill_dict_obs_space(self, dict_, observation_space, env_params, opponent_space): @@ -179,11 +182,13 @@ def from_gym(self, gymlike_observation: spaces.dict.OrderedDict) -> BaseObservat Parameters ---------- - gymlike_observation + gymlike_observation: :class:`gym.spaces.dict.OrderedDict` + The observation represented as a gym ordered dict Returns ------- - + grid2oplike_observation: :class:`grid2op.Observation.BaseObservation` + The corresponding grid2op observation """ res = self.initial_obs_space.get_empty_observation() for k, v in gymlike_observation.items(): @@ -191,6 +196,20 @@ def from_gym(self, gymlike_observation: spaces.dict.OrderedDict) -> BaseObservat return res def to_gym(self, grid2op_observation: BaseObservation) -> spaces.dict.OrderedDict: + """ + Convert a grid2op observation into a gym ordered dict. + + Parameters + ---------- + grid2op_observation: :class:`grid2op.Observation.BaseObservation` + The observation represented as a grid2op observation + + Returns + ------- + gymlike_observation: :class:`gym.spaces.dict.OrderedDict` + The corresponding gym ordered dict + + """ return self._base_to_gym(self.spaces.keys(), grid2op_observation, dtypes={k: self.spaces[k].dtype for k in self.spaces}) @@ -349,10 +368,13 @@ def from_gym(self, gymlike_action: spaces.dict.OrderedDict) -> object: Parameters ---------- - gymlike_action + gymlike_action: :class:`gym.spaces.dict.OrderedDict` + The action, represented as a gym action (ordered dict) Returns ------- + An action that can be understood by the given action_space (either a grid2Op action if the + original action space was used, or a Converter) """ if self.__is_converter: diff --git a/grid2op/Converter/ToVect.py b/grid2op/Converter/ToVect.py index 44f4b1726..80d5708aa 100644 --- a/grid2op/Converter/ToVect.py +++ b/grid2op/Converter/ToVect.py @@ -19,10 +19,10 @@ class ToVect(Converter): - `encoded_act` are numpy ndarray - `transformed_obs` are numpy ndarray - (read more about these concepts by looking at the documentation of :class:`grid2op.Converter.Converters`) + (read more about these concepts by looking at the documentation of :class:`grid2op.Converter.Converters`) It is convertible to a gym representation (like the original action space) in the form of a spaces.Box - representing a coutinuous action space (even though most component are probably discrete). + representing a continuous action space (even though most component are probably discrete). Note that if converted to a gym space, it is unlikely the method "sample" will yield to valid results. Most of the time it should generate Ambiguous action that will not be handled by grid2op. @@ -70,7 +70,7 @@ def convert_act(self, encoded_act): Parameters ---------- encoded_act: ``numpy.ndarray`` - The action, representated as a vector + The action, represented as a vector Returns ------- diff --git a/grid2op/Download/DownloadDataset.py b/grid2op/Download/DownloadDataset.py index c24a5cd4d..44dbf5acf 100644 --- a/grid2op/Download/DownloadDataset.py +++ b/grid2op/Download/DownloadDataset.py @@ -39,6 +39,8 @@ class DownloadProgressBar(tqdm): """ + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
     This class is here to show the progress bar when downloading this dataset
     """
     def update_to(self, b=1, bsize=1, tsize=None):
@@ -49,7 +51,10 @@ def update_to(self, b=1, bsize=1, tsize=None):
 def download_url(url, output_path):
     """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
     This function download the file located at 'url' and save it to 'output_path'
+
     Parameters
     ----------
     url: ``str``
@@ -63,6 +68,9 @@ def download_url(url, output_path):
 def _aux_download(url, dataset_name, path_data, ds_name_dl=None):
+    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+    """
     if ds_name_dl is None:
         ds_name_dl = dataset_name
     final_path = os.path.join(path_data, ds_name_dl)
@@ -116,7 +124,9 @@ def _aux_download(url, dataset_name, path_data, ds_name_dl=None):
 def main_download(dataset_name, path_data):
-
+    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+    """
     dataset_name = dataset_name.lower().rstrip().lstrip()
     dataset_name = re.sub('"', "", dataset_name)

diff --git a/grid2op/Environment/BaseEnv.py b/grid2op/Environment/BaseEnv.py
index 94136d5c6..8203b9db2 100644
--- a/grid2op/Environment/BaseEnv.py
+++ b/grid2op/Environment/BaseEnv.py
@@ -28,61 +28,154 @@ class BaseEnv(GridObjects, RandomObject, ABC):
     """
-    Internal class, do not use.
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

     This class represent some usefull abstraction that is re used by :class:`Environment` and
     :class:`grid2op.Observation._Obsenv` for example.

+    The documentation is shown here to document the common attributes of a "BaseEnvironment".
+
     Attributes
     ----------
-    no_overflow_disconnection: ``bool``
+    parameters: :class:`grid2op.Parameters.Parameters`
+        The parameters of the game (to expose more control on what is being simulated)
+
+    with_forecast: ``bool``
+        Whether the chronics allow to have some kind of "forecast". See :func:`BaseEnv.reactivate_forecast`
+        for more information.
+
+    logger:
+        TO BE DONE: a way to log what is happening (**currently not implemented**)
+
+    time_stamp: ``datetime.datetime``
+        The actual time stamp of the current observation.
+
+    nb_time_step: ``int``
+        Number of time steps played in the current environment
+
+    current_obs: :class:`grid2op.Observation.BaseObservation`
+        The current observation (or None if it's not initialized)
+
+    backend: :class:`grid2op.Backend.Backend`
+        The backend used to compute the powerflows and cascading failures.
+
+    done: ``bool``
+        Whether the environment is "done". If ``True`` you need to call :func:`Environment.reset` in order
+        to continue.
+
+    current_reward: ``float``
+        The last computed reward (reward of the current step)
+
+    other_rewards: ``dict``
+        Dictionary with key being the name (identifier) and value being some RewardHelper. At each time step, all the
+        values will be computed by the :class:`Environment` and the information about it will be returned in the
+        "reward" key of the "info" dictionary of the :func:`Environment.step`.
+
+    chronics_handler: :class:`grid2op.Chronics.ChronicsHandler`
+        The object in charge of managing the "chronics", which stores the information about load and generator for example.
+
+    reward_range: ``tuple``
+        For open ai gym compatibility. It represents the range of the rewards: reward min, reward max
+
+    viewer
+        For open ai gym compatibility.
+
+    viewer_fig
+        For open ai gym compatibility.
+ + _gen_activeprod_t: + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Should be initialized at 0. for "step" to properly recognize it's the first time step of the game + + _no_overflow_disconnection: ``bool`` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Whether or not cascading failures are computed or not (TRUE = the powerlines above their thermal limits will not be disconnected). This is initialized based on the attribute :attr:`grid2op.Parameters.Parameters.NO_OVERFLOW_DISCONNECTION`. - timestep_overflow: ``numpy.ndarray``, dtype: int + _timestep_overflow: ``numpy.ndarray``, dtype: int + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Number of consecutive timesteps each powerline has been on overflow. - nb_timestep_overflow_allowed: ``numpy.ndarray``, dtype: int + _nb_timestep_overflow_allowed: ``numpy.ndarray``, dtype: int + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Number of consecutive timestep each powerline can be on overflow. It is usually read from :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_POWERFLOW_ALLOWED`. - hard_overflow_threshold: ``float`` + _hard_overflow_threshold: ``float`` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Number of timestep before an :class:`grid2op.BaseAgent.BaseAgent` can reconnet a powerline that has been disconnected by the environment due to an overflow. - env_dc: ``bool`` + _env_dc: ``bool`` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Whether the environment computes the powerflow using the DC approximation or not. It is usually read from :attr:`grid2op.Parameters.Parameters.ENV_DC`. + _names_chronics_to_backend: ``dict`` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ - TODO update docs here... - # store actions "cooldown" - times_before_line_status_actionable - max_timestep_line_status_deactivated - times_before_topology_actionable - max_timestep_topology_deactivated - time_next_maintenance - duration_next_maintenance - hard_overflow_threshold + Configuration file used to associated the name of the objects in the backend + (both extremities of powerlines, load or production for + example) with the same object in the data (:attr:`Environment.chronics_handler`). The idea is that, usually + data generation comes from a different software that does not take into account the powergrid infrastructure. + Hence, the same "object" can have a different name. This mapping is present to avoid the need to rename + the "object" when providing data. A more detailed description is available at + :func:`grid2op.ChronicsHandler.GridValue.initialize`. - # redispacthing - target_dispatch - actual_dispatch + _env_modification: :class:`grid2op.Action.Action` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ - gen_activeprod_t: - Should be initialized at 0. for "step" to properly recognize it's the first time step of the game + Representation of the actions of the environment for the modification of the powergrid. - other_rewards: ``dict`` - Dictionnary with key being the name (identifier) and value being some RewardHelper. At each time step, all the - values will be computed by the :class:`Environment` and the information about it will be returned in the - "reward" key of the "info" dictionnary of the :func:`Environment.step`. 
+ _rewardClass: ``type`` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Type of reward used. Should be a subclass of :class:`grid2op.BaseReward.BaseReward` + + _init_grid_path: ``str`` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + The path where the description of the powergrid is located. + + _game_rules: :class:`grid2op.Rules.RulesChecker` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + The rules of the game (define which actions are legal and which are not) + + _helper_action_player: :class:`grid2op.Action.ActionSpace` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Helper used to manipulate more easily the actions given to / provided by the :class:`grid2op.Agent.BaseAgent` + (player) + + _helper_action_env: :class:`grid2op.Action.ActionSpace` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Helper used to manipulate more easily the actions given to / provided by the environment to the backend. + + _helper_observation: :class:`grid2op.Observation.ObservationSpace` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Helper used to generate the observation that will be given to the :class:`grid2op.BaseAgent` + + _reward_helper: :class:`grid2p.BaseReward.RewardHelper` + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Helper that is called to compute the reward at each time step. """ def __init__(self, parameters, + voltagecontrolerClass, thermal_limit_a=None, epsilon_poly=1e-2, tol_poly=1e-6, @@ -120,8 +213,11 @@ def __init__(self, # define logger self.logger = None + # the voltage controler + # class used for the action spaces - self.helper_action_class = None + self._helper_action_class = None + self._helper_observation_class = None # and calendar data self.time_stamp = None @@ -130,71 +226,64 @@ def __init__(self, # observation self.current_obs = None - self.ignore_min_up_down_times = self.parameters.IGNORE_MIN_UP_DOWN_TIME - self.forbid_dispatch_off = not self.parameters.ALLOW_DISPATCH_GEN_SWITCH_OFF + self._ignore_min_up_down_times = self.parameters.IGNORE_MIN_UP_DOWN_TIME + self._forbid_dispatch_off = not self.parameters.ALLOW_DISPATCH_GEN_SWITCH_OFF # type of power flow to play # if True, then it will not disconnect lines above their thermal limits - self.no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION - self.timestep_overflow = None - self.nb_timestep_overflow_allowed = None + self._no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION + self._timestep_overflow = None + self._nb_timestep_overflow_allowed = None + self._hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD # store actions "cooldown" - self.times_before_line_status_actionable = None - self.max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_LINE - self.times_before_topology_actionable = None - self.max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_SUB + self._times_before_line_status_actionable = None + self._max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_LINE + self._times_before_topology_actionable = None + self._max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_SUB # for maintenance operation - self.time_next_maintenance = None - self.duration_next_maintenance = None + 
self._time_next_maintenance = None + self._duration_next_maintenance = None # hazard (not used outside of this class, information is given in `times_before_line_status_actionable` self._hazard_duration = None - # hard overflow part - self.hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD - self.env_dc = self.parameters.ENV_DC - - # Remember last line buses - self.last_bus_line_or = None - self.last_bus_line_ex = None + self._env_dc = self.parameters.ENV_DC # redispatching data - self.target_dispatch = None - self.actual_dispatch = None - self.gen_uptime = None - self.gen_downtime = None - self.gen_activeprod_t = None - self.gen_activeprod_t_redisp = None + self._target_dispatch = None + self._actual_dispatch = None + self._gen_uptime = None + self._gen_downtime = None + self._gen_activeprod_t = None + self._gen_activeprod_t_redisp = None self._thermal_limit_a = thermal_limit_a - # maintenance / hazards - self.time_next_maintenance = None - self.duration_next_maintenance = None - # store environment modifications self._injection = None self._maintenance = None self._hazards = None - self.env_modification = None + self._env_modification = None # to use the data self.done = False self.current_reward = None - self.helper_action_env = None + self._helper_action_env = None self.chronics_handler = None - self.game_rules = None - self.helper_action_player = None - - self.rewardClass = None - self.actionClass = None - self.observationClass = None - self.legalActClass = None - self.helper_observation = None - self.names_chronics_to_backend = None - self.reward_helper = None + self._game_rules = None + self._helper_action_player = None + + self._rewardClass = None + self._actionClass = None + self._observationClass = None + self._legalActClass = None + self._helper_observation = None + self._names_chronics_to_backend = None + self._reward_helper = None + + # gym compatibility self.reward_range = None, None self.viewer = None self.viewer_fig = None @@ -210,26 +299,27 @@ def __init__(self, self.other_rewards[k] = RewardHelper(v) # opponent - self.opponent_action_class = opponent_action_class # class of the action of the opponent - self.opponent_class = opponent_class # class of the opponent - self.opponent_init_budget = dt_float(opponent_init_budget) - self.opponent_attack_duration = dt_int(opponent_attack_duration) - self.opponent_attack_cooldown = dt_int(opponent_attack_cooldown) - self.opponent_budget_per_ts = dt_float(opponent_budget_per_ts) - self.kwargs_opponent = kwargs_opponent - self.opponent_budget_class = opponent_budget_class - - ## below initialized by _create_env, above: need to be called - self.opponent_action_space = None - self.compute_opp_budget = None - self.opponent = None - self.oppSpace = None + self._opponent_action_class = opponent_action_class # class of the action of the opponent + self._opponent_class = opponent_class # class of the opponent + self._opponent_init_budget = dt_float(opponent_init_budget) + self._opponent_attack_duration = dt_int(opponent_attack_duration) + self._opponent_attack_cooldown = dt_int(opponent_attack_cooldown) + self._opponent_budget_per_ts = dt_float(opponent_budget_per_ts) + self._kwargs_opponent = kwargs_opponent + self._opponent_budget_class = opponent_budget_class + + # below initialized by _create_env, above: need to be called + self._opponent_action_space = None + self._compute_opp_budget = None + self._opponent = None + self._oppSpace = None # voltage - self.voltage_controler = None + self._voltagecontrolerClass = 
voltagecontrolerClass + self._voltage_controler = None # backend - self.init_grid_path = None + self._init_grid_path = None # backend action self._backend_action_class = None @@ -243,34 +333,34 @@ def _create_opponent(self): if not self.__is_init: raise EnvError("Impossible to create an opponent with a non initialized environment!") - if not issubclass(self.opponent_action_class, BaseAction): + if not issubclass(self._opponent_action_class, BaseAction): raise EnvError("Impossible to make an environment with an opponent action class not derived from BaseAction") try: - self.opponent_init_budget = dt_float(self.opponent_init_budget) + self._opponent_init_budget = dt_float(self._opponent_init_budget) except Exception as e: raise EnvError("Impossible to convert \"opponent_init_budget\" to a float with error {}".format(e)) - if self.opponent_init_budget < 0.: - raise EnvError("If you want to deactive the opponent, please don't set its budget to a negative number." + if self._opponent_init_budget < 0.: + raise EnvError("If you want to deactivate the opponent, please don't set its budget to a negative number." "Prefer the use of the DontAct action type (\"opponent_action_class=DontAct\" " "and / or set its budget to 0.") - if not issubclass(self.opponent_class, BaseOpponent): + if not issubclass(self._opponent_class, BaseOpponent): raise EnvError("Impossible to make an opponent with a type that does not inherit from BaseOpponent.") - self.opponent_action_space = self.helper_action_class(gridobj=self.backend, - legal_action=AlwaysLegal, - actionClass=self.opponent_action_class) - - self.compute_opp_budget = self.opponent_budget_class(self.opponent_action_space) - self.opponent = self.opponent_class(self.opponent_action_space) - self.oppSpace = OpponentSpace(compute_budget=self.compute_opp_budget, - init_budget=self.opponent_init_budget, - attack_duration=self.opponent_attack_duration, - attack_cooldown=self.opponent_attack_cooldown, - budget_per_timestep=self.opponent_budget_per_ts, - opponent=self.opponent - ) - self.oppSpace.init_opponent(**self.kwargs_opponent) - self.oppSpace.reset() + self._opponent_action_space = self._helper_action_class(gridobj=self.backend, + legal_action=AlwaysLegal, + actionClass=self._opponent_action_class) + + self._compute_opp_budget = self._opponent_budget_class(self._opponent_action_space) + self._opponent = self._opponent_class(self._opponent_action_space) + self._oppSpace = OpponentSpace(compute_budget=self._compute_opp_budget, + init_budget=self._opponent_init_budget, + attack_duration=self._opponent_attack_duration, + attack_cooldown=self._opponent_attack_cooldown, + budget_per_timestep=self._opponent_budget_per_ts, + opponent=self._opponent + ) + self._oppSpace.init_opponent(**self._kwargs_opponent) + self._oppSpace.reset() def _has_been_initialized(self): # type of power flow to play @@ -281,46 +371,46 @@ def _has_been_initialized(self): self._backend_action_class = _BackendAction.init_grid(self.backend) self._backend_action = self._backend_action_class() - self.no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION - self.timestep_overflow = np.zeros(shape=(self.n_line,), dtype=dt_int) - self.nb_timestep_overflow_allowed = np.full(shape=(self.n_line,), + self._no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION + self._timestep_overflow = np.zeros(shape=(self.n_line,), dtype=dt_int) + self._nb_timestep_overflow_allowed = np.full(shape=(self.n_line,), fill_value=self.parameters.NB_TIMESTEP_OVERFLOW_ALLOWED, dtype=dt_int) # 
store actions "cooldown" - self.times_before_line_status_actionable = np.zeros(shape=(self.n_line,), dtype=dt_int) - self.max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_LINE + self._times_before_line_status_actionable = np.zeros(shape=(self.n_line,), dtype=dt_int) + self._max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_LINE - self.times_before_topology_actionable = np.zeros(shape=(self.n_sub,), dtype=dt_int) - self.max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_SUB + self._times_before_topology_actionable = np.zeros(shape=(self.n_sub,), dtype=dt_int) + self._max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_SUB # hazard (not used outside of this class, information is given in `times_before_line_status_actionable` self._hazard_duration = np.zeros(shape=(self.n_line,), dtype=dt_int) # hard overflow part - self.hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD - self.env_dc = self.parameters.ENV_DC - - # Remember lines last bus - self.last_bus_line_or = np.full(shape=self.n_line, fill_value=1, dtype=dt_int) - self.last_bus_line_ex = np.full(shape=self.n_line, fill_value=1, dtype=dt_int) + self._hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD + self._env_dc = self.parameters.ENV_DC # initialize maintenance / hazards - self.time_next_maintenance = np.full(self.n_line, -1, dtype=dt_int) - self.duration_next_maintenance = np.zeros(shape=(self.n_line,), dtype=dt_int) - self.times_before_line_status_actionable = np.full(shape=(self.n_line,), fill_value=0, dtype=dt_int) + self._time_next_maintenance = np.full(self.n_line, -1, dtype=dt_int) + self._duration_next_maintenance = np.zeros(shape=(self.n_line,), dtype=dt_int) + self._times_before_line_status_actionable = np.full(shape=(self.n_line,), fill_value=0, dtype=dt_int) # create the vector to the proper shape - self.target_dispatch = np.zeros(self.n_gen, dtype=dt_float) - self.actual_dispatch = np.zeros(self.n_gen, dtype=dt_float) - self.gen_uptime = np.zeros(self.n_gen, dtype=dt_int) - self.gen_downtime = np.zeros(self.n_gen, dtype=dt_int) - self.gen_activeprod_t = np.zeros(self.n_gen, dtype=dt_float) - self.gen_activeprod_t_redisp = np.zeros(self.n_gen, dtype=dt_float) + self._target_dispatch = np.zeros(self.n_gen, dtype=dt_float) + self._actual_dispatch = np.zeros(self.n_gen, dtype=dt_float) + self._gen_uptime = np.zeros(self.n_gen, dtype=dt_int) + self._gen_downtime = np.zeros(self.n_gen, dtype=dt_int) + self._gen_activeprod_t = np.zeros(self.n_gen, dtype=dt_float) + self._gen_activeprod_t_redisp = np.zeros(self.n_gen, dtype=dt_float) self._reset_redispatching() self.__is_init = True def reset(self): + """ + Reset the base environment (set the appropriate variables to correct initialization). + It is (and must be) overloaded in other :class:`grid2op.Environment` + """ self.__is_init = True self.current_obs = None @@ -350,6 +440,23 @@ def seed(self, seed=None): seed_opponent: ``tuple`` The seed used to set the prng for the opponent (if any otherwise ``None``) + Examples + --------- + + Seeding an environment should be done with: + + .. code-block:: python + + import grid2op + env = grid2op.make() + env.seed(0) + obs = env.reset() + + As long as the environment instance (variable `env` in the above code) is not `reset` the `env.seed` has no + real effect (but can have side effect). + + For a full control on the seed mechanism it is more than advised to reset it after it has been seeded. 
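+
+        A reproducibility check (an illustrative sketch: with the same seed, both runs below should
+        replay the same scenarios and produce the same observations):
+
+        .. code-block:: python
+
+            import grid2op
+
+            env1 = grid2op.make()
+            env1.seed(42)
+            obs1 = env1.reset()
+
+            env2 = grid2op.make()
+            env2.seed(42)
+            obs2 = env2.reset()
+
+            # obs1 and obs2 come from the same scenario: the two runs are reproducible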
+ """ try: seed = np.array(seed).astype(dt_int) @@ -371,21 +478,21 @@ def seed(self, seed=None): if self.chronics_handler is not None: seed = self.space_prng.randint(max_int) seed_chron = self.chronics_handler.seed(seed) - if self.helper_observation is not None: + if self._helper_observation is not None: seed = self.space_prng.randint(max_int) - seed_obs = self.helper_observation.seed(seed) - if self.helper_action_player is not None: + seed_obs = self._helper_observation.seed(seed) + if self._helper_action_player is not None: seed = self.space_prng.randint(max_int) - seed_action_space = self.helper_action_player.seed(seed) - if self.helper_action_env is not None: + seed_action_space = self._helper_action_player.seed(seed) + if self._helper_action_env is not None: seed = self.space_prng.randint(max_int) - seed_env_modif = self.helper_action_env.seed(seed) - if self.voltage_controler is not None: + seed_env_modif = self._helper_action_env.seed(seed) + if self._voltage_controler is not None: seed = self.space_prng.randint(max_int) - seed_volt_cont = self.voltage_controler.seed(seed) - if self.opponent is not None: + seed_volt_cont = self._voltage_controler.seed(seed) + if self._opponent is not None: seed = self.space_prng.randint(max_int) - seed_opponent = self.opponent.seed(seed) + seed_opponent = self._opponent.seed(seed) return (seed, seed_chron, seed_obs, seed_action_space, seed_env_modif, seed_volt_cont, seed_opponent) def deactivate_forecast(self): @@ -410,6 +517,8 @@ def deactivate_forecast(self): # tell grid2op not to read the "forecast" env = grid2op.make("rte_case14_realistic", data_feeding_kwargs={"gridvalueClass": GridStateFromFile}) + do_nothing_action = env.action_space() + # improve speed ups to not even try to use forecast env.deactivate_forecast() @@ -417,11 +526,11 @@ def deactivate_forecast(self): obs = env.reset() # but this will make the programm stop working - # obs.simulate() # DO NOT RUN IT RAISES AN ERROR + # obs.simulate(do_nothing_action) # DO NOT RUN IT RAISES AN ERROR """ - if self.helper_observation is not None: - self.helper_observation.with_forecast = False + if self._helper_observation is not None: + self._helper_observation.with_forecast = False self.with_forecast = False def reactivate_forecast(self): @@ -430,15 +539,51 @@ def reactivate_forecast(self): in the observation space. This will most likely lead to some performance decrease but you will be able to use `obs.simulate` function. + + Notes + ------ + You can use this function as followed: + + .. 
code-block:: python + + import grid2op + from grid2op.Chronics import GridStateFromFile + # tell grid2op not to read the "forecast" + env = grid2op.make("rte_case14_realistic", data_feeding_kwargs={"gridvalueClass": GridStateFromFile}) + + do_nothing_action = env.action_space() + + # improve speed ups to not even try to use forecast + env.deactivate_forecast() + + # this is normal behavior + obs = env.reset() + + # but this will make the programm stop working + # obs.simulate(do_nothing_action) # DO NOT RUN IT RAISES AN ERROR + + env.reactivate_forecast() + obs, reward, done, info = env.step(do_nothing_action) + + # and now forecast are available again + simobs, sim_r, sim_d, sim_info = obs.simulate(do_nothing_action) + """ - if self.helper_observation is not None: - self.helper_observation.with_forecast = True + if self._helper_observation is not None: + self._helper_observation.with_forecast = True self.with_forecast = True @abstractmethod - def init_backend(self, init_grid_path, chronics_handler, backend, - names_chronics_to_backend, actionClass, observationClass, - rewardClass, legalActClass): + def _init_backend(self, init_grid_path, chronics_handler, backend, + names_chronics_to_backend, actionClass, observationClass, + rewardClass, legalActClass): + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This method is used for Environment specific implementation. Only use it if you know exactly what + you are doing. + + """ pass def set_thermal_limit(self, thermal_limit): @@ -450,6 +595,23 @@ def set_thermal_limit(self, thermal_limit): thermal_limit: ``numpy.ndarray`` The new thermal limit. It must be a numpy ndarray vector (or convertible to it). For each powerline it gives the new thermal limit. + + Examples + --------- + + This function can be used like this: + + .. code-block:: python + + import grid2op + + # I create an environment + env = grid2op.make("rte_case5_example", test=True) + + # i set the thermal limit of each powerline to 20000 amps + env.set_thermal_limit([20000 for _ in range(env.n_line)]) + + """ if not self.__is_init: raise Grid2OpException("Impossible to set the thermal limit to a non initialized Environment") @@ -468,16 +630,16 @@ def set_thermal_limit(self, thermal_limit): def _reset_redispatching(self): # redispatching - self.target_dispatch[:] = 0. - self.actual_dispatch[:] = 0. - self.gen_uptime[:] = 0 - self.gen_downtime[:] = 0 - self.gen_activeprod_t[:] = 0. - self.gen_activeprod_t_redisp[:] = 0. + self._target_dispatch[:] = 0. + self._actual_dispatch[:] = 0. + self._gen_uptime[:] = 0 + self._gen_downtime[:] = 0 + self._gen_activeprod_t[:] = 0. + self._gen_activeprod_t_redisp[:] = 0. def _get_new_prod_setpoint(self, action): # get the modification of generator active setpoint from the action - new_p = 1. * self.gen_activeprod_t + new_p = 1. * self._gen_activeprod_t if "prod_p" in action._dict_inj: tmp = action._dict_inj["prod_p"] indx_ok = np.isfinite(tmp) @@ -485,9 +647,9 @@ def _get_new_prod_setpoint(self, action): # modification of the environment always override the modification of the agents (if any) # TODO have a flag there if this is the case. 
- if "prod_p" in self.env_modification._dict_inj: + if "prod_p" in self._env_modification._dict_inj: # modification of the production setpoint value - tmp = self.env_modification._dict_inj["prod_p"] + tmp = self._env_modification._dict_inj["prod_p"] indx_ok = np.isfinite(tmp) new_p[indx_ok] = tmp[indx_ok] return new_p @@ -500,48 +662,48 @@ def _make_redisp(self, action, new_p): # get the redispatching action (if any) redisp_act_orig = 1. * action._redispatch - previous_redisp = 1. * self.actual_dispatch + previous_redisp = 1. * self._actual_dispatch - if np.all(redisp_act_orig == 0.) and np.all(self.target_dispatch == 0.) and np.all(self.actual_dispatch == 0.): + if np.all(redisp_act_orig == 0.) and np.all(self._target_dispatch == 0.) and np.all(self._actual_dispatch == 0.): return valid, except_, info_ # I update the target dispatch of generator i have never modified - already_modified_gen = self.target_dispatch != 0. - self.target_dispatch[already_modified_gen] += redisp_act_orig[already_modified_gen] + already_modified_gen = self._target_dispatch != 0. + self._target_dispatch[already_modified_gen] += redisp_act_orig[already_modified_gen] first_modified = (~already_modified_gen) & (redisp_act_orig != 0) - self.target_dispatch[first_modified] = self.actual_dispatch[first_modified] + redisp_act_orig[first_modified] + self._target_dispatch[first_modified] = self._actual_dispatch[first_modified] + redisp_act_orig[first_modified] already_modified_gen |= first_modified # check that everything is consistent with pmin, pmax: - if np.any(self.target_dispatch > self.gen_pmax - self.gen_pmin): + if np.any(self._target_dispatch > self.gen_pmax - self.gen_pmin): # action is invalid, the target redispatching would be above pmax for at least a generator - cond_invalid = self.target_dispatch > self.gen_pmax - self.gen_pmin + cond_invalid = self._target_dispatch > self.gen_pmax - self.gen_pmin except_ = InvalidRedispatching("You cannot ask for a dispatch higher than pmax - pmin [it would be always " "invalid because, even if the sepoint is pmin, this dispatch would set it " "to a number higher than pmax, which is impossible]. Invalid dispatch for " "generator(s): " "{}".format(np.where(cond_invalid)[0])) - self.target_dispatch -= redisp_act_orig + self._target_dispatch -= redisp_act_orig return valid, except_, info_ - if np.any(self.target_dispatch < self.gen_pmin - self.gen_pmax): + if np.any(self._target_dispatch < self.gen_pmin - self.gen_pmax): # action is invalid, the target redispatching would be below pmin for at least a generator - cond_invalid = self.target_dispatch < self.gen_pmin - self.gen_pmax + cond_invalid = self._target_dispatch < self.gen_pmin - self.gen_pmax except_ = InvalidRedispatching("You cannot ask for a dispatch lower than pmin - pmax [it would be always " "invalid because, even if the sepoint is pmax, this dispatch would set it " "to a number bellow pmin, which is impossible]. 
Invalid dispatch for " "generator(s): " "{}".format(np.where(cond_invalid)[0])) - self.target_dispatch -= redisp_act_orig + self._target_dispatch -= redisp_act_orig return valid, except_, info_ # i can't redispatch turned off generators [turned off generators need to be turned on before redispatching] - if np.any(redisp_act_orig[new_p == 0.]) and self.forbid_dispatch_off: + if np.any(redisp_act_orig[new_p == 0.]) and self._forbid_dispatch_off: # action is invalid, a generator has been redispatched, but it's turned off except_ = InvalidRedispatching("Impossible to dispatch a turned off generator") - self.target_dispatch -= redisp_act_orig + self._target_dispatch -= redisp_act_orig return valid, except_, info_ - if self.forbid_dispatch_off is True: + if self._forbid_dispatch_off is True: redisp_act_orig_cut = 1.0 * redisp_act_orig redisp_act_orig_cut[new_p == 0.] = 0. if np.any(redisp_act_orig_cut != redisp_act_orig): @@ -550,9 +712,9 @@ def _make_redisp(self, action, new_p): else: redisp_act_orig_cut = redisp_act_orig - mismatch = self.actual_dispatch - self.target_dispatch + mismatch = self._actual_dispatch - self._target_dispatch mismatch = np.abs(mismatch) - if np.abs(np.sum(self.actual_dispatch)) >= self._tol_poly or \ + if np.abs(np.sum(self._actual_dispatch)) >= self._tol_poly or \ np.sum(mismatch) >= self._tol_poly: except_ = self._compute_dispatch_vect(already_modified_gen, new_p) valid = except_ is None @@ -561,11 +723,11 @@ def _make_redisp(self, action, new_p): def _compute_dispatch_vect(self, already_modified_gen, new_p): except_ = None # first i define the participating generator - gen_participating = (new_p > 0.) | (self.actual_dispatch != 0.) | (self.target_dispatch != self.actual_dispatch) + gen_participating = (new_p > 0.) | (self._actual_dispatch != 0.) 
| (self._target_dispatch != self._actual_dispatch) gen_participating[~self.gen_redispatchable] = False # define the objective value - target_vals = self.target_dispatch[gen_participating] - self.actual_dispatch[gen_participating] + target_vals = self._target_dispatch[gen_participating] - self._actual_dispatch[gen_participating] already_modified_gen_me = already_modified_gen[gen_participating] target_vals_me = target_vals[already_modified_gen_me] nb_dispatchable = np.sum(gen_participating) @@ -592,30 +754,24 @@ def jac(actual_dispatchable): (actual_dispatchable[already_modified_gen_me] - target_vals_me) return res - # hessian is not used for the optimization method - # hess_mat = np.zeros((nb_dispatchable, nb_dispatchable)) - # hess_mat[already_modified_gen_me, already_modified_gen_me] = 2.0 * weights[already_modified_gen_me] - # def hess(actual_dispatchable): - # return hess_mat - mat_sum_0_no_turn_on = np.ones((1, nb_dispatchable)) const_sum_O_no_turn_on = np.zeros(1) equality_const = LinearConstraint(mat_sum_0_no_turn_on, const_sum_O_no_turn_on, const_sum_O_no_turn_on) # gen increase in the chronics - incr_in_chronics = new_p - (self.gen_activeprod_t_redisp - self.actual_dispatch) + incr_in_chronics = new_p - (self._gen_activeprod_t_redisp - self._actual_dispatch) # minmum value available for disp ## first limit delta because of pmin - p_min_const = self.gen_pmin[gen_participating] - new_p[gen_participating] - self.actual_dispatch[ + p_min_const = self.gen_pmin[gen_participating] - new_p[gen_participating] - self._actual_dispatch[ gen_participating] ## second limit delta because of ramps ramp_down_const = -self.gen_max_ramp_down[gen_participating] - incr_in_chronics[gen_participating] min_disp = np.maximum(p_min_const, ramp_down_const) # maximum value available for disp ## first limit delta because of pmin - p_max_const = self.gen_pmax[gen_participating] - new_p[gen_participating] - self.actual_dispatch[ + p_max_const = self.gen_pmax[gen_participating] - new_p[gen_participating] - self._actual_dispatch[ gen_participating] ## second limit delta because of ramps ramp_up_const = self.gen_max_ramp_up[gen_participating] - incr_in_chronics[gen_participating] @@ -642,15 +798,18 @@ def f(init): return res res = f(x0) if res.success: - self.actual_dispatch[gen_participating] += res.x + self._actual_dispatch[gen_participating] += res.x else: except_ = InvalidRedispatching("Redispatching automaton terminated with error:\n{}".format(res.message)) return except_ def _update_actions(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Retrieve the actions to perform the update of the underlying powergrid represented by - the :class:`grid2op.Backend`in the next time step. + the :class:`grid2op.Backend`in the next time step. + A call to this function will also read the next state of :attr:`chronics_handler`, so it must be called only once per time step. 
@@ -673,19 +832,21 @@ def _update_actions(self):
         else:
             self._hazards = None
         self.time_stamp = timestamp
-        self.duration_next_maintenance = maintenance_duration
-        self.time_next_maintenance = maintenance_time
+        self._duration_next_maintenance = maintenance_duration
+        self._time_next_maintenance = maintenance_time
         self._hazard_duration = hazard_duration
-        return self.helper_action_env({"injection": self._injection, "maintenance": self._maintenance,
+        return self._helper_action_env({"injection": self._injection, "maintenance": self._maintenance,
                                        "hazards": self._hazards}), prod_v

     def _update_time_reconnection_hazards_maintenance(self):
         """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
         This supposes that :attr:`Environment.times_before_line_status_actionable` is already updated
         with the cascading failure, soft overflow and hard overflow.

         It also supposes that :func:`Environment._update_actions` has been called, so that the vectors
-        :attr:`Environment.duration_next_maintenance`, :attr:`Environment.time_next_maintenance` and
+        :attr:`Environment._duration_next_maintenance`, :attr:`Environment._time_next_maintenance` and
         :attr:`Environment._hazard_duration` are updated with the most recent values.
         Finally the Environment supposes that this method is called before calling :func:`Environment.get_obs`

         This function integrates the hazards and maintenance in the
@@ -697,15 +858,19 @@ def _update_time_reconnection_hazards_maintenance(self):
         above paragraph)

         For this Environment, we suppose that the maximum of the 3 values are taken into account. The reality would
         be more complicated.
+
         """
-        first_time_maintenance = self.time_next_maintenance == 0
-        self.times_before_line_status_actionable[first_time_maintenance] = np.maximum(self.times_before_line_status_actionable[first_time_maintenance],
-                                                                                      self.duration_next_maintenance[first_time_maintenance])
-        self.times_before_line_status_actionable[:] = np.maximum(self.times_before_line_status_actionable,
+        first_time_maintenance = self._time_next_maintenance == 0
+        self._times_before_line_status_actionable[first_time_maintenance] = np.maximum(
+            self._times_before_line_status_actionable[first_time_maintenance],
+            self._duration_next_maintenance[first_time_maintenance])
+        self._times_before_line_status_actionable[:] = np.maximum(self._times_before_line_status_actionable,
                                                                   self._hazard_duration)

     def _voltage_control(self, agent_action, prod_v_chronics):
         """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
         Update the environment action "action_env" given a possibly new voltage setpoint for the generators. This
         function can be overide for a more complex handling of the voltages.

@@ -720,21 +885,26 @@ def _voltage_control(self, agent_action, prod_v_chronics):
             The voltages that has been specified in the chronics

         """
-        res = self.helper_action_env()
+        res = self._helper_action_env()
         if prod_v_chronics is not None:
             res.update({"injection": {"prod_v": prod_v_chronics}})
         return res

     def _handle_updown_times(self, gen_up_before, redisp_act):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+        Handles the up and down times for the generators.
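+
+        The rule it enforces can be illustrated with this standalone sketch (made-up values, not
+        actual environment code):
+
+        .. code-block:: python
+
+            import numpy as np
+
+            # 3 generators, each with a minimum down time of 2 time steps
+            gen_min_downtime = np.array([2, 2, 2])
+            gen_downtime = np.array([0, 1, 5])  # time steps each generator has spent disconnected
+            turned_on_now = np.array([True, True, True])
+
+            too_soon = turned_on_now & (gen_downtime < gen_min_downtime)
+            # generators 0 and 1 would trigger a GeneratorTurnedOnTooSoon error, generator 2 is fine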
+ """ # get the generators that are not connected after the action except_ = None # computes which generator will be turned on after the action - gen_up_after = 1.0 * self.gen_activeprod_t - if "prod_p" in self.env_modification._dict_inj: - tmp = self.env_modification._dict_inj["prod_p"] + gen_up_after = 1.0 * self._gen_activeprod_t + if "prod_p" in self._env_modification._dict_inj: + tmp = self._env_modification._dict_inj["prod_p"] indx_ok = np.isfinite(tmp) - gen_up_after[indx_ok] = self.env_modification._dict_inj["prod_p"][indx_ok] + gen_up_after[indx_ok] = self._env_modification._dict_inj["prod_p"][indx_ok] gen_up_after += redisp_act gen_up_after = gen_up_after > 0. @@ -744,32 +914,32 @@ def _handle_updown_times(self, gen_up_before, redisp_act): gen_still_connected = gen_up_before & gen_up_after gen_still_disconnected = (~gen_up_before) & (~gen_up_after) - if np.any(self.gen_downtime[gen_connected_this_timestep] < self.gen_min_downtime[gen_connected_this_timestep]) \ - and not self.ignore_min_up_down_times: + if np.any(self._gen_downtime[gen_connected_this_timestep] < self.gen_min_downtime[gen_connected_this_timestep]) \ + and not self._ignore_min_up_down_times: # i reconnected a generator before the minimum time allowed - id_gen = self.gen_downtime[gen_connected_this_timestep] < self.gen_min_downtime[gen_connected_this_timestep] + id_gen = self._gen_downtime[gen_connected_this_timestep] < self.gen_min_downtime[gen_connected_this_timestep] id_gen = np.where(id_gen)[0] id_gen = np.where(gen_connected_this_timestep[id_gen])[0] except_ = GeneratorTurnedOnTooSoon("Some generator has been connected too early ({})".format(id_gen)) return except_ else: - self.gen_downtime[gen_connected_this_timestep] = -1 - self.gen_uptime[gen_connected_this_timestep] = 1 + self._gen_downtime[gen_connected_this_timestep] = -1 + self._gen_uptime[gen_connected_this_timestep] = 1 - if np.any(self.gen_uptime[gen_disconnected_this] < self.gen_min_uptime[gen_disconnected_this]) and \ - not self.ignore_min_up_down_times: + if np.any(self._gen_uptime[gen_disconnected_this] < self.gen_min_uptime[gen_disconnected_this]) and \ + not self._ignore_min_up_down_times: # i disconnected a generator before the minimum time allowed - id_gen = self.gen_uptime[gen_disconnected_this] < self.gen_min_uptime[gen_disconnected_this] + id_gen = self._gen_uptime[gen_disconnected_this] < self.gen_min_uptime[gen_disconnected_this] id_gen = np.where(id_gen)[0] id_gen = np.where(gen_connected_this_timestep[id_gen])[0] except_ = GeneratorTurnedOffTooSoon("Some generator has been disconnected too early ({})".format(id_gen)) return except_ else: - self.gen_downtime[gen_connected_this_timestep] = 0 - self.gen_uptime[gen_connected_this_timestep] = 1 + self._gen_downtime[gen_connected_this_timestep] = 0 + self._gen_uptime[gen_connected_this_timestep] = 1 - self.gen_uptime[gen_still_connected] += 1 - self.gen_downtime[gen_still_disconnected] += 1 + self._gen_uptime[gen_still_connected] += 1 + self._gen_downtime[gen_still_disconnected] += 1 return except_ def get_obs(self): @@ -780,13 +950,48 @@ def get_obs(self): ------- res: :class:`grid2op.Observation.Observation` The current BaseObservation given to the :class:`grid2op.BaseAgent.BaseAgent` / bot / controler. + + Examples + --------- + + This function can be use at any moment, even if the actual observation is not present. + + .. 
code-block:: python
+
+            import grid2op
+
+            # I create an environment
+            env = grid2op.make()
+
+            obs = env.reset()
+
+            # ... any other code ...
+            obs2 = env.get_obs()
+
+            # obs2 and obs are identical.
+
         """
-        res = self.helper_observation(env=self)
+        res = self._helper_observation(env=self)
         return res

     def get_thermal_limit(self):
         """
-        get the current thermal limit in amps
+        Get the current thermal limit in amps registered for the environment.
+
+        Examples
+        ---------
+
+        It can be used like this:
+
+        .. code-block:: python
+
+            import grid2op
+
+            # I create an environment
+            env = grid2op.make()
+
+            thermal_limits = env.get_thermal_limit()
+
         """
         return 1.0 * self._thermal_limit_a

@@ -824,13 +1029,46 @@ def step(self, action):
                 due to overflow
               - "is_illegal" (``bool``) whether the action given as input was illegal
               - "is_ambiguous" (``bool``) whether the action given as input was ambiguous.
-              - "is_illegal_redisp" (``bool``) was the action illegal due to redispatching
+              - "is_dispatching_illegal" (``bool``) was the action illegal due to redispatching
               - "is_illegal_reco" (``bool``) was the action illegal due to a powerline reconnection
-              - "exception" (``list`` of :class:`Exceptions.Exceptions.Grid2OpException` if an exception was raised
-                or ``[]`` if everything was fine.)
+              - "opponent_attack_line" (``np.ndarray``, ``bool``) for each powerline, says whether the opponent
+                attacked it (``True``) or not (``False``).
+              - "opponent_attack_sub" (``np.ndarray``, ``bool``) for each substation, says whether the opponent
+                attacked it (``True``) or not (``False``).
+              - "opponent_attack_duration" (``int``) the duration of the current attack (if any)
+              - "exception" (``list`` of :class:`Exceptions.Exceptions.Grid2OpException` if an exception was
+                raised or ``[]`` if everything was fine.)
+              - "detailed_infos_for_cascading_failures" (optional, only if the backend has been created with
+                `detailed_infos_for_cascading_failures=True`) the list of the intermediate steps computed during
+                the simulation of the "cascading failures".
+
+        Examples
+        ---------
+
+        As with any openAI gym environment, it is used like this:
+
+        .. code-block:: python
+
+            import grid2op
+            from grid2op.Agent import RandomAgent
+
+            # I create an environment
+            env = grid2op.make()
+
+            # define an agent here, this is an example
+            agent = RandomAgent(env.action_space)
+
+            # the environment needs to be "reset" before usage:
+            obs = env.reset()
+            reward = env.reward_range[0]
+            done = False
+
+            # now run through each step like this
+            while not done:
+                action = agent.act(obs, reward, done)
+                obs, reward, done, info = env.step(action)
         """
-        # TODO update the documentation

         if not self.__is_init:
             raise Grid2OpException("Impossible to make a step with a non initialized backend. 
Have you called " @@ -853,49 +1091,49 @@ def step(self, action): init_line_status = copy.deepcopy(self.backend.get_line_status()) try: beg_ = time.time() - is_legal, reason = self.game_rules(action=action, env=self) + is_legal, reason = self._game_rules(action=action, env=self) if not is_legal: # action is replace by do nothing - action = self.helper_action_player({}) + action = self._helper_action_player({}) except_.append(reason) is_illegal = True ambiguous, except_tmp = action.is_ambiguous() if ambiguous: # action is replace by do nothing - action = self.helper_action_player({}) + action = self._helper_action_player({}) is_ambiguous = True except_.append(except_tmp) # get the modification of generator active setpoint from the environment - self.env_modification, prod_v_chronics = self._update_actions() - self.env_modification._single_act = False # because it absorbs all redispatching actions + self._env_modification, prod_v_chronics = self._update_actions() + self._env_modification._single_act = False # because it absorbs all redispatching actions new_p = self._get_new_prod_setpoint(action) if self.redispatching_unit_commitment_availble: # remember generator that were "up" before the action - gen_up_before = self.gen_activeprod_t > 0. + gen_up_before = self._gen_activeprod_t > 0. # compute the redispatching and the new productions active setpoint valid_disp, except_tmp, info_ = self._make_redisp(action, new_p) if not valid_disp: # game over case - action = self.helper_action_player({}) + action = self._helper_action_player({}) is_illegal_redisp = True except_.append(except_tmp) is_done = True except_.append("Game over due to infeasible redispatching state. A generator would " "\"behave abnormally\" in a real system.") if except_tmp is not None: - action = self.helper_action_player({}) + action = self._helper_action_player({}) is_illegal_redisp = True except_.append(except_tmp) # check the validity of min downtime and max uptime - except_tmp = self._handle_updown_times(gen_up_before, self.actual_dispatch) + except_tmp = self._handle_updown_times(gen_up_before, self._actual_dispatch) if except_tmp is not None: is_illegal_reco = True - action = self.helper_action_player({}) + action = self._helper_action_player({}) except_.append(except_tmp) # make sure the dispatching action is not implemented "as is" by the backend. @@ -904,8 +1142,8 @@ def step(self, action): self._backend_action += action action._redispatch[:] = init_disp - self._backend_action += self.env_modification - self._backend_action.set_redispatch(self.actual_dispatch) + self._backend_action += self._env_modification + self._backend_action.set_redispatch(self._actual_dispatch) # now get the new generator voltage setpoint voltage_control_act = self._voltage_control(action, prod_v_chronics) @@ -915,17 +1153,17 @@ def step(self, action): # TODO code the opponent part here and split more the timings! 
here "opponent time" is # included in time_apply_act tick = time.time() - attack, attack_duration = self.oppSpace.attack(observation=self.current_obs, - agent_action=action, - env_action=self.env_modification) + attack, attack_duration = self._oppSpace.attack(observation=self.current_obs, + agent_action=action, + env_action=self._env_modification) if attack is not None: # the opponent choose to attack # i update the "cooldown" on these things lines_attacked, subs_attacked = attack.get_topological_impact() - self.times_before_line_status_actionable[lines_attacked] = \ - np.maximum(attack_duration, self.times_before_line_status_actionable[lines_attacked]) - self.times_before_topology_actionable[subs_attacked] = \ - np.maximum(attack_duration, self.times_before_topology_actionable[subs_attacked]) + self._times_before_line_status_actionable[lines_attacked] = \ + np.maximum(attack_duration, self._times_before_line_status_actionable[lines_attacked]) + self._times_before_topology_actionable[subs_attacked] = \ + np.maximum(attack_duration, self._times_before_topology_actionable[subs_attacked]) self._backend_action += attack self._time_opponent += time.time() - tick self.backend.apply_action(self._backend_action) @@ -936,7 +1174,7 @@ def step(self, action): try: # compute the next _grid state beg_ = time.time() - disc_lines, detailed_info, conv_ = self.backend.next_grid_state(env=self, is_dc=self.env_dc) + disc_lines, detailed_info, conv_ = self.backend.next_grid_state(env=self, is_dc=self._env_dc) self._time_powerflow += time.time() - beg_ if conv_ is None: beg_ = time.time() @@ -945,40 +1183,40 @@ def step(self, action): self._backend_action.update_state(disc_lines) # one timestep passed, i can maybe reconnect some lines - self.times_before_line_status_actionable[self.times_before_line_status_actionable > 0] -= 1 + self._times_before_line_status_actionable[self._times_before_line_status_actionable > 0] -= 1 # update the vector for lines that have been disconnected - self.times_before_line_status_actionable[disc_lines] = int(self.parameters.NB_TIMESTEP_RECONNECTION) + self._times_before_line_status_actionable[disc_lines] = int(self.parameters.NB_TIMESTEP_RECONNECTION) self._update_time_reconnection_hazards_maintenance() # for the powerline that are on overflow, increase this time step - self.timestep_overflow[overflow_lines] += 1 + self._timestep_overflow[overflow_lines] += 1 # set to 0 the number of timestep for lines that are not on overflow - self.timestep_overflow[~overflow_lines] = 0 + self._timestep_overflow[~overflow_lines] = 0 # build the topological action "cooldown" aff_lines, aff_subs = action.get_topological_impact(init_line_status) - if self.max_timestep_line_status_deactivated > 0: + if self._max_timestep_line_status_deactivated > 0: # i update the cooldown only when this does not impact the line disconnected for the # opponent or by maitnenance for example cond = aff_lines # powerlines i modified # powerlines that are not affected by any other "forced disconnection" - cond &= self.times_before_line_status_actionable < self.max_timestep_line_status_deactivated - self.times_before_line_status_actionable[cond] = self.max_timestep_line_status_deactivated - if self.max_timestep_topology_deactivated > 0: - self.times_before_topology_actionable[self.times_before_topology_actionable > 0] -= 1 - self.times_before_topology_actionable[aff_subs] = self.max_timestep_topology_deactivated + cond &= self._times_before_line_status_actionable < self._max_timestep_line_status_deactivated + 
self._times_before_line_status_actionable[cond] = self._max_timestep_line_status_deactivated + if self._max_timestep_topology_deactivated > 0: + self._times_before_topology_actionable[self._times_before_topology_actionable > 0] -= 1 + self._times_before_topology_actionable[aff_subs] = self._max_timestep_topology_deactivated # build the observation self.current_obs = self.get_obs() self._time_extract_obs += time.time() - beg_ # extract production active value at this time step (should be independant of action class) - self.gen_activeprod_t[:], *_ = self.backend.generators_info() + self._gen_activeprod_t[:], *_ = self.backend.generators_info() # problem with the gen_activeprod_t above, is that the slack bus absorbs alone all the losses # of the system. So basically, when it's too high (higher than the ramp) it can # mess up the rest of the environment - self.gen_activeprod_t_redisp[:] = new_p + self.actual_dispatch + self._gen_activeprod_t_redisp[:] = new_p + self._actual_dispatch has_error = False except Grid2OpException as e: except_.append(e) @@ -1017,7 +1255,7 @@ def step(self, action): return self.current_obs, self.current_reward, self.done, infos def _get_reward(self, action, has_error, is_done, is_illegal, is_ambiguous): - res = self.reward_helper(action, self, has_error, is_done, is_illegal, is_ambiguous) + res = self._reward_helper(action, self, has_error, is_done, is_illegal, is_ambiguous) other_rewards = {k: v(action, self, has_error, is_done, is_illegal, is_ambiguous) for k, v in self.other_rewards.items() } @@ -1025,9 +1263,11 @@ def _get_reward(self, action, has_error, is_done, is_illegal, is_ambiguous): def get_reward_instance(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Returns the instance of the object that is used to compute the reward. """ - return self.reward_helper.template_reward + return self._reward_helper.template_reward def _is_done(self, has_error, is_done): no_more_data = self.chronics_handler.done() @@ -1035,21 +1275,23 @@ def _is_done(self, has_error, is_done): def _reset_vectors_and_timings(self): """ + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Maintenance are not reset, otherwise the data are not read properly (skip the first time step) """ - self.no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION - self.timestep_overflow[:] = 0 - self.nb_timestep_overflow_allowed[:] = self.parameters.NB_TIMESTEP_OVERFLOW_ALLOWED + self._no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION + self._timestep_overflow[:] = 0 + self._nb_timestep_overflow_allowed[:] = self.parameters.NB_TIMESTEP_OVERFLOW_ALLOWED self.nb_time_step = 0 - self.hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD - self.env_dc = self.parameters.ENV_DC + self._hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD + self._env_dc = self.parameters.ENV_DC - self.times_before_line_status_actionable[:] = 0 - self.max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_LINE + self._times_before_line_status_actionable[:] = 0 + self._max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_LINE - self.times_before_topology_actionable[:] = 0 - self.max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_SUB + self._times_before_topology_actionable[:] = 0 + self._max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_COOLDOWN_SUB # reset timings self._time_apply_act = 0 @@ -1062,8 +1304,8 @@ def _reset_vectors_and_timings(self): self.done = False def _reset_maintenance(self): - self.time_next_maintenance[:] = -1 - self.duration_next_maintenance[:] = 0 + self._time_next_maintenance[:] = -1 + self._duration_next_maintenance[:] = 0 def __enter__(self): """ @@ -1099,7 +1341,11 @@ def close(self): if self.viewer is not None: self.viewer = None self.viewer_fig = None - self.backend.close() + + if self.backend is not None: + self.backend.close() + self.backend = None + self.__is_init = False def attach_layout(self, grid_layout): """ @@ -1108,10 +1354,25 @@ def attach_layout(self, grid_layout): Parameters ---------- - grid_layout + grid_layout: ``dict`` + The layout of the grid (*i.e* the coordinates (x,y) of all substations). The keys + should be the substation names, and the values a tuple (with two float) representing + the coordinate of the substation. - Returns - ------- + Examples + --------- + Here is an example on how to attach a layout for an environment: + + .. code-block:: python + + import grid2op + + # create the environment + env = grid2op.make() + + # assign coordinates (0., 0.) to all substations (this is a dummy thing to do here!) + layout = {sub_name: (0., 0.) for sub_name in env.name_sub} + env.attach_layout(layout) """ if isinstance(grid_layout, dict): @@ -1119,7 +1380,7 @@ def attach_layout(self, grid_layout): elif isinstance(grid_layout, list): grid_layout = {k: v for k, v in zip(self.name_sub, grid_layout)} else: - raise EnvError("Attempt to set a layout from something different than a dictionnary or a list. " + raise EnvError("Attempt to set a layout from something different than a dictionary or a list. " "This is for now not supported.") if self.__is_init: @@ -1130,7 +1391,7 @@ def attach_layout(self, grid_layout): "".format(el)) tmp = grid_layout[el] try: - x,y = tmp + x, y = tmp x = dt_float(x) y = dt_float(y) res[el] = (x, y) @@ -1139,16 +1400,16 @@ def attach_layout(self, grid_layout): "that will be used the grid layout. 
The error is: \"{}\""
                               "".format(el, e_))
        super().attach_layout(res)
-        if self.helper_action_player is not None:
-            self.helper_action_player.attach_layout(res)
-        if self.helper_action_env is not None:
-            self.helper_action_env.attach_layout(res)
-        if self.helper_observation is not None:
-            self.helper_observation.attach_layout(res)
-        if self.voltage_controler is not None:
-            self.voltage_controler.attach_layout(res)
-        if self.opponent_action_space is not None:
-            self.opponent_action_space.attach_layout(res)
+        if self._helper_action_player is not None:
+            self._helper_action_player.attach_layout(res)
+        if self._helper_action_env is not None:
+            self._helper_action_env.attach_layout(res)
+        if self._helper_observation is not None:
+            self._helper_observation.attach_layout(res)
+        if self._voltage_controler is not None:
+            self._voltage_controler.attach_layout(res)
+        if self._opponent_action_space is not None:
+            self._opponent_action_space.attach_layout(res)

    def fast_forward_chronics(self, nb_timestep):
        """
@@ -1159,12 +1420,40 @@ def fast_forward_chronics(self, nb_timestep):
        00:00). This can lead to suboptimal exploration, as during this phase, only a few time steps are managed by
        the agent, so in general these few time steps will correspond to grid state around Jan 1st at 00:00.

-
        Parameters
        ----------
        nb_timestep: ``int``
            Number of time step to "fast forward"

+        Examples
+        ---------
+        This can be used like this:
+
+        .. code-block:: python
+
+            import grid2op
+
+            # create the environment
+            env = grid2op.make()
+
+            # skip the first 150 steps of the chronics
+            env.fast_forward_chronics(150)
+            done = env.is_done
+            if not done:
+                obs = env.get_obs()
+                # do something
+            else:
+                # there was a "game over"
+                # you need to reset the env (which will "cancel" the fast_forward)
+                pass
+                # do something else
+
+        Notes
+        -----
+        This method can set the state of the environment in a 'game over' state (`done=True`) for example if the
+        chronics last `xxx` time steps and you ask to "fast forward" more than `xxx` steps. This is why we advise to
+        check the state of the environment after the call to this method if you use it (see the "Examples" paragraph)
+
        """
        # Go to the timestep requested minus one
        nb_timestep = max(1, nb_timestep - 1)
@@ -1174,16 +1463,22 @@ def fast_forward_chronics(self, nb_timestep):
        # Update the timing vectors
        min_time_line_reco = np.zeros(self.n_line, dtype=dt_int)
        min_time_topo = np.zeros(self.n_sub, dtype=dt_int)
-        ff_time_line_act = self.times_before_line_status_actionable - nb_timestep
-        ff_time_topo_act = self.times_before_topology_actionable - nb_timestep
-        self.times_before_line_status_actionable[:] = np.maximum(ff_time_line_act, min_time_line_reco)
-        self.times_before_topology_actionable[:] = np.maximum(ff_time_topo_act, min_time_topo)
+        ff_time_line_act = self._times_before_line_status_actionable - nb_timestep
+        ff_time_topo_act = self._times_before_topology_actionable - nb_timestep
+        self._times_before_line_status_actionable[:] = np.maximum(ff_time_line_act, min_time_line_reco)
+        self._times_before_topology_actionable[:] = np.maximum(ff_time_topo_act, min_time_topo)

        # Update to the fast forward state using a do nothing action
-        self.step(self.helper_action_player({}))
+        self.step(self._helper_action_player({}))

    def get_current_line_status(self):
-        """internal, do not use outside of "Rules" or simulate etc."""
+        """
+        .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + prefer using :attr:`grid2op.BaseObservation.line_status` + + This method allows to retrieve the line status. + """ if self.current_obs is not None: powerline_status = self.current_obs.line_status else: diff --git a/grid2op/Environment/BaseMultiProcessEnv.py b/grid2op/Environment/BaseMultiProcessEnv.py index 4e35cc28d..a207bea56 100644 --- a/grid2op/Environment/BaseMultiProcessEnv.py +++ b/grid2op/Environment/BaseMultiProcessEnv.py @@ -17,6 +17,8 @@ class RemoteEnv(Process): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This class represent the environment that is executed on a remote process. Note that the environment is only created in the subprocess, and is not available in the main process. Once created @@ -38,6 +40,8 @@ def __init__(self, env_params, remote, parent_remote, seed, name=None): def init_env(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Initialize the environment that will perform all the computation of this process. Remember the environment only lives in this process. It cannot be transfer to / from the main process. @@ -47,8 +51,6 @@ def init_env(self): is provided in :func:`grid2op.Chronics.Multifolder.shuffle`. """ - # TODO documentation - self.space_prng = np.random.RandomState() self.space_prng.seed(seed=self.seed_used) self.backend = self.env_params["_raw_backend_class"]() diff --git a/grid2op/Environment/Environment.py b/grid2op/Environment/Environment.py index 0ed8c44dc..691615c56 100644 --- a/grid2op/Environment/Environment.py +++ b/grid2op/Environment/Environment.py @@ -26,60 +26,11 @@ class Environment(BaseEnv): """ This class is the grid2op implementation of the "Environment" entity in the RL framework. - TODO clean the attribute, make a doc for all of them, move the description of some of them in BaseEnv when relevant. Attributes ---------- - logger: ``logger`` - Use to store some information (currently in beta status) - time_stamp: ``datetime.time`` - Current time of the chronics - - nb_time_step: ``int`` - Number of time steps played this episode - - parameters: :class:`grid2op.Parameters.Parameters` - Parameters used for the game - - rewardClass: ``type`` - Type of reward used. Should be a subclass of :class:`grid2op.BaseReward.BaseReward` - - init_grid_path: ``str`` - The path where the description of the powergrid is located. - - backend: :class:`grid2op.Backend.Backend` - The backend used to compute powerflows and cascading failures. - - game_rules: :class:`grid2op.Rules.RulesChecker` - The rules of the game (define which actions are legal and which are not) - - helper_action_player: :class:`grid2op.Action.ActionSpace` - Helper used to manipulate more easily the actions given to / provided by the :class:`grid2op.Agent.BaseAgent` - (player) - - helper_action_env: :class:`grid2op.Action.ActionSpace` - Helper used to manipulate more easily the actions given to / provided by the environment to the backend. - - helper_observation: :class:`grid2op.Observation.ObservationSpace` - Helper used to generate the observation that will be given to the :class:`grid2op.BaseAgent` - - current_obs: :class:`grid2op.Observation.Observation` - The current observation (or None if it's not intialized) - - chronics_handler: :class:`grid2op.ChronicsHandler.ChronicsHandler` - Helper to get the modification of each time step during the episode. 
- - names_chronics_to_backend: ``dict`` - Configuration file used to associated the name of the objects in the backend - (both extremities of powerlines, load or production for - example) with the same object in the data (:attr:`Environment.chronics_handler`). The idea is that, usually - data generation comes from a different software that does not take into account the powergrid infrastructure. - Hence, the same "object" can have a different name. This mapping is present to avoid the need to rename - the "object" when providing data. A more detailed description is available at - :func:`grid2op.ChronicsHandler.GridValue.initialize`. - - reward_helper: :class:`grid2p.BaseReward.RewardHelper` - Helper that is called to compute the reward at each time step. + name: ``str`` + The name of the environment action_space: :class:`grid2op.Action.ActionSpace` Another name for :attr:`Environment.helper_action_player` for gym compatibility. @@ -99,11 +50,6 @@ class Environment(BaseEnv): viewer: ``object`` Used to display the powergrid. Currently not supported. - env_modification: :class:`grid2op.Action.Action` - Representation of the actions of the environment for the modification of the powergrid. - - current_reward: ``float`` - The reward of the current time step """ def __init__(self, init_grid_path: str, @@ -139,6 +85,7 @@ def __init__(self, tol_poly=tol_poly, other_rewards=other_rewards, with_forecast=with_forecast, + voltagecontrolerClass=voltagecontrolerClass, opponent_action_class=opponent_action_class, opponent_class=opponent_class, opponent_budget_class=opponent_budget_class, @@ -151,9 +98,6 @@ def __init__(self, warnings.warn("It is NOT recommended to create an environment without \"make\" and EVEN LESS " "to use an environment without a name") self.name = name - # the voltage controler - self.voltagecontrolerClass = voltagecontrolerClass - self.voltage_controler = None # for gym compatibility (initialized below) self.action_space = None @@ -169,31 +113,18 @@ def __init__(self, self._raw_backend_class = _raw_backend_class # for plotting - self.init_backend(init_grid_path, chronics_handler, backend, - names_chronics_to_backend, actionClass, observationClass, - rewardClass, legalActClass) - - def init_backend(self, - init_grid_path, chronics_handler, backend, - names_chronics_to_backend, actionClass, observationClass, - rewardClass, legalActClass): + self._init_backend(init_grid_path, chronics_handler, backend, + names_chronics_to_backend, actionClass, observationClass, + rewardClass, legalActClass) + + def _init_backend(self, + init_grid_path, chronics_handler, backend, + names_chronics_to_backend, actionClass, observationClass, + rewardClass, legalActClass): """ - TODO documentation - - Parameters - ---------- - init_grid_path - chronics_handler - backend - names_chronics_to_backend - actionClass - observationClass - rewardClass - legalActClass - - Returns - ------- + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Create a proper and valid environment. 
""" if not isinstance(rewardClass, type): @@ -203,12 +134,12 @@ def init_backend(self, if not issubclass(rewardClass, BaseReward): raise Grid2OpException("Parameter \"rewardClass\" used to build the Environment should derived form " "the grid2op.BaseReward class, type provided is \"{}\"".format(type(rewardClass))) - self.rewardClass = rewardClass - self.actionClass = actionClass - self.observationClass = observationClass + self._rewardClass = rewardClass + self._actionClass = actionClass + self._observationClass = observationClass # backend - self.init_grid_path = os.path.abspath(init_grid_path) + self._init_grid_path = os.path.abspath(init_grid_path) if not isinstance(backend, Backend): raise Grid2OpException( "Parameter \"backend\" used to build the Environment should derived form the " @@ -217,9 +148,9 @@ def init_backend(self, # all the above should be done in this exact order, otherwise some weird behaviour might occur # this is due to the class attribute self.backend.set_env_name(self.name) - self.backend.load_grid(self.init_grid_path) # the real powergrid of the environment - self.backend.load_redispacthing_data(os.path.split(self.init_grid_path)[0]) - self.backend.load_grid_layout(os.path.split(self.init_grid_path)[0]) + self.backend.load_grid(self._init_grid_path) # the real powergrid of the environment + self.backend.load_redispacthing_data(os.path.split(self._init_grid_path)[0]) + self.backend.load_grid_layout(os.path.split(self._init_grid_path)[0]) self.backend.assert_grid_correct() self._has_been_initialized() # really important to include this piece of code! and just here after the # backend has loaded everything @@ -241,8 +172,8 @@ def init_backend(self, "Parameter \"legalActClass\" used to build the Environment should derived form the " "grid2op.BaseRules class, type provided is \"{}\"".format( type(legalActClass))) - self.game_rules = RulesChecker(legalActClass=legalActClass) - self.legalActClass = legalActClass + self._game_rules = RulesChecker(legalActClass=legalActClass) + self._legalActClass = legalActClass # action helper if not isinstance(actionClass, type): @@ -266,20 +197,20 @@ def init_backend(self, type(observationClass))) # action affecting the grid that will be made by the agent - self.helper_action_class = ActionSpace.init_grid(gridobj=self.backend) - self.helper_action_player = self.helper_action_class(gridobj=self.backend, - actionClass=actionClass, - legal_action=self.game_rules.legal_action) + self._helper_action_class = ActionSpace.init_grid(gridobj=self.backend) + self._helper_action_player = self._helper_action_class(gridobj=self.backend, + actionClass=actionClass, + legal_action=self._game_rules.legal_action) # action that affect the grid made by the environment. 
- self.helper_action_env = self.helper_action_class(gridobj=self.backend, - actionClass=CompleteAction, - legal_action=self.game_rules.legal_action) - self.helper_observation_class = ObservationSpace.init_grid(gridobj=self.backend) - self.helper_observation = self.helper_observation_class(gridobj=self.backend, - observationClass=observationClass, - rewardClass=rewardClass, - env=self) + self._helper_action_env = self._helper_action_class(gridobj=self.backend, + actionClass=CompleteAction, + legal_action=self._game_rules.legal_action) + self._helper_observation_class = ObservationSpace.init_grid(gridobj=self.backend) + self._helper_observation = self._helper_observation_class(gridobj=self.backend, + observationClass=observationClass, + rewardClass=rewardClass, + env=self) # handles input data if not isinstance(chronics_handler, ChronicsHandler): @@ -297,17 +228,17 @@ def init_backend(self, self.chronics_handler.check_validity(self.backend) # reward function - self.reward_helper = RewardHelper(self.rewardClass) - self.reward_helper.initialize(self) + self._reward_helper = RewardHelper(self._rewardClass) + self._reward_helper.initialize(self) for k, v in self.other_rewards.items(): v.initialize(self) # controler for voltage - if not issubclass(self.voltagecontrolerClass, BaseVoltageController): + if not issubclass(self._voltagecontrolerClass, BaseVoltageController): raise Grid2OpException("Parameter \"voltagecontrolClass\" should derive from \"ControlVoltageFromFile\".") - self.voltage_controler = self.voltagecontrolerClass(gridobj=self.backend, - controler_backend=self.backend) + self._voltage_controler = self._voltagecontrolerClass(gridobj=self.backend, + controler_backend=self.backend) # create the opponent # At least the 3 following attributes should be set before calling _create_opponent @@ -317,7 +248,7 @@ def init_backend(self, # first injections given) self._reset_maintenance() self._reset_redispatching() - do_nothing = self.helper_action_env({}) + do_nothing = self._helper_action_env({}) *_, fail_to_start, info = self.step(do_nothing) if fail_to_start: raise Grid2OpException("Impossible to initialize the powergrid, the powerflow diverge at iteration 0. " @@ -327,9 +258,9 @@ def init_backend(self, self.backend.assert_grid_correct_after_powerflow() # for gym compatibility - self.action_space = self.helper_action_player # this should be an action !!! - self.observation_space = self.helper_observation # this return an observation. - self.reward_range = self.reward_helper.range() + self.action_space = self._helper_action_player # this should be an action !!! + self.observation_space = self._helper_observation # this return an observation. + self.reward_range = self._reward_helper.range() self.viewer = None self.viewer_fig = None @@ -344,6 +275,8 @@ def init_backend(self, def _voltage_control(self, agent_action, prod_v_chronics): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Update the environment action "action_env" given a possibly new voltage setpoint for the generators. This function can be overide for a more complex handling of the voltages. 
@@ -358,10 +291,10 @@ def _voltage_control(self, agent_action, prod_v_chronics): The voltages that has been specified in the chronics """ - volt_control_act = self.voltage_controler.fix_voltage(self.current_obs, - agent_action, - self.env_modification, - prod_v_chronics) + volt_control_act = self._voltage_controler.fix_voltage(self.current_obs, + agent_action, + self._env_modification, + prod_v_chronics) return volt_control_act def set_chunk_size(self, new_chunk_size): @@ -377,7 +310,7 @@ def set_chunk_size(self, new_chunk_size): learning agent) at the beginning when the agent performs poorly, the software might spend most of its time loading the data. - **NB** this has no effect if the chronics does not support this feature. TODO see xxx for more information + **NB** this has no effect if the chronics does not support this feature. **NB** The environment need to be **reset** for this to take effect (it won't affect the chronics already loaded) @@ -387,6 +320,20 @@ def set_chunk_size(self, new_chunk_size): new_chunk_size: ``int`` or ``None`` The new chunk size (positive integer) + Examples + --------- + Here is an example on how to use this function + + .. code-block:: python + + import grid2op + + # I create an environment + env = grid2op.make("rte_case5_example", test=True) + env.set_chunk_size(100) + # and now data will be read from the hard drive 100 time steps per 100 time steps + # instead of the whole episode at once. + """ if new_chunk_size is None: self.chronics_handler.set_chunk_size(new_chunk_size) @@ -408,7 +355,7 @@ def set_id(self, id_): """ Set the id that will be used at the next call to :func:`Environment.reset`. - **NB** this has no effect if the chronics does not support this feature. TODO see xxx for more information + **NB** this has no effect if the chronics does not support this feature. **NB** The environment need to be **reset** for this to take effect. @@ -475,9 +422,44 @@ def attach_renderer(self, graph_layout=None): Parameters ---------- graph_layout: ``dict`` + Here for backward compatibility. Currently not used. + + If you want to set a specific layout call :func:`BaseEnv.attach_layout` + If ``None`` this class will use the default substations layout provided when the environment was created. Otherwise it will use the data provided. + Examples + --------- + Here is how to use the function + + .. code-block:: python + + import grid2op + + # create the environment + env = grid2op.make() + + if False: + # if you want to change the default layout of the powergrid + # assign coordinates (0., 0.) to all substations (this is a dummy thing to do here!) + layout = {sub_name: (0., 0.) for sub_name in env.name_sub} + env.attach_layout(layout) + # NB again, this code will make everything look super ugly !!!! Don't change the + # default layout unless you have a reason to. 
+ + # and if you want to use the renderer + env.attach_renderer() + + # and now you can "render" (plot) the state of the grid + obs = env.reset() + done = False + reward = env.reward_range[0] + while not done: + env.render() + action = agent.act(obs, reward, done) + obs, reward, done, info = env.step(action) + """ # Viewer already exists: skip if self.viewer is not None: @@ -491,30 +473,35 @@ def attach_renderer(self, graph_layout=None): "Please install matplotlib or run pip install grid2op[optional]" raise Grid2OpException(err_msg) from None - self.viewer = PlotMatplot(self.helper_observation) + self.viewer = PlotMatplot(self._helper_observation) self.viewer_fig = None # Set renderer modes self.metadata = {'render.modes': ["human", "silent"]} def __str__(self): - return '<{} instance>'.format(type(self).__name__) + return '<{} instance named {}>'.format(type(self).__name__, self.name) # TODO be closer to original gym implementation def reset_grid(self): """ - Reset the backend to a clean state by reloading the powergrid from the hard drive. This might takes some time. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is automatically called when using `env.reset` + + Reset the backend to a clean state by reloading the powergrid from the hard drive. + This might takes some time. If the thermal has been modified, it also modify them into the new backend. """ - self.backend.reset(self.init_grid_path) # the real powergrid of the environment + self.backend.reset(self._init_grid_path) # the real powergrid of the environment self.backend.assert_grid_correct() if self._thermal_limit_a is not None: self.backend.set_thermal_limit(self._thermal_limit_a.astype(dt_float)) self._backend_action = self._backend_action_class() - do_nothing = self.helper_action_env({}) + do_nothing = self._helper_action_env({}) *_, fail_to_start, info = self.step(do_nothing) if fail_to_start: raise Grid2OpException("Impossible to initialize the powergrid, the powerflow diverge at iteration 0. " @@ -524,8 +511,7 @@ def add_text_logger(self, logger=None): """ Add a text logger to this :class:`Environment` - Logging is for now an incomplete feature, really incomplete (beta) - + Logging is for now an incomplete feature, really incomplete (not used) Parameters ---------- @@ -545,6 +531,25 @@ def reset(self): to ensure the episode is fully over. This method should be called only at the end of an episode. + + Examples + -------- + The standard "gym loop" can be done with the following code: + + .. 
code-block:: python + + import grid2op + + # create the environment + env = grid2op.make() + + # and now you can "render" (plot) the state of the grid + obs = env.reset() + done = False + reward = env.reward_range[0] + while not done: + action = agent.act(obs, reward, done) + obs, reward, done, info = env.step(action) """ super().reset() self.chronics_handler.next_chronics() @@ -552,7 +557,7 @@ def reset(self): self.backend.name_line, self.backend.name_sub, names_chronics_to_backend=self.names_chronics_to_backend) self.current_obs = None - self.env_modification = None + self._env_modification = None self._reset_maintenance() self._reset_redispatching() self._reset_vectors_and_timings() # it need to be done BEFORE to prevent cascading failure when there has been @@ -564,13 +569,36 @@ def reset(self): self._reset_vectors_and_timings() # and it needs to be done AFTER to have proper timings at tbe beginning # reset the opponent - self.oppSpace.reset() + self._oppSpace.reset() return self.get_obs() def render(self, mode='human'): """ Render the state of the environment on the screen, using matplotlib Also returns the Matplotlib figure + + Examples + -------- + Rendering need first to define a "renderer" which can be done with the following code: + + .. code-block:: python + + import grid2op + + # create the environment + env = grid2op.make() + + # if you want to use the renderer + env.attach_renderer() + + # and now you can "render" (plot) the state of the grid + obs = env.reset() + done = False + reward = env.reward_range[0] + while not done: + env.render() # this piece of code plot the grid + action = agent.act(obs, reward, done) + obs, reward, done, info = env.step(action) """ # Try to create a plotter instance # Does nothing if viewer exists @@ -598,39 +626,49 @@ def render(self, mode='human'): def copy(self): """ - performs a deep copy of the environment + Performs a deep copy of the environment + + Unless you have a reason to, it is not advised to make copy of an Environment. + + Examples + -------- + It should be used as follow: + + .. code-block:: python + + import grid2op + env = grid2op.make() + cpy_of_env = env.copy() - Returns - ------- """ tmp_backend = self.backend self.backend = None - tmp_obs_space = self.helper_observation + tmp_obs_space = self._helper_observation self.observation_space = None - self.helper_observation = None + self._helper_observation = None obs_tmp = self.current_obs self.current_obs = None - volt_cont = self.voltage_controler - self.voltage_controler = None + volt_cont = self._voltage_controler + self._voltage_controler = None res = copy.deepcopy(self) res.backend = tmp_backend.copy() - res.helper_observation = tmp_obs_space.copy() - res.observation_space = res.helper_observation + res._helper_observation = tmp_obs_space.copy() + res.observation_space = res._helper_observation res.current_obs = obs_tmp.copy() - res.voltage_controler = volt_cont.copy() + res._voltage_controler = volt_cont.copy() if self._thermal_limit_a is not None: res.backend.set_thermal_limit(self._thermal_limit_a) self.backend = tmp_backend self.observation_space = tmp_obs_space - self.helper_observation = tmp_obs_space + self._helper_observation = tmp_obs_space self.current_obs = obs_tmp - self.voltage_controler = volt_cont + self._voltage_controler = volt_cont return res def get_kwargs(self, with_backend=True): @@ -642,10 +680,15 @@ def get_kwargs(self, with_backend=True): code are used) but you still want to make parallel processing using "MultiProcessing" module. 
In that case, you can send this dictionary to each child process, and have each child process make a copy of ``self`` + **NB** This function should not be used to make a copy of an environment. Prefer using :func:`Environment.copy` + for such purpose. + + Returns ------- res: ``dict`` - A dictionary that helps build an environment like ``self`` + A dictionary that helps build an environment like ``self`` (which is NOT a copy of self) but rather + an instance of an environment with the same properties. Examples -------- @@ -658,36 +701,39 @@ def get_kwargs(self, with_backend=True): env = grid2op.make() # create the environment of your choice copy_of_env = Environment(**env.get_kwargs()) # And you can use this one as you would any other environment. + # NB this is not a "proper" copy. for example it will not be at the same step, it will be possible + # seeded with a different seed. + # use `env.copy()` to make a proper copy of an environment. """ res = {} - res["init_grid_path"] = self.init_grid_path + res["init_grid_path"] = self._init_grid_path res["chronics_handler"] = copy.deepcopy(self.chronics_handler) if with_backend: res["backend"] = self.backend.copy() res["parameters"] = copy.deepcopy(self.parameters) res["names_chronics_to_backend"] = copy.deepcopy(self.names_chronics_to_backend) - res["actionClass"] = self.actionClass - res["observationClass"] = self.observationClass - res["rewardClass"] = self.rewardClass - res["legalActClass"] = self.legalActClass + res["actionClass"] = self._actionClass + res["observationClass"] = self._observationClass + res["rewardClass"] = self._rewardClass + res["legalActClass"] = self._legalActClass res["epsilon_poly"] = self._epsilon_poly res["tol_poly"] = self._tol_poly res["thermal_limit_a"] = self._thermal_limit_a - res["voltagecontrolerClass"] = self.voltagecontrolerClass + res["voltagecontrolerClass"] = self._voltagecontrolerClass res["other_rewards"] = {k: v.rewardClass for k, v in self.other_rewards.items()} res["name"] = self.name res["_raw_backend_class"] = self._raw_backend_class res["with_forecast"] = self.with_forecast - res["opponent_action_class"] = self.opponent_action_class - res["opponent_class"] = self.opponent_class - res["opponent_init_budget"] = self.opponent_init_budget - res["opponent_budget_per_ts"] = self.opponent_budget_per_ts - res["opponent_budget_class"] = self.opponent_budget_class - res["opponent_attack_duration"] = self.opponent_attack_duration - res["opponent_attack_cooldown"] = self.opponent_attack_cooldown - res["kwargs_opponent"] = self.kwargs_opponent + res["opponent_action_class"] = self._opponent_action_class + res["opponent_class"] = self._opponent_class + res["opponent_init_budget"] = self._opponent_init_budget + res["opponent_budget_per_ts"] = self._opponent_budget_per_ts + res["opponent_budget_class"] = self._opponent_budget_class + res["opponent_attack_duration"] = self._opponent_attack_duration + res["opponent_attack_cooldown"] = self._opponent_attack_cooldown + res["kwargs_opponent"] = self._kwargs_opponent return res def get_params_for_runner(self): @@ -702,8 +748,8 @@ def get_params_for_runner(self): import grid2op from grid2op.Runner import Runner + from grid2op.Agent import DoNothingAgent # for example env = grid2op.make() # create the environment of your choice - agent = DoNothingAgent(env.actoin_space) # create the proper runner runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent) @@ -713,14 +759,14 @@ def get_params_for_runner(self): """ res = {} - res["init_grid_path"] = 
self.init_grid_path + res["init_grid_path"] = self._init_grid_path res["path_chron"] = self.chronics_handler.path res["parameters_path"] = self.parameters.to_dict() res["names_chronics_to_backend"] = self.names_chronics_to_backend - res["actionClass"] = self.actionClass - res["observationClass"] = self.observationClass - res["rewardClass"] = self.rewardClass - res["legalActClass"] = self.legalActClass + res["actionClass"] = self._actionClass + res["observationClass"] = self._observationClass + res["rewardClass"] = self._rewardClass + res["legalActClass"] = self._legalActClass res["envClass"] = Environment res["gridStateclass"] = self.chronics_handler.chronicsClass res["backendClass"] = self._raw_backend_class @@ -731,17 +777,17 @@ def get_params_for_runner(self): del dict_["path"] res["gridStateclass_kwargs"] = dict_ res["thermal_limit_a"] = self._thermal_limit_a - res["voltageControlerClass"] = self.voltagecontrolerClass + res["voltageControlerClass"] = self._voltagecontrolerClass res["other_rewards"] = {k: v.rewardClass for k, v in self.other_rewards.items()} res["grid_layout"] = self.grid_layout res["name_env"] = self.name - res["opponent_action_class"] = self.opponent_action_class - res["opponent_class"] = self.opponent_class - res["opponent_init_budget"] = self.opponent_init_budget - res["opponent_budget_per_ts"] = self.opponent_budget_per_ts - res["opponent_budget_class"] = self.opponent_budget_class - res["opponent_attack_duration"] = self.opponent_attack_duration - res["opponent_attack_cooldown"] = self.opponent_attack_cooldown - res["opponent_kwargs"] = self.kwargs_opponent + res["opponent_action_class"] = self._opponent_action_class + res["opponent_class"] = self._opponent_class + res["opponent_init_budget"] = self._opponent_init_budget + res["opponent_budget_per_ts"] = self._opponent_budget_per_ts + res["opponent_budget_class"] = self._opponent_budget_class + res["opponent_attack_duration"] = self._opponent_attack_duration + res["opponent_attack_cooldown"] = self._opponent_attack_cooldown + res["opponent_kwargs"] = self._kwargs_opponent return res diff --git a/grid2op/Environment/MultiMixEnv.py b/grid2op/Environment/MultiMixEnv.py index fc121b35f..3d18c9a96 100644 --- a/grid2op/Environment/MultiMixEnv.py +++ b/grid2op/Environment/MultiMixEnv.py @@ -18,12 +18,136 @@ class MultiMixEnvironment(GridObjects, RandomObject): """ This class represent a single powergrid configuration, - backed by multiple enviromnents parameters and chronics + backed by multiple environments parameters and chronics - It implements most of the BaseEnv public interface: + It implements most of the :class:`BaseEnv` public interface: so it can be used as a more classic environment. - # TODO example on how to use it + MultiMixEnvironment environments behave like a superset of the environment: they + are made of sub environments (called mixes) that are grid2op regular :class:`Environment`. + You might think the MultiMixEnvironment as a dictionary of :class:`Environment` that implements + some of the :class:`BaseEnv` interface such as :func:`BaseEnv.step` or :func:`BaseEnv.reset`. + + By default, each time you call the "step" function a different mix is used. Mixes, by default + are looped through always in the same order. You can see the Examples section for information + about control of these + + + Examples + -------- + In this section we present some common use of the MultiMix environment. + + **Basic Usage** + + You can think of a MultiMixEnvironment as any :class:`Environment`. 
So this is a perfectly
+    valid way to use a MultiMix:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Agent import RandomAgent
+
+        # we use an example of a multimix dataset attached with the grid2op package
+        multimix_env = grid2op.make("l2rpn_neurips_2020_track2", test=True)
+
+        # define an agent like in any environment
+        agent = RandomAgent(multimix_env.action_space)
+
+        # and now you can do the open ai gym loop
+        NB_EPISODE = 10
+        for i in range(NB_EPISODE):
+            obs = multimix_env.reset()
+            # each time "reset" is called, another mix is used.
+            reward = multimix_env.reward_range[0]
+            done = False
+            while not done:
+                act = agent.act(obs, reward, done)
+                obs, reward, done, info = multimix_env.step(act)
+
+    **Use each mix one after the other**
+
+    In case you want to study each mix independently, you can iterate through the MultiMix
+    in a pythonic way. This makes it easy to perform, for example, 10 episodes for a given mix
+    before moving on to the next one.
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Agent import RandomAgent
+
+        # we use an example of a multimix dataset attached with the grid2op package
+        multimix_env = grid2op.make("l2rpn_neurips_2020_track2", test=True)
+
+        # define an agent like in any environment
+        agent = RandomAgent(multimix_env.action_space)
+
+        NB_EPISODE = 10
+        for mix in multimix_env:
+            # mix is a regular environment, you can do whatever you want with it
+            # for example
+            for i in range(NB_EPISODE):
+                obs = mix.reset()
+                # here only the current mix is reset (and not the whole multimix)
+                reward = mix.reward_range[0]
+                done = False
+                while not done:
+                    act = agent.act(obs, reward, done)
+                    obs, reward, done, info = mix.step(act)
+
+
+    **Selecting a given Mix**
+
+    Sometimes it might be interesting to study only a given mix.
+    For that you can use the `[]` operator to select only a given mix (which is a grid2op environment)
+    and use it as you would.
+
+    This can be done with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Agent import RandomAgent
+
+        # we use an example of a multimix dataset attached with the grid2op package
+        multimix_env = grid2op.make("l2rpn_neurips_2020_track2", test=True)
+
+        # define an agent like in any environment
+        agent = RandomAgent(multimix_env.action_space)
+
+        # list all available mixes:
+        mixes_names = list(multimix_env.keys())
+
+        # and now suppose we want to study only the first one
+        mix = multimix_env[mixes_names[0]]
+
+        # and now you can do the open ai gym loop, or anything you want with it
+        NB_EPISODE = 10
+        for i in range(NB_EPISODE):
+            obs = mix.reset()
+            # "mix" is a regular grid2op environment: only this mix is reset
+            reward = mix.reward_range[0]
+            done = False
+            while not done:
+                act = agent.act(obs, reward, done)
+                obs, reward, done, info = mix.step(act)
+
+    **Using the Runner**
+
+    For MultiMixEnvironment, using the :class:`grid2op.Runner.Runner` cannot be done in a
+    straightforward manner. Here we give an example on how to do it.
+
+    .. 
code-block:: python + + import os + import grid2op + from grid2op.Agent import RandomAgent + + # we use an example of a multimix dataset attached with grid2op pacakage + multimix_env = grid2op.make("l2rpn_neurips_2020_track2", test=True) + + # you can use the runner as following + PATH = "PATH/WHERE/YOU/WANT/TO/SAVE/THE/RESULTS" + for mix in multimix_env: + runner = Runner(**mix.get_params_for_runner(), agentClass=RandomAgent) + runner.run(nb_episode=1, + path_save=os.path.join(PATH,mix.name)) """ def __init__(self, @@ -184,7 +308,7 @@ def reset(self, random=False): def seed(self, seed=None): """ - Set the seed of this :class:`Environment` for a better control + Set the seed of this :class:`Environment` for a better control and to ease reproducible experiments. Parameters @@ -195,7 +319,7 @@ def seed(self, seed=None): Returns --------- seeds: ``list`` - The seed used to set the prng (pseudo random number generator) + The seed used to set the prng (pseudo random number generator) for all environments, and each environment ``tuple`` seeds """ @@ -234,7 +358,7 @@ def reactivate_forecast(self): def set_thermal_limit(self, thermal_limit): """ Set the thermal limit effectively. - Will propagate to all underlying environments + Will propagate to all underlying mixes """ for mix in self.mix_envs: mix.set_thermal_limit(thermal_limit) @@ -260,17 +384,5 @@ def close(self): mix.close() def attach_layout(self, grid_layout): - """ - Compare to the method of the base class, this one performs a check. - This method must be called after initialization. - - Parameters - ---------- - grid_layout - - Returns - ------- - - """ for mix in self.mix_envs: mix.attach_layout(grid_layout) diff --git a/grid2op/Environment/SingleEnvMultiProcess.py b/grid2op/Environment/SingleEnvMultiProcess.py index 33b45375f..de781eb7b 100644 --- a/grid2op/Environment/SingleEnvMultiProcess.py +++ b/grid2op/Environment/SingleEnvMultiProcess.py @@ -13,13 +13,12 @@ class SingleEnvMultiProcess(BaseMultiProcessEnvironment): """ - This class allows to evaluate a single agent instance on multiple environments running in parrallel. + This class allows to evaluate a single agent instance on multiple environments running in parallel. It is a kind of :class:`BaseMultiProcessEnvironment`. For more information you can consult the documentation of this parent class. It allows to interact at the same time with different copy of the (same) environment in parallel - Attributes ----------- env: `list::grid2op.Environment.Environment` @@ -33,7 +32,8 @@ class SingleEnvMultiProcess(BaseMultiProcessEnvironment): Examples -------- - An example on how you can best leverage this class is given in the getting_started notebooks. Another simple example is: + An example on how you can best leverage this class is given in the getting_started notebooks. Another simple + example is: .. code-block:: python diff --git a/grid2op/Episode/EpisodeData.py b/grid2op/Episode/EpisodeData.py index 272e6244f..25933d91f 100644 --- a/grid2op/Episode/EpisodeData.py +++ b/grid2op/Episode/EpisodeData.py @@ -6,69 +6,113 @@ # SPDX-License-Identifier: MPL-2.0 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. -""" -This module provides a way to serialize on disk et deserialize one run episode along with some -methods and utilities to ease its manipulation. - -If enabled when usign the :class:`Runner`, the :class:`EpisodeData` -will save the information in a structured way. 
For each episode there will be a folder with: - - - "episode_meta.json" that represents some meta information about: - - - "backend_type": the name of the :class:`grid2op.Backend.Backend` class used - - "chronics_max_timestep": the **maximum** number of timestep for the chronics used - - "chronics_path": the path where the temporal data (chronics) are located - - "env_type": the name of the :class:`grid2op.Environment` class used. - - "grid_path": the path where the powergrid has been loaded from - - "nb_timestep_played": number of time step the agent has succesfully managed - - "cumulative_reward": its total cumulative reward - - - "episode_times.json": gives some information about the total time spend in multiple part of the runner, mainly the - :class:`grid2op.Agent.BaseAgent` (and especially its method :func:`grid2op.BaseAgent.act`) and amount of time - spent in the :class:`grid2op.Environment.Environment` - - - "_parameters.json": is a representation as json of a the :class:`grid2op.Parameters.Parameters` used for this episode - - "rewards.npy" is a numpy 1d array giving the rewards at each time step. We adopted the convention that the stored - reward at index `i` is the one observed by the agent at time `i` and **NOT** the reward sent by the - :class:`grid2op.Environment` after the action has been implemented. - - "exec_times.npy" is a numpy 1d array giving the execution time of each time step of the episode - - "actions.npy" gives the actions that has been taken by the :class:`grid2op.BaseAgent.BaseAgent`. At row `i` of - "actions.npy" is a - vectorized representation of the action performed by the agent at timestep `i` *ie.* **after** having observed - the observation present at row `i` of "observation.npy" and the reward showed in row `i` of "rewards.npy". - - "disc_lines.npy" gives which lines have been disconnected during the simulation of the cascading failure at each - time step. The same convention as for "rewards.npy" has been adopted. This means that the powerlines are - disconnected when the :class:`grid2op.Agent.BaseAgent` takes the :class:`grid2op.BaseAction` at time step `i`. - - "observations.npy" is a numpy 2d array representing the :class:`grid2op.BaseObservation.BaseObservation` at the - disposal of the - :class:`grid2op.Agent.BaseAgent` when he took his action. - - "env_modifications.npy" is a 2d numpy array representing the modification of the powergrid from the environment. - these modification usually concerns the hazards, maintenance, as well as modification of the generators production - setpoint or the loads consumption. - -All of the above should allow to read back, and better understand the behaviour of some -:class:`grid2op.Agent.BaseAgent`, even though such utility functions have not been coded yet. -""" import json import os import numpy as np -from grid2op.Exceptions import Grid2OpException, AmbiguousAction +from grid2op.Exceptions import Grid2OpException, EnvError from grid2op.Action import ActionSpace from grid2op.Observation import ObservationSpace +# TODO refacto the "save / load" logic. For now save is in the CollectionWrapper and load in the EpisodeData + class EpisodeData: + """ + .. warning:: The attributes of this class are not up to date. + TODO be consistent with the real behaviour now. + + This module provides a way to serialize on disk et deserialize one run episode along with some + methods and utilities to ease its manipulation. 
+
+    If enabled when using the :class:`Runner`, the :class:`EpisodeData`
+    will save the information in a structured way. For each episode there will be a folder with:
+
+    - "episode_meta.json" that represents some meta information about:
+
+        - "agent_seed": the seed used to seed the agent (if any)
+        - "backend_type": the name of the :class:`grid2op.Backend.Backend` class used
+        - "chronics_max_timestep": the **maximum** number of timesteps for the chronics used
+        - "chronics_path": the path where the time-dependent data (chronics) are located
+        - "cumulative_reward": the cumulative reward over the whole episode
+        - "env_seed": the seed used to seed the environment (if any)
+        - "env_type": the name of the :class:`grid2op.Environment` class used.
+        - "grid_path": the path where the powergrid has been loaded from
+        - "nb_timestep_played": the number of time steps the agent has successfully managed
+
+    - "episode_times.json": gives some information about the total time spent in multiple parts of the runner,
+      mainly the :class:`grid2op.Agent.BaseAgent` (and especially its method :func:`grid2op.BaseAgent.act`) and
+      the amount of time spent in the :class:`grid2op.Environment.Environment`
+    - "_parameters.json": is a representation as json of the :class:`grid2op.Parameters.Parameters` used for this episode
+    - "rewards.npz" is a numpy 1d array giving the rewards at each time step. We adopted the convention that the stored
+      reward at index `i` is the one observed by the agent at time `i` and **NOT** the reward sent by the
+      :class:`grid2op.Environment` after the action has been implemented.
+    - "exec_times.npy" is a numpy 1d array giving the execution time of each time step of the episode
+    - "actions.npy" gives the actions that have been taken by the :class:`grid2op.BaseAgent.BaseAgent`. Row `i` of
+      "actions.npy" is a
+      vectorized representation of the action performed by the agent at timestep `i`, *i.e.* **after** having observed
+      the observation present at row `i` of "observation.npy" and the reward shown in row `i` of "rewards.npy".
+    - "disc_lines.npy" gives which lines have been disconnected during the simulation of the cascading failure at each
+      time step. The same convention as for "rewards.npy" has been adopted. This means that the powerlines are
+      disconnected when the :class:`grid2op.Agent.BaseAgent` takes the :class:`grid2op.BaseAction` at time step `i`.
+    - "observations.npy" is a numpy 2d array representing the :class:`grid2op.BaseObservation.BaseObservation` at the
+      disposal of the
+      :class:`grid2op.Agent.BaseAgent` when it took its action.
+    - "env_modifications.npy" is a 2d numpy array representing the modifications of the powergrid made by the
+      environment. These modifications usually concern the hazards, maintenance, as well as modifications of the
+      generators production setpoint or the loads consumption.
+
+    All of the above should allow to read back, and better understand, the behaviour of some
+    :class:`grid2op.Agent.BaseAgent`, even though such utility functions have not been coded yet.
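+
+    If you want to inspect the saved files directly, without the :class:`EpisodeData` API, they can be read
+    back with numpy. This is only a minimal sketch: the path and the episode name ("1") are assumptions
+    (they depend on how the runner was called), and it assumes the arrays are stored under the "data" key
+    of the compressed numpy archives (check :func:`EpisodeData.to_disk` if this does not match).
+
+    .. code-block:: python
+
+        import os
+        import numpy as np
+
+        # hypothetical episode folder: root given to the runner, then the episode name
+        path_episode = os.path.join("/I/SAVED/RESULTS/THERE", "1")
+        rewards = np.load(os.path.join(path_episode, "rewards.npz"))["data"]
+        print("Reward of the first step: {}".format(rewards[0]))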
+
+    Examples
+    --------
+    Here is an example on how to save the actions your agent performed, using the
+    :class:`grid2op.Runner.Runner` of grid2op.
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Runner import Runner
+
+        # I create an environment
+        env = grid2op.make("rte_case5_example", test=True)
+
+        # I create the runner
+        runner = Runner(**env.get_params_for_runner())
+
+        # I start the runner and save the results in "/I/SAVED/RESULTS/THERE"
+        # I start the evaluation on 2 different episodes
+        path_save = "/I/SAVED/RESULTS/THERE"
+        res = runner.run(path_save=path_save, nb_episode=2)
+
+    And now I can reload the data easily with the EpisodeData class:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Episode import EpisodeData
+
+        # I study only the first episode saved, because... why not
+        li_episode = EpisodeData.list_episode(path_save)
+        full_path, episode_studied = li_episode[0]
+        this_episode = EpisodeData.from_disk(path_save, episode_studied)
+
+        # now the episode is loaded, and you can easily iterate through the observations, the actions etc.
+        for act in this_episode.actions:
+            print(act)
+
+        for i, obs in enumerate(this_episode.observations):
+            print("At step {} the active productions were {}".format(i, obs.prod_p))
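+
+        # NB "len(this_episode)" returns the maximum number of steps of the underlying chronics
+        # (as stored in the episode meta data), NOT the number of steps the agent actually survived
+        print("Maximum number of steps: {}".format(len(this_episode)))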
+            # in this case it will be "do nothing"
+            runner = Runner(**env.get_params_for_runner())
+
+            # execute it a given number of chronics
+            nb_episode = 2
+            path_save = "i_saved_the_runner_here"
+            res = runner.run(nb_episode=nb_episode, path_save=path_save)
+
+            # END INTRO
+            ##################
+
+            li_episode = EpisodeData.list_episode(path_save)
+            # and now you can iterate through it:
+            for full_episode_path, episode_name in li_episode:
+                this_episode = EpisodeData.from_disk(path_save, episode_name)
+                # you can do something with it now
+
+        """
+        res = []
+        li_subfiles = list(os.listdir(path_agent))
+        for el in sorted(li_subfiles):
+            # loop through the files that store the agent's logs
+            this_dir = os.path.join(path_agent, el)
+            if not os.path.isdir(this_dir):
+                # it cannot be the result of an episode if it is not a directory.
+                continue
+            ok_ = True
+            for file_that_should_be in EpisodeData.ATTR_EPISODE:
+                if not os.path.exists(os.path.join(this_dir, file_that_should_be)):
+                    # one file is missing
+                    ok_ = False
+                    break
+            if ok_:
+                res.append((os.path.abspath(this_dir), el))
+        return res
+
+    def reboot(self):
+        """
+        Do as if the data had just been read from the hard drive (loop again from the
+        initial observation and action)
+        """
+        self.actions.reboot()
+        self.observations.reboot()
+        self.env_actions.reboot()
+
+    def go_to(self, index):
+        self.actions.go_to(index)
+        self.observations.go_to(index+1)
+        self.env_actions.go_to(index)
+
    def get_actions(self):
        return self.actions.collection

@@ -216,8 +345,25 @@ def __len__(self):
        return int(self.meta["chronics_max_timestep"])

    @classmethod
-    def from_disk(cls, agent_path, name=str(1)):
+    def from_disk(cls, agent_path, name="1"):
+        """
+        This function allows you to reload an episode stored using the runner.
+
+        See the example at the definition of the class for more information on how to use it.
+
+        Parameters
+        ----------
+        agent_path: ``str``
+            The path passed to the "runner.run" method (its "path_save" argument)
+        name: ``str``
+            The name of the episode you want to reload.
+
+        Returns
+        -------
+        res: :class:`EpisodeData`
+            The data of the episode, properly loaded in memory.
+        """
        if agent_path is None:
            raise Grid2OpException("A path to an episode should be provided, please call \"from_disk\" with "
                                   "\"agent_path other\" than None")
@@ -258,7 +404,7 @@ def from_disk(cls, agent_path, name=str(1)):
        attack_space = ActionSpace.from_dict(
            os.path.join(agent_path, EpisodeData.ATTACK_SPACE))

-        return cls(actions,
+        return cls(actions=actions,
                   env_actions=env_actions,
                   observations=observations,
                   rewards=rewards,
@@ -270,7 +416,7 @@ def from_disk(cls, agent_path, name=str(1)):
                   observation_space=observation_space,
                   action_space=action_space,
                   helper_action_env=helper_action_env,
-                   path_save=None, # No save when reading
+                   path_save=None,  # No save when reading
                   attack=attack,
                   attack_space=attack_space,
                   name=name,
@@ -278,17 +424,49 @@ def from_disk(cls, agent_path, name=str(1)):
                   other_rewards=other_rewards)

    def set_parameters(self, env):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+            Used by the Runner to properly serialize an episode
+
+        TODO
+
+        Parameters
+        ----------
+        env
+
+        Returns
+        -------
+
+        """
        if self.serialize:
            self.parameters = env.parameters.to_dict()

    def set_meta(self, env, time_step, cum_reward, env_seed, agent_seed):
+        """
+        .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+            Used by the runner to properly serialize an episode
+
+        TODO
+
+        Parameters
+        ----------
+        env
+        time_step
+        cum_reward
+        env_seed
+        agent_seed
+
+        Returns
+        -------
+
+        """
        if self.serialize:
            self.meta = {}
            self.meta["chronics_path"] = "{}".format(
                env.chronics_handler.get_id())
            self.meta["chronics_max_timestep"] = "{}".format(
                env.chronics_handler.max_timestep())
-            self.meta["grid_path"] = "{}".format(env.init_grid_path)
+            self.meta["grid_path"] = "{}".format(env._init_grid_path)
            self.meta["backend_type"] = "{}".format(
                type(env.backend).__name__)
            self.meta["env_type"] = "{}".format(type(env).__name__)
@@ -305,6 +483,28 @@ def set_meta(self, env, time_step, cum_reward, env_seed, agent_seed):

    def incr_store(self, efficient_storing, time_step, time_step_duration,
                   reward, env_act, act, obs, opp_attack, info):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+            Used by the runner to properly serialize an episode
+
+        TODO
+
+        Parameters
+        ----------
+        efficient_storing
+        time_step
+        time_step_duration
+        reward
+        env_act
+        act
+        obs
+        opp_attack
+        info
+
+        Returns
+        -------
+
+        """
        if self.serialize:
            self.actions.update(time_step, act.to_vect(), efficient_storing)
@@ -350,6 +550,7 @@ def incr_store(self, efficient_storing, time_step, time_step_duration,
            if "rewards" in info:
                self.other_rewards.append({k: self._convert_to_float(v) for k, v in info["rewards"].items()})
+            # TODO add is_illegal and is_ambiguous flags!

    def _convert_to_float(self, el):
        try:
@@ -359,6 +560,23 @@ def _convert_to_float(self, el):
        return res

    def set_episode_times(self, env, time_act, beg_, end_):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+            Used by the runner to properly serialize an episode
+
+        TODO
+
+        Parameters
+        ----------
+        env
+        time_act
+        beg_
+        end_
+
+        Returns
+        -------
+
+        """
        if self.serialize:
            self.episode_times = {}
            self.episode_times["Env"] = {}
@@ -374,6 +592,16 @@ def set_episode_times(self, env, time_act, beg_, end_):
        self.episode_times["total"] = float(end_ - beg_)

    def to_disk(self):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+            Used by the runner to properly serialize an episode
+
+        TODO
+
+        Returns
+        -------
+
+        """
        if self.serialize:
            parameters_path = os.path.join(
                self.episode_path, EpisodeData.PARAMS)
@@ -414,6 +642,9 @@ def to_disk(self):


class CollectionWrapper:
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+    Utility to make the interaction with stored actions and stored observations more pythonic
+
    A wrapping class to add some behaviors (iterability, item access, update, save) to grid2op object collections
    (:class:`grid2op.Action.BaseAction` and :class:`grid2op.Observation.BaseObservation` classes essentially).
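+
+    Examples
+    --------
+    This class is not meant to be built by hand: :class:`EpisodeData` creates it for you. Below is a minimal
+    sketch of the behaviours it adds (it assumes a runner already saved at least one episode in `path_save`,
+    as in the examples above):
+
+    .. code-block:: python
+
+        from grid2op.Episode import EpisodeData
+
+        li_episode = EpisodeData.list_episode(path_save)
+        full_path, episode_name = li_episode[0]
+        this_episode = EpisodeData.from_disk(path_save, episode_name)
+
+        # "this_episode.actions" and "this_episode.observations" are CollectionWrapper:
+        # they are iterable ...
+        for act in this_episode.actions:
+            print(act)
+        # ... they support item access ...
+        first_obs = this_episode.observations[0]
+        # ... and they can be rewound to their first element
+        this_episode.actions.reboot()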
@@ -474,7 +705,7 @@ def __init__(self, collection, helper, collection_name, check_legit=True):
                collection_obj = self.helper.from_vect(self.collection[i, :], check_legit=check_legit)
                self.objects.append(collection_obj)
-            except AmbiguousAction:
+            except EnvError as exc_:
                self._game_over = i
                break

@@ -512,6 +743,14 @@ def update(self, time_step, values, efficient_storage):
    def save(self, path):
        np.savez_compressed(path, data=self.collection)  # do not change keyword arguments

+    def reboot(self):
+        self.i = 0
+
+    def go_to(self, index):
+        if index >= len(self):
+            raise Grid2OpException("index out of range for collection {}".format(self.collection_name))
+        self.i = index
+

if __name__ == "__main__":
    pass
diff --git a/grid2op/Episode/EpisodeReboot.py b/grid2op/Episode/EpisodeReboot.py
new file mode 100644
index 000000000..c695499a7
--- /dev/null
+++ b/grid2op/Episode/EpisodeReboot.py
@@ -0,0 +1,247 @@
+# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
+# See AUTHORS.txt
+# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
+# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
+# you can obtain one at http://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
+
+import warnings
+import copy
+import json
+import os
+import re
+import numpy as np
+
+from datetime import timedelta
+
+from grid2op.dtypes import dt_float, dt_int, dt_bool
+from grid2op.Exceptions import Grid2OpException
+from grid2op.Chronics import GridValue, ChronicsHandler
+from grid2op.Opponent import BaseOpponent
+from grid2op.Environment import Environment
+
+from grid2op.Episode.EpisodeData import EpisodeData
+
+
+class _GridFromLog(GridValue):
+    """
+    .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + """ + def __init__(self, episode_data, + time_interval=timedelta(minutes=5), + max_iter=-1, + start_datetime=None, + chunk_size=None + ): + # TODO reload directly the loadp, loadq, prodp and prodv from the path of the episode data if possible + self.episode_data = episode_data + if start_datetime is None: + warnings.warn("\"start_datetime\" argument is ignored when building the _GridFromLog") + if chunk_size is None: + warnings.warn("\"chunk_size\" argument is ignored when building the _GridFromLog") + GridValue.__init__(self, + time_interval=time_interval, + max_iter=max_iter, + start_datetime=self.episode_data.observations[0].get_time_stamp(), + chunk_size=None) + + # TODO reload that + self.maintenance_time = np.zeros(self.episode_data.observations[0].line_status.shape[0], dtype=int) - 1 + self.maintenance_duration = np.zeros(self.episode_data.observations[0].line_status.shape[0], dtype=int) + self.hazard_duration = np.zeros(self.episode_data.observations[0].line_status.shape[0], dtype=int) + self.curr_iter = 0 + + def initialize(self, order_backend_loads, order_backend_prods, order_backend_lines, order_backend_subs, + names_chronics_to_backend): + pass + + def load_next(self): + self.curr_iter += 1 + obs = self.episode_data.observations[self.curr_iter] + self.current_datetime = obs.get_time_stamp() + + res = {} + injs = {"prod_p": obs.prod_p.astype(dt_float), + "load_p": obs.load_p.astype(dt_float), + "load_q": obs.load_q.astype(dt_float), + } + res["injection"] = injs + + # TODO + # if self.maintenance is not None: + # res["maintenance"] = self.maintenance[self.current_index, :] + # if self.hazards is not None: + # res["hazards"] = self.hazards[self.current_index, :] + + prod_v = obs.prod_v + return self.current_datetime,\ + res, \ + self.maintenance_time, \ + self.maintenance_duration, \ + self.hazard_duration, \ + prod_v + + def check_validity(self, backend): + return True + + def next_chronics(self): + self.episode_data.reboot() + + +class OpponentFromLog(BaseOpponent): + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + """ + pass + + +class EpisodeReboot: + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is a first implementation to serve as "what can be done". + + It is a beta feature + + """ + def __init__(self): + self.episode_data = None + self.env = None + self.chronics_handler = None + self.current_time_step = None + self.action = None # the last action played + + warnings.warn("EpisodeReboot is a beta feature, it will likely be renamed, methods will be adapted " + "and it has probably some bugs. 
Use with care!")

+    def load(self, backend, agent_path=None, name=None, data=None, env_kwargs={}):
+        if data is None:
+            if agent_path is not None and name is not None:
+                self.episode_data = EpisodeData.from_disk(agent_path, name)
+            else:
+                raise Grid2OpException("To replay an episode you need at least to provide an EpisodeData "
+                                       "(using the keyword argument \"data=...\") or provide the path and name "
+                                       "where the episode is stored (keyword arguments \"agent_path\" and "
+                                       "\"name\").")
+        else:
+            self.episode_data = copy.deepcopy(data)
+            self.episode_data.reboot()
+
+        self.chronics_handler = ChronicsHandler(chronicsClass=_GridFromLog,
+                                                episode_data=self.episode_data)
+
+        if "chronics_handler" in env_kwargs:
+            del env_kwargs["chronics_handler"]
+        if "backend" in env_kwargs:
+            del env_kwargs["backend"]
+        if "opponent_class" in env_kwargs:
+            del env_kwargs["opponent_class"]
+        if "name" in env_kwargs:
+            del env_kwargs["name"]
+
+        nm = "unknown"
+        seed = None
+        with open(os.path.join(agent_path, name, "episode_meta.json")) as f:
+            dict_ = json.load(f)
+            nm = re.sub("Environment_", "", dict_["env_type"])
+            if dict_["env_seed"] is not None:
+                seed = int(dict_["env_seed"])
+
+        self.env = Environment(**env_kwargs,
+                               backend=backend,
+                               chronics_handler=self.chronics_handler,
+                               opponent_class=OpponentFromLog,
+                               name=nm)
+        if seed is not None:
+            self.env.seed(seed)
+
+        tmp = self.env.reset()
+
+        # always keep the two below in sync! otherwise it messes up the "chronics"
+        # in the env, when calling "env.step"
+        self.current_time_step = 0
+        self.env.chronics_handler.real_data.curr_iter = 0
+
+        # first observation of the scenario
+        current_obs = self.episode_data.observations[self.current_time_step]
+        self._assign_state(current_obs)
+        return self.env.get_obs()
+
+    def _assign_state(self, obs):
+        """
+        works only if the observation stores the complete state of the grid...
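+
+        (concretely, it restores the dispatch, the cooldowns, the maintenance information, the topology
+        and the injections of the grid from the values stored in the given observation)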
+        """
+        self.env._gen_activeprod_t[:] = obs.prod_p.astype(dt_float)
+        self.env._actual_dispatch[:] = obs.actual_dispatch.astype(dt_float)
+        self.env._target_dispatch[:] = obs.target_dispatch.astype(dt_float)
+        self.env._gen_activeprod_t_redisp[:] = obs.prod_p.astype(dt_float) + obs.actual_dispatch.astype(dt_float)
+        self.env.current_obs = obs
+        self.env._timestep_overflow[:] = obs.timestep_overflow.astype(dt_int)
+        self.env._times_before_line_status_actionable[:] = obs.time_before_cooldown_line.astype(dt_int)
+        self.env._times_before_topology_actionable[:] = obs.time_before_cooldown_sub.astype(dt_int)
+
+        self.env._duration_next_maintenance[:] = obs.duration_next_maintenance.astype(dt_int)
+        self.env._time_next_maintenance[:] = obs.time_next_maintenance.astype(dt_int)
+
+        # TODO check that the "stored" "last bus for when the powerlines were connected" are
+        # kept there (I might need to do a for loop)
+        # to test that i might need to use a "change status" and see if powerlines are connected
+        # to the right bus
+        self.env._backend_action += self.env._helper_action_env({"set_bus": obs.topo_vect.astype(dt_int),
+                                                                 "injection": {"load_p": obs.load_p.astype(dt_float),
+                                                                               "load_q": obs.load_q.astype(dt_float),
+                                                                               "prod_p": obs.prod_p.astype(dt_float),
+                                                                               "prod_v": obs.prod_v.astype(dt_float),
+                                                                               }
+                                                                 })
+        self.env.backend.apply_action(self.env._backend_action)
+        disc_lines, detailed_info, conv_ = self.env.backend.next_grid_state(env=self.env)
+        if conv_ is None:
+            self.env._backend_action.update_state(disc_lines)
+        self.env._backend_action.reset()
+
+    def next(self, update=False):
+        """
+        Go to the next time step.
+
+        If "update" is ``True``, the stored observation is used to set the grid back to the state of this
+        time step, otherwise it is not.
+
+        It then acts as if the environment were to execute the action the stored agent took at the next
+        time step (compared to the time step the environment is currently at).
+        """
+        if self.current_time_step is None:
+            raise Grid2OpException("Impossible to go to the next time step with an episode not loaded. "
+                                   "Call \"EpisodeReboot.load\" before.")
+
+        if update:
+            # I put myself at the observation just before the next time step
+            obs = self.episode_data.observations[self.current_time_step]
+            self.env._backend_action = self.env._backend_action_class()
+            self._assign_state(obs)
+
+        self.action = self.episode_data.actions[self.current_time_step]
+        self.env.chronics_handler.real_data.curr_iter = self.current_time_step
+        new_obs, new_reward, new_done, new_info = self.env.step(self.action)
+
+        self.current_time_step += 1
+        # the chronics handler already handled the "self.env.chronics_handler.curr_iter += 1"
+        return new_obs, new_reward, new_done, new_info
+
+    def go_to(self, time_step):
+        """
+        Goes to the step number "time_step".
+
+        So if you go_to time step 10, then you retrieve the 10th observation and it is as if the
+        agent had taken the 9th action (just before).
+        """
+        if time_step > len(self.episode_data.actions):
+            raise Grid2OpException("The stored episode counts only {} time steps. You cannot go "
+                                   "to time step {}"
+                                   "".format(len(self.episode_data.actions), time_step))

+        if time_step <= 0:
+            raise Grid2OpException("You cannot go to timestep <= 0, it does not make sense (as there is no \"-1th\" "
+                                   "action). 
If you want to load the data, please use \"EpisodeReboot.load\".")
+        self.current_time_step = time_step - 1
+        return self.next(update=True)
diff --git a/grid2op/Episode/EpisodeReplay.py b/grid2op/Episode/EpisodeReplay.py
index 2cd610c50..6c8fe4c2f 100644
--- a/grid2op/Episode/EpisodeReplay.py
+++ b/grid2op/Episode/EpisodeReplay.py
@@ -22,6 +22,9 @@ class EpisodeReplay(object):
    This class allows to see visually what an agent has done during an episode. It uses for now the "PlotPygame" as the
    method to plot the different states of the system. It reads directly data from the runner.

+    Examples
+    --------
+    It can be used in the following manner.

    .. code-block:: python

diff --git a/grid2op/MakeEnv/Make.py b/grid2op/MakeEnv/Make.py
index c1142b129..cb8dee280 100644
--- a/grid2op/MakeEnv/Make.py
+++ b/grid2op/MakeEnv/Make.py
@@ -48,8 +48,44 @@
_REQUEST_EXCEPT_RETRY_ERR = "Exception in getting an answer from \"{}\".\n" \
                            "Retrying.. {} attempt(s) remaining"

+_LIST_REMOTE_URL = "https://api.github.com/repos/bdonnot/grid2op-datasets/contents/datasets.json"
+_LIST_REMOTE_KEY = "download_url"
+_LIST_REMOTE_INVALID_CONTENT_JSON_ERR = "Impossible to retrieve available datasets. " \
+                                        "File could not be converted to json. " \
+                                        "Parsing error:\n {}"
+_LIST_REMOTE_CORRUPTED_CONTENT_JSON_ERR = "Corrupted json retrieved from github api. " \
+                                          "Please wait a few minutes and try again. " \
+                                          "If the error persists, contact grid2op organizers"
+_LIST_REMOTE_INVALID_DATASETS_JSON_ERR = "Impossible to retrieve available datasets. " \
+                                         "File could not be converted to json. " \
+                                         "The error was \n\"{}\""
+
+_FETCH_ENV_UNKNOWN_ERR = "Impossible to find the environment named \"{}\".\n" \
+                         "Current available environments are:\n{}"
+
+_MULTIMIX_FILE = ".multimix"
+
+_MAKE_DEV_ENV_WARN = "You are using a development environment. " \
+                     "This environment is not intended for training agents. It might not be up to date "\
+                     "and its primary use is for tests (hence the \"test=True\" you passed as argument). "\
+                     "Use at your own risk."
+_MAKE_DEV_ENV_DEPRECATED_WARN = "Dev env \"{}\" has been deprecated " \
+                                "and will be removed in future version.\n" \
+                                "Please update to dev envs starting by \"rte\" or \"l2rpn\""
+_MAKE_FIRST_TIME_WARN = "It is the first time you use the environment \"{}\".\n" \
+                        "We will attempt to download this environment from remote"
+_MAKE_UNKNOWN_ENV = "Impossible to load the environment named \"{}\"."
+
+_EXTRACT_DS_NAME_CONVERT_ERR = "The \"dataset_name\" argument " \
+                               "should be convertible to string, " \
+                               "but \"{}\" was provided."
+_EXTRACT_DS_NAME_RECO_ERR = "Impossible to recognize the environment name from path \"{}\""
+

def _send_request_retry(url, nb_retry=10, gh_session=None):
+    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+    """
    if nb_retry <= 0:
        raise Grid2OpException(_REQUEST_FAIL_EXHAUSTED_ERR.format(url))

@@ -73,24 +109,6 @@ def _send_request_retry(url, nb_retry=10, gh_session=None):
        return _send_request_retry(url, nb_retry=nb_retry-1, gh_session=gh_session)

-# _LIST_REMOTE_URL = "https://api.github.com/repos/bdonnot/grid2op-datasets/contents/contents.json"
-_LIST_REMOTE_URL = "https://api.github.com/repos/bdonnot/grid2op-datasets/contents/datasets.json"
-_LIST_REMOTE_KEY = "download_url"
-_LIST_REMOTE_INVALID_CONTENT_JSON_ERR = "Impossible to retrieve available datasets. " \
-                                        "File could not be converted to json. " \
-                                        "Parsing error:\n {}"
-_LIST_REMOTE_CORRUPTED_CONTENT_JSON_ERR = "Corrupted json retrieved from github api. 
" \ - "Please wait a few minutes and try again. " \ - "If the error persist, contact grid2op organizers" -_LIST_REMOTE_INVALID_DATASETS_JSON_ERR = "Impossible to retrieve available datasets. " \ - "File could not be converted to json. " \ - "The error was \n\"{}\"" - -_FETCH_ENV_UNKNOWN_ERR = "Impossible to find the environment named \"{}\".\n" \ - "Current available environments are:\n{}" -# _FETCH_ENV_TAR_URL = "https://github.com/BDonnot/grid2op-datasets/releases/download/{}/{}.tar.bz2" - - def _retrieve_github_content(url, is_json=True): answer = _send_request_retry(url) try: @@ -130,12 +148,6 @@ def _fecth_environments(dataset_name): return url, ds_name_dl -_EXTRACT_DS_NAME_CONVERT_ERR = "The \"dataset_name\" argument " \ - "should be convertible to string, " \ - "but \"{}\" was provided." -_EXTRACT_DS_NAME_RECO_ERR = "Impossible to recognize the environment name from path \"{}\"" - - def _extract_ds_name(dataset_path): """ If a path is provided, clean it to have a proper datasetname. @@ -167,30 +179,19 @@ def _extract_ds_name(dataset_path): dataset_name = os.path.splitext(dataset_name)[0] return dataset_name -_MULTIMIX_FILE = ".multimix" def _aux_is_multimix(dataset_path): if os.path.exists(os.path.join(dataset_path, _MULTIMIX_FILE)): return True return False + def _aux_make_multimix(dataset_path, **kwargs): # Local import to prevent imports loop from grid2op.Environment import MultiMixEnvironment return MultiMixEnvironment(dataset_path, **kwargs) -_MAKE_DEV_ENV_WARN = "You are using a development environment. " \ - "This environment is not intended for training agents. It might not be up to date "\ - "and its primary use if for tests (hence the \"test=True\" you passed as argument). "\ - "Use at your own risk." -_MAKE_DEV_ENV_DEPRECATED_WARN = "Dev env \"{}\" has been deprecated " \ - "and will be removed in future version.\n" \ - "Please update to dev envs starting by \"rte\" or \"l2rpn\"" -_MAKE_FIRST_TIME_WARN = "It is the first time you use the environment \"{}\".\n" \ - "We will attempt to download this environment from remote" -_MAKE_UNKNOWN_ENV = "Impossible to load the environment named \"{}\"." - def make(dataset="rte_case14_realistic", test=False, **kwargs): """ @@ -288,4 +289,3 @@ def make(dataset="rte_case14_realistic", test=False, **kwargs): if _aux_is_multimix(real_ds_path): make_from_path_fn = _aux_make_multimix return make_from_path_fn(dataset_path=real_ds_path, **kwargs) - diff --git a/grid2op/MakeEnv/MakeFromPath.py b/grid2op/MakeEnv/MakeFromPath.py index 4dd682751..22ff07bf5 100644 --- a/grid2op/MakeEnv/MakeFromPath.py +++ b/grid2op/MakeEnv/MakeFromPath.py @@ -89,6 +89,10 @@ def _check_path(path, info): def make_from_dataset_path(dataset_path="/", **kwargs): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Prefer using the :func:`grid2op.make` function. + This function is a shortcut to rapidly create environments within the grid2op Framework. We don't recommend using directly this function. Prefer using the :func:`make` function. diff --git a/grid2op/MakeEnv/MakeOld.py b/grid2op/MakeEnv/MakeOld.py index 9f1e5be5c..0cf9db88c 100644 --- a/grid2op/MakeEnv/MakeOld.py +++ b/grid2op/MakeEnv/MakeOld.py @@ -93,6 +93,8 @@ def make_old(name_env="case14_realistic", **kwargs): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + (DEPRECATED) This function is a shortcut to rapidly create some (pre defined) environments within the grid2op Framework. 
For now, only the environment corresponding to the IEEE "case14" powergrid, with some pre defined chronics
diff --git a/grid2op/MakeEnv/UpdateEnv.py b/grid2op/MakeEnv/UpdateEnv.py
index 26776ef33..9d24a292b 100644
--- a/grid2op/MakeEnv/UpdateEnv.py
+++ b/grid2op/MakeEnv/UpdateEnv.py
@@ -27,6 +27,8 @@ def update_env(env_name=None):
    This function allows you to retrieve the latest version of the some of files used to create the
    environment.

+    File can be for example "config.py" or "prod_charac.csv" or "difficulty_levels.json".
+
    Parameters
    ----------
    env_name: ``str``
@@ -34,12 +36,25 @@ def update_env(env_name=None):
        The name of the environment you want to update the config file (must be an environment you
        have already downloaded). If ``None`` it will look for updates for all the environments
        locally available.

+    Examples
+    --------
+    Here is an example of how to update your environments:
+
+    .. code-block:: python
+
+        import grid2op
+        grid2op.update_env()
+        # it will download the files "config.py" or "prod_charac.csv" or "difficulty_levels.json"
+        # of your local environment to match the latest version available.
+
    """
    _update_files(env_name=env_name)


def _update_file(dict_, env_name, file_name):
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
    Update a single file of a single environment.

    File can be for example "config.py" or "prod_charac.csv" or "difficulty_levels.json".
@@ -64,6 +79,7 @@ def _update_file(dict_, env_name, file_name):

def _update_files(env_name=None):
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
    Update all the "modified" files of a given environment. If ``None`` is provided as input, all local
    environments will be checked for update.
diff --git a/grid2op/MakeEnv/get_default_aux.py b/grid2op/MakeEnv/get_default_aux.py
index 744b3403e..66b4587b0 100644
--- a/grid2op/MakeEnv/get_default_aux.py
+++ b/grid2op/MakeEnv/get_default_aux.py
@@ -15,6 +15,8 @@
def _get_default_aux(name, kwargs, defaultClassApp, _sentinel=None,
                     defaultinstance=None, defaultClass=None, build_kwargs={},
                     isclass=False):
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
    Helper to build default parameters forwarded to :class:`grid2op.Environment.Environment` for its creation.

    Exactly one of ``defaultinstance`` or ``defaultClass`` should be used, and set to not ``None``
diff --git a/grid2op/Observation/BaseObservation.py b/grid2op/Observation/BaseObservation.py
index 056918edf..891c466ed 100644
--- a/grid2op/Observation/BaseObservation.py
+++ b/grid2op/Observation/BaseObservation.py
@@ -19,6 +19,7 @@

# TODO fix "bug" when action not initalized, return nan in to_vect

+
class BaseObservation(GridObjects):
    """
    Basic class representing an observation.
@@ -28,7 +29,7 @@ class BaseObservation(GridObjects):
    Attributes
    ----------
    action_helper: :class:`grid2op.Action.ActionSpace`
-        A reprensentation of the possible action space.
+        A representation of the possible action space.

    year: ``int``
        The current year
@@ -303,14 +304,14 @@ def state_of(self, _sentinel=None, load_id=None, gen_id=None, line_id=None, substation_id=None):
            - "maintenance": information about the maintenance operation (time of the next maintenance and duration
              of this next maintenance. 
- "cooldown_time": for how many timestep i am not supposed to act on the powerline due to cooldown - (see :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_LINE_STATUS_REMODIF` for more information) + (see :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_COOLDOWN_LINE` for more information) - if a substation is inspected, it returns the topology to this substation in a dictionary with keys: - "topo_vect": the representation of which object is connected where - "nb_bus": number of active buses in this substations - "cooldown_time": for how many timestep i am not supposed to act on the substation due to cooldown - (see :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_TOPOLOGY_REMODIF` for more information) + (see :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_COOLDOWN_SUB` for more information) Raises ------ @@ -405,6 +406,10 @@ def state_of(self, _sentinel=None, load_id=None, gen_id=None, line_id=None, subs def reset(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Resetting a single observation is unlikely to do what you want to do. + Reset the :class:`BaseObservation` to a blank state, where everything is set to either ``None`` or to its default value. @@ -485,7 +490,9 @@ def __compare_stats(self, other, name): def __eq__(self, other): """ - Test the equality of two actions. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Test the equality of two observations. 2 actions are said to be identical if the have the same impact on the powergrid. This is unlrelated to their respective class. For example, if an BaseAction is of class :class:`BaseAction` and doesn't act on the _injection, it @@ -566,6 +573,7 @@ def __eq__(self, other): "target_dispatch", "actual_dispatch" ]: if not self.__compare_stats(other, stat_nm): + print("error for {}".format(stat_nm)) # one of the above stat is not equal in this and in other return False @@ -574,6 +582,9 @@ def __eq__(self, other): @abstractmethod def update(self, env, with_forecast=True): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is carried out automatically by the environment in `env.step` + Update the actual instance of BaseObservation with the new received value from the environment. An observation is a description of the powergrid perceived by an agent. The agent takes his decision based on @@ -620,12 +631,68 @@ def connectivity_matrix(self): ------- res: ``numpy.ndarray``, shape:dim_topo,dim_topo, dtype:float The connectivity matrix, as defined above + + Examples + --------- + If you want to know if powerline 0 is connected at its "extremity" side with the load of id 0 you can do + + .. code-block:: python + + import grid2op + env = grid2op.make() + obs = env.reset() + + # retrieve the id of extremity of powerline 1: + id_lineex_0 = obs.line_ex_pos_topo_vect[0] + id_load_1 = obs.load_pos_topo_vect[0] + + # get the connectivity matrix + connectivity_matrix = obs.connectivity_matrix() + + # know if the objects are connected or not + are_connected = connectivity_matrix[id_lineex_0, id_load_1] + # as `are_connected` is 1.0 then these objects are indeed connected + + And now, supposes we do an action that changes the topology of the substation to which these + two objects are connected, then we get (same example continues) + + .. 
code-block:: python
+
+            topo_action = env.action_space({"set_bus": {"substations_id": [(1, [1,1,1,2,2,2])]}})
+            print(topo_action)
+            # This action will:
+            #   - NOT change anything to the injections
+            #   - NOT perform any redispatching action
+            #   - NOT force any line status
+            #   - NOT switch any line status
+            #   - NOT switch anything in the topology
+            #   - Set the bus of the following element:
+            #     - assign bus 1 to line (extremity) 0 [on substation 1]
+            #     - assign bus 1 to line (origin) 2 [on substation 1]
+            #     - assign bus 1 to line (origin) 3 [on substation 1]
+            #     - assign bus 2 to line (origin) 4 [on substation 1]
+            #     - assign bus 2 to generator 0 [on substation 1]
+            #     - assign bus 2 to load 0 [on substation 1]
+
+            obs, reward, done, info = env.step(topo_action)
+            # and now retrieve the matrix
+            connectivity_matrix = obs.connectivity_matrix()
+
+            # know if the objects are connected or not
+            are_connected = connectivity_matrix[id_lineex_0, id_load_0]
+            # as `are_connected` is 0.0 then these objects are not connected anymore
+            # this is visible when you "print" the action (see above) in the two following lines:
+            # - assign bus 1 to line (extremity) 0 [on substation 1]
+            # - assign bus 2 to load 0 [on substation 1]
+            # -> one of them is on bus 1 [line (extremity) 0] and the other on bus 2 [load 0]
        """
        raise NotImplementedError("This method is not implemented")

    def bus_connectivity_matrix(self):
        """
-        If we denote by `nb_bus` the total number bus of the powergrid.
+        If we denote by `nb_bus` the total number of buses of the powergrid (you can think of a "bus" as a
+        "node" if you represent the powergrid as a graph [the mathematical object, not a plot], with the
+        powerlines being the "edges").

        The `bus_connectivity_matrix` will have a size nb_bus, nb_bus and will be made of 0 and 1.

@@ -641,23 +708,26 @@ def bus_connectivity_matrix(self):

    def get_forecasted_inj(self, time_step=1):
        """
-        This function allows you to retrieve directly the "planned" injections for the timestep `time_step`
+        This function allows you to retrieve directly the "forecast" injections for the step `time_step`.
+
+        We remind that the environment, under some conditions, can produce these forecasts automatically.
+        This function allows you to retrieve what has been forecast, as sketched below.
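+
+        A minimal sketch of how to use it (the environment name is only an example here; any
+        environment shipping forecasts works the same way):
+
+        .. code-block:: python
+
+            import grid2op
+            env = grid2op.make("rte_case14_realistic")
+            obs = env.reset()
+
+            # forecast of the injections for the next step (usually 5 mins ahead)
+            prod_p_f, prod_v_f, load_p_f, load_q_f = obs.get_forecasted_inj(time_step=1)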
        Parameters
        ----------
        time_step: ``int``
-            The horizon of the forecast;
+            The horizon of the forecast (given in number of time steps)

        Returns
        -------
        prod_p_f: ``numpy.ndarray``
-            The forecasted generators active values
+            The forecast generators active values
        prod_v_f: ``numpy.ndarray``
-            The forecasted generators voltage setpoins
+            The forecast generators voltage setpoints
        load_p_f: ``numpy.ndarray``
-            The forecasted load active consumption
+            The forecast load active consumption
        load_q_f: ``numpy.ndarray``
-            The forecasted load reactive consumption
+            The forecast load reactive consumption
        """
        if time_step >= len(self._forecasted_inj):
            raise NoForecastAvailable("Forecast for {} timestep ahead is not possible with your chronics.".format(time_step))
@@ -686,16 +756,28 @@ def get_forecasted_inj(self, time_step=1):
        return prod_p_f, prod_v_f, load_p_f, load_q_f

    def get_time_stamp(self):
-        """get the time stamp of the current observation as a datetime.datetim object"""
+        """
+        Get the time stamp of the current observation as a `datetime.datetime` object
+        """
        res = datetime.datetime(year=self.year, month=self.month, day=self.day,
-                                hour=self.hour_of_day, minute=self.minute_of_hour)
+                                hour=self.hour_of_day, minute=self.minute_of_hour)
        return res

    def simulate(self, action, time_step=1):
        """
-        This method is used to simulate the effect of an action on a forecasted powergrid state. It has the same return
-        value as the :func:`grid2op.Environment.Environment.step` function.
+        This method is used to simulate the effect of an action on a forecast powergrid state. This forecast
+        state is built upon the current observation.
+
+        The forecasts are precomputed by the environment.
+
+        More concretely, if not deactivated by the environment
+        (see :func:`grid2op.Environment.BaseEnv.deactivate_forecast`) and if the environment has the capacity to
+        generate these forecasts (which is the case in most grid2op environments), this function will simulate
+        the effect of doing an action now and return the "next state" (often the state you would get at
+        time `t + 5` mins) as if you were to do the action at this step.
+
+        It has the same return
+        value as the :func:`grid2op.Environment.Environment.step` function.

        Parameters
        ----------
@@ -713,18 +795,59 @@ def simulate(self, action, time_step=1):

        Returns
        -------
-            observation: :class:`grid2op.Observation.Observation`
-                agent's observation of the current environment
-            reward: ``float``
-                amount of reward returned after previous action
-            done: ``bool``
-                whether the episode has ended, in which case further step() calls will return undefined results
-            info: ``dict``
-                contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
+        simulated_observation: :class:`grid2op.Observation.Observation`
+            agent's observation of the current environment after the application of the action "act" on
+            the current state.
+
+        reward: ``float``
+            amount of reward returned after previous action
+
+        done: ``bool``
+            whether the episode has ended, in which case further step() calls will return undefined results
+
+        info: ``dict``
+            contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
+
+        Notes
+        ------
+        This is a simulation in the sense that the "next grid state" is not the real grid state you will get. As you
+        don't know the future, the "injections you forecast for the next step" will not be the real injections you
+        will get in the next step. 
+
+        Also, in some circumstances, the "Backend" (ie the powerflow) used to do the simulation may not be the
+        same one as the one used by the environment. This is to model a real fact: however accurate your powerflow
+        is, it does not model all of reality (*"all models are wrong"*). Having a different solver for the
+        environment ("the reality") than the one used to anticipate the impact of the action (this "simulate"
+        function) is a way to represent this fact.
+
+        Examples
+        --------
+        To simulate what would be the effect of the action "act" if you were to take this action at this step,
+        you can do the following:
+
+        .. code-block:: python
+
+            import grid2op
+            # retrieve an environment
+            env = grid2op.make()
+
+            # retrieve an observation, this is the same for all observations
+            obs = env.reset()
+
+            # and now you can simulate the effect of doing nothing in the next time step
+            act = env.action_space()  # this can be any action that grid2op understands
+            simulated_obs, simulated_reward, simulated_done, simulated_info = obs.simulate(act)
+
+            # `simulated_obs` will be the "observation" after the application of the action `act` on the
+            # forecast of the grid state (it will usually be the forecast state at time t+5mins)
+            # `simulated_reward` will be the reward for the same action on the same forecast state
+            # `simulated_done` will indicate whether or not the simulation ended up in a "game over"
+            # `simulated_info` gives extra information on this forecast state
        """
        if self.action_helper is None or self._obs_env is None:
-            raise NoForecastAvailable("No forecasts are available for this instance of BaseObservation (no action_space "
+            raise NoForecastAvailable("No forecasts are available for this instance of BaseObservation "
+                                      "(no action_space "
                                      "and no simulated environment are set).")

        if time_step < 0:
@@ -746,7 +869,8 @@ def simulate(self, action, time_step=1):
        self._obs_env.init(inj_action,
                           time_stamp=timestamp,
                           timestep_overflow=self.timestep_overflow,
-                           topo_vect=self.topo_vect)
+                           topo_vect=self.topo_vect,
+                           time_step=time_step)

        sim_obs, *rest = self._obs_env.simulate(action)
        sim_obs = copy.deepcopy(sim_obs)
@@ -754,6 +878,8 @@ def simulate(self, action, time_step=1):

    def copy(self):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        Make a (deep) copy of the observation.

        Returns
diff --git a/grid2op/Observation/CompleteObservation.py b/grid2op/Observation/CompleteObservation.py
index 7d49ca062..c9cc0de10 100644
--- a/grid2op/Observation/CompleteObservation.py
+++ b/grid2op/Observation/CompleteObservation.py
@@ -120,15 +120,6 @@ def _reset_matrices(self):
        self.dictionnarized = None

    def update(self, env, with_forecast=True):
-        """
-        This use the environement to update properly the BaseObservation.
-
-        Parameters
-        ----------
-        env: :class:`grid2op.Environment.Environment`
-            The environment from which to update this observation. 
-
-        """
        # reset the matrices
        self._reset_matrices()
        self.reset()
@@ -142,7 +133,7 @@ def update(self, env, with_forecast=True):
        self.day_of_week = dt_int(env.time_stamp.weekday())

        # get the values related to topology
-        self.timestep_overflow[:] = env.timestep_overflow
+        self.timestep_overflow[:] = env._timestep_overflow

        self.line_status[:] = env.backend.get_line_status()
        self.topo_vect[:] = env.backend.get_topo_vect()
@@ -170,20 +161,24 @@ def update(self, env, with_forecast=True):
        self.rho[:] = env.backend.get_relative_flow().astype(dt_float)

        # cool down and reconnection time after hard overflow, soft overflow or cascading failure
-        self.time_before_cooldown_line[:] = env.times_before_line_status_actionable
-        self.time_before_cooldown_sub[:] = env.times_before_topology_actionable
-        self.time_next_maintenance[:] = env.time_next_maintenance
-        self.duration_next_maintenance[:] = env.duration_next_maintenance
+        self.time_before_cooldown_line[:] = env._times_before_line_status_actionable
+        self.time_before_cooldown_sub[:] = env._times_before_topology_actionable
+        self.time_next_maintenance[:] = env._time_next_maintenance
+        self.duration_next_maintenance[:] = env._duration_next_maintenance

        # redispatching
-        self.target_dispatch[:] = env.target_dispatch
-        self.actual_dispatch[:] = env.actual_dispatch
+        self.target_dispatch[:] = env._target_dispatch
+        self.actual_dispatch[:] = env._actual_dispatch

    def from_vect(self, vect, check_legit=True):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+            To reload an observation from a vector, use the "env.observation_space.from_vect()".
+
        Convert back an observation represented as a vector into a proper observation.

-        Some convertion are done silently from float to the type of the corresponding observation attribute.
+        Some conversions are done silently from float to the type of the corresponding observation attribute.

        Parameters
        ----------
@@ -344,4 +339,3 @@ def bus_connectivity_matrix(self):
                    self.bus_connectivity_matrix_[bus_id_or, bus_id_ex] = 1
                    self.bus_connectivity_matrix_[bus_id_ex, bus_id_or] = 1
        return self.bus_connectivity_matrix_
-
diff --git a/grid2op/Observation/ObservationSpace.py b/grid2op/Observation/ObservationSpace.py
index 308630ec4..841aa02d1 100644
--- a/grid2op/Observation/ObservationSpace.py
+++ b/grid2op/Observation/ObservationSpace.py
@@ -16,7 +16,7 @@

class ObservationSpace(SerializableObservationSpace):
    """
-    Helper that provides usefull functions to manipulate :class:`BaseObservation`.
+    Helper that provides useful functions to manipulate :class:`BaseObservation`.

    BaseObservation should only be built using this Helper. It is absolutely not recommended to make an observation
    directly form its constructor.
@@ -60,6 +60,8 @@ def __init__(self,
                 observationClass=CompleteObservation,
                 with_forecast=True):
        """
+        .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Env: requires :attr:`grid2op.Environment.parameters` and :attr:`grid2op.Environment.backend` to be valid """ @@ -77,7 +79,7 @@ def __init__(self, self.rewardClass = rewardClass # helpers - self.action_helper_env = env.helper_action_env + self.action_helper_env = env._helper_action_env self.reward_helper = RewardHelper(rewardClass=self.rewardClass) self.reward_helper.initialize(env) @@ -92,13 +94,13 @@ def __init__(self, parameters=env.parameters, reward_helper=self.reward_helper, action_helper=self.action_helper_env, - thermal_limit_a=env._thermal_limit_a, - legalActClass=env.legalActClass, - donothing_act=env.helper_action_player(), + thermal_limit_a=env.get_thermal_limit(), + legalActClass=env._legalActClass, + donothing_act=env._helper_action_player(), other_rewards=other_rewards, - completeActionClass=env.helper_action_env.actionClass, - helper_action_class=env.helper_action_class, - helper_action_env=env.helper_action_env) + completeActionClass=env._helper_action_env.actionClass, + helper_action_class=env._helper_action_class, + helper_action_env=env._helper_action_env) for k, v in self.obs_env.other_rewards.items(): v.initialize(env) @@ -132,10 +134,19 @@ def size_obs(self): return self.n def get_empty_observation(self): - """return an empty observation, for internal use only.""" + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + return an empty observation, for internal use only.""" return copy.deepcopy(self._empty_obs) def copy(self): + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + Perform a deep copy of the Observation space. + + """ backend = self._backend_obs self._backend_obs = None obs_ = self._empty_obs diff --git a/grid2op/Observation/SerializableObservationSpace.py b/grid2op/Observation/SerializableObservationSpace.py index 5e0415eed..eae7f93b2 100644 --- a/grid2op/Observation/SerializableObservationSpace.py +++ b/grid2op/Observation/SerializableObservationSpace.py @@ -19,7 +19,6 @@ class SerializableObservationSpace(SerializableSpace): Attributes ---------- - observationClass: ``type`` Type used to build the :attr:`SerializableActionSpace._template_act` @@ -46,17 +45,20 @@ def __init__(self, gridobj, observationClass=CompleteObservation): @staticmethod def from_dict(dict_): """ - Allows the de-serialization of an object stored as a dictionnary (for example in the case of json saving). + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is used internally by EpisodeData to restore the state of the powergrid + + Allows the de-serialization of an object stored as a dictionary (for example in the case of json saving). Parameters ---------- dict_: ``dict`` - Representation of an BaseObservation Space (aka SerializableObservationSpace) as a dictionnary. + Representation of an BaseObservation Space (aka SerializableObservationSpace) as a dictionary. Returns ------- res: :class:``SerializableObservationSpace`` - An instance of an action space matching the dictionnary. + An instance of an action space matching the dictionary. """ tmp = SerializableSpace.from_dict(dict_) diff --git a/grid2op/Observation/_ObsEnv.py b/grid2op/Observation/_ObsEnv.py index 3f194aa5c..d93af2401 100644 --- a/grid2op/Observation/_ObsEnv.py +++ b/grid2op/Observation/_ObsEnv.py @@ -17,6 +17,8 @@ class _ObsCH(ChangeNothing): """ + .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This class is reserved to internal use. Do not attempt to do anything with it. """ def forecasts(self): @@ -25,6 +27,8 @@ def forecasts(self): class _ObsEnv(BaseEnv): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This class is an 'Emulator' of a :class:`grid2op.Environment.Environment` used to be able to 'simulate' forecasted grid states. It should not be used outside of an :class:`grid2op.Observation.BaseObservation` instance, or one of its derivative. @@ -48,19 +52,27 @@ def __init__(self, helper_action_env, other_rewards={}): BaseEnv.__init__(self, parameters, thermal_limit_a, other_rewards=other_rewards) - self.helper_action_class = helper_action_class - self.reward_helper = reward_helper - self.obsClass = None - # self._action = None - self.CompleteActionClass = completeActionClass - self.init_backend(init_grid_path=None, - chronics_handler=_ObsCH(), - backend=backend_instanciated, - names_chronics_to_backend=None, - actionClass=action_helper.actionClass, - observationClass=obsClass, - rewardClass=None, - legalActClass=legalActClass) + self._helper_action_class = helper_action_class + self._reward_helper = reward_helper + self._obsClass = None + + self.gen_activeprod_t_init = np.zeros(self.n_gen, dtype=dt_float) + self.gen_activeprod_t_redisp_init = np.zeros(self.n_gen, dtype=dt_float) + self.times_before_line_status_actionable_init = np.zeros(self.n_line, dtype=dt_int) + self.times_before_topology_actionable_init = np.zeros(self.n_sub, dtype=dt_int) + self.time_next_maintenance_init = np.zeros(self.n_line, dtype=dt_int) + self.duration_next_maintenance_init = np.zeros(self.n_line, dtype=dt_int) + self.target_dispatch_init = np.zeros(self.n_gen, dtype=dt_float) + self.actual_dispatch_init = np.zeros(self.n_gen, dtype=dt_float) + + self._init_backend(init_grid_path=None, + chronics_handler=_ObsCH(), + backend=backend_instanciated, + names_chronics_to_backend=None, + actionClass=action_helper.actionClass, + observationClass=obsClass, + rewardClass=None, + legalActClass=legalActClass) self.no_overflow_disconnection = parameters.NO_OVERFLOW_DISCONNECTION self._load_p, self._load_q, self._load_v = None, None, None @@ -70,72 +82,43 @@ def __init__(self, # convert line status to -1 / 1 instead of false / true self._line_status = None self.is_init = False - self.helper_action_env = helper_action_env - self.env_modification = self.helper_action_env() - self._do_nothing_act = self.helper_action_env() + self._helper_action_env = helper_action_env + self.env_modification = self._helper_action_env() + self._do_nothing_act = self._helper_action_env() self._backend_action_set = self._backend_action_class() # opponent self.opp_space_state = None self.opp_state = None - def init_backend(self, - init_grid_path, - chronics_handler, - backend, - names_chronics_to_backend, - actionClass, - observationClass, - rewardClass, legalActClass): - """ - backend should not be the backend of the environment!!! 
- - Parameters - ---------- - init_grid_path - chronics_handler - backend - names_chronics_to_backend - actionClass - observationClass - rewardClass - legalActClass - - Returns - ------- - - """ - self.env_dc = self.parameters.FORECAST_DC + def _init_backend(self, + init_grid_path, + chronics_handler, + backend, + names_chronics_to_backend, + actionClass, + observationClass, + rewardClass, legalActClass): + self._env_dc = self.parameters.FORECAST_DC self.chronics_handler = chronics_handler self.backend = backend self._has_been_initialized() - self.obsClass = observationClass + self._obsClass = observationClass if not issubclass(legalActClass, BaseRules): raise Grid2OpException( "Parameter \"legalActClass\" used to build the Environment should derived form the " "grid2op.BaseRules class, type provided is \"{}\"".format( type(legalActClass))) - self.game_rules = RulesChecker(legalActClass=legalActClass) - self.legalActClass = legalActClass - self.helper_action_player = self._do_nothing + self._game_rules = RulesChecker(legalActClass=legalActClass) + self._legalActClass = legalActClass + self._helper_action_player = self._do_nothing self.backend.set_thermal_limit(self._thermal_limit_a) self._create_opponent() - self.gen_activeprod_t_init = np.zeros(self.n_gen, dtype=dt_float) - self.gen_activeprod_t_redisp_init = np.zeros(self.n_gen, dtype=dt_float) - self.times_before_line_status_actionable_init = np.zeros(self.n_line, dtype=dt_int) - self.times_before_topology_actionable_init = np.zeros(self.n_sub, dtype=dt_int) - self.time_next_maintenance_init = np.zeros(self.n_line, dtype=dt_int) - self.duration_next_maintenance_init = np.zeros(self.n_line, dtype=dt_int) - self.target_dispatch_init = np.zeros(self.n_gen, dtype=dt_float) - self.actual_dispatch_init = np.zeros(self.n_gen, dtype=dt_float) - self.last_bus_line_or_init = np.zeros(self.n_line, dtype=dt_int) - self.last_bus_line_ex_init = np.zeros(self.n_line, dtype=dt_int) - - self.current_obs_init = self.obsClass(seed=None, - obs_env=None, - action_helper=None) + self.current_obs_init = self._obsClass(seed=None, + obs_env=None, + action_helper=None) self.current_obs = self.current_obs_init def _do_nothing(self, x): @@ -143,6 +126,8 @@ def _do_nothing(self, x): def _update_actions(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Retrieve the actions to perform the update of the underlying powergrid represented by the :class:`grid2op.Backend`in the next time step. A call to this function will also read the next state of :attr:`chronics_handler`, so it must be called only @@ -159,6 +144,8 @@ def _update_actions(self): def copy(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Implement the deep copy of this instance. Returns @@ -173,8 +160,10 @@ def copy(self): self.backend = backend return res - def init(self, new_state_action, time_stamp, timestep_overflow, topo_vect): + def init(self, new_state_action, time_stamp, timestep_overflow, topo_vect, time_step=1): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Initialize a "forecasted grid state" based on the new injections, possibly new topological modifications etc. 
        Parameters
@@ -196,52 +185,95 @@
            ``None``

        """
+        self._reset_to_orig_state()
+        self._reset_vect()
        self._topo_vect[:] = topo_vect

        # TODO update maintenance time, duration and cooldown accordingly (see all todos in `update_grid`)
        # TODO set the shunts here

        # update the action that set the grid to the real value
-        self._backend_action_set += self.helper_action_env({"set_line_status": np.array(self._line_status, dtype=dt_int),
-                                                            "set_bus": self._topo_vect,
-                                                            "injection": {"prod_p": self._prod_p,
-                                                                          "prod_v": self._prod_v,
-                                                                          "load_p": self._load_p,
-                                                                          "load_q": self._load_q}
-                                                            })
+        still_in_maintenance, reconnected, first_ts_maintenance = self._update_vector_with_timestep(time_step)
+        if np.any(first_ts_maintenance):
+            set_status = np.array(self._line_status, dtype=dt_int)
+            set_status[first_ts_maintenance] = -1
+            topo_vect = np.array(self._topo_vect, dtype=dt_int)
+            topo_vect[self.line_or_pos_topo_vect[first_ts_maintenance]] = -1
+            topo_vect[self.line_ex_pos_topo_vect[first_ts_maintenance]] = -1
+        else:
+            set_status = self._line_status
+            topo_vect = self._topo_vect
+
+        self._backend_action_set += self._helper_action_env({"set_line_status": set_status,
+                                                             "set_bus": topo_vect,
+                                                             "injection": {"prod_p": self._prod_p,
+                                                                           "prod_v": self._prod_v,
+                                                                           "load_p": self._load_p,
+                                                                           "load_q": self._load_q}
+                                                             })
        self._backend_action_set += new_state_action

        self.is_init = True
        self.current_obs.reset()
        self.time_stamp = time_stamp
-        self.timestep_overflow[:] = timestep_overflow
+        self._timestep_overflow[:] = timestep_overflow
+
+    def _update_vector_with_timestep(self, time_step):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+        update the value of the "time dependent" attributes
+        """
+        self._times_before_line_status_actionable[:] = np.maximum(self._times_before_line_status_actionable - time_step,
+                                                                  0.)
+        self._times_before_topology_actionable[:] = np.maximum(self._times_before_topology_actionable - time_step,
+                                                               0.)
+
+        still_in_maintenance = (self._duration_next_maintenance > time_step) & (self._time_next_maintenance == 0)
+        reconnected = (self._duration_next_maintenance < time_step) & (self._time_next_maintenance == 0)
+        first_ts_maintenance = self._time_next_maintenance == time_step
+
+        # powerlines that are still in maintenance at this time step
+        self._time_next_maintenance[still_in_maintenance] = 0
+        self._duration_next_maintenance[still_in_maintenance] -= 1
+
+        # powerlines that will be in maintenance at this time step
+        self._time_next_maintenance[first_ts_maintenance] = 0
+
+        # powerlines that won't be in maintenance at this time step
+        self._time_next_maintenance[reconnected] = -1
+        self._duration_next_maintenance[reconnected] = 0
+        return still_in_maintenance, reconnected, first_ts_maintenance

    def reset(self):
        super().reset()
        self.current_obs = self.current_obs_init

+    def _reset_vect(self):
+        self._gen_activeprod_t[:] = self.gen_activeprod_t_init
+        self._gen_activeprod_t_redisp[:] = self.gen_activeprod_t_redisp_init
+        self._times_before_line_status_actionable[:] = self.times_before_line_status_actionable_init
+        self._times_before_topology_actionable[:] = self.times_before_topology_actionable_init
+        self._time_next_maintenance[:] = self.time_next_maintenance_init
+        self._duration_next_maintenance[:] = self.duration_next_maintenance_init
+        self._target_dispatch[:] = self.target_dispatch_init
+        self._actual_dispatch[:] = self.actual_dispatch_init
+
    def _reset_to_orig_state(self):
        """
+        .. 
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + reset this "environment" to the state it should be """ self.reset() # reset the "BaseEnv" - # self.time_stamp = None # TODO this should not throw... self.backend.set_thermal_limit(self._thermal_limit_a) - self.gen_activeprod_t[:] = self.gen_activeprod_t_init - self.gen_activeprod_t_redisp[:] = self.gen_activeprod_t_redisp_init - self.times_before_line_status_actionable[:] = self.times_before_line_status_actionable_init - self.times_before_topology_actionable[:] = self.times_before_topology_actionable_init - self.time_next_maintenance[:] = self.time_next_maintenance_init - self.duration_next_maintenance[:] = self.duration_next_maintenance_init - self.target_dispatch[:] = self.target_dispatch_init - self.actual_dispatch[:] = self.actual_dispatch_init - self.last_bus_line_or[:] = self.last_bus_line_or_init - self.last_bus_line_ex[:] = self.last_bus_line_ex_init - self._backend_action_set.all_changed() self._backend_action = copy.deepcopy(self._backend_action_set) - self.oppSpace._set_state(self.opp_space_state, self.opp_state) + self._oppSpace._set_state(self.opp_space_state, self.opp_state) def simulate(self, action): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Prefer using `obs.simulate(action)` + This function is the core method of the :class:`ObsEnv`. It allows to perform a simulation of what would give and action if it were to be implemented on the "forecasted" powergrid. @@ -279,12 +311,13 @@ def simulate(self, action): """ self._reset_to_orig_state() - # TODO set back the "change" to True obs, reward, done, info = self.step(action) return obs, reward, done, info def get_obs(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Method to retrieve the "forecasted grid" as a valid observation object. Returns @@ -298,19 +331,17 @@ def get_obs(self): def update_grid(self, env): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Update this "emulated" environment with the real powergrid. Parameters ---------- - env: :class:`grid2op.Environement.BaseEnv` - A reference to the environement - - Returns - ------- - + env: :class:`grid2op.Environment.BaseEnv` + A reference to the environment """ real_backend = env.backend - self.reward_helper = env.reward_helper + self._reward_helper = env._reward_helper self._load_p, self._load_q, self._load_v = real_backend.loads_info() self._prod_p, self._prod_q, self._prod_v = real_backend.generators_info() @@ -325,17 +356,15 @@ def update_grid(self, env): # Make a copy of env state for simulation # TODO this depends on the datetime simulated, so find a way to have it independant of that !!! 
        self._thermal_limit_a = env._thermal_limit_a.astype(dt_float)
-        self.gen_activeprod_t_init[:] = env.gen_activeprod_t
-        self.gen_activeprod_t_redisp_init[:] = env.gen_activeprod_t_redisp
-        self.times_before_line_status_actionable_init[:] = env.times_before_line_status_actionable
-        self.times_before_topology_actionable_init[:] = env.times_before_topology_actionable
-        self.time_next_maintenance_init[:] = env.time_next_maintenance
-        self.duration_next_maintenance_init[:] = env.duration_next_maintenance
-        self.target_dispatch_init[:] = env.target_dispatch
-        self.actual_dispatch_init[:] = env.actual_dispatch
-        self.last_bus_line_or_init[:] = env.last_bus_line_or
-        self.last_bus_line_ex_init[:] = env.last_bus_line_ex
-        self.opp_space_state, self.opp_state = env.oppSpace._get_state()
+        self.gen_activeprod_t_init[:] = env._gen_activeprod_t
+        self.gen_activeprod_t_redisp_init[:] = env._gen_activeprod_t_redisp
+        self.times_before_line_status_actionable_init[:] = env._times_before_line_status_actionable
+        self.times_before_topology_actionable_init[:] = env._times_before_topology_actionable
+        self.time_next_maintenance_init[:] = env._time_next_maintenance
+        self.duration_next_maintenance_init[:] = env._duration_next_maintenance
+        self.target_dispatch_init[:] = env._target_dispatch
+        self.actual_dispatch_init[:] = env._actual_dispatch
+        self.opp_space_state, self.opp_state = env._oppSpace._get_state()
        # TODO check redispatching and simulate are working as intended
        # TODO also update the status of hazards, maintenance etc.
        # TODO and simulate also when a maintenance is forecasted!
diff --git a/grid2op/Opponent/BaseActionBudget.py b/grid2op/Opponent/BaseActionBudget.py
index b1a882e26..eafa6774e 100644
--- a/grid2op/Opponent/BaseActionBudget.py
+++ b/grid2op/Opponent/BaseActionBudget.py
@@ -12,6 +12,8 @@
class BaseActionBudget:
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
    This is the base class representing the action budget. It makes sure the opponent uses the correct
    type of "action", and computes the budget associated to it.

@@ -21,9 +23,12 @@ def __init__(self, action_space):

    def __call__(self, attack):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        This function takes an attack as input and computes the cost associated to it.

        **NB** The cost of a "None" attack is necessarily 0 !
+
        Parameters
        ----------
        attack: :class:`grid2op.BaseAction.BaseAction`
diff --git a/grid2op/Opponent/OpponentSpace.py b/grid2op/Opponent/OpponentSpace.py
index ed53b5302..9be3ac37d 100644
--- a/grid2op/Opponent/OpponentSpace.py
+++ b/grid2op/Opponent/OpponentSpace.py
@@ -10,6 +10,7 @@
class OpponentSpace(object):
    """
+    Similar to the action space, but for the opponent.

    Attributes
    ----------
diff --git a/grid2op/Opponent/RandomLineOpponent.py b/grid2op/Opponent/RandomLineOpponent.py
index 82727e9f1..999373d26 100644
--- a/grid2op/Opponent/RandomLineOpponent.py
+++ b/grid2op/Opponent/RandomLineOpponent.py
@@ -13,6 +13,11 @@

class RandomLineOpponent(BaseOpponent):
+    """
+    An opponent that disconnects, at random, one of the powerlines from a list specified
+    at initialization.
+
+    """
    def __init__(self, action_space):
        BaseOpponent.__init__(self, action_space)
        self._do_nothing = None
@@ -23,6 +28,19 @@ def __init__(self, action_space):
    # it should have the exact same signature as here
    def init(self, lines_attacked=[], **kwargs):
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+        Used when the opponent is created.
+
+        Parameters
+        ----------
+        lines_attacked: ``list``
+            The names of the powerlines the opponent is allowed to attack.
+        kwargs:
+            Other keyword arguments (not used by this opponent).
+
+        """
        # this is the function used to properly set the object.
        # It has the generic signature above,
        # and it's way more flexible than the other one.
diff --git a/grid2op/Parameters.py b/grid2op/Parameters.py
index 391c94039..3d9ae1339 100644
--- a/grid2op/Parameters.py
+++ b/grid2op/Parameters.py
@@ -155,7 +155,7 @@ def _isok_txt(arg):

    def init_from_dict(self, dict_):
        """
-        Initialize the object given a dictionary. All keys are optional. If a key is not present in the dictionnary,
+        Initialize the object given a dictionary. All keys are optional. If a key is not present in the dictionary,
        the default parameters is used.

        Parameters
@@ -263,7 +263,7 @@ def init_from_json(self, json_path):

    @staticmethod
    def from_json(json_path):
        """
-        Create instance from a json path.
+        Create an instance of Parameters from a path where a json is saved.

        Parameters
        ----------
diff --git a/grid2op/Plot/BasePlot.py b/grid2op/Plot/BasePlot.py
index fdb6f2cc9..15cbf7f63 100644
--- a/grid2op/Plot/BasePlot.py
+++ b/grid2op/Plot/BasePlot.py
@@ -24,9 +24,12 @@
from grid2op.Exceptions import PlotError

-
class BasePlot(GridObjects):
    """
+    .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+        Prefer using the module `grid2op.PlotGrid`
+
    Utility class that allows to compute the position of the objects of the powergrid.

    Deriving from this class allows to perform the display of the powergrid.
@@ -47,6 +50,8 @@ def __init__(self,
                 load_prod_dist=70.,
                 bus_radius=6.):
+        warnings.warn("This whole class has been deprecated. Use the `grid2op.PlotGrid` module instead",
+                      category=DeprecationWarning)
        if substation_layout is None:
            if observation_space.grid_layout is None:
                # if no layout is provided, and observation_space has no layout, then it fails
@@ -119,6 +124,10 @@ def __init__(self,

    def plot_layout(self, fig=None, reward=None, done=None, timestamp=None):
        """
+        .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+            Prefer using the module `grid2op.PlotGrid`
+
        This function plots the layout of the grid, as well as the objects. You will see the name of each
        element and its id.
        """
@@ -139,6 +148,10 @@ def plot_info(self, fig=None, line_info=None, load_info=None, gen_info=None, sub
                  colormap=None,
                  unit=None):
        """
+        .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+            Prefer using the module `grid2op.PlotGrid`
+
        Plot some information on the powergrid. For now, only numeric data are supported.

        Parameters
@@ -231,6 +244,10 @@ def plot_obs(self,
                 gen_info="p",
                 colormap="line"):
        """
+        .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+            Prefer using the module `grid2op.PlotGrid`
+
        Plot the given observation in the given figure.

        For now it represents information about load and generator active values.
@@ -332,6 +349,11 @@ def _get_gen_name(self, sub_id, g_id):

    def _compute_layout(self):
        """
+
+        .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+            Prefer using the module `grid2op.PlotGrid`
+
        Compute the position of each of the objects.

        Parameters
@@ -762,20 +784,6 @@ def init_fig(self, fig, reward, done, timestamp):
        pass

-
-
-
-
-
-
-
-
-
-
-
-
-
-
    ## DEPRECATED FUNCTIONS
    def plot_observation(self, observation, fig=None,
                         line_info="rho",
@@ -783,6 +791,10 @@ def plot_observation(self, observation, fig=None,
                         gen_info="p"):
        """
+        .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+            Prefer using the module `grid2op.PlotGrid`
+
        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
@@ -822,6 +834,10 @@ def get_plot_observation(self, observation, fig=None,
                             gen_info="p"):
        """
+        .. warning:: /!\\\\ This module is deprecated /!\\\\
+
+            Prefer using the module `grid2op.PlotGrid`
+
        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
diff --git a/grid2op/Plot/EpisodeReplay.py b/grid2op/Plot/EpisodeReplay.py
index cf54da7ec..dc42df34d 100644
--- a/grid2op/Plot/EpisodeReplay.py
+++ b/grid2op/Plot/EpisodeReplay.py
@@ -9,6 +9,7 @@
import sys
import numpy as np
from datetime import datetime
+import warnings

from grid2op.Episode import EpisodeData
from grid2op.Exceptions import Grid2OpException
@@ -34,6 +35,11 @@
class EpisodeReplay(object):
    """
+
+    .. warning:: /!\\\\ This class is deprecated /!\\\\
+
+        Prefer using the class `grid2op.Episode.EpisodeReplay`
+
    This class allows to see visually what an agent has done during an episode. It uses for now the "PlotPygame" as the
    method to plot the different states of the system. It reads directly data from the runner.
@@ -66,18 +72,24 @@ class EpisodeReplay(object):
        The last data of the episode inspected.
    """
    def __init__(self, agent_path):
+        warnings.warn("This whole class has been deprecated. Use the `grid2op.PlotGrid` module instead",
+                      category=DeprecationWarning)
+
        if not os.path.exists(agent_path):
            raise Grid2OpException("Nothing is found at \"{}\" where an agent path should have been.".format(agent_path))
        self.agent_path = agent_path
        self.episode_data = None

        if not can_save_gif:
-            import warnings
            warnings.warn("The final video will not be saved as \"imageio\" and \"imageio_ffmpeg\" packages cannot be "
                          "imported. Please try \"{} -m pip install imageio imageio-ffmpeg\"".format(sys.executable))

    def replay_episode(self, episode_id, max_fps=10, video_name=None, display=True):
        """
+        .. warning:: /!\\\\ This class is deprecated /!\\\\
+
+            Prefer using the class `grid2op.Episode.EpisodeReplay`
+
        When called, this function will start the display of the episode in a "mini movie" format.

        Parameters
diff --git a/grid2op/Plot/PlotMatplotlib.py b/grid2op/Plot/PlotMatplotlib.py
index 54630b334..6965d2ae6 100644
--- a/grid2op/Plot/PlotMatplotlib.py
+++ b/grid2op/Plot/PlotMatplotlib.py
@@ -32,9 +32,11 @@
    fig.show()
"""
+import warnings

from grid2op.Exceptions import PlotError
from grid2op.Plot.BasePlot import BasePlot

+
try:
    import matplotlib
    import matplotlib.pyplot as plt
@@ -49,6 +51,11 @@
class PlotMatplotlib(BasePlot):
    """
+
+    .. warning:: /!\\\\ This class is deprecated /!\\\\
+
+        Prefer using the class `grid2op.PlotGrid.PlotMatplot`
+
    This class aims at simplifying the representation of the grid using matplotlib graphical libraries.

    It can be used to inspect position of elements, or to project some static data on this plot. It can be useful
@@ -70,6 +77,10 @@ def __init__(self,
                 radius_sub=radius_sub,
                 load_prod_dist=load_prod_dist,
                 bus_radius=bus_radius)
+
+        warnings.warn("This whole class has been deprecated. Use `grid2op.PlotGrid.PlotMatplot` instead",
+                      category=DeprecationWarning)
+
        if not can_plot:
            raise RuntimeError("Impossible to plot as matplotlib cannot be imported. "
                               "Please install \"matplotlib\" with \"pip install --upgrade matplotlib\"")
@@ -204,16 +215,6 @@ def _draw_topos_one_sub(self, fig, sub_id, buses_z, elements, bus_vect):
                        color=color, alpha=self.alpha_obj)
        return []

-
-
-
-
-
-
-
-
-
-
    def _draw_powerlines____________(self, ax, texts=None, colormap=None):
        colormap_ = lambda x: self.col_line
        vals = [0. for _ in range(self.n_line)]
diff --git a/grid2op/Plot/PlotPlotly.py b/grid2op/Plot/PlotPlotly.py
index d0f200ccb..b6623c07c 100644
--- a/grid2op/Plot/PlotPlotly.py
+++ b/grid2op/Plot/PlotPlotly.py
@@ -64,6 +64,9 @@
# Some utilities to plot substation, lines or get the color id for the colormap.
def draw_sub(pos, radius=50, line_color="LightSeaGreen"):
    """
+
+    .. warning:: /!\\\\ This function is deprecated /!\\\\
+
    This function will draw the contour of a unique substation.

    Parameters
@@ -97,6 +100,8 @@ def draw_sub(pos, radius=50, line_color="LightSeaGreen"):

def get_col(rho):
    """
+    .. warning:: /!\\\\ This function is deprecated /!\\\\
+
    Get the index (in the color palette) of the current capacity usage.

    Parameters
@@ -125,6 +130,9 @@ def get_col(rho):

def draw_line(pos_sub_or, pos_sub_ex, rho, color_palette, status, line_color="gray"):
    """
+
+    .. warning:: /!\\\\ This function is deprecated /!\\\\
+
    Draw a powerline with the color depending on its line capacity usage.

    Parameters
@@ -172,6 +180,11 @@ def draw_line(pos_sub_or, pos_sub_ex, rho, color_palette, status, line_color="gr

class PlotPlotly(BasePlot):
    """
+
+    .. warning:: /!\\\\ This class is deprecated /!\\\\
+
+        Prefer using the class `grid2op.PlotGrid.PlotPlotly`
+
    This class aims at simplifying the representation of an observation as a plotly object given a layout of a given
    powergrid substation.
    It "automatically" handles the positioning of the powerlines, loads and generators based on that.
@@ -328,6 +341,10 @@ def _draw_subs_one_sub(self, fig, sub_id, center, this_col, txt_):

    def _draw_powerlines_one_powerline(self, fig, l_id, pos_or, pos_ex, status, value, txt_, or_to_ex, this_col):
        """
+        .. warning:: /!\\\\ This class is deprecated /!\\\\
+
+            Prefer using the class `grid2op.PlotGrid.PlotPlotly`
+
        Draw the powerline, between two substations.

        Parameters
diff --git a/grid2op/Plot/PlotPyGame.py b/grid2op/Plot/PlotPyGame.py
index 8ed648d62..cdcfcdac6 100644
--- a/grid2op/Plot/PlotPyGame.py
+++ b/grid2op/Plot/PlotPyGame.py
@@ -124,6 +124,9 @@ def _draw_arrow(surf, color, start_pos, end_pos, positive_flow, width=1, num_arr

class PlotPyGame(BasePlot):
    """
+
+    .. warning:: /!\\\\ This class is deprecated /!\\\\
+
    This renderer should be used only for "online" representation of a powergrid.
    """
@@ -215,6 +218,8 @@ def __init__(self,

    def change_duration_timestep_display(self, new_timestep_duration_seconds):
        """
+        .. warning:: /!\\\\ This class is deprecated /!\\\\
+
        Change the duration for which each time step is displayed on the screen.
        """
        self.timestep_duration_seconds = new_timestep_duration_seconds
@@ -229,7 +234,7 @@ def init_pygame(self):

    def reset(self, env):
        """
-        Reset the runner in a consistent state, equivalent to a state where it has not run at all.
+        .. warning:: /!\\\\ This class is deprecated /!\\\\

        Parameters
        ----------
@@ -282,6 +287,9 @@ def _event_looper(self, force=False):

    def _press_key_to_quit(self):
        """
+
+        .. warning:: /!\\\\ This class is deprecated /!\\\\
+
        This utility function waits for the player to press a key to exit the renderer (called when the episode is done)

        Returns
@@ -364,6 +372,9 @@ def deactivate_display(self):

    def get_rgb(self, obs, reward=None, done=None, timestamp=None):
        """
+
+        .. warning:: /!\\\\ This class is deprecated /!\\\\
+
        Computes and returns the rgb 3d array from an observation, and potentially other information.

        Parameters
@@ -432,6 +443,9 @@ def init_fig(self, fig, reward, done, timestamp):

    def _post_process_obs(self, fig, reward, done, timestamp, subs, lines, loads, gens, topos):
        """
+
+        .. warning:: /!\\\\ This class is deprecated /!\\\\
+
        In the case of PlotPyGame, `fig` indicates whether the player pressed "quit" or not

        Parameters
diff --git a/grid2op/Plot/Plotting.py b/grid2op/Plot/Plotting.py
index 75dd76970..ea16ce3cf 100644
--- a/grid2op/Plot/Plotting.py
+++ b/grid2op/Plot/Plotting.py
@@ -15,6 +15,11 @@

class Plotting:
+    """
+
+    .. warning:: /!\\\\ This class is deprecated /!\\\\
+
+    """
    allwed_display_mod = {"pygame": PlotPyGame,
                          "plotly": PlotPlotly,
                          "matplotlib": PlotMatplotlib}
diff --git a/grid2op/PlotGrid/BasePlot.py b/grid2op/PlotGrid/BasePlot.py
index a96783b84..208c0d1c7 100644
--- a/grid2op/PlotGrid/BasePlot.py
+++ b/grid2op/PlotGrid/BasePlot.py
@@ -16,8 +16,11 @@
from grid2op.PlotGrid.PlotUtil import PlotUtil as pltu
from grid2op.dtypes import dt_float, dt_int

+
class BasePlot(ABC):
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
    Abstract interface to plot the state of the powergrid

    Implement the interface with a plotting library to generate drawings
diff --git a/grid2op/PlotGrid/LayoutUtil.py b/grid2op/PlotGrid/LayoutUtil.py
index 0e35bc34c..0562e50d6 100644
--- a/grid2op/PlotGrid/LayoutUtil.py
+++ b/grid2op/PlotGrid/LayoutUtil.py
@@ -50,6 +50,7 @@ def layout_obs_sub_only(obs, scale=1000.0):
    return improved_layout

+
def layout_obs_sub_load_and_gen(obs, scale=1000.0, use_initial=False):
    # Create a graph of substations vertices
    G = nx.Graph()
diff --git a/grid2op/PlotGrid/PlotMatplot.py b/grid2op/PlotGrid/PlotMatplot.py
index d9dccd229..b621055db 100644
--- a/grid2op/PlotGrid/PlotMatplot.py
+++ b/grid2op/PlotGrid/PlotMatplot.py
@@ -19,6 +19,8 @@

class PlotMatplot(BasePlot):
    """
+    This class uses the python library "matplotlib" to draw the powergrid.
+
    Attributes
    ----------
@@ -78,6 +80,25 @@ class PlotMatplot(BasePlot):
        Length of the arrow on the powerlines
    _line_arrow_width: ``int``
        Width of the arrow on the powerlines
+
+    Examples
+    --------
+    You can use it this way:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.PlotGrid import PlotMatplot
+        env = grid2op.make()
+        plot_helper = PlotMatplot(env.observation_space)
+
+        # and now plot an observation (for example)
+        obs = env.reset()
+        fig = plot_helper.plot_obs(obs)
+        fig.show()
+
+        # more information about it in the `getting_started/8_PlottingCapabilities.ipynb` notebook of grid2op
+
    """

    def __init__(self,
@@ -92,7 +113,7 @@ def __init__(self,
                 load_radius=8,
                 load_name=False,
                 load_id=False,
-                 gen_radius = 8,
+                 gen_radius=8,
                 gen_name=False,
                 gen_id=False,
                 line_name=False,
diff --git a/grid2op/PlotGrid/PlotPlotly.py b/grid2op/PlotGrid/PlotPlotly.py
index 25f673b7c..3336e6c4b 100644
--- a/grid2op/PlotGrid/PlotPlotly.py
+++ b/grid2op/PlotGrid/PlotPlotly.py
@@ -20,6 +20,30 @@

class PlotPlotly(BasePlot):
+    """
+
+    This class uses the python library "plotly" to draw the powergrid. Plotly has the ability to generate
+    interactive graphs.
+
+    Examples
+    --------
+    You can use it this way:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.PlotGrid import PlotPlotly
+        env = grid2op.make()
+        plot_helper = PlotPlotly(env.observation_space)
+
+        # and now plot an observation (for example)
+        obs = env.reset()
+        fig = plot_helper.plot_obs(obs)
+        fig.show()
+
+        # more information about it in the `getting_started/8_PlottingCapabilities.ipynb` notebook of grid2op
+
+    """
    def __init__(self,
                 observation_space,
                 width=1280,
diff --git a/grid2op/PlotGrid/PlotUtil.py b/grid2op/PlotGrid/PlotUtil.py
index f0117a4d4..37232908f 100644
--- a/grid2op/PlotGrid/PlotUtil.py
+++ b/grid2op/PlotGrid/PlotUtil.py
@@ -10,6 +10,11 @@

class PlotUtil:
+    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+    This is a wrapper that contains utilities to draw the information on the plots more easily.
+    """
    @staticmethod
    def format_value_unit(value, unit):
        if isinstance(value, float):
diff --git a/grid2op/PlotGrid/__init__.py b/grid2op/PlotGrid/__init__.py
index d0d9914cc..bea8bab2b 100644
--- a/grid2op/PlotGrid/__init__.py
+++ b/grid2op/PlotGrid/__init__.py
@@ -4,14 +4,14 @@

from grid2op.PlotGrid.BasePlot import BasePlot

-# Contionnal exports for optional dependencies
+# Conditional exports for optional dependencies
try:
    from grid2op.PlotGrid.PlotMatplot import PlotMatplot
    __all__.append("PlotMatplot")
except ImportError:
-    pass # Silent fail because it is optional
+    pass  # Silent fail because it is optional
try:
    from grid2op.PlotGrid.PlotPlotly import PlotPlotly
    __all__.append("PlotPlotly")
except ImportError:
-    pass # Silent fail because it is optional
+    pass  # Silent fail because it is optional
diff --git a/grid2op/Reward/BaseReward.py b/grid2op/Reward/BaseReward.py
index f820b3f42..17955cc75 100644
--- a/grid2op/Reward/BaseReward.py
+++ b/grid2op/Reward/BaseReward.py
@@ -18,6 +18,10 @@
class BaseReward(ABC):
    """
    One of the goals of Reinforcement Learning is to maximize the (discounted) sum of (expected) rewards over time.
+
+    You can create all the rewards you want in grid2op. The only requirement is that every reward class must
+    inherit from this BaseReward.
+
    Attributes
    ----------
    reward_min: ``float``
@@ -30,6 +34,73 @@ class BaseReward(ABC):
        :class:`grid2op.Action.BaseAction` in the best possible scenario.

+    Examples
+    ---------
+    If you want the environment to compute a reward that is the sum of the flows (this is not a good reward, but
+    we use it as an example of how to do it) you can achieve it with:
+
+    .. code-block:: python
+
+        import grid2op
+        import numpy as np
+        from grid2op.Reward import BaseReward
+
+        # first you create your reward
+        class SumOfFlowReward(BaseReward):
+            def __init__(self):
+                BaseReward.__init__(self)
+
+            def initialize(self, env):
+                # this function is used to inform the class instance about the environment specification
+                # you can use `env.n_line` or `env.n_load` or `env.get_thermal_limit()` for example
+                # do not forget to initialize "reward_min" and "reward_max"
+                self.reward_min = 0.
+                self.reward_max = np.sum(env.get_thermal_limit())
+
+                # in this case the maximum reward is obtained when computing the sum of the maximum flows
+                # on each powerline
+
+            def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
+                # this method is called at the end of 'env.step' to compute the reward
+                # in our case we just want to sum the flow on each powerline because... why not...
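+                # note: "a_or" (used below) is the vector of flows, in amps, at the
+                # origin side of each powerline, so summing it gives the quantity we
+                # want to reward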
+                if has_error:
+                    # see the "Notes" paragraph for more information
+                    res = self.reward_min
+                else:
+                    res = np.sum(env.get_obs().a_or)
+                return res
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=SumOfFlowReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        assert np.sum(obs.a_or) == reward
+        # the above should be true
+
+    Notes
+    ------
+    If the flag `has_error` is set to ``True``, this indicates there has been an error in the "env.step" function.
+    This might induce some undefined behaviour if using some method of the environment.
+
+    Please make sure to check whether or not this is the case when defining your reward.
+
+    This "new" behaviour has been introduced to "fix" the awkward behaviour spotted in
+    https://github.com/rte-france/Grid2Op/issues/146
+
+    .. code-block:: python
+
+        def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
+            if has_error:
+                # DO SOMETHING IN THIS CASE
+                res = self.reward_min
+            else:
+                # DO NOT USE `env.get_obs()` (nor any method of the environment `env.XXX`) if the flag
+                # `has_error` is set to ``True``
+                # This might result in undefined behaviour
+                res = np.sum(env.get_obs().a_or)
+            return res
    """
    @abstractmethod
    def __init__(self):
@@ -43,7 +114,7 @@ def __init__(self):

    def initialize(self, env):
        """
        If :attr:`BaseReward.reward_min`, :attr:`BaseReward.reward_max` or other custom attributes require to have a
-        valid :class:`grid2op.Environement.Environment` to be initialized, this should be done in this method.
+        valid :class:`grid2op.Environment.Environment` to be initialized, this should be done in this method.

        **NB** reward_min and reward_max are used by the environment to compute the maximum and minimum reward and
        cast it in "reward_range" which is part of the openAI gym public interface. If you don't define them, some
@@ -54,10 +125,6 @@ def initialize(self, env):
        env: :class:`grid2op.Environment.Environment`
            An environment instance properly initialized.

-        Returns
-        -------
-        ``None``
-
        """
        pass
@@ -96,6 +163,17 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        res: ``float``
            The reward associated to the input parameters.

+        Notes
+        ------
+        All the flags can be used to know on which type of situation the reward is computed.
+
+        For example, if `has_error` is ``True`` it means there was an error during the computation of the powerflow.
+        This means there is a "game over", so ``is_done`` is ``True`` in this case.
+
+        But if ``is_done`` is ``True`` while ``has_error`` is ``False``, it means that the episode is over without
+        any error. In other words, your agent successfully managed the whole scenario and got to the end of the
+        episode.
+
        """
        pass
diff --git a/grid2op/Reward/BridgeReward.py b/grid2op/Reward/BridgeReward.py
index 66549f38b..58a007afe 100644
--- a/grid2op/Reward/BridgeReward.py
+++ b/grid2op/Reward/BridgeReward.py
@@ -15,8 +15,26 @@

class BridgeReward(BaseReward):
    """
-    This reward computes a penalty based on how many bridges are present in the grid netwrok.
+    This reward computes a penalty based on how many bridges are present in the grid network.
    In graph theory, a bridge is an edge that if removed will cause the graph to be disconnected.
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import BridgeReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=BridgeReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with this class (computing the penalty based on the number of "bridges" in the grid)
+
    """
    def __init__(self, min_pen_lte=0.0, max_pen_gte=1.0):
        BaseReward.__init__(self)
diff --git a/grid2op/Reward/CloseToOverflowReward.py b/grid2op/Reward/CloseToOverflowReward.py
index cd8ce5331..1009967a0 100644
--- a/grid2op/Reward/CloseToOverflowReward.py
+++ b/grid2op/Reward/CloseToOverflowReward.py
@@ -9,11 +9,30 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class CloseToOverflowReward(BaseReward):
    """
    This reward finds all lines close to overflowing.
    Returns max reward when there is no overflow, min reward if more than one line is close to overflow
    and the mean between max and min reward if one line is close to overflow
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import CloseToOverflowReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=CloseToOverflowReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with this class (computing the penalty based on the number of lines close to overflow)
+
    """
    def __init__(self, max_lines=5):
        BaseReward.__init__(self)
diff --git a/grid2op/Reward/ConstantReward.py b/grid2op/Reward/ConstantReward.py
index 08225da2e..b82ebc0c9 100644
--- a/grid2op/Reward/ConstantReward.py
+++ b/grid2op/Reward/ConstantReward.py
@@ -9,12 +9,31 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class ConstantReward(BaseReward):
    """
-    Most basic implementation of reward: everything has the same values.
+    Most basic implementation of reward: everything has the same value: 0.0
+
+    Note that this :class:`BaseReward` subtype is not useful at all, neither to train a :class:`BaseAgent`
+    nor to assess its performance of course.
+
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import ConstantReward

-    Note that this :class:`BaseReward` subtype is not usefull at all, whether to train an :attr:`BaseAgent` nor to assess its
-    performance of course.
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=ConstantReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is 0., always... Not really useful
    """
    def __init__(self):
diff --git a/grid2op/Reward/DistanceReward.py b/grid2op/Reward/DistanceReward.py
index 39e8e37c1..976aeb510 100644
--- a/grid2op/Reward/DistanceReward.py
+++ b/grid2op/Reward/DistanceReward.py
@@ -10,9 +10,29 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class DistanceReward(BaseReward):
    """
-    This reward computes a penalty based on the distance of the current grid to the grid at time 0.
+    This reward computes a penalty based on the distance of the current grid to the grid at time 0 where
+    everything is connected to bus 1.
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import DistanceReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=DistanceReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the DistanceReward class
+
    """
    def __init__(self):
        BaseReward.__init__(self)
@@ -25,7 +45,7 @@ def __call__(self, action, env, has_error,
            return self.reward_min

        # Get topo from env
-        obs = env.current_obs
+        obs = env.get_obs()
        topo = obs.topo_vect

        idx = 0
diff --git a/grid2op/Reward/EconomicReward.py b/grid2op/Reward/EconomicReward.py
index 09a2c70d6..eecde2f3d 100644
--- a/grid2op/Reward/EconomicReward.py
+++ b/grid2op/Reward/EconomicReward.py
@@ -12,6 +12,7 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class EconomicReward(BaseReward):
    """
    This reward computes the marginal cost of the powergrid. As RL is about maximising a reward, while we want to
@@ -20,6 +21,23 @@ class EconomicReward(BaseReward):
    - the reward is positive if there is no game over, no error etc.
    - the reward is inversely proportional to the cost of the grid (the higher the reward, the lower the economic cost).

+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import EconomicReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=EconomicReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the EconomicReward class
+
    """
    def __init__(self):
        BaseReward.__init__(self)
@@ -38,7 +56,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
            res = self.reward_min
        else:
            # compute the cost of the grid
-            res = dt_float(np.sum(env.current_obs.prod_p * env.gen_cost_per_MW))
+            res = dt_float(np.sum(env.get_obs().prod_p * env.gen_cost_per_MW))
            # we want to minimize the cost by maximizing the reward so let's take the opposite
            res *= dt_float(-1.0)
            # to be sure it's positive, add the highest possible cost
diff --git a/grid2op/Reward/FlatReward.py b/grid2op/Reward/FlatReward.py
index 95566cb5e..544f07dd7 100644
--- a/grid2op/Reward/FlatReward.py
+++ b/grid2op/Reward/FlatReward.py
@@ -9,10 +9,28 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class FlatReward(BaseReward):
    """
    This reward returns a fixed number (if there is no error) or 0 if there is an error.

+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import FlatReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=FlatReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the FlatReward class
+
    """
    def __init__(self, per_timestep=1):
        BaseReward.__init__(self)
diff --git a/grid2op/Reward/GameplayReward.py b/grid2op/Reward/GameplayReward.py
index ae49043a7..a2b12aa8c 100644
--- a/grid2op/Reward/GameplayReward.py
+++ b/grid2op/Reward/GameplayReward.py
@@ -9,12 +9,31 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class GameplayReward(BaseReward):
    """
    This reward is strictly computed based on the game status.
    It yields a negative reward in case of game over.
-    A half negative reward on rules infringment.
+    A half negative reward on rules infringement.
    Otherwise the reward is positive.
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import GameplayReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=GameplayReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the GameplayReward class
+
    """
    def __init__(self):
        BaseReward.__init__(self)
diff --git a/grid2op/Reward/IncreasingFlatReward.py b/grid2op/Reward/IncreasingFlatReward.py
index 759649c7c..6fd17e1ac 100644
--- a/grid2op/Reward/IncreasingFlatReward.py
+++ b/grid2op/Reward/IncreasingFlatReward.py
@@ -10,11 +10,29 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class IncreasingFlatReward(BaseReward):
    """
-    This reward just counts the number of timestep the agent has sucessfully manage to perform.
+    This reward just counts the number of timesteps the agent has successfully managed to perform.
+
+    It adds a constant reward for each time step successfully handled.
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import IncreasingFlatReward

-    It adds a constant reward for each time step sucessfully handled.
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=IncreasingFlatReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the IncreasingFlatReward class
    """

    def __init__(self, per_timestep=1):
@@ -31,7 +49,7 @@ def initialize(self, env):

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if not has_error:
-            res = dt_float(env.nb_time_step * self.per_timestep)
+            res = dt_float(env._nb_time_step * self.per_timestep)
        else:
            res = self.reward_min
        return res
diff --git a/grid2op/Reward/L2RPNReward.py b/grid2op/Reward/L2RPNReward.py
index 678c456a6..1fcf9e850 100644
--- a/grid2op/Reward/L2RPNReward.py
+++ b/grid2op/Reward/L2RPNReward.py
@@ -13,10 +13,36 @@

class L2RPNReward(BaseReward):
    """
-    This is the historical :class:`BaseReward` used for the Learning To Run a Power Network competition.
+    This is the historical :class:`BaseReward` used for the Learning To Run a Power Network competition at WCCI 2019.

    See `L2RPN `_ for more information.

+    This reward computes the sum of the "squared margin" of each powerline.
+
+    The margin is defined, for each powerline, as:
+    `margin of a powerline = (thermal limit - flow in amps) / thermal limit`
+    (if flow in amps <= thermal limit) else `margin of a powerline = 0.`
+
+    The reward is then: `sum of (margin of the powerline) ^ 2`, over all powerlines.
+
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import L2RPNReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=L2RPNReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the L2RPNReward class
+
    """
    def __init__(self):
        BaseReward.__init__(self)
diff --git a/grid2op/Reward/L2RPNSandBoxScore.py b/grid2op/Reward/L2RPNSandBoxScore.py
index 73b92f05a..dea60a27e 100644
--- a/grid2op/Reward/L2RPNSandBoxScore.py
+++ b/grid2op/Reward/L2RPNSandBoxScore.py
@@ -14,10 +14,15 @@

class L2RPNSandBoxScore(BaseReward):
    """
-    This score represent the L2RPN score.
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+        It **must not** serve as a reward. This score needs to be minimized,
+        and a reward needs to be maximized! Also, this "reward" is not scaled or anything. Use it at your
+        own risk.

-    It **must not** serve as a reward, as the aim of L2RPN competition is to minimize the score, and a reward
-    needs to be maximize! Also, this "reward" is not scaled or anything. Use it as your own risk
+    Implemented as a reward to make it easier to use in the context of the L2RPN competitions, this "reward"
+    computes the "grid operation cost". It should not be used to train an agent.
+
+    The reward closest to this score is given by the :class:`RedispReward` class.
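+
+    Examples
+    ---------
+    A minimal sketch of how this score is typically used: not as the reward the agent is trained with, but
+    computed on the side through the `other_rewards` mechanism (this sketch assumes the
+    `rte_case14_realistic` environment and the `other_rewards` keyword of `grid2op.make`):
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import L2RPNSandBoxScore, RedispReward
+
+        # keep a "regular" reward for the agent, and compute this score on the side
+        env = grid2op.make("rte_case14_realistic",
+                           reward_class=RedispReward,
+                           other_rewards={"grid_operation_cost": L2RPNSandBoxScore})
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the score computed by L2RPNSandBoxScore is in the "info" dictionary
+        print(info["rewards"]["grid_operation_cost"])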
""" def __init__(self, alpha_redisph=1.0): @@ -27,16 +32,22 @@ def __init__(self, alpha_redisph=1.0): self.alpha_redisph = dt_float(alpha_redisph) def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): + if has_error: + # DO SOMETHING IN THIS CASE + return self.reward_min + # compute the losses gen_p, *_ = env.backend.generators_info() load_p, *_ = env.backend.loads_info() losses = np.sum(gen_p, dtype=dt_float) - np.sum(load_p, dtype=dt_float) # compute the marginal cost - p_t = np.max(env.gen_cost_per_MW[env.gen_activeprod_t > 0.]).astype(dt_float) + gen_activeprod_t = env._gen_activeprod_t + p_t = np.max(env.gen_cost_per_MW[gen_activeprod_t > 0.]).astype(dt_float) # redispatching amount - c_redispatching = dt_float(2.0) * self.alpha_redisph * np.sum(np.abs(env.actual_dispatch)) * p_t + actual_dispatch = env._actual_dispatch + c_redispatching = dt_float(2.0) * self.alpha_redisph * np.sum(np.abs(actual_dispatch)) * p_t # cost of losses c_loss = losses * p_t diff --git a/grid2op/Reward/LinesCapacityReward.py b/grid2op/Reward/LinesCapacityReward.py index ce92ef0cc..67685a46f 100644 --- a/grid2op/Reward/LinesCapacityReward.py +++ b/grid2op/Reward/LinesCapacityReward.py @@ -10,6 +10,7 @@ from grid2op.Reward.BaseReward import BaseReward from grid2op.dtypes import dt_float + class LinesCapacityReward(BaseReward): """ Reward based on lines capacity usage @@ -19,6 +20,24 @@ class LinesCapacityReward(BaseReward): Compared to `:class:L2RPNReward`: This reward is linear (instead of quadratic) and only considers connected lines capacities + + Examples + --------- + You can use this reward in any environment with: + + .. code-block: + + import grid2op + from grid2op.Reward import LinesCapacityReward + + # then you create your environment with it: + NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic" + env = grid2op.make(NAME_OF_THE_ENVIRONMENT,reward_class=LinesCapacityReward) + # and do a step with a "do nothing" action + obs = env.reset() + obs, reward, done, info = env.step(env.action_space()) + # the reward is computed with the LinesCapacityReward class + """ def __init__(self): BaseReward.__init__(self) @@ -33,7 +52,7 @@ def __call__(self, action, env, has_error, if has_error or is_illegal or is_ambiguous: return self.reward_min - obs = env.current_obs + obs = env.get_obs() n_connected = np.sum(obs.line_status.astype(dt_float)) usage = np.sum(obs.rho[obs.line_status == True]) usage = np.clip(usage, 0.0, float(n_connected)) diff --git a/grid2op/Reward/LinesReconnectedReward.py b/grid2op/Reward/LinesReconnectedReward.py index 22c67eacf..809f8aa1e 100644 --- a/grid2op/Reward/LinesReconnectedReward.py +++ b/grid2op/Reward/LinesReconnectedReward.py @@ -11,10 +11,30 @@ from grid2op.Reward.BaseReward import BaseReward from grid2op.dtypes import dt_float + class LinesReconnectedReward(BaseReward): """ This reward computes a penalty - based on the number of off cooldown disconnected lines + based on the number of powerline that could have been reconnected (cooldown at 0.) but + are still disconnected. + + Examples + --------- + You can use this reward in any environment with: + + .. 
+
+        import grid2op
+        from grid2op.Reward import LinesReconnectedReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=LinesReconnectedReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the LinesReconnectedReward class
+
    """
    def __init__(self):
        BaseReward.__init__(self)
@@ -28,7 +48,7 @@ def __call__(self, action, env, has_error,
            return self.reward_min

        # Get obs from env
-        obs = env.current_obs
+        obs = env.get_obs()

        # All lines ids
        lines_id = np.arange(env.n_line)
diff --git a/grid2op/Reward/RedispReward.py b/grid2op/Reward/RedispReward.py
index a17ac8b3c..3e93a0aaa 100644
--- a/grid2op/Reward/RedispReward.py
+++ b/grid2op/Reward/RedispReward.py
@@ -12,10 +12,33 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float

+
class RedispReward(BaseReward):
    """
-    This reward can be used for environments where redispatching is availble. It assigns a cost to redispatching action
+    This reward can be used for environments where redispatching is available. It assigns a cost to redispatching action
    and penalizes with the losses.
+
+    This is the closest reward to the score used for the L2RPN competitions.
+
+    Examples
+    ---------
+    You can use this reward in any environment with:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Reward import RedispReward
+
+        # then you create your environment with it:
+        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
+        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=RedispReward)
+        # and do a step with a "do nothing" action
+        obs = env.reset()
+        obs, reward, done, info = env.step(env.action_space())
+        # the reward is computed with the RedispReward class
+
+        # NB this is the default reward of many environments in the grid2op framework
+
    """
    def __init__(self, alpha_redisph=5.0):
        BaseReward.__init__(self)
@@ -52,10 +75,12 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
            losses = np.sum(gen_p) - np.sum(load_p)

            # compute the marginal cost
-            marginal_cost = np.max(env.gen_cost_per_MW[env.gen_activeprod_t > 0.])
+            gen_activeprod_t = env._gen_activeprod_t
+            marginal_cost = np.max(env.gen_cost_per_MW[gen_activeprod_t > 0.])

            # redispatching amount
-            redisp_cost = self.alpha_redisph * np.sum(np.abs(env.actual_dispatch)) * marginal_cost
+            actual_dispatch = env._actual_dispatch
+            redisp_cost = self.alpha_redisph * np.sum(np.abs(actual_dispatch)) * marginal_cost

            # cost of losses
            losses_cost = losses * marginal_cost
diff --git a/grid2op/Reward/RewardHelper.py b/grid2op/Reward/RewardHelper.py
index 1a62690fa..3347f2b14 100644
--- a/grid2op/Reward/RewardHelper.py
+++ b/grid2op/Reward/RewardHelper.py
@@ -9,8 +9,13 @@
from grid2op.Reward.BaseReward import BaseReward
from grid2op.Reward.ConstantReward import ConstantReward

+
class RewardHelper:
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+        It is a class internal to the :class:`grid2op.Environment.Environment`. Do not use it outside
+        of its purpose and do not attempt to modify it.
+
    This class aims at making the creation of reward classes more automatic by the
    :class:`grid2op.Environment`.

    It is not recommended to derive from or modify this class.
If a different reward needs to be used, it is recommended
@@ -59,7 +64,6 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        Gives the reward that follows the execution of the :class:`grid2op.BaseAction.BaseAction` action
        in the :class:`grid2op.Environment.Environment` env;
-
        Parameters
        ----------
        action: :class:`grid2op.Action.Action`
@@ -85,11 +89,9 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):

        Returns
        -------
+        res: ``float``
+            The computed reward
        """
-        if not has_error:
-            res = self.template_reward(action, env, has_error, is_done, is_illegal, is_ambiguous)
-        else:
-            # no more data to consider, no powerflow has been run, reward is what it is
-            res = self.template_reward.reward_min
+        res = self.template_reward(action, env, has_error, is_done, is_illegal, is_ambiguous)
        return res
diff --git a/grid2op/Rules/PreventReconnection.py b/grid2op/Rules/PreventReconnection.py
index c7337ecf4..9c655f2c2 100644
--- a/grid2op/Rules/PreventReconnection.py
+++ b/grid2op/Rules/PreventReconnection.py
@@ -31,20 +31,18 @@ def __call__(self, action, env):
        # at first iteration, env.current_obs is None...
        # TODO this is used inside the environment (for step) inside LookParam and here
        # this could be computed only once, and fed to this instead
-        if env.current_obs is not None:
-            powerline_status = env.current_obs.line_status
-        else:
-            powerline_status = None
+        powerline_status = env.get_current_line_status()
+
        aff_lines, aff_subs = action.get_topological_impact(powerline_status)
-        if np.any(env.times_before_line_status_actionable[aff_lines] > 0):
+        if np.any(env._times_before_line_status_actionable[aff_lines] > 0):
            # i tried to act on a powerline too shortly after a previous action
            # or shut down due to an overflow or opponent or hazards or maintenance
-            ids = np.logical_and(env.times_before_line_status_actionable[aff_lines] > 0, aff_lines).nonzero()[0]
+            ids = np.where((env._times_before_line_status_actionable > 0) & aff_lines)[0]
            return False, IllegalAction("Powerline with ids {} have been modified illegally (cooldown)".format(ids))

-        if np.any(env.times_before_topology_actionable[aff_subs] > 0):
+        if np.any(env._times_before_topology_actionable[aff_subs] > 0):
            # I tried to act on a topology too shortly after a previous action
-            ids = np.logical_and(env.times_before_topology_actionable[aff_subs] > 0, aff_subs).nonzero()[0]
+            ids = np.where((env._times_before_topology_actionable > 0) & aff_subs)[0]
            return False, IllegalAction("Substation with ids {} have been modified illegally (cooldown)".format(ids))

        return True, None
diff --git a/grid2op/Runner/Runner.py b/grid2op/Runner/Runner.py
index f8cc3096d..33bcf5025 100644
--- a/grid2op/Runner/Runner.py
+++ b/grid2op/Runner/Runner.py
@@ -44,8 +44,11 @@
# TODO use gym logger if specified by the user.
# TODO: if chronics are "loop through" multiple times, only last results are saved. :-/

+
class DoNothingLog:
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
    A class to emulate the behaviour of a logger, but that does absolutely nothing.
    """
    INFO = 2
@@ -70,6 +73,8 @@ def warning(self, *args, **kwargs):

class ConsoleLog(DoNothingLog):
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
    A class to emulate the behaviour of a logger, but that prints on the console
    """
@@ -107,10 +112,42 @@ def warning(self, *args, **kwargs):

class Runner(object):
    """
-    A runner is a utilitary tool that allows to create environment, and run simulations more easily.
-    This specific class as for main purpose to evaluate the performance of a trained :class:`grid2op.BaseAgent`, rather
-    than to train it. Of course, it is possible to adapt it for a specific training mechanisms. Examples of such
-    will be made available in the future.
+    A runner is a utility tool that allows to run simulations more easily. It is a more convenient way to execute the
+    following loop:
+
+    .. code-block:: python
+
+        import grid2op
+        from grid2op.Agent import RandomAgent # for example...
+        from grid2op.Runner import Runner
+
+        env = grid2op.make()
+        agent = RandomAgent(env.action_space)
+
+        ###############
+        # the gym loop
+        nb_episode = 5
+        for i in range(nb_episode):
+            obs = env.reset()
+            done = False
+            reward = env.reward_range[0]
+            while not done:
+                act = agent.act(obs, reward, done)
+                obs, reward, done, info = env.step(act)
+
+        ###############
+        # equivalent with use of a Runner
+        runner = Runner(**env.get_params_for_runner(), agentClass=RandomAgent)
+        res = runner.run(nb_episode=nb_episode)
+
+
+    This specific class has for main purpose to evaluate the performance of a trained :class:`grid2op.BaseAgent`,
+    rather than to train it.
+
+    It also has the good property of being able to save the results of an experiment in the standardized
+    manner described in the :class:`grid2op.Episode.EpisodeData`.
+
+    **NB** we do not recommend creating a runner from scratch by providing all the arguments. We strongly
+    encourage you to use the :func:`grid2op.Environment.Environment.get_params_for_runner` for
+    creating a runner.

    Attributes
    ----------
@@ -222,6 +259,11 @@ class Runner(object):
    grid_layout: ``dict``, optional
        The layout of the grid (position of each substation) useful if you need to plot some things for example.
+
+    Examples
+    --------
+    Different examples are shown in the description of the main method :func:`Runner.run`
+
    """

    def __init__(self,
@@ -507,7 +549,6 @@ def _new_env(self, chronics_handler, backend, parameters):
                            legalActClass=self.legalActClass,
                            voltagecontrolerClass=self.voltageControlerClass,
                            other_rewards=self._other_rewards,
-
                            opponent_action_class=self.opponent_action_class,
                            opponent_class=self.opponent_class,
                            opponent_init_budget=self.opponent_init_budget,
@@ -516,9 +557,7 @@ def _new_env(self, chronics_handler, backend, parameters):
                            opponent_attack_duration=self.opponent_attack_duration,
                            opponent_attack_cooldown=self.opponent_attack_cooldown,
                            kwargs_opponent=self.opponent_kwargs,
-
                            with_forecast=self.with_forecast,
-
                            _raw_backend_class=self.backendClass
                            )

@@ -529,7 +568,7 @@ def _new_env(self, chronics_handler, backend, parameters):
            res.attach_layout(self.grid_layout)

        if self._useclass:
-            agent = self.agentClass(res.helper_action_player)
+            agent = self.agentClass(res.action_space)
        else:
            if self.__can_copy_agent:
                agent = copy.copy(self.agent)
@@ -539,25 +578,19 @@ def _new_env(self, chronics_handler, backend, parameters):

    def init_env(self):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        Function used to initialize the environment and the agent.
        It is called by :func:`Runner.reset`.
-
-        Returns
-        -------
-        ``None``
-
        """
        self.env, self.agent = self._new_env(self.chronics_handler, self.backend, self.parameters)

    def reset(self):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        Used to reset an environment. This method is called at the beginning of each new episode.
        If the environment is not initialized, then it initializes it with :func:`Runner.init_env`.
-
-        Returns
-        -------
-        ``None``
-
        """
        if self.env is None:
            self.init_env()
@@ -566,6 +599,8 @@ def reset(self):

    def run_one_episode(self, indx=0, path_save=None, pbar=False, env_seed=None, max_iter=None, agent_seed=None):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        Function used to run one episode of the :attr:`Runner.agent` and see how it performs in the :attr:`Runner.env`.

        Parameters
@@ -633,27 +668,27 @@ def _run_one_episode(env, agent, logger, indx, path_save=None,
            (1, env.backend.n_line), fill_value=False, dtype=dt_bool)
        attack_templ = np.full(
-            (1, env.oppSpace.action_space.size()), fill_value=0., dtype=dt_float)
+            (1, env._oppSpace.action_space.size()), fill_value=0., dtype=dt_float)
        if efficient_storing:
            times = np.full(nb_timestep_max, fill_value=np.NaN, dtype=dt_float)
            rewards = np.full(nb_timestep_max, fill_value=np.NaN, dtype=dt_float)
            actions = np.full((nb_timestep_max, env.action_space.n),
                              fill_value=np.NaN, dtype=dt_float)
            env_actions = np.full(
-                (nb_timestep_max, env.helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
+                (nb_timestep_max, env._helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
            observations = np.full(
                (nb_timestep_max+1, env.observation_space.n), fill_value=np.NaN, dtype=dt_float)
            disc_lines = np.full(
                (nb_timestep_max, env.backend.n_line), fill_value=np.NaN, dtype=dt_bool)
-            attack = np.full((nb_timestep_max, env.opponent_action_space.n), fill_value=0., dtype=dt_float)
+            attack = np.full((nb_timestep_max, env._opponent_action_space.n), fill_value=0., dtype=dt_float)
        else:
            times = np.full(0, fill_value=np.NaN, dtype=dt_float)
            rewards = np.full(0, fill_value=np.NaN, dtype=dt_float)
            actions = np.full((0, env.action_space.n), fill_value=np.NaN, dtype=dt_float)
-            env_actions = np.full((0, env.helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
+            env_actions = np.full((0, env._helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
            observations = np.full((0, env.observation_space.n), fill_value=np.NaN, dtype=dt_float)
            disc_lines = np.full((0, env.backend.n_line), fill_value=np.NaN, dtype=dt_bool)
-            attack = np.full((0, env.opponent_action_space.n), fill_value=0., dtype=dt_float)
+            attack = np.full((0, env._opponent_action_space.n), fill_value=0., dtype=dt_float)

        if path_save is not None:
            # store observation at timestep 0
@@ -670,12 +705,12 @@ def _run_one_episode(env, agent, logger, indx, path_save=None,
                           times=times,
                           observation_space=env.observation_space,
                           action_space=env.action_space,
-                           helper_action_env=env.helper_action_env,
+                           helper_action_env=env._helper_action_env,
                           path_save=path_save,
                           disc_lines_templ=disc_lines_templ,
                           attack_templ=attack_templ,
                           attack=attack,
-                           attack_space=env.opponent_action_space,
+                           attack_space=env._opponent_action_space,
                           logger=logger,
                           name=env.chronics_handler.get_name(),
                           other_rewards=[])
@@ -699,9 +734,9 @@ def _run_one_episode(env, agent, logger, indx, path_save=None,
                cum_reward += reward
                time_step += 1
                pbar_.update(1)
-                opp_attack = env.oppSpace.last_attack
+                opp_attack = env._oppSpace.last_attack
                episode.incr_store(efficient_storing, time_step,
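                                   # stored for this time step: the time the agent took
                                   # to act, the reward, the action actually done by the
                                   # environment, the agent action, the observation, the
                                   # opponent attack (if any) and the "info" dictionary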
                                   end__ - beg__,
-                                   float(reward), env.env_modification,
+                                   float(reward), env._env_modification,
                                   act, obs, opp_attack, info)
            end_ = time.time()
@@ -727,6 +762,8 @@ def _run_one_episode(env, agent, logger, indx, path_save=None,

    @staticmethod
    def _make_progress_bar(pbar, total, next_pbar):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        Parameters
        ----------
        pbar: ``bool`` or ``type`` or ``object``
@@ -761,6 +798,8 @@ def _make_progress_bar(pbar, total, next_pbar):

    def _run_sequential(self, nb_episode, path_save=None, pbar=False, env_seeds=None, agent_seeds=None, max_iter=None):
        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
        This method is called to see how well an agent performed on a sequence of episodes.

        Parameters
@@ -852,7 +891,9 @@ def _one_process_parrallel(runner, episode_this_process, process_id, path_save=N

    def _run_parrallel(self, nb_episode, nb_process=1, path_save=None, env_seeds=None, agent_seeds=None, max_iter=None):
        """
-        This method will run in parrallel, independantly the nb_episode over nb_process.
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+        This method will run the nb_episode episodes in parallel, independently, over nb_process processes.

        In case the agent cannot be cloned using `copy.copy`: nb_process is set to 1

@@ -860,7 +901,7 @@ def _run_parrallel(self, nb_episode, nb_process=1, path_save=None, env_seeds=Non
        is actually performed with more than 1 cores (nb_process > 1)

        It uses the python multiprocess, and especially the :class:`multiprocess.Pool` to perform the computations.
-        This implies that all runs are completely independant (they happen in different process) and that the
+        This implies that all runs are completely independent (they happen in different processes) and that the
        memory consumption can be big. Tests may be recommended if the amount of RAM is low.

        It has the same return type as the :func:`Runner.run_sequential`.
@@ -941,7 +982,12 @@ def _run_parrallel(self, nb_episode, nb_process=1, path_save=None, env_seeds=Non
        return res

    def _clean_up(self):
-        """close the environment is it has been created"""
+        """
+        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+
+        close the environment if it has been created
+
+        """
        if self.env is not None:
            self.env.close()
        self.env = None
diff --git a/grid2op/Space/GridObjects.py b/grid2op/Space/GridObjects.py
index 2e0ec21ed..c09fcf3a7 100644
--- a/grid2op/Space/GridObjects.py
+++ b/grid2op/Space/GridObjects.py
@@ -28,6 +28,10 @@

class GridObjects:
    """
+    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\
+        Almost every class inherits from this class, so they all have its methods and attributes.
+        Do not attempt to use it outside of the grid2op environment.
+
    This class stores in a Backend agnostic way some information about the powergrid.

    It stores information about numbers of objects, and which objects are where, their names, etc.
@@ -37,7 +41,6 @@ class GridObjects:
    :class:`grid2op.Backend.Backend` all inherit from this class. This means that each of the above has its own
    representation of the powergrid.

-
    The modeling adopted for describing a powergrid is the following:

    - only the main objects of a powergrid are represented.
An "object" is either a load (consumption) a generator @@ -168,36 +171,37 @@ class GridObjects: ---------- n_line: :class:`int` - number of powerlines in the powergrid + number of powerlines in the powergrid [*class attribute*] n_gen: :class:`int` - number of generators in the powergrid + number of generators in the powergrid [*class attribute*] n_load: :class:`int` - number of loads in the + number of loads in the powergrid. [*class attribute*] n_sub: :class:`int` - number of loads in the powergrid + number of loads in the powergrid. [*class attribute*] dim_topo: :class:`int` - The total number of objects in the powergrid. This is also the dimension of the "topology vector" defined above. + The total number of objects in the powergrid. + This is also the dimension of the "topology vector" defined above. [*class attribute*] sub_info: :class:`numpy.ndarray`, dtype:int - for each substation, gives the number of elements connected to it + for each substation, gives the number of elements connected to it [*class attribute*] load_to_subid: :class:`numpy.ndarray`, dtype:int for each load, gives the id the substation to which it is connected. For example, :attr:`GridObjects.load_to_subid` [load_id] gives the id of the substation to which the load of id - `load_id` is connected. + `load_id` is connected. [*class attribute*] gen_to_subid: :class:`numpy.ndarray`, dtype:int - for each generator, gives the id the substation to which it is connected + for each generator, gives the id the substation to which it is connected [*class attribute*] line_or_to_subid: :class:`numpy.ndarray`, dtype:int - for each line, gives the id the substation to which its "origin" end is connected + for each line, gives the id the substation to which its "origin" end is connected [*class attribute*] line_ex_to_subid: :class:`numpy.ndarray`, dtype:int - for each line, gives the id the substation to which its "extremity" end is connected + for each line, gives the id the substation to which its "extremity" end is connected [*class attribute*] load_to_sub_pos: :class:`numpy.ndarray`, dtype:int Suppose you represent the topoology of the substation *s* with a vector (each component of this vector will @@ -206,16 +210,16 @@ class GridObjects: current load. Suppose that load of id `l` is connected to the substation of id `s` (this information is stored in :attr:`GridObjects.load_to_subid` [l]), then if you represent the topology of the substation `s` with a vector `sub_topo_vect`, then "`sub_topo_vect` [ :attr:`GridObjects.load_to_subid` [l] ]" will encode - on which bus the load of id `l` is stored. + on which bus the load of id `l` is stored. [*class attribute*] gen_to_sub_pos: :class:`numpy.ndarray`, dtype:int - same as :attr:`GridObjects.load_to_sub_pos` but for generators. + same as :attr:`GridObjects.load_to_sub_pos` but for generators. [*class attribute*] line_or_to_sub_pos: :class:`numpy.ndarray`, dtype:int - same as :attr:`GridObjects.load_to_sub_pos` but for "origin" end of powerlines. + same as :attr:`GridObjects.load_to_sub_pos` but for "origin" end of powerlines. [*class attribute*] line_ex_to_sub_pos: :class:`numpy.ndarray`, dtype:int - same as :attr:`GridObjects.load_to_sub_pos` but for "extremity" end of powerlines. + same as :attr:`GridObjects.load_to_sub_pos` but for "extremity" end of powerlines. 
[*class attribute*] load_pos_topo_vect: :class:`numpy.ndarray`, dtype:int The topology if the entire grid is given by a vector, say *topo_vect* of size :attr:`GridObjects.dim_topo`. For a given load of id *l*, :attr:`GridObjects.load_to_sub_pos` [l] is the index of the load *l* in the vector :attr:`grid2op.BaseObservation.BaseObservation.topo_vect` . This means that, if "`topo_vect` [ :attr:`GridObjects.load_pos_topo_vect` \[l\] ]=2" - then load of id *l* is connected to the second bus of the substation. + then load of id *l* is connected to the second bus of the substation (see also the sketch at the end of + this docstring). [*class attribute*] gen_pos_topo_vect: :class:`numpy.ndarray`, dtype:int - same as :attr:`GridObjects.load_pos_topo_vect` but for generators. + same as :attr:`GridObjects.load_pos_topo_vect` but for generators. [*class attribute*] line_or_pos_topo_vect: :class:`numpy.ndarray`, dtype:int - same as :attr:`GridObjects.load_pos_topo_vect` but for "origin" end of powerlines. + same as :attr:`GridObjects.load_pos_topo_vect` but for "origin" end of powerlines. [*class attribute*] line_ex_pos_topo_vect: :class:`numpy.ndarray`, dtype:int - same as :attr:`GridObjects.load_pos_topo_vect` but for "extremity" end of powerlines. + same as :attr:`GridObjects.load_pos_topo_vect` but for "extremity" end of powerlines. [*class attribute*] name_load: :class:`numpy.ndarray`, dtype:str - ordered names of the loads in the grid. + ordered names of the loads in the grid. [*class attribute*] name_gen: :class:`numpy.ndarray`, dtype:str - ordered names of the productions in the grid. + ordered names of the productions in the grid. [*class attribute*] name_line: :class:`numpy.ndarray`, dtype:str - ordered names of the powerline in the grid. + ordered names of the powerlines in the grid. [*class attribute*] name_sub: :class:`numpy.ndarray`, dtype:str - ordered names of the substation in the grid + ordered names of the substations in the grid [*class attribute*] attr_list_vect: ``list``, static List of string. It represents the attributes that will be stored to/from vector when the BaseObservation is converted to/from it. This parameter is also used to compute automatically :func:`GridObjects.dtype` and :func:`GridObjects.shape` as well as :func:`GridObjects.size`. If this class is derived, then it's really important that this vector is properly set. All the attributes with the name on this vector should have - consistently the same size and shape, otherwise, some methods will not behave as expected. + consistently the same size and shape, otherwise, some methods will not behave as expected. [*class attribute*] _vectorized: :class:`numpy.ndarray`, dtype:float The representation of the GridObject as a vector. See the help of :func:`GridObjects.to_vect` and :func:`GridObjects.from_vect` for more information. **NB** for performance reason, the conversion of the internal representation to a vector is not performed at any time. It is only performed when :func:`GridObjects.to_vect` is - called the first time. Otherwise, this attribute is set to ``None``. + called the first time. Otherwise, this attribute is set to ``None``. [*class attribute*] gen_type: :class:`numpy.ndarray`, dtype:str Type of the generators, among: "solar", "wind", "hydro", "thermal" and "nuclear". Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_pmin: :class:`numpy.ndarray`, dtype:float Minimum active power production needed for a generator to work properly. Optional.
Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_pmax: :class:`numpy.ndarray`, dtype:float Maximum active power production needed for a generator to work properly. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_redispatchable: :class:`numpy.ndarray`, dtype:bool For each generator, it says if the generator is dispatchable or not. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_max_ramp_up: :class:`numpy.ndarray`, dtype:float Maximum active power variation possible between two consecutive timestep for each generator: a redispatching action on generator `g_id` cannot be above :attr:`GridObjects.gen_ramp_up_max` [`g_id`]. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_max_ramp_down: :class:`numpy.ndarray`, dtype:float Minimum active power variationpossible between two consecutive timestep for each generator: a redispatching action on generator `g_id` cannot be below :attr:`GridObjects.gen_ramp_down_min` [`g_id`]. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_min_uptime: :class:`numpy.ndarray`, dtype:float The minimum time (expressed in the number of timesteps) a generator needs to be turned on: it's not possible to turn off generator `gen_id` that has been turned on less than `gen_min_time_on` [`gen_id`] timesteps ago. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_min_downtime: :class:`numpy.ndarray`, dtype:float The minimum time (expressed in the number of timesteps) a generator needs to be turned off: it's not possible to turn on generator `gen_id` that has been turned off less than `gen_min_time_on` [`gen_id`] timesteps ago. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_cost_per_MW: :class:`numpy.ndarray`, dtype:float For each generator, it gives the "operating cost", eg the cost, in terms of "used currency" for the production of one MW with this generator, if it is already turned on. It's a positive real number. It's the marginal cost for each MW. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_startup_cost: :class:`numpy.ndarray`, dtype:float The cost to start a generator. It's a positive real number. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] gen_shutdown_cost: :class:`numpy.ndarray`, dtype:float The cost to shut down a generator. It's a positive real number. Optional. Used - for unit commitment problems or redispacthing action. + for unit commitment problems or redispatching action. [*class attribute*] redispatching_unit_commitment_availble: ``bool`` Does the current grid allow for redispatching and / or unit commit problem. If not, any attempt to use it - will raise a :class:`grid2op.Exceptions.UnitCommitorRedispachingNotAvailable` error.
+ will raise a :class:`grid2op.Exceptions.UnitCommitorRedispachingNotAvailable` error. [*class attribute*] For an environment to be compatible with this feature, you need to set up, when loading the backend: + - :attr:`GridObjects.gen_type` - :attr:`GridObjects.gen_pmin` - :attr:`GridObjects.gen_pmax` @@ -331,19 +336,19 @@ class GridObjects: grid_layout: ``dict`` or ``None`` The layout of the powergrid in a form of a dictionnary with keys the substation name, and value a tuple of - the coordinate of this substation. If no layout are provided, it defaults to ``None`` + the coordinates of this substation. If no layout is provided, it defaults to ``None``. [*class attribute*] shunts_data_available: ``bool`` - Whether or not the backend support the shunt data. + Whether or not the backend supports the shunt data. [*class attribute*] n_shunt: ``int`` or ``None`` - Number of shunts on the grid. It might be ``None`` if the backend does not support shunts. + Number of shunts on the grid. It might be ``None`` if the backend does not support shunts. [*class attribute*] name_shunt: ``numpy.ndarray``, dtype:``str`` or ``None`` - Name of each shunt on the grid, or ``None`` if the backend does not support shunts. + Name of each shunt on the grid, or ``None`` if the backend does not support shunts. [*class attribute*] shunt_to_subid: :class:`numpy.ndarray`, dtype:int - for each shunt (if supported), gives the id the substation to which it is connected + for each shunt (if supported), gives the id of the substation to which it is connected [*class attribute*]
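+ + Examples + -------- + These attributes can be read directly from the environment, from an observation or from an action. The following sketch (a minimal example: it uses the default environment of `grid2op.make` and an arbitrary load id) shows how to use them to locate an object in the topology vector: + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # number of loads, and the substation each one is connected to + print("There are {} loads, connected to substations {}".format(env.n_load, env.load_to_subid)) + + # the bus on which a given load is connected can be read from the topology vector + obs = env.reset() + load_id = 0 # arbitrary choice: inspect the first load + bus_of_load = obs.topo_vect[env.load_pos_topo_vect[load_id]] + print("Load {} is connected to bus {} of substation {}".format(load_id, bus_of_load, env.load_to_subid[load_id]))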
""" @@ -434,12 +439,16 @@ def __init__(self): @classmethod def _update_value_set(cls): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Update the class attribute `attr_list_vect_set` from `attr_list_vect` """ cls.attr_list_set = set(cls.attr_list_vect) def _raise_error_attr_list_none(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Raise a "NotImplementedError" if :attr:`GridObjects.attr_list_vect` is not defined. Raises @@ -448,12 +457,14 @@ """ if self.attr_list_vect is None: - raise NotImplementedError("attr_list_vect attribute is not defined for class {}. " - "It is not possible to convert it from/to a vector, " - "nor to know its size, shape or dtype.".format(type(self))) + raise IncorrectNumberOfElements("attr_list_vect attribute is not defined for class {}. " + "It is not possible to convert it from/to a vector, " + "nor to know its size, shape or dtype.".format(type(self))) def _get_array_from_attr_name(self, attr_name): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This function returns the proper attribute vector that can be inspected in the :func:`GridObject.shape`, :func:`GridObject.size`, :func:`GridObject.dtype`, :func:`GridObject.from_vect` and :func:`GridObject.to_vect` method. @@ -487,6 +498,23 @@ def to_vect(self): res: ``numpy.ndarray`` The representation of this action as a flat numpy ndarray + Examples + -------- + It is mainly used for converting an Observation or an Action to a vector: + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # for an observation: + obs = env.reset() + obs_as_vect = obs.to_vect() + + # for an action + act = env.action_space.sample() + act_as_vect = act.to_vect() + """ if self._vectorized is None: @@ -503,7 +531,10 @@ def shape(self): The shapes of all the components of the action, mainly used for gym compatibility is the shape of all part of the action. - It is a numpy integer array. + It is mainly used to know of which "sub spaces" the action space and observation space are made of, but + you can also directly use it on an observation or an action. + + It returns a numpy integer array. This function must return a vector from which the sum is equal to the return value of "size()". @@ -518,6 +549,22 @@ ------- res: ``numpy.ndarray`` The shape of the :class:`GridObjects` + + Examples + -------- + It is mainly used to know of which "sub spaces" the action space and observation space are made of. + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # for an observation: + obs_space_shapes = env.observation_space.shape() + + # for an action + act_space_shapes = env.action_space.shape() + """ self._raise_error_attr_list_none() res = np.array([self._get_array_from_attr_name(el).shape[0] for el in self.attr_list_vect]).astype(dt_int) @@ -528,6 +575,9 @@ def dtype(self): The types of the components of the GridObjects, mainly used for gym compatibility is the shape of all part of the action. + It is mainly used to know the types of each "sub space" the action space and observation space are made of, + but you can also directly use it on an observation or an action. + It is a numpy array of objects. The dtype vector must have the same number of components as the return value of the :func:`GridObjects.shape` @@ -541,6 +591,22 @@ ------- res: ``numpy.ndarray`` The dtype of the :class:`GridObjects` + + Examples + -------- + It is mainly used to know of which "sub spaces" the action space and observation space are made of. + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # for an observation: + obs_space_types = env.observation_space.dtype() + + # for an action + act_space_types = env.action_space.dtype() + """ self._raise_error_attr_list_none() @@ -549,6 +615,8 @@ def _assign_attr_from_name(self, attr_nm, vect): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Assign the proper attributes with name 'attr_nm' with the value of the vector vect If this function is overloaded, then the _get_array_from_attr_name must be too. @@ -556,11 +624,7 @@ Parameters ---------- attr_nm - vect - - Returns - ------- - ``None`` + vect: """ tmp = getattr(self, attr_nm) if isinstance(tmp, (dt_bool, dt_int, dt_float)): @@ -582,6 +646,8 @@ def from_vect(self, vect, check_legit=True): either :attr:`GridObjects.attr_list_vect` is properly defined for the derived class, or this function must be redefined. + It is recommended to use it from the action_space and the observation_space exclusively. + Only the size is checked. If it does not match, an :class:`grid2op.Exceptions.AmbiguousAction` is thrown. Otherwise the component of the vector are coerced into the proper type silently. @@ -593,6 +659,30 @@ vect: ``numpy.ndarray`` A vector representing an BaseAction.
+ Examples + -------- + It is mainly used for converting back a vector representing an action or an observation into a "grid2op" action + or observation. **NB** You should use it only with the "env.action_space" and "env.observation_space". + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # get the vector representation of an observation: + obs = env.reset() + obs_as_vect = obs.to_vect() + + # convert it back to an observation (which will be equal to the first one) + obs_cpy = env.observation_space.from_vect(obs_as_vect) + + # get the vector representation of an action: + act = env.action_space.sample() + act_as_vect = act.to_vect() + + # convert it back to an action (which will be equal to the first one) + act_cpy = env.action_space.from_vect(act_as_vect) + """ if vect.shape[0] != self.size(): @@ -603,13 +693,21 @@ try: vect = np.array(vect).astype(dt_float) except Exception as exc_: - raise AmbiguousAction("Impossible to convert the input vector to a floating point numy array with error:\n" - "\"{}\".".format(exc_)) + raise EnvError("Impossible to convert the input vector to a floating point numpy array " + "with error:\n" + "\"{}\".".format(exc_)) self._raise_error_attr_list_none() prev_ = 0 for attr_nm, sh, dt in zip(self.attr_list_vect, self.shape(), self.dtype()): - tmp = vect[prev_:(prev_ + sh)].astype(dt) + tmp = vect[prev_:(prev_ + sh)] + try: + tmp = tmp.astype(dt) + except Exception as exc_: + raise EnvError("Impossible to convert the input vector to its type ({}) for attribute \"{}\" " + "with error:\n" + "\"{}\".".format(dt, attr_nm, exc_)) + self._assign_attr_from_name(attr_nm, tmp) prev_ += sh @@ -618,10 +716,14 @@ def size(self): """ - When the action is converted to a vector, this method return its size. + When the action / observation is converted to a vector, this method returns its size. NB that it is a requirement that converting an GridObjects gives a vector of a fixed size throughout a training. + The size of an object is constant: for a given environment, the size of each action and the size + of each observation is constant. This allows us to also define the size of the "action_space" and + "observation_space": this method also applies to these spaces (see the examples below). + **NB**: in case the class GridObjects is derived, either :attr:`GridObjects.attr_list_vect` is properly defined for the derived class, or this function must be redefined. @@ -631,12 +733,35 @@ Returns ------- size: ``int`` The size of the GridObjects if it's converted to a flat vector. + Examples + -------- + It is mainly used to know the size of the vector that would represent these objects + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # get the vector representation of an observation: + obs = env.reset() + print("The size of this observation is {}".format(obs.size())) + + # get the vector representation of an action: + act = env.action_space.sample() + print("The size of this action is {}".format(act.size())) + + # it can also be used with the action_space and observation_space + print("The size of the observation space is {}".format(env.observation_space.size())) + print("The size of the action space is {}".format(env.action_space.size())) + """ res = np.sum(self.shape()).astype(dt_int) return res def _aux_pos_big_topo(self, vect_to_subid, vect_to_sub_pos): """ + ..
warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Return the proper "_pos_big_topo" vector given "to_subid" vector and "to_sub_pos" vectors. This function is also called to performed sanity check after the load on the powergrid. @@ -656,6 +781,8 @@ def _compute_pos_big_topo(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Compute the position of each element in the big topological vector. Topology action are represented by numpy vector of size np.sum(self.sub_info). @@ -683,6 +810,10 @@ def assert_grid_correct(self): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + + This is used at the initialization of the environment. + Performs some checking on the loaded _grid to make sure it is consistent. It also makes sure that the vector such as *sub_info*, *load_to_subid* or *gen_to_sub_pos* are of the right type eg. numpy.ndarray with dtype: dt_int @@ -1097,7 +1228,11 @@ def assert_grid_correct(self): def attach_layout(self, grid_layout): """ - grid layout is a dictionnary with the keys the name of the substations, and the value the tuple of coordinates + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + We do not recommend to "attach a layout" outside of the environment. Please refer to the function + :func:`grid2op.Environment.BaseEnv.attach_layout` for more information. + + The grid layout is a dictionary whose keys are the names of the substations, and whose values are the tuples of coordinates of each substations. No check are made it to ensure it is correct. Parameters @@ -1110,11 +1245,21 @@ @classmethod def set_env_name(cls, name): + """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + Do not attempt in any case to modify the name of the environment once it has been loaded. If you + do that, you might experience undefined behaviours, notably (but not only) with multiprocessing. + + """ cls.env_name = name @classmethod def init_grid(cls, gridobj): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is done at the creation of the environment. Using this function outside of this particular + context is really dangerous and will lead to undefined behaviours. **Do not use this function**. + Initialize this :class:`GridObjects` subclass with a provided class. It does not perform any check on the validity of the `gridobj` parameters, but it guarantees that if `gridobj` @@ -1197,7 +1342,8 @@ class res(cls): def get_obj_connect_to(self, _sentinel=None, substation_id=None): """ - Get all the object connected to a given substation: + Get all the objects connected to a given substation. This is particularly useful if you want to know the + names of the generators / loads connected to a given substation, or which powerline ends, etc. Parameters ---------- @@ -1210,7 +1356,7 @@ Returns ------- res: ``dict`` - A dictionnary with keys: + A dictionary with keys: - "loads_id": a vector giving the id of the loads connected to this substation, empty if none - "generators_id": a vector giving the id of the generators connected to this substation, empty if none @@ -1220,6 +1366,26 @@ substation, empty if none.
- "nb_elements" : number of elements connected to this substation + Examples + -------- + + .. code-block:: python + + import grid2op + env = grid2op.make() + + # get the vector representation of an observation: + sub_id = 1 + dict_ = env.get_obj_connect_to(substation_id=sub_id) + print("The names of the loads connected to substation {} are: {}".format( + sub_id, env.name_load[dict_["loads_id"]])) + print("The names of the generators connected to substation {} are: {}".format( + sub_id, env.name_gen[dict_["loads_id"]])) + print("The powerline whose origin end is connected to substation {} are: {}".format( + sub_id, env.name_line[dict_["lines_or_id"]])) + print("The powerline whose extremity end is connected to substation {} are: {}".format( + sub_id, env.name_line[dict_["lines_ex_id"]])) + """ if _sentinel is not None: raise Grid2OpException("get_obj_connect_to should be used only with key-word arguments") @@ -1239,9 +1405,81 @@ def get_obj_connect_to(self, _sentinel=None, substation_id=None): return res def get_obj_substations(self, _sentinel=None, substation_id=None): + """ + Return the object connected as a substation in form of a numpy array instead of a dictionary (as + opposed to :func:`GridObjects.get_obj_connect_to`). + + This format is particularly useful for example if you want to know the number of generator connected + to a given substation for example (see section examples). + + Parameters + ---------- + _sentinel: ``None`` + Used to prevent positional parameters. Internal, do not use. + + substation_id: ``int`` + ID of the substation we want to inspect + + Returns + ------- + res: ``numpy.ndarray`` + A matrix with as many rows as the number of element of the substation and 5 columns: + + 1. column 0: the id of the substation + 2. column 1: -1 if this object is not a load, or `LOAD_ID` is this object is a load (see example) + 3. column 2: -1 if this object is not a generator, or `GEN_ID` is this object is a generator (see example) + 4. column 3: -1 if this object is not the origin end of a line, or `LOR_ID` is this object is the + origin end of a powerline(see example) + 5. column 4: -1 if this object is not a extremity end, or `LEX_ID` is this object is the extremity + end of a powerline + + Examples + -------- + + .. code-block:: python + + import numpy as np + import grid2op + env = grid2op.make() + + # get the vector representation of an observation: + sub_id = 1 + mat = env.get_obj_substations(substation_id=sub_id) + + # the first element of the substation is: + mat[0,:] + # array([ 1, -1, -1, -1, 0], dtype=int32) + # we know it's connected to substation 1... no kidding... + # we can also get that: + # 1. this is not a load (-1 at position 1 - so 2nd component) + # 2. this is not a generator (-1 at position 2 - so 3rd component) + # 3. this is not the origin end of a powerline (-1 at position 3) + # 4. 
this is the extremity end of powerline 0 (there is a 0 at position 4 - so last component) + + # likewise, the second element connected at this substation is: + mat[1,:] + # array([ 1, -1, -1, 2, -1], dtype=int32) + # it represents the origin end of powerline 2 + + # the 5th element connected at this substation is: + mat[4,:] + # which is equal to array([ 1, -1, 0, -1, -1], dtype=int32) + # so it represents a generator, and this generator has the id 0 + + # the 6th element connected at this substation is: + mat[5,:] + # which is equal to array([ 1, 0, -1, -1, -1], dtype=int32) + # so it represents a load, and this load has the id 0 + + # and, last example, if you want to count the number of generators connected at this + # substation you can + is_gen = mat[:,env.GEN_COL] != -1 # a boolean vector saying ``True`` if the object is a generator + nb_gen = np.sum(is_gen) + + """ # TODO finish the doc if _sentinel is not None: - raise Grid2OpException("get_obj_connect_to should be used only with key-word arguments") + raise Grid2OpException("get_obj_substations should be used only with key-word arguments") if substation_id is None: raise Grid2OpException("You ask the composition of a substation without specifying its id." @@ -1283,6 +1521,20 @@ def get_lines_id(self, _sentinel=None, from_=None, to_=None): ------ :class:`grid2op.Exceptions.BackendError` if no match is found. + + Examples + -------- + It can be used like: + + .. code-block:: python + + import numpy as np + import grid2op + env = grid2op.make() + + l_ids = env.get_lines_id(from_=0, to_=1) + print("The powerlines connecting substation 0 to substation 1 have ids: {}".format(l_ids)) + """ res = [] if from_ is None: @@ -1321,6 +1573,19 @@ def get_generators_id(self, sub_id): :class:`grid2op.Exceptions.BackendError` if no match is found. + Examples + -------- + It can be used like: + + .. code-block:: python + + import numpy as np + import grid2op + env = grid2op.make() + + g_ids = env.get_generators_id(sub_id=1) + print("The generators connected to substation 1 have ids: {}".format(g_ids)) + """ res = [] if sub_id is None: @@ -1357,6 +1622,19 @@ def get_loads_id(self, sub_id): ------ :class:`grid2op.Exceptions.BackendError` if no match found. + Examples + -------- + It can be used like: + + .. code-block:: python + + import numpy as np + import grid2op + env = grid2op.make() + + c_ids = env.get_loads_id(sub_id=1) + print("The loads connected to substation 1 have ids: {}".format(c_ids)) + """ res = [] if sub_id is None: @@ -1377,8 +1655,12 @@ @classmethod def to_dict(cls): """ - Convert the object as a dictionnary. - Note that unless this method is overidden, a call to it will only output the + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is used internally only to save action_space or observation_space for example. Do not + attempt to use it in a different context. + + Convert the object to a dictionary. + Note that unless this method is overridden, a call to it will only output the Returns ------- @@ -1435,6 +1717,11 @@ @staticmethod def from_dict(dict_): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is used internally only to restore action_space or observation_space if they + have been saved by `to_dict`. Do not attempt to use it in a different context.
+ Create a valid GridObject (or one of its derived class if this method is overide) from a dictionnary (usually read from a json file) diff --git a/grid2op/Space/RandomObject.py b/grid2op/Space/RandomObject.py index 2e5b7f9d3..59a46010e 100644 --- a/grid2op/Space/RandomObject.py +++ b/grid2op/Space/RandomObject.py @@ -42,6 +42,61 @@ class RandomObject(object): If you really need other sources of randomness (for example if you use tensorflow or torch) we strongly recommend you to overload the :func:`BaseAgent.seed` accordingly so that the neural networks are always initialized in the same order using the same weights. + + Examples + --------- + If you don't use any :class:`grid2op.Runner.Runner` we recommend using this method twice: + + 1. to set the seed of the :class:`grid2op.Environment.Environment` + 2. to set the seed of your :class:`grid2op.Agent.BaseAgent` + + .. code-block:: python + + import grid2op + from grid2op.Agent import RandomAgent # or any other agent of course. It might also be a custom agent you developed + # create the environment + env = grid2op.make() + agent = RandomAgent(env.action_space) + + # and now set the seed + env_seed = 42 + agent_seed = 12345 + env.seed(env_seed) + agent.seed(agent_seed) + + # continue your experiments + + If you are using a :class:`grid2op.Runner.Runner` we recommend using the "env_seeds" and "agent_seeds" when + calling the function :func:`grid2op.Runner.Runner.run` like this: + + .. code-block:: python + + import grid2op + import numpy as np + from grid2op.dtypes import dt_int + from grid2op.Agent import RandomAgent # or any other agent of course. It might also be a custom agent you developed + from grid2op.Runner import Runner + + np.random.seed(42) # or any other seed of course :-) + + # create the environment + env = grid2op.make() + # NB setting a seed in this environment will have absolutely no effect on the runner + + # and now set the seed + runner = Runner(**env.get_params_for_runner(), agentClass=RandomAgent) + + # and now start your experiments + nb_episode = 2 + maximum_int_poss = np.iinfo(dt_int).max # this is the maximum integer this dtype can represent + res = runner.run(nb_episode=nb_episode, + # generate the seeds for the agent + agent_seeds=[np.random.randint(0, maximum_int_poss) for _ in range(nb_episode)], + # generate the seeds for the environment + env_seeds=[np.random.randint(0, maximum_int_poss) for _ in range(nb_episode)] + ) + # NB for fully reproducible experiments you have to have called "np.random.seed" before using this method. + """ def __init__(self): self.space_prng = np.random.RandomState() @@ -49,6 +104,9 @@ def __init__(self): def seed(self, seed): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + We do not recommend to use this function outside of the two examples given in the description of this class. + Set the seed of the source of pseudo random number used for this RandomObject. Parameters diff --git a/grid2op/Space/SerializableSpace.py b/grid2op/Space/SerializableSpace.py index eaae5f97c..752b93efc 100644 --- a/grid2op/Space/SerializableSpace.py +++ b/grid2op/Space/SerializableSpace.py @@ -20,6 +20,11 @@ class SerializableSpace(GridObjects, RandomObject): """ + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is a higher level wrapper that avoids code duplication between + the action_space and the observation_space.
None of the methods here should be + used outside of `env.action_space` or `env.observation_space`. + This class allows to serialize / de serialize the action space or observation space. It should not be used inside an Environment, as some functions of the action might not be compatible with @@ -96,7 +101,12 @@ def __init__(self, @staticmethod def from_dict(dict_): """ - Allows the de-serialization of an object stored as a dictionnary (for example in the case of json saving). + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is used internally only to restore action_space or observation_space if they + have been saved by `to_dict`. Do not + attempt to use it in a different context. + + Allows the de-serialization of an object stored as a dictionary (for example in the case of json saving). Parameters ---------- @@ -108,7 +118,7 @@ Returns ------- res: :class:`SerializableSpace` - An instance of an SerializableSpace matching the dictionnary. + An instance of a SerializableSpace matching the dictionary. """ @@ -168,7 +178,11 @@ def to_dict(self): """ - Serialize this object as a dictionnary. + .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\ + This is used internally only to save action_space or observation_space for example. Do not + attempt to use it in a different context. + + Serialize this object as a dictionary. Returns ------- @@ -190,12 +204,18 @@ def size(self): ------- n: ``int`` The size of the action space. + + Examples + -------- + See :func:`GridObjects.size` for more information. + """ return self.n def from_vect(self, obj_as_vect, check_legit=True): """ - Convert an action, represented as a vector to a valid :class:`BaseAction` instance + Convert an action, represented as a vector, to a valid :class:`BaseAction` instance. It works the + same way for observations. Parameters ---------- @@ -209,17 +229,125 @@ The corresponding action (or observation) as an object (and not as a vector). The return type is given by the type of :attr:`SerializableSpace._template_obj` + Examples + -------- + See :func:`GridObjects.from_vect` for more information. + """ res = copy.deepcopy(self._template_obj) res.from_vect(obj_as_vect, check_legit=check_legit) return res def extract_from_vect(self, obj_as_vect, attr_name): + """ + This method allows you to extract only a given part of the observation / action if this one + is represented as a vector. + + Parameters + ---------- + obj_as_vect: ``numpy.ndarray`` + the object (action or observation) represented as a vector. + + attr_name: ``str`` + the name of the attribute you want to extract from the object + + Returns + ------- + res: ``numpy.ndarray`` + The value of the attribute with name `attr_name` + + Examples + --------- + We detail only the process for the observation, but it works the same way for the action too. + + ..
code-block:: python + + import numpy as np + import grid2op + env = grid2op.make() + + # get the vector representation of an observation: + obs = env.reset() + obs_as_vect = obs.to_vect() + + # and now you can extract, for example, the active power of the loads + load_p = env.observation_space.extract_from_vect(obs_as_vect, "load_p") + assert np.all(load_p == obs.load_p) + # and this should assert to True + + """ beg_, end_, dtype = self.get_indx_extract(attr_name) res = obj_as_vect[beg_:end_].astype(dtype) return res def get_indx_extract(self, attr_name): + """ + Retrieve the type, the beginning and the end of a given attribute in the action or observation + once it is represented as a vector. + + [advanced usage] This is particularly useful to avoid parsing the whole observation / action when you only + want to extract a subset of it (see example) + + Parameters + ---------- + attr_name: ``str`` + The name of the attribute you want to extract information from + + Returns + ------- + beg_: ``int`` + The first component of the vector that concerns the attribute + end_: ``int`` + The last component (excluded) of the vector that concerns the attribute + dtype: + The type of the attribute + + Examples + -------- + This is an "advanced" function used to accelerate the study of an agent. Suppose you have an environment + and you build a runner from it. Then you want to have a quick look at the "relative flows" that + this agent provides: + + .. code-block:: python + + import grid2op + import os + import numpy as np + from grid2op.Runner import Runner + from grid2op.Episode import EpisodeData + + ################ + # INTRO + # create a runner + env = grid2op.make() + # see the documentation of the Runner if you want to change the agent. + # in this case it will be "do nothing" + runner = Runner(**env.get_params_for_runner()) + + # execute it on a given number of chronics + nb_episode = 2 + path_save = "i_saved_the_runner_here" + res = runner.run(nb_episode=nb_episode, path_save=path_save) + + # END INTRO + ################## + + # now let's load only the flows for each of the computed episodes + li_episode = EpisodeData.list_episode(path_save) # retrieve the list of where each episode is stored + beg_, end_, dtype = env.observation_space.get_indx_extract("rho") + observation_space_name = "observations.npz" + + for full_episode_path, episode_name in li_episode: + all_obs = np.load(os.path.join(full_episode_path, observation_space_name))["data"] + + # and you use the function like this: + all_flows = all_obs[:, beg_:end_].astype(dtype) + + # you can now do something with the computed flows + # each row will be a time step, each column a powerline + # you can have "nan" if the episode hit a "game over" before the end. + + """ if attr_name not in self._to_extract_vect: raise Grid2OpException("Attribute \"{}\" is not found in the object of type \"{}\"."
"".format(attr_name, self.subtype)) diff --git a/grid2op/tests/BaseBackendTest.py b/grid2op/tests/BaseBackendTest.py index 461897675..399bd8b75 100644 --- a/grid2op/tests/BaseBackendTest.py +++ b/grid2op/tests/BaseBackendTest.py @@ -979,7 +979,7 @@ def test_nb_timestep_overflow_nodisc(self): self.backend.load_grid(self.path_matpower, case_file) self.backend.assert_grid_correct() - env.timestep_overflow[self.id_2nd_line_disco] = 0 + env._timestep_overflow[self.id_2nd_line_disco] = 0 thermal_limit = 10 * self.lines_flows_init thermal_limit[self.id_first_line_disco] = self.lines_flows_init[self.id_first_line_disco] / 2 thermal_limit[self.id_2nd_line_disco] = 400 @@ -1011,7 +1011,7 @@ def test_nb_timestep_overflow_nodisc_2(self): self.backend.load_grid(self.path_matpower, case_file) self.backend.assert_grid_correct() - env.timestep_overflow[self.id_2nd_line_disco] = 1 + env._timestep_overflow[self.id_2nd_line_disco] = 1 thermal_limit = 10 * self.lines_flows_init thermal_limit[self.id_first_line_disco] = self.lines_flows_init[self.id_first_line_disco] / 2 @@ -1044,7 +1044,7 @@ def test_nb_timestep_overflow_disc2(self): self.backend.load_grid(self.path_matpower, case_file) self.backend.assert_grid_correct() - env.timestep_overflow[self.id_2nd_line_disco] = 2 + env._timestep_overflow[self.id_2nd_line_disco] = 2 thermal_limit = 10 * self.lines_flows_init thermal_limit[self.id_first_line_disco] = self.lines_flows_init[self.id_first_line_disco] / 2 @@ -1072,7 +1072,7 @@ def test_set_bus(self): warnings.filterwarnings("ignore") env = make(test=True, backend=backend) env.reset() - action = env.helper_action_player({"set_bus": {"lines_or_id": [(17, 2)]}}) + action = env.action_space({"set_bus": {"lines_or_id": [(17, 2)]}}) obs, reward, done, info = env.step(action) assert np.all(np.isfinite(obs.v_or)) assert np.sum(env.backend.get_topo_vect() == 2) == 1 @@ -1086,7 +1086,7 @@ def test_change_bus(self): warnings.filterwarnings("ignore") env = make(test=True, backend=backend) env.reset() - action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"change_bus": {"lines_or_id": [17]}}) obs, reward, done, info = env.step(action) assert np.all(np.isfinite(obs.v_or)) assert np.sum(env.backend.get_topo_vect() == 2) == 1 @@ -1099,13 +1099,13 @@ def test_change_bustwice(self): warnings.filterwarnings("ignore") env = make(test=True, backend=backend) env.reset() - action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"change_bus": {"lines_or_id": [17]}}) obs, reward, done, info = env.step(action) assert not done assert np.all(np.isfinite(obs.v_or)) assert np.sum(env.backend.get_topo_vect() == 2) == 1 - action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"change_bus": {"lines_or_id": [17]}}) obs, reward, done, info = env.step(action) assert not done assert np.all(np.isfinite(obs.v_or)) diff --git a/grid2op/tests/BaseRedispTest.py b/grid2op/tests/BaseRedispTest.py index e79c37299..86d6c7fa7 100644 --- a/grid2op/tests/BaseRedispTest.py +++ b/grid2op/tests/BaseRedispTest.py @@ -92,7 +92,7 @@ def test_no_impact_env(self): obs, reward, done, info = self.env.step(act) assert self.compare_vect(obsinit.prod_p, ref_data) - target_val = obs.prod_p + self.env.actual_dispatch + target_val = obs.prod_p + self.env._actual_dispatch assert self.compare_vect(obs.prod_p[:-1], target_val[:-1]) # I remove last component which is the slack bus assert np.all(obs.prod_p >= self.env.gen_pmin) assert 
np.all(target_val <= self.env.gen_pmax) @@ -104,17 +104,17 @@ def test_basic_redispatch_act(self): self.skip_if_needed() act = self.env.action_space({"redispatch": [2, 5]}) obs, reward, done, info = self.env.step(act) - assert np.abs(np.sum(self.env.actual_dispatch)) <= self.tol_one + assert np.abs(np.sum(self.env._actual_dispatch)) <= self.tol_one th_dispatch = np.array([ 0. , -2.5, 5. , 0. , -2.5]) - assert self.compare_vect(self.env.actual_dispatch, th_dispatch) - target_val = self.chronics_handler.real_data.prod_p[1, :] + self.env.actual_dispatch + assert self.compare_vect(self.env._actual_dispatch, th_dispatch) + target_val = self.chronics_handler.real_data.prod_p[1, :] + self.env._actual_dispatch assert self.compare_vect(obs.prod_p[:-1], target_val[:-1]) # I remove last component which is the slack bus assert np.all(obs.prod_p >= self.env.gen_pmin) assert np.all(target_val <= self.env.gen_pmax) # check that the redispatching is apply in the right direction - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. + assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) def test_redispatch_act_above_pmax(self): # in this test, the asked redispatching for generator 2 would make it above pmax, so the environment @@ -122,10 +122,10 @@ def test_redispatch_act_above_pmax(self): self.skip_if_needed() act = self.env.action_space({"redispatch": [2, 60]}) obs, reward, done, info = self.env.step(act) - assert np.abs(np.sum(self.env.actual_dispatch)) <= self.tol_one + assert np.abs(np.sum(self.env._actual_dispatch)) <= self.tol_one th_dispatch = np.array([0.0000000e+00, -2.3289999e+01, 5.0890003e+01, 9.9999998e-03, -2.7610004e+01]) - assert self.compare_vect(self.env.actual_dispatch, th_dispatch) - target_val = self.chronics_handler.real_data.prod_p[1, :] + self.env.actual_dispatch + assert self.compare_vect(self.env._actual_dispatch, th_dispatch) + target_val = self.chronics_handler.real_data.prod_p[1, :] + self.env._actual_dispatch assert self.compare_vect(obs.prod_p[:-1], target_val[:-1]) # I remove last component which is the slack bus assert np.all(obs.prod_p >= self.env.gen_pmin) assert np.all(target_val <= self.env.gen_pmax) @@ -138,17 +138,17 @@ def test_two_redispatch_act(self): obs, reward, done, info = self.env.step(act) th_dispatch = np.array([0., 10, 20., 0., 0.]) th_dispatch[1] += obs_first.actual_dispatch[1] - assert self.compare_vect(self.env.target_dispatch, th_dispatch) + assert self.compare_vect(self.env._target_dispatch, th_dispatch) # check that the redispatching is apply in the right direction - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. 
+ assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) th_dispatch = np.array([0., 10., 20., 0., -30.]) th_dispatch = np.array([ 0.0000000e+00, -5.0001135e-03, 2.0000000e+01, 9.9999998e-03, -2.0005001e+01]) - assert self.compare_vect(self.env.actual_dispatch, th_dispatch) + assert self.compare_vect(self.env._actual_dispatch, th_dispatch) - target_val = self.chronics_handler.real_data.prod_p[2, :] + self.env.actual_dispatch + target_val = self.chronics_handler.real_data.prod_p[2, :] + self.env._actual_dispatch assert self.compare_vect(obs.prod_p[:-1], target_val[:-1]) # I remove last component which is the slack bus - assert np.abs(np.sum(self.env.actual_dispatch)) <= self.tol_one + assert np.abs(np.sum(self.env._actual_dispatch)) <= self.tol_one assert np.all(target_val <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -158,12 +158,12 @@ def test_redispacth_two_gen(self): obs, reward, done, info = self.env.step(act) assert not done th_dispatch = np.array([0., 10, 20., 0., 0.]) - assert self.compare_vect(self.env.target_dispatch, th_dispatch) - assert self.compare_vect(self.env.actual_dispatch, self.array_double_dispatch) + assert self.compare_vect(self.env._target_dispatch, th_dispatch) + assert self.compare_vect(self.env._actual_dispatch, self.array_double_dispatch) # check that the redispatching is apply in the right direction - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. + assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -174,12 +174,12 @@ def test_redispacth_all_gen(self): obs, reward, done, info = self.env.step(act) th_dispatch = np.array([0., 10, 20., 0., -30.]) - assert self.compare_vect(self.env.target_dispatch, th_dispatch) - assert self.compare_vect(self.env.actual_dispatch, self.array_double_dispatch) + assert self.compare_vect(self.env._target_dispatch, th_dispatch) + assert self.compare_vect(self.env._actual_dispatch, self.array_double_dispatch) # check that the redispatching is apply in the right direction - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. 
+ assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -189,14 +189,14 @@ def test_count_turned_on(self): # recoded it: it's the normal behavior to call "env.reset()" to get the first time step obs = self.env.reset() - assert np.all(self.env.gen_uptime == np.array([0, 1, 1, 0, 1])) - assert np.all(self.env.gen_downtime == np.array([1, 0, 0, 1, 0])) + assert np.all(self.env._gen_uptime == np.array([0, 1, 1, 0, 1])) + assert np.all(self.env._gen_downtime == np.array([1, 0, 0, 1, 0])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) obs, reward, done, info = self.env.step(act) - assert np.all(self.env.gen_uptime == np.array([0, 2, 2, 0, 2])) - assert np.all(self.env.gen_downtime == np.array([2, 0, 0, 2, 0])) + assert np.all(self.env._gen_uptime == np.array([0, 2, 2, 0, 2])) + assert np.all(self.env._gen_downtime == np.array([2, 0, 0, 2, 0])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -206,14 +206,14 @@ def test_count_turned_on(self): assert np.all(obs.prod_p >= self.env.gen_pmin) obs, reward, done, info = self.env.step(act) - assert np.all(self.env.gen_uptime == np.array([0, 67, 67, 1, 67])) - assert np.all(self.env.gen_downtime == np.array([67, 0, 0, 0, 0])) + assert np.all(self.env._gen_uptime == np.array([0, 67, 67, 1, 67])) + assert np.all(self.env._gen_downtime == np.array([67, 0, 0, 0, 0])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) obs, reward, done, info = self.env.step(act) - assert np.all(self.env.gen_uptime == np.array([1, 68, 68, 2, 68])) - assert np.all(self.env.gen_downtime == np.array([0, 0, 0, 0, 0])) + assert np.all(self.env._gen_uptime == np.array([1, 68, 68, 2, 68])) + assert np.all(self.env._gen_downtime == np.array([0, 0, 0, 0, 0])) assert np.all(obs.prod_p <= self.env.gen_pmax) assert np.all(obs.prod_p >= self.env.gen_pmin) @@ -266,7 +266,7 @@ def test_redispacth_non_dispatchable_generator(self): # Check that generator 0 isn't redispatchable assert self.env.gen_redispatchable[0] == False # Check that generator 0 is off - assert self.env.gen_downtime[0] >= 1 + assert self.env._gen_downtime[0] >= 1 # Try to redispatch redispatch_act = self.env.action_space({"redispatch": [(0, 5.)]}) @@ -335,7 +335,7 @@ def test_redispatch_generator_off(self): # Check that generator 1 is off assert obs.prod_p[1] == 0 - assert self.env.gen_downtime[1] >= 1 + assert self.env._gen_downtime[1] >= 1 # Try to redispatch generator 1 redispatch_act = self.env.action_space({"redispatch": [(1, 5.)]}) @@ -374,26 +374,26 @@ def test_redisp_toohigh_toolow(self): obs, reward, done, info = self.env.step(act) assert not done assert info["is_dispatching_illegal"] is False - assert np.all(self.env.target_dispatch == [-1., 0., 0., 0., 0.]) + assert np.all(self.env._target_dispatch == [-1., 0., 0., 0., 0.]) act = self.env.action_space({"redispatch": [0, 0]}) obs, reward, done, info = self.env.step(act) assert not done assert info["is_dispatching_illegal"] is False - assert np.all(self.env.target_dispatch == [-1., 0., 0., 0., 0.]) + assert np.all(self.env._target_dispatch == [-1., 0., 0., 0., 0.]) # this one is not correct: too high decrease act = self.env.action_space({"redispatch": [0, self.env.gen_pmin[0] - self.env.gen_pmax[0]]}) obs, reward, done, info = self.env.step(act) assert not done assert 
info["is_dispatching_illegal"] - assert np.all(self.env.target_dispatch == [-1., 0., 0., 0., 0.]) + assert np.all(self.env._target_dispatch == [-1., 0., 0., 0., 0.]) # this one is not correct: too high increase act = self.env.action_space({"redispatch": [0, self.env.gen_pmax[0] - self.env.gen_pmin[0] +2 ]}) obs, reward, done, info = self.env.step(act) assert not done assert info["is_dispatching_illegal"] - assert np.all(self.env.target_dispatch == [-1., 0., 0., 0. ,0.]) + assert np.all(self.env._target_dispatch == [-1., 0., 0., 0. ,0.]) def test_error_message_notzerosum_oneshot(self): self.skipTest("Ok with new redispatching implementation") @@ -473,14 +473,14 @@ def test_redispatch_noneedtocurtaildispact(self): act = self.env.action_space({"redispatch": [(0, +5)]}) obs, reward, done, info = self.env.step(act) assert not done - assert np.all(self.env.target_dispatch == [5., 0., 0., 0., 0.]) + assert np.all(self.env._target_dispatch == [5., 0., 0., 0., 0.]) target_p = self.env.chronics_handler.real_data.data.prod_p[3, :] target_p_t = self.env.chronics_handler.real_data.data.prod_p[2, :] assert self.compare_vect(obsinit.prod_p[:-1], target_p_t[:-1]) # only look at dispatchable generator, remove slack bus (last generator) assert np.all(obs.prod_p[0:2] - obsinit.prod_p[0:2] <= obs.gen_max_ramp_up[0:2]) assert np.all(obs.prod_p[0:2] - obsinit.prod_p[0:2] >= -obs.gen_max_ramp_down[0:2]) - assert np.all(np.abs(self.env.actual_dispatch - np.array([5., -2.5, 0., 0., -2.5])) <= self.tol_one) + assert np.all(np.abs(self.env._actual_dispatch - np.array([5., -2.5, 0., 0., -2.5])) <= self.tol_one) def test_sum0_again(self): # perform a valid redispatching action @@ -490,8 +490,8 @@ def test_sum0_again(self): act = self.env.action_space({"redispatch": [(0, +10)]}) obs, reward, done, info = self.env.step(act) assert np.abs(np.sum(obs.actual_dispatch)) <= self.tol_one - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. + assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) def test_sum0_again2(self): self.skip_if_needed() @@ -503,8 +503,8 @@ def test_sum0_again2(self): act = env.action_space({"redispatch": [(0, +5)]}) obs, reward, done, info = env.step(act) assert np.abs(np.sum(obs.actual_dispatch)) <= self.tol_one - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. + assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) donothing = env.action_space() obsinit, reward, done, info = env.step(donothing) act = env.action_space({"redispatch": [(0, -5)]}) @@ -525,8 +525,8 @@ def test_sum0_again3(self): act = env.action_space({"redispatch": [(0, +5)]}) obs, reward, done, info = env.step(act) assert np.abs(np.sum(obs.actual_dispatch)) <= self.tol_one - indx_ok = self.env.target_dispatch != 0. - assert np.all(np.sign(self.env.actual_dispatch[indx_ok]) == np.sign(self.env.target_dispatch[indx_ok])) + indx_ok = self.env._target_dispatch != 0. 
+ assert np.all(np.sign(self.env._actual_dispatch[indx_ok]) == np.sign(self.env._target_dispatch[indx_ok])) assert np.all(obs.prod_p[0:2] - obs_init.prod_p[0:2] <= obs.gen_max_ramp_up[0:2]) assert np.all(obs.prod_p[0:2] - obs_init.prod_p[0:2] >= -obs.gen_max_ramp_down[0:2]) assert np.all(obs.actual_dispatch == np.array([5.0, -2.5, 0., 0., -2.5])) diff --git a/grid2op/tests/issue_126.py b/grid2op/tests/issue_126.py deleted file mode 100644 index ce4487e77..000000000 --- a/grid2op/tests/issue_126.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest -import warnings -import grid2op -from grid2op.Agent import DeltaRedispatchRandomAgent -from grid2op.Runner import Runner -from grid2op import make -from grid2op.Episode import EpisodeData -import os -import numpy as np -import tempfile - -class Issue126Tester(unittest.TestCase): - - def test_issue_126(self): - with tempfile.TemporaryDirectory() as tmpdirname: - #run redispatch agent on one scenario for 100 timesteps - dataset = "rte_case14_realistic" - nb_episode=1 - nb_timesteps=100 - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - env = make(dataset) - agent = DeltaRedispatchRandomAgent(env.action_space) - runner = Runner(**env.get_params_for_runner(), - agentClass=None, - agentInstance=agent) - nb_episode=1 - res = runner.run(nb_episode=nb_episode, - path_save=tmpdirname, - nb_process=1, - max_iter=nb_timesteps, - env_seeds=[0], - agent_seeds=[0], - pbar=False) - - episode_data = EpisodeData.from_disk(tmpdirname, '000') - - assert len(episode_data.actions.objects) == nb_timesteps - assert len(episode_data.observations.objects) == (nb_timesteps + 1) - assert len(episode_data.actions) == nb_timesteps - assert len(episode_data.observations) == (nb_timesteps + 1) - -if __name__ == "__main__": - unittest.main() diff --git a/grid2op/tests/test_Action.py b/grid2op/tests/test_Action.py index 666b1cb13..53def571f 100644 --- a/grid2op/tests/test_Action.py +++ b/grid2op/tests/test_Action.py @@ -299,6 +299,7 @@ def test_update_status(self): def test_update_set_topo_by_dict_obj(self): self._skipMissingKey('set_bus') + self._skipMissingKey('change_bus') action = self.helper_action({"set_bus": {"loads_id": [(1, 3)]}}) assert action.effect_on(load_id=1)["set_bus"] == 3 @@ -339,6 +340,7 @@ def test_update_set_topo_by_dict_sub2(self): def test_update_undo_change_bus(self): self._skipMissingKey('change_bus') + self._skipMissingKey('set_bus') # Create dummy change_bus action action = self.helper_action({"change_bus": {"loads_id": [1]}}) @@ -371,6 +373,7 @@ def test_update_undo_change_bus(self): def test_update_change_bus_by_dict_obj(self): self._skipMissingKey('change_bus') + self._skipMissingKey('set_bus') action = self.helper_action({"change_bus": {"loads_id": [1]}}) assert action.effect_on(load_id=1)["set_bus"] == 0 @@ -1194,6 +1197,7 @@ def test_iadd_change_change_bus(self): assert act1._change_bus_vect[1] == False assert np.any(act1._set_topo_vect != 0) == False + # TODO a generic method to build them all maybe ? 
class TestDontAct_PowerlineChangeAndDispatchAction(TestIADD, unittest.TestCase): """ diff --git a/grid2op/tests/test_Agent.py b/grid2op/tests/test_Agent.py index ac8642540..5900d53c7 100644 --- a/grid2op/tests/test_Agent.py +++ b/grid2op/tests/test_Agent.py @@ -91,7 +91,7 @@ def _aux_test_agent(self, agent, i_max=30): return i, cum_reward, all_acts def test_0_donothing(self): - agent = DoNothingAgent(self.env.helper_action_player) + agent = DoNothingAgent(self.env.action_space) with warnings.catch_warnings(): warnings.filterwarnings("error") i, cum_reward, all_acts = self._aux_test_agent(agent) @@ -100,7 +100,7 @@ def test_0_donothing(self): assert np.abs(cum_reward - expected_reward, dtype=dt_float) <= self.tol_one, "The reward has not been properly computed" def test_1_powerlineswitch(self): - agent = PowerLineSwitch(self.env.helper_action_player) + agent = PowerLineSwitch(self.env.action_space) with warnings.catch_warnings(): warnings.filterwarnings("error") i, cum_reward, all_acts = self._aux_test_agent(agent) @@ -110,7 +110,7 @@ def test_1_powerlineswitch(self): assert np.abs(cum_reward - expected_reward) <= self.tol_one, "The reward has not been properly computed" def test_2_busswitch(self): - agent = TopologyGreedy(self.env.helper_action_player) + agent = TopologyGreedy(self.env.action_space) with warnings.catch_warnings(): warnings.filterwarnings("error") i, cum_reward, all_acts = self._aux_test_agent(agent, i_max=10) diff --git a/grid2op/tests/test_Environment.py b/grid2op/tests/test_Environment.py index 8e798a2cf..315735d04 100644 --- a/grid2op/tests/test_Environment.py +++ b/grid2op/tests/test_Environment.py @@ -135,7 +135,7 @@ def test_proper_injection_at_first(self): assert self.compare_vect(injs_act, vect) def test_proper_voltage_modification(self): - do_nothing = self.env.helper_action_player({}) + do_nothing = self.env.action_space({}) obs, reward, done, info = self.env.step(do_nothing) # should load the first time stamp vect = np.array([143.9, 139.1, 0.2, 13.3, 146. 
]) assert self.compare_vect(obs.prod_v, vect), "Production voltages setpoint have not changed at first time step" @@ -145,7 +145,7 @@ def test_proper_voltage_modification(self): def test_number_of_timesteps(self): for i in range(287): - do_nothing = self.env.helper_action_player({}) + do_nothing = self.env.action_space({}) obs, reward, done, info = self.env.step(do_nothing) # should load the first time stamp injs_act, *_ = self.env.backend.loads_info() vect = np.array([19.0, 87.9, 44.4, 7.2, 10.4, 27.5, 8.4, 3.2, 5.7, 12.2, 13.6]) @@ -156,7 +156,7 @@ def test_stop_right_time(self): done = False i = 0 while not done: - do_nothing = self.env.helper_action_player({}) + do_nothing = self.env.action_space({}) obs, reward, done, info = self.env.step(do_nothing) # should load the first time stamp i += 1 assert i == 287 @@ -177,7 +177,7 @@ def test_reward(self): cp.enable() beg_ = time.time() cum_reward = dt_float(0.0) - do_nothing = self.env.helper_action_player({}) + do_nothing = self.env.action_space({}) while not done: obs, reward, done, info = self.env.step(do_nothing) # should load the first time stamp cum_reward += reward @@ -219,22 +219,22 @@ def compare_vect(self, pred, true): def test_ambiguous_detected(self): self.skipTest("deprecated test as the reconnection is handled by backend action") - act = self.env.helper_action_player({"set_line_status": [(1, 1)]}) + act = self.env.action_space({"set_line_status": [(1, 1)]}) obs, reward, done, info = self.env.step(act) assert info['is_ambiguous'] assert not info["is_illegal"] def test_notambiguous_correct(self): - act = self.env.helper_action_player({"set_line_status": [(1, -1)]}) + act = self.env.action_space({"set_line_status": [(1, -1)]}) obs, reward, done, info = self.env.step(act) assert not info['is_ambiguous'] assert not info["is_illegal"] assert np.sum(obs.line_status) == 7 def test_illegal_detected(self): - act = self.env.helper_action_player({"set_line_status": [(1, -1)]}) + act = self.env.action_space({"set_line_status": [(1, -1)]}) self.env.game_rules = RulesChecker(legalActClass=DefaultRules) - self.env.times_before_line_status_actionable[1] = 1 + self.env._times_before_line_status_actionable[1] = 1 obs, reward, done, info = self.env.step(act) # the action is illegal and it has not been implemented on the powergrid @@ -387,16 +387,16 @@ def test_attach(self): dict_act = act.to_dict() assert "grid_layout" in dict_act assert dict_act["grid_layout"] == {k: [x,y] for k,(x,y) in zip(env.name_sub, my_layout)} - dict_ = env.helper_action_player.to_dict() + dict_ = env.action_space.to_dict() assert "grid_layout" in dict_ assert dict_["grid_layout"] == {k: [x,y] for k,(x,y) in zip(env.name_sub, my_layout)} - dict_ = env.helper_action_env.to_dict() + dict_ = env._helper_action_env.to_dict() assert "grid_layout" in dict_ assert dict_["grid_layout"] == {k: [x,y] for k,(x,y) in zip(env.name_sub, my_layout)} - dict_ = env.helper_observation.to_dict() + dict_ = env.observation_space.to_dict() assert "grid_layout" in dict_ assert dict_["grid_layout"] == {k: [x,y] for k,(x,y) in zip(env.name_sub, my_layout)} - dict_ = env.opponent_action_space.to_dict() + dict_ = env._opponent_action_space.to_dict() assert "grid_layout" in dict_ assert dict_["grid_layout"] == {k: [x,y] for k,(x,y) in zip(env.name_sub, my_layout)} diff --git a/grid2op/tests/test_EpisodeData.py b/grid2op/tests/test_EpisodeData.py index 17027145c..45a694b5d 100644 --- a/grid2op/tests/test_EpisodeData.py +++ b/grid2op/tests/test_EpisodeData.py @@ -7,10 +7,11 @@ # This file is part of 
Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. import tempfile +import warnings import pdb +import grid2op from grid2op.tests.helper_path_test import * - from grid2op.Exceptions import * from grid2op.Chronics import Multifolder from grid2op.Reward import L2RPNReward @@ -18,6 +19,7 @@ from grid2op.Runner import Runner from grid2op.Episode import EpisodeData from grid2op.dtypes import dt_float +from grid2op.Agent import BaseAgent DEBUG = True PATH_ADN_CHRONICS_FOLDER = os.path.abspath(os.path.join(PATH_CHRONICS, "test_multi_chronics")) @@ -70,6 +72,36 @@ def setUp(self): max_iter=self.max_iter, name_env="test_episodedata_env") + def test_load_ambiguous(self): + f = tempfile.mkdtemp() + + class TestSuitAgent(BaseAgent): + def __init__(self, *args, **kwargs): + BaseAgent.__init__(self, *args, **kwargs) + + def act(self, observation, reward, done=False): + # do an ambiguous action: it both sets and changes the status of the same powerline + return self.action_space({"set_line_status": [(0, 1)], + "change_line_status": [0]} + ) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + with grid2op.make("rte_case14_test", test=True) as env: + my_agent = TestSuitAgent(env.action_space) + runner = Runner(**env.get_params_for_runner(), + agentClass=None, + agentInstance=my_agent) + + # run one episode and save it to disk, so we can check it loads back properly + res = runner.run(nb_episode=1, + max_iter=self.max_iter, + path_save=f) + episode_data = EpisodeData.from_disk(agent_path=f, name=res[0][1]) + assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter + assert len(episode_data.actions) == self.max_iter + assert len(episode_data.observations) == self.max_iter + 1 + def test_one_episode_with_saving(self): f = tempfile.mkdtemp() episode_name, cum_reward, timestep = self.runner.run_one_episode(path_save=f) diff --git a/grid2op/tests/test_MakeEnv.py b/grid2op/tests/test_MakeEnv.py index a74a9081d..465343c3f 100644 --- a/grid2op/tests/test_MakeEnv.py +++ b/grid2op/tests/test_MakeEnv.py @@ -357,12 +357,12 @@ def test_case5_config(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case5_example") with make_from_dataset_path(dataset_path) as env: # Check config is loaded from config.py - assert env.rewardClass == L2RPNReward - assert env.actionClass == TopologyAction - assert env.observationClass == CompleteObservation + assert env._rewardClass == L2RPNReward + assert env._actionClass == TopologyAction + assert env._observationClass == CompleteObservation assert isinstance(env.backend, PandaPowerBackend) - assert env.legalActClass == DefaultRules - assert isinstance(env.voltage_controler, ControlVoltageFromFile) + assert env._legalActClass == DefaultRules + assert isinstance(env._voltage_controler, ControlVoltageFromFile) assert isinstance(env.chronics_handler.real_data, Multifolder) assert env.action_space.grid_layout != None @@ -378,12 +378,12 @@ def test_case14_test_config(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case14_test") with make_from_dataset_path(dataset_path) as env: # Check config is loaded from config.py - assert env.rewardClass == RedispReward - assert env.actionClass == TopologyAndDispatchAction - assert env.observationClass == CompleteObservation + assert env._rewardClass == RedispReward + assert env._actionClass == TopologyAndDispatchAction + assert env._observationClass == CompleteObservation assert isinstance(env.backend, PandaPowerBackend) - assert env.legalActClass == DefaultRules - assert isinstance(env.voltage_controler, ControlVoltageFromFile) + assert
env._legalActClass == DefaultRules + assert isinstance(env._voltage_controler, ControlVoltageFromFile) assert isinstance(env.chronics_handler.real_data, Multifolder) assert env.action_space.grid_layout != None @@ -400,12 +400,12 @@ def test_case14_redisp_config(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case14_redisp") with make_from_dataset_path(dataset_path) as env: # Check config is loaded from config.py - assert env.rewardClass == RedispReward - assert env.actionClass == TopologyAndDispatchAction - assert env.observationClass == CompleteObservation + assert env._rewardClass == RedispReward + assert env._actionClass == TopologyAndDispatchAction + assert env._observationClass == CompleteObservation assert isinstance(env.backend, PandaPowerBackend) - assert env.legalActClass == DefaultRules - assert isinstance(env.voltage_controler, ControlVoltageFromFile) + assert env._legalActClass == DefaultRules + assert isinstance(env._voltage_controler, ControlVoltageFromFile) assert isinstance(env.chronics_handler.real_data, Multifolder) def test_case14_redisp_runs(self): @@ -422,12 +422,12 @@ def test_l2rpn19_test_config(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "l2rpn_2019") with make_from_dataset_path(dataset_path) as env: # Check config is loaded from config.py - assert env.rewardClass == L2RPNReward - assert env.actionClass == TopologyAction - assert env.observationClass == CompleteObservation + assert env._rewardClass == L2RPNReward + assert env._actionClass == TopologyAction + assert env._observationClass == CompleteObservation assert isinstance(env.backend, PandaPowerBackend) - assert env.legalActClass == DefaultRules - assert isinstance(env.voltage_controler, ControlVoltageFromFile) + assert env._legalActClass == DefaultRules + assert isinstance(env._voltage_controler, ControlVoltageFromFile) assert isinstance(env.chronics_handler.real_data, Multifolder) assert env.action_space.grid_layout != None @@ -436,34 +436,34 @@ class TestMakeFromPathConfigOverride(unittest.TestCase): def test_case5_override_reward(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case5_example") with make_from_dataset_path(dataset_path, reward_class=FlatReward) as env: - assert env.rewardClass == FlatReward + assert env._rewardClass == FlatReward def test_case14_test_override_reward(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case14_test") with make_from_dataset_path(dataset_path, reward_class=FlatReward) as env: - assert env.rewardClass == FlatReward + assert env._rewardClass == FlatReward def test_l2rpn19_override_reward(self): self.skipTest("l2rpn has been removed") dataset_path = os.path.join(PATH_CHRONICS_Make2, "l2rpn_2019") with make_from_dataset_path(dataset_path, reward_class=FlatReward) as env: - assert env.rewardClass == FlatReward + assert env._rewardClass == FlatReward def test_case5_override_action(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case5_example") with make_from_dataset_path(dataset_path, action_class=VoltageOnlyAction) as env: - assert env.actionClass == VoltageOnlyAction + assert env._actionClass == VoltageOnlyAction def test_case14_test_override_action(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case14_test") with make_from_dataset_path(dataset_path, action_class=VoltageOnlyAction) as env: - assert env.actionClass == VoltageOnlyAction + assert env._actionClass == VoltageOnlyAction def test_l2rpn19_override_action(self): self.skipTest("l2rpn has been removed") dataset_path = 
os.path.join(PATH_CHRONICS_Make2, "l2rpn_2019") with make_from_dataset_path(dataset_path, action_class=VoltageOnlyAction) as env: - assert env.actionClass == VoltageOnlyAction + assert env._actionClass == VoltageOnlyAction def test_case5_override_chronics(self): dataset_path = os.path.join(PATH_CHRONICS_Make2, "rte_case5_example") diff --git a/grid2op/tests/test_MultiMix.py b/grid2op/tests/test_MultiMix.py index 17d253964..e8daa6b67 100644 --- a/grid2op/tests/test_MultiMix.py +++ b/grid2op/tests/test_MultiMix.py @@ -99,9 +99,9 @@ def test_creation_with_opponent(self): assert mme.current_obs is not None assert mme.current_env is not None for env in mme: - assert env.opponent_class == BaseOpponent - assert env.opponent_init_budget == dt_float(42.0) - assert env.opponent_budget_per_ts == dt_float(0.42) + assert env._opponent_class == BaseOpponent + assert env._opponent_init_budget == dt_float(42.0) + assert env._opponent_budget_per_ts == dt_float(0.42) def test_reset(self): mme = MultiMixEnvironment(PATH_DATA_MULTIMIX) @@ -163,9 +163,9 @@ def test_reset_with_opponent(self): mme.reset() assert mme.current_obs is not None assert mme.current_env is not None - assert mme.opponent_class == BaseOpponent - assert mme.opponent_init_budget == dt_float(42.0) - assert mme.opponent_budget_per_ts == dt_float(0.42) + assert mme._opponent_class == BaseOpponent + assert mme._opponent_init_budget == dt_float(42.0) + assert mme._opponent_budget_per_ts == dt_float(0.42) def test_reset_seq(self): mme = MultiMixEnvironment(PATH_DATA_MULTIMIX) @@ -182,7 +182,6 @@ def test_reset_random(self): assert mme.current_obs is not None assert mme.current_env is not None - def test_seeding(self): mme1 = MultiMixEnvironment(PATH_DATA_MULTIMIX) seeds_1 = mme1.seed(2) diff --git a/grid2op/tests/test_Observation.py b/grid2op/tests/test_Observation.py index a71589a5b..76cc5578c 100644 --- a/grid2op/tests/test_Observation.py +++ b/grid2op/tests/test_Observation.py @@ -107,26 +107,26 @@ def tearDown(self): self.env.close() def test_sum_shape_equal_size(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) assert obs.size() == np.sum(obs.shape()) def test_size(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) obs.size() def test_copy_space(self): - obs_space2 = self.env.helper_observation.copy() + obs_space2 = self.env.observation_space.copy() assert isinstance(obs_space2, ObservationSpace) def test_proper_size(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) assert obs.size() == self.size_obs def test_size_action_space(self): - assert self.env.helper_observation.size() == self.size_obs + assert self.env.observation_space.size() == self.size_obs def test_bus_conn_mat(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) mat1 = obs.bus_connectivity_matrix() ref_mat = np.array([[1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], @@ -145,7 +145,7 @@ def test_bus_conn_mat(self): assert np.all(mat1 == ref_mat) def test_conn_mat(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) mat = obs.connectivity_matrix() ref_mat = np.array([[0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., @@ -191,11 +191,11 @@ def test_conn_mat(self): assert np.all(mat[:10,:] == ref_mat) def 
test_observation_space(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) assert self.env.observation_space.n == obs.size() def test_shape_correct(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) assert obs.shape().shape == obs.dtype().shape assert np.all(obs.dtype() == self.dtypes) assert np.all(obs.shape() == self.shapes) @@ -207,12 +207,12 @@ def test_0_load_properly(self): def test_1_generating_obs(self): # test that helper_obs is able to generate a valid observation - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) pass def test_2_reset(self): # test that helper_obs is able to generate a valid observation - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) assert obs.prod_p[0] is not None obs.reset() assert np.all(np.isnan(obs.prod_p)) @@ -221,7 +221,7 @@ def test_3_reset(self): # test that helper_obs is able to generate a valid observation - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) obs2 = obs.copy() assert obs == obs2 obs2.reset() @@ -231,7 +231,7 @@ # assert obs.prod_p is not None def test_shapes_types(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) dtypes = obs.dtype() assert np.all(dtypes == self.dtypes) shapes = obs.shape() @@ -239,8 +239,8 @@ def test_4_to_from_vect(self): # test that helper_obs is able to generate a valid observation - obs = self.env.helper_observation(self.env) - obs2 = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) + obs2 = self.env.observation_space(self.env) vect = obs.to_vect() assert vect.shape[0] == obs.size() obs2.reset() @@ -255,9 +255,9 @@ def test_5_simulate_proper_timestep(self): self.skipTest("This is extensively tested elsewhere, and the chronics have been changed.") - obs_orig = self.env.helper_observation(self.env) - action = self.env.helper_action_player({}) - action2 = self.env.helper_action_player({}) + obs_orig = self.env.observation_space(self.env) + action = self.env.action_space({}) + action2 = self.env.action_space({}) simul_obs, simul_reward, simul_has_error, simul_info = obs_orig.simulate(action) real_obs, real_reward, real_has_error, real_info = self.env.step(action2) @@ -269,21 +269,21 @@ assert np.abs(simul_reward - real_reward) <= self.tol_one def test_6_simulate_dont_affect_env(self): - obs_orig = self.env.helper_observation(self.env) + obs_orig = self.env.observation_space(self.env) obs_orig = obs_orig.copy() for i in range(self.env.backend.n_line): # simulate lots of actions tmp = np.full(self.env.backend.n_line, fill_value=False, dtype=np.bool) tmp[i] = True - action = self.env.helper_action_player({"change_line_status": tmp}) + action = self.env.action_space({"change_line_status": tmp}) simul_obs, simul_reward, simul_has_error, simul_info = obs_orig.simulate(action) - obs_after = self.env.helper_observation(self.env) + obs_after = self.env.observation_space(self.env) assert obs_orig == obs_after def test_inspect_load(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) dict_ = obs.state_of(load_id=0) assert "p" in dict_ assert np.abs(dict_["p"] - 21.2) <= self.tol_one @@ -297,7 +297,7 @@ def test_inspect_load(self): assert
dict_["sub_id"] == 1 def test_inspect_gen(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) dict_ = obs.state_of(gen_id=0) assert "p" in dict_ assert np.abs(dict_["p"] - 93.6) <= self.tol_one @@ -311,7 +311,7 @@ def test_inspect_gen(self): assert dict_["sub_id"] == 1 def test_inspect_line(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) dict_both = obs.state_of(line_id=0) assert "origin" in dict_both dict_ = dict_both["origin"] @@ -340,7 +340,7 @@ def test_inspect_line(self): assert dict_["sub_id"] == 1 def test_inspect_topo(self): - obs = self.env.helper_observation(self.env) + obs = self.env.observation_space(self.env) dict_ = obs.state_of(substation_id=1) assert "topo_vect" in dict_ assert np.all(dict_["topo_vect"] == [1, 1, 1, 1, 1, 1]) @@ -348,7 +348,7 @@ def test_inspect_topo(self): assert dict_["nb_bus"] == 1 def test_get_obj_connect_to(self): - dict_ = self.env.helper_observation.get_obj_connect_to(substation_id=1) + dict_ = self.env.observation_space.get_obj_connect_to(substation_id=1) assert 'loads_id' in dict_ assert np.all(dict_['loads_id'] == 0) assert 'generators_id' in dict_ @@ -361,56 +361,56 @@ def test_get_obj_connect_to(self): assert dict_['nb_elements'] == 6 def test_to_dict(self): - dict_ = self.env.helper_observation.to_dict() + dict_ = self.env.observation_space.to_dict() self.maxDiff = None self.assertDictEqual(dict_, self.dict_) def test_from_dict(self): res = ObservationSpace.from_dict(self.dict_) - assert res.n_gen == self.env.helper_observation.n_gen - assert res.n_load == self.env.helper_observation.n_load - assert res.n_line == self.env.helper_observation.n_line - assert np.all(res.sub_info == self.env.helper_observation.sub_info) - assert np.all(res.load_to_subid == self.env.helper_observation.load_to_subid) - assert np.all(res.gen_to_subid == self.env.helper_observation.gen_to_subid) - assert np.all(res.line_or_to_subid == self.env.helper_observation.line_or_to_subid) - assert np.all(res.line_ex_to_subid == self.env.helper_observation.line_ex_to_subid) - assert np.all(res.load_to_sub_pos == self.env.helper_observation.load_to_sub_pos) - assert np.all(res.gen_to_sub_pos == self.env.helper_observation.gen_to_sub_pos) - assert np.all(res.line_or_to_sub_pos == self.env.helper_observation.line_or_to_sub_pos) - assert np.all(res.line_ex_to_sub_pos == self.env.helper_observation.line_ex_to_sub_pos) - assert np.all(res.load_pos_topo_vect == self.env.helper_observation.load_pos_topo_vect) - assert np.all(res.gen_pos_topo_vect == self.env.helper_observation.gen_pos_topo_vect) - assert np.all(res.line_or_pos_topo_vect == self.env.helper_observation.line_or_pos_topo_vect) - assert np.all(res.line_ex_pos_topo_vect == self.env.helper_observation.line_ex_pos_topo_vect) - assert issubclass(res.observationClass, self.env.helper_observation._init_subtype) + assert res.n_gen == self.env.observation_space.n_gen + assert res.n_load == self.env.observation_space.n_load + assert res.n_line == self.env.observation_space.n_line + assert np.all(res.sub_info == self.env.observation_space.sub_info) + assert np.all(res.load_to_subid == self.env.observation_space.load_to_subid) + assert np.all(res.gen_to_subid == self.env.observation_space.gen_to_subid) + assert np.all(res.line_or_to_subid == self.env.observation_space.line_or_to_subid) + assert np.all(res.line_ex_to_subid == self.env.observation_space.line_ex_to_subid) + assert np.all(res.load_to_sub_pos == 
self.env.observation_space.load_to_sub_pos) + assert np.all(res.gen_to_sub_pos == self.env.observation_space.gen_to_sub_pos) + assert np.all(res.line_or_to_sub_pos == self.env.observation_space.line_or_to_sub_pos) + assert np.all(res.line_ex_to_sub_pos == self.env.observation_space.line_ex_to_sub_pos) + assert np.all(res.load_pos_topo_vect == self.env.observation_space.load_pos_topo_vect) + assert np.all(res.gen_pos_topo_vect == self.env.observation_space.gen_pos_topo_vect) + assert np.all(res.line_or_pos_topo_vect == self.env.observation_space.line_or_pos_topo_vect) + assert np.all(res.line_ex_pos_topo_vect == self.env.observation_space.line_ex_pos_topo_vect) + assert issubclass(res.observationClass, self.env.observation_space._init_subtype) def test_json_serializable(self): - dict_ = self.env.helper_observation.to_dict() + dict_ = self.env.observation_space.to_dict() res = json.dumps(obj=dict_, indent=4, sort_keys=True) def test_json_loadable(self): - dict_ = self.env.helper_observation.to_dict() + dict_ = self.env.observation_space.to_dict() tmp = json.dumps(obj=dict_, indent=4, sort_keys=True) res = ObservationSpace.from_dict(json.loads(tmp)) - assert res.n_gen == self.env.helper_observation.n_gen - assert res.n_load == self.env.helper_observation.n_load - assert res.n_line == self.env.helper_observation.n_line - assert np.all(res.sub_info == self.env.helper_observation.sub_info) - assert np.all(res.load_to_subid == self.env.helper_observation.load_to_subid) - assert np.all(res.gen_to_subid == self.env.helper_observation.gen_to_subid) - assert np.all(res.line_or_to_subid == self.env.helper_observation.line_or_to_subid) - assert np.all(res.line_ex_to_subid == self.env.helper_observation.line_ex_to_subid) - assert np.all(res.load_to_sub_pos == self.env.helper_observation.load_to_sub_pos) - assert np.all(res.gen_to_sub_pos == self.env.helper_observation.gen_to_sub_pos) - assert np.all(res.line_or_to_sub_pos == self.env.helper_observation.line_or_to_sub_pos) - assert np.all(res.line_ex_to_sub_pos == self.env.helper_observation.line_ex_to_sub_pos) - assert np.all(res.load_pos_topo_vect == self.env.helper_observation.load_pos_topo_vect) - assert np.all(res.gen_pos_topo_vect == self.env.helper_observation.gen_pos_topo_vect) - assert np.all(res.line_or_pos_topo_vect == self.env.helper_observation.line_or_pos_topo_vect) - assert np.all(res.line_ex_pos_topo_vect == self.env.helper_observation.line_ex_pos_topo_vect) - assert issubclass(res.observationClass, self.env.helper_observation._init_subtype) + assert res.n_gen == self.env.observation_space.n_gen + assert res.n_load == self.env.observation_space.n_load + assert res.n_line == self.env.observation_space.n_line + assert np.all(res.sub_info == self.env.observation_space.sub_info) + assert np.all(res.load_to_subid == self.env.observation_space.load_to_subid) + assert np.all(res.gen_to_subid == self.env.observation_space.gen_to_subid) + assert np.all(res.line_or_to_subid == self.env.observation_space.line_or_to_subid) + assert np.all(res.line_ex_to_subid == self.env.observation_space.line_ex_to_subid) + assert np.all(res.load_to_sub_pos == self.env.observation_space.load_to_sub_pos) + assert np.all(res.gen_to_sub_pos == self.env.observation_space.gen_to_sub_pos) + assert np.all(res.line_or_to_sub_pos == self.env.observation_space.line_or_to_sub_pos) + assert np.all(res.line_ex_to_sub_pos == self.env.observation_space.line_ex_to_sub_pos) + assert np.all(res.load_pos_topo_vect == self.env.observation_space.load_pos_topo_vect) + assert 
np.all(res.gen_pos_topo_vect == self.env.observation_space.gen_pos_topo_vect) + assert np.all(res.line_or_pos_topo_vect == self.env.observation_space.line_or_pos_topo_vect) + assert np.all(res.line_ex_pos_topo_vect == self.env.observation_space.line_ex_pos_topo_vect) + assert issubclass(res.observationClass, self.env.observation_space._init_subtype) class TestObservationHazard(unittest.TestCase): @@ -487,7 +487,7 @@ def test_1_generating_obs_withhazard(self): # test that helper_obs is able to generate a valid observation obs = self.env.get_obs() assert np.all(obs.time_before_cooldown_line == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - action = self.env.helper_action_player({}) + action = self.env.action_space({}) _ = self.env.step(action) obs = self.env.get_obs() assert np.all(obs.time_before_cooldown_line == [0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) @@ -573,7 +573,7 @@ def test_1_generating_obs_withmaintenance(self): -1, -1, -1, -1, -1, -1, -1, -1, -1])) assert np.all(obs.duration_next_maintenance == np.array([ 0, 0, 0, 0, 12, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])) - action = self.env.helper_action_player({}) + action = self.env.action_space({}) _ = self.env.step(action) obs = self.env.get_obs() assert np.all(obs.time_next_maintenance == np.array([ -1, -1, -1, -1, 0, -1, 275, -1, -1, -1, -1, diff --git a/grid2op/tests/test_Opponent.py b/grid2op/tests/test_Opponent.py index 2c669ea88..38dc4fba6 100644 --- a/grid2op/tests/test_Opponent.py +++ b/grid2op/tests/test_Opponent.py @@ -74,7 +74,7 @@ def test_env_modif_oppo(self): with make("rte_case5_example", test=True, opponent_class=TestSuiteOpponent_001) as env: obs = env.reset() obs, reward, done, info = env.step(env.action_space()) - assert isinstance(env.opponent, TestSuiteOpponent_001) + assert isinstance(env._opponent, TestSuiteOpponent_001) def test_env_modif_oppobudg(self): with warnings.catch_warnings(): @@ -82,7 +82,7 @@ def test_env_modif_oppobudg(self): with make("rte_case5_example", test=True, opponent_budget_class=TestSuiteBudget_001) as env: obs = env.reset() obs, reward, done, info = env.step(env.action_space()) - assert isinstance(env.compute_opp_budget, TestSuiteBudget_001) + assert isinstance(env._compute_opp_budget, TestSuiteBudget_001) def test_env_modif_opponent_init_budget(self): with warnings.catch_warnings(): @@ -91,7 +91,7 @@ def test_env_modif_opponent_init_budget(self): with make("rte_case5_example", test=True, opponent_init_budget=init_budg) as env: obs = env.reset() obs, reward, done, info = env.step(env.action_space()) - assert env.opponent_init_budget == init_budg + assert env._opponent_init_budget == init_budg def test_env_modif_opponent_init_budget_ts(self): with warnings.catch_warnings(): @@ -100,7 +100,7 @@ def test_env_modif_opponent_init_budget_ts(self): with make("rte_case5_example", test=True, opponent_budget_per_ts=init_budg) as env: obs = env.reset() obs, reward, done, info = env.step(env.action_space()) - assert env.opponent_budget_per_ts == init_budg + assert env._opponent_budget_per_ts == init_budg def test_env_modif_opponent_action_class(self): with warnings.catch_warnings(): @@ -108,7 +108,7 @@ def test_env_modif_opponent_action_class(self): with make("rte_case5_example", test=True, opponent_action_class=TopologyAction) as env: obs = env.reset() obs, reward, done, info = env.step(env.action_space()) - assert issubclass(env.opponent_action_class, TopologyAction) + assert issubclass(env._opponent_action_class, TopologyAction) def 
test_env_opp_attack(self): # and test reset, which apparently is NOT done correctly @@ -126,15 +126,15 @@ def test_env_opp_attack(self): obs = env.reset() # opponent should not attack at the first time step assert np.all(obs.line_status) - assert env.opponent_init_budget == init_budg + assert env._opponent_init_budget == init_budg obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.budget == init_budg - 1.0 + assert env._oppSpace.budget == init_budg - 1.0 obs = env.reset() # opponent should not attack at the first time step assert np.all(obs.line_status) - assert env.opponent_init_budget == init_budg - assert env.oppSpace.budget == init_budg + assert env._opponent_init_budget == init_budg + assert env._oppSpace.budget == init_budg def test_env_opp_attack_budget_ts(self): with warnings.catch_warnings(): @@ -149,18 +149,18 @@ def test_env_opp_attack_budget_ts(self): opponent_attack_cooldown=ATTACK_COOLDOWN, opponent_class=TestSuiteOpponent_001) as env: obs = env.reset() - assert env.opponent_init_budget == 0. + assert env._opponent_init_budget == 0. obs, reward, done, info = env.step(env.action_space()) # no attack possible - assert env.oppSpace.budget == init_budg_ts + assert env._oppSpace.budget == init_budg_ts obs, reward, done, info = env.step(env.action_space()) # i can attack at the second time step, and the budget of an attack is 1, so I have 0 now - assert env.oppSpace.budget == 0. + assert env._oppSpace.budget == 0. obs = env.reset() - assert env.opponent_init_budget == 0. - assert env.opponent_budget_per_ts == 0.5 - assert env.oppSpace.budget == 0. + assert env._opponent_init_budget == 0. + assert env._opponent_budget_per_ts == 0.5 + assert env._oppSpace.budget == 0. def test_RandomLineOpponent_not_enough_budget(self): """Tests that the attack is ignored when the budget is too low""" @@ -179,18 +179,18 @@ def test_RandomLineOpponent_not_enough_budget(self): kwargs_opponent={"lines_attacked": LINES_ATTACKED}) as env: env.seed(0) obs = env.reset() - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget # The opponent can attack - for i in range(env.oppSpace.attack_duration): + for i in range(env._oppSpace.attack_duration): obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack - assert env.oppSpace.budget == init_budget - i - 1 + attack = env._oppSpace.last_attack + assert env._oppSpace.budget == init_budget - i - 1 assert any(attack._set_line_status != 0) # There is not enough budget for a second attack - assert abs(env.oppSpace.budget - (init_budget - ATTACK_DURATION)) <= 1e-5 + assert abs(env._oppSpace.budget - (init_budget - ATTACK_DURATION)) <= 1e-5 obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack assert attack is None def test_RandomLineOpponent_attackable_lines(self): @@ -214,11 +214,11 @@ def test_RandomLineOpponent_attackable_lines(self): # Collect some attacks and check that they belong to the correct lines for _ in range(tries): obs = env.reset() - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.budget == init_budget - 1 + assert env._oppSpace.budget == init_budget - 1 - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack attacked_line = np.where(attack._set_line_status == -1)[0][0] line_name = env.action_space.name_line[attacked_line] assert line_name in 
attackable_lines_case14 @@ -243,11 +243,11 @@ def test_RandomLineOpponent_disconnects_only_one_line(self): # Collect some attacks and check that they belong to the correct lines for _ in range(tries): obs = env.reset() - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.budget == init_budget - 1 + assert env._oppSpace.budget == init_budget - 1 - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack n_disconnected = np.sum(attack._set_line_status == -1) assert n_disconnected == 1 @@ -281,7 +281,7 @@ def test_RandomLineOpponent_with_agent(self): env.seed(0) obs = env.reset() reward = 0 - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget assert np.all(obs.time_before_cooldown_line == 0) # the "agent" does an action (on the same powerline as the opponent attacks) obs, reward, done, info = env.step(env.action_space({"set_line_status": [(line_opponent_attack, 1)]})) @@ -418,13 +418,13 @@ def test_RandomLineOpponent_only_attack_connected(self): # and check that they belong to the correct lines pre_obs = env.reset() done = False - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget for i in range(length): obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack attacked_line = np.where(attack._set_line_status == -1)[0][0] - if env.oppSpace.current_attack_duration < env.oppSpace.attack_duration: + if env._oppSpace.current_attack_duration < env._oppSpace.attack_duration: # The attack is ongoing. The line must have been disconnected already assert not pre_obs.line_status[attacked_line] else: @@ -468,14 +468,14 @@ def test_RandomLineOpponent_same_attack_order_and_attacks_all_lines(self): # Collect some attacks and check that they belong to the correct lines obs = env.reset() done = False - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget for i in range(length): if done: obs = env.reset() pre_done = done obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack if attack is None: # should only happen here if all attackable lines are already disconnected assert np.sum(obs.line_status == False) == 6 continue @@ -583,10 +583,10 @@ def test_proper_action_class(self): opponent_class=RandomLineOpponent, kwargs_opponent={"lines_attacked": LINES_ATTACKED}) as env: env.seed(0) - assert env.opponent_action_class == opponent_action_class - assert issubclass(env.oppSpace.action_space.actionClass, opponent_action_class) - assert issubclass(env.opponent_action_space.actionClass, opponent_action_class) - opp_space = env.oppSpace + assert env._opponent_action_class == opponent_action_class + assert issubclass(env._oppSpace.action_space.actionClass, opponent_action_class) + assert issubclass(env._opponent_action_space.actionClass, opponent_action_class) + opp_space = env._oppSpace attack, duration = opp_space.attack(env.get_obs(), env.action_space(), env.action_space()) assert isinstance(attack, opponent_action_class) @@ -616,7 +616,7 @@ def test_get_set_state(self): observation = env.get_obs() env_action = env.action_space() - opp_space = env.oppSpace + opp_space = env._oppSpace # FIRST CHECK: WHEN NO ATTACKS ARE PERFORMED # test that if i do "a loop of get / set" i get the same stuff init_state = opp_space._get_state() @@ -706,10 +706,10 @@
def test_env_opponent(self): env = make("rte_case14_opponent", test=True, param=param) env.seed(0) # make sure i have reproducible experiments obs = env.reset() - assert env.oppSpace.budget == 0 + assert env._oppSpace.budget == 0 assert np.all(obs.line_status) obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.budget == 0.5 + assert env._oppSpace.budget == 0.5 assert np.all(obs.line_status) obs, reward, done, info = env.step(env.action_space()) @@ -726,8 +726,8 @@ def test_multienv_opponent(self): obs = multi_env.reset() for ob in obs: assert np.all(ob.line_status) - assert np.all(multi_env.opponent[0]._lines_ids == [3, 4, 15, 12, 13, 14]) - assert np.all(multi_env.opponent[1]._lines_ids == [3, 4, 15, 12, 13, 14]) + assert np.all(multi_env._opponent[0]._lines_ids == [3, 4, 15, 12, 13, 14]) + assert np.all(multi_env._opponent[1]._lines_ids == [3, 4, 15, 12, 13, 14]) env.close() multi_env.close() @@ -750,18 +750,18 @@ def test_WeightedRandomOpponent_not_enough_budget(self): "attack_period": 1}) as env: env.seed(0) obs = env.reset() - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget # The opponent can attack - for i in range(env.oppSpace.attack_duration): + for i in range(env._oppSpace.attack_duration): obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack - assert env.oppSpace.budget == init_budget - i - 1 + attack = env._oppSpace.last_attack + assert env._oppSpace.budget == init_budget - i - 1 assert any(attack._set_line_status != 0) # There is not enough budget for a second attack - assert abs(env.oppSpace.budget - (init_budget - ATTACK_DURATION)) <= 1e-5 + assert abs(env._oppSpace.budget - (init_budget - ATTACK_DURATION)) <= 1e-5 obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack assert attack is None def test_WeightedRandomOpponent_attackable_lines(self): @@ -787,11 +787,11 @@ def test_WeightedRandomOpponent_attackable_lines(self): # Collect some attacks and check that they belong to the correct lines for _ in range(tries): obs = env.reset() - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.budget == init_budget - 1 + assert env._oppSpace.budget == init_budget - 1 - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack attacked_line = np.where(attack._set_line_status == -1)[0][0] line_name = env.action_space.name_line[attacked_line] assert line_name in attackable_lines_case14 @@ -818,11 +818,11 @@ def test_WeightedRandomOpponent_disconnects_only_one_line(self): # Collect some attacks and check that they belong to the correct lines for _ in range(tries): obs = env.reset() - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.budget == init_budget - 1 + assert env._oppSpace.budget == init_budget - 1 - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack n_disconnected = np.sum(attack._set_line_status == -1) assert n_disconnected == 1 @@ -859,7 +859,7 @@ def test_WeightedRandomOpponent_with_agent(self): env.seed(0) obs = env.reset() reward = 0 - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget assert np.all(obs.time_before_cooldown_line == 0) # the "agent" does an action (on the same powerline as the opponent attacks) 
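# (that is, it tries to force the attacked powerline back in service while the attack is still ongoing)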
obs, reward, done, info = env.step(env.action_space({"set_line_status": [(line_opponent_attack, 1)]})) @@ -1004,13 +1004,13 @@ def test_WeightedRandomOpponent_only_attack_connected(self): # and check that they belong to the correct lines pre_obs = env.reset() done = False - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget for i in range(length): obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack attacked_line = np.where(attack._set_line_status == -1)[0][0] - if env.oppSpace.current_attack_duration < env.oppSpace.attack_duration: + if env._oppSpace.current_attack_duration < env._oppSpace.attack_duration: # The attack is ongoing. The line must have been disconnected already assert not pre_obs.line_status[attacked_line] else: @@ -1053,14 +1053,14 @@ def test_WeightedRandomOpponent_same_attack_order_and_attacks_all_lines(self): # Collect some attacks and check that they belong to the correct lines obs = env.reset() done = False - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget for i in range(length): if done: obs = env.reset() pre_done = done obs, reward, done, info = env.step(env.action_space()) - attack = env.oppSpace.last_attack + attack = env._oppSpace.last_attack if attack is None: # should only happen here if all attackable lines are already disconnected assert np.sum(obs.line_status == False) == 6 continue @@ -1096,15 +1096,15 @@ def test_either_attack_or_tell_attack_continues(self): # Collect some attacks and check that they belong to the correct lines obs = env.reset() done = False - assert env.oppSpace.budget == init_budget + assert env._oppSpace.budget == init_budget for i in range(length): if done: obs = env.reset() obs, reward, done, info = env.step(env.action_space()) - assert env.oppSpace.opponent._attack_counter == 56 - assert env.oppSpace.opponent._attack_continues_counter == 44 - assert env.oppSpace.opponent._attack_counter \ - + env.oppSpace.opponent._attack_continues_counter \ + assert env._oppSpace.opponent._attack_counter == 56 + assert env._oppSpace.opponent._attack_continues_counter == 44 + assert env._oppSpace.opponent._attack_counter \ + + env._oppSpace.opponent._attack_continues_counter \ == length diff --git a/grid2op/tests/test_PandaPowerBackend.py b/grid2op/tests/test_PandaPowerBackend.py index d0070fa10..2b858d1c2 100644 --- a/grid2op/tests/test_PandaPowerBackend.py +++ b/grid2op/tests/test_PandaPowerBackend.py @@ -159,8 +159,8 @@ def test_set_bus(self): warnings.filterwarnings("ignore") env = make(test=True, backend=backend) env.reset() - # action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) - action = env.helper_action_player({"set_bus": {"lines_or_id": [(17, 2)]}}) + # action = env.action_space({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"set_bus": {"lines_or_id": [(17, 2)]}}) obs, reward, done, info = env.step(action) assert np.all(np.isfinite(obs.v_or)) assert np.sum(env.backend._grid["bus"]["in_service"]) == 15 @@ -172,7 +172,7 @@ def test_change_bus(self): warnings.filterwarnings("ignore") env = make(test=True, backend=backend) env.reset() - action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"change_bus": {"lines_or_id": [17]}}) obs, reward, done, info = env.step(action) assert np.all(np.isfinite(obs.v_or)) assert np.sum(env.backend._grid["bus"]["in_service"]) == 15 @@ -184,14 +184,14 @@ def 
test_change_bustwice(self): warnings.filterwarnings("ignore") env = make(test=True, backend=backend) env.reset() - action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"change_bus": {"lines_or_id": [17]}}) obs, reward, done, info = env.step(action) assert not done assert np.all(np.isfinite(obs.v_or)) assert np.sum(env.backend._grid["bus"]["in_service"]) == 15 assert env.backend._grid["trafo"]["hv_bus"][2] == 18 - action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}}) + action = env.action_space({"change_bus": {"lines_or_id": [17]}}) obs, reward, done, info = env.step(action) assert not done assert np.all(np.isfinite(obs.v_or)) diff --git a/grid2op/tests/test_Reward.py b/grid2op/tests/test_Reward.py index b26c485d9..63279dc4d 100644 --- a/grid2op/tests/test_Reward.py +++ b/grid2op/tests/test_Reward.py @@ -94,7 +94,7 @@ def test_do_nothing(self): dn_action = self.env.action_space({}) obs, r, d, info = self.env.step(dn_action) - max_reward = self.env.reward_helper.range()[1] + max_reward = self.env._reward_helper.range()[1] assert r == max_reward def test_disconnect(self): @@ -126,14 +126,14 @@ def _reward_type(self): return CombinedReward def test_add_reward(self): - cr = self.env.reward_helper.template_reward + cr = self.env._reward_helper.template_reward assert cr is not None cr.addReward("Gameplay", GameplayReward(), 1.0) cr.addReward("Flat", FlatReward(), 1.0) cr.initialize(self.env) def test_remove_reward(self): - cr = self.env.reward_helper.template_reward + cr = self.env._reward_helper.template_reward assert cr is not None added = cr.addReward("Gameplay", GameplayReward(), 1.0) assert added == True @@ -143,7 +143,7 @@ def test_remove_reward(self): assert removed == False def test_update_reward_weight(self): - cr = self.env.reward_helper.template_reward + cr = self.env._reward_helper.template_reward assert cr is not None added = cr.addReward("Gameplay", GameplayReward(), 1.0) assert added == True @@ -153,7 +153,7 @@ def test_update_reward_weight(self): assert updated == False def test_combine_distance_gameplay(self): - cr = self.env.reward_helper.template_reward + cr = self.env._reward_helper.template_reward assert cr is not None added = cr.addReward("Gameplay", GameplayReward(), 0.5) assert added == True @@ -168,7 +168,7 @@ def test_combine_distance_gameplay(self): assert r < 1.0 def test_combine_simulate(self): - cr = self.env.reward_helper.template_reward + cr = self.env._reward_helper.template_reward assert cr is not None gr = GameplayReward() gr.set_range(-21.0, 21.0) diff --git a/grid2op/tests/test_Rules.py b/grid2op/tests/test_Rules.py index 600cefd24..d694b2830 100644 --- a/grid2op/tests/test_Rules.py +++ b/grid2op/tests/test_Rules.py @@ -67,7 +67,7 @@ def setUp(self): names_chronics_to_backend=self.names_chronics_to_backend, name="test_rules_env1") - self.helper_action = self.env.helper_action_env + self.helper_action = self.env._helper_action_env def test_AlwaysLegal(self): # build a random action acting on everything @@ -190,7 +190,7 @@ def test_PreventReconection(self): try: self.env.parameters.MAX_SUB_CHANGED = 2 self.env.parameters.MAX_LINE_STATUS_CHANGED = 1 - self.env.times_before_line_status_actionable[id_line] = 1 + self.env._times_before_line_status_actionable[id_line] = 1 _ = self.helper_action({"change_bus": {"substations": [(id_1, arr1)]}, "set_bus": {"substations_id": [(id_2, arr2)]}, "change_line_status": arr_line1, @@ -201,10 +201,10 @@ def test_PreventReconection(self): except IllegalAction: 
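# reaching this branch is expected here: the action touches a powerline still in cooldown, so it must be illegal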
pass - self.env.times_before_line_status_actionable[:] = 0 + self.env._times_before_line_status_actionable[:] = 0 self.env.parameters.MAX_SUB_CHANGED = 2 self.env.parameters.MAX_LINE_STATUS_CHANGED = 1 - self.env.times_before_line_status_actionable[1] = 1 + self.env._times_before_line_status_actionable[1] = 1 _ = self.helper_action({"change_bus": {"substations": [(id_1, arr1)]}, "set_bus": {"substations_id": [(id_2, arr2)]}, "change_line_status": arr_line1, @@ -226,7 +226,7 @@ def test_linereactionnable_throw(self): arr_line2 = np.full(self.helper_action.n_line, fill_value=0, dtype=np.int) arr_line2[id_line2] = -1 - self.env.max_timestep_line_status_deactivated = 1 + self.env._max_timestep_line_status_deactivated = 1 self.helper_action.legal_action = RulesChecker(legalActClass=PreventReconnection).legal_action # i act a first time on powerline 15 @@ -257,7 +257,7 @@ def test_linereactionnable_nothrow(self): arr_line2 = np.full(self.helper_action.n_line, fill_value=0, dtype=np.int) arr_line2[id_line2] = -1 - self.env.max_timestep_line_status_deactivated = 1 + self.env._max_timestep_line_status_deactivated = 1 self.helper_action.legal_action = RulesChecker(legalActClass=PreventReconnection).legal_action # i act a first time on powerline 15 @@ -287,7 +287,7 @@ def test_linereactionnable_throw_longerperiod(self): arr_line2 = np.full(self.helper_action.n_line, fill_value=0, dtype=np.int) arr_line2[id_line2] = -1 - self.env.max_timestep_line_status_deactivated = 2 + self.env._max_timestep_line_status_deactivated = 2 self.env.parameters.NB_TIMESTEP_LINE_STATUS_REMODIF = 2 self.helper_action.legal_action = RulesChecker(legalActClass=PreventReconnection).legal_action @@ -325,7 +325,7 @@ def test_toporeactionnable_throw(self): arr_line2 = np.full(self.helper_action.n_line, fill_value=0, dtype=np.int) arr_line2[id_line2] = -1 - self.env.max_timestep_topology_deactivated = 1 + self.env._max_timestep_topology_deactivated = 1 self.helper_action.legal_action = RulesChecker(legalActClass=PreventReconnection).legal_action # i act a first time on powerline 15 @@ -356,7 +356,7 @@ def test_toporeactionnable_nothrow(self): arr_line2 = np.full(self.helper_action.n_line, fill_value=0, dtype=np.int) arr_line2[id_line2] = -1 - self.env.max_timestep_topology_deactivated = 1 + self.env._max_timestep_topology_deactivated = 1 self.helper_action.legal_action = RulesChecker(legalActClass=PreventReconnection).legal_action # i act a first time on powerline 15 @@ -386,7 +386,7 @@ def test_toporeactionnable_throw_longerperiod(self): arr_line2 = np.full(self.helper_action.n_line, fill_value=0, dtype=np.int) arr_line2[id_line2] = -1 - self.env.max_timestep_topology_deactivated = 2 + self.env._max_timestep_topology_deactivated = 2 self.helper_action.legal_action = RulesChecker(legalActClass=PreventReconnection).legal_action # i act a first time on powerline 15 diff --git a/grid2op/tests/test_Runner.py b/grid2op/tests/test_Runner.py index f5e6495ec..e9b98594d 100644 --- a/grid2op/tests/test_Runner.py +++ b/grid2op/tests/test_Runner.py @@ -165,7 +165,7 @@ def seed(self, seed): res = runner.run(nb_episode=3, max_iter=self.max_iter, env_seeds=[1, 2, 3], agent_seeds=[5, 6, 7]) assert np.all(my_agent.seeds == [5, 6, 7]) - # test that is no seeds are set, then the "seed" funciton of the agent is not called. + # test that if no seeds are set, then the "seed" function of the agent is not called.
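+ # (env_seeds only seeds the environments used by the runner; when agent_seeds is omitted
+ # the runner should not call the agent's seed() at all, hence `seeds` stays an empty list)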
my_agent.seeds = [] res = runner.run(nb_episode=3, max_iter=self.max_iter, env_seeds=[1, 2, 3]) assert my_agent.seeds == [] diff --git a/grid2op/tests/test_issue_126.py b/grid2op/tests/test_issue_126.py new file mode 100644 index 000000000..3dae6f2a1 --- /dev/null +++ b/grid2op/tests/test_issue_126.py @@ -0,0 +1,55 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. + +import unittest +import warnings +import grid2op +from grid2op.Agent import DeltaRedispatchRandomAgent +from grid2op.Runner import Runner +from grid2op import make +from grid2op.Episode import EpisodeData +import os +import numpy as np +import tempfile +import pdb + + +class Issue126Tester(unittest.TestCase): + def test_issue_126(self): + # run redispatch agent on one scenario for 100 timesteps + dataset = "rte_case14_realistic" + nb_episode = 1 + nb_timesteps = 100 + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env = make(dataset, test=True) + + agent = DeltaRedispatchRandomAgent(env.action_space) + runner = Runner(**env.get_params_for_runner(), + agentClass=None, + agentInstance=agent) + + with tempfile.TemporaryDirectory() as tmpdirname: + res = runner.run(nb_episode=nb_episode, + path_save=tmpdirname, + nb_process=1, + max_iter=nb_timesteps, + env_seeds=[0], + agent_seeds=[0], + pbar=False) + episode_data = EpisodeData.from_disk(tmpdirname, res[0][1]) + + assert len(episode_data.actions.objects) - nb_timesteps == 0, "wrong number of actions" + assert len(episode_data.actions) - nb_timesteps == 0, "wrong number of actions" + assert len(episode_data.observations.objects) - (nb_timesteps + 1) == 0, "wrong number of observations" + assert len(episode_data.observations) - (nb_timesteps + 1) == 0, "wrong number of observations" + + +if __name__ == "__main__": + unittest.main() diff --git a/grid2op/tests/issue_131.py b/grid2op/tests/test_issue_131.py similarity index 61% rename from grid2op/tests/issue_131.py rename to grid2op/tests/test_issue_131.py index d50b84d15..06bbc2a92 100755 --- a/grid2op/tests/issue_131.py +++ b/grid2op/tests/test_issue_131.py @@ -1,14 +1,25 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. 
+ #!/usr/bin/env python3 import grid2op import unittest import numpy as np +import warnings class Issue131Tester(unittest.TestCase): def test_issue_131(self): - env = grid2op.make("rte_case14_realistic") + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env = grid2op.make("rte_case14_realistic") # Get forecast after a simulate works obs = env.reset() diff --git a/grid2op/tests/issue_140.py b/grid2op/tests/test_issue_140.py similarity index 70% rename from grid2op/tests/issue_140.py rename to grid2op/tests/test_issue_140.py index 3c92f18d8..07d189cd1 100644 --- a/grid2op/tests/issue_140.py +++ b/grid2op/tests/test_issue_140.py @@ -1,3 +1,11 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. + import grid2op import numpy as np #!/usr/bin/env python3 @@ -6,6 +14,7 @@ import unittest import numpy as np import warnings +from grid2op.Parameters import Parameters import pdb @@ -19,9 +28,25 @@ def test_issue_140(self): # the environment is not downloaded, I skip this test self.skipTest("{} is not downloaded".format(env_name)) + param = Parameters() + # test was originally designed with these values + param.init_from_dict({ + "NO_OVERFLOW_DISCONNECTION": False, + "NB_TIMESTEP_OVERFLOW_ALLOWED": 3, + "NB_TIMESTEP_COOLDOWN_SUB": 3, + "NB_TIMESTEP_COOLDOWN_LINE": 3, + "HARD_OVERFLOW_THRESHOLD": 200., + "NB_TIMESTEP_RECONNECTION": 12, + "IGNORE_MIN_UP_DOWN_TIME": True, + "ALLOW_DISPATCH_GEN_SWITCH_OFF": True, + "ENV_DC": False, + "FORECAST_DC": False, + "MAX_SUB_CHANGED": 1, + "MAX_LINE_STATUS_CHANGED": 1 + }) with warnings.catch_warnings(): warnings.filterwarnings("ignore") - env = grid2op.make(env_name) + env = grid2op.make(env_name, param=param) ts_per_chronics = 2016 seed = 725 @@ -61,6 +86,7 @@ def test_issue_140(self): "loads_id": [(27, 1)]}}) obs, reward, done, info = env.step(act0) obs, reward, done, info = env.step(act1) + assert not done simulate_obs0, simulate_reward0, simulate_done0, simulate_info0 = obs.simulate(do_nothing) simulate_obs1, simulate_reward1, simulate_done1, simulate_info1 = obs.simulate(act2) diff --git a/grid2op/tests/test_issue_146.py b/grid2op/tests/test_issue_146.py new file mode 100644 index 000000000..de4642517 --- /dev/null +++ b/grid2op/tests/test_issue_146.py @@ -0,0 +1,53 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. 
+ +import grid2op +import numpy as np +#!/usr/bin/env python3 + +import grid2op +import unittest +from grid2op.Reward import BaseReward +import warnings +from grid2op.dtypes import dt_float + +import pdb + + +class TestReward(BaseReward): + def __init__(self): + super().__init__() + self.reward_min = dt_float(100.0) # Note difference from below + self.reward_max = dt_float(0.0) + + def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): + if has_error: + return dt_float(-10.0) + else: + return dt_float(1.0) + + +class Issue146Tester(unittest.TestCase): + def test_issue_146(self): + """ + the reward helper skipped the call to the reward function when "has_error" was True. + This was not really an issue... but rather an enhancement, but still. + """ + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env = grid2op.make("rte_case14_realistic", test=True, reward_class=TestReward) + + action = env.action_space({"set_bus": {"substations_id": [(1, [2, 2, 1, 1, 2, -1])]}}) + obs, reward, done, info = env.step(action) + assert done + assert reward == dt_float(-10.0), "reward should be -10.0 and not \"reward_min\" (ie 100.)" + + +if __name__ == "__main__": + unittest.main() diff --git a/grid2op/tests/test_issue_147.py b/grid2op/tests/test_issue_147.py new file mode 100644 index 000000000..b308d83ef --- /dev/null +++ b/grid2op/tests/test_issue_147.py @@ -0,0 +1,55 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. + +import grid2op +import numpy as np +#!/usr/bin/env python3 + +import grid2op +import unittest +from grid2op.Parameters import Parameters +import warnings +import pdb + + +class Issue147Tester(unittest.TestCase): + def test_issue_147(self): + """ + The rule "Prevent Reconnection" was not properly applied because the + observation of the _ObsEnv was not properly updated.
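+
+ Concretely, as checked below: after a powerline is disconnected and later reconnected,
+ `obs.simulate` must apply the same cooldown rules as `env.step`, so a legal action is
+ not wrongly reported as "is_illegal" by simulate.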
+ """ + + param = Parameters() + param.NO_OVERFLOW_DISCONNECTION = True + param.NB_TIMESTEP_COOLDOWN_SUB = 3 + param.NB_TIMESTEP_COOLDOWN_LINE = 3 + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env = grid2op.make("rte_case14_realistic", test=True, param=param) + + action = env.action_space({"set_bus": {"substations_id": [(1, [2, 2, 1, 1, 2, 2])]}}) + + obs, reward, done, info = env.step(env.action_space({"set_line_status": [(0, -1)]})) + env.step(env.action_space()) + sim_o, sim_r, sim_d, info = obs.simulate(env.action_space()) + env.step(env.action_space()) + sim_o, sim_r, sim_d, info = obs.simulate(env.action_space()) + env.step(env.action_space()) + sim_o, sim_r, sim_d, info = obs.simulate(env.action_space()) + obs, reward, done, info = env.step(env.action_space({"set_line_status": [(0, 1)]})) + assert obs.time_before_cooldown_line[0] == 3 + sim_o, sim_r, sim_d, sim_info = obs.simulate(action) + assert not sim_d + assert not sim_info["is_illegal"] # this was declared as "illegal" due to an issue with updating + # the line status in the observation of the _ObsEnv + obs, reward, done, info = obs.simulate(action) + assert not info["is_illegal"] + + +if __name__ == "__main__": + unittest.main() diff --git a/grid2op/tests/test_issue_148.py b/grid2op/tests/test_issue_148.py new file mode 100644 index 000000000..a6afcbb79 --- /dev/null +++ b/grid2op/tests/test_issue_148.py @@ -0,0 +1,80 @@ +# Copyright (c) 2019-2020, RTE (https://www.rte-france.com) +# See AUTHORS.txt +# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. +# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, +# you can obtain one at http://mozilla.org/MPL/2.0/. +# SPDX-License-Identifier: MPL-2.0 +# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems. + +import grid2op +import numpy as np +import os +from grid2op.tests.helper_path_test import PATH_CHRONICS + +import grid2op +import unittest +from grid2op.Parameters import Parameters +import warnings +import pdb + + +class Issue148Tester(unittest.TestCase): + def test_issue_148(self): + """ + The rule "Prevent Reconnection" was not properly applied, this was because the + observation of the _ObsEnv was not properly updated. 
+ """ + + param = Parameters() + param.NO_OVERFLOW_DISCONNECTION = True + param.NB_TIMESTEP_COOLDOWN_SUB = 3 + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + env = grid2op.make(os.path.join(PATH_CHRONICS, "env_14_test_maintenance"), + param=param) + + ID_MAINT = 11 # in maintenance at the second time step + obs = env.reset() + # check i can "simulate" properly if a maintenance happens next + sim_o, sim_r, sim_d, sim_i = obs.simulate(env.action_space()) + assert not sim_d + assert sim_o.time_next_maintenance[ID_MAINT] == 0 # the stuff have been properly updated + assert not sim_o.line_status[ID_MAINT] + oo_, rr_, dd_, ii_ = env.step(env.action_space()) + assert not dd_ + assert oo_.time_next_maintenance[ID_MAINT] == 0 + assert not oo_.line_status[ID_MAINT] + + # check once the maintenance is performed, it stays this way + sim_o, sim_r, sim_d, sim_i = oo_.simulate(env.action_space()) + assert not sim_d + assert sim_o.time_next_maintenance[ID_MAINT] == 0 # the stuff have been properly updated + assert not sim_o.line_status[ID_MAINT] + oo_, rr_, dd_, ii_ = env.step(env.action_space()) + assert not dd_ + assert oo_.time_next_maintenance[ID_MAINT] == 0 + assert not oo_.line_status[ID_MAINT] + + # now test the cooldown + action = env.action_space({"set_bus": {"substations_id": [(1, [1, 1, 1, 1, 1, 1])]}}) + oo_, rr_, dd_, ii_ = env.step(action) + assert oo_.time_before_cooldown_sub[1] == 3 + oo_, rr_, dd_, ii_ = env.step(env.action_space()) + oo_, rr_, dd_, ii_ = env.step(action) + assert oo_.time_before_cooldown_sub[1] == 1 + assert ii_["is_illegal"] + + oo_, rr_, dd_, ii_ = oo_.simulate(action) + assert not dd_ + assert oo_.time_before_cooldown_sub[ID_MAINT] == 0 + assert not ii_["is_illegal"] + + oo_, rr_, dd_, ii_ = env.step(env.action_space()) + assert not dd_ + assert oo_.time_before_cooldown_sub[ID_MAINT] == 0 + + + + +if __name__ == "__main__": + unittest.main()