diff --git a/README.md b/README.md index f8ee28c..40911f3 100644 --- a/README.md +++ b/README.md @@ -10,45 +10,72 @@ The environment is described in [this paper](https://www.researchgate.net/public - `/examples` contains prototype code for the interaction of RL algorithms with an emulator building model from BOPTEST. - `/testing` contains code for unit testing of this software. -## Quick-Start +## Quick-Start (using BOPTEST-Service) +BOPTEST-Service allows direct access to BOPTEST test cases in the cloud, without the need to run them locally. Interacting with BOPTEST-Service requires less configuration effort but is considerably slower because of the communication overhead between the agent and the test case running in the cloud. Use this approach when you want to quickly check out the functionality of this repository. + 1) Create a conda environment from the `environment.yml` file provided (instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)). -2) Run a BOPTEST case with the building emulator model to be controlled (instructions [here](https://github.com/ibpsa/project1-boptest/blob/master/README.md)). -3) Develop and test your own RL algorithms. 
See example below using the [Bestest hydronic case with a heat-pump](https://github.com/ibpsa/project1-boptest/tree/master/testcases/bestest_hydronic_heat_pump) and the [A2C algorithm](https://stable-baselines.readthedocs.io/en/master/modules/a2c.html) from Stable-Baselines: +2) Check out the `boptest-gym-service` branch and run the example below that uses the [Bestest hydronic case with a heat-pump](https://github.com/ibpsa/project1-boptest/tree/master/testcases/bestest_hydronic_heat_pump) and the [DQN algorithm](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) from Stable-Baselines3: ```python -from boptestGymEnv import BoptestGymEnv, NormalizedActionWrapper, NormalizedObservationWrapper -from stable_baselines3 import A2C -from examples.test_and_plot import test_agent - -# BOPTEST case address -url = 'http://127.0.0.1:5000' - -# Instantite environment -env = BoptestGymEnv(url = url, - actions = ['oveHeaPumY_u'], - observations = {'reaTZon_y':(280.,310.)}, - random_start_time = True, - max_episode_length = 24*3600, - warmup_period = 24*3600, - step_period = 900) - -# Add wrappers to normalize state and action spaces (Optional) +from boptestGymEnv import BoptestGymEnv, NormalizedObservationWrapper, DiscretizedActionWrapper +from stable_baselines3 import DQN + +# url for the BOPTEST service. 
+url = 'https://api.boptest.net' + +# Decide the state-action space of your test case +env = BoptestGymEnv( + url = url, + testcase = 'bestest_hydronic_heat_pump', + actions = ['oveHeaPumY_u'], + observations = {'time':(0,604800), + 'reaTZon_y':(280.,310.), + 'TDryBul':(265,303), + 'HDirNor':(0,862), + 'InternalGainsRad[1]':(0,219), + 'PriceElectricPowerHighlyDynamic':(-0.4,0.4), + 'LowerSetp[1]':(280.,310.), + 'UpperSetp[1]':(280.,310.)}, + predictive_period = 24*3600, + regressive_period = 6*3600, + random_start_time = True, + max_episode_length = 24*3600, + warmup_period = 24*3600, + step_period = 3600) + +# Normalize observations and discretize action space env = NormalizedObservationWrapper(env) -env = NormalizedActionWrapper(env) +env = DiscretizedActionWrapper(env,n_bins_act=10) + +# Instantiate an RL agent +model = DQN('MlpPolicy', env, verbose=1, gamma=0.99, + learning_rate=5e-4, batch_size=24, + buffer_size=365*24, learning_starts=24, train_freq=1) -# Instantiate and train an RL algorithm -model = A2C('MlpPolicy', env) -model.learn(total_timesteps=int(1e5)) +# Main training loop +model.learn(total_timesteps=10) -# Test trained agent -observations, actions, rewards, kpis = test_agent(env, model, - start_time=0, - episode_length=14*24*3600, - warmup_period=24*3600, - plot=True) +# Loop for one episode of experience (one day) +done = False +obs, _ = env.reset() +while not done: + action, _ = model.predict(obs, deterministic=True) + obs,reward,terminated,truncated,info = env.step(action) + done = (terminated or truncated) + +# Obtain KPIs for evaluation +env.get_kpis() ``` +## Quick-Start (running BOPTEST locally) +Running BOPTEST locally is substantially faster than using BOPTEST-Service, but requires more configuration effort. + +1) Create a conda environment from the `environment.yml` file provided (instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)). 
+2) Run a BOPTEST case with the building emulator model to be controlled (instructions [here](https://github.com/ibpsa/project1-boptest/blob/master/README.md)). +3) Check out the `master` branch of this repository and run the example above, replacing the url with `url = 'http://127.0.0.1:5000'` and omitting the `testcase` argument of the `BoptestGymEnv` class. + + ## Citing the project Please use the following reference if you used this repository for your research. diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..d312e28 --- /dev/null +++ b/__init__.py @@ -0,0 +1,5 @@ +''' +Created on Dec 20, 2020 + +@author: Javier Arroyo +''' diff --git a/docs/tutorials/CCAI Summer School 2022/Tutorial_2_Building_Control_with_RL_using_BOPTEST.ipynb b/docs/tutorials/CCAI Summer School 2022/Tutorial_2_Building_Control_with_RL_using_BOPTEST.ipynb deleted file mode 100644 index 48b60d4..0000000 --- a/docs/tutorials/CCAI Summer School 2022/Tutorial_2_Building_Control_with_RL_using_BOPTEST.ipynb +++ /dev/null @@ -1,3090 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V1CcDG8FanTw" - }, - "source": [ - "#**Key Learning Objectives** 🎯" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oT2QjTu24zwV" - }, - "source": [ - "\n", - "This is an introductory, hands-on tutorial to guide you through the main concepts of Reinforcement Learning (RL) for controlling Heating, Ventilation and Air Conditioning (HVAC) systems for buildings. \n", - "We are going to apply RL to a building emulator from the Building Optimization Testing (BOPTEST) framework **[1]** using the BOPTEST-Gym interface **[2]**. 
\n", - "BOPTEST is a framework for performance benchmarking of control algorithms.\n", - "Further information and documentation can be found here: \n", - "\n", - "[https://ibpsa.github.io/project1-boptest/](https://ibpsa.github.io/project1-boptest/)\n", - "\n", - "You will learn:\n", - "\n", - "- What RL is, how it works and how it can be used in the application of building energy management. \n", - "- The most popular standard for representing general RL problems: OpenAI-Gym.\n", - "- The BOPTEST API and its Gym interface. \n", - "\n", - "📌 **Note**: This tutorial was prepared for use with BOPTEST v0.3.0. \n", - "and will make usage of a web-based version of BOPTEST (called \"BOPTEST-Service\") so as not to require installation of any BOPTEST software on a user's own device. It is also possible to use BOPTEST on a user's own (local) device. Both the web-based and local versions have the same functionality, and will produce the same results, with only small changes in the API (changing the BOPTEST-service url to your localhost url, that is, to: `http://127.0.0.1:5000/`). \n", - "\n", - "**EDIT**: This tutorial was originally developed with BOPTEST v0.2.0. and has been updated to version 0.3.0. on *Oct 25, 2022*. There are just small changes required for this update, basically retrieving the `'payload'` after each request. That is the origin of the differences between the notebook explained in the recording and this updated notebook. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TSTpxm2GrjhR" - }, - "source": [ - "# **Outline** ⏰\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VUbaQ5GqrvIl" - }, - "source": [ - "[Part 1: Background](#background)\n", - " 1. [Introduction to Reinforcement Learning](#introRL)\n", - " 1. [Application of Reinforcement Learning in buildings](#applicationRL)\n", - "\n", - "[Part 2: The OpenAI-Gym standard](#openAIGym)\n", - " 1. [What is OpenAI-Gym?](#whatIsOpenAIGym)\n", - " 1. 
[Example using an OpenAI-Gym environment](#exampleOpenAIGym)\n", - "\n", - "[Part 3: The Building Optimization Testing (BOPTEST) Framework](#boptest)\n", - " 1. [What is BOPTEST?](#whatIsBoptest)\n", - " 1. [Selecting a building test case](#selectBuilding)\n", - " 1. [Obtaining general information about the building](#obtainInfo)\n", - " 1. [Getting control input and measurement points](#gettingIOs)\n", - "\n", - "[Part 4: Implementing RL for a building with BOPTEST-Gym](#implementingRL)\n", - " 1. [What is BOPTEST-Gym?](#whatIsBoptestGym)\n", - " 1. [Starting up a BOPTEST-Gym environment](#startingUpBoptestGym)\n", - " 1. [Interacting with a BOPTEST-Gym environment](#interactingWithBoptestGym)\n", - " 1. [Developing a basic RL algorithm](#developingRlAlgo)\n", - " 1. [Testing our RL algorithm in BOPTEST-Gym](#testingRlAlgo)\n", - "\n", - "[Gearing up](#gearingUp)\n", - "\n", - "[Further resources](#furtherResources)\n", - "\n", - "[Feedback](#feedbackForm)\n", - "\n", - "[Annex I: Formal RL theory](#theoryRlFormal)\n", - "\n", - "[References](#references)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oEzP9ZW4MXPv" - }, - "source": [ - "#**Part 1: Background** 📖 " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Fas232CyMX6_" - }, - "source": [ - "##**Introduction to Reinforcement Learning** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TAy9fRjUTSdb" - }, - "source": [ - "Could you imagine a magic oracle able to decide on the best actions to optimize any process? Could you imagine this oracle not needing any prior information of the process but just learning from interacting with it? Powerful, right? Well, that is exactly what RL is meant for. \n", - "\n", - "Reinforcement Learning (RL) is one of the categories of machine learning, along with unsupervised learning and supervised learning. 
The main difference from the other categories is that RL learns from dynamic data, that is, data that are obtained while learning. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e853vYumSx08" - }, - "source": [ - "\n", - "\n", - "*Figure: The categories of machine learning. Source: [Mathworks](https://www.mathworks.com/discovery/reinforcement-learning.html)*\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2_qdE6Ab4aE9" - }, - "source": [ - "In RL the goal is to learn the actions to be taken to achieve a predefined objective. RL relies on the principle of *repetitive experimentation*, that is, an approach where we roll out several **episodes of experience** where an agent 🤖 (the RL algorithm) interacts with its environment 🌎 (the process to be optimized) to learn based on a **reward** signal that is returned for every **action** taken from a specific **state** of the environment. \n", - "\n", - "Let's take the example of teaching a dog to grab a stick. In this case, the dog is the agent and all its surroundings conform the environment. Whenever the dog observes that there is a person throwing a stick it will perform an action. In case it grabs the stick and brings it back, the person will provide a cookie as a reward to encourage that behavior. In case the dog does not go for the stick but just runs around or goes chasing other dogs, the person will not provide the cookie. Eventually, the dog will associate the actions that bring the most rewards to specific observations and will be taking those actions accordingly. \n", - "\n", - "A more formal introduction to RL and its associated terminology can be found at the end of this tutorial, in [Annex I: Formal RL theory](#theoryRlFormal). " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mMppgppKX4Fy" - }, - "source": [ - "\n", - "\n", - "*Figure: RL notation when teaching a dog. 
Source: [Mathworks](https://www.mathworks.com/discovery/reinforcement-learning.html)*\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pNy7uRzo8foI" - }, - "source": [ - "⚠️ **Important note:** ⚠️ It is common to find in the RL literature that the same term indistinctly designates the\n", - "state and the observation. This is not strictly correct for partially observable environments (most of the cases) where the observation only conveys part of the information that defines the state. For example, the state of the Tic-Tac-Toe game can be fully observed because there is a finite number of possibilities that define the state of a game. On the contrary, the thermal state of a building is only partially oservable. We can observe the indoor air temperature, but we cannot measure all temperatures from walls, ground, furniture... which also influence the building's thermal state. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pNC0UnC2WYyE" - }, - "source": [ - "\n", - "\n", - "What is particularly extraordinary of RL algorithms is that the same algorithm can be successfully used for a variety of tasks, from [robotic motion control](https://www.technologyreview.com/2021/04/08/1022176/boston-dynamics-cassie-robot-walk-reinforcement-learning-ai/) to [defeat the human world champion in the game of Go](\n", - "https://www.youtube.com/watch?v=WXuK6gekU1Y&ab_channel=DeepMind).\n", - "The latter is an astonishing achievement. It is true that the IBM supercomputer Deep Blue could previously [defeat Garry Casparov in chess](https://en.wikipedia.org/wiki/Deep_Blue_versus_Garry_Kasparov), but Go is to chess what chess is to Tic-Tac-Toe ([*Chris Wiltz*](https://www.designnews.com/design-hardware-software/googles-ai-beat-go-champion-mimicking-human-intuition)). 
And what is more important, professionals of Go state that this game has so many possible combinations that mastering it requires certain intuition and creativity, qualities that have only been attributed to humans so far... if AlphaGo defeated the best human player of Go, could machines resemble these qualities? Well, that is more a philosophical question. This tutorial is limited to investigate whether machines can efficiently control buildings, which you will see is already an enormous challenge!\n", - "\n", - "\n", - "\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XOcvaJpkNbho" - }, - "source": [ - "\n", - "\n", - "*Figure: Netflix documentary that explains how AlphaGo, a RL algorithm developed by [DeepMind](https://www.deepmind.com/), could defeat Lee Sedol (4-1) and Fan Hui (5-0), the human world champions in the game of Go.*\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "o1jhKBOeOciu" - }, - "source": [ - "📌 **QUICK FACTS:**\n", - "- RL is a **category of machine learning** algorithms, together with supervised and unsupervised learning. \n", - "- Contrarily to other machine learning techniques, RL learns from **dynamic data**, that is, data that are obtained from interactions with the environment. \n", - "- Particularly, it learns from **state-action-reward** samples, so there is no need of domain knowledge to model the environment.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6G1nWECgbmuW" - }, - "source": [ - "##**Application of Reinforcement Learning in buildings** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xoTh8XvAM_OR" - }, - "source": [ - "During the last decade, there has been a clear interest growth in using optimal control for HVAC systems **[3]**. The figure below underlines this increased interest by showing the number of yearly peer-reviewed scientific publications related to optimal control in buildings. 
\n", - "RL algorithms have\n", - "gained particular popularity for their application in a **demand response** setting.\n", - "An extensive review for this application was written by  Vázquez-Canteli et al. **[4]** This review is\n", - "not limited to HVAC systems but also demand response for charging electric\n", - "vehicles or thermal energy storage." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7NywyXo6hD5n" - }, - "source": [ - "\n", - "\n", - "*Figure: Evolution of the number of scientific publications about optimal control in buildings during the last decades. Data obtained from the Clarivate Web of Science.*" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8KR-sTeJiJG2" - }, - "source": [ - "RL has already attracted the attention of the building control community for\n", - "many years. The figure below is obtained from the popular paper of Chen et al. **[5]** who graphically summarized the application of RL in buildings indicating the amount of data required by each research work to train the implemented RL algorithm. \n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nCH0DBBbPEC7" - }, - "source": [ - "\n", - "\n", - "\n", - "*Figure: Summary of the data required in the history of RL applications to buildings. Chen et al.* **[5]** ." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ae18iXNKWV5I" - }, - "source": [ - "You can see from the figure that the feasibility and potential of applying RL for HVAC control\n", - "was first investigated by Liu and Henze back in 2006. Then, the interest was lost for a period, probably because Model Predictive Control (MPC) has been typically preferred for optimal control in buildings because it is much more data-efficient (it does not need as much data to be implemented). A comprehensive and complete review on the application of MPC for building energy management is provided by Drgona et al. **[6]**. 
\n", - "The reasons why RL is gaining momentum again are clear: \n", - "\n", - "- Evolution in deep learning\n", - "- We have much more data than before\n", - "- We have much more computational power than before\n", - "\n", - "In fact, there exist very recent developments for the application of RL in buildings, most of them using the OpenAI-Gym standard that is introduced in the next section. It is worth mentioning:\n", - "\n", - "- [CityLearn](https://github.com/intelligent-environments-lab/CityLearn) ➡️ Gym environment for providing demand response scenarios at an urban scale. That is, the goal of the RL agent is to flatten the energy demand of a district. It considers static\n", - "building heating and cooling load data and simplified models for energy storage. \n", - "- [Gym-Eplus](https://github.com/zhangzhizza/Gym-Eplus) ➡️ Gym environment wrapper around EnergyPlus simulation models. \n", - "- [Sinergym](https://github.com/ugr-sail/sinergym) ➡️ Extension of Gym-Eplus. \n", - "- [Energym](https://github.com/bsl546/energym) ➡️ Gym wrapper around building simulation models to assess controller performance. \n", - "- [Beobench](https://github.com/rdnfn/beobench) ➡️ A Toolkit for Unified Access to BuildingSimulations for Reinforcement Learning.\n", - "- 👉🏻[BOPTEST-Gym](https://github.com/ibpsa/project1-boptest-gym) ➡️ Gym environment for the BOPTEST Framework. The goal of the RL agent in this environment is to efficienty control an individual building. It allows testing against high-fidelity building models. \n", - "\n", - "The last of which is the focus of this tutorial. \n", - "\n", - "These RL frameworks for HVAC control bring hope\n", - "to the adoption of this technology in buildings. However, there is still a clear\n", - "need to different techniques and understand the best practices of RL \n", - "for this particular application. Let's investigate how!" 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7YnuNAQdM_L2" - }, - "source": [ - "#**Part 2: The OpenAI-Gym standard** 🤖 " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Sv728rc3M_Ir" - }, - "source": [ - "##**What is OpenAI-Gym?** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AhQfyrBCUigq" - }, - "source": [ - "[OpenAI-Gym](https://www.gymlibrary.ml/) is a software package that gathers a **collection of tasks** called environments with a **unique Python interface** to control all of them. This unique interface is a key feature in the software package, and has given rise to a standard for the format in which RL agents are developed and treated, independently of\n", - "their internal functioning. The tasks defined in the Gym environments involve\n", - "a wide variety of fields like video games, classic control theory problems, or high dimensional robotic locomotive tasks. You can find a list of available environment [here](https://www.gymlibrary.ml/environments/classic_control/).\n", - "\n", - "\n", - "\n", - "The OpenAI-Gym philosophy heavily relies on the episodic aspect of RL, i.e.\n", - "the agent’s history is broken down into a series of experiences called **episodes** that may be of\n", - "variable length. The agent interacts with the environment until it reaches a\n", - "terminal state when the episode is finished. The goal is to maximize the total\n", - "cumulative reward per episode.\n", - "\n", - "The main methods of the OpenAI-Gym interface are the following:\n", - "\n", - "- `obs = env.reset()` ➡️ The `reset` method is the one called first to initialize the environment `env` (whatever it is). The environment returns the first observation `obs` (state). \n", - "- `next_obs,reward,done,info = env.step(action)` ➡️ The `step` method is used iteratively to interact with the environment. 
The RL agent computes an `action`, and the environment returns the next observation, associated reward, whether the episode is done (=terminated), and some other optional information. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ntNSOBzJPJuF" - }, - "source": [ - "##**Example using an OpenAI-Gym environment** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mQ2879zIPOJ0" - }, - "source": [ - "Now that we understand the main concepts of OpenAI-Gym we are going to illustrate its typical usage with a quick example. We're going to use the [CartPole environment](https://www.gymlibrary.ml/environments/classic_control/cart_pole/), which is one of the classic control problems available in the OpenAI-Gym framework.\n", - "Let's start by installing the dependencies that we require: \n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "E0sfte45O8iN", - "outputId": "de423891-b2c5-4901-f197-592ce81e3233" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: gym==0.21.0 in /usr/local/lib/python3.7/dist-packages (0.21.0)\n", - "Requirement already satisfied: numpy>=1.18.0 in /usr/local/lib/python3.7/dist-packages (from gym==0.21.0) (1.21.6)\n", - "Requirement already satisfied: importlib-metadata>=4.8.1 in /usr/local/lib/python3.7/dist-packages (from gym==0.21.0) (4.13.0)\n", - "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from gym==0.21.0) (1.5.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=4.8.1->gym==0.21.0) (3.9.0)\n", - "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from 
importlib-metadata>=4.8.1->gym==0.21.0) (4.1.1)\n" - ] - } - ], - "source": [ - "!pip install gym==0.21.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RnmDsSAnM_F-" - }, - "source": [ - "**Cartpole environment description:**\n", - "\n", - "\"*A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces in the left (-1) and right (+1) direction on the cart. A reward of +1 is provided for every timestep that the pole remains upright.*\"\n", - "\n", - "\n", - "You can also check out the physics of the environment in the [GitHub repository of OpenAI-Gym](https://github.com/openai/gym).\n", - "See below an example of the evolution of an episode of the Cartpole environment. Note that most of the Gym envronments can be rendered to show how the RL agent is performing. \n", - "\n", - "![Cartpole](https://cdn-images-1.medium.com/max/1143/1*h4WTQNVIsvMXJTCpXm_TAw.gif)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eMa2F3q_pV-C" - }, - "source": [ - "First, we are going to import `gym` and then `make` our Cartpole environment (version 1). Note how it is possible to instantiate the registered Gym environments by referring to their names with a string. \n", - "After that, we are going to interact with the environment for a maximum number of episodes of experience that we are going to indicate with `max_num_episodes`. In each episode, the environment is initialized with the `reset` method, and then we interact with the environment until the episode is `done`, which happens when the pole is down. \n", - "\n", - "It is important to note that in this example we are not implementing RL just yet. Instead, we are using the `sample` method from the action space of the environment to compute a random control action. 
This is useful when we want to quickly check how an environment behaves, but we should aim to replace that line by some intelligent RL agent able to compute a control action that optimizes the performance of the environment. " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "LBxXhZc5nGb3", - "outputId": "7ca9fb87-6573-4e69-ec2d-c9f5ac0025ad" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - " Episode #1 had 17 steps and total_reward=17.0\n", - "\n", - " Episode #2 had 10 steps and total_reward=10.0\n", - "\n", - " Episode #3 had 19 steps and total_reward=19.0\n", - "\n", - " Episode #4 had 49 steps and total_reward=49.0\n", - "\n", - " Episode #5 had 15 steps and total_reward=15.0\n", - "\n", - " Episode #6 had 38 steps and total_reward=38.0\n", - "\n", - " Episode #7 had 28 steps and total_reward=28.0\n", - "\n", - " Episode #8 had 10 steps and total_reward=10.0\n", - "\n", - " Episode #9 had 38 steps and total_reward=38.0\n", - "\n", - " Episode #10 had 25 steps and total_reward=25.0\n" - ] - } - ], - "source": [ - "import gym\n", - "\n", - "env = gym.make('CartPole-v1')\n", - "max_num_episodes = 10 # maximum number of episodes\n", - "\n", - "for episode in range(max_num_episodes):\n", - " done = False\n", - " obs = env.reset()\n", - " total_reward = 0.0\n", - " step = 0\n", - " while not done:\n", - " action = env.action_space.sample() # Compute random action. 
This is to be replaced by a RL algo\n", - " obs,reward,done,info = env.step(action) # send the action to the environment\n", - " total_reward += reward\n", - " step += 1\n", - "\n", - " print('\\n Episode #{} had {} steps and total_reward={}'.format(episode+1,step,total_reward))\n", - "\n", - "env.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ai9dHGWksZeu" - }, - "source": [ - "Notice how every episode lasts for a different number of steps because we are applying random forces to the cart. Also, notice how the total reward of each episode is equal to the number of steps, because the Cartpole environment gives a reward of +1 every timestep that we get to maintain the pole upright.\n", - "\n", - "\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tpeeOA8BM-5L" - }, - "source": [ - "#**Part 3: The Building Optimization Testing (BOPTEST) framework** 🏠 " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z0ry1NQwuMXa" - }, - "source": [ - "Now that we understand how RL and OpenAI-Gym work, let's use that knowledge for the particular application of buildings. \n", - "In this tutorial we are going to connect with a BOPTEST building emulator that we will use as our environment to control through RL.\n", - "This emulator is a simulation model that was configured based on detailed physics and that has been peer-reviewed to ensure that it represents the behavior of an actual building as realistically as possible. Hence, although it is a simulation model, we are going to consider this emulator as the real building for control, testing and benchmarking. \n", - "\n", - "In this section we explain what BOPTEST is and how it can be generally used. Next section will exclusively focus on BOPTEST-Gym, the OpenAI-Gym interface of BOPTEST, to learn how we can use it to implement and assess RL algorithms for building control. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OcPk7llkJP4m" - }, - "source": [ - "##**What is BOPTEST?** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HtDrzTFuJU0e" - }, - "source": [ - "BOPTEST is a software framework enables the performance evaluation and benchmarking of advanced control algorithms for building HVAC control through simulations. The software is developed and is available on the BOPTEST GitHub respository at [https://github.com/ibpsa/project1-boptest](https://github.com/ibpsa/project1-boptest) \n", - "\n", - "and general information about BOPTEST can be found through the following link:\n", - "\n", - "[https://ibpsa.github.io/project1-boptest/](https://ibpsa.github.io/project1-boptest/)\n", - "\n", - "In the link below you can also find information about the overarching project that gave birth to BOPTEST, IBPSA Project 1:\n", - "\n", - "[https://ibpsa.github.io/project1/](https://ibpsa.github.io/project1/)\n", - "\n", - "\n", - "\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sNiHr2w0IFYI" - }, - "source": [ - "\n", - "\n", - "*Figure: The BOPTEST concept.*\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Jj7kbbF8JXEG" - }, - "source": [ - "The main use case of the BOPTEST framework is the one where a control developer wants to evaluate the performance of his/her building control strategy. Testing in a real building may be very expensive, or just not possible. BOPTEST offers a menu of emulator building models so that the control developer can select one of them, interact in co-simulation, and eventually assess the performance of his/her controller with a set of Key Performance Indicators (KPIs) that are calculated by the BOPTEST framework. 
\n", - "\n", - "Note that using a standardized set of building emulators, testing scenarios, and KPIs enables benchmarking, allows to compare across different controllers, and throws light on what are the best practices for building control. In addition, making these emulators easily and rapidly available to use allows for control developers without expertise in building modeling to utilize them for controls testing and evaluation. \n", - "\n", - "In this section we are going to explain the basic BOPTEST functionality to connect to a building test case and obtain available control inputs and measurement points. For a more complete description on how to use BOPTEST please visit this [BOPTEST Colab tutorial](https://github.com/ibpsa/project1-boptest/blob/master/docs/workshops/BS21Workshop_20210831/Introduction_to_the_BOPTEST_framework.ipynb). " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l5Fgw7eJHEjy" - }, - "source": [ - "##**Selecting a building test case** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "owb2Z2rqHEjz" - }, - "source": [ - "BOPTEST test cases are developed as [Functional Mock-up Units (FMU's)](https://fmi-standard.org/) and deployed within a containerized environment using the [Docker](https://www.docker.com/) software with:\n", - "\n", - "* A detailed emulator **building model**.\n", - "* Yearly **boundary condition data** for weather, schedules, pricing, and emission factors. These data are representative of the building location. \n", - "* An **API** that allows for, among other things, initializing a simulation or testing scenario, advancing a simulation with a control input, receiving forecast data, receiving emulator data, and receiving computed KPIs. The full API is described [here](https://github.com/ibpsa/project1-boptest/tree/boptest-service#test-case-restful-api).\n", - "\n", - "The basic workflow to test a controller is:\n", - "\n", - "1. Select a **test case** from the menu of those available. 
\n", - "2. Select one of the **testing scenarios** defined for the given test case. Testing scenarios are standardized for each emulator. \n", - "3. Set **parameters** for the interaction with your test controller, such as the control step or forecast horizon, if required. \n", - "4. Run the test case scenario in a **co-simulation** loop with your controller. \n", - "5. Request the KPIs and **evaluate** your controller's performance. \n", - "\n", - "We start by selecting and launching a BOPTEST building test case from the [repository of currently available test cases](https://ibpsa.github.io/project1-boptest/testcases/index.html). In this example, we are going to work with the test case called `bestest_hydronic_heat_pump`, which is a single-zone residential building with radiant floor heating and a heat pump. This is a high-fidelity, yet, relatively simple test case that allows us to focus on fundamental aspects. You may want to note the other test cases available in the repository as well as the fact that there are more under development. \n", - "\n", - "We can launch our chosen test case as follows. First, import the Python `requests` library so that we can make HTTP requests to the BOPTEST API at the address indicated by the `url`. Then, use the `POST /testcases//select` BOPTEST API endpoint to launch the test case and receive a corresponding `testid`. While the `url` is the common gateway for everyone to access the BOPTEST web-service, the `testid` is a unique identifier for you to address the test case that you have selected and launched. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "V_qU6ukZghTb" - }, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "# url for the BOPTEST service\n", - "url = 'https://api.boptest.net' \n", - "\n", - "# Select test case and get identifier\n", - "testcase = 'bestest_hydronic_heat_pump'\n", - "\n", - "# Check if already started a test case and stop it if so before starting another\n", - "try:\n", - " requests.put('{0}/stop/{1}'.format(url, testid))\n", - "except:\n", - " pass\n", - "\n", - "# Select and start a new test case\n", - "testid = \\\n", - "requests.post('{0}/testcases/{1}/select'.format(url,testcase)).json()['testid']\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eRZGKWDlHEj2" - }, - "source": [ - "Please do not get distracted by the `try-except` statement. We are using that one to stop already created test cases if we are revisiting this cell. This prevents from having several dangling test cases that can overwhelm our server. \n", - "\n", - "Once you have successfully obtained the `testid`, it is possible to start interacting with your selected test case using the rest of the BOPTEST API. You will need this `testid` for all further interactions with this test case. For example, use the `GET /name` BOPTEST API endpoint, along with your `testid`, to request the name of your test case and check that it matches the one we want." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "8mdK5JtNI-e_", - "outputId": "d2d023e0-eb35-4122-cbd6-11420f694521" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'name': 'bestest_hydronic_heat_pump'}\n" - ] - } - ], - "source": [ - "# Get test case name\n", - "name = requests.get('{0}/name/{1}'.format(url, testid)).json()['payload']\n", - "print(name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gUOQXYjlHEj3" - }, - "source": [ - "With our unique `testid` in-hand and having some practice using the BOPTEST API, we are ready to move on to start using our building emulator. For this tutorial, we are going to explain only how to obtain information about the building using the BOPTEST API before moving to learn BOPTEST-Gym. \n", - "Note that the test case will timeout after 15 minutes of no requests. If the test case times out, you can simply select and start a new one by repeating the steps described above.\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jmglIZGFHEj3" - }, - "source": [ - "##**Obtaining general information about the building** \n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mJ6leLGvRJya" - }, - "source": [ - "The first thing we want to do is learn about the building and system that we want to control. All building information can be found under documentation provided for each specific test case on the [Test Cases tab](https://ibpsa.github.io/project1-boptest/testcases/index.html) of the BOPTEST website. \n", - "\n", - "The building information includes a description of the building envelope, the HVAC system design, the functioning of the baseline controller, available control inputs and measurement outputs, and available testing scenarios. 
Understanding how the system works is an important practice for control design, so take as much time as needed to understand the equipment, the points that can be measured, and the points that can be overwritten by your controller. \n", - "We briefly summarize the `bestest_hydronic_heat_pump` case here for completeness, but it is strongly recommended to have a deeper look into the [documentation](https://ibpsa.github.io/project1-boptest/testcases/ibpsa/testcases_ibpsa_bestest_hydronic_heat_pump/). \n", - "\n", - "The building represents a residential dwelling of 192 $m^2$ for a family of 5 members. \n", - "An air-to-water modulating heat pump of 15 $kW$ nominal heating capacity extracts energy from the ambient air to heat up the floor heating emission system, as shown in the figure below. \n", - "An evaporator fan blows ambient air through the heat pump evaporator when the heat pump is operating. \n", - "The floor heating system injects heat into the floor using water as the working fluid." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SMQcNDl1HEj4" - }, - "source": [ - "\n", - "\n", - "\n", - "*Figure: Schematic of HVAC system and control for the `bestest_hydronic_heat_pump` test case.*" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lNLRXp2eHEj4" - }, - "source": [ - "A baseline controller is embedded in every test case emulator that is meant to be representative of a typical controller for that type of building. The baseline controller includes local loop control such that supervisory set points may be the focus of a test controller, although many of those local loop control signals are also available for overwriting if a user chooses. The baseline controller can also be considered an initial benchmark for control performance. 
\n", - "\n", - "In our selected test case, the baseline controller consists of a PI controller with the zone operative temperature as the controlled variable and the heat pump modulation signal for compressor frequency as the control variable, as depicted as C1 in the figure above and shown in the figure below. \n", - "The control variable is limited between 0 and 1, and it is computed to drive the zone operative temperature towards its set point, which is defined as a function of the occupancy schedule. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RqVtoDgTHEj4" - }, - "source": [ - "\n", - "\n", - "\n", - "*Figure: Primary PI controller C1.*\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ryOicW_KHEj5" - }, - "source": [ - "All other equipment (fan for the heat pump evaporator circuit and floor heating emission system pump) are switched on when the heat pump is working (modulating signal higher than 0) and switched off otherwise. This is depicted in the figure of the HVAC schematic as controller C2." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-J5j60bRHEj5" - }, - "source": [ - "##**Getting control input and measurement points** \n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AGG7G6VeR4WB" - }, - "source": [ - "While control input and measurement points are described in the documentation, they are also available to retreive from the BOPTEST API. This is especially useful to store for later when requesting data for a specific point.\n", - "\n", - "Retrieve the control input and measurement outputs using the `GET /inputs` and `GET /measurements` BOPTEST API endpoints." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0IKRxBykJY6u", - "outputId": "cdf6b6dc-0bf4-426a-c0c5-a55f0951f599" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "TEST CASE INPUTS ---------------------------------------------\n", - "dict_keys(['oveTSet_activate', 'ovePum_activate', 'ovePum_u', 'oveHeaPumY_u', 'oveTSet_u', 'oveHeaPumY_activate', 'oveFan_activate', 'oveFan_u'])\n", - "TEST CASE MEASUREMENTS ---------------------------------------\n", - "dict_keys(['weaSta_reaWeaPAtm_y', 'reaPFan_y', 'reaQHeaPumCon_y', 'reaTRet_y', 'weaSta_reaWeaNOpa_y', 'weaSta_reaWeaTBlaSky_y', 'reaQHeaPumEva_y', 'weaSta_reaWeaNTot_y', 'weaSta_reaWeaSolAlt_y', 'reaTZon_y', 'weaSta_reaWeaHHorIR_y', 'weaSta_reaWeaLon_y', 'weaSta_reaWeaSolTim_y', 'weaSta_reaWeaCloTim_y', 'reaPPumEmi_y', 'weaSta_reaWeaHGloHor_y', 'weaSta_reaWeaHDifHor_y', 'weaSta_reaWeaRelHum_y', 'reaTSetHea_y', 'reaCO2RooAir_y', 'weaSta_reaWeaSolDec_y', 'reaPHeaPum_y', 'weaSta_reaWeaHDirNor_y', 'reaTSetCoo_y', 'weaSta_reaWeaWinDir_y', 'reaTSup_y', 'weaSta_reaWeaSolZen_y', 'reaQFloHea_y', 'reaCOP_y', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTWetBul_y', 'weaSta_reaWeaTDewPoi_y', 'weaSta_reaWeaWinSpe_y', 'weaSta_reaWeaLat_y', 'weaSta_reaWeaCeiHei_y', 'weaSta_reaWeaSolHouAng_y'])\n" - ] - } - ], - "source": [ - "# Get inputs available\n", - "inputs = requests.get('{0}/inputs/{1}'.format(url, testid)).json()['payload']\n", - "print('TEST CASE INPUTS ---------------------------------------------')\n", - "print(inputs.keys())\n", - "# Get measurements available\n", - "print('TEST CASE MEASUREMENTS ---------------------------------------')\n", - "measurements = requests.get('{0}/measurements/{1}'.format(url, testid)).json()['payload']\n", - "print(measurements.keys())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A4L6Gw6YJU5L" - }, - "source": [ - "The naming convention is 
such that the extension `_y` indicates a measurement point, `_u` indicates the value of an input which can be overwritten by a test controller, and `_activate` indicates the enabling (with value 0 or 1) of a test controller to overwrite the corresponding input value. \n", - "Hence, `_u` is enabled for overwriting by the test controller when `_activate=1`.\n", - "`weaSta_` indicates a measurement for a weather point, so that historical weather data can be easily retrieved.\n", - "\n", - "Notice that the jsons returned from the `GET /inputs` and `GET /measurements` BOPTEST API endpoints also include a description and unit of each variable, as well as the minimum and maximum value for input variables:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U7guJ_I10QOF" - }, - "source": [ - "Now let's stop the test case since we are not going to use it for a while. We do this to not overwhelm the server." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "v5_1Q_H80Z5k", - "outputId": "c2e714bb-ef91-4769-c8c4-76465733548d" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ], - "source": [ - "requests.put('{0}/stop/{1}'.format(url, testid))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UHYHM9MjSz_C" - }, - "source": [ - "# **Part 4: Implementing RL for a building with BOPTEST-Gym** 🤖 🏠 " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BEC76h9HT7gL" - }, - "source": [ - "##**What is BOPTEST-Gym?** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z7A9k7GFUBsK" - }, - "source": [ - "BOPTEST-Gym is the OpenAI-Gym interface of BOPTEST that helps to train RL agents for the application of building climate control.\n", - "The BOPTEST-Gym interface accommodates the BOPTEST API to have BOPTEST building emulators as environments 
that follow the OpenAI-Gym standard. \n", - "Therefore, the BOPTEST-Gym interface facilitates the development of RL agents as it allows interacting with the BOPTEST building emulators with a standard that is very well known by the machine learning community. Or even better, it allows us to directly use existing RL agents that have been developed following this standard, like those from the [Stable Baselines 3](https://stable-baselines3.readthedocs.io/en/master/) repository.\n", - "\n", - "You can find more information about BOPTEST-Gym in [this paper](https://publications.ibpsa.org/conference/paper/?id=bs2021_30380), but here we summarize the main points you should know:\n", - "- BOPTEST-Gym enables the interaction of RL agents with a set of physics-based and highly **detailed building models** to assess RL for the application of building climate control. \n", - "- All **hyperparameters** of the environment are initialized when the environment is instantiated. A particularly relevant hyperparameter is `testcase`, a string specifying the BOPTEST emulator of choice. This string selects the building model from the [menu of BOPTEST building emulators](https://ibpsa.github.io/project1-boptest/testcases/index.html). \n", - "- The **state** of any building emulator environment can have a *time* component e.g. a weekly schedule, a *measurement* component with a subset (or all) measurements available in the building, and an *exogenous* component including disturbances of any kind of boundary condition data to the building such as electricity prices, ambient temperature, or temperature set-points. \n", - "- The **action** space is defined based on any subset (or all) inputs available to the emulator. 
These can be either building set-points, like zone\n", - "operative temperature set-points, or lower level actuator signals, such as heat\n", - "pump modulating signal or a pump stage.\n", - "- The **`reset()`** method is called at the beginning of every episode to return the environment to a logical initial state. \n", - "- The **`step()`** method is called every time step to take the action computed by the RL agent, overwrite the building inputs with the vector of action values and advance the building simulation model during one time step period. BOPTEST-Gym also has wrappers for discretization of the state and action spaces. This functionality comes in handy when training RL agents. \n", - "- A default **reward** function is implemented in the `compute_reward` method of the BOPTEST-Gym environment that can be overwritten. It is convenient to use the BOPTEST `/kpis` API to obtain the KPI values at the present time for defining custom reward functions. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kvMgiRhLX2i8" - }, - "source": [ - "##**Starting up a BOPTEST-Gym environment** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WcrOX1Z_UvTY" - }, - "source": [ - "BOPTEST-Gym uses RL algorithms from the [Stable Baselines 3](https://stable-baselines3.readthedocs.io/en/master/) repository to exemplify and test its functionality. 
Therefore, we need to install stable-baselines3.\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jpZk6qJKTuYl", - "outputId": "8b5166ec-7c96-480a-84bc-732bc7521f3a" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: stable-baselines3==0.8.0 in /usr/local/lib/python3.7/dist-packages (0.8.0)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from stable-baselines3==0.8.0) (3.2.2)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from stable-baselines3==0.8.0) (1.21.6)\n", - "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from stable-baselines3==0.8.0) (1.5.0)\n", - "Requirement already satisfied: gym>=0.17 in /usr/local/lib/python3.7/dist-packages (from stable-baselines3==0.8.0) (0.21.0)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from stable-baselines3==0.8.0) (1.3.5)\n", - "Requirement already satisfied: torch>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from stable-baselines3==0.8.0) (1.12.1+cu113)\n", - "Requirement already satisfied: importlib-metadata>=4.8.1 in /usr/local/lib/python3.7/dist-packages (from gym>=0.17->stable-baselines3==0.8.0) (4.13.0)\n", - "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=4.8.1->gym>=0.17->stable-baselines3==0.8.0) (4.1.1)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=4.8.1->gym>=0.17->stable-baselines3==0.8.0) (3.9.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from 
matplotlib->stable-baselines3==0.8.0) (1.4.4)\n", - "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->stable-baselines3==0.8.0) (2.8.2)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->stable-baselines3==0.8.0) (3.0.9)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->stable-baselines3==0.8.0) (0.11.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->stable-baselines3==0.8.0) (1.15.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->stable-baselines3==0.8.0) (2022.4)\n" - ] - } - ], - "source": [ - "!pip install stable-baselines3==0.8.0 " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4ljXzd7W4R0H" - }, - "source": [ - "Now that we have all package dependencies, let's clone the BOPTEST-Gym repository. We are going to clone the `boptest-gym-service` branch which works in the same way as the `master` branch but allows us to directly use the web-based version of BOPTEST that is readily available such that we do not have to deploy the building test case Docker containers locally. 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "McqIwvAz5ZuD", - "outputId": "e734b733-5092-4ed6-bb0d-c97d61e3df40" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'project1-boptest-gym'...\n", - "remote: Enumerating objects: 2896, done.\u001b[K\n", - "remote: Counting objects: 100% (442/442), done.\u001b[K\n", - "remote: Compressing objects: 100% (268/268), done.\u001b[K\n", - "remote: Total 2896 (delta 213), reused 345 (delta 153), pack-reused 2454\u001b[K\n", - "Receiving objects: 100% (2896/2896), 46.80 MiB | 22.24 MiB/s, done.\n", - "Resolving deltas: 100% (1490/1490), done.\n" - ] - } - ], - "source": [ - "try:\n", - " !rm -rf project1-boptest-gym\n", - "except:\n", - " pass\n", - "!git clone -b boptest-gym-service https://github.com/ibpsa/project1-boptest-gym.git" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cs9guwYo5w50" - }, - "source": [ - "Now we move our working directory to our recently cloned repository, import the `BoptestGymEnv` class, and instantiate our first BOPTEST-Gym environment! 
" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mZsXUZIQ5iIj", - "outputId": "76d5b60a-9b92-4f98-82e2-b4618b5dfb7e" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.7/dist-packages/gym/spaces/box.py:74: UserWarning: \u001b[33mWARN: Box bound precision lowered by casting to float32\u001b[0m\n", - " \"Box bound precision lowered by casting to {}\".format(self.dtype)\n" - ] - } - ], - "source": [ - "import os\n", - "os.chdir('/content/project1-boptest-gym')\n", - "from boptestGymEnv import BoptestGymEnv\n", - "\n", - "# Instantite environment\n", - "env = BoptestGymEnv(url = url,\n", - " testcase = 'bestest_hydronic_heat_pump',\n", - " actions = ['oveHeaPumY_u'],\n", - " observations = {'reaTZon_y':(280.,310.)}, \n", - " random_start_time = False,\n", - " start_time = 31*24*3600,\n", - " max_episode_length = 24*3600,\n", - " warmup_period = 24*3600,\n", - " step_period = 3600)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8XVI61rnU4QZ" - }, - "source": [ - "You have connected to a BOPTEST building emulator and wrapped it around a Gym environment. Let's examine more in detail the arguments that you have used:\n", - "- `url`: the domain where your test case lives. In this case it is the url to BOPTEST-service, but it could be your localhost if you decide to spin a test case in your machine using Docker. \n", - "- `testcase`: The string identifier of the testcase.\n", - "- `actions`: List of strings indicating the action space. \n", - "- `observations`: Dictionary mapping observation keys to a tuple with the lower and upper bound of each observation. These bounds define the typical operational range for discretization and normalization purposes. 
Observation keys must belong either to the set of measurements or to the set of forecasting variables of the BOPTEST test case.\n", - "- `max_episode_length`: Maximum duration of each episode in seconds.\n", - "- `random_start_time`: Set to True if desired to use a random start time for each episode. That is typically useful when training an RL agent to run several episodes with different boundary condition data. In our case, we set it to False and specify the start time of the episode.\n", - "- `start_time`: start time of the episode. It is specified in seconds from the beginning of the year. To be used in combination with `random_start_time=False`. \n", - "- `warmup_period`: Desired simulation period to initialize each episode, in seconds. In our case, we simulate the testcase for one day right before the beginning of the episode. \n", - "- `step_period`: The period of each control step, in seconds. In this case it is set to one hour. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-ZcNOH0SYEiR" - }, - "source": [ - "Now you can interact with the building emulator following the Gym standard. Every time you use one of the methods of your environment, BOPTEST-Gym will send the associated commands through the BOPTEST API that you have learned above so as to provide the desired functionality. A schematic of this process is shown in the figure below. This figure illustrates the typical steps that take place when training an agent and the mapping between the BOPTEST-Gym interface and the BOPTEST API. It is important to note that a state can be returned not only with current measurements, but also with boundary condition forecast or regressive values. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rcnaIJvhYDa5" - }, - "source": [ - "\n", - "\n", - "*Figure: Sequence diagram for training an agent with the BOPTEST-Gym environment.*" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AZVIz69qXyCZ" - }, - "source": [ - "##**Interacting with a BOPTEST-Gym environment** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dA9_wVo8bMxr" - }, - "source": [ - "Let's see what we can do with our building Gym environment. Recall that the first step is using the `reset` method to simulate the building for the time period specified in `warmup_period` right before the episode start time. This will bring the building to a reasonable initial state and the environment will return an observation `obs` which, in our case, comprises only the zone operative temperature (`reaTZon_y`). This temperature is in Kelvins, so we convert it to degrees Celsius." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Z4n5GsjXV08x", - "outputId": "8c37e26f-e97d-4b4a-d41d-792fc143dbbf" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Zone temperature: 21.37 degC\n", - "Episode starting day: 31.0 (from beginning of the year)\n" - ] - } - ], - "source": [ - "obs = env.reset()\n", - "print('Zone temperature: {:.2f} degC'.format(obs[0]-273.15))\n", - "print('Episode starting day: {:.1f} (from beginning of the year)'.format(env.start_time/24/3600)) " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IeJyBvLYqC11" - }, - "source": [ - "📌 **Note: About initialization** \n", - "\n", - "The initial state in the emulator consists of all states after simulation during the warmup period without any external input from an external controller. This particular emulator has 63 continuous time states comprising temperatures of walls, floor, roof, water, etc. 
During the warmup period, the baseline controller embedded in the emulator is used. After initialization the baseline controller will also work at any time unless some of the control variables are intentionally overwritten by an external controller. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9i9VfDrdYJ0e" - }, - "source": [ - "We can inspect the observation and action space of any environment as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "a_PC0YAEYR5U", - "outputId": "dc3c02a9-e656-4adf-a4d9-884f58e8d587" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Observation space of the building environment:\n", - "Box([280.], [310.], (1,), float32)\n", - "Action space of the building environment:\n", - "Box([0.], [1.], (1,), float32)\n" - ] - } - ], - "source": [ - "print('Observation space of the building environment:')\n", - "print(env.observation_space)\n", - "print('Action space of the building environment:')\n", - "print(env.action_space)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SBVCnncbePIQ" - }, - "source": [ - "So this environment has a Box (continuous and bounded) observation space which is the indoor building temperature. The operational range of this variable goes from $280$ $K$ to $310$ $K$. That is, from ~$7$ $°C$ to $37$ $°C$. On the other hand, the action space is a continuous variable that goes from $0$ to $1$. The latter variable represents the heat pump compressor frequency with $0$ meaning no heating, and $1$ meaning the heat pump working at full capacity. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pkx5Os6-Yltb" - }, - "source": [ - "But actually, the BOPTEST-Gym environment can be directly printed to show a lot of useful information to control the building:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FGzL_ZskfoyO", - "outputId": "470e1fa7-b197-487d-ea2b-2bae9b9b2191" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "========================\n", - "BOPTEST CASE INFORMATION\n", - "========================\n", - "\n", - "Test case name\n", - "--------------\n", - "{'name': 'bestest_hydronic_heat_pump'}\n", - "\n", - "All measurement variables\n", - "-------------------------\n", - "{'reaCO2RooAir_y': {'Description': 'CO2 concentration in the zone',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'ppm'},\n", - " 'reaCOP_y': {'Description': 'Heat pump COP',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': '1'},\n", - " 'reaPFan_y': {'Description': 'Electrical power of the heat pump evaporator '\n", - " 'fan',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W'},\n", - " 'reaPHeaPum_y': {'Description': 'Heat pump electrical power',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W'},\n", - " 'reaPPumEmi_y': {'Description': 'Emission circuit pump electrical power',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W'},\n", - " 'reaQFloHea_y': {'Description': 'Floor heating thermal power released to the '\n", - " 'zone',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W'},\n", - " 'reaQHeaPumCon_y': {'Description': 'Heat pump thermal power exchanged in the '\n", - " 'condenser',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W'},\n", - " 'reaQHeaPumEva_y': {'Description': 'Heat pump thermal power exchanged in the '\n", - " 
'evaporator',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W'},\n", - " 'reaTRet_y': {'Description': 'Return water temperature from radiant floor',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'reaTSetCoo_y': {'Description': 'Zone operative temperature setpoint for '\n", - " 'cooling',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'reaTSetHea_y': {'Description': 'Zone operative temperature setpoint for '\n", - " 'heating',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'reaTSup_y': {'Description': 'Supply water temperature to radiant floor',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'reaTZon_y': {'Description': 'Zone operative temperature',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'weaSta_reaWeaCeiHei_y': {'Description': 'Cloud cover ceiling height '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'm'},\n", - " 'weaSta_reaWeaCloTim_y': {'Description': 'Day number with units of seconds',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 's'},\n", - " 'weaSta_reaWeaHDifHor_y': {'Description': 'Horizontal diffuse solar radiation '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W/m2'},\n", - " 'weaSta_reaWeaHDirNor_y': {'Description': 'Direct normal radiation '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W/m2'},\n", - " 'weaSta_reaWeaHGloHor_y': {'Description': 'Global horizontal solar '\n", - " 'irradiation measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W/m2'},\n", - " 'weaSta_reaWeaHHorIR_y': {'Description': 'Horizontal infrared irradiation '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'W/m2'},\n", - " 'weaSta_reaWeaLat_y': {'Description': 'Latitude 
of the location',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaLon_y': {'Description': 'Longitude of the location',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaNOpa_y': {'Description': 'Opaque sky cover measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': '1'},\n", - " 'weaSta_reaWeaNTot_y': {'Description': 'Sky cover measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': '1'},\n", - " 'weaSta_reaWeaPAtm_y': {'Description': 'Atmospheric pressure measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'Pa'},\n", - " 'weaSta_reaWeaRelHum_y': {'Description': 'Outside relative humidity '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': '1'},\n", - " 'weaSta_reaWeaSolAlt_y': {'Description': 'Solar altitude angle measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaSolDec_y': {'Description': 'Solar declination angle measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaSolHouAng_y': {'Description': 'Solar hour angle measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaSolTim_y': {'Description': 'Solar time',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 's'},\n", - " 'weaSta_reaWeaSolZen_y': {'Description': 'Solar zenith angle measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaTBlaSky_y': {'Description': 'Black-body sky temperature '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'weaSta_reaWeaTDewPoi_y': {'Description': 'Dew point temperature measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 
'weaSta_reaWeaTDryBul_y': {'Description': 'Outside drybulb temperature '\n", - " 'measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'weaSta_reaWeaTWetBul_y': {'Description': 'Wet bulb temperature measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'K'},\n", - " 'weaSta_reaWeaWinDir_y': {'Description': 'Wind direction measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'rad'},\n", - " 'weaSta_reaWeaWinSpe_y': {'Description': 'Wind speed measurement',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': 'm/s'}}\n", - "\n", - "All forecasting variables\n", - "-------------------------\n", - "['winDir',\n", - " 'TDewPoi',\n", - " 'LowerSetp[1]',\n", - " 'PriceElectricPowerConstant',\n", - " 'UpperSetp[1]',\n", - " 'PriceElectricPowerHighlyDynamic',\n", - " 'solTim',\n", - " 'solHouAng',\n", - " 'nOpa',\n", - " 'InternalGainsRad[1]',\n", - " 'nTot',\n", - " 'HGloHor',\n", - " 'winSpe',\n", - " 'TBlaSky',\n", - " 'solDec',\n", - " 'lon',\n", - " 'PriceElectricPowerDynamic',\n", - " 'HDifHor',\n", - " 'InternalGainsCon[1]',\n", - " 'solZen',\n", - " 'HHorIR',\n", - " 'relHum',\n", - " 'pAtm',\n", - " 'Occupancy[1]',\n", - " 'ceiHei',\n", - " 'lat',\n", - " 'InternalGainsLat[1]',\n", - " 'TWetBul',\n", - " 'TDryBul',\n", - " 'HDirNor',\n", - " 'EmissionsElectricPower',\n", - " 'cloTim',\n", - " 'solAlt',\n", - " 'time',\n", - " 'UpperCO2[1]']\n", - "\n", - "All input variables\n", - "-------------------\n", - "{'oveFan_activate': {'Description': 'Activation for Integer signal to control '\n", - " 'the heat pump evaporator fan either on or '\n", - " 'off',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': None},\n", - " 'oveFan_u': {'Description': 'Integer signal to control the heat pump '\n", - " 'evaporator fan either on or off',\n", - " 'Maximum': 1,\n", - " 'Minimum': 0,\n", - " 'Unit': '1'},\n", - " 'oveHeaPumY_activate': {'Description': 
'Activation for Heat pump modulating '\n", - " 'signal for compressor speed between 0 '\n", - " '(not working) and 1 (working at '\n", - " 'maximum capacity)',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': None},\n", - " 'oveHeaPumY_u': {'Description': 'Heat pump modulating signal for compressor '\n", - " 'speed between 0 (not working) and 1 (working '\n", - " 'at maximum capacity)',\n", - " 'Maximum': 1,\n", - " 'Minimum': 0,\n", - " 'Unit': '1'},\n", - " 'ovePum_activate': {'Description': 'Activation for Integer signal to control '\n", - " 'the emission circuit pump either on or '\n", - " 'off',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': None},\n", - " 'ovePum_u': {'Description': 'Integer signal to control the emission circuit '\n", - " 'pump either on or off',\n", - " 'Maximum': 1,\n", - " 'Minimum': 0,\n", - " 'Unit': '1'},\n", - " 'oveTSet_activate': {'Description': 'Activation for Zone operative '\n", - " 'temperature setpoint',\n", - " 'Maximum': None,\n", - " 'Minimum': None,\n", - " 'Unit': None},\n", - " 'oveTSet_u': {'Description': 'Zone operative temperature setpoint',\n", - " 'Maximum': 308.15,\n", - " 'Minimum': 278.15,\n", - " 'Unit': 'K'}}\n", - "\n", - "Default simulation step (seconds)\n", - "---------------------------------\n", - "3600\n", - "\n", - "Default forecasting parameters (seconds)\n", - "----------------------------------------\n", - "{'horizon': 86400, 'interval': 3600}\n", - "\n", - "Default scenario\n", - "----------------\n", - "{'electricity_price': 'constant'}\n", - "\n", - "Test case scenario\n", - "------------------\n", - "{'electricity_price': 'constant'}\n", - "\n", - "===========================\n", - "GYM ENVIRONMENT INFORMATION\n", - "===========================\n", - "\n", - "Observation space\n", - "-----------------\n", - "Box([280.], [310.], (1,), float32)\n", - "\n", - "Action space\n", - "------------\n", - "Box([0.], [1.], (1,), float32)\n", - "\n", - "Is a regressive 
environment\n", - "---------------------------\n", - "False\n", - "\n", - "Is a predictive environment\n", - "---------------------------\n", - "False\n", - "\n", - "Regressive period (seconds)\n", - "---------------------------\n", - "None\n", - "\n", - "Predictive period (seconds)\n", - "---------------------------\n", - "None\n", - "\n", - "Measurement variables used in observation space\n", - "-----------------------------------------------\n", - "['reaTZon_y']\n", - "\n", - "Predictive variables used in observation space\n", - "----------------------------------------------\n", - "[]\n", - "\n", - "Sampling time (seconds)\n", - "-----------------------\n", - "3600\n", - "\n", - "Random start time\n", - "-----------------\n", - "False\n", - "\n", - "Excluding periods (seconds from the beginning of the year)\n", - "----------------------------------------------------------\n", - "None\n", - "\n", - "Warmup period for each episode (seconds)\n", - "----------------------------------------\n", - "86400\n", - "\n", - "Maximum episode length (seconds)\n", - "--------------------------------\n", - "86400\n", - "\n", - "Environment reward function (source code)\n", - "-----------------------------------------\n", - "(' def compute_reward(self):\\n'\n", - " \" '''\\n\"\n", - " \" Compute the reward of last state-action-state' tuple. The \\n\"\n", - " ' reward is implemented as the negated increase in the objective\\n'\n", - " ' integrand function. In turn, this objective integrand function \\n'\n", - " ' is calculated as the sum of the total operational cost plus\\n'\n", - " ' the weighted discomfort. \\n'\n", - " ' \\n'\n", - " ' Returns\\n'\n", - " ' -------\\n'\n", - " ' Reward: float\\n'\n", - " \" Reward of last state-action-state' tuple\\n\"\n", - " ' \\n'\n", - " ' Notes\\n'\n", - " ' -----\\n'\n", - " ' This method is just a default method to compute reward. 
It can be \\n'\n", - " ' overridden by defining a child from this class with\\n'\n", - " ' this same method name, i.e. `compute_reward`. If a custom reward \\n'\n", - " ' is defined, it is strongly recommended to derive it using the KPIs\\n'\n", - " ' as returned from the BOPTEST framework, as it is done in this \\n'\n", - " ' default `compute_reward` method. This ensures that all variables \\n'\n", - " ' that may contribute to any KPI are properly accounted and \\n'\n", - " ' integrated. \\n'\n", - " ' \\n'\n", - " \" '''\\n\"\n", - " ' \\n'\n", - " ' # Define a relative weight for the discomfort \\n'\n", - " ' w = 1\\n'\n", - " ' \\n'\n", - " ' # Compute BOPTEST core kpis\\n'\n", - " ' kpis = '\n", - " \"requests.get('{0}/kpi/{1}'.format(self.url,self.testid)).json()['payload']\\n\"\n", - " ' \\n'\n", - " ' # Calculate objective integrand function at this point\\n'\n", - " \" objective_integrand = kpis['cost_tot'] + w*kpis['tdis_tot']\\n\"\n", - " ' \\n'\n", - " ' # Compute reward\\n'\n", - " ' reward = -(objective_integrand - self.objective_integrand)\\n'\n", - " ' \\n'\n", - " ' self.objective_integrand = objective_integrand\\n'\n", - " ' \\n'\n", - " ' return reward\\n')\n", - "\n", - "Environment hierarchy\n", - "---------------------\n", - "(,\n", - " ,\n", - " )\n", - "\n", - "\n" - ] - } - ], - "source": [ - "print(env)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-KYf1BksgfQj" - }, - "source": [ - "Note that this descriptive summary provides information not only about the Gym environment but also all information about the original BOPTEST test case. This may be useful, for example, if we want to extend our observation space or if we want to change our control action. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zQ_y22lrg1cM" - }, - "source": [ - "BOPTEST-Gym comes along with other functionality that may be useful when training RL agents, like the capacity to discretize and normalize observation and action spaces. 
For instance, we are now dealing with a continuous-action environment, meaning that the agent could decide to take any action between 0 and 1. However, it is probably helpful to the agent to decide on just whether the heating needs to be turned on (action=1) or off (action=0). For that, we can wrap our environment in a discretization wrapper with only one action bin (one bin has two extremes). The concept of wrappers is very powerful in Gym environments. With them, we are able to customize the observation, action, step function, etc. of an environment. No matter how many wrappers are applied, `env.unwrapped` always gives back the internal original environment object. Let's see how it works with BOPTEST-Gym:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zIqfeNwgh9VK", - "outputId": "d58259b5-ccd1-4fcb-98ea-171224f8141a" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Action space of the wrapped agent:\n", - "Discrete(2)\n", - "Action space of the original agent:\n", - "Box([0.], [1.], (1,), float32)\n" - ] - } - ], - "source": [ - "from boptestGymEnv import DiscretizedActionWrapper\n", - "env = DiscretizedActionWrapper(env,n_bins_act=1)\n", - "print('Action space of the wrapped agent:')\n", - "print(env.action_space)\n", - "print('Action space of the original agent:')\n", - "print(env.unwrapped.action_space)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ghlx_zaf282q" - }, - "source": [ - "Another thing that we can do is to interact with the building environment for one episode of experience (one day). This is similar to what we did with the Cartpole example, but this time we are going to run just one episode and use a hysteresis controller that will turn on the heating when the temperature is below a predefined temperature setpoint, and turn it off when the temperature goes above the setpoint. 
We first configure such controller:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "MrO0o7hNf5pB" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "np.set_printoptions(precision=3)\n", - "\n", - "class SimpleController(object):\n", - " '''Simple controller for this emulator. \n", - " \n", - " '''\n", - " def __init__(self, TSet=22+273.15):\n", - " self.TSet = TSet\n", - " \n", - " def predict(self, obs):\n", - " # Compute control\n", - " if obs[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JAIt_IfivAHN" - }, - "source": [ - "In this section we are going to develop a very simple RL agent based on the very well known *q-learning* algorithm. Although simple, this exercise will help us understand the main concepts of RL and how this machine learning technique can be helpful to mitigate climate change by enhancing building's operational efficiency. Recall that our objective is to develop an RL agent that can decide on the best action to take in each situation (each state) just from interactions with the environment (the building). Imagine we are at time $k$ in a certain state $\\pmb{s}$ and take an action $\\pmb{a}$. In return, we obtain a reward $r'$ the next time step and end up in a state $\\pmb{s}'$ :" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nxO3gVx3vqrL" - }, - "source": [ - "![](https://drive.google.com/file/d/1XVbDEiHT2fWIGtnPLE0uphC2hV5XubKc/view?usp=sharing)\n", - "\n", - "\n", - "\n", - "*Figure: The backup diagram. Edited version from the book of Richard S. Sutton and Andrew G. Barto* **[6]**." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XflFYx7lylyw" - }, - "source": [ - "In *q-learning* we aim to derive an *action-value function*, the q-function. The q-function indicates what is the **long-term** value of taking an action $a$ from a certain state $s$. 
With this information we not only have an estimation of the value of each state, but we can also decide to take the next action $\\pmb{a}'$ that leads to the highest value from the next state $\\pmb{s}'$. This principle relies on the so-called *Bellman optimality equation* that is presented below:\n", - "\n", - "\\begin{align}\n", - " q(\\pmb{s},\\pmb{a}) = r' + \\gamma \\max_{\\pmb{a}'} q(\\pmb{s}',\\pmb{a}')\n", - "\\end{align}\n", - "\n", - "This equation states that the total expected cumulative return of taking action $\\pmb{a}$ from state $\\pmb{s}$ equals the immediate reward $r'$ plus the maximum achievable return that we can obtain from the following state $\\pmb{s}'$, discounted by the factor $\\gamma$. Note that the q-function estimates the **TOTAL EXPECTED CUMULATIVE RETURN** of taking action $\\pmb{a}$ from state $\\pmb{s}$ (not just the immediate reward). So given the q-function we can know straight away what is the best action to take for each state $\\pmb{s}$. You can imagine a q-function with one-dimensional state and action spaces as follows:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HBpa3qjuysK-" - }, - "source": [ - "\n", - "\n", - "\n", - "\n", - "*Figure: Example of what a q-function may look like for the case with one-dimensional state and action spaces. Note that, given the q-function, we can pick the action $a$ that leads to the highest expected cumulative reward $q_*$ from state $s$.*\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "db8AVf3GoCz9" - }, - "source": [ - "Powerful, right? Now the question remains how to derive the q-function 😅.\n", - "\n", - "The q-function is inferred iteratively using the reward received by the agent each control step and bootstrapping with the Bellman optimality equation presented above. The sum of the immediate reward and the discounted next-state q-function estimate is called the target. We use this target to recursively update the q-function at a learning rate $\\alpha$. 
The difference between the target and our current q-function estimate is called the *temporal difference*. In summary, the q-learning method consists of recursively updating the q-function using the following formula:\n", - "\n", - "\\begin{align}\n", - " q(\\pmb{s},\\pmb{a}) = q(\\pmb{s},\\pmb{a}) + \\alpha [ \\underbrace{\\underbrace{r' + \\gamma \\max_{\\pmb{a}'} q(\\pmb{s}',\\pmb{a}')}_{\\text{target}} - q(\\pmb{s},\\pmb{a})}_{\\text{temporal difference}}]\n", - "\\end{align}\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qjBqNfXd_pY2" - }, - "source": [ - "So in summary, the agent observes the reward once it has taken an action from a state. It has to explore the rewards from different state-action pairs and update its q-function as it goes.\n", - "\n", - "In our example we are going to use tabular state and action spaces to expedite learning and to easily store and visualize the q-function. Note, however, that we could use general function approximators like neural networks to configure the q-function. \n", - "\n", - "📌 **Note: The exploration-exploitation dilemma** ⚖️\n", - "\n", - "RL always faces the so-called exploration-exploitation dilemma. That is, how much of what we have learned should we exploit, and how much should we explore to find even better solutions? In our case, we implement an *Epsilon-greedy* approach to balance exploration and exploitation of the RL agent. That is, the agent sometimes picks a random action (exploration), and sometimes picks an \"intelligent\" action (exploitation). The frequency at which the agent picks a random action is determined by *Epsilon* (`eps`) and it follows a linearly decaying schedule. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZQ7Um2UtLHk4" - }, - "source": [ - "\n", - "\n", - "*Figure: The epsilon-greedy strategy for balancing exploration and exploitation.*\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6aOHW96nLqOZ" - }, - "source": [ - "Our `Q_Learning_Agent` consists of only three methods: \n", - "\n", - "- `__init__` ➡️ The constructor.\n", - "- `predict` ➡️ Method to decide on an action given an observation. \n", - "- `learn` ➡️ Method for learning with the q-learning method explained above. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "9U81QUVcUfoW" - }, - "outputs": [], - "source": [ - "class Q_Learning_Agent(object):\n", - "\n", - " def __init__(self, env, eps_min=0.01, eps_decay=0.01, alpha=0.05, gamma=0.9):\n", - " '''Constructor of q-learning agent. Assumes discrete state and action spaces.\n", - "\n", - " '''\n", - " self.env = env\n", - " self.eps_min = eps_min\n", - " self.eps_decay = eps_decay\n", - " self.alpha = alpha\n", - " self.gamma = gamma\n", - "\n", - " # Initialize epsilon \n", - " self.eps = 1.0\n", - "\n", - " # Initialize q-function as a null function\n", - " self.q = np.zeros((env.observation_space.n,\n", - " env.action_space.n))\n", - " \n", - " def predict(self, obs, deterministic=True):\n", - " '''Method to select an action with an epsilon-greedy policy. 
\n", - "\n", - " '''\n", - " if deterministic:\n", - " # Use q-function to decide action\n", - " return np.argmax(self.q[obs])\n", - " else:\n", - " if self.eps > self.eps_min:\n", - " # Linearly decreasing schedule\n", - " self.eps -= self.eps_decay\n", - " if np.random.random() < self.eps:\n", - " # Explore with random action\n", - " return np.random.choice([a for a in range(env.action_space.n)]) \n", - " else:\n", - " # Exploit the information of our q-function\n", - " return np.argmax(self.q[obs])\n", - "\n", - " def learn(self, total_episodes=10):\n", - " '''Learn from a number of interactions with the environment.\n", - "\n", - " '''\n", - " for i in range(total_episodes):\n", - " # Initialize enviornment\n", - " done = False\n", - " obs = env.reset()\n", - " # Print episode number and starting day from beginning of the year:\n", - " print('-------------------------------------------------------------------')\n", - " print('Episode number: {0}, starting day: {1:.1f} ' \\\n", - " '(from beginning of the year)'.format(i+1, env.unwrapped.start_time/24/3600))\n", - "\n", - " while not done:\n", - " # Get action with epsilon-greedy policy and simulate\n", - " act = self.predict(obs, deterministic=False)\n", - " nxt_obs, rew, done, _ = env.step(act)\n", - " # Compute temporal difference target and error to udpate q-function\n", - " td_target = rew + self.gamma*np.max(self.q[nxt_obs])\n", - " td_error = td_target - self.q[obs][act]\n", - " self.q[obs][act] += self.alpha*td_error\n", - " # Make our next observation the current observation\n", - " obs = nxt_obs\n", - " # Print the q-function after every episode to show progress\n", - " print('q(s,a) = ')\n", - " print(self.q)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RWPbW8WKaQET" - }, - "source": [ - "##**Testing our RL algorithm in BOPTEST-Gym** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SV8bk8x75C_0" - }, - "source": [ - "Now that we have a RL agent ready, let's test 
it in BOPTEST-Gym! We are going to exploit the features of BOPTEST-Gym to: \n", - "\n", - "- Define a custom reward function of the environment.\n", - "- Instantiate the environment and define its state and action spaces. \n", - "- Train our RL agent.\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wy1TpSGEPxYr" - }, - "source": [ - "### Define a custom reward function of the environment" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J9jwn5BQQCyj" - }, - "source": [ - "The definition of the reward function is **KEY**🗝 since it is what drives the learning of an agent. \n", - "The `BoptestGymEnv` class allows its `compute_reward` method, which is called every control step, to be overridden so that any reward function of choice can be freely defined. \n", - "\n", - "In our example, the goal is to implement an RL agent to identify the actions that keep comfort inside the building, and we should encode our reward function accordingly. We could implement this function by integrating the temperature deviations out of the comfort range. However, this approach is error-prone. We typically want to directly use signals from the environment to define the reward, preferably those that are directly related to the function we want to optimize so that we make sure we strive for the ground-truth optimum. In BOPTEST we use the `GET /kpi` API to obtain the so-called core KPIs at the present time, which are:\n", - "\n", - "\n", - "* **Thermal discomfort**: reported with units of [$K \\, h/zone$], defines the cumulative deviation of zone temperatures from upper and lower comfort limits that are predefined within the test case FMU for each zone, averaged over all zones. 
Air temperature is used for air-based systems and operative temperature is used for radiant systems.\n", - "* **Indoor Air Quality (IAQ) Discomfort**: reported with units of [$ppm \\, h/zone$], defines the extent to which the CO$_2$ concentration levels in zones exceed the bounds of the acceptable concentration level, which are predefined within the test case FMU for each zone, averaged over all zones.\n", - "* **Energy Use**: reported with units of [$kWh/m^2$], defines the HVAC energy usage. \n", - "* **Cost**: reported with units of [USD/$m^2$] or [EUR/$m^2$], defines the operational cost associated with the HVAC energy usage.\n", - "* **Emissions**: reported with units of [$kg \\, CO_2/m^2$], defines the CO$_2$ emissions from the HVAC energy usage.\n", - "* **Computational time ratio**: defines the average ratio between the controller computation time and the test simulation control step. The controller computation time is measured as the time between two emulator advances.\n", - "\n", - "The time series graph below shows how thermal discomfort and energy use are computed by the BOPTEST `GET /kpi` API call. \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZQagxsvtr3ow" - }, - "source": [ - "\n", - "\n", - "*Figure: Integration of thermal discomfort (top) and energy use (bottom). In BOPTEST, the `GET /kpi` API can directly return these values every control step. Note that the integration step is significantly smaller than the control step.*" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "esAUAwHdr8y1" - }, - "source": [ - "The core KPIs are normally calculated at the end of the simulation to assess the controller performance, although they can be computed at any time. The warmup period is not taken into account for the calculation of the KPIs. See below how we define the `compute_reward` method using the `GET /kpi` API call. Every control step we check whether there has been a discomfort increment. 
If there is not discomfort increment, we reward our agent with $1$, otherwise we return a $0$ (no reward). Clipping the reward is a good practice to accelerate learning. " - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "hTcc3XiVP-A6" - }, - "outputs": [], - "source": [ - "# Redefine reward function\n", - "class BoptestGymEnvCustomReward(BoptestGymEnv):\n", - " '''Define a custom reward for this building\n", - " \n", - " '''\n", - " def compute_reward(self):\n", - " '''Custom reward function. To expedite learning, we use a clipped reward \n", - " function that has a value of 1 when there is no increase in discomfort \n", - " and 0 otherwise. We use the BOPTEST `GET /kpis` API call to compute the \n", - " total cummulative discomfort from the beginning of the episode. Note \n", - " that this is the true value that BOPTEST uses when evaluating \n", - " controllers. \n", - " \n", - " '''\n", - " # Compute BOPTEST core kpis\n", - " kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']\n", - " # Calculate objective integrand function as the total discomfort\n", - " objective_integrand = kpis['tdis_tot']\n", - " # Give reward if there is not immediate increment in discomfort\n", - " if objective_integrand == self.objective_integrand:\n", - " reward=1\n", - " else:\n", - " reward=0\n", - " # Record current objective integrand for next evaluation\n", - " self.objective_integrand = objective_integrand\n", - " return reward" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2hpd_svcOhDy" - }, - "source": [ - "### Instantiate the environment and define its state and action spaces" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xszlVIQtOkiz" - }, - "source": [ - "Similarly to our `SimpleController` example, now we are going to use an agent that observes only the current indoor temperature and decides whether to turn heating on or off. 
However, instead of hard-coding such logic, we are going to use our very own implementation of the `Q_Learning_Agent` to see if it can learn how to do that. \n", - "For this, we are going to let our RL agent interact with the building for some episodes of experience. \n", - "Since we are now going to run several episodes for training, we want to stop our previous environment and start one that randomly initializes our building emulator throughout the year. \n", - "This allows to train our agent when using different boundary condition data in our building environment. We are also going to exclude the Spring, Summer, and Fall periods for training since we are only focused on learning the heating behavior. \n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "id": "24fsDMTv8tSF", - "outputId": "7ce7d024-c3ff-49f5-f2cb-af0a46af88ab" - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {} - } - ], - "source": [ - "env.stop()\n", - "import random\n", - "\n", - "# Seed for random starting times of episodes\n", - "seed = 123456\n", - "random.seed(seed)\n", - "# Seed for random exploration and epsilon-greedy schedule\n", - "np.random.seed(seed)\n", - "\n", - "# Winter period goes from December 21 (day 355) to March 20 (day 79)\n", - "excluding_periods = [(79*24*3600, 355*24*3600)]\n", - "# Temperature setpoints\n", - "lower_setp = 21 + 273.15\n", - "upper_setp = 24 + 273.15\n", - "# Instantite environment\n", - "env = BoptestGymEnvCustomReward(url = url,\n", - " testcase = 'bestest_hydronic_heat_pump',\n", - " actions = ['oveHeaPumY_u'],\n", - " observations = {'reaTZon_y':(lower_setp,upper_setp)}, \n", - " random_start_time = True,\n", - " excluding_periods = excluding_periods,\n", - " max_episode_length = 2*24*3600,\n", - " warmup_period = 24*3600,\n", - " step_period = 3600,\n", - " render_episodes = True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NU8aoMvV9AdE" - }, - "source": [ - "We have set the zone temperature as the only observation of the environment state. We have also set the lower and upper bounds of this variable to be $21$ and $24 °C$, respectively, which are the bounds of the comfort range during occupied periods. These bounds can be used by the environment for normalization or discretization purposes. In fact, we are going to discretize both the action and observation spaces to expedite learning. We decide to set only one bin for the action space (two possible actions: heating on or off). We split the observation space in three bins with the outer bounds of the comfort range as bins of the observation space (`outs_are_bins=True`). That is, the observation space is defined by $[-∞,21,24,+∞]$ as shown on the left hand side of the figure below. Note that only the middle bin is always comfortable whereas the other bins may lead to discomfort. 
If we had set `outs_are_bins=False` we would have had all our bins within the comfort range. The latter would give the agent a notion of what is the temperature within the comfort range (close to the lower bound, middle, or close to the upper bound), but it would raise an error if the temperature is out of the range. " - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "id": "uCUZKrOMOIEN" - }, - "outputs": [], - "source": [ - "from boptestGymEnv import DiscretizedObservationWrapper\n", - "env = DiscretizedActionWrapper(env, n_bins_act=1)\n", - "env = DiscretizedObservationWrapper(env, n_bins_obs=3, outs_are_bins=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ab6WP3zLEvnb" - }, - "source": [ - "\n", - "\n", - "*Figure: Possibilities for the discretization of the state space.*\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GTvGxERwOOI6" - }, - "source": [ - "### Train our RL agent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Sc3XqYSDOuGq" - }, - "source": [ - "The only missing step is to let our RL agent learn by rolling out episodes of experience with the environment. We use the previously defined `learn` method for this. Note that, since we set `render_episodes=True`, we will be seeing a plot with relevant variables after each episode is finished. This is helpful to check if the agent is learning as expected from early stages. If the agent is not showing any sign of life we can prematurely stop the learning process to use new learning settings while saving some valuable time and computational cost. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "jtOpX5y_RTsV", - "outputId": "83940660-f744-4d0f-a440-79ec2d5c45f9" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "-------------------------------------------------------------------\n", - "Episode number: 1, starting day: 11.4 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[0. 0. ]\n", - " [1.936 1.398]\n", - " [0.594 0. ]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 2, starting day: 67.8 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[0. 0.17 ]\n", - " [2.414 2.116]\n", - " [1.491 0.594]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 3, starting day: 0.9 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[1.594 1.141]\n", - " [2.411 2.221]\n", - " [1.491 0.594]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 4, starting day: 29.9 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.411 2.219]\n", - " [2.382 2.409]\n", - " [1.491 0.594]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 5, starting day: 19.8 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.411 2.219]\n", - " [2.833 2.967]\n", - " [2.287 1.918]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 6, starting day: 11.0 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.411 2.219]\n", - " [4.677 4.24 ]\n", - " [2.367 1.918]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 7, starting day: 45.2 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.452 3.17 ]\n", - " [4.847 4.609]\n", - " [2.432 1.997]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 8, starting day: 362.2 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEaCAYAAABEsMO+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOy9eZhcVbW//67u9JTOTAZCICMkERAIhEFBkElBBEQiKuBVQPHeC1e8Xr0ConDxqygoighIgiIoP2YQhESQGGSWDJCQEIaQeWw6Q89j1ef3xz7dXd1d1V3VNXfv93nqqTq79rCqTtVZZ6299tomCY/H4/F4co2CbAvg8Xg8Hk80vILyeDweT07iFZTH4/F4chKvoDwej8eTk3gF5fF4PJ6cxCsoj8fj8eQkXkF5PB6PJyfxCsrj8Xg8OYlXUB6Px+PJSdKmoMxsupktNLOVwfEhZnZNusbzeDweT/8inRbUPOAqoAVA0grgS2kcz+PxeDz9iHQqqMGSXu9S1prG8Twej8fTj0ingqo0s2mAAMxsDrAtjeN5PB6Ppx9h6cpmbmZTgbnAx4HdwDrgAkkb0jKgx+PxeFKGme2Lm5b5BLAP0ACsBJ4GFkgKp12GdCgoMysEfi7pu2ZWDhRIqkn5QB6Px+NJOWZ2NzABeApYAlQApcB04ETgCOBKSS+kVY5EFJSZGXABMFXS9WY2Edg7ylwTZvaapGNSJ6rH4/EMLMzsNOAWoBC4S9LPurz/78BlQAioBS6V9HYKxj1Y0soe3i8GJkpak+xYPcqRoIK6AwgDJ0n6iJmNBJ6VdGSMuhOAh4G6tnJJjyUttcfj8fRzAk/Ue8CpwGZgMfDlSAVkZsMkVQevzwL+U9Jp2ZA3HQxKsP7Rkg43szcAJO0ONGk0SoGdwEkRZQLSpqDMrBR4ASjBfbZHJF1rZlOAB4C9gKXAVyQ1p0sOj8fjSQFHAWskrQUwsweAs4F2BdWmnALKCYLSUoWZHQtcB0zCXVPNDaupqRwnFokqqJZAq7dF5o3BWVTdkHRRkrL1hSacdVdrZkXAS2a2APgO8CtJD5jZ74BLgDtidVJQUKCysrLMSOzxeAYk9fX1ApZFFM2VNDfieAKwKeJ4M3B0137M7DLcNa6YzgZBKvg98N+4G/tQivvulUQV1G+Ax4GxZvYTYA4QNTtEMMnWTZtLujhRIeNFzl9ZGxwWBQ/hTtr5Qfk9uDuCmAqqrKyMurq6WG97PGlj586dvP56tyndAcvYsWM54ogjsi1GWjCzBkmzk+1H0m3AbWZ2Pu56/NWkheugStKCFPaXEAkpKEn3mdlS4GScqfc5SatjVH8q4nUpcA6wtU9SJkBg4S0F9gduAz4A9khqWyS8GXdn0rXdpcClAMXFsbyWHk96WbduHdu2bePAAw/MtihZp66ujldffbXfKqg42ALsF3G8b1AWiwfo4cY7Eczs8ODlIjO7CTc109T2vqRlURummIQUlJn9Bngg0Ng9IunRLm3vB15KTLzEkRQCDjOzEThrb2ac7ebi1m1RXl6ensVhHk8vNDc3M2HCBI45xgfAVlVVsXz58myLkU0WAwcEc+hbcGuSzo+sYGYHSHo/ODwDeJ/U8Msux5GWXptXKu0k6uJbClxjZjNwF/8HJC2Js+0BwNgEx+szkvaY2SLgY8AIMxsUWFG93YV4PFmjubnZW/ABxcXFNDcP3FgmSa1mdjnwDC7M/A+SVpnZ9cASSU8Cl5vZKbicp7tJkXtP0omp6CdZEnXx3QPcY2ajgHOBn5vZREkHdK1rZjV0noPaDnw/GWF7IwjaaAmUUxkuPPPnwCLcfNkDuBP4RDrl8Hj6SnNzM+Xl5dkWIyOEQiHC4TBFRUXU1dUxePBgtm/fztq1azn22GNZvnw51dXVSMItwRx4SJoPzO9S9qOI11ekY1wz2wn8C3gZeAX4l6T6dIzVE33Nxbc/z
nU2CXgnWgVJQyUNi3hM7+r2SwPjcT7TFTjz+O+SnsIpxu+Y2RpcqPnv0yyHJyAUCrF1q5t6rKqqYufOnYTDYf7yl79QW1tLZWUlf/vb3wB49dVXWbnSrQ189NFHaWhoIBQKtd9Fh0Ih0pWaK1dobm6mpKSk/Xjnzp2sXu2mebdu3UplZSXg5qpCoRD19fVs2+ZSXFZWVlJb62KENm7cSDgcpr6+np07dwKwa9cuGhoaMvlxaGxsBOCpp57igw8+oKWlhd/85jeEw2FeeuklnnzySQD++Mc/snbtWoqKihg8eDAAQ4cOpbS0lFCo78Fj69atY8+ePdTX1/P8888DsHLlShYvXgzALbfcQm1tLRs3buTpp58G4L333qOuro6mpiZWrVoFwLZt29iyxTle3nnnHVpb+33e6ynAr3GBZlcBm8xsiZndYmbnZUqIhBSUmd1oZu8D1+NyMs2WdGaMugvjKUslklZImiXpEEkHS7o+KF8r6ShJ+0v6gqSm3vrKV9r+jAAVFRWEw2GampqoqXGZpurq6lLuNgmFQu1/2M2bN9PS0kJjYyNbt26loaGBhx56CHB//OXLl1NQUMDEiRMpKCigpKSE8ePHA7DvvvsyZswYACZMmEBhYSHPP/88zzzzDAA33ngjmzdvpqKigpdectOZS5YsYdu2bYTD4fZ6W7dubb+wZPqCnAwvvPACq1atori4mJtuuon169dTWFjImjVusf6aNWvalf38+fNpbm5mx44d7d/Fa6+9xrp16wB44IEHkMSGDRvaL8wLFy5k/fr17Ny5k1tuuQWAl156iUWLFgFw1113UVlZSUVFBffee297n++++y4ACxYsoLGxkcrKSpYscZ79t956i02bNrXL1HZD8tprrxEKhfj5z39OfX09hx56KGPHjqWwsJCzzjoLSZxwwgmce+65AFx22WVMmzaN0aNHM2vWLAAOOuggCgoKuPnmm9tlXbDABZS1Ke2Kigref/99QqEQP/3pT6mvr+ftt9/mt7/9LQDLli1r/723tLQALkp30qRJAJx77rmUlZUxatQoZsyYATgFVltbS1NTEytWrABg+/bt7d/9smXLqKqq4u2332bXrl20tra23yT0FyRVS3pW0nWSPgVMBP6Im+e6P5OCxP0AvgmM7qVOKTAKWA6MDF6PAiYD7yQyXrYegwcPVraor6/Xzp07JUnbtm1TQ0ODmpqatHDhQknS2rVrtXz5cknSvffeq+3bt+vDDz/UnXfeKUn6y1/+ojfeeEOSdP3116ulpUUrV67UfffdJ0n685//rBUrVqipqUnf/e53JUmvv/66Hn744fb3165dq5qaGv3iF7+QJL388st6+umnJUm//e1vtXHjRu3YsUPXXnutJGn+/PmaP3++JOl3v/uddu3apS1btuiee+5Jy3e0ffv29s+4ePFibd26Va2trXryySclSevWrdPixYvV2tqq66+/Xq2trVqxYoUWLFggSXr44Ye1bt061dXV6Y477pAkLVu2TIsWLZIkPfjgg9q6dav27NmjRx55ROFwWNu3b1dVVZUkacuWLQqHw0l/jltuuUWS9M4772jBggVqbm7Wfffdp9WrV6u1tTUlY0QjFAqpoqJCklRVVaU9e/ZIkjZt2qSmpiY1NDRow4YNkqQNGzZo48aNkqSFCxeqsbFRO3bs0IsvvijJfW/r16+XJC1YsECtra3avn27li1bJklqbm5OStabb75ZW7duleT+G7W1taqpqdFdd90lSVqzZk27LLt371Y4HFYoFFIoFEpq3HhYuHBh+3f2zjvvJNweqFMOXO+iPXDJYecANwMvBo9f4wI1JmVMjjiFnRk8Hx7t0aXuFbjM5U3A2uD1ukBhXZ7AF1QOFGbj5PRFQc2dO1fbtm1TZWWlHn/8cUnSq6++2n4hveeee1RdXa3NmzfrkUcekSQ9/vjjeu211yRJ11xzjWpra7Vq1ap2ZfH4449r1apVampq0l//+ldJ0vr16
7Vy5UpJ7uLR0NCg5uZmbdmyJSF5w+GwGhoaJElNTU2qr6+XJFVUVKihoUGtra3asWOHJKmurk41NTWSpOrqarW0tLRfCHKdNqVSVVXV/nk2b96suro6tbS06P3335ck7dy5U9u3b5ekduXV2NjYfv7mz5/f/r3feOONqq+v16uvvqpnnnlGkjRv3jxt3rxZlZWVuvXWWyVJzz//vP7xj39Icoq9oqJCW7du1R133KHW1la9/fbbkqQ9e/Zo8+bNktzv5IMPPkj795Iv3Hbbbe3nrb8Rj4ICTgPeBdbgkrN2ff87uMwSK4CFqVIeuAQMS3C5V4tT0Wef5IhT2LnB86Ioj3/EaPNfCX4hBbgQyqdxmXM3Bc9vAzcB+/fSfr9AnreBVcAVQfl1uKi9N4PHZ3qTpS8KauvWrWppaVFdXV37hWzz5s3tF73Vq1erqalJdXV1Wrt2rSSppqamXUnU1dXlxQXf00FNTU279VFRUaGmpia1tLS0X1Crq6tVXV3d/n5zc7Oam5vbLeRozJs3T5s2bUq/8HlCf/4+elNQuMi9D4CpuCwRy4EDu9Q5Ebc5LMB/AA/20uf0QJGtDI4PAa6JUu9jgfJ7BBe9/SjwXeBYoKSnMVL5SDRZbKmkxt7KIt47GDgQ5/YDQNK9Mer+E3gOF2G3UsFeI0HE4Ik45fW4pD/HaD8eGC9pmZkNDb7UzwHnAbWSfhHv5ywvL5fPJOHJBrfffjtz5sxh7NiMrcjIae69916OO+44pk7NSOq3jGJm9ZJihmya2ceA6yR9Oji+CkDSDTHqzwJ+K+nYHvr8J/A94E5Js4KylZIO7kXWycCZOA/ZvpJKe6qfKhJdB/UKzq3XWxlmdi3wSZyCmg+cjluoG1VBAadIaulaKGkXTns/GuTXi4qkbQQ79kqqMbPVRMkY4fHkMn4dVGf6+VqoQWYWuY60T7n4IrgE6C0t0WBJr3cJ248akmhmM3Ebzn4cZzmNAF4DftfLGCkjLgVlZnvjvqyyQEu3fbphwOAYzeYAhwJvSLrIzMYBUa2fgElmNk7Sy13GPhbYLumDaAoshryTgVm4OP5jcYvZ/g3nU/0fSbujtPGpjjxZxyuozvRzBdWqFOTiAzCzC3HZHk7opWqlmU2jI+H3HIIb+y79VeJS072K2yHiZ0rz3k/RiNeC+jTwNVwWhpsjymuAq2O0aZAUNrNWMxuGm0/aL0ZdcBEiV0Uprw7eixrO3hUzG4KzuL4tqTrYl+rHuBPyY1wKj24Ja5VEqqPq6ur2UNt+h8JQsxlqNkD9TmjcBY2V0LALrABKR0LpqI4HYWisguY90LjbPQoKoXQ0DJ8MI/aHYZNgUOAhCLdCS717tNa555JhMHg8DIpxoQ63QlM1tDZA2V4dfXWTXW78um1QtxVqtwWvt0F9BQwqg7IxTq5hk2H4NBg+CQqzpyCampq8goqguLi4Pdw+lyksLGTmzLiyqiVCXLn4gkwSPwBOUO9LaC7DXedmmtkWXADbhVHqTZNUZWajAi9W5HhTJK1L4HP0mUTnoM5VnIttzex2nPL6EvA/uCzjbyrGNhxmtlhRNj4M3ntL0kfjGLMIl6T2GUk3R3l/MvBUb/7WROegtmzZwiuvvBJ3/ayjsLvIFxRBtxX6chf/+gpo+BDqP3QX7NKR7oI+aDAUDXavJackWhuhtd69NoPCkqBuGRSWuj5b6gKFtQuaq3AxMWHXR0ERFBa554IiCDVBS63rp6gcrBDCLa481AThkHuvYJAbt7AEioa4ugChZte+pc7VKR4KRUPdc/FQKB4Cg8pBrR1yNQXKtLnG1WlXuiOheHiU7yk9lJWVccYZZ7jMCQ27oHoDNFdD8TAYMRVKhmdEjlzh3XffbV+LlMuUlJRw1llnJdQmjjmoQbgNC0/GKabFwPmSVkXUmYULZDhNHTn54hm7HCiQVNNLvZeB09WxKeKBwEO9XUNTRUIKCsDMzgAOonPgw/Vd6hhuIm1TcDwZGCYp5
i/NzN5XlJRJwXtrJO3fi1yG20pjl6RvR5SPD+anMLP/xm26+KWe+srrIInWRqjZ1PlRvbHjde1md1EuKofmWnfBGzzWPQoGQcWbzirZ95Ow3ydh3xNgaIqn8iR30S0odtZPtIt/OORk3bPWKaXiYU6uwWOhZERHm3DIfa49a6BqvbPWSkbA8Knugl48NDHZWhrgw+WwfTHsWOyeazbBmENh7yPdY6+DnNKu2wG1W6Bmo/uOqzc4mQtLnZzDJsOo6TByBoyc7izHgiiWQONuqFwFO4NH5Ur33NoIw6e4z95cBXs+cK9HH+zkGXOIex41M6tWX4/s+cCdF+Ss1WGToHRE/O0lqNse/GYHw+Bx0b/DPKQ3BRXU+QzOg9SWi+8nkbn4zOw54KN0uOk2SoqpKc3sp8CNkvYExyNx0x6xtk06A/hf3ALdGbgYggskvZnIZ+0riVpQv8PNOZ0I3IWbZ3pd0iVR6sZl9UTUvx8Xsj6vS/nXgVMlfbGX9sfhFpO9RccmilcDXwYOw7n41gPfbFNYschZBaWwuyi2XRC7PW9yF7IhE2DofsFjYsfrYRPde6UjnXsuHHIWTcOHrt9QE4w9DMr3zvYnzS2aqmHH0g6ltetdZ9ENHgvl+7iL7rCJHd91qNF9n9XrYNd7sPs92P2us0qH7uvqFw9z3/3u953VtteBTvGNPtg99joIhuzTWXm3uVsrVzol2vao3uAU4JhDOyuuwWMy/12FmmHzi7DuaVj7lFMsI2e4z1Ff4ZRV8RCnVEd9BEbNgGFTnJu1fB/3nex6B7a/DjuWwI5l7nOXjnA3VI273O9z5PSIPma6R9fvK8eJR0GlYcw32qL3IsqWSeoW6Bbx/udwSmoocK6k99IsZsfYCSqoFZIOiXgeAiyQ9Ikode/BhTwujrPvcbgM6c24EHFwk37FwDmStsctaJLkhIIKh9yFbcfSjkfFG+4ucujEjgvisImdFVH5OKd8PLlHS0NwMxG47Ur3ghHT3HlL5sLaUu8sropAYVWucM+DBjtlN3xKMMfW9jzZWTOpskTqK2DdAqeQNvzdKYupn4UpZ7gbnk5KVk7J7noHdq12v/GqdVC9Hmq3Oit5xP7OUh13JIw7orPiCbU4K3X3e66Pnas7+mqpdwpvxP7ue217Hj4NhoyP/r9Q2N0kbH8dPlwBDZWgEJSMdAp+6H7uexs+NZAjdf+tLCmoFcCRbXNVQVLtJZIO6lLvVjon+z4ZtyZrPYCkb2VE3gQV1OuSjjKz14DPAzuBVdHcb2b2Di6p7Aagjo697A/pZYwTgTb/5ipJ/4hbwBSRcQWlsLvT3rGkQxl9+Ka7iIw7InjMhnGHO+vH4+kNySnDnW87q6V6fYciqFrvLJGSEe4iXDbGKYbC0o65wMJip+AGlbmboqKhzh1cPMwFsYSaYNu/YP0zTjlMPMUppamfcZZlNmjY5SzVPR8ErsUPOl43VweKZlrgIZD7PiqWuc+099Ew9lAoHw+Ym5Os/7DDzVy11pUNneRcx8MmB5Zz8Dx8csKehywpqO/jAs7uDoouAp6UdGOXej1u2yG3s0XaSVRB/RC4FadNb8Np2HmKSP8eUXdStD4kbeibqJkjYQXV2uRcGYNKgzmVHu6ywiF3x1YRKKLtSwJlNDpQQoFCGns4lI1K/sN4PNGIdO82VLpHa1MQjNIM4WYX9NJS737bLbXuIt9U7dzIVuh+pxNPgX2Ph0ElvY+ZTZprnZLZsyawkuS8D2MOc9ZVPLTUOwXfNq9WvcFFt1atd27dr76VkEjZUFDBuKfjruHgdnx4Jkqdubg1Vc/1FkiRTuJWUGZWABwj6ZXguAQolVTVQ5vjgAMk3R3s1TQkXeGJZrYfbgJvHE5xzpV0S5CJ4kFcstr1wHnR1kFFkqiC2vTag/zzH39zlhByd6KDypyyKixxf4Zws/uTtEWnlQxzd27Fw93rgphrkD0DiLKyMj7/+c8P2P2PurJq1SreeOONbIvRK6Wlp
cyZMyehNtlSUPFgZkfjkiucjJt2eRb4m6SMbnGcqAXVbYKth7rX4uaQZkiabmb7AA/HSsMRpPVY3RZdkig9pDr6Gi6y72dmdiUwUlKPGycmqqBqa2vZvj2YIgs1O9dA/XZo2OkitMycIhq6n3MFFMVa2+wZ6DzwwANceeWVDBqUaJKX/slTTz1FYWEhBxwQNcA3ZygsLGTKlCkJtYkziu804BZcFN9dkn7W5f3jcVF+hwBfkvRIjH5eknScdd9Itm3qZVgPMuwFfAqnsA4BluGU1UO9fcZkSfRfsNDMzgUeU++a7RxcNodlAJK2BoqjG2b2Y1yM/7eBHqP1YtFDqqOzcSmXwIWhP0+Kd/YdMmQI++/fYxS8xxMXxcXFNDU1eQUV0NzczP777z8g/19mVoibSjkVl+ZosZk9KentiGobcTfh3+2pL0nHBc8JrrsASTtxe0DdH8h1JE5hpZ1EQ1K+CTwMNJtZtZnVmFl1jLrNgRJrS6nR053Cy8AxuEVpSdMl1dG4iLDy7TgXYLQ2l5rbMXLJANgt05Oj9PPUPgkzwFM/HQWskdtwtRl4AHfD3Y6k9cH60nC0DiIxs8IgeC0ugvqjI46LzewbwL2SfhL3p0iChG7TEtS+D5nZncCI4ENdDMyLVlHS34C/JSJLLKKkOoocR2YW1fJLJtWRx5MqvILqTD9XUKlOFtsjkkJm9q6ZTZS0sae6ZvYl4E6gztwu6j8B/oDzdF3QVxkSJSEFFWRruACYIunHQWDCeEmvd60r6Rdmdioul9504EeS/p4KoXuQrwinnO6T9FhQvKMtm0QwT1WRThk8nmTwCqoz/VxBpSxZbAKMBFaZ2eu45T8ARMk+cQ1whKQ1ZnY4LmnsHEl/zZyoic9B3Y4zJU/CJV6txflIo+bQw2V1KMO5+XqMwTQz621eq6c6gfL8PS7QIjIP35PAV4GfBc9P9DSGx5NNvILqTD9XUL0RV7LYBPlhnPWaFWQvDwLP3s+0coLEFdTRkg43szcAJO02s6i/niBF0Y+Af+AiRW41s+sl/SFG34vM7FHgiUjzM+j/OJxyWQT8MUb7Y4GvAG+ZWVueqKtxiukhM7sEt2j4vLg/rceTYUpKSryCimCAK6jFwAFmNgWnmL6E27g1YcysFPh3XPKEt4DfS+ppsn2smX0n4nhE5LGiJONOB4kqqJYgsqQt8GEMsSfnvgfMCiJA2kIVX8H5MaNxGm6e6v7ghOzBJaQtxMXg/1pSzAURkl4CYi0eOTlGuceTU3gLqjMDWUFJajWzy4Fn6EgWu6pLstgjcSniRgJnmtn/dU1bFHAP0ILLV3o6biPZK3oYfh4u916s44yQqIL6De7LGGdmP8Eli42aBReXBilyBXJNUBYVuW3jbwduD+aSRuP2lOrTuiiPJx8pKiryCiqCgaygACTNx+1IHln2o4jXi3Guv944sC15t5n9HugWN9BljP9LXNrUk2gU331mtpQOi+RzklbHqL4G+JeZPYGzuM4GVrSZiT2ZiHI75/aYcdzj6Y94C6qDUCiEpJzfrDBPaN+NPLDMeqxsZr/pUiSgElgUeKsyQl9WAw7GmZvCBUDE4oPg0UZbcELazEQz+wPwWaBCwYZaZnYd8A3gw6Da1cFdiceTc3gF1UGb9eTTPqWEQyPWrBpQFhzHyiSxlO6MAm4yswcl/TqNsraTaJj5j4Av4EK5DbjbzB6W9P+61s2SifhH4Le4nHyR/ErSLzIvjseTGMXFxdTUZC03Z04x0N17qURSQmZorGzl5vYEfAWXXintJGpBXQAcGswXYWY/A94EuikoM5sN/ACYFDlOb9ttJIOkF4IsEh5PXlJcXMzGjRt54YUXsi1K1qmvrx/wCiqOXHwluBvyI3Bz/F+UtD5d8khqyKRFm6iC2oqLrGsMjkuIHZd/Hy6SL3KH22xxuZn9G7AEt71xt2zmZnYpcCkw4P8Unuwxbdo0qqur8em23P/w+OOPz7YYWSPOXHyXALsl7
R9kf/g5fcxnGoc8g3BLeTano/+oYyaYzfwvuEW5bRkhTsFFg2yGzrsstmXPTZ2occs4GXgqYg5qHG5yT7jFxeMlXdxTHzmxo67H4+nX9JbNPNjh4TpJnw6OrwKQdENEnWeCOq8GCmQ7MCaOZN69ydY16zlAA/BPXBq5rcn0Hy+JWlDPAAtxgrfiFs7G4lozuyuo39RWGJGCKCNI2tH22szmAU/11qa+vl5m1pBWwdx3n83bZD++H9+Pn93xy1KQi6+9ThCdVwXshbsp7zN9yXqeDuJSUIFm/iluIe0GXIDERNy2wVcHYeFduQiYCRTR4eITkFEF1ZaHLzg8B1jZWxtJiWZ5TxgzW5KFPFx+fD++H9+P3ytmdqGkPwevj5X0csR7l0v6bSbkiNeCugkXHj5Fwfa/ZjYM+EXw3rejtDlS0oyUSBknZnY/bu+n0Wa2GbgW+KSZHYZTjutxW4Z4PB5PrhNPLr62OpsDQ2I4PSRESIDvAH8OXt8KHB7x3sW4aOm0E6+C+iwwPdKvGWxl8R/AO0RXUK+Y2YFdJvTSiqQvRyn+fabG93g8nhQSTy6+tmTYr+Iy+/wj2fmnAIvxOtpx2ohXQSnahw72F4n1ZRwDvGlm63BzUG0LwtIWZp5nzO29ih/fj+/HH6jjx5OLD3cD/iczWwPswimxVKAYr6Mdp424oviC6L3HJN3bpfxC4Lwoe4lgZpOi9SVpQx9l9Xg8Hk8GMLN6XLo6A6YFrwmOp/YUfZhSOeJUUBNwwQ0NdKTAmI1LdXSOpKhroczsOOAASXcHmc+HSFqXEsk9Ho/HkxbMbCoQivW+pA3x7OGXtBwJroM6CWhL5f62pIU91L0Wp8RmSJpuZvsAD0s6NhmBPR6Px5NezOx5XEq7Hvfnk/THtMqRLgUYbBo4C1gmaVZQtsLPQXk8Hk9uE2xweDEuvV20/flu72l/vlTRl2zm8dIsSW1BFGaWEZ+lx+PxeJIjV/bnS+eC1IfM7E7cVsHfAJ7D7cro8Xg8njxBUoukbdnYPDadFtQY4BGgGpgB/AiXu8/j8Xg8nl5J5xzUMkmHdynzc1Aej8fjiYuUW1BBdon/BKaa2YqIt4YCL0dv5fF4PB5PZ1JuQZnZcGAkcANwZcRbNZJ2pVN7f4IAACAASURBVHQwj8fj8fRbelVQQbjhf+Ji3wW8BNzRtquux+PxeDzpIB4F9RBQQ0dm2/OBEZK+0EObMcA3gMl03u69x40CPR6Px+NpI545qIMlHRhxvMjMestQ/gTwIi60PGa6DI/H4/F4YhGPglpmZsdIeg3AzI4GlvTSZrCk7yctncfj8XgGLPG4+Fbj1jG15WOaCLyL2y456vYZZvb/gFckzU+tuB6Px+MZKMSjoKJum9FGtO0zzKwGKAeagZaOqhrWRzk9Ho/HM8CId7uNkbhthSMDHpalUa6UYmZ/wO0KXCHp4GzL4/F4PJ7eiceC+jHwNeADOnZSlKSTeml3FnB8cPi8pKeSE7XvmNnxQC1wr1dQHo/Hkx/Eo6DeBT4qqTnuTs1+BhwJ3BcUfRm3RfFVfRU0WcxsMvBUPApq9OjRmjx5cvKDhlvdI5cwg8KS7uWhJohn0XbM9s2gcOeygkIoKOpSUdDa1L19YRFYYeeyWN/foNL4xo/FoBLcxqB9IQH5E2kfDSuAwuKEpMsIyf5WohFugXCXgN9E2idCNPkLBrlHBlm6dGmlpDEZHTTPiOeMrARGABUJ9PsZ4DDJXTHM7B7gDSBrCqo3zOxS4FKAiRMnsmRJb4GKcfD7/d2frrDrRTqL1G6Fzz4IU8/oKNu9Bv54IAyfHF/70/8MB3yuo6x6g/usw6d0rlu1Hq5ocIqqjaW/glevh8ER/8vmWtjn43DWI53bP/AJ13ekQqrbDqf8Dj5yfkdZfQX8bh8YMbV3+et2wEm/gYO+2nvdaLz1B3j+v6F8XEdZSz2M+gh84e+9t3/7T7DwMijfu/e6e9bCN7d0HivbrHkS5p8PQ
/bpvW7VevjqWzBqRu91bx0Gg8c6pdxG9Qa4cCmMTqHTY9M/4bHTYei+HWXhVqewvpHZzb7NrNv8vacz8SioG4A3zGwl0H7rJ+msXtqNANpSGw3vm3iZQ9JcYC7A7NmzU5P/qbURzn+t858h2zzxeXcHGUmoEUZOh6+t7L39X7/YvX1rIwyfChe/27n8V0WBVVPYue6h/w6fuKGj7IO/woq53cdqbYSzHoW9j+wo+9tFUcZvgvLxcPF7vcv/7KXd2ydCqBE+cgGccntH2eYX4KVr4mvf2ggzvgifimPnmbkTk5M1HYQaYcrpcObDvde956Pxy9/aCF9dFVi3AX86PPWfP9QIE46DOc92lNVuhT/PTu04npQQj4K6B/g58BYQpw+lXaktwvlSjqdzXr6BgcL03ZWUJsy6u8IUduXJtI/6OeOtG6VeQnWTlD8REpE/7vaxSFLWdJAu+aP+BtPw+ZM9f56MEo+Cqpf0m0Q6lXR/sKd9263v9yVtT1S4/EedXRa5gBV0978rATmtgI5YmfYOorePt27Uej3UTVb+ZBIkRxsrpvxRO0jyu84yafuu4zzXyZL0+fNkknh+aS+a2Q1m9jEzO7ztEa2imc0Mng8HxgObg8c+sdpkAjO7H3gVmGFmm83skowMnIhlkjESsYASaB/tc8ZrrcWyaqLJZUY3Qz6jVkmSd/rJWqtZJ8HvOl6nS6xznQ4LKt7fnyfrxGNBzQqej4koExAtzPw7uECDX0Z5L1abtCPpy9kYN6G7zUyRiAUUb/uYnzNG3a73RTHvlOO9q86gVZLsHXi0zx+LdFgQyZJOazUjlk00+S33vmcPEIeCknRivJ1JujR4eXrX7TiCbTsij2uI/usz+k3WCZFzc1BR/4wJyGkJtI9VN5oFEu2noDjrRq0Xi2QvRlE+a9TP2UP7RGTNOddTAvJbnPK3fXdRLdM0uPi6nT/v4stVelVQZvajaOWSru+h2StAV5depzJJQ+MRMK/J1SCJaC6yhC7w8baPVbfreqEEXXzJuCijff5ESHYyP1FZc831lA53asw6PkhioBOPi68u4nUpLmXQ6mgVzWxvYAJQZmaz6PglDAMG9zSImY0N+gdA0sYequcJOeriS3WQRCy3VTqCJEiBiy/ZIIloLsp0BElE/axZJi0uvkSCbJLFB0nkE/G4+DrNJ5nZL4BnYlT/NC4t0r7AzRHlNcDV0RoEKZF+CeyDWww8CacAD+pNtpwnIddTpojmdknEFRmv247orq9o30ksF1msulEVZKbcZgnIH7V5GlxkGSVdLr4Yv5+0KKhkXLSeTNKX3B6DcQqoG5LuAe4xs3MlPRpnfz/GBWA8J2mWmZ0IXNgHuXIPhXPUgormdkvkrjjO9vHWjVavrW7UgIpo66CSkD8Ron7WBPpM9rvONtHOSUzilD+R30+yRB3Lu/hylXjmoN6i4zamEBgD9DT/hKRHzewMnBUU6baL1q5F0k4zKzCzAkmLzOzXMWTZF/gS8AmcxdWAS8X0NLCgLbVS7pCDQRKxrJqE5nDitcDirRvrTjkHgySiTrIncqefSWsvDSRqAcbt4os1h+mDJAYy8VhQn4143QrskNRjBlQz+x3O0joRuAuYA7weo/oeMxsCvADcZ2YVdJ73auvzbtz81lO4zBYVOOU3HTgN+IGZXSnphTg+U2bIp3VQmQySiHe9S7qCJLK9DiqfgyTSsQ4q1nfi10ENeOKZg9oQLLI9Dneb8RIu8WtPfFzSIWa2QtL/mdkvgQUx6p4NNAL/DVyAy9sXzdL6paRoyeJWAo+ZWTFut9/cYaCvg4q77gBbB5WpgI500B+CJLq5KHPQUvUA8YeZfwF4LCj6o5k9LOn/9dCsIXiuN7N9gJ24zBLdkBRpLd0Tq8MYyiny/WZgTU91Mk8OuvhycR1ULBeZXwdF7l04MxgkkS4XX7ffXw7eCHiA+Fx8FwCHti28DfZ6ehPoSUE9ZWYjgJuAZbhf6V3RKprZ53Euu
7G4X6lBP1moO5DWQWUzWWx/XQeVi5P36VoHFctFnMy5iopfB5VPxKOgtuLmetoyQ5QAW3pqIOnHwctHzewpoFRSVYzqNwJnSoq6tiq/yVEXXzrWQWUzWWx/XQeVi5P3mXbxZSRZbC5aqh6IT0FVAavM7O+4s3gq8LqZ/QZA0rfaKgbWUFTMDEmPRXlrR1+Vk5ntndNZ0v06qBjKMMXroDIaGRfD7ZiudVA553pKNuIzWpfZXgflXXy5SjwK6vHg0cbzPdQ9s4f3RMc8ViRLzOxB4C903hAxWt2uzKd7SqXcob+ug4rqIoyRLDYT66AyubZI4e5bg0f7Tnpqn+/roBLKhOHXQXn6TjxRfDEDF6LUvagPMgwD6oFPRXZFdGXWlVwzT7qQJ0ESia6DSiRIor+tg0pFkEQ+r4NKR5BE1tdB5eL37IG+ZZLolUQSzPZRqbURx77ZWSQX10FFXfPh10HFjU8WS1qCJLK5Dqpt7Jx0yQ9s0qKgSCzBbLTdequAJZKe6GkQSbf3WcKMkWM/+IT2c0qyfa6ug+p5nXnPZHId1EBPFhttP7GkiRbkYhHv5dj/dYATzzqoL0h6uLeySBJMMFsKzATa+jsXWAccamYnSvp2bzLmJDH3uMk2aQqSyPY6qEwGSWRqHVROup4yuA4qHUEiMa2kYKxc+7sOcOKxoK6iQ3n0VNYTMRPMAocAx0oKAZjZHcCLuMwVbyUwRm6Ri2ugILaLLJltyHNhHVSmtlFPdj8hvw4qep9ZXQdFREBG173KPNkkpoIys9OBzwATurjhhuFy8sUkwQSzI4EhOLceQDkwSlLIzNqj+szMpJ5vp+KpkzlycA0URHe7JOviSyhVTSrWQXW9aGV4HVRS+wn5dVBROo39+8nEOijIUWvV05MFtRVYApwFLI0or8HlzeuJRBLM3gi8aWbP425tjgd+amblwHMR9RaZ2aPAE5GbGQY5+I4DvgosAv7Yi2yZIRcDJID4rZoE28dcBxVH3UwGSSRrlaQiSCJT1l466JdBEpCT1qontoKStBxYbmb/n6SWeDozs1HBy5oubw0LFuruijLO781sPnBUUHS1pK3B6+9FVD0NuBi438ymAHtw81eFwLPAryX1lsQ2c+RiolhIzAKKt33MzxrDWou6timDQRJJL9RNMkgi3v2UcnIBaRq+64QykSRLBq01T9LEMwc12cxuAA6k895OU6PUXUrHLPJEYHfwegSwEZjSVtHMZkp6J8iUDrApeN47yBCxLLLjIBfg7cDtZlYEjAYaJO2J4zNkgVyNCMqBdVDxBD60ydUf10H5ZLHd+8xkstiMZa3wJEs8Cupu4FrgV7j9nS4ixi2gpCkAZjYPeFzS/OD4dOBzXap/B7gUt917excRr0+KJVBg0W2LQ/bskasuvlxcB5VIkIRfB5VdMh0kkQ4XX1T5czBrhycuX0OZpIWASdog6TrgjF7aHNOmnAAkLQA+HllB0qXByzuAsyWdiJtDqgK+G6f8OUwCrpxMkq51UNE+azqCJKKuDcqgi8+vg8pskETGXHzegspF4vmlNZlZAfC+mV1uZufgou56YquZXWNmk4PHD3BBF9G4RlK1mR2Hs5ruwimtlGJmp5nZu2a2xsyuTHX/3cjZVelpWgeVzWSxfh1UBsnwOqhMJIt1g+XezYAnLgV1BW4d07eAI4ALcRFzPfFlXGh5W6LZsUFZNELB8xnAPElPA8VxyBU3ZlYI3AacjptL+7KZHZjKMbqRi4ligbQkW00k2WeiyWLjqpvhZLFRrboBlCw2bs9AviSLTdNYnqSJJ1nsYgAzC8ebNy+I1rvCzIa6Q9X2UH2Lmd2J28bj52ZWQup9Y0cBayStBTCzB3Bbzb+dkt5DzdBS17msuZqcDJIwg5Z6aNzdUdZcS0LzIt3a18Rob9Bc1bluqClKXYNwa+d6QMzN5Vobuo+fyF191/aJ0NrYXSYzUCi+PkNR2sfE3O+or7Kmg9YGGFQWX10zd256k
79pT2yrprk2tZ+/pZ6Y1lrTbvpusRmUjkhCME804kl19DHg9zi33kQzOxT4pqT/7KHNR4F7gVHBcSXw1Rjbtp+HCyH/haQ9ZjaezuHlqWACHVGCAJuBo7vIfCkuaIOJEycm1vuGv8P8C7uXj5iWWD+ZYMi+8OKVsLyLF/WjX4+//T//B97qskHyQV/rXnf4FPjL2Z3LzOCQSzuXlQx3iuuuLoGhZWOhoKjL+BPgXz+B1fd1Lp9xXpzyT4BXroN3H4qvfjSmdpmCHTQYisq7yx+LU++Mr97wyfD3byYkWkY4/qb46g2bBC/8r3v0xt5HRm//0tXukUqO+WH3shH7w5+S2LmnbDRc8n7f23uiYr0lXjCzfwFzgCclzQrKVko6uIc2rwA/kLQoOP4k8FNJH4/VJp2Y2RzgNElfD46/Ahwt6fJo9WfPnq0lS5ZkUkSPxzPAMLOlkmZnW45cJq5s5pI2WWcTPBSrbkB5m3IK2j8fZIbIFluA/SKO96WHbeuXLl1aaWYb0izTaKAyzWP48f34fvzcHX9SFsfPC+JRUJvM7OOAggWyVxBj64wI1prZD4E/BccXAmv7LmbSLAYOCDJQbAG+BJwfq7KkMekWyMyWZPPuyY/vx/fjZ338ydkaP1+IJxjh34HLcPM4W4DDguOeuBgXxfdo8BiNW+CbFYI8gJfjtvxYDTwkaVW25PF4PB5P78QTxVcJXJBgv9NwLrWCYIyTcWucDklUwFQRLBye32tFj8fj8eQEPW23cSs9xFxK+lYP/d6HywaxktRv6NJfmOvH9+P78f34ntjEjOIzs8jFuP+Hy8fXjqR7YnZq9pKk41Iiocfj8XgGJL2GmQOY2RttIeZxdWp2Mi5zxEKgfdNBSY/1RUiPx+PxDDziCjMn8eXVFwEzgSI6XHwCvILyeDweT1zEq6AS5UhJM9LUt8fj8XgGAD0FSdTQYTkNNrPqtrdw+fWG9dDvK2Z2oKTU5LrzeDwez4AjrjmohDs1W40LNV+Hm4NqU2pZCzP3eDweT36RLgUVNYWHpHSnD/J4PB5PPyEtCsrj8Xg8nmTJwR31PB6Px+PxCsrj8Xg8OYpXUB6Px+PJSbyC8ng8Hk9O4hWUx+PxeHISr6A8Ho/Hk5N4BeXxeDyenMQrKI/H4/HkJF5BeTwejycnGRAKysz+YGYVZrYy27J4PB6PJz6ynurIzK4A7gZqgLuAWcCVkp5N4RjHA7XAvZIO7q3+6NGjNXny5FQN7/F4PN1YunRppaQx2ZYjl0nXflCJcLGkW8zs08BI4CvAn4CUKShJL5jZ5HjrT548mSVLlqRq+E48+ijceWdauk45e+0F99+f2j7//ne46abu5WecAVdc0bnsuuvglVf6PtaQIfDgg1BU1Lf2N9wAixb1fXzPwGH4cHj44cTamJlPnt0LuaCgLHj+DPAnSavMzHpqkBYhzC4FLgWYOHFi2sZ55RWYPBnmzEnbECnj059OvYJasgRGjoRLLukoe+01eO657grq6afh3/4NZvRx68tzzoH6enfx6At/+xuceSYc4jeJ8fRCcXG2Jeif5IKCWmpmzwJTgKvMbCgd28RnDElzgbkAs2fPTpvfU4KZM+FTn0rXCKlFglTeLkgwbVrnz9/cDK+/Hr3uxz4Gs2f3bayiItdHX5HgqKPg+OP73ofH4+k7uaCgLgEOA9ZKqjezvYCLsixT2giHU3vBTydmTt7CwtT1Ge3zt40TT91EiNVvvOTTufJ4+iNZU1BmdniXoqlZ8OxlHAkK8iR2sqAgOQskGtE+f6xxkv2ukpU/n86Vx9MfyaYF9cvguRQ4AliBm486BFgCfCxVA5nZ/cAngdFmthm4VtLvU9V/IqTaZZZOzNKjoKJZULEUVLIWVLIKKl/OlcfTH8magpJ0IoCZPQYcIemt4Phg4LoUj/XlVPaXDPnkNkrWRRYN7+LzeDzxkgsOjBltyglA0krgI1mUJ63kk9vIu/jy51x5PP2RXAiSeMvM7
gL+HBxfgHP39UvyyW3kXXz5c648nv5ILiiorwH/AbStgnkBuCNr0qSZcDh/7soLCtLj4otmQcVy8SVrQSXr4suXc+Xx9EeyqqDMrBBYEMxH/SqbsmSKfLor9xZU/pwrj6c/ktX7Q0khIGxmfVzrn3/k08S7D5LIn3Pl8fRHcsHFV4ubh/o7UNdWKOlb2RMpfeTTxLsPksifc+Xx9EdyQUE9FjwGBPnkNvIuvvw5Vx5PfyTrCkrSPdmWIZPkk9vIu/jy51x5PP2RrCsoMzsAuAE4EJdVAgBJU2PUnw18AtgHaABWAn+XtDv90iZPPrmNvIsvf86Vx9MfyYW/3924sPJW4ETgXjrWRLVjZheZ2TLgKqAMeBeoAI4DnjOze8wsfftkpIh8uiv3FlT+nCuPpz+SdQsKKJO00MxM0gbgOjNbCvyoS73BwLGSGqJ1YmaHAQcAG9MrbnLk0125t6Dy51x5PP2RXFBQTWZWALxvZpcDW4AhXStJuq2nTiS9mSb5Uko+Tbz7IIn8OVceT38kF+4Pr8BZR9/CZTW/EPhqPA0Dl19ekU9uI+/iy59z5fH0R3LBgtolqRa3HirRjQrz7vKRT24j7+LLn3Pl8fRHckFB/cHM9gUWAy8CL0RmN28jmKPqerl5Oo46OUU+uY28iy9/zpXH0x/J+v2hpBNw22vcCowAnjazXVGqLjKz/4qM1JN0jZkVm9lJZnYPcboGs0k+JSD1yWLz51x5PP2RrFtQZnYcbl3TJ3AK6imcJdWV04CLgfvNbAqwB7duqhB4Fvi1pDcyInQS5NNdubeg8udceTz9kawrKOB5YCluse58Sc3RKklqBG4HbjezImA00CBpT6YETQX5NPHugyTy51x5PP2RXFBQo4FjgeOBb5lZGHhV0g9jNZDUAmzLkHwpJZ8m3n2QRP6cK4+nP5J1BSVpj5mtBfYD9gU+DhRlV6r0kU9uI+/iy59z5fH0R7KuoALl9A7wEi7l0UWx3Hz9gXxyG3kXX/6cK4+nP5J1BQXsLynFl8HcJZ/cRt7Flz/nyuPpj+TC329/M1toZisBzOwQM7sm20Kli3xyG3kXX/6cK4+nP5ILCmoeLkN5C4CkFcCXsipRGsmntTV+HVT+nCuPpz+SC3+/wZJe71LWmupBzOw0M3vXzNaY2ZWp7j9e8umu3FtQ+XOuPJ7+SC4oqEozmwYIwMzmkOIQcjMrBG4DTsdtjPhlMzswlWPESz5NvPsgifw5Vx5PfyQXgiQuA+YCM81sC7AOuCDFYxwFrJG0FsDMHgDOBt5ORedLlsDNN8dXd/ny/HEbFRbC974HI0akrs9XXoHTT+8+zo4dcP75ncubmpL7rgoL4dprYcyYvrWvqcmfc+Xx9EeyrqACpXGKmZXjLLp63BzUhhQOMwHYFHG8GTg6soKZXQpcCjBxYmIb844bB5/9bHx1zzoLPvGJhLrPGnPnwrp1qe3zzDO7K6iJE+Hee51CiuT886GsrO9j/frX8N57fW9/3nkwalTf23s8nuSwbCX/NrNhOOtpAvAE8Fxw/D/ACklnp3CsOcBpkr4eHH8FOFrS5dHqz549W0uWLEnV8B6Px9MNM1sqaXa25chlsmlB/QnYDbwKfAP4AW5/p3PSsDvuFlymijb2DcqisnTp0kozS6UFF43RQGWax/Dj+/H9+Lk7/qQsjp8XZNOCekvSR4PXhbjAiIlBUthUjzUIeA84GaeYFgPnS1qV6rESkGlJNu+e/Ph+fD/+wB0/X8imBdXS9kJSyMw2p0M5Bf23mtnlwDO47Tn+kE3l5PF4PJ7eyaaCOtTMqoPXBpQFxwZI0rBUDiZpPjA/lX16PB6PJ31kTUFJKszW2DnCXD++H9+P78f3xCZrc1Aej8fj8fSEX4bo8Xg8npzEKyiPx+Px5CReQXk8Ho8nJ/EKyuPxeDw5iVdQHo/H48lJvILyeDweT07iFZTH4/F4cpKMKygzm25mC81sZXB8iJldk2k5PB6Px5PbZ
MOCmgdcRZCLT9IK3P5PHo/H4/G0kw0FNVjS613KWrMgh8fj8XhymGwoqEozmwYI2jcT3JYFOTwej8eTw2Q8F5+ZTcUlSvw4bsPCdcAFktK9QaDH4/F48oiMZjMPNib8T0mnmFk5UCCpJpMyeDwejyc/yKiCCjYmPC54XZfJsT0ej8fTd8xsOvA93Fb17bpD0klpGzMLLr47gAnAw0C7kpL0WEYF8Xg8Hk/cmNly4HfAUiDUVi5pabrGzMaGhaXATiBS6wrwCsrj8Xhyl1ZJd2RyQL9hocfj8Xh6xcyuAyqAx4GmtnJJu9I2ZioUlJn9SdJXeisLyu8mCDGPRNLFSQuSIgoKClRWVpZtMbJGYaFAEApbtkXx5DECFAYz9/B0pr6+XpLyJt2cma2LUixJU9M1ZqpcfAdFHgTRekfEqPtUxOtS4Bxga4rkSAllZWXU1Q3gGI6tX4OCobD3rdmWxJNnNDSFWbq6kUVL63n97Qb2GT2IHbtCnHfyUL786WGY11TtmFlDtmVIBElTMj1mUgrKzK4CrgbKzKy6rRhoxq116oakR7v0cT/wUjJyeFKIBPXPQuGYbEviyTF2VYVYvLqB1eua2VkVorFZDCrssI4amsR7G5uZMamYTx4+mG99cSTDhxSyfWcr/zevkordIb71xZEUFHRXUpLYVNHK+xub2bi9hW2VreyuCVNSbEweX8QxB5dx4JTiqG09mcHMioD/AI4Pip4H7pTUkrYxU+Tiu0HSVX1sOwN4WtL+SQuSIsrLyzVgLajGt2DzmRDeBdPWQ+GobEvkyTJNzWHmPVHFc6/XMWtGKQdPLWbMyEGUlRitIadcAIoGGQdOKaG8rLvXqq4hzNV3fMj4vQbxvQtHUVhohMLinfXNvPhmPS++2UAoJGZOLmby+CLGjx7EqGGF7UrvlRUNtLSKOScN5fSPD6G4KP8VlZnVSyrPthzxYmZ3AUXAPUHRV4CQpK+nbcxUBUmY2QS6x8e/EKVeDZ3noLYDV3W1rLLJgFZQu2+DxjegZSOMvAyGnp1tiTxZpKo2xA/u+JCxIwdx+XkjGTWssM99NTaHuW6us6Qmjy/ijfcaGTWskOMOLeMThw1m2r5FMV2Aklj5QRP3P1vN+m0tXHLWCE48YnBeW1R5qKCWSzq0t7KUjpkiC+pnuIzkb9MRHy9JZyXdeRYY0Apq61dh8HHQWgGhnTDu5mxL5MkSobC48rcfMnHvQVw2J7prLlEksWJNE1srWzl8RinjRiU+y7D8vUbueGwP5WXG97+yF2P70EcukIcKahnwBUkfBMdTgUckHZ62MVOkoN4FDpHUFEfdhZJO7q0smwxoBbVuFuw9F0KVsOtXMPHZbEvkyRL3zq/izfcauem/xlJYmFuWSigk7n+2mscW1fDv547k1KMG510ARh4qqJOBu4G1uFiDScBFkhala8xU3XqsxfkmYyooMysFBgOjzWwk7gMCDMNllvDkAi0boGgKFA6Hlg+yLY0nS2zc0cLjz9cw7+q9c045ARQWGheePpyjDirjxj/t5OmXaznnhCHMmlHK8CF9d0MOFMysAJgj6aF420haaGYHADOConfjMUqSIVUW1KPAocBCOi/g+lZEnSuAbwP7AFvoUFDVwDxJv41zrHKgUVKo18p9ZMBaUKFqWDMeptcCLfDeUPfairItmSeDSOJ/b/2Qow8uZc5Jw7ItTq+0hsQLb9Tzt1frWL2uiWHlBcycXMKMScXMnFzMAfsVU1aSe8uNsm1BmdkSSbPjqHeSpH+Y2eejvZ/ONHWpsqCeDB4xkXQLcIuZ/ZekuBfYBJr+S8AFwJE4BVhiZpXA07gwxzV9ltzTQcsGKJocxA0Xw6B9XFlxzgRYejLAP99oYHd1iHNOGJptUeJiUKFx0uxyTppdTjgsNu1o5d0NTaze0MyipfWs39rChDGDmDm5mJmTS5g5qZjJ+xQxKActw0jM7A/AZ4EKSQcHZaOAB4HJwHrgPEm7zfk3bwE+A
9QDX5O0rJchnjOz7wb9ReZF7ZoZ4gTgH8CZUfpIa5q6VEbxlQETJb0bR92DgQNxC3UBkHRvjLr/BJ4DyK7NLQAAIABJREFUngBWSgoH5aOAE4Hzgccl/TnpDxEwYC2omr/Cnjtgv/nueOMpMOp7MOTT2ZXLkzEamsJcdP02rvrqXhw6vbT3BnlAc4tYu6WZdzY08+6GZt5Z38SO3SGmTShqV1gzJxWzz5hBGZ3H6s2CMrPjgVrg3ggFdSOwS9LPzOxKYKSk75vZZ4D/wimoo4FbJB3dy/gJZYYwsymS1vVWlkpSYkGZ2ZnAL4BiYIqZHQZcHy2Kz8yuBT6JU1DzgdNxC3WjKijglGgLwQIt/yjwaLCAzJMsrRugaFLHcdFUaFmbPXk8Gef+Z6s5aFpJv1FOAMVF5hTR5JL2srqGMO9tdErrxTfrmfeXPTQ2u3VYpx5VzicOG5zQWqsPd7cyZmRqowklvWBmk7sUn427foJbj/Q88P2g/F45i+M1MxthZuMlxdytvA+ZIR4FukbsPULsrEHt9HVqJlXf6HXAUbgvC0lvBiGI0ZiDm696Q9JFZjYO6Mn6mWRm4yS9HFloZscC2yV9kM6VzAOKlvXOxddG0STn4kuQUFisXtfMaysb+NeqBuobwkzdt5gLPj2s00XCk1tsrWzlyRdqmXf13tkWJe2UlxUwa0Yps2Z0KOJdVSGWv9/I0y/Xcvuju/nMx4bw2U8M6TEUfsWaRu59uordNWHmXr03hYmF4g8ysyURx3MlRc3AE8G4CKWzHRgXvJ4AbIqotzkoi6mgID5vlpnNxKWzG95lHmpYZLsubVIyNZMqBdUiqaqLeRyOUbdBUtjMWs1sGC477n499P1rIFqWiurgvWh+UU9faNkApRFzpkWTofapmNUjqakPs/jtBl5b2cDrqxoZPaKQYw4u44ovjmLk0AKWvtPID++sZNb0Ei45e0Sf1r940kc4LG6+bydfPHVYyi2BfGHU8EJOnF3OibPL2bijhSdfqOWbN2xnxqRiDp5awqTxRQwdXEBjs9jyYQsLF9dTUx/mgtOGcepR5YkqJ3DbV/QapBALSTKzPs/RJODNmoGbCxtB5+ttDfCNGN0vwk3NXEX0qZmfm1mvUzOp+iWuMrPzgcIgDPFbwCsx6i4xsxHAPNzGV7XAqz30PU7SW10LJb0Vxfz1JENUC2p91KqS2Li9lVffckppzeZmDtm/hGMOLuOSs7oroAljizj1qHIefK6ab96wnaMPKuWUo8o5bHopRYNiZw/YVNFKY5MoLjLKSowxIwrzOntArvLY8zU0tYjzTs6PwIh0M3FcEZd/YSSXnDWcxW838s6GZp79Vx219WHKSozRIwq5+MzhHD6jNNNh+DvaXHdmNh53gw8uMjryRn/foKwn4vJmSXoCeMLMPiapp2t1JJ+VVBOlr4SmZlIVZj4Y+AHwKVz4+DPAjyU1dqlnwL6SNgXHk4Fhklb00Pf7kg6I8d6adOTwG7BBEu+PhSnLYdB4d9yyGTYcBft3JJvfUtHCglfreOGNepqaxcc+WsYxHy1j1vQSSorjC+Wtqg3x7L/qeH5pPZsrWjn64FKOPqiMyeOLaGoRa7e08MZ7jbz5XiNFg4xhgwtoaRW1DaK+McyUfVzy0LNPGMrQwbkXPpxvrNvazHd+XcFt/7s3+4wemNZTNognzDy4Rj4VESRxE7AzIkhilKT/NbMzgMvpCJL4jaSjeun7dUlHmdlSnFVTA6yWNDNG/VLgEpy7L9Il2G2rJDPbjIvs/l1P1/feyMaW729J+mgC9e8H/iFpXpfyrwOnSvpiqmUckAoqXAfv7wXT68GCi75C8N5gOKCKzZWF3P3XKt54t5FTjy7npNmDmT6xOOmopw/3tPLy8gaWvdPIph0tDC4tYMLYQcyaXsph00sZ3+WCWVsf5v1NzTz3eh2vvNXAF04eypyThvWL5KHZoLlFXHbTds45YSifOXZItsUZUMQRxXc/zgU3G
tgBXAv8BXgImAhswIWZ///tnXl8lNW5+L/PTBYSkpAAIWEXISTsmwhWvQpqrbttwd5We70WRNHbqz+KYn9a+XFdauvyU6sUuODSCqiVulyLBanUqpVeZDVAkH01IYRAAlln5rl/nHe8Q5hJQjKZmSTn+/m8n3mXc85z3kneed5zzrMcc17+XwC+gzEzv01VvwjWbkD7czHZKP4Z+BlmNmujqt4WovwfgAKM5fR/YNaXtqnqPUHKdnTK3Q7UAvOBN87WsbdZCkpEnlXVe0XkvwiehDCYFd+rwAuquraRMrIwGRxrMFOCAOdhLAa/q6qFTe1/KNqlgqreBgdvgP5fnXbat7M/i/NfZ9ln3Zl8WSrfm5AaM06PB4pqWfD2cXYfquEn16fzreFJMdO31sL8P5ZysNjDf0zr2upCBbV2ou2oG0gjZ7M2qOooEdmsqsOdKbpPVHV8A22PBO4ArsT8ls9T1R2N6Vdzx/O/dz6fOos644CbRWQfxjlMMOt9w4MVVtUi4FsiMgEY6pz+k6p+1MQ+W4Lhd9INYONXVcQdziZR9vPq7NExF0Kmd1Y8j9yZyfrtVSz+8wmeWXKMzHQ3GWluOqe5ycxw0y0jjt5ZcQzskxBz/Y82n26sYPX6CubNyrbKqR0hIiGDu4rI6HocfP3W0scd679CoFuIdgZgrPxSnc9PgV3AVExEoUY9jM1SUKq6zvn8WEQSgDzMSGq7qtaEqNYkr08nIGGTgxKKyF7MHKuXZlrPtElq937jA6WqvP3XkyxecYJ5t/Zn6MhSiOEf99G5HRid24HKKh+Fx0yiu2MnvBQf97K/sJZPN1aw40ANHZNc5PQxTpljByeR0zt0eoe2zoGiWp5ZcozH7sokPTV2/7aWFuHpeq4pMDHEtQVOHNVfYNaXUoCHQ5T9CmOk8TZQipk+LAcecT4bRbgcda8B5mE0pGCcde9Q1Q/qllXVfSJyEZCjqi+LSCbmRiPBBFU9GiFZrQtnBKWqzH/7OGu3VvHCzGwy6R/Ski/WSOrgol+PBIJ5H/p8ytdHPew4UMOWPTU8+tJRKquNkce4oUbBRWJ60OtVjp/0UVrmJSFB6JzmpmMHiaiiPFbm5YEXi7n9xnQGWb+0doeqTmhivYXO7sdAKD9XP6Mx03oTgNcxTsRFZyszXFZ8BRizwp3OcX/MNNwZ1iCO7f15QK6qDhSRHsAfVPXCEG1fgFmIO97MPu4FzmuMgmrKGpTXq1TWKClBsom2Cg79EO14NfM/upb126t46t+7kdbRDSdehVOroMfvG26jlXGgqJY1+cZMfvu+Goacm8i4IcYq8Wyt2bxe5esSj0lVXublWJmP0nIvx8q8lJZ5zaiuzMvJCh9pKS4yUtzUeJSSE15U+WY6smu6m24Zbrp1jqNH1zh6ZsaRmeEOmwI7WeHjvuePMG5oB/712vSwtGlpGtFeg5JGpnAXkVtU9TURmRGsHVUNmTQuwFjiJxhH4nlnszwTLpvS8jpewbsJPYz7LjAKWA+gqodFJKjzhYg8AqzFzFk211pPgZWOY9v8uh7bIjINmAaQkJBwVg3/Y0slsxccxe2C8UOTuPWaTvTJDm3i7/OZt+haj6IKPoWkRCElyRXSJ6jF8exj+dqupysngLjQvlCtnd5Z8fTOimfyZWmcrPSxblsVa/IrWbziBKlJLsYNTWL80CSGDUgMGli01qNs2F7F6nUV/H1zJSlJYlKVd3KTkWrWwfr3SiAj1UXnNLM21inFdYZDZ0WVjyOlXopLPd985u+qZuU/TnGwqJYemXHc/+Mu9M5qXkSv0nIvs35zhOEDErn1mk7NasvSJvgtJk3SXOf4x865uinc/Uq0KU5yPkwc1dWYqcO5IqKqOqgxlZtrxecPe3EFJnnVmxhFMBnYr6p3Banjt71fr6qjHQ37eTAjCRH5Dka7e1X1F03uqGmrp6oeEpFuwIfATzVISno4+xFUTa3iU8Xrhf/65CSvf1jGj65M4/sTU7/5M
TpZ6ePTjRV89EUFX+6spkOikBAnuJwBV1WNcrLCR5xb6JkZR/9e8YwdnMS4oUkR8fWp2tqd+957j0fuHn36mkTNHth/CQzY3+J9iBV8PuWr/U6opvwqDhXXMrR/Ilmd4+iUYiIJHC72sOGrKvpmx3PpmGQuGZ1MZnr4fYh8PuWdj0+yZMUJnr4ni77dm6ak1m6t5MnXjnH1tzpy6zWd2u3aWywRAyOoFkvhLiKlGF1wChP1pwwzaCkDyoL5TgVtp5kK6uX6rgezpxcT3j0Ho9R+iRn6LdGzSMHRXETk/wEnVTWo9WFzzcwPH/Xw5O9LKK/wMSavA7sP1bJtbzWjBnbgsrHJnD8kieQOZyodVaWyWjl4xMNX+2v4R34lX+6q5qc3ZXDZ2Jb7P957qIwepd34uvNx+vaoE1pLa2F7R8g91W7zQh074SV/dzVHj3spO+WlQ4KLruluxgzqQEaEDAxW/uMUi949zv+fkVXv9GNltY+iY14KSzwUlXgoLPGwZU8NxaUeZt7ShTF5bScIbGsnBhRUo1K4i8jz9bWjAXn/Auq4/OGN6pEv2oACirijLoCIXIGJOgGwUlU/bGF5HQGXqpY7+x9ioq3/OVj5cPhB+XzKxq+qKdhbTZ/seEbndQiqlBpiz+EaHnihmFuv6dQijpQer/Lob//GzAlTSBkSInbjzj7Q52NIONvgx5Zw8u7H5by5qoxHp2fSr0cCx8q87DxQw1f7a9i+v4Yd+2s4ftJHVmc32V3iyO4cR3bXOM7pHs/5gyMeksfSADGgoAJTuIPJMXVGCncRudXZvRATt+8N53gysFVV7wzS9l8xIY3eVdX9AecTgIuAW4HVqvpKvX0Mk5FEP0wuknMIWNcK5qjrlM/GRD9XYG19zraN0rINlHHeDN52DuMwI7bHQpWPNUfdg0dq+feninjsrsywW139bvkJtOw9/uXCpYg/D1Rd9v0TdJ0DHZtk/GMJI8s/O8nCd4/jU1CFAb3jGdg7gYF9zNa9a5yNVdhKiJaCEpGxwAFVLRSRRIy13Y3ATuABPTNhob/eGuAiVfU4xyEddZ2wSD/BRJvoBxwHkgAXsBKYq6obGuxrmBTUJmAR8CUBUcxV9eMgZadibOc/wpikX4IZzbwUou2/EgZNfDbEmoIC+GRjBS++Vcr8B7LD5nC640ANs35zhN/d8wYp7r2QHWKW9fCPIfkySP/XsMi1NA+/qXrnNJddS2rFRFFBrcfk2TsmJini65gBxkhgkKpOClFvO3CBX4E5PlFrVDW3AXnxmHBNlWdrjR2uVd0qVa13njKA+4BRqloCICJdMJHPgyooTGypnwBLnZHacUygQjdGEz/bGE3c2rl4ZDJbd1fz2Msl/PLuzKaE9j+N6hofj79SwvTvZ5Di3lN/Wvf4c0wyQ0tM4HYLXTpZ51pLk3EHjJJ+gMlD5Y8wvrGeek8AG0RkNWZw8U+YXID14pit15uXKhThMg97TkRmi8gFIjLav4UoW8LpJujlzrmgqGqVqs51/KT6ApcBo1W1r6re3h6Uk5+pN6Tj8SjPLD5GTW3zRr4L3ztBv+7xXH5+MtRsg4SBoQsnDITqrc2SZ2klqBfK/gDHfgO+qobLW1ojbhHxD04uw8xm+Qk5aFHVlzGh6t4G/ogZTb3aYr0kfFN8v8TY0O/if6f4VFXPCJkhIr8DhmFs4xWTqnizs9Xr9BUpYnGKz8+pSh+/+l0JB454+P6EVC4bm3zWERD+svYUC989zvyfZ5OW7IMdnaH/fnBnBK9QXQAHr4b+Nv17m0Zr4eD3wFsM7kyo2Q69V1jjmBYiilN8D2LSchzFREUfrarqxM97NVTQBKduBsYKOzDdRlB3nbD0NUwKaicwuJ74e4FlZ9d3XVXnNLtDzSSWFRQYc/R1BVW8+/FJvtxVzeXnd+SqCzpybs+GY8utL6ji0ZeO8vS93ejXIwEqP4fC6dCvnpG9+mBHJvTbDPE9w3w3lphAvXD4ZtAK6LnMu
BSUvgjHnoE+f7N/9xagMQrK8QV9DrOksVBVnwiT7PFAd4wV9Snn3EAgJVSwWMd+4B5MMsSNwHiMD2uo2H3N72eYFNQ7wDRVPdJg4VZArCuoQApLPPzp05OsWnuKpEQXl49NZuLYjmR3OXOk/td1p3j+jVJmT+3KiIHOC1DJr8BzGLKeq1/QocmQch10+pcWuAtLVFGFwmlQuxt6/QlcAb5SJb8y4a76/A3iukavj22QhhSUiLgxQVevAA5iour8UFWjMt8uIl8CYzGGESNFJA94XFW/10DVJhMuI4l0oEBE1gL+hFSqqjfULSgi52Gy7/bldJP0oOk2WgXeEjMN9k1KrMBPZ1+DXatTpu6n1nPN+cxOVKZMVG6boOwvrGHTjmpeWlJFWkcTubt3tzh8qmzbXcXB4lpenJ5G9y5xUOa0U/4OdLmv4XtMvhzK34L4/nUu1H3Baeg4yLkzXpIaUacpchqU2x7leOH4f4JWQ68PTldOAF1mga/MRBPpOhviegWRYUHiIKneBLZN4Xxgp6ruBhCR1zFLItFaEK5S1SoRQUQSVbVAROq14Gsu4VJQgdN2AlyMydIYjMUYS77TTNJbNdX5UPx/nQMJ+JQg5wCp51pgPannWp1zLuCcZOGcEXD9cKG8wkdpuY9TJxQXML6vmx5j4nC7XCbYiL8PCf0guREj9JRroWwJFN9f50KwKcW65xo6DnLujKnKWJbTxL40KDtCclJuhIzp5kc2GF0fhcRRcOIl8DUrZnPbxZUBvf90trXiRCQw6+2COjFCe2ICrPo5iDFSiBYHRSQdk9X3QyecUYua94YtkoSIjMJErZ0M7AH+GCx8kYh8qqoXhUVoC9GapvgsFkvrpBFTfJOA76jqVOf4x8A4Vf23SPUxFCJyCdAJ+HNjbA+aSrNGUM6i2g+d7SgmBIZo/flGZovIQuAv/O90IKr6x+b0JZxUVFSoiFS2sJg4wNPCMqx8K9/Kj135SQ2UOQT0Djju5ZyLOM562BZ1UigFC8LQEjR3iq8A+ITTc0H9nwbq3IbJvBtPgEk6xq4+JlDVFg8fLiJfaBSz+lr5Vr6VH/Py1wI5ToCCQ5hlkx+1eOeCoKpeEdkuIn0CI/q0NM1VUN/DfGmrReTPmJAZDYU4GNtQaAyLxWJp76iqR0T+DViBMTN/SVW3RLFLGcAWEflvTBoNIHTM1XDQLAWlqu8A7zgRwm/AJBbsJiK/Bd5W1ZVBqv1dRAZHy1TSYrFYWguquhwIEcU54jQrJ19TCIsVn+PotQRY4ngaTwZmYWLl1WU8sFFE9mDWoMQ00YrNzJvGgoaLWPlWvpVv5UcfZw1qvn8NKmJyI50PSkT6BjuvqjYaqcViscQoIvIuJhN5q1mDOmtUdZ+IXATkqOrLIpIJhD8Tn8VisVjCScTXoKIxgpoNnAfkqupAEekB/KG+AIUWi8ViiS6O79MZtKTJeYubUwfhu8D1OBpYVQ8DqVHoR5MREa+IbBSRLSKySUR+JiLN/i5FZIaIbBWRzSLyl8DpUBH5tSNvm4j4rPyIy/+ViOQ7W3uX/4MoPQOR+A7uFJEvnbY/FZHBAdd+LiI7xZhbt9T9N1b+lc2Vdbao6sfBtpYWGtEN+G/nc73z2RHYHOl+NPMeTgbsdwNWAXPC0O4EINnZnw684ex/C/gMY2rqBrzApVZ+xORfA3yImRLv6MhPa8fy10bhGYjUd5AWsH89JlICwGBgE5CISWHuwyT+i5b8XX75Lb0Bnzqf5ZhAaf6tHChrUdmRuME6NzsTmA/sBm4HPscsvEW8L824h5N1js/FJF0U5wf0Sech3gzcEVBuFiYG4SbgiQZkjAI+c/YvANZhPM+TnYdzkJUfMfn3Ab8IuFYL3NSO5S/CBA6N5DMQje/gh8AHzv7PgZ8HXPNgEvZFS/6KQPltdYu4kQSQCbyF0cC5wMPA5VHoR9hQ1d1izDC7YfzBTqjqWBFJBD4TkZWY6Bk3YGJpVYhI5waanQJ84
LT/uZg0y19jHgCvqm6z8iMjH/NjMltEnsYoSDcBIWjaofwJ1HHIb0vfgYjcDcwAEgB/JOWewJrAW3bORUv+wUD5bZVoKKgrVHUWZrgOgPNPNysKfWkJvg0MFxPoEUxAxRyMEn5ZVSsAVPVYqAZE5BaMIcklzvEAYBAmFhfACRG5WFU/sfJbXr6qrhSRscDfgWLMCM4bonp7kP85MClU/db+Hajqi8CLIvIj4CHg1nruNRbltxkipqBEZDpwF3CuiGwOuJSKWV9otYjIuZiH5QjmzfKnqrqiTplGLWqKyOWYfFmXqKo/mO53MUnCTjplPJhpr0+s/IjIR1UfAx5zytRiEsn567Q3+UuokyqnLX0HAbwO/NbZrxu4VQgI3BoF+VELHBtRIjWXiHmLOAdYiklW6N86R6oPYbyXwAXiTEzEjDnO8TRMvpR453ggZlH3O5i3P/8C8Bn3jZlz34XxEQs8/wPMImwcJsiuB7jOyo+YfDfQxdkfjvkhimvH8vOJ/DMQqe8gJ2D/OuALZ38IoY0koiF/NxEykojmFvUOtMbNeTg2Alucf5qZgMu55gIexyyE5gOrgU7OtQcw2TA3YlIl1213FVDkXN8IvOecd2MMS7Y59X1WfkTld3DqbcWsA0T67x9r8kfGQB9aSv5zTpsbnXpDAq49iFGe22NA/lXR/h2MxBZxR11L+Fm3bl23uLi4hcBQouPbZrFYWg4fkO/xeKaOGTPmSLQ7E0miYSRhCTNxcXELs7OzB2VmZpa6XC77xmGxtCF8Pp8UFxcPLiwsXIjxjWo32LfttsHQzMzMMqucLJa2h8vl0szMzBOYGZJ2hVVQbQOXVU4WS9vFeb7b3e91u7thi8VisbQOrIKyWCwWS0xiFZTFYrGEmVGjRkU082xbxSooi8ViCTMbNmwoiHYf2gJWQVksFoslJrEKymKxWCwxiVVQlrDgdrvH5OXlDc7JyRkyceLEAUePHnUDbN++PSEnJ2dItPsXbgLv96qrrjq3vLw86LPUntcinnnmma55eXmD8/LyBrtcrjH+/SlTpvRuuHbr5ZVXXkkfPnx4Xm5u7uChQ4cOWrZsWVq0+9RasQrKEhYSExN9BQUFW3fs2LElPT3d8+STT2ZGu08tSeD9xsfH69NPP33a/dbW1gLtey1ixowZRwsKCrZ+8MEHO7Kzs2sKCgq2FhQUbF20aNGBaPetpZg3b17nZ599Nvv999/fuX379q1Lly7dfccdd/TbtWtXfLT71hqxCsoSdsaPH3/q0KFDCdHuR6S46KKLTu7cuTPx/fffTx0zZkzuxIkTB+Tk5AwFSE5OHuUv9+CDD2YPHDhwcG5u7uC77rqrJ8CWLVsSL7744pwhQ4YMGjNmTO6GDRs6ROs+Wor169cn5ebmVka7Hy1NWVmZa86cOT3feuutXX369PEADBs2rHr8+PHly5cvt6OoJmBj8VnCisfjYfXq1alTpkw5Gu2+RILa2lpWrFiR9u1vf7sMYOvWrckbNmzYkpeXVxNY7s0330xbvnx5+rp16wpSU1N9RUVFboCpU6f2XbBgwb5hw4ZVf/TRRx2nT5/eZ82aNV8Fk9Va2bRpU9KgQYPavIJatGhR5yFDhlQMGDCgNvB8QkKCr6Kiwg4GmoBVUG2Rp2VM2Nv8ma6r73J1dbUrLy9vcFFRUXz//v2rbrzxxrKw9yEEb731Vg+ASZMmHX7ooYeGTp8+fUdlZaV7yZIlfR9++OFtixcv7tWpU6faa6+9tuj+++8fPmvWrG2HDx/usHz58h6zZs3avmjRor59+vQ5dcUVVxy99957Rz3++OObkpOTffXJ9N8vwLhx48rvueeeo6tWrUoZPnz4qbrKCeDDDz9Mu+WWW46mpqb6ALKysrwnTpxwbdiwIWXy5Mn9/eVqamqkbt1wMPGu/WH/n/hobp96/yf8bNmyJenyyy+P2P8DAAUt8Azk1f8M5OfnJw0bNuwMRbx169bk2267rcR/7PP5cLlC6
6tly5alFRYWxt19990hMw63F6yCaos0oExaAv+aTHl5uevSSy/NeeKJJ7o99NBDEUkNMGnSpMP+/UcffTTfv//www9vA7j55psP+s/9+te/3gzQpUuX2mHDhm0HmDJlyj7/9WeffXZDY2T677fu+YYUWyBer5fU1FRPsHbCTWOVSUtQUFCQNHPmzKLAczNmzOhRWlrq7tKli+epp576OuxCG1AmLUFaWpq37gvGqlWrOlZWVrqGDRtWNXLkyLxrrrnm+LRp00qee+65zMD7r6qqkmnTpvVOS0vzrlu3ruPcuXP3R7r/sYgddlrCSmpqqu/555/fP3fu3Cy/oYAFrrzyyrLXXnutq9/ar6ioyN25c2dfr169al566aUMMG/Wn3/+eVJ0expevF4v+/bt6zBq1Kgq/7k9e/bEezwe0tPTvWvWrEmJZv/CyfXXX3/8vffeyzh8+HAcwObNmxPvvPPOcxYsWLD3iy++SJo0adKxxx57rNDn81H3/p988snMKVOmHH3hhRcO1dTUyIgRI6rql9Y+sArKEnYuvPDCyry8vMoFCxZ0BtizZ09iVlbWcP/m/0FuT0yaNKnsqquuOj5y5MhBeXl5gx955JFsgKVLl+5++eWXu+bm5g7OyckZsmzZsvRo9zWcbNmyJTErK6umQ4cO30Tbv//++3vOmTOn8Pbbbz/avXv3NvMWM2HChIqZM2d+PWHChIH9+vUbct111+XMmzdv75VXXnly/fr1yVdffXUZBL//jRs3Jo8dO7aytLTUlZGR4alvCrA9YTPqtgE2bdq0d8SIEe3CKMHS+pk9e3ZWWVmZu6SkJC43N7fqwQcfbHNZYg8cOBA3YcKE3MWLF+8eN25c5U033dR36dKl+9xud9D7nz9/fueVK1emJSUl+VJSUnzz5s07WLfNTZs2dR0xYsQ5UbidqGEVVBvAKiiLpe3THhWUHUdaLBaLJSaxCspisVgsMYlVUBaLxWKJSayCahv4fD5fizh4WiyW6OM83432sWty2WuAAAAAeElEQVQrWAXVNsgvLi7uZJWUxdL28Pl8Ulxc3AnIb7BwG8NGkmgDeDyeqYWFhQsLCwuHYl86LJa2hg/I93g8U6PdkUhjzcwtFovFEpPYt22LxWKxxCRWQVksFoslJrEKymKxWCwxiVVQFovFYolJrIKyWCwWS0zyP4KStwHYW7vwAAAAAElFTkSuQmCC\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.452 3.271]\n", - " [5.66 5.162]\n", - " [2.737 2.014]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 9, starting day: 72.3 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.452 3.917]\n", - " [6.03 6.114]\n", - " [2.737 2.014]]\n", - "-------------------------------------------------------------------\n", - "Episode number: 10, starting day: 357.8 (from beginning of the year)\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "q(s,a) = \n", - "[[2.56 4.458]\n", - " [6.448 6.346]\n", - " [3.256 2.165]]\n" - ] - } - ], - "source": [ - "model = Q_Learning_Agent(env, eps_min=0.01, eps_decay=0.001, alpha=0.1, gamma=0.9)\n", - "model.learn(total_episodes=10)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qblijq9WVobQ" - }, - "source": [ - "Since our environment has been defined with one-dimensional state and action spaces, we can plot the q-function after training as follows:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 248 - }, - "id": "z_u32nxmzSDm", - "outputId": "59c90ac3-7659-4b5f-91a4-73c66f7d837e" - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "acts = ['a=0','a=1']\n", - "stas = ['T<21', '2124']\n", - "colors = ['b', 'g', 'r']\n", - "\n", - "fig = plt.figure()\n", - "ax = fig.add_subplot(111, projection='3d')\n", - "ax.set_xlabel('actions', labelpad=6, fontsize=12)\n", - "ax.set_ylabel('states', labelpad=10, fontsize=12)\n", - "ax.set_zlabel('$\\mathbf{q(s,a)}$', labelpad=0, fontsize=15)\n", - "plt.xticks(ticks=range(len(acts)), labels=acts)\n", - "plt.yticks(ticks=range(len(stas)), labels=stas)\n", - "\n", - "for i, s in enumerate(stas):\n", - " x = np.arange(len(acts))\n", - " h = model.q[i,:]\n", - "\n", - " # Set color\n", - " color = [colors[i]]*len(acts)\n", - "\n", - " # Plot the 3D bar graph\n", - " ax.bar(x, h, zs=i, zdir='y', color=color, alpha=0.8)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V0AIl-HeVyqs" - }, - "source": [ - "Does it sound familiar? this is actually the [q-function that we had conceptually introduced before](#qFunctionConcept), but for our specific case!\n", - "\n", - "We observe that the state with the highest value is the one in the middle (green bars 🟢👌, `2124`), there is more value on `a=0`, so there is a preference for the agent to turn heating off. \n", - "\n", - "Sometimes it is useful to know what is the value of being on a specific state, independently of the action to be taken. 
This is represented by the so-called state-value function, which relates to the action-value function as follows:\n", - "\n", - "\\begin{align}\n", - " v(\\pmb{s}) = \\max_{\\pmb{a}} q(\\pmb{s},\\pmb{a})\n", - "\\end{align}\n", - "\n", - "At this point we can easily compute and plot the value function for our case:\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 288 - }, - "id": "urJOkjSNoa-h", - "outputId": "57f5b577-d685-431b-9296-649aaba81837" - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEPCAYAAABBUX+lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQ2klEQVR4nO3de5BfZX3H8fdHwk1QrLBiuWiYiiitCnRFFLWKUwW09VJa77YUjU4RcbRTER3U6dSWjtc6KhMVZcolRS4tg1ZwrNfKbQPoAIGiiAICWUAq2AokfPvHOSlL3CT7S3bPJvu8XzM7+7s85zzfX57JJ0/O7znnpKqQJC18j5jvAiRJwzDwJakRBr4kNcLAl6RGGPiS1IhF813A+uyyyy61ePHi+S5DkrYYy5cvv6OqxqZ7b7MO/MWLFzMxMTHfZUjSFiPJT9f1nod0JKkRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEZv1mbZqx/jS8fkuYcGaWOLZ6uo4w5ekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUiEEDP8ljkpyV5NokK5I8e8j+JallQ19L55PA16rqiCTbAI8cuH9JatZggZ9kJ+D5wF8AVNX9wP1D9S9JrRvykM5ewCTwxSRXJPl8kh3WbpRkSZKJJBOTk5MDlidJC9uQgb8IOAD4bFXtD/wKOG7tRlW1tKrGq2p8bGxswPIkaWEbMvBvBm6uqkv652fR/QMgSRrAYIFfVbcBNyXZp3/pRcA1Q/UvSa0bepXOMcBp/QqdG4AjB+5fkpo1aOBX1ZWA97KTpHngmbaS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNWLRkJ0luRG4B1gNrKqq8SH7l6SWDRr4vRdW1R3z0K8kNc1DOpLUiKEDv4ALkyxPsmS6BkmWJJlIMjE5OTlweZK0cA0d+M+tqgOAw4Cjkzx/7QZVtbSqxqtqfGxsbODyJGnhGjTwq+qW/vdK4FzgwCH7l6SWDRb4SXZI8qg1j4EXA1cN1b8ktW7IVTq7AucmWdPv6VX1tQH7l6SmDRb4VXUD8Iyh+pMkPZzLMiWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUiPm4p+0gxr09+pyZmJjvCiRtDGf4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhoxeOAn2SrJFUnOH7pvSWrZfMzwjwVWzEO/ktS0QQM/yR7AS4HPD9mvJGn4Gf4ngL8BHlxXgyRLkkwkmZicnByuMkla4AYL/CQvA1ZW1fL1tauqpVU1XlXjY2NjA1UnSQvfkDP8g4E/TnIjsAw4JMmpA/YvSU0bLPCr6r1VtUdVLQZeA/xHVb1hqP4lqXWuw5ekRszLLQ6r6lvAt+ajb0lqlTN8SWrESDP8JDsCzwN+H9i1f/l2YDnw3aq6d3bLkyTNlhkFfpID6c6QfSWw7Tqa3ZfkbOCfquqyWapPkjRLNnhIJ8k5wEV0K2u2AwKsAlYCk/3j9O+9Hrg4yVlzVbAkaePMZIb/CuB+4ALgbODiqvqvqQ2S7AMcBPwJ8GK6/wlIkjYjMwn8DwOfqqrb19Wgqq4DrgNOSfJ44JhZqk+SNEs2GPhV9f5RdlhVtwHv2+iKJElzYpOXZSbZLcnTk8zLmn5J0syMFPhJjkpyXpK39M9PAH4GXAFcn+RJc1CjJGkWjDrDfz3d9exvTfJY4P39PgI8AThhdsuTJM2WUQP/qf3v5XSrchYBX6U7Zh/gBbNWmSRpV
o0a+L/V/14J/C5QwKnAR/vXHzdLdUmSZtmogf+L/vcfAS/pH18P7Ng/9tIKkrSZGjXwl9MdujkbeCHw38CVwJova2+ctcokSbNq1MA/AbiLLvQfBI6vqtU8dGbtd2exNknSLBpp7XxVXZ7kCcBTgFumnH37ceAk4M5Zrk+SNEtGPlmqqv4HuHyt11bOWkWSpDkxk6tlHpVk65nuMMnWSY7atLIkSbNtJjP8zwF/m2QZcC5waVXdN7VBku2AA4FXAa+mW575hVmuVdLmZHx8vitYuCYm5mS3Mwn8K4H96G6AciywOslPeOh4/c7AXsBW/fP020iSNiMbPKRTVQcAr+OhJZmLgL3pzrQ9qH+8qH/vMuC1/TaSpM3IjL60raplwLIk+wIvAg7gobNqV9J9ifuNqrpmTqqUJG2yUVfpHAh8sao+NRfFSJLmzqgnXp0M3J7kjCQvTbLVBrfoJdkuyaVJfpDk6iQfGrFvSdIm2JgboGwP/BlwHt1lkj+V5KAZbHcfcEhVPYPuS+BDZ7idJGkWjBr4BwOfAG6i+5J2F+CvgP9Mcn2SD6xrw+qsubja1v1PjV6yJGljjBT4VXVRVb2rqhbTrdD5KPBTuvD/HTZwA5QkWyW5ku6L3q9X1SXTtFmSZCLJxOTk5CjlSZLWY1PuafsD4PvAxcCqmWxQVauraj9gD+DAJL83TZulVTVeVeNjY2ObUJ4kaaqRVukk2Z7uFodHAIcDO6x5C1gNfGMm+6mqu5N8EzgUuGqUGiRJG2fUZZmTdF/aQhfy0K3BPxVYVlW3rWvDJGPAA33Ybw/8IXDiiP1LkjbSqIH/yP73T4DTgVOr6roZbvvbwCn9Us5HAGdW1fkj9i9J2kijBv5ngdOq6vujdlRVPwT2H3U7SdLsGPUGKEfPVSGSpLm1Kat0JElbEANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqRGDBX6SPZN8M8k1Sa5OcuxQfUuSYNGAfa0C3l1Vlyd5FLA8yder6poBa5CkZg02w6+qW6vq8v7xPcAKYPeh+pek1s3LMfwki4H9gUvmo39JatHggZ9kR+Bs4J1V9ctp3l+SZCLJxOTk5NDlSdKCNWjgJ9maLuxPq6pzpmtTVUuraryqxsfGxoYsT5IWtCFX6QT4ArCiqj42VL+SpM6QM/yDgTcChyS5sv85fMD+Jalpgy3LrKrvARmqP0nSw3mmrSQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqRGDBX6Sk5OsTHLVUH1Kkh4y5Az/S8ChA/YnSZpisMCvqu8Adw3VnyTp4Ta7Y/hJliSZSDIxOTk53+VI0oKx2QV+VS2tqvGqGh8bG5vvciRpwdjsAl+SNDcMfElqxJDLMs8ALgL2SXJzkqOG6luSBIuG6qiqXjtUX5Kk3+QhHUlqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaMWjgJzk0yXVJfpTkuCH7lqTWDRb4SbYCPg0cBuwLvDbJvkP1L0mtG3KGfyDwo6q6oaruB5YBLx+wf0lq2qIB+9oduGnK85uBZ63dKMkSYEn/9N4k1w1Q23zbBbhjvouYqWS+K9gsbDFjlrc6YL0tZsw28S/ZE9f1xpCBPyNVtRRYOt91DCnJRFWNz3cdmjnHbMvjmA17SOcWYM8pz/foX5MkDWDIwL8M2DvJXkm2AV4DnDdg/
5LUtMEO6VTVqiRvBy4AtgJOrqqrh+p/M9fUIawFwjHb8jQ/Zqmq+a5BkjQAz7SVpEYY+JLUCAN/liXZOcmV/c9tSW6Z8nybDWx7Wn/piauSnJxk6/71pyS5KMl9Sf56mE+y5UuyZ5JvJrkmydVJju1f/9P++YNJZrRMb1PGtd9+2rGd8v4zk6xKcsTGfdp2zMXfsSnvL+hxMPBnWVXdWVX7VdV+wEnAx9c8788wfpgk2yTZoX96GvAU4GnA9sCb+9fvAt4BfGTuP8GCsgp4d1XtCxwEHN1fzuMq4FXAd2aykySPHXVc++1mMrZrLjtyInDhRn3KxqxvLIAdNrB50+Ng4M+TJE9N8lHgOuDJAFX11eoBl9Kdq0BVrayqy4AH5q3gLVBV3VpVl/eP7wFWALtX1YqqWu8Z3OkckuR0uiXFMzbK2PaOAc4GVo7Sj6Y10c/iD0l+83TV1sfBwB9Qkh2SHJnke8DngGuAp1fVFWu12xp4I/C1eShzQUqyGNgfuGQD7XZLcjzd2BxNNyN88gz2v1Fjm2R34JXAZ0f8SJrek4EzgLcD1yQ5PsluazdqdRw2u0srLHC3Aj8E3lxV166n3WeA71TVd4cpa2FLsiPdzO2dVfXL9bQ7EPg+8HngeVU1ynVXNnZsPwG8p6oenGZCqhFV1WrgfOD8JGPA3wM/S/Kcqrp0StMmx8EZ/rCOoLucxDlJTkjyGxc5SvIBYAx419DFLUT9TO5s4LSqOmcDzX8IHEV3+e5/S/KWJI+eYVcbO7bjwLIkN/b7+EySV8ywT00jyU5J3kp3Jv/ewF/Sje2a95sdB2f4A6qqC4ELk+wMvIEuVO6gmxXemOTNwEuAF1XVg/NZ60LQH8P9ArCiqj62ofZV9WvgFOCUJHvThf8VSS6qqjdsYNuNGtuq2mtKvV8Czq+qfx31s6qT5FTg2cCXgTdV1fVrvd/0ODjDnwf9KoNP9qsKjgdW92+dBOwKXNQvMTsBIMnjk9xMNyN5f5KbR5h5tuxguuO0h0xZtnd4klf2f57PBr6S5IK1N6yq66vqOGAfuvCYkVHHVrPuTGCfqjpu7bDvNT0OXlpBkhrhDF+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJSDJB/tL6860/Qv6K25KWwwDX5IaYeCrOUnek+SWJPckuS7JS+muXf/qJPcm+UHf7sgkK/p2N/R3USLJDsC/A7v17e/t74X7iCTHJflxkjuTnJnksf022yU5tX/97iSXJdl1vv4M1CYDX01Jsg/dDa6fWVWPorv70bXAh4F/qaodq+oZffOVwMuARwNHAh9PckBV/Qo4DPh5337Hqvo5cAzwCuAPgN2AXwCf7vf158BOwJ7AzsDbgP+d8w8sTWHgqzWrgW2BfZNsXVU3VtWPp2tYVV+pqh9X59vAhcDz1rPvtwHvq6qbq+o+4IPAEUkWAQ/QBf2Tqmp1VS1f3w3Vpblg4KspVfUj4J10YbwyybIku03XNslhSS5OcleSu4HDgV3Ws/snAuf2h2zuBlbQ/QOzK/DPwAV0N8r+eZJ/7G+wLg3GwFdzqur0qnouXUAXcGL/+/8l2RY4G/gIsGtVPQb4KpA1u5lm1zcBh1XVY6b8bFdVt1TVA1X1oaraF3gO3aGiN83JB5TWwcBXU5Lsk+SQPtB/TXcc/UHgdmBxkjV/J7ahO/QzCaxKchjw4im7uh3YOclOU147Cfi7JE/s+xpL8vL+8QuTPC3JVsAv6Q7xPDhnH1SahoGv1mwL/ANwB3Ab8DjgvcCX+/fvTHJ5Vd0DvAM4k+7L19cB563ZSVVdC5wB3NAfwtkN+GTf5sIk9wAXA8/qN3k8cBZd2K8Avk13mEcaTKqm+5+pJGmhcYYvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJasT/AUOdyxmuvNQYAAAAAElFTkSuQmCC\n" - }, 
- "metadata": { - "needs_background": "light" - } - } - ], - "source": [ - "# Compute the state-value function\n", - "v = np.amax(model.q, axis=1)\n", - "\n", - "# Plot state-value function\n", - "fig = plt.figure()\n", - "\n", - "ax = fig.add_subplot(111)\n", - "ax.set_xlabel('states', labelpad=10, fontsize=12)\n", - "ax.set_ylabel('$\\mathbf{v(s)}$', labelpad=0, fontsize=15)\n", - "plt.xticks(ticks=range(len(stas)), labels=stas)\n", - "x = np.arange(len(stas))\n", - "ax.bar(x, v, color=colors, alpha=0.8)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "clFn8dd7obRI" - }, - "source": [ - "Notice that we have trained our agent following an off-policy method: the actions were driven by a policy different than that one that our agent would follow. This is because the agent was using an epsilon-greedy policy to explore more rewarding actions. If we conclude we are happy with the learned policy, we can test it by setting `deterministic=True` with the `predict` method. 
For example, let's test our learned agent for the first day of February: \n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WuYEBf9nsmH6", - "outputId": "d63ce6c1-f06d-49a6-c16e-e3fa226dbbe5" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "-------------------------------------------------------------------\n", - "State [Bin #] = 1\n", - "Action [ - ] = 0\n", - "-------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "env.stop()\n", - "env = BoptestGymEnvCustomReward(url = url,\n", - " testcase = 'bestest_hydronic_heat_pump',\n", - " actions = ['oveHeaPumY_u'],\n", - " observations = {'reaTZon_y':(lower_setp,upper_setp)}, \n", - " random_start_time = False,\n", - " start_time = 31*24*3600,\n", - " max_episode_length = 24*3600,\n", - " warmup_period = 24*3600,\n", - " step_period = 3600)\n", - "env = DiscretizedActionWrapper(env, n_bins_act=1)\n", - "env = DiscretizedObservationWrapper(env, n_bins_obs=3, outs_are_bins=True)\n", - "\n", - "done = False\n", - "obs = env.reset()\n", - "\n", - "from IPython.display import clear_output\n", - "while not done:\n", - " # Clear the display output at each step\n", - " clear_output(wait=True)\n", - " # Compute control signal \n", - " action = model.predict(obs, deterministic=True) \n", - " # Print the current operative temperature and decided action\n", - " print('-------------------------------------------------------------------')\n", - " print('State [Bin #] = {:.0f}'.format(obs))\n", - " print('Action [ - ] = {:.0f}'.format(action))\n", - " print('-------------------------------------------------------------------')\n", - " # Implement action \n", - " obs,reward,done,info = env.step(action) # send the action to the environment" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sLBD3joyxe9Z" - }, - "source": [ - "Now there is 
no randomness involved. The agent exploits its policy by ALWAYS picking action `a=1` when `s=0` because it has learned that that is the action with the highest value in that state. \n", - "\n", - "We can now evaluate our learned policy by calculating the core KPIs with BOPTEST:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "eLzZaaNzyeZv", - "outputId": "ab1c7d0d-8eb3-4192-cfe5-6f6848dce054" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'tdis_tot': 1.7415794961384694,\n", - " 'idis_tot': 0,\n", - " 'ener_tot': 0.17501300879744733,\n", - " 'cost_tot': 0.044365797730152895,\n", - " 'emis_tot': 0.029227172469173696,\n", - " 'pele_tot': 0.01990768126278055,\n", - " 'pgas_tot': None,\n", - " 'pdih_tot': None,\n", - " 'time_rat': 0.0003869439121605693}" - ] - }, - "metadata": {}, - "execution_count": 24 - } - ], - "source": [ - "env.get_kpis()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WbDAlStV2Tvx" - }, - "source": [ - "This prepares the ground for different RL configurations to be evaluated and compared between each other and to other types of controls like classical rule based controllers or more advanced model predictive control. Recall that there are specific [scenario periods for each test case in BOPTEST](https://github.com/ibpsa/project1-boptest/tree/master/testcases#test-cases) that are set for these comparisons. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FBL289bfsmcJ" - }, - "source": [ - "**Food for thought: 🤔**\n", - "- If the agent never receives a reward when the temperature is out of the comfort bounds (states 0 🔵 and 2 🔴), why is the q-function not 0 for those states?\n", - "- Could you think of measures to improve learning?\n", - "- Could you think of measures to improve performance?\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eKqvn5yb_mqJ" - }, - "source": [ - "# **Gearing up** 💪" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "X1sdwYm5b66G" - }, - "source": [ - "The previously stylished example had a very limited representation of the state space. It was useful to illustrate how we can configure and train a RL agent without needing too many interactions with the environment (our building). However, using RL for solving this environment may feel like overkilling the problem. Our `SimpleController` was already enough to decide when to turn on heating based on indoor temperature readings. You should note, however, that you have developed a general agent capable of learning from any environment and the potential to infer way more complex relationships between environment observations and actions. Examples of what this RL agent could infer for building control are the following:\n", - "- Dynamic energy pricing\n", - "- A heating schedule based on user inputs. \n", - "- A heating curve based on ambient temperature.\n", - "- The variable heat pump COP based on condenser, evaporator, and ambient temperature reaadings. \n", - "\n", - "We could for example extend our reward function as to minimize the building energy use or the greenhouse gas emissions while keeping comfort.\n", - "And all this can be inferred without the need of a model that requires domain knowledge. On the downside, learning more complex dynamics from higher dymensional observation spaces requires more training data. 
This means that more interactions with the environment (the building) are required, which sometimes are unavailable. For this reason, sample-efficiency is key in RL and there exist several tricks to expedite learning. \n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V3ZF29MChF4F" - }, - "source": [ - "To finalize, we are going to instantiate a more complete building environment by extending the observation space with the time of the week as well as information about the ambient temeprature, solar irradiation, internal gains, electricity pricing, or temperature setpoints. With BOPTEST-Gym we can also establish a predictive and a regressive period that include predictions of the boundary condition data and past observations of the measured data, respectively. " - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "id": "Bdx3qCDvhFSX" - }, - "outputs": [], - "source": [ - "env.stop()\n", - "\n", - "from boptestGymEnv import NormalizedObservationWrapper\n", - "from stable_baselines3 import DQN\n", - "\n", - "env = BoptestGymEnvCustomReward(\n", - " url = url,\n", - " actions = ['oveHeaPumY_u'],\n", - " observations = {'time':(0,604800),\n", - " 'reaTZon_y':(280.,310.),\n", - " 'TDryBul':(265,303),\n", - " 'HDirNor':(0,862),\n", - " 'InternalGainsRad[1]':(0,219),\n", - " 'PriceElectricPowerHighlyDynamic':(-0.4,0.4),\n", - " 'LowerSetp[1]':(280.,310.),\n", - " 'UpperSetp[1]':(280.,310.)}, \n", - " predictive_period = 24*3600, \n", - " regressive_period = 6*3600, \n", - " random_start_time = True,\n", - " max_episode_length = 7*24*3600,\n", - " warmup_period = 24*3600,\n", - " step_period = 900)\n", - " \n", - "env = NormalizedObservationWrapper(env)\n", - "env = DiscretizedActionWrapper(env,n_bins_act=10)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Is0vIEcP-lmS" - }, - "source": [ - "This new environment has a way higher dimensional state-action space than the ones we treated before:" - ] - }, - { - 
"cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GSg90XCe-26Q", - "outputId": "9a21fa91-b278-4295-a125-a6e1e93ce08a" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Observation space of the building environment (dimension):\n", - "(608,)\n", - "Action space of the building environment:\n", - "Discrete(11)\n" - ] - } - ], - "source": [ - "print('Observation space of the building environment (dimension):')\n", - "print(env.observation_space.shape)\n", - "print('Action space of the building environment:')\n", - "print(env.action_space)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NW3VhsGq-3Ee" - }, - "source": [ - "Because of this high dimensional state-action space, an agent will probably require many more interactions to solve this environment. Luckily, there are readily available state-of-the-art RL algorithms that implement all sort of tricks to expedite and stabilize learning while maintain the learning principle that you have learned above. For example, we can access the advanced Deep Q-Network (DQN) algorithm from Stable-Baselines3 to learn this more complex environment. We set here our agent to learn for `10` steps to show how this learning process would be initiated. 
\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4NvXkkFh-qH5", - "outputId": "2c368341-a92a-406a-935b-58258f5577cb" - }, - "outputs": [ - { - "metadata": { - "tags": null - }, - "name": "stdout", - "output_type": "stream", - "text": [ - "Using cpu device\n", - "Wrapping the env in a DummyVecEnv.\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 27 - } - ], - "source": [ - "model = DQN('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed, \n", - " learning_rate=5e-4, batch_size=24, \n", - " buffer_size=365*24, learning_starts=24, train_freq=1)\n", - "\n", - "# Main training loop\n", - "model.learn(total_timesteps=10)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "07ZJaTPzEo8O" - }, - "source": [ - "However, this is clearly not enough! Solving an environment of these dimensions would probably require millions of steps or other tricks to accelerate learning. Could you think of any?" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "id": "EDJHCuQ2NFN6" - }, - "outputs": [], - "source": [ - "env.stop()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "F-_f2qRTB0Nw" - }, - "source": [ - "# **Further resources** 📚" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p54OK_TGrtfp" - }, - "source": [ - "- For RL, check out the resources page from Stable-Baselines 3 [here](https://stable-baselines3.readthedocs.io/en/master/guide/rl.html) and the [open access book of Richard S. Sutton and Andrew G. 
Barto](http://incompleteideas.net/book/the-book-2nd.html) \n", - "- For BOPTEST, check out the websites of the [BOPTEST framework](https://ibpsa.github.io/project1-boptest/), its [GitHub repository](https://ibpsa.github.io/project1-boptest/), and its overarching project: [IBPSA Project 1](https://ibpsa.github.io/project1/). " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Jblq_C7CHQHj" - }, - "source": [ - "# **Feedback** 💬 " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jJ9lmUndHLMq" - }, - "source": [ - "Please help us improve by filling out [this form](https://forms.gle/JdprK6tgxQtwvhFV8). It'll only take a couple of minutes!" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WNB5MoRmOWc9" - }, - "source": [ - "#**Annex I: Formal Reinforcement Learning theory** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "G-ZId2TdCngy" - }, - "source": [ - "In RL we aim to derive an optimal control policy from the direct interaction of an agent (the RL algorithm) and an environment (the process to be optimized).\n", - "A policy is a mapping from environment states to actions that the agent \"decides\" to take. \n", - "This control method is based on the principle of dynamic programming. Unlike\n", - "classical dynamic programming, RL does not assume the existence of a perfect\n", - "system model and uses function approximations to build a policy from samples\n", - "of historical data. Hence, the agent performs empirical learning and decides on\n", - "actions to drive the environment towards favorable trajectories according to a reward function that the environment delivers every control step.\n", - "\n", - "The process of the RL agent interacting with the environment is a sequential decision-making problem formalized as a **Markov Decission Process (MDP)**. A diagram summarizing the RL approach is shown in the following figure:\n", - "\n", - "\n", - "\n", - "*Figure: Diagram of the RL approach. 
The RL agent decides an action. After the action is implemented, the environment returns the new state $\\pmb{S}_{k+1}$ and associated reward $R_{k+1}$.*\n", - "\n", - "In an MDP, the agent and the environment interact during a sequence of discrete-time steps indexed here as $k=0,1,2,...,K$, with $K$ being the terminal sample that could be $K=\\infty$. \n", - "Every time step $k$ the agent receives a representation of the environment named state: $\\pmb{S}_k \\in \\pmb{\\mathcal{S}}$, where $\\pmb{\\mathcal{S}}$ is the state space. \n", - "Note that the agent's observation of the state-space may or may not fully characterize the environment state. \n", - "In the latter case where the agent can only see a partial observation of the environment's state-space, we refer to **partially observable Markov decision processes (POMDPs)**.\n", - "\n", - "Upon receiving the state representation, the agent computes its control logic and in turn sends back to the environment a control action $\\pmb{A}_k \\in \\pmb{\\mathcal{A}}$, where $\\pmb{A}_k$ is the most appropriate action chosen from the action space $\\pmb{\\mathcal{A}}$. \n", - "One time step later, the agent observes a new state from the environment $\\pmb{S}_{k+1}$ along with a scalar value indicating its reward $R_{k+1} \\in \\mathcal{R} \\subset{\\mathbb{R}}$. Notice that the reward $R_{k+1}$ is an indicator of the agent's performance when taking action $\\pmb{A}_k$ from state $\\pmb{S}_k$.\n", - "\n", - "The environment $\\mathcal{E}_{\\pmb{f}}$ is governed by the natural laws of the system dynamics $\\pmb{f}$ and it is defined by $\\mathcal{E}_{\\pmb{f}}:\\pmb{\\mathcal{S}}\\times \\pmb{\\mathcal{A}} \\rightarrow \\pmb{\\mathcal{S}}\\times \\mathcal{R}$. \n", - "The goal of RL is to infer an **optimal control policy** $\\pi_{*}:\\pmb{\\mathcal{S}} \\rightarrow \\pmb{\\mathcal{A}}$ that maximizes the **expected cumulative return** $G$ when the agent acts according to it. 
\n", - "The cumulative return is defined as some function of the rewards sequence, and a typical definition is to discount the rewards with a **discount factor** $\\gamma \\in [0,1]$ as shown in the following equation:\n", - "\n", - "\\begin{align}\n", - " G_k = R_{k+1} + \\gamma R_{k+2} + \\gamma^2 R_{k+3} + ... = \\sum_{i=0}^\\infty \\gamma^i R_{k+i+1}\n", - "\\end{align}\n", - "\n", - "The **action-value function** $q(\\pmb{S},\\pmb{A})$ estimates the expected return when being in a specific state $\\pmb{S}$ and taking an action $\\pmb{A}$.\n", - "The **state-value function** $v(\\pmb{S})$ directly estimates the expected return for being in state $\\pmb{S}$.\n", - "Frequently, the policy and value functions are approximated by **function approximations** to cope with high-dimensional state-action spaces. \n", - "Examples of commonly used regressors are neural networks or randomized trees. \n", - "\n", - "\n", - "\n", - "A **trajectory** of an MDP is defined as a sequence of states, actions and rewards.\n", - "Most of the RL algorithms learn from finite trajectories of experience called **episodes**.\n", - "Sometimes, the trajectories are broken down into tuples of the form $(\\pmb{s}_k,\\pmb{a}_k,r_k,\\pmb{s}_{k+1})$ and stored in a **replay memory** $\\pmb{\\mathcal{D}}$. \n", - "Using a replay memory allows to serve the historical data in random batches of tuples to preserve as much as possible the independent and identically distributed assumption that is typically taken to parametrize policies and value functions. \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_RIO07aKaQHG" - }, - "source": [ - "# **References** " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cQM-3Ra5BYM7" - }, - "source": [ - "\n", - "- **[1]** *Blum, D., Arroyo, J., Huang, S., Drgona, J., Jorissen, F., Taxt Walnum, H., Yan, C., Benne, K., Vrabie, D., Wetter, M., and Helsen,\n", - "L. 
Building Optimization Testing Framework (BOPTEST) for Simulation-\n", - "Based Benchmarking of Control Strategies in Buildings. Journal of Building\n", - "Performance Simulation 14, 5 (2021), 586–610. https://doi.org/10.1080/19401493.2021.1986574*\n", - "\n", - "- **[2]** *Arroyo, J., Manna, C., Spiessens, F., and Helsen, L. An OpenAI-Gym\n", - "environment for the Building Optimization Testing (BOPTEST) framework.\n", - "In Proceedings of the 17th IBPSA Conference (Bruges, Belgium, September 2021) [https://doi.org/10.26868/25222708.2021.30380](https://www.conftool.pro/bs2021/index.php/30380_Arroyo_Javier.pdf?page=downloadPaper&filename=30380_Arroyo_Javier.pdf&form_id=30380)* \n", - "\n", - "- **[3]** *Drgona, J., Arroyo, J., Cupeiro Figueroa, I., Blum, D., Arendt, K., Kim, D.,Ollé, E. P., Oravec, J., Wetter, M., Vrabie, D. L., and Helsen, L. All you need to know about model predictive control for buildings. Annual Reviews in Control 50 (2020), 190–232. https://doi.org/10.1016/j.arcontrol.2020.09.001*\n", - "\n", - "- **[4]** *Vázquez-Canteli, J. R., and Nagy, Z. Reinforcement learning\n", - "for demand response: A review of algorithms and modeling techniques.\n", - "Applied energy 235 (2019), 1072–1089. https://doi.org/10.1016/j.apenergy.2018.11.002*\n", - "\n", - "- **[5]** *Chen, B., Cai, Z., and Bergés, M. Gnu-RL: A Practical and Scalable Reinforcement Learning Solution for Building HVAC Control Using a Differentiable MPC Policy. Frontiers in Built Environment 6 (2020). https://doi.org/10.3389/fbuil.2020.562239*\n", - "\n", - "- **[6]** *Sutton, R. S., and Barto, A. G. Reinforcement Learning: An Introduction, second ed. 
The MIT Press, 2018.*\n" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "provenance": [], - "include_colab_link": true - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/docs/tutorials/CCAI_Summer_School_2022/Building_Control_with_RL_using_BOPTEST.ipynb b/docs/tutorials/CCAI_Summer_School_2022/Building_Control_with_RL_using_BOPTEST.ipynb new file mode 100644 index 0000000..10b995d --- /dev/null +++ b/docs/tutorials/CCAI_Summer_School_2022/Building_Control_with_RL_using_BOPTEST.ipynb @@ -0,0 +1,3087 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V1CcDG8FanTw" + }, + "source": [ + "# **Key Learning Objectives** 🎯" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oT2QjTu24zwV" + }, + "source": [ + "\n", + "This is an introductory, hands-on tutorial to guide you through the main concepts of Reinforcement Learning (RL) for controlling Heating, Ventilation and Air Conditioning (HVAC) systems for buildings.\n", + "We are going to apply RL to a building emulator from the Building Optimization Testing (BOPTEST) framework **[1]** using the BOPTEST-Gym interface **[2]**.\n", + "BOPTEST is a framework for performance benchmarking of control algorithms.\n", + "Further information and documentation can be found here:\n", + "\n", + "[https://ibpsa.github.io/project1-boptest/](https://ibpsa.github.io/project1-boptest/)\n", + "\n", + "You will learn:\n", + "\n", + "- What RL is, how 
it works and how it can be used in the application of building energy management.\n", + "- The most popular standard for representing general RL problems: OpenAI-Gym.\n", + "- The BOPTEST API and its Gym interface.\n", + "\n", + "📌 **Note**: This tutorial is prepared for use with BOPTEST v0.4.0.\n", + "and uses a web-based version of BOPTEST (called \"BOPTEST-Service\") so as not to require installation of any BOPTEST software on a user's own device. It is also possible to use BOPTEST on a user's own (local) device. Both the web-based and local versions have the same functionality, and will produce the same results, with only small changes in the API (changing the BOPTEST-service url to your localhost url, that is, to: `http://127.0.0.1:5000/`).\n", + "\n", + "**EDIT**: This tutorial was originally developed with BOPTEST v0.2.0. and has been continuously updated to work with the latest BOPTEST versions. Specifically, the following updates have been implemented:\n", + "\n", + "- **BOPTEST v0.4.0.** *Jul 13, 2023*. BOPTEST-Gym internally updates for new BOPTEST API changes when getting results and forecast. Update to Gym v0.26.2. and stable-baselines3 v2.0.0. Import Gymnasium instead of Gym. Change from `compute_reward` to `get_reward` not to fall into Stable Baseline's convention for goal environments. Use `terminated` and `truncated` outputs from Gym instead of directly `done`. Return `info` upon calling the reset method of Gym.\n", + "- **BOPTEST v0.3.0.** *Oct 25, 2022*. There are just small changes required for this update, basically retrieving the `'payload'` after each request. That is the origin of the differences between the notebook explained in the recording and this updated notebook. \n", + "- **BOPTEST v0.2.0.** *Aug 18, 2022*. Initial version."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TSTpxm2GrjhR" + }, + "source": [ + "# **Outline** ⏰\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VUbaQ5GqrvIl" + }, + "source": [ + "[Part 1: Background](#background)\n", + " 1. [Introduction to Reinforcement Learning](#introRL)\n", + " 1. [Application of Reinforcement Learning in buildings](#applicationRL)\n", + "\n", + "[Part 2: The OpenAI-Gym standard](#openAIGym)\n", + " 1. [What is OpenAI-Gym?](#whatIsOpenAIGym)\n", + " 1. [Example using an OpenAI-Gym environment](#exampleOpenAIGym)\n", + "\n", + "[Part 3: The Building Optimization Testing (BOPTEST) Framework](#boptest)\n", + " 1. [What is BOPTEST?](#whatIsBoptest)\n", + " 1. [Selecting a building test case](#selectBuilding)\n", + " 1. [Obtaining general information about the building](#obtainInfo)\n", + " 1. [Getting control input and measurement points](#gettingIOs)\n", + "\n", + "[Part 4: Implementing RL for a building with BOPTEST-Gym](#implementingRL)\n", + " 1. [What is BOPTEST-Gym?](#whatIsBoptestGym)\n", + " 1. [Starting up a BOPTEST-Gym environment](#startingUpBoptestGym)\n", + " 1. [Interacting with a BOPTEST-Gym environment](#interactingWithBoptestGym)\n", + " 1. [Developing a basic RL algorithm](#developingRlAlgo)\n", + " 1. 
[Testing our RL algorithm in BOPTEST-Gym](#testingRlAlgo)\n", + "\n", + "[Gearing up](#gearingUp)\n", + "\n", + "[Further resources](#furtherResources)\n", + "\n", + "[Feedback](#feedbackForm)\n", + "\n", + "[Annex I: Formal RL theory](#theoryRlFormal)\n", + "\n", + "[References](#references)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oEzP9ZW4MXPv" + }, + "source": [ + "# **Part 1: Background** 📖 " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fas232CyMX6_" + }, + "source": [ + "## **Introduction to Reinforcement Learning** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TAy9fRjUTSdb" + }, + "source": [ + "Could you imagine a magic oracle able to decide on the best actions to optimize any process? Could you imagine this oracle not needing any prior information of the process but just learning from interacting with it? Powerful, right? Well, that is exactly what RL is meant for.\n", + "\n", + "Reinforcement Learning (RL) is one of the categories of machine learning, along with unsupervised learning and supervised learning. The main difference from the other categories is that RL learns from dynamic data, that is, data that are obtained while learning." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e853vYumSx08" + }, + "source": [ + "\n", + "\n", + "*Figure: The categories of machine learning. Source: [Mathworks](https://www.mathworks.com/discovery/reinforcement-learning.html)*\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2_qdE6Ab4aE9" + }, + "source": [ + "In RL the goal is to learn the actions to be taken to achieve a predefined objective. 
RL relies on the principle of *repetitive experimentation*, that is, an approach where we roll out several **episodes of experience** where an agent 🤖 (the RL algorithm) interacts with its environment 🌎 (the process to be optimized) to learn based on a **reward** signal that is returned for every **action** taken from a specific **state** of the environment.\n", + "\n", + "Let's take the example of teaching a dog to grab a stick. In this case, the dog is the agent and all its surroundings conform the environment. Whenever the dog observes that there is a person throwing a stick it will perform an action. In case it grabs the stick and brings it back, the person will provide a cookie as a reward to encourage that behavior. In case the dog does not go for the stick but just runs around or goes chasing other dogs, the person will not provide the cookie. Eventually, the dog will associate the actions that bring the most rewards to specific observations and will be taking those actions accordingly.\n", + "\n", + "A more formal introduction to RL and its associated terminology can be found at the end of this tutorial, in [Annex I: Formal RL theory](#theoryRlFormal)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mMppgppKX4Fy" + }, + "source": [ + "\n", + "\n", + "*Figure: RL notation when teaching a dog. Source: [Mathworks](https://www.mathworks.com/discovery/reinforcement-learning.html)*\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pNy7uRzo8foI" + }, + "source": [ + "⚠️ **Important note:** ⚠️ It is common to find in the RL literature that the same term indistinctly designates the\n", + "state and the observation. This is not strictly correct for partially observable environments (most of the cases) where the observation only conveys part of the information that defines the state. For example, the state of the Tic-Tac-Toe game can be fully observed because there is a finite number of possibilities that define the state of a game. 
On the contrary, the thermal state of a building is only partially observable. We can observe the indoor air temperature, but we cannot measure all temperatures from walls, ground, furniture... which also influence the building's thermal state." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pNC0UnC2WYyE" + }, + "source": [ + "\n", + "\n", + "What is particularly extraordinary about RL algorithms is that the same algorithm can be successfully used for a variety of tasks, from [robotic motion control](https://www.technologyreview.com/2021/04/08/1022176/boston-dynamics-cassie-robot-walk-reinforcement-learning-ai/) to [defeat the human world champion in the game of Go](\n", + "https://www.youtube.com/watch?v=WXuK6gekU1Y&ab_channel=DeepMind).\n", + "The latter is an astonishing achievement. It is true that the IBM supercomputer Deep Blue could previously [defeat Garry Kasparov in chess](https://en.wikipedia.org/wiki/Deep_Blue_versus_Garry_Kasparov), but Go is to chess what chess is to Tic-Tac-Toe ([*Chris Wiltz*](https://www.designnews.com/design-hardware-software/googles-ai-beat-go-champion-mimicking-human-intuition)). And what is more important, professionals of Go state that this game has so many possible combinations that mastering it requires certain intuition and creativity, qualities that have only been attributed to humans so far... if AlphaGo defeated the best human player of Go, could machines resemble these qualities? Well, that is more a philosophical question.
This tutorial is limited to investigate whether machines can efficiently control buildings, which you will see is already an enormous challenge!\n", + "\n", + "\n", + "\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOcvaJpkNbho" + }, + "source": [ + "\n", + "\n", + "*Figure: Netflix documentary that explains how AlphaGo, a RL algorithm developed by [DeepMind](https://www.deepmind.com/), could defeat Lee Sedol (4-1) and Fan Hui (5-0), the human world champions in the game of Go.*\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o1jhKBOeOciu" + }, + "source": [ + "📌 **QUICK FACTS:**\n", + "- RL is a **category of machine learning** algorithms, together with supervised and unsupervised learning.\n", + "- Contrarily to other machine learning techniques, RL learns from **dynamic data**, that is, data that are obtained from interactions with the environment.\n", + "- Particularly, it learns from **state-action-reward** samples, so there is no need of domain knowledge to model the environment.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6G1nWECgbmuW" + }, + "source": [ + "## **Application of Reinforcement Learning in buildings** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xoTh8XvAM_OR" + }, + "source": [ + "During the last decade, there has been a clear interest growth in using optimal control for HVAC systems **[3]**. The figure below underlines this increased interest by showing the number of yearly peer-reviewed scientific publications related to optimal control in buildings.\n", + "RL algorithms have\n", + "gained particular popularity for their application in a **demand response** setting.\n", + "An extensive review for this application was written by  Vázquez-Canteli et al. **[4]** This review is\n", + "not limited to HVAC systems but also demand response for charging electric\n", + "vehicles or thermal energy storage." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7NywyXo6hD5n" + }, + "source": [ + "\n", + "\n", + "*Figure: Evolution of the number of scientific publications about optimal control in buildings during the last decades. Data obtained from the Clarivate Web of Science.*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8KR-sTeJiJG2" + }, + "source": [ + "RL has already attracted the attention of the building control community for\n", + "many years. The figure below is obtained from the popular paper of Chen et al. **[5]** who graphically summarized the application of RL in buildings indicating the amount of data required by each research work to train the implemented RL algorithm.\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nCH0DBBbPEC7" + }, + "source": [ + "\n", + "\n", + "\n", + "*Figure: Summary of the data required in the history of RL applications to buildings. Chen et al.* **[5]** ." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ae18iXNKWV5I" + }, + "source": [ + "You can see from the figure that the feasibility and potential of applying RL for HVAC control\n", + "was first investigated by Liu and Henze back in 2006. Then, the interest was lost for a period, probably because Model Predictive Control (MPC) has been typically preferred for optimal control in buildings because it is much more data-efficient (it does not need as much data to be implemented). A comprehensive and complete review on the application of MPC for building energy management is provided by Drgona et al. **[6]**. 
\n", + "The reasons why RL is gaining momentum again are clear: \n", + "\n", + "- Evolution in deep learning\n", + "- We have much more data than before\n", + "- We have much more computational power than before\n", + "\n", + "In fact, there exist very recent developments for the application of RL in buildings, most of them using the OpenAI-Gym standard that is introduced in the next section. It is worth mentioning:\n", + "\n", + "- [CityLearn](https://github.com/intelligent-environments-lab/CityLearn) ➡️ Gym environment for providing demand response scenarios at an urban scale. That is, the goal of the RL agent is to flatten the energy demand of a district. It considers static\n", + "building heating and cooling load data and simplified models for energy storage.\n", + "- [Gym-Eplus](https://github.com/zhangzhizza/Gym-Eplus) ➡️ Gym environment wrapper around EnergyPlus simulation models.\n", + "- [Sinergym](https://github.com/ugr-sail/sinergym) ➡️ Extension of Gym-Eplus.\n", + "- [Energym](https://github.com/bsl546/energym) ➡️ Gym wrapper around building simulation models to assess controller performance.\n", + "- [Beobench](https://github.com/rdnfn/beobench) ➡️ A Toolkit for Unified Access to BuildingSimulations for Reinforcement Learning.\n", + "- 👉🏻[BOPTEST-Gym](https://github.com/ibpsa/project1-boptest-gym) ➡️ Gym environment for the BOPTEST Framework. The goal of the RL agent in this environment is to efficiently control an individual building. It allows testing against high-fidelity building models.\n", + "\n", + "The last of which is the focus of this tutorial.\n", + "\n", + "These RL frameworks for HVAC control bring hope\n", + "to the adoption of this technology in buildings. However, there is still a clear\n", + "need to test different techniques and understand the best practices of RL\n", + "for this particular application. Let's investigate how!"
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7YnuNAQdM_L2" + }, + "source": [ + "# **Part 2: The OpenAI-Gym standard** 🤖 " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sv728rc3M_Ir" + }, + "source": [ + "## **What is OpenAI-Gym and Gymnasium?** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AhQfyrBCUigq" + }, + "source": [ + "OpenAI-Gym is a software library that gathers a **collection of tasks** called environments with a **unique Python interface** to control all of them. This unique interface is a key feature in the software package, and has given rise to a standard for the format in which RL agents are developed and treated, independently of\n", + "their internal functioning. The tasks defined in the Gym environments involve\n", + "a wide variety of fields like video games, classic control theory problems, or high dimensional robotic locomotive tasks.\n", + "[Gymnasium](https://gymnasium.farama.org/) is a maintained fork of the OpenAI-Gym library. You can find a list of available environments [here](https://gymnasium.farama.org/environments/classic_control/).\n", + "\n", + "\n", + "\n", + "The OpenAI-Gym philosophy heavily relies on the episodic aspect of RL, i.e.\n", + "the agent’s history is broken down into a series of experiences called **episodes** that may be of\n", + "variable length. The agent interacts with the environment until it reaches a\n", + "terminal state when the episode is finished. The goal is to maximize the total\n", + "cumulative reward per episode.\n", + "\n", + "The main methods of the OpenAI-Gym interface are the following:\n", + "\n", + "- `obs, info = env.reset()` ➡️ The `reset` method is the one called first to initialize the environment `env` (whatever it is). The environment returns the first observation `obs` (state) together with an `info` dictionary.\n", + "- `next_obs,reward,terminated,truncated,info = env.step(action)` ➡️ The `step` method is used iteratively to interact with the environment.
The RL agent computes an `action`, and the environment returns the next observation, associated reward, whether the episode is done (=terminated), and some other optional information." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ntNSOBzJPJuF" + }, + "source": [ + "## **Example using an OpenAI-Gym environment** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mQ2879zIPOJ0" + }, + "source": [ + "Now that we understand the main concepts of OpenAI-Gym we are going to illustrate its typical usage with a quick example. We're going to use the [CartPole environment](https://gymnasium.farama.org/environments/classic_control/cart_pole/), which is one of the classic control problems available in the OpenAI-Gym framework.\n", + "Let's start by installing the dependencies that we require:\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "E0sfte45O8iN", + "outputId": "2bb75d65-ae6d-4559-f950-66d462bb3fd0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gymnasium==0.28.1 in /usr/local/lib/python3.10/dist-packages (0.28.1)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1) (1.25.0)\n", + "Requirement already satisfied: jax-jumpy>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1) (1.0.0)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1) (4.7.1)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1) (0.0.4)\n" + ] + } + ], + "source": [ + "!pip install gymnasium==0.28.1" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "RnmDsSAnM_F-" + }, + "source": [ + "**Cartpole environment description:**\n", + "\n", + "\"*A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces in the left (-1) and right (+1) direction on the cart. A reward of +1 is provided for every timestep that the pole remains upright.*\"\n", + "\n", + "\n", + "You can also check out the physics of the environment in the [GitHub repository of OpenAI-Gym](https://github.com/openai/gym).\n", + "See below an example of the evolution of an episode of the Cartpole environment. Note that most of the Gym environments can be rendered to show how the RL agent is performing.\n", + "\n", + "![Cartpole](https://cdn-images-1.medium.com/max/1143/1*h4WTQNVIsvMXJTCpXm_TAw.gif)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eMa2F3q_pV-C" + }, + "source": [ + "First, we are going to import `gymnasium` (aliased as `gym`) and then `make` our Cartpole environment (version 1). Note how it is possible to instantiate the registered Gym environments by referring to their names with a string.\n", + "After that, we are going to interact with the environment for a maximum number of episodes of experience that we are going to indicate with `max_num_episodes`. In each episode, the environment is initialized with the `reset` method, and then we interact with the environment until the episode is `done`, which happens when the pole is down.\n", + "\n", + "It is important to note that in this example we are not implementing RL just yet. Instead, we are using the `sample` method from the action space of the environment to compute a random control action.
This is useful when we want to quickly check how an environment behaves, but we should aim to replace that line by some intelligent RL agent able to compute a control action that optimizes the performance of the environment." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LBxXhZc5nGb3", + "outputId": "85dc735f-6580-461e-d676-d13735bfa70d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Episode #1 had 24 steps and total_reward=24.0\n", + "\n", + " Episode #2 had 13 steps and total_reward=13.0\n", + "\n", + " Episode #3 had 12 steps and total_reward=12.0\n", + "\n", + " Episode #4 had 27 steps and total_reward=27.0\n", + "\n", + " Episode #5 had 30 steps and total_reward=30.0\n", + "\n", + " Episode #6 had 38 steps and total_reward=38.0\n", + "\n", + " Episode #7 had 34 steps and total_reward=34.0\n", + "\n", + " Episode #8 had 22 steps and total_reward=22.0\n", + "\n", + " Episode #9 had 25 steps and total_reward=25.0\n", + "\n", + " Episode #10 had 23 steps and total_reward=23.0\n" + ] + } + ], + "source": [ + "import gymnasium as gym\n", + "\n", + "env = gym.make('CartPole-v1')\n", + "max_num_episodes = 10 # maximum number of episodes\n", + "\n", + "for episode in range(max_num_episodes):\n", + " done = False\n", + " obs = env.reset()\n", + " total_reward = 0.0\n", + " step = 0\n", + " while not done:\n", + " action = env.action_space.sample() # Compute random action. 
This is to be replaced by a RL algo\n", + " obs,reward,terminated,truncated,info = env.step(action) # send the action to the environment\n", + " done = (terminated or truncated)\n", + " total_reward += reward\n", + " step += 1\n", + "\n", + " print('\\n Episode #{} had {} steps and total_reward={}'.format(episode+1,step,total_reward))\n", + "\n", + "env.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ai9dHGWksZeu" + }, + "source": [ + "Notice how every episode lasts for a different number of steps because we are applying random forces to the cart. Also, notice how the total reward of each episode is equal to the number of steps, because the Cartpole environment gives a reward of +1 every timestep that we get to maintain the pole upright.\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tpeeOA8BM-5L" + }, + "source": [ + "# **Part 3: The Building Optimization Testing (BOPTEST) framework** 🏠 " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z0ry1NQwuMXa" + }, + "source": [ + "Now that we understand how RL and OpenAI-Gym work, let's use that knowledge for the particular application of buildings.\n", + "In this tutorial we are going to connect with a BOPTEST building emulator that we will use as our environment to control through RL.\n", + "This emulator is a simulation model that was configured based on detailed physics and that has been peer-reviewed to ensure that it represents the behavior of an actual building as realistically as possible. Hence, although it is a simulation model, we are going to consider this emulator as the real building for control, testing and benchmarking.\n", + "\n", + "In this section we explain what BOPTEST is and how it can be generally used. Next section will exclusively focus on BOPTEST-Gym, the OpenAI-Gym interface of BOPTEST, to learn how we can use it to implement and assess RL algorithms for building control." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OcPk7llkJP4m" + }, + "source": [ + "## **What is BOPTEST?** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HtDrzTFuJU0e" + }, + "source": [ + "BOPTEST is a software framework that enables the performance evaluation and benchmarking of advanced control algorithms for building HVAC control through simulations. The software is developed and is available on the BOPTEST GitHub repository at [https://github.com/ibpsa/project1-boptest](https://github.com/ibpsa/project1-boptest)\n", + "\n", + "and general information about BOPTEST can be found through the following link:\n", + "\n", + "[https://ibpsa.github.io/project1-boptest/](https://ibpsa.github.io/project1-boptest/)\n", + "\n", + "In the link below you can also find information about the overarching project that gave birth to BOPTEST, IBPSA Project 1:\n", + "\n", + "[https://ibpsa.github.io/project1/](https://ibpsa.github.io/project1/)\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sNiHr2w0IFYI" + }, + "source": [ + "\n", + "\n", + "*Figure: The BOPTEST concept.*\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Jj7kbbF8JXEG" + }, + "source": [ + "The main use case of the BOPTEST framework is the one where a control developer wants to evaluate the performance of his/her building control strategy. Testing in a real building may be very expensive, or just not possible. BOPTEST offers a menu of emulator building models so that the control developer can select one of them, interact in co-simulation, and eventually assess the performance of his/her controller with a set of Key Performance Indicators (KPIs) that are calculated by the BOPTEST framework.
\n", + "\n", + "Note that using a standardized set of building emulators, testing scenarios, and KPIs enables benchmarking, allows to compare across different controllers, and throws light on what are the best practices for building control. In addition, making these emulators easily and rapidly available to use allows for control developers without expertise in building modeling to utilize them for controls testing and evaluation.\n", + "\n", + "In this section we are going to explain the basic BOPTEST functionality to connect to a building test case and obtain available control inputs and measurement points. For a more complete description on how to use BOPTEST please visit this [BOPTEST Colab tutorial](https://github.com/ibpsa/project1-boptest/blob/master/docs/workshops/BS21Workshop_20210831/Introduction_to_the_BOPTEST_framework.ipynb). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l5Fgw7eJHEjy" + }, + "source": [ + "## **Selecting a building test case** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owb2Z2rqHEjz" + }, + "source": [ + "BOPTEST test cases are developed as [Functional Mock-up Units (FMU's)](https://fmi-standard.org/) and deployed within a containerized environment using the [Docker](https://www.docker.com/) software with:\n", + "\n", + "* A detailed emulator **building model**.\n", + "* Yearly **boundary condition data** for weather, schedules, pricing, and emission factors. These data are representative of the building location.\n", + "* An **API** that allows for, among other things, initializing a simulation or testing scenario, advancing a simulation with a control input, receiving forecast data, receiving emulator data, and receiving computed KPIs. The full API is described [here](https://github.com/ibpsa/project1-boptest/tree/boptest-service#test-case-restful-api).\n", + "\n", + "The basic workflow to test a controller is:\n", + "\n", + "1. 
Select a **test case** from the menu of those available.\n", + "2. Select one of the **testing scenarios** defined for the given test case. Testing scenarios are standardized for each emulator.\n", + "3. Set **parameters** for the interaction with your test controller, such as the control step or forecast horizon, if required. \n", + "4. Run the test case scenario in a **co-simulation** loop with your controller.\n", + "5. Request the KPIs and **evaluate** your controller's performance.\n", + "\n", + "We start by selecting and launching a BOPTEST building test case from the [repository of currently available test cases](https://ibpsa.github.io/project1-boptest/testcases/index.html). In this example, we are going to work with the test case called `bestest_hydronic_heat_pump`, which is a single-zone residential building with radiant floor heating and a heat pump. This is a high-fidelity, yet, relatively simple test case that allows us to focus on fundamental aspects. You may want to note the other test cases available in the repository as well as the fact that there are more under development. \n", + "\n", + "We can launch our chosen test case as follows. First, import the Python `requests` library so that we can make HTTP requests to the BOPTEST API at the address indicated by the `url`. Then, use the `POST /testcases//select` BOPTEST API endpoint to launch the test case and receive a corresponding `testid`. While the `url` is the common gateway for everyone to access the BOPTEST web-service, the `testid` is a unique identifier for you to address the test case that you have selected and launched." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "V_qU6ukZghTb" + }, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "# url for the BOPTEST service\n", + "url = 'https://api.boptest.net'\n", + "\n", + "# Select test case and get identifier\n", + "testcase = 'bestest_hydronic_heat_pump'\n", + "\n", + "# Check if already started a test case and stop it if so before starting another\n", + "try:\n", + " requests.put('{0}/stop/{1}'.format(url, testid))\n", + "except:\n", + " pass\n", + "\n", + "# Select and start a new test case\n", + "testid = \\\n", + "requests.post('{0}/testcases/{1}/select'.format(url,testcase)).json()['testid']\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eRZGKWDlHEj2" + }, + "source": [ + "Please do not get distracted by the `try-except` statement. We are using that one to stop already created test cases if we are revisiting this cell. This prevents from having several dangling test cases that can overwhelm our server.\n", + "\n", + "Once you have successfully obtained the `testid`, it is possible to start interacting with your selected test case using the rest of the BOPTEST API. You will need this `testid` for all further interactions with this test case. For example, use the `GET /name` BOPTEST API endpoint, along with your `testid`, to request the name of your test case and check that it matches the one we want." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8mdK5JtNI-e_", + "outputId": "e7ab2747-b9e7-4678-b3e9-cf36ee9adc06" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'bestest_hydronic_heat_pump'}\n" + ] + } + ], + "source": [ + "# Get test case name\n", + "name = requests.get('{0}/name/{1}'.format(url, testid)).json()['payload']\n", + "print(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gUOQXYjlHEj3" + }, + "source": [ + "With our unique `testid` in-hand and having some practice using the BOPTEST API, we are ready to move on to start using our building emulator. For this tutorial, we are going to explain only how to obtain information about the building using the BOPTEST API before moving to learn BOPTEST-Gym.\n", + "Note that the test case will timeout after 15 minutes of no requests. If the test case times out, you can simply select and start a new one by repeating the steps described above.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jmglIZGFHEj3" + }, + "source": [ + "## **Obtaining general information about the building** \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mJ6leLGvRJya" + }, + "source": [ + "The first thing we want to do is learn about the building and system that we want to control. All building information can be found under documentation provided for each specific test case on the [Test Cases tab](https://ibpsa.github.io/project1-boptest/testcases/index.html) of the BOPTEST website.\n", + "\n", + "The building information includes a description of the building envelope, the HVAC system design, the functioning of the baseline controller, available control inputs and measurement outputs, and available testing scenarios. 
Understanding how the system works is an important practice for control design, so take as much time as needed to understand the equipment, the points that can be measured, and the points that can be overwritten by your controller.\n", + "We briefly summarize the `bestest_hydronic_heat_pump` case here for completeness, but it is strongly recommended to have a deeper look into the [documentation](https://ibpsa.github.io/project1-boptest/testcases/ibpsa/testcases_ibpsa_bestest_hydronic_heat_pump/).\n", + "\n", + "The building represents a residential dwelling of 192 $m^2$ for a family of 5 members.\n", + "An air-to-water modulating heat pump of 15 $kW$ nominal heating capacity extracts energy from the ambient air to heat up the floor heating emission system, as shown in the figure below.\n", + "An evaporator fan blows ambient air through the heat pump evaporator when the heat pump is operating.\n", + "The floor heating system injects heat into the floor using water as the working fluid." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SMQcNDl1HEj4" + }, + "source": [ + "\n", + "\n", + "\n", + "*Figure: Schematic of HVAC system and control for the `bestest_hydronic_heat_pump` test case.*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lNLRXp2eHEj4" + }, + "source": [ + "A baseline controller is embedded in every test case emulator that is meant to be representative of a typical controller for that type of building. The baseline controller includes local loop control such that supervisory set points may be the focus of a test controller, although many of those local loop control signals are also available for overwriting if a user chooses. 
The baseline controller can also be considered an initial benchmark for control performance.\n", + "\n", + "In our selected test case, the baseline controller consists of a PI controller with the zone operative temperature as the controlled variable and the heat pump modulation signal for compressor frequency as the control variable, depicted as C1 in the figure above and shown in the figure below.\n", + "The control variable is limited between 0 and 1, and it is computed to drive the zone operative temperature towards its set point, which is defined as a function of the occupancy schedule." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RqVtoDgTHEj4" + }, + "source": [ + "\n", + "\n", + "\n", + "*Figure: Primary PI controller C1.*\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ryOicW_KHEj5" + }, + "source": [ + "All other equipment (fan for the heat pump evaporator circuit and floor heating emission system pump) are switched on when the heat pump is working (modulating signal higher than 0) and switched off otherwise. This is depicted in the figure of the HVAC schematic as controller C2." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-J5j60bRHEj5" + }, + "source": [ + "## **Getting control input and measurement points** \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AGG7G6VeR4WB" + }, + "source": [ + "While control input and measurement points are described in the documentation, they are also available to retrieve from the BOPTEST API. This is especially useful to store for later when requesting data for a specific point.\n", + "\n", + "Retrieve the control input and measurement outputs using the `GET /inputs` and `GET /measurements` BOPTEST API endpoints." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0IKRxBykJY6u", + "outputId": "536ba021-d0f6-4d9f-f4a5-8eda9fc16541" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TEST CASE INPUTS ---------------------------------------------\n", + "dict_keys(['oveTSet_activate', 'ovePum_activate', 'ovePum_u', 'oveHeaPumY_u', 'oveTSet_u', 'oveHeaPumY_activate', 'oveFan_activate', 'oveFan_u'])\n", + "TEST CASE MEASUREMENTS ---------------------------------------\n", + "dict_keys(['weaSta_reaWeaPAtm_y', 'reaPFan_y', 'reaQHeaPumCon_y', 'reaTRet_y', 'weaSta_reaWeaNOpa_y', 'weaSta_reaWeaTBlaSky_y', 'reaQHeaPumEva_y', 'weaSta_reaWeaNTot_y', 'weaSta_reaWeaSolAlt_y', 'reaTZon_y', 'weaSta_reaWeaHHorIR_y', 'weaSta_reaWeaLon_y', 'weaSta_reaWeaSolTim_y', 'weaSta_reaWeaCloTim_y', 'reaPPumEmi_y', 'weaSta_reaWeaHGloHor_y', 'weaSta_reaWeaHDifHor_y', 'weaSta_reaWeaRelHum_y', 'reaTSetHea_y', 'reaCO2RooAir_y', 'weaSta_reaWeaSolDec_y', 'reaPHeaPum_y', 'weaSta_reaWeaHDirNor_y', 'reaTSetCoo_y', 'weaSta_reaWeaWinDir_y', 'reaTSup_y', 'weaSta_reaWeaSolZen_y', 'reaQFloHea_y', 'reaCOP_y', 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaTWetBul_y', 'weaSta_reaWeaTDewPoi_y', 'weaSta_reaWeaWinSpe_y', 'weaSta_reaWeaLat_y', 'weaSta_reaWeaCeiHei_y', 'weaSta_reaWeaSolHouAng_y'])\n" + ] + } + ], + "source": [ + "# Get inputs available\n", + "inputs = requests.get('{0}/inputs/{1}'.format(url, testid)).json()['payload']\n", + "print('TEST CASE INPUTS ---------------------------------------------')\n", + "print(inputs.keys())\n", + "# Get measurements available\n", + "print('TEST CASE MEASUREMENTS ---------------------------------------')\n", + "measurements = requests.get('{0}/measurements/{1}'.format(url, testid)).json()['payload']\n", + "print(measurements.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A4L6Gw6YJU5L" + }, + "source": [ + "The naming convention is 
such that the extension `_y` indicates a measurement point, `_u` indicates the value of an input which can be overwritten by a test controller, and `_activate` indicates the enabling (with value 0 or 1) of a test controller to overwrite the corresponding input value.\n", + "Hence, `_u` is enabled for overwriting by the test controller when `_activate=1`.\n", + "`weaSta_` indicates a measurement for a weather point, so that historical weather data can be easily retrieved.\n", + "\n", + "Notice that the jsons returned from the `GET /inputs` and `GET /measurements` BOPTEST API endpoints also include a description and unit of each variable, as well as the minimum and maximum value for input variables:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U7guJ_I10QOF" + }, + "source": [ + "Now let's stop the test case since we are not going to use it for a while. We do this to not overwhelm the server." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v5_1Q_H80Z5k", + "outputId": "95577327-d7d9-4870-893b-7bdc1e164ac8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "requests.put('{0}/stop/{1}'.format(url, testid))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UHYHM9MjSz_C" + }, + "source": [ + "# **Part 4: Implementing RL for a building with BOPTEST-Gym** 🤖 🏠 " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BEC76h9HT7gL" + }, + "source": [ + "## **What is BOPTEST-Gym?** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z7A9k7GFUBsK" + }, + "source": [ + "BOPTEST-Gym is the OpenAI-Gym interface of BOPTEST that helps to train RL agents for the application of building climate control.\n", + "The BOPTEST-Gym interface accommodates the BOPTEST API to have BOPTEST building emulators as environments 
that follow the OpenAI-Gym standard.\n", + "Therefore, the BOPTEST-Gym interface facilitates the development of RL agents as it allows interacting with the BOPTEST building emulators with a standard that is very well known by the machine learning community. Or even better, it allows us to directly use existing RL agents that have been developed following this standard, like those from the [Stable Baselines 3](https://stable-baselines3.readthedocs.io/en/master/) repository.\n", + "\n", + "You can find more information about BOPTEST-Gym in [this paper](https://publications.ibpsa.org/conference/paper/?id=bs2021_30380), but here we summarize the main points you should know:\n", + "- BOPTEST-Gym enables the interaction of RL agents with a set of physics-based and highly **detailed building models** to assess RL for the application of building climate control.\n", + "- All **hyperparameters** of the environment are initialized when the environment is instantiated. A particularly relevant hyperparameter is `testcase`, a string specifying the BOPTEST emulator of choice. This string selects the building model from the [menu of BOPTEST building emulators](https://ibpsa.github.io/project1-boptest/testcases/index.html).\n", + "- The **state** of any building emulator environment can have a *time* component e.g. a weekly schedule, a *measurement* component with a subset (or all) measurements available in the building, and an *exogenous* component including disturbances of any kind of boundary condition data to the building such as electricity prices, ambient temperature, or temperature set-points.\n", + "- The **action** space is defined based on any subset (or all) inputs available to the emulator. 
These can be either building set-points, like zone\n", + "operative temperature set-points, or lower level actuator signals, such as heat\n", + "pump modulating signal or a pump stage.\n", + "- The **`reset()`** method is called at the beginning of every episode to return the environment to a logical initial state.\n", + "- The **`step()`** method is called every time step to take the action computed by the RL agent, overwrite the building inputs with the vector of action values and advance the building simulation model during one time step period. BOPTEST-Gym also has wrappers for discretization of the state and action spaces. This functionality comes in handy when training RL agents.\n", + "- A default **reward** function is implemented in the `get_reward` method of the BOPTEST-Gym environment that can be overridden. It is convenient to use the BOPTEST `/kpi` API to obtain the KPI values at the present time for defining custom reward functions." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kvMgiRhLX2i8" + }, + "source": [ + "## **Starting up a BOPTEST-Gym environment** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WcrOX1Z_UvTY" + }, + "source": [ + "BOPTEST-Gym uses RL algorithms from the [Stable Baselines 3](https://stable-baselines3.readthedocs.io/en/master/) repository to exemplify and test its functionality. 
Therefore, we need to install stable-baselines3.\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jpZk6qJKTuYl", + "outputId": "1246faeb-3e8e-47e9-cb19-dcb2756c921f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: stable-baselines3==2.0.0 in /usr/local/lib/python3.10/dist-packages (2.0.0)\n", + "Requirement already satisfied: numpy==1.25.0 in /usr/local/lib/python3.10/dist-packages (1.25.0)\n", + "Requirement already satisfied: gymnasium==0.28.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3==2.0.0) (0.28.1)\n", + "Requirement already satisfied: torch>=1.11 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3==2.0.0) (2.0.1+cu118)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from stable-baselines3==2.0.0) (2.2.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3==2.0.0) (1.5.3)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3==2.0.0) (3.7.1)\n", + "Requirement already satisfied: jax-jumpy>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1->stable-baselines3==2.0.0) (1.0.0)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1->stable-baselines3==2.0.0) (4.7.1)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium==0.28.1->stable-baselines3==2.0.0) (0.0.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->stable-baselines3==2.0.0) (3.12.2)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from 
torch>=1.11->stable-baselines3==2.0.0) (1.11.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->stable-baselines3==2.0.0) (3.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->stable-baselines3==2.0.0) (3.1.2)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11->stable-baselines3==2.0.0) (2.0.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.11->stable-baselines3==2.0.0) (3.25.2)\n", + "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.11->stable-baselines3==2.0.0) (16.0.6)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (4.40.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (1.4.4)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (8.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (3.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3==2.0.0) (2.8.2)\n", + 
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->stable-baselines3==2.0.0) (2022.7.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->stable-baselines3==2.0.0) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11->stable-baselines3==2.0.0) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11->stable-baselines3==2.0.0) (1.3.0)\n" + ] + } + ], + "source": [ + "!pip install stable-baselines3==2.0.0 numpy==1.25.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4ljXzd7W4R0H" + }, + "source": [ + "Now that we have all package dependencies, let's clone the BOPTEST-Gym repository. We are going to clone the `boptest-gym-service` branch which works in the same way as the `master` branch but allows us to directly use the web-based version of BOPTEST that is readily available such that we do not have to deploy the building test case Docker containers locally.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2adMIlJu_ZZ8", + "outputId": "cba698ae-874e-427c-8284-393621d85a11" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'boptestGymService'...\n", + "remote: Enumerating objects: 3305, done.\u001b[K\n", + "remote: Counting objects: 100% (851/851), done.\u001b[K\n", + "remote: Compressing objects: 100% (427/427), done.\u001b[K\n", + "remote: Total 3305 (delta 456), reused 764 (delta 378), pack-reused 2454\u001b[K\n", + "Receiving objects: 100% (3305/3305), 47.56 MiB | 17.14 MiB/s, done.\n", + "Resolving deltas: 100% (1733/1733), done.\n" + ] + } + ], + "source": [ + "try:\n", + " !rm -rf boptestGymService\n", + "except:\n", + " 
pass\n", + "!git clone -b boptest-gym-service https://github.com/ibpsa/project1-boptest-gym.git boptestGymService" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cs9guwYo5w50" + }, + "source": [ + "Now we move our working directory to our recently cloned repository, import the `BoptestGymEnv` class, and instantiate our first BOPTEST-Gym environment!" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mZsXUZIQ5iIj", + "outputId": "0f249174-2535-4b03-f372-09fc7b2e2ae0" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/gymnasium/spaces/box.py:130: UserWarning: \u001b[33mWARN: Box bound precision lowered by casting to float32\u001b[0m\n", + " gym.logger.warn(f\"Box bound precision lowered by casting to {self.dtype}\")\n" + ] + } + ], + "source": [ + "import sys\n", + "sys.path.insert(0,'boptestGymService')\n", + "from boptestGymEnv import BoptestGymEnv\n", + "\n", + "# Instantiate environment\n", + "env = BoptestGymEnv(url = url,\n", + " testcase = 'bestest_hydronic_heat_pump',\n", + " actions = ['oveHeaPumY_u'],\n", + " observations = {'reaTZon_y':(280.,310.)},\n", + " random_start_time = False,\n", + " start_time = 31*24*3600,\n", + " max_episode_length = 24*3600,\n", + " warmup_period = 24*3600,\n", + " step_period = 3600)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8XVI61rnU4QZ" + }, + "source": [ + "You have connected to a BOPTEST building emulator and wrapped it around a Gym environment. Let's examine more in detail the arguments that you have used:\n", + "- `url`: the domain where your test case lives. 
In this case it is the url to BOPTEST-service, but it could be your localhost if you decide to spin up a test case on your machine using Docker.\n", + "- `testcase`: The string identifier of the testcase.\n", + "- `actions`: List of strings indicating the action space.\n", + "- `observations`: Dictionary mapping observation keys to a tuple with the lower and upper bound of each observation. These bounds define the typical operational range for discretization and normalization purposes. Observation keys must belong either to the set of measurements or to the set of forecasting variables of the BOPTEST test case.\n", + "- `max_episode_length`: Maximum duration of each episode in seconds.\n", + "- `random_start_time`: Set to True if desired to use a random start time for each episode. That is typically useful when training an RL agent to run several episodes with different boundary condition data. In our case, we set it to False and specify the start time of the episode.\n", + "- `start_time`: start time of the episode. It is specified in seconds from the beginning of the year. To be used in combination with `random_start_time=False`. \n", + "- `warmup_period`: Desired simulation period to initialize each episode, in seconds. In our case, we simulate the testcase for one day right before the beginning of the episode.\n", + "- `step_period`: The period of each control step, in seconds. In this case it is set to one hour." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-ZcNOH0SYEiR" + }, + "source": [ + "Now you can interact with the building emulator following the Gym standard. Every time you use one of the methods of your environment, BOPTEST-Gym will send the associated commands through the BOPTEST API that you have learned above so as to provide the desired functionality. A schematic of this process is shown in the figure below. 
This figure illustrates the typical steps that take place when training an agent and the mapping between the BOPTEST-Gym interface and the BOPTEST API. It is important to note that a state can be returned not only with current measurements, but also with boundary condition forecast or regressive values." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rcnaIJvhYDa5" + }, + "source": [ + "\n", + "\n", + "*Figure: Sequence diagram for training an agent with the BOPTEST-Gym environment.*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZVIz69qXyCZ" + }, + "source": [ + "## **Interacting with a BOPTEST-Gym environment** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dA9_wVo8bMxr" + }, + "source": [ + "Let's see what we can do with our building Gym environment. Recall that the first step is using the `reset` method to simulate the building for the time period specified in `warmup_period` right before the episode start time. This will bring the building to a reasonable initial state and the environment will return an observation `obs` which, in our case, comprises only the zone operative temperature (`reaTZon_y`). This temperature is in kelvin, so we convert it to degrees Celsius." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Z4n5GsjXV08x", + "outputId": "d4cc4181-d8e8-418b-8f40-f137ab122002" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Zone temperature: 21.37 degC\n", + "Episode starting day: 31.0 (from beginning of the year)\n" + ] + } + ], + "source": [ + "obs, _ = env.reset()\n", + "print('Zone temperature: {:.2f} degC'.format(obs[0]-273.15))\n", + "print('Episode starting day: {:.1f} (from beginning of the year)'.format(env.start_time/24/3600))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IeJyBvLYqC11" + }, + "source": [ + "📌 **Note: About initialization**\n", + "\n", + "The initial state in the emulator consists of all states after simulation during the warmup period without any external input from an external controller. This particular emulator has 63 continuous time states comprising temperatures of walls, floor, roof, water, etc. During the warmup period, the baseline controller embedded in the emulator is used. After initialization the baseline controller will also work at any time unless some of the control variables are intentionally overwritten by an external controller." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9i9VfDrdYJ0e" + }, + "source": [ + "We can inspect the observation and action space of any environment as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a_PC0YAEYR5U", + "outputId": "fd621a64-2550-4a61-f5c4-d38a28cd06b7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation space of the building environment:\n", + "Box(280.0, 310.0, (1,), float32)\n", + "Action space of the building environment:\n", + "Box(0.0, 1.0, (1,), float32)\n" + ] + } + ], + "source": [ + "print('Observation space of the building environment:')\n", + "print(env.observation_space)\n", + "print('Action space of the building environment:')\n", + "print(env.action_space)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SBVCnncbePIQ" + }, + "source": [ + "So this environment has a Box (continuous and bounded) observation space which is the indoor building temperature. The operational range of this variable goes from $280$ $K$ to $310$ $K$. That is, from ~$7$ $°C$ to $37$ $°C$. On the other hand, the action space is a continuous variable that goes from $0$ to $1$. The latter variable represents the heat pump compressor frequency with $0$ meaning no heating, and $1$ meaning the heat pump working at full capacity. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pkx5Os6-Yltb" + }, + "source": [ + "But actually, the BOPTEST-Gym environment can be directly printed to show a lot of useful information to control the building:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FGzL_ZskfoyO", + "outputId": "82b22b21-7480-4cc8-f2d4-fc9122d34332" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "========================\n", + "BOPTEST CASE INFORMATION\n", + "========================\n", + "\n", + "Test case name\n", + "--------------\n", + "{'name': 'bestest_hydronic_heat_pump'}\n", + "\n", + "All measurement variables\n", + "-------------------------\n", + "{'reaCO2RooAir_y': {'Description': 'CO2 concentration in the zone',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'ppm'},\n", + " 'reaCOP_y': {'Description': 'Heat pump COP',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': '1'},\n", + " 'reaPFan_y': {'Description': 'Electrical power of the heat pump evaporator '\n", + " 'fan',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W'},\n", + " 'reaPHeaPum_y': {'Description': 'Heat pump electrical power',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W'},\n", + " 'reaPPumEmi_y': {'Description': 'Emission circuit pump electrical power',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W'},\n", + " 'reaQFloHea_y': {'Description': 'Floor heating thermal power released to the '\n", + " 'zone',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W'},\n", + " 'reaQHeaPumCon_y': {'Description': 'Heat pump thermal power exchanged in the '\n", + " 'condenser',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W'},\n", + " 'reaQHeaPumEva_y': {'Description': 'Heat pump thermal power exchanged in the '\n", + " 
'evaporator',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W'},\n", + " 'reaTRet_y': {'Description': 'Return water temperature from radiant floor',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'reaTSetCoo_y': {'Description': 'Zone operative temperature setpoint for '\n", + " 'cooling',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'reaTSetHea_y': {'Description': 'Zone operative temperature setpoint for '\n", + " 'heating',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'reaTSup_y': {'Description': 'Supply water temperature to radiant floor',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'reaTZon_y': {'Description': 'Zone operative temperature',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'weaSta_reaWeaCeiHei_y': {'Description': 'Cloud cover ceiling height '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'm'},\n", + " 'weaSta_reaWeaCloTim_y': {'Description': 'Day number with units of seconds',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 's'},\n", + " 'weaSta_reaWeaHDifHor_y': {'Description': 'Horizontal diffuse solar radiation '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W/m2'},\n", + " 'weaSta_reaWeaHDirNor_y': {'Description': 'Direct normal radiation '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W/m2'},\n", + " 'weaSta_reaWeaHGloHor_y': {'Description': 'Global horizontal solar '\n", + " 'irradiation measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W/m2'},\n", + " 'weaSta_reaWeaHHorIR_y': {'Description': 'Horizontal infrared irradiation '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'W/m2'},\n", + " 'weaSta_reaWeaLat_y': {'Description': 'Latitude 
of the location',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaLon_y': {'Description': 'Longitude of the location',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaNOpa_y': {'Description': 'Opaque sky cover measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': '1'},\n", + " 'weaSta_reaWeaNTot_y': {'Description': 'Sky cover measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': '1'},\n", + " 'weaSta_reaWeaPAtm_y': {'Description': 'Atmospheric pressure measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'Pa'},\n", + " 'weaSta_reaWeaRelHum_y': {'Description': 'Outside relative humidity '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': '1'},\n", + " 'weaSta_reaWeaSolAlt_y': {'Description': 'Solar altitude angle measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaSolDec_y': {'Description': 'Solar declination angle measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaSolHouAng_y': {'Description': 'Solar hour angle measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaSolTim_y': {'Description': 'Solar time',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 's'},\n", + " 'weaSta_reaWeaSolZen_y': {'Description': 'Solar zenith angle measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaTBlaSky_y': {'Description': 'Black-body sky temperature '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'weaSta_reaWeaTDewPoi_y': {'Description': 'Dew point temperature measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 
'weaSta_reaWeaTDryBul_y': {'Description': 'Outside drybulb temperature '\n", + " 'measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'weaSta_reaWeaTWetBul_y': {'Description': 'Wet bulb temperature measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'K'},\n", + " 'weaSta_reaWeaWinDir_y': {'Description': 'Wind direction measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'rad'},\n", + " 'weaSta_reaWeaWinSpe_y': {'Description': 'Wind speed measurement',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': 'm/s'}}\n", + "\n", + "All forecasting variables\n", + "-------------------------\n", + "['winDir',\n", + " 'TDewPoi',\n", + " 'LowerSetp[1]',\n", + " 'PriceElectricPowerConstant',\n", + " 'UpperSetp[1]',\n", + " 'PriceElectricPowerHighlyDynamic',\n", + " 'solTim',\n", + " 'solHouAng',\n", + " 'nOpa',\n", + " 'InternalGainsRad[1]',\n", + " 'nTot',\n", + " 'HGloHor',\n", + " 'winSpe',\n", + " 'TBlaSky',\n", + " 'solDec',\n", + " 'lon',\n", + " 'PriceElectricPowerDynamic',\n", + " 'HDifHor',\n", + " 'InternalGainsCon[1]',\n", + " 'solZen',\n", + " 'HHorIR',\n", + " 'relHum',\n", + " 'pAtm',\n", + " 'Occupancy[1]',\n", + " 'ceiHei',\n", + " 'lat',\n", + " 'InternalGainsLat[1]',\n", + " 'TWetBul',\n", + " 'TDryBul',\n", + " 'HDirNor',\n", + " 'EmissionsElectricPower',\n", + " 'cloTim',\n", + " 'solAlt',\n", + " 'UpperCO2[1]']\n", + "\n", + "All input variables\n", + "-------------------\n", + "{'oveFan_activate': {'Description': 'Activation for Integer signal to control '\n", + " 'the heat pump evaporator fan either on or '\n", + " 'off',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': None},\n", + " 'oveFan_u': {'Description': 'Integer signal to control the heat pump '\n", + " 'evaporator fan either on or off',\n", + " 'Maximum': 1,\n", + " 'Minimum': 0,\n", + " 'Unit': '1'},\n", + " 'oveHeaPumY_activate': {'Description': 'Activation for Heat 
pump modulating '\n", + " 'signal for compressor speed between 0 '\n", + " '(not working) and 1 (working at '\n", + " 'maximum capacity)',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': None},\n", + " 'oveHeaPumY_u': {'Description': 'Heat pump modulating signal for compressor '\n", + " 'speed between 0 (not working) and 1 (working '\n", + " 'at maximum capacity)',\n", + " 'Maximum': 1,\n", + " 'Minimum': 0,\n", + " 'Unit': '1'},\n", + " 'ovePum_activate': {'Description': 'Activation for Integer signal to control '\n", + " 'the emission circuit pump either on or '\n", + " 'off',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': None},\n", + " 'ovePum_u': {'Description': 'Integer signal to control the emission circuit '\n", + " 'pump either on or off',\n", + " 'Maximum': 1,\n", + " 'Minimum': 0,\n", + " 'Unit': '1'},\n", + " 'oveTSet_activate': {'Description': 'Activation for Zone operative '\n", + " 'temperature setpoint',\n", + " 'Maximum': None,\n", + " 'Minimum': None,\n", + " 'Unit': None},\n", + " 'oveTSet_u': {'Description': 'Zone operative temperature setpoint',\n", + " 'Maximum': 308.15,\n", + " 'Minimum': 278.15,\n", + " 'Unit': 'K'}}\n", + "\n", + "Default simulation step (seconds)\n", + "---------------------------------\n", + "3600\n", + "\n", + "Default scenario\n", + "----------------\n", + "{'electricity_price': 'constant'}\n", + "\n", + "Test case scenario\n", + "------------------\n", + "{'electricity_price': 'constant'}\n", + "\n", + "===========================\n", + "GYM ENVIRONMENT INFORMATION\n", + "===========================\n", + "\n", + "Observation space\n", + "-----------------\n", + "Box(280.0, 310.0, (1,), float32)\n", + "\n", + "Action space\n", + "------------\n", + "Box(0.0, 1.0, (1,), float32)\n", + "\n", + "Is a regressive environment\n", + "---------------------------\n", + "False\n", + "\n", + "Is a predictive environment\n", + "---------------------------\n", + "False\n", + "\n", + "Regressive 
period (seconds)\n", + "---------------------------\n", + "None\n", + "\n", + "Predictive period (seconds)\n", + "---------------------------\n", + "None\n", + "\n", + "Measurement variables used in observation space\n", + "-----------------------------------------------\n", + "['reaTZon_y']\n", + "\n", + "Predictive variables used in observation space\n", + "----------------------------------------------\n", + "[]\n", + "\n", + "Sampling time (seconds)\n", + "-----------------------\n", + "3600\n", + "\n", + "Random start time\n", + "-----------------\n", + "False\n", + "\n", + "Excluding periods (seconds from the beginning of the year)\n", + "----------------------------------------------------------\n", + "None\n", + "\n", + "Warmup period for each episode (seconds)\n", + "----------------------------------------\n", + "86400\n", + "\n", + "Maximum episode length (seconds)\n", + "--------------------------------\n", + "86400\n", + "\n", + "Environment reward function (source code)\n", + "-----------------------------------------\n", + "(' def get_reward(self):\\n'\n", + " \" '''\\n\"\n", + " \" Compute the reward of last state-action-state' tuple. The \\n\"\n", + " ' reward is implemented as the negated increase in the objective\\n'\n", + " ' integrand function. In turn, this objective integrand function \\n'\n", + " ' is calculated as the sum of the total operational cost plus\\n'\n", + " ' the weighted discomfort. \\n'\n", + " ' \\n'\n", + " ' Returns\\n'\n", + " ' -------\\n'\n", + " ' Reward: float\\n'\n", + " \" Reward of last state-action-state' tuple\\n\"\n", + " ' \\n'\n", + " ' Notes\\n'\n", + " ' -----\\n'\n", + " ' This method is just a default method to compute reward. It can be \\n'\n", + " ' overridden by defining a child from this class with\\n'\n", + " ' this same method name, i.e. `get_reward`. 
If a custom reward \\n'\n", + " ' is defined, it is strongly recommended to derive it using the KPIs\\n'\n", + " ' as returned from the BOPTEST framework, as it is done in this \\n'\n", + " ' default `get_reward` method. This ensures that all variables \\n'\n", + " ' that may contribute to any KPI are properly accounted and \\n'\n", + " ' integrated. \\n'\n", + " ' \\n'\n", + " \" '''\\n\"\n", + " ' \\n'\n", + " ' # Define a relative weight for the discomfort \\n'\n", + " ' w = 1\\n'\n", + " ' \\n'\n", + " ' # Compute BOPTEST core kpis\\n'\n", + " ' kpis = '\n", + " \"requests.get('{0}/kpi/{1}'.format(self.url,self.testid)).json()['payload']\\n\"\n", + " ' \\n'\n", + " ' # Calculate objective integrand function at this point\\n'\n", + " \" objective_integrand = kpis['cost_tot'] + w*kpis['tdis_tot']\\n\"\n", + " ' \\n'\n", + " ' # Compute reward\\n'\n", + " ' reward = -(objective_integrand - self.objective_integrand)\\n'\n", + " ' \\n'\n", + " ' self.objective_integrand = objective_integrand\\n'\n", + " ' \\n'\n", + " ' return reward\\n')\n", + "\n", + "Environment hierarchy\n", + "---------------------\n", + "(,\n", + " ,\n", + " ,\n", + " )\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(env)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KYf1BksgfQj" + }, + "source": [ + "Note that this descriptive summary provides information not only about the Gym environment but also all information about the original BOPTEST test case. This may be useful, for example, if we want to extend our observation space or if we want to change our control action." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zQ_y22lrg1cM" + }, + "source": [ + "BOPTEST-Gym comes along with other functionality that may be useful when training RL agents, like the capacity to discretize and normalize observation and action spaces. 
For instance, we are dealing now with a continuous action environment, meaning that the agent could decide to take any action between 0 and 1. However, it is probably helpful to the agent to decide on just whether the heating needs to be turned on (action=1) or off (action=0). For that, we can wrap our environment with a discretization wrapper with only one action bin (one bin has two extremes). The concept of wrappers is very powerful in Gym environments. With them, we are able to customize the observation, action, step function, etc. of an environment. No matter how many wrappers are applied, `env.unwrapped` always gives back the internal original environment object. Let's see how it works with BOPTEST-Gym:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zIqfeNwgh9VK", + "outputId": "bb7087fd-19f3-4eee-828c-859b2ec641d0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Action space of the wrapped agent:\n", + "Discrete(2)\n", + "Action space of the original agent:\n", + "Box(0.0, 1.0, (1,), float32)\n" + ] + } + ], + "source": [ + "from boptestGymEnv import DiscretizedActionWrapper\n", + "env = DiscretizedActionWrapper(env,n_bins_act=1)\n", + "print('Action space of the wrapped agent:')\n", + "print(env.action_space)\n", + "print('Action space of the original agent:')\n", + "print(env.unwrapped.action_space)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ghlx_zaf282q" + }, + "source": [ + "Another thing that we can do is to interact with the building environment for one episode of experience (one day). This is similar to what we did with the Cartpole example, but this time we are going to run just one episode and use a hysteresis controller that will turn on the heating when the temperature is below a predefined temperature setpoint, and turn it off when the temperature goes above the setpoint. 
We first configure such controller:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "MrO0o7hNf5pB" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "np.set_printoptions(precision=3)\n", + "\n", + "class SimpleController(object):\n", + " '''Simple controller for this emulator.\n", + "\n", + " '''\n", + " def __init__(self, TSet=22+273.15):\n", + " self.TSet = TSet\n", + "\n", + " def predict(self, obs):\n", + " # Compute control\n", + " if obs[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JAIt_IfivAHN" + }, + "source": [ + "In this section we are going to develop a very simple RL agent based on the very well known *q-learning* algorithm. Although simple, this exercise will help us understand the main concepts of RL and how this machine learning technique can be helpful to mitigate climate change by enhancing building's operational efficiency. Recall that our objective is to develop an RL agent that can decide on the best action to take in each situation (each state) just from interactions with the environment (the building). Imagine we are at time $k$ in a certain state $\\pmb{s}$ and take an action $\\pmb{a}$. In return, we obtain a reward $r'$ the next time step and end up in a state $\\pmb{s}'$ :" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nxO3gVx3vqrL" + }, + "source": [ + "![](https://drive.google.com/file/d/1XVbDEiHT2fWIGtnPLE0uphC2hV5XubKc/view?usp=sharing)\n", + "\n", + "\n", + "\n", + "*Figure: The backup diagram. Edited version from the book of Richard S. Sutton and Andrew G. Barto* **[6]**." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XflFYx7lylyw" + }, + "source": [ + "In *q-learning* we aim to derive an *action-value function*, the q-function. The q-function indicates what is the **long-term** value of taking an action $a$ from a certain state $s$. 
With this information we not only have an estimation of the value of each state, but we can also decide to take the next action $\\pmb{a}'$ that leads to the highest value from the next state $\\pmb{s}'$. This principle relies on the so-called *Bellman optimality equation* that is presented below:\n", + "\n", + "\\begin{align}\n", + " q(\\pmb{s},\\pmb{a}) = r' + \\gamma \\max_{\\pmb{a}'} q(\\pmb{s}',\\pmb{a}')\n", + "\\end{align}\n", + "\n", + "This equation states that the total expected cumulative return of taking action $\\pmb{a}$ from state $\\pmb{s}$ equals the immediate reward $r'$ plus the maximum achievable return, discounted by $\\gamma$, that we can obtain from the following state $\\pmb{s}'$. Note that the q-function estimates the **TOTAL EXPECTED CUMULATIVE RETURN** of taking action $\\pmb{a}$ from state $\\pmb{s}$ (not just the immediate reward). So given the q-function we can know straight-away what is the best action to take for each state $\\pmb{s}$. You can imagine a q-function with one-dimensional state and action spaces as follows:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HBpa3qjuysK-" + }, + "source": [ + "\n", + "\n", + "\n", + "\n", + "*Figure: Example of what a q-function may look like for the case with one-dimensional state and action spaces. Note that, given the q-function, we can pick the action $a$ that leads to the highest expected cumulative reward $q_*$ from state $s$.*\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "db8AVf3GoCz9" + }, + "source": [ + "Powerful, right? Now the question remains how to derive the q-function 😅.\n", + "\n", + "The q-function is inferred iteratively using the reward received by the agent each control step and bootstrapping with the Bellman optimality equation presented above. The sum of the immediate reward and the next-state q-function estimate is called the target. We use this target to recursively update the q-function at a learning rate $\\alpha$. 
The difference between the target and our current q-function estimate is called the *temporal difference*. In summary, the q-learning method consists of recursively updating the q-function using the following formula:\n", + "\n", + "\\begin{align}\n", + " q(\\pmb{s},\\pmb{a}) = q(\\pmb{s},\\pmb{a}) + \\alpha [ \\underbrace{\\underbrace{r' + \\gamma \\max_{\\pmb{a}'} q(\\pmb{s}',\\pmb{a}')}_\\text{target} - q(\\pmb{s},\\pmb{a})}_\\text{temporal difference}]\n", + "\\end{align}\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qjBqNfXd_pY2" + }, + "source": [ + "So in summary, the agent observes the reward once it has taken an action from a state. It has to explore the rewards from different state-action pairs and update its q-function as it goes.\n", + "\n", + "In our example we are going to use tabular state and action spaces to expedite learning and to easily store and visualize the q-function. Note, however, that we could use general function approximators like neural networks to configure the q-function.\n", + "\n", + "📌 **Note: The exploration-exploitation dilemma** ⚖️\n", + "\n", + "RL always faces the so-called exploration-exploitation dilemma. That is, how much of what we have learned should we exploit, and how much should we explore to find even better solutions? In our case, we implement an *Epsilon-greedy* approach to balance exploration and exploitation of the RL agent. That is, the agent sometimes picks a random action (exploration), and sometimes picks an \"intelligent\" action (exploitation). The frequency at which the agent picks a random action is determined by *Epsilon* (`eps`) and it follows a linearly decaying schedule."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZQ7Um2UtLHk4" + }, + "source": [ + "\n", + "\n", + "*Figure: The epsilon-greedy strategy for balancing exploration and exploitation.*\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6aOHW96nLqOZ" + }, + "source": [ + "Our `Q_Learning_Agent` consists of only three methods:\n", + "\n", + "- `__init__` ➡️ The constructor.\n", + "- `predict` ➡️ Method to decide on an action given an observation.\n", + "- `learn` ➡️ Method for learning with the q-learning method explained above.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "9U81QUVcUfoW" + }, + "outputs": [], + "source": [ + "class Q_Learning_Agent(object):\n", + "\n", + " def __init__(self, env, eps_min=0.01, eps_decay=0.01, alpha=0.05, gamma=0.9):\n", + " '''Constructor of a q-learning agent. Assumes discrete state and action spaces.\n", + "\n", + " '''\n", + " self.env = env\n", + " self.eps_min = eps_min\n", + " self.eps_decay = eps_decay\n", + " self.alpha = alpha\n", + " self.gamma = gamma\n", + "\n", + " # Initialize epsilon\n", + " self.eps = 1.0\n", + "\n", + " # Initialize q-function as a null function\n", + " self.q = np.zeros((env.observation_space.n,\n", + " env.action_space.n))\n", + "\n", + " def predict(self, obs, deterministic=True):\n", + " '''Method to select an action with an epsilon-greedy policy.\n", + "\n", + " '''\n", + " if deterministic:\n", + " # Use q-function to decide action\n", + " return np.argmax(self.q[obs])\n", + " else:\n", + " if self.eps > self.eps_min:\n", + " # Linearly decreasing schedule\n", + " self.eps -= self.eps_decay\n", + " if np.random.random() < self.eps:\n", + " # Explore with random action\n", + " return np.random.choice([a for a in range(env.action_space.n)])\n", + " else:\n", + " # Exploit the information of our q-function\n", + " return np.argmax(self.q[obs])\n", + "\n", + " def learn(self, total_episodes=10):\n", + " '''Learn from a number 
of interactions with the environment.\n", + "\n", + " '''\n", + " for i in range(total_episodes):\n", + " # Initialize enviornment\n", + " done = False\n", + " obs, _ = env.reset()\n", + " # Print episode number and starting day from beginning of the year:\n", + " print('-------------------------------------------------------------------')\n", + " print('Episode number: {0}, starting day: {1:.1f} ' \\\n", + " '(from beginning of the year)'.format(i+1, env.unwrapped.start_time/24/3600))\n", + "\n", + " while not done:\n", + " # Get action with epsilon-greedy policy and simulate\n", + " act = self.predict(obs, deterministic=False)\n", + " nxt_obs, rew, terminated, truncated, _ = env.step(act)\n", + " done = (terminated or truncated)\n", + " # Compute temporal difference target and error to udpate q-function\n", + " td_target = rew + self.gamma*np.max(self.q[nxt_obs])\n", + " td_error = td_target - self.q[obs][act]\n", + " self.q[obs][act] += self.alpha*td_error\n", + " # Make our next observation the current observation\n", + " obs = nxt_obs\n", + " # Print the q-function after every episode to show progress\n", + " print('q(s,a) = ')\n", + " print(self.q)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RWPbW8WKaQET" + }, + "source": [ + "## **Testing our RL algorithm in BOPTEST-Gym** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SV8bk8x75C_0" + }, + "source": [ + "Now that we have a RL agent ready, let's test it in BOPTEST-Gym! 
We are going to exploit the features of BOPTEST-Gym to:\n", + "\n", + "- Define a custom reward function of the environment.\n", + "- Instantiate the environment and define its state and action spaces.\n", + "- Train our RL agent.\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wy1TpSGEPxYr" + }, + "source": [ + "### Define a custom reward function of the environment" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J9jwn5BQQCyj" + }, + "source": [ + "The definition of the reward function is **KEY**🗝 since it is what drives the learning of an agent.\n", + "The `BoptestGymEnv` class allows us to override its `get_reward` method, which is called every control step, so as to freely define any reward function of choice.\n", + "\n", + "In our example, the goal is to implement an RL agent to identify the actions that keep comfort inside the building, and we should encode our reward function accordingly. We could implement this function by integrating the temperature deviations out of the comfort range. However, this approach is error-prone. We typically want to directly use signals from the environment to define the reward, preferably those that are directly related to the function we want to optimize so that we make sure we strive for the ground truth optimum. In BOPTEST we use the `GET /kpis` API to obtain the so-called core KPIs at the present time, which are:\n", + "\n", + "\n", + "* **Thermal discomfort**: reported with units of [$K \\, h/zone$], defines the cumulative deviation of zone temperatures from upper and lower comfort limits that are predefined within the test case FMU for each zone, averaged over all zones. 
Air temperature is used for air-based systems and operative temperature is used for radiant systems.\n", + "* **Indoor Air Quality (IAQ) Discomfort**: reported with units of [$ppm \\, h/zone$], defines the extent that the CO$_2$ concentration levels in zones exceed bounds of the acceptable concentration level, which are predefined within the test case FMU for each zone, averaged over all zones.\n", + "* **Energy Use**: reported with units of [$kWh/m^2$], defines the HVAC energy usage.\n", + "* **Cost**: reported with units of [USD/$m^2$] or [EUR/$m^2$], defines the operational cost associated with the HVAC energy usage.\n", + "* **Emissions**: reported with units of [$kg \\, CO_2/m^2$], defines the CO$_2$ emissions from the HVAC energy usage.\n", + "* **Computational time ratio**: defines the average ratio between the controller computation time and the test simulation control step. The controller computation time is measured as the time between two emulator advances.\n", + "\n", + "The time series graph below shows how thermal discomfort and energy use are computed by the BOPTEST `GET /kpis` API call.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZQagxsvtr3ow" + }, + "source": [ + "\n", + "\n", + "*Figure: Integration of thermal discomfort (top) and energy use (bottom). In BOPTEST, the `GET /kpis` API can directly return these values every control step. Note that the integration step is significantly smaller than the control step.*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "esAUAwHdr8y1" + }, + "source": [ + "The core KPIs are normally calculated at the end of the simulation to assess the controller performance, although they can be computed at any time. The warmup period is not taken into account for the calculation of the KPIs. See below how we define the `get_reward` method using the `GET /kpi`. Every control step we check whether there has been a discomfort increment. 
If there is not discomfort increment, we reward our agent with $1$, otherwise we return a $0$ (no reward). Clipping the reward is a good practice to accelerate learning." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "hTcc3XiVP-A6" + }, + "outputs": [], + "source": [ + "# Redefine reward function\n", + "class BoptestGymEnvCustomReward(BoptestGymEnv):\n", + " '''Define a custom reward for this building\n", + "\n", + " '''\n", + " def get_reward(self):\n", + " '''Custom reward function. To expedite learning, we use a clipped reward\n", + " function that has a value of 1 when there is no increase in discomfort\n", + " and 0 otherwise. We use the BOPTEST `GET /kpis` API call to compute the\n", + " total cummulative discomfort from the beginning of the episode. Note\n", + " that this is the true value that BOPTEST uses when evaluating\n", + " controllers.\n", + "\n", + " '''\n", + " # Compute BOPTEST core kpis\n", + " kpis = requests.get('{0}/kpi/{1}'.format(self.url, self.testid)).json()['payload']\n", + " # Calculate objective integrand function as the total discomfort\n", + " objective_integrand = kpis['tdis_tot']\n", + " # Give reward if there is not immediate increment in discomfort\n", + " if objective_integrand == self.objective_integrand:\n", + " reward=1\n", + " else:\n", + " reward=0\n", + " # Record current objective integrand for next evaluation\n", + " self.objective_integrand = objective_integrand\n", + " return reward" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2hpd_svcOhDy" + }, + "source": [ + "### Instantiate the environment and define its state and action spaces" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xszlVIQtOkiz" + }, + "source": [ + "Similarly to our `SimpleController` example, now we are going to use an agent that observes only the current indoor temperature and decides whether to turn heating on or off. 
However, instead of hard-coding such logic, we are going to use our very own implementation of the `Q_Learning_Agent` to see if it can learn how to do that.\n", + "For this, we are going to let our RL agent interact with the building for some episodes of experience.\n", + "Since we are now going to run several episodes for training, we want to stop our previous environment and start one that randomly initializes our building emulator throughout the year.\n", + "This allows to train our agent when using different boundary condition data in our building environment. We are also going to exclude the Spring, Summer, and Fall periods for training since we are only focused on learning the heating behavior.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "24fsDMTv8tSF", + "outputId": "f65aaa3b-50d4-4232-98d9-75ef2f7c13a3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "env.stop()\n", + "import random\n", + "\n", + "# Seed for random starting times of episodes\n", + "seed = 123456\n", + "random.seed(seed)\n", + "# Seed for random exploration and epsilon-greedy schedule\n", + "np.random.seed(seed)\n", + "\n", + "# Winter period goes from December 21 (day 355) to March 20 (day 79)\n", + "excluding_periods = [(79*24*3600, 355*24*3600)]\n", + "# Temperature setpoints\n", + "lower_setp = 21 + 273.15\n", + "upper_setp = 24 + 273.15\n", + "# Instantiate environment\n", + "env = BoptestGymEnvCustomReward(url = url,\n", + " testcase = 'bestest_hydronic_heat_pump',\n", + " actions = ['oveHeaPumY_u'],\n", + " observations = {'reaTZon_y':(lower_setp,upper_setp)},\n", + " random_start_time = True,\n", + " excluding_periods = excluding_periods,\n", + " max_episode_length = 2*24*3600,\n", + " warmup_period = 24*3600,\n", + " step_period = 3600,\n", + " render_episodes = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NU8aoMvV9AdE" + }, + "source": [ + "We have set the zone temperature as the only observation of the environment state. We have also set the lower and upper bounds of this variable to be $21$ and $24 °C$, respectively, which are the bounds of the comfort range during occupied periods. These bounds can be used by the environment for normalization or discretization purposes. In fact, we are going to discretize both the action and observation spaces to expedite learning. We decide to set only one bin for the action space (two possible actions: heating on or off). We split the observation space in three bins with the outer bounds of the comfort range as bins of the observation space (`outs_are_bins=True`). That is, the observation space is defined by $[-∞,21,24,+∞]$ as shown on the left hand side of the figure below. Note that only the middle bin is always comfortable whereas the other bins may lead to discomfort. 
If we had set `outs_are_bins=False` we would have had all our bins within the comfort range. The latter would give the agent a notion of what is the temperature within the comfort range (close to the lower bound, middle, or close to the upper bound), but it would raise an error if the temperature is out of the range. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "uCUZKrOMOIEN" + }, + "outputs": [], + "source": [ + "from boptestGymEnv import DiscretizedObservationWrapper\n", + "env = DiscretizedActionWrapper(env, n_bins_act=1)\n", + "env = DiscretizedObservationWrapper(env, n_bins_obs=3, outs_are_bins=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ab6WP3zLEvnb" + }, + "source": [ + "\n", + "\n", + "*Figure: Possibilities for the discretization of the state space.*\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GTvGxERwOOI6" + }, + "source": [ + "### Train our RL agent" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sc3XqYSDOuGq" + }, + "source": [ + "The only missing step is to let our RL agent learn by rolling out episodes of experience with the environment. We use the previously defined `learn` method for this. Note that, since we set `render_episodes=True`, we will be seeing a plot with relevant variables after each episode is finished. This is helpful to check if the agent is learning as expected from early stages. If the agent is not showing any sign of life we can prematurely stop the learning process to use new learning settings while saving some valuable time and computational cost. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "jtOpX5y_RTsV", + "outputId": "dd84cf65-cc65-4f43-8a23-8c9fd1283c5e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------------------------------\n", + "Episode number: 1, starting day: 11.4 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[0. 0. ]\n", + " [1.936 1.398]\n", + " [0.594 0. ]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 2, starting day: 67.8 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[0. 0.17 ]\n", + " [2.414 2.116]\n", + " [1.491 0.594]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 3, starting day: 0.9 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[1.594 1.141]\n", + " [2.411 2.221]\n", + " [1.491 0.594]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 4, starting day: 29.9 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.411 2.219]\n", + " [2.382 2.409]\n", + " [1.491 0.594]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 5, starting day: 19.8 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.411 2.219]\n", + " [2.833 2.967]\n", + " [2.287 1.918]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 6, starting day: 11.0 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.411 2.219]\n", + " [4.677 4.24 ]\n", + " [2.367 1.918]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 7, starting day: 45.2 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.452 3.17 ]\n", + " [4.847 4.609]\n", + " [2.432 1.997]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 8, starting day: 362.2 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.452 3.271]\n", + " [5.66 5.162]\n", + " [2.737 2.014]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 9, starting day: 72.3 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.452 3.917]\n", + " [6.03 6.114]\n", + " [2.737 2.014]]\n", + "-------------------------------------------------------------------\n", + "Episode number: 10, starting day: 357.8 (from beginning of the year)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q(s,a) = \n", + "[[2.56 4.458]\n", + " [6.448 6.346]\n", + " [3.256 2.165]]\n" + ] + } + ], + "source": [ + "model = Q_Learning_Agent(env, eps_min=0.01, eps_decay=0.001, alpha=0.1, gamma=0.9)\n", + "model.learn(total_episodes=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qblijq9WVobQ" + }, + "source": [ + "Since our environment has been defined with one-dimensional state and action spaces, we can plot the q-function after training as follows:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 408 + }, + "id": "z_u32nxmzSDm", + "outputId": "a8faf1dc-3bd4-4cf3-bc52-e838ee7fe16d" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "acts = ['a=0','a=1']\n", + "stas = ['T<21', '2124']\n", + "colors = ['b', 'g', 'r']\n", + "\n", + "fig = plt.figure()\n", + "ax = fig.add_subplot(111, projection='3d')\n", + "ax.set_xlabel('actions', labelpad=6, fontsize=12)\n", + "ax.set_ylabel('states', labelpad=10, fontsize=12)\n", + "ax.set_zlabel('$\\mathbf{q(s,a)}$', labelpad=0, fontsize=15)\n", + "plt.xticks(ticks=range(len(acts)), labels=acts)\n", + "plt.yticks(ticks=range(len(stas)), labels=stas)\n", + "\n", + "for i, s in enumerate(stas):\n", + " x = np.arange(len(acts))\n", + " h = model.q[i,:]\n", + "\n", + " # Set color\n", + " color = [colors[i]]*len(acts)\n", + "\n", + " # Plot the 3D bar graph\n", + " ax.bar(x, h, zs=i, zdir='y', color=color, alpha=0.8)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V0AIl-HeVyqs" + }, + "source": [ + "Does it sound familiar? this is actually the [q-function that we had conceptually introduced before](#qFunctionConcept), but for our specific case!\n", + "\n", + "We observe that the state with the highest value is the one in the middle (green bars 🟢👌, `2124`), there is more value on `a=0`, so there is a preference for the agent to turn heating off.\n", + "\n", + "Sometimes it is useful to know what is the value of being on a specific state, independently of the action to be taken. 
This is represented by the so-called state-value function, which relates to the action-value function as follows:\n", + "\n", + "\\begin{align}\n", + " v(\\pmb{s}) = \\max_{\\pmb{a}} q(\\pmb{s},\\pmb{a})\n", + "\\end{align}\n", + "\n", + "At this point we can easily compute and plot the value function for our case:\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 462 + }, + "id": "urJOkjSNoa-h", + "outputId": "8819e8b8-00b8-4177-898d-e81f283e2571" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Compute the state-value function\n", + "v = np.amax(model.q, axis=1)\n", + "\n", + "# Plot state-value function\n", + "fig = plt.figure()\n", + "\n", + "ax = fig.add_subplot(111)\n", + "ax.set_xlabel('states', labelpad=10, fontsize=12)\n", + "ax.set_ylabel('$\\mathbf{v(s)}$', labelpad=0, fontsize=15)\n", + "plt.xticks(ticks=range(len(stas)), labels=stas)\n", + "x = np.arange(len(stas))\n", + "ax.bar(x, v, color=colors, alpha=0.8)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "clFn8dd7obRI" + }, + "source": [ + "Notice that we have trained our agent following an off-policy method: the actions were driven by a policy different than that one that our agent would follow. This is because the agent was using an epsilon-greedy policy to explore more rewarding actions. If we conclude we are happy with the learned policy, we can test it by setting `deterministic=True` with the `predict` method. 
For example, let's test our learned agent for the first day of February:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WuYEBf9nsmH6", + "outputId": "661787af-457d-4c58-e710-bcb35c156eee" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------------------------------\n", + "State [Bin #] = 1\n", + "Action [ - ] = 0\n", + "-------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "env.stop()\n", + "env = BoptestGymEnvCustomReward(url = url,\n", + " testcase = 'bestest_hydronic_heat_pump',\n", + " actions = ['oveHeaPumY_u'],\n", + " observations = {'reaTZon_y':(lower_setp,upper_setp)},\n", + " random_start_time = False,\n", + " start_time = 31*24*3600,\n", + " max_episode_length = 24*3600,\n", + " warmup_period = 24*3600,\n", + " step_period = 3600)\n", + "env = DiscretizedActionWrapper(env, n_bins_act=1)\n", + "env = DiscretizedObservationWrapper(env, n_bins_obs=3, outs_are_bins=True)\n", + "\n", + "done = False\n", + "obs, _ = env.reset()\n", + "\n", + "from IPython.display import clear_output\n", + "while not done:\n", + " # Clear the display output at each step\n", + " clear_output(wait=True)\n", + " # Compute control signal\n", + " action = model.predict(obs, deterministic=True)\n", + " # Print the current operative temperature and decided action\n", + " print('-------------------------------------------------------------------')\n", + " print('State [Bin #] = {:.0f}'.format(obs))\n", + " print('Action [ - ] = {:.0f}'.format(action))\n", + " print('-------------------------------------------------------------------')\n", + " # Implement action\n", + " obs,reward,terminated,truncated,info = env.step(action) # send the action to the environment\n", + " done = (terminated or truncated)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "sLBD3joyxe9Z" + }, + "source": [ + "Now there is no randomness involved. The agent exploits its policy by ALWAYS picking action `a=1` when `s=0` because it has learned that that is the action with the highest value in that state.\n", + "\n", + "We can now evaluate our learned policy by calculating the core KPIs with BOPTEST:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eLzZaaNzyeZv", + "outputId": "7af5fe45-51ea-4b2b-899d-cfd2db3feee7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tdis_tot': 1.7415794961384694,\n", + " 'idis_tot': 0,\n", + " 'ener_tot': 0.17501300879744733,\n", + " 'cost_tot': 0.044365797730152895,\n", + " 'emis_tot': 0.029227172469173696,\n", + " 'pele_tot': 0.01990768126278055,\n", + " 'pgas_tot': None,\n", + " 'pdih_tot': None,\n", + " 'time_rat': 0.0002186830550576178}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env.get_kpis()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WbDAlStV2Tvx" + }, + "source": [ + "This prepares the ground for different RL configurations to be evaluated and compared between each other and to other types of controls like classical rule based controllers or more advanced model predictive control. Recall that there are specific [scenario periods for each test case in BOPTEST](https://github.com/ibpsa/project1-boptest/tree/master/testcases#test-cases) that are set for these comparisons." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FBL289bfsmcJ" + }, + "source": [ + "**Food for thought: 🤔**\n", + "- If the agent never receives a reward when the temperature is out of the comfort bounds (states 0 🔵 and 2 🔴), why is the q-function not 0 for those states?\n", + "- Could you think of measures to improve learning?\n", + "- Could you think of measures to improve performance?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eKqvn5yb_mqJ" + }, + "source": [ + "# **Gearing up** 💪" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X1sdwYm5b66G" + }, + "source": [ + "The previous stylized example had a very limited representation of the state space. It was useful to illustrate how we can configure and train an RL agent without needing too many interactions with the environment (our building). However, using RL for solving this environment may feel like overkill. Our `SimpleController` was already enough to decide when to turn on heating based on indoor temperature readings. You should note, however, that you have developed a general agent capable of learning from any environment, with the potential to infer way more complex relationships between environment observations and actions. Examples of what this RL agent could infer for building control are the following:\n", + "- Dynamic energy pricing\n", + "- A heating schedule based on user inputs.\n", + "- A heating curve based on ambient temperature.\n", + "- The variable heat pump COP based on condenser, evaporator, and ambient temperature readings.\n", + "\n", + "We could for example extend our reward function so as to minimize the building energy use or the greenhouse gas emissions while keeping comfort.\n", + "And all this can be inferred without the need of a model that requires domain knowledge. On the downside, learning more complex dynamics from higher-dimensional observation spaces requires more training data. 
This means that more interactions with the environment (the building) are required, which sometimes are unavailable. For this reason, sample-efficiency is key in RL and there exist several tricks to expedite learning.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V3ZF29MChF4F" + }, + "source": [ + "To finalize, we are going to instantiate a more complete building environment by extending the observation space with the time of the week as well as information about the ambient temperature, solar irradiation, internal gains, electricity pricing, or temperature setpoints. With BOPTEST-Gym we can also establish a predictive and a regressive period that include predictions of the boundary condition data and past observations of the measured data, respectively.\n", + "\n", + "Because of its high dimensional state-action space, an agent will probably require many more interactions to solve this environment. Luckily, there are readily available state-of-the-art RL algorithms that use the learning principle you have learned above while implementing all sorts of tricks to expedite and stabilize learning. For example, we can access the advanced Deep Q-Network (DQN) algorithm from Stable-Baselines3 to learn this more complex environment. We set here our agent to learn for `10` steps to show how this learning process would be initiated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bdx3qCDvhFSX", + "outputId": "d3fdf2ca-64d5-4e1d-ac9f-b999c7a3ff09" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using cpu device\n", + "Wrapping the env with a `Monitor` wrapper\n", + "Wrapping the env in a DummyVecEnv.\n" + ] + }, + { + "data": { + "text/plain": [ + "{'tdis_tot': 8.370911955136352,\n", + " 'idis_tot': 0,\n", + " 'ener_tot': 0.3965284774903316,\n", + " 'cost_tot': 0.10051996904379909,\n", + " 'emis_tot': 0.06622025574088537,\n", + " 'pele_tot': 0.021041629229404186,\n", + " 'pgas_tot': None,\n", + " 'pdih_tot': None,\n", + " 'time_rat': 0.0004946192215990137}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# env.stop()\n", + "\n", + "from boptestGymEnv import BoptestGymEnv, NormalizedObservationWrapper, DiscretizedActionWrapper\n", + "from stable_baselines3 import DQN\n", + "\n", + "# url for the BOPTEST service\n", + "url = 'https://api.boptest.net'\n", + "\n", + "# Decide the state-action space of your test case\n", + "env = BoptestGymEnv(\n", + " url = url,\n", + " testcase = 'bestest_hydronic_heat_pump',\n", + " actions = ['oveHeaPumY_u'],\n", + " observations = {'time':(0,604800),\n", + " 'reaTZon_y':(280.,310.),\n", + " 'TDryBul':(265,303),\n", + " 'HDirNor':(0,862),\n", + " 'InternalGainsRad[1]':(0,219),\n", + " 'PriceElectricPowerHighlyDynamic':(-0.4,0.4),\n", + " 'LowerSetp[1]':(280.,310.),\n", + " 'UpperSetp[1]':(280.,310.)},\n", + " predictive_period = 24*3600,\n", + " regressive_period = 6*3600,\n", + " random_start_time = True,\n", + " max_episode_length = 24*3600,\n", + " warmup_period = 24*3600,\n", + " step_period = 3600)\n", + "\n", + "# Normalize observations and discretize action space\n", + "env = NormalizedObservationWrapper(env)\n", + "env = 
DiscretizedActionWrapper(env,n_bins_act=10)\n", + "\n", + "# Instantiate an RL agent\n", + "model = DQN('MlpPolicy', env, verbose=1, gamma=0.99,\n", + " learning_rate=5e-4, batch_size=24, seed=123456,\n", + " buffer_size=365*24, learning_starts=24, train_freq=1)\n", + "\n", + "# Main training loop\n", + "model.learn(total_timesteps=10)\n", + "\n", + "# Loop for one episode of experience (one day as set in max_episode_length)\n", + "done = False\n", + "obs, _ = env.reset()\n", + "while not done:\n", + " action, _ = model.predict(obs, deterministic=True)\n", + " obs,reward,terminated,truncated,info = env.step(action)\n", + " done = (terminated or truncated)\n", + "\n", + "# Obtain KPIs for evaluation\n", + "env.get_kpis()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vudahvPN_ZaA" + }, + "source": [ + "Learning for 10 interaction steps is clearly not enough and leads to poor performance. This new environment has a way higher dimensional state-action space than the ones we treated before:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GSg90XCe-26Q", + "outputId": "8f6d8dcd-3f11-420f-d6a7-918f1616cffb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Observation space of the building environment (dimension):\n", + "(158,)\n", + "Action space of the building environment:\n", + "Discrete(11)\n" + ] + } + ], + "source": [ + "print('Observation space of the building environment (dimension):')\n", + "print(env.observation_space.shape)\n", + "print('Action space of the building environment:')\n", + "print(env.action_space)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sOs93H9X_ZaA" + }, + "source": [ + "Solving an environment of these dimensions requires millions of steps or other tricks to accelerate learning. Could you think of any?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "EDJHCuQ2NFN6" + }, + "outputs": [], + "source": [ + "env.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-_f2qRTB0Nw" + }, + "source": [ + "# **Further resources** 📚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p54OK_TGrtfp" + }, + "source": [ + "- For RL, check out the resources page from Stable-Baselines 3 [here](https://stable-baselines3.readthedocs.io/en/master/guide/rl.html) and the [open access book of Richard S. Sutton and Andrew G. Barto](http://incompleteideas.net/book/the-book-2nd.html)\n", + "- For BOPTEST, check out the websites of the [BOPTEST framework](https://ibpsa.github.io/project1-boptest/), its [GitHub repository](https://github.com/ibpsa/project1-boptest), and its overarching project: [IBPSA Project 1](https://ibpsa.github.io/project1/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Jblq_C7CHQHj" + }, + "source": [ + "# **Feedback** 💬" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jJ9lmUndHLMq" + }, + "source": [ + "Please help us improve by filling out [this form](https://forms.gle/JdprK6tgxQtwvhFV8). It'll only take a couple of minutes!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WNB5MoRmOWc9" + }, + "source": [ + "# **Annex I: Formal Reinforcement Learning theory** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G-ZId2TdCngy" + }, + "source": [ + "In RL we aim to derive an optimal control policy from the direct interaction of an agent (the RL algorithm) and an environment (the process to be optimized).\n", + "A policy is a mapping from environment states to actions that the agent \"decides\" to take.\n", + "This control method is based on the principle of dynamic programming. 
Unlike\n", + "classical dynamic programming, RL does not assume the existence of a perfect\n", + "system model and uses function approximations to build a policy from samples\n", + "of historical data. Hence, the agent performs empirical learning and decides on\n", + "actions to drive the environment towards favorable trajectories according to a reward function that the environment delivers every control step.\n", + "\n", + "The process of the RL agent interacting with the environment is a sequential decision-making problem formalized as a **Markov Decision Process (MDP)**. A diagram summarizing the RL approach is shown in the following figure:\n", + "\n", + "\n", + "\n", + "*Figure: Diagram of the RL approach. The RL agent decides an action. After the action is implemented, the environment returns the new state $\\pmb{S}_{k+1}$ and associated reward $R_{k+1}$.*\n", + "\n", + "In an MDP, the agent and the environment interact during a sequence of discrete-time steps indexed here as $k=0,1,2,...,K$, with $K$ being the terminal sample that could be $K=\\infty$.\n", + "Every time step $k$ the agent receives a representation of the environment named state: $\\pmb{S}_k \\in \\pmb{\\mathcal{S}}$, where $\\pmb{\\mathcal{S}}$ is the state space.\n", + "Note that the agent's observation of the state-space may or may not fully characterize the environment state.\n", + "In the latter case where the agent can only see a partial observation of the environment's state-space, we refer to **partially observable Markov decision processes (POMDPs)**.\n", + "\n", + "Upon receiving the state representation, the agent computes its control logic and in turn sends back to the environment a control action $\\pmb{A}_k \\in \\pmb{\\mathcal{A}}$, where $\\pmb{A}_k$ is the most appropriate action chosen from the action space $\\pmb{\\mathcal{A}}$.\n", + "One time step later, the agent observes a new state from the environment $\\pmb{S}_{k+1}$ along with a scalar value indicating its reward 
$R_{k+1} \\in \\mathcal{R} \\subset{\\mathbb{R}}$. Notice that the reward $R_{k+1}$ is an indicator of the agent's performance when taking action $\\pmb{A}_k$ from state $\\pmb{S}_k$.\n", + "\n", + "The environment $\\mathcal{E}_{\\pmb{f}}$ is governed by the natural laws of the system dynamics $\\pmb{f}$ and it is defined by $\\mathcal{E}_{\\pmb{f}}:\\pmb{\\mathcal{S}}\\times \\pmb{\\mathcal{A}} \\rightarrow \\pmb{\\mathcal{S}}\\times \\mathcal{R}$.\n", + "The goal of RL is to infer an **optimal control policy** $\\pi_{*}:\\pmb{\\mathcal{S}} \\rightarrow \\pmb{\\mathcal{A}}$ that maximizes the **expected cumulative return** $G$ when the agent acts according to it.\n", + "The cumulative return is defined as some function of the rewards sequence, and a typical definition is to discount the rewards with a **discount factor** $\\gamma \\in [0,1]$ as shown in the following equation:\n", + "\n", + "\\begin{align}\n", + " G_k = R_{k+1} + \\gamma R_{k+2} + \\gamma^2 R_{k+3} + ... = \\sum_{i=0}^\\infty \\gamma^i R_{k+i+1}\n", + "\\end{align}\n", + "\n", + "The **action-value function** $q(\\pmb{S},\\pmb{A})$ estimates the expected return when being in a specific state $\\pmb{S}$ and taking an action $\\pmb{A}$.\n", + "The **state-value function** $v(\\pmb{S})$ directly estimates the expected return for being in state $\\pmb{S}$.\n", + "Frequently, the policy and value functions are approximated by **function approximations** to cope with high-dimensional state-action spaces.\n", + "Examples of commonly used regressors are neural networks or randomized trees.\n", + "\n", + "\n", + "\n", + "A **trajectory** of an MDP is defined as a sequence of states, actions and rewards.\n", + "Most of the RL algorithms learn from finite trajectories of experience called **episodes**.\n", + "Sometimes, the trajectories are broken down into tuples of the form $(\\pmb{s}_k,\\pmb{a}_k,r_k,\\pmb{s}_{k+1})$ and stored in a **replay memory** $\\pmb{\\mathcal{D}}$.\n", + "Using a replay memory 
allows to serve the historical data in random batches of tuples to preserve as much as possible the independent and identically distributed assumption that is typically taken to parametrize policies and value functions.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_RIO07aKaQHG" + }, + "source": [ + "# **References** " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cQM-3Ra5BYM7" + }, + "source": [ + "\n", + "- **[1]** *Blum, D., Arroyo, J., Huang, S., Drgona, J., Jorissen, F., Taxt Walnum, H., Yan, C., Benne, K., Vrabie, D., Wetter, M., and Helsen,\n", + "L. Building Optimization Testing Framework (BOPTEST) for Simulation-\n", + "Based Benchmarking of Control Strategies in Buildings. Journal of Building\n", + "Performance Simulation 14, 5 (2021), 586–610. https://doi.org/10.1080/19401493.2021.1986574*\n", + "\n", + "- **[2]** *Arroyo, J., Manna, C., Spiessens, F., and Helsen, L. An OpenAI-Gym\n", + "environment for the Building Optimization Testing (BOPTEST) framework.\n", + "In Proceedings of the 17th IBPSA Conference (Bruges, Belgium, September 2021) [https://doi.org/10.26868/25222708.2021.30380](https://www.conftool.pro/bs2021/index.php/30380_Arroyo_Javier.pdf?page=downloadPaper&filename=30380_Arroyo_Javier.pdf&form_id=30380)*\n", + "\n", + "- **[3]** *Drgona, J., Arroyo, J., Cupeiro Figueroa, I., Blum, D., Arendt, K., Kim, D.,Ollé, E. P., Oravec, J., Wetter, M., Vrabie, D. L., and Helsen, L. All you need to know about model predictive control for buildings. Annual Reviews in Control 50 (2020), 190–232. https://doi.org/10.1016/j.arcontrol.2020.09.001*\n", + "\n", + "- **[4]** *Vázquez-Canteli, J. R., and Nagy, Z. Reinforcement learning\n", + "for demand response: A review of algorithms and modeling techniques.\n", + "Applied energy 235 (2019), 1072–1089. https://doi.org/10.1016/j.apenergy.2018.11.002*\n", + "\n", + "- **[5]** *Chen, B., Cai, Z., and Bergés, M. 
Gnu-RL: A Practical and Scalable Reinforcement Learning Solution for Building HVAC Control Using a Differentiable MPC Policy. Frontiers in Built Environment 6 (2020). https://doi.org/10.3389/fbuil.2020.562239*\n", + "\n", + "- **[6]** *Sutton, R. S., and Barto, A. G. Reinforcement Learning: An Introduction, second ed. The MIT Press, 2018.*\n" + ] + } + ], + "metadata": { + "colab": { + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/tutorials/CCAI Summer School 2022/README.md b/docs/tutorials/CCAI_Summer_School_2022/README.md similarity index 100% rename from docs/tutorials/CCAI Summer School 2022/README.md rename to docs/tutorials/CCAI_Summer_School_2022/README.md diff --git a/examples/test_and_plot.py b/examples/test_and_plot.py index 952759e..b6c43ca 100644 --- a/examples/test_and_plot.py +++ b/examples/test_and_plot.py @@ -68,7 +68,8 @@ def test_agent(env, model, start_time, episode_length, warmup_period, return observations, actions, rewards, kpis -def plot_results(env, rewards, points=['reaTZon_y','oveHeaPumY_u'], +def plot_results(env, rewards, points=['reaTZon_y','reaTSetHea_y','reaTSetCoo_y','oveHeaPumY_u', + 'weaSta_reaWeaTDryBul_y', 'weaSta_reaWeaHDirNor_y'], log_dir=os.getcwd(), model_name='last_model', save_to_file=False): @@ -79,48 +80,23 @@ def plot_results(env, rewards, points=['reaTZon_y','oveHeaPumY_u'], # Retrieve all simulation data # We use env.start_time+1 to ensure that we don't return the last # point from the initialization period to don't confuse it with - # actions taken by the agent + # actions taken by the 
agent in a previous episode. res = requests.put('{0}/results'.format(env.url), data={'point_names':points, - 'start_time':env.start_time+1, - 'final_time':3.1536e7}).json()['payload'] + 'start_time':env.start_time+1, + 'final_time':3.1536e7}).json()['payload'] - df_res = pd.DataFrame(res).set_index('time') - - # Retrieve boundary condition data. - # Only way we have is through the forecast request. + df = pd.DataFrame(res) + df = create_datetime_index(df) + df.dropna(axis=0, inplace=True) scenario = env.scenario - requests.put('{0}/initialize'.format(env.url), - data={'start_time':df_res['time'].iloc[0], - 'warmup_period':0}).json()['payload'] - - # Store original forecast parameters - forecast_parameters_original = requests.get('{0}/forecast_parameters'.format(env.url)).json()['payload'] - # Set forecast parameters for test. Take 10 points per step. - forecast_parameters = {'horizon':env.max_episode_length, - 'interval':env.step_period/10} - requests.put('{0}/forecast_parameters'.format(env.url), - data=forecast_parameters) - forecast = requests.get('{0}/forecast'.format(env.url)).json()['payload'] - # Back to original parameters, just in case we're testing during training - requests.put('{0}/forecast_parameters'.format(env.url), - data=forecast_parameters_original) - - df_for = pd.DataFrame(forecast) - df_for = reindex(df_for) - df_for.drop('time', axis=1, inplace=True) - - df = pd.concat((df_res,df_for), axis=1) - df = create_datetime(df) - - df.dropna(axis=0, inplace=True) - if save_to_file: df.to_csv(os.path.join(log_dir, 'results_tests_'+model_name+'_'+scenario['electricity_price'], 'results_sim_{}.csv'.format(str(int(res['time'][0]/3600/24))))) - - rewards_time_days = np.arange(df_res['time'].iloc[0], + + # Project rewards into results index + rewards_time_days = np.arange(df['time'][0], env.start_time+env.max_episode_length, env.step_period)/3600./24. 
f = interpolate.interp1d(rewards_time_days, rewards, kind='zero', @@ -129,43 +105,34 @@ def plot_results(env, rewards, points=['reaTZon_y','oveHeaPumY_u'], rewards_reindexed = f(res_time_days) if not plt.get_fignums(): - # no window(s) open - # fig = plt.figure(figsize=(10,8)) + # no window(s) are open, so open a new window. _, axs = plt.subplots(4, sharex=True, figsize=(8,6)) else: - # get current figure. Combine this with plt.ion(), plt.figure() + # There is a window open, so get current figure. + # Combine this with plt.ion(), plt.figure() fig = plt.gcf() axs = fig.subplots(nrows=4, ncols=1, sharex=True) x_time = df.index.to_pydatetime() axs[0].plot(x_time, df['reaTZon_y'] -273.15, color='darkorange', linestyle='-', linewidth=1, label='_nolegend_') - axs[0].plot(x_time, df['LowerSetp[1]'] -273.15, color='gray', linewidth=1, label='Comfort setp.') - axs[0].plot(x_time, df['UpperSetp[1]'] -273.15, color='gray', linewidth=1, label='_nolegend_') + axs[0].plot(x_time, df['reaTSetHea_y'] -273.15, color='gray', linewidth=1, label='Comfort setp.') + axs[0].plot(x_time, df['reaTSetCoo_y'] -273.15, color='gray', linewidth=1, label='_nolegend_') axs[0].set_yticks(np.arange(15, 31, 5)) axs[0].set_ylabel('Operative\ntemperature\n($^\circ$C)') - axt = axs[0].twinx() - axt.plot(x_time, df['PriceElectricPowerHighlyDynamic'], color='dimgray', linestyle='dotted', linewidth=1, label='Price') - axs[0].plot([],[], color='dimgray', linestyle='-', linewidth=1, label='Price') - - axt.set_ylim(0,0.3) - axt.set_yticks(np.arange(0, 0.31, 0.1)) - axt.set_ylabel('(EUR/kWh)') - axt.set_ylabel('Price\n(EUR/kWh)') - axs[1].plot(x_time, df['oveHeaPumY_u'], color='darkorange', linestyle='-', linewidth=1, label='_nolegend_') axs[1].set_ylabel('Heat pump\nmodulation\nsignal\n( - )') axs[2].plot(x_time, rewards_reindexed, 'b', linewidth=1, label='rewards') axs[2].set_ylabel('Rewards\n(-)') - axs[3].plot(x_time, df['TDryBul'] - 273.15, color='royalblue', linestyle='-', linewidth=1, 
label='_nolegend_') + axs[3].plot(x_time, df['weaSta_reaWeaTDryBul_y'] - 273.15, color='royalblue', linestyle='-', linewidth=1, label='_nolegend_') axs[3].set_ylabel('Ambient\ntemperature\n($^\circ$C)') axs[3].set_yticks(np.arange(-5, 16, 5)) axt = axs[3].twinx() - axt.plot(x_time, df['HDirNor'], color='gold', linestyle='-', linewidth=1, label='$\dot{Q}_rad$') + axt.plot(x_time, df['weaSta_reaWeaHDirNor_y'], color='gold', linestyle='-', linewidth=1, label='$\dot{Q}_rad$') axt.set_ylabel('Solar\nirradiation\n($W$)') axs[3].plot([],[], color='darkorange', linestyle='-', linewidth=1, label='RL') @@ -179,9 +146,10 @@ def plot_results(env, rewards, points=['reaTZon_y','oveHeaPumY_u'], plt.tight_layout() if save_to_file: - plt.savefig(os.path.join(log_dir, 'results_tests_'+model_name+'_'+scenario['electricity_price'], - 'results_sim_{}.pdf'.format(str(int(res['time'][0]/3600/24)))), - bbox_inches='tight') + dir_name = os.path.join(log_dir, 'results_tests_'+model_name+'_'+scenario['electricity_price']) + fil_name = os.path.join(dir_name,'results_sim_{}.pdf'.format(str(int(res['time'][0]/3600/24)))) + os.makedirs(dir_name, exist_ok=True) + plt.savefig(fil_name, bbox_inches='tight') if not save_to_file: # showing and saving to file are incompatible @@ -199,9 +167,9 @@ def reindex(df, interval=60, start=None, stop=None): ''' if start is None: - start = df['time'][df.index[0]] + start = df['time'][0] if stop is None: - stop = df['time'][df.index[-1]] + stop = df['time'][-1] index = np.arange(start,stop+0.1,interval).astype(int) df_reindexed = df.reindex(index) @@ -218,7 +186,7 @@ def reindex(df, interval=60, start=None, stop=None): return df_reindexed -def create_datetime(df): +def create_datetime_index(df): ''' Create a datetime index for the data @@ -226,7 +194,7 @@ def create_datetime(df): datetime = [] for t in df['time']: - datetime.append(pd.Timestamp('2020/1/1') + pd.Timedelta(t,'s')) + datetime.append(pd.Timestamp('2023/1/1') + pd.Timedelta(t,'s')) df['datetime'] = 
datetime df.set_index('datetime', inplace=True) diff --git a/testing/references/tutorial_output_get_name.json b/testing/references/tutorial_output_get_name.json new file mode 100644 index 0000000..1b51e94 --- /dev/null +++ b/testing/references/tutorial_output_get_name.json @@ -0,0 +1 @@ +{"name": "bestest_hydronic_heat_pump"} \ No newline at end of file diff --git a/testing/references/tutorial_output_kpis_DQN_alg.json b/testing/references/tutorial_output_kpis_DQN_alg.json new file mode 100644 index 0000000..4556eb1 --- /dev/null +++ b/testing/references/tutorial_output_kpis_DQN_alg.json @@ -0,0 +1 @@ +{"tdis_tot": 8.370911955136352, "idis_tot": 0, "ener_tot": 0.3965284774903316, "cost_tot": 0.10051996904379909, "emis_tot": 0.06622025574088537, "pele_tot": 0.021041629229404186, "pgas_tot": null, "pdih_tot": null} \ No newline at end of file diff --git a/testing/references/tutorial_output_kpis_Q_alg.json b/testing/references/tutorial_output_kpis_Q_alg.json new file mode 100644 index 0000000..f0faa5f --- /dev/null +++ b/testing/references/tutorial_output_kpis_Q_alg.json @@ -0,0 +1 @@ +{"tdis_tot": 1.7415794961384694, "idis_tot": 0, "ener_tot": 0.17501300879744733, "cost_tot": 0.044365797730152895, "emis_tot": 0.029227172469173696, "pele_tot": 0.01990768126278055, "pgas_tot": null, "pdih_tot": null} \ No newline at end of file diff --git a/testing/test_boptestGymEnv.py b/testing/test_boptestGymEnv.py index e104932..0b32c68 100644 --- a/testing/test_boptestGymEnv.py +++ b/testing/test_boptestGymEnv.py @@ -421,6 +421,89 @@ def check_obs_act_rew_kpi(self, obs=None, act=None, rew=None, kpi=None, df.dropna(inplace=True) ref_filepath = os.path.join(utilities.get_root_path(), 'testing', 'references', 'kpis_{}.csv'.format(label)) self.compare_ref_values_df(df, ref_filepath) + + def test_tutorial(self): + ''' + Test the tutorial in the `docs`. 
The tutorial is written as + an ipython notebook so the `nbconvert` package is used to convert + the notebook to plain python to execute the test by comparing the + outputs of some of the notebook cells with references. + Note that the notebook actually uses the `boptest-gym-service` + branch, which should be even with the `master` branch but uses + BOPTEST-Service. Therefore, this is a check for the + `boptest-gym-service` branch and, contrarily to other tests, + this one could be parallelized. The last section of the tutorial + (Gearing Up) is using the DQN algorithm from stable-baselines3 + and is used as such in the Quick Start example in the README.md + of this repository. Therefore, this is also testing the + Quick Start example. + + ''' + + from nbconvert.preprocessors import ExecutePreprocessor + import nbformat + + # Get root directory + root_dir = utilities.get_root_path() + + # Change working dir to tutorial directory + run_path = os.chdir(os.path.join(root_dir, 'docs', 'tutorials', 'CCAI_Summer_School_2022')) + + # Path to the notebook file + notebook_path = os.path.join(root_dir, 'docs', 'tutorials', 'CCAI_Summer_School_2022', + 'Building_Control_with_RL_using_BOPTEST.ipynb') + + # Read the notebook file + with open(notebook_path, 'r', encoding='utf-8') as f: + notebook_content = f.read() + + # Execute the notebook cells + executor = ExecutePreprocessor(timeout=-1, resources={'metadata': {'path': run_path}}) + executed_notebook, _ = executor.preprocess(nbformat.reads(notebook_content, as_version=4), + resources={'metadata': {'path': run_path}}) + + # Test output when requesting test case name + out_get_name = executed_notebook.cells[41].outputs[0]['text'] + self.check_from_cell_output(out_get_name, 'get_name') + + # Check KPIs when testing our Q-algorithm + out_kpis_Q_alg = executed_notebook.cells[119].outputs[0]['data']['text/plain'] + self.check_from_cell_output(out_kpis_Q_alg, 'kpis_Q_alg') + + # Check KPIs when testing DQN algorithm from 
stable-baselines3 + out_kpis_DQN_alg = executed_notebook.cells[125].outputs[2]['data']['text/plain'] + self.check_from_cell_output(out_kpis_DQN_alg, 'kpis_DQN_alg') + + def check_from_cell_output(self, cell_output, str_output): + '''Compares a cell output to a reference file. + Parameters + ---------- + cell_output: str + Content of the cell output that is + reformatted in this method to become json + str_ouput: str + Tag to identify the reference file of the output + + ''' + + import json + + # Conform to the json syntax rules to transform to json + out = cell_output.replace("\n","").replace("'","\"").replace("None","null") + + # Convert string to json + out_json = json.loads(out) + + # Drop time ratio if it is in output + if 'time_rat' in out_json: + del out_json['time_rat'] + + # Assign files + file_ref = os.path.join(utilities.get_root_path(), 'testing', 'references', + 'tutorial_output_{}.json'.format(str_output)) + # Check results + self.compare_ref_json(out_json, file_ref) + if __name__ == '__main__': utilities.run_tests(os.path.basename(__file__))