Commit

Permalink
working on render method
antonio-guillenperez committed Nov 6, 2024
1 parent 9d918ab commit dabfe2a
Showing 8 changed files with 472 additions and 18 deletions.
3 changes: 2 additions & 1 deletion envs/carbon_ls.py
@@ -86,7 +86,8 @@ def reset(self, *, seed=None, options=None):

state = np.asarray(np.hstack(([current_workload, queue_length/self.queue_max_len])), dtype=np.float32)

info = {"load": self.workload,
info = {"ls_original_workload": self.workload,
"ls_shifted_workload": self.workload,
"action": -1,
"info_load_left": 0,
"ls_tasks_dropped": 0,
81 changes: 81 additions & 0 deletions evaluate_harl.py
@@ -0,0 +1,81 @@
#%%
import os
import json
import torch
import gymnasium as gym
from dcrl_env_harl_partialobs import DCRL

# HAPPO lives in /lustre/guillant/HARL/harl/algorithms/actors/happo.py,
# but this script is run from /lustre/guillant/dc-rl, so add HARL to sys.path.

import sys
sys.path.append('/lustre/guillant/HARL/harl')

import harl
from algorithms.actors.happo import HAPPO


#%%
# Checkpoint and config path:
checkpoint_path = os.path.join('/lustre/guillant/HARL/results/dcrl', 'CA/happo/ls_dc_bat/seed-00001-2024-05-28-23-23-34')
config_path = os.path.join(checkpoint_path, 'config.json')

# Read the config file
with open(config_path, 'r') as f:
config = json.load(f)

env_config = config['env_args']
# Create the dcrl environment
env = DCRL(env_config)

# Obtain the active agents from env_config, e.g. 'agents': ['agent_ls', 'agent_dc', 'agent_bat']
num_agents = len(env_config['agents'])
agents = env_config['agents']
actors = {}
model_args = config['algo_args']['model']
algo_args = config['algo_args']['algo']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for agent_id, agent in enumerate(agents):
    checkpoint = torch.load(os.path.join(checkpoint_path, 'models', f'actor_agent{agent_id}.pt'),
                            map_location=device)

    # Build the HAPPO actor for this agent and load its weights from the checkpoint.
    # Note: the per-agent space lookup below assumes the environment exposes dict-like
    # observation/action spaces keyed by agent name; adjust if the DCRL API differs.
    actors[agent] = HAPPO({**model_args, **algo_args},
                          env.observation_space[agent],
                          env.action_space[agent],
                          device=device)
    actors[agent].load_state_dict(checkpoint['model_state_dict'])
    actors[agent].eval()

#%%
# Reset the environment (the DCRL reset is assumed to return (obs, info),
# matching the reset changes made elsewhere in this commit)
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    # Query each trained actor for its action.
    # Note: calling the actor directly follows the original script; depending on the
    # HARL version, an explicit act()/get_actions() method may be required instead.
    actions = {}
    for agent in agents:
        actor = actors[agent]
        with torch.no_grad():
            action = actor(torch.tensor(obs[agent]).float().unsqueeze(0)).numpy()
        actions[agent] = action[0]

    # Take a joint step in the environment with all agents' actions
    # (adjust the unpacking if the env returns separate terminated/truncated flags)
    obs, reward, done, _ = env.step(actions)

    # Accumulate the reward (summed over agents if the env returns a per-agent dict)
    if isinstance(reward, dict):
        total_reward += sum(reward.values())
    else:
        total_reward += reward
    if isinstance(done, dict):
        done = all(done.values())

# Print the total reward
print("Total reward:", total_reward)
54 changes: 54 additions & 0 deletions explore_ci.py
@@ -0,0 +1,54 @@
#%%
import os
import pandas as pd

import matplotlib.pyplot as plt

#%%
folder_path = '/lustre/guillant/dc-rl/data/CarbonIntensity'

# Get all CSV files in the folder
csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

# Read each CSV file and obtain the avg_CI column and save it along with the location name in a dictionary
values = {}

for file in csv_files:
file_path = os.path.join(folder_path, file)
df = pd.read_csv(file_path)
values[file[:2]] = df['avg_CI']

#%%
# Now plot the values, using the keys of the values dictionary as the legend.
# Plot only month 7. Since the CSV values are one hour apart, the indexes of the
# first and last day of the month can be computed, and only the values between
# those indexes are plotted.
import numpy as np
selected_month = 7
init_index = selected_month * 30 * 24
end_index = (selected_month + 1) * 30 * 24
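# Worked example of the indexing above: with selected_month = 7, init_index = 7 * 30 * 24 = 5040
# (~ day 210) and end_index = 8 * 30 * 24 = 5760 (~ day 240), which matches the
# plt.xlim(210, 240) window applied below.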

x_range = np.arange(init_index, end_index)/24
plt.figure(figsize=(10, 5))
for key, value in values.items():
if key in ['IL', 'TX', 'NY', 'VA', 'GA', 'WA', 'AZ', 'CA']:
# plt.plot(value[init_index:end_index]**3/200000, label=key, linestyle='-', linewidth=2, alpha=0.9)
plt.plot(x_range, value[init_index:end_index], label=key, linestyle='-', linewidth=2, alpha=0.9)

plt.ylabel('Carbon Intensity (gCO2/kWh)', fontsize=16)
plt.xlabel('Day', fontsize=16)
# plt.xlim(init_index/24, end_index/24)
plt.title('Average Daily Carbon Intensity in Different Locations in July', fontsize=18)
plt.grid(True, linestyle='--', alpha=0.5)

plt.tick_params(axis='x', labelsize=12, rotation=45) # Set the font size of xticks
plt.tick_params(axis='y', labelsize=12) # Set the font size of yticks
plt.legend(fontsize=11.5, ncols=8)
plt.xlim(210, 240)
plt.ylim(-1)

plt.savefig('plots/GreenDCC_ci_all_locations.pdf', bbox_inches='tight')
plt.show()

#%%
11 changes: 9 additions & 2 deletions harl/envs/sustaindc/harlsustaindc_env.py
@@ -43,7 +43,7 @@ def reset(self):
"""
self._seed += 1
self.cur_step = 0
obs = self.env.reset(seed=self._seed)
obs, infos_env = self.env.reset(seed=self._seed)

# Extract the keys from obs in the same order
agents = list(obs.keys())
@@ -210,4 +210,11 @@ def get_avail_agent_actions(self, agent_id):
Returns:
list: List of available actions.
"""
return [1] * self.action_space[agent_id].n
return [1] * self.action_space[agent_id].n

def render(self):
"""
Render the environment.
"""
self.env.render()
6 changes: 3 additions & 3 deletions harl/envs/sustaindc/sustaindc_ptzoo.py
@@ -59,8 +59,8 @@ def reset(self, seed=None, options=None):
np.random.seed(seed) # Example of setting seed, adjust based on your environment's requirements

# initial_observation should be a dictionary with agent names as keys and their observations as values
initial_observations_with_info = self.env.reset()
return initial_observations_with_info
initial_observations_with_info, infos = self.env.reset()
# The infos from the underlying env are discarded here; an empty list is returned as the infos placeholder
return initial_observations_with_info, []

def step(self, actions):
"""
@@ -92,7 +92,7 @@ def render(self, mode='human'):
Returns:
Rendered output.
"""
return self.env.render(mode=mode)
return self.env.render(mode='pygame')  # force the pygame renderer regardless of the requested mode

def close(self):
"""
20 changes: 18 additions & 2 deletions harl/runners/on_policy_base_runner.py
@@ -23,6 +23,7 @@
from harl.utils.models_tools import init_device
from harl.utils.configs_tools import init_dir, save_config
from harl.envs import LOGGER_REGISTRY
import time


class OnPolicyBaseRunner:
@@ -714,6 +715,8 @@ def dump_metrics_to_csv(self, metrics, eval_episode):
def render(self):
"""Render the model."""
print("start rendering")
last_render_time = time.time()

if self.manual_expand_dims:
# this env needs manual expansion of the num_of_parallel_envs dimension
for _ in range(self.algo_args["render"]["render_episodes"]):
@@ -768,7 +771,13 @@ def render(self):
else None
)
if self.manual_render:
self.envs.render()
# self.envs.render()
# Render every 1 second
print("Rendering...")
current_time = time.time()
if current_time - last_render_time >= 1.0:
self.envs.render() # Render the environment plot
last_render_time = current_time
if self.manual_delay:
time.sleep(0.1)
if eval_dones[0]:
@@ -777,6 +786,7 @@
else:
# this env does not need manual expansion of the num_of_parallel_envs dimension
# such as dexhands, which instantiates a parallel env of 64 pair of hands
last_render_time = time.time()
for _ in range(self.algo_args["render"]["render_episodes"]):
eval_obs, _, eval_available_actions = self.envs.reset()
eval_rnn_states = np.zeros(
@@ -817,7 +827,13 @@ def render(self):
) = self.envs.step(eval_actions)
rewards += eval_rewards[0][0][0]
if self.manual_render:
self.envs.render()
# self.envs.render()
# Render every 1 second
print("Rendering...")
current_time = time.time()
if current_time - last_render_time >= 1.0:
self.envs.render() # Render the environment plot
last_render_time = current_time
if self.manual_delay:
time.sleep(0.1)
if eval_dones[0][0]:
20 changes: 18 additions & 2 deletions harl/utils/envs_tools.py
@@ -110,8 +110,24 @@ def make_render_env(env_name, seed, env_args):
manual_delay = True # manually delay the rendering by time.sleep()
env_num = 1 # number of parallel envs

print("Can not support the " + env_name + "environment.")
raise NotImplementedError
if env_name == 'sustaindc':
from harl.envs.sustaindc.harlsustaindc_env import HARLSustainDCEnv
if 'month' in env_args:
env_args['month'] = env_args['month']
elif rank < 12:
env_args['month'] = rank % 12
else:
# 33% June (5), 33% July (6), 33% August (7)
env_args['month'] = rank % 3 + 5
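# Worked example of the assignment above: rank 12 -> month 5 (June), rank 13 -> month 6 (July),
# rank 14 -> month 7 (August), so ranks beyond 11 cycle over the three summer months.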

print("Rendering the environment with month: ", env_args['month'])
env = HARLSustainDCEnv(env_args)
env.seed(seed * 60000)

else:
print("Can not support the " + env_name + "environment.")
raise NotImplementedError

return env, manual_render, manual_expand_dims, manual_delay, env_num


Expand Down