Commit

Permalink
working on render method
antonio-guillenperez committed Nov 6, 2024
1 parent 9d918ab commit dabfe2a
Showing 8 changed files with 472 additions and 18 deletions.
3 changes: 2 additions & 1 deletion envs/carbon_ls.py
@@ -86,7 +86,8 @@ def reset(self, *, seed=None, options=None):

state = np.asarray(np.hstack(([current_workload, queue_length/self.queue_max_len])), dtype=np.float32)

info = {"load": self.workload,
info = {"ls_original_workload": self.workload,
"ls_shifted_workload": self.workload,
"action": -1,
"info_load_left": 0,
"ls_tasks_dropped": 0,
81 changes: 81 additions & 0 deletions evaluate_harl.py
@@ -0,0 +1,81 @@
#%%
import os
import json
import torch
import gymnasium as gym
from dcrl_env_harl_partialobs import DCRL

# HAPPO lives in /lustre/guillant/HARL/harl/algorithms/actors/happo.py,
# but this script is run from /lustre/guillant/dc-rl, so add HARL to sys.path.

import sys
sys.path.append('/lustre/guillant/HARL/harl')

import harl
from algorithms.actors.happo import HAPPO


#%%
# Checkpoint and config path:
checkpoint_path = os.path.join('/lustre/guillant/HARL/results/dcrl', 'CA/happo/ls_dc_bat/seed-00001-2024-05-28-23-23-34')
config_path = os.path.join(checkpoint_path, 'config.json')

# Read the config file
with open(config_path, 'r') as f:
config = json.load(f)

env_config = config['env_args']
# Create the dcrl environment
env = DCRL(env_config)

# Obtain the active agents from env_config, e.g. 'agents': ['agent_ls', 'agent_dc', 'agent_bat']
num_agents = len(env_config['agents'])
agents = env_config['agents']
actors = {}
model_args = config['algo_args']['model']
algo_args = config['algo_args']['algo']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for agent_id, agent in enumerate(agents):
    checkpoint = torch.load(os.path.join(checkpoint_path, 'models', f'actor_agent{agent_id}.pt'),
                            map_location=device)

    # Build the HAPPO actor for this agent and load its weights from the checkpoint.
    # Note: the per-agent space lookup below assumes the environment exposes dict-like
    # observation/action spaces keyed by agent name; adjust if the DCRL API differs.
    actors[agent] = HAPPO({**model_args, **algo_args},
                          env.observation_space[agent],
                          env.action_space[agent],
                          device=device)
    actors[agent].load_state_dict(checkpoint['model_state_dict'])
    actors[agent].eval()

#%%
# Reset the environment (the DCRL reset is assumed to return (obs, info),
# matching the reset changes made elsewhere in this commit)
obs, info = env.reset()
done = False
total_reward = 0

while not done:
    # Query each trained actor for its action.
    # Note: calling the actor directly follows the original script; depending on the
    # HARL version, an explicit act()/get_actions() method may be required instead.
    actions = {}
    for agent in agents:
        actor = actors[agent]
        with torch.no_grad():
            action = actor(torch.tensor(obs[agent]).float().unsqueeze(0)).numpy()
        actions[agent] = action[0]

    # Take a joint step in the environment with all agents' actions
    # (adjust the unpacking if the env returns separate terminated/truncated flags)
    obs, reward, done, _ = env.step(actions)

    # Accumulate the reward (summed over agents if the env returns a per-agent dict)
    if isinstance(reward, dict):
        total_reward += sum(reward.values())
    else:
        total_reward += reward
    if isinstance(done, dict):
        done = all(done.values())

# Print the total reward
print("Total reward:", total_reward)
54 changes: 54 additions & 0 deletions explore_ci.py
@@ -0,0 +1,54 @@
#%%
import os
import pandas as pd

import matplotlib.pyplot as plt

#%%
folder_path = '/lustre/guillant/dc-rl/data/CarbonIntensity'

# Get all CSV files in the folder
csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

# Read each CSV file and obtain the avg_CI column and save it along with the location name in a dictionary
values = {}

for file in csv_files:
file_path = os.path.join(folder_path, file)
df = pd.read_csv(file_path)
values[file[:2]] = df['avg_CI']

#%%
# Now plot the values, using the keys of the values dictionary as the legend.
# Plot only month 7. Since the CSV values are one hour apart, the indexes of the
# first and last day of the month can be computed, and only the values between
# those indexes are plotted.
import numpy as np
selected_month = 7
init_index = selected_month * 30 * 24
end_index = (selected_month + 1) * 30 * 24
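# Worked example of the indexing above: with selected_month = 7, init_index = 7 * 30 * 24 = 5040
# (~ day 210) and end_index = 8 * 30 * 24 = 5760 (~ day 240), which matches the
# plt.xlim(210, 240) window applied below.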

x_range = np.arange(init_index, end_index)/24
plt.figure(figsize=(10, 5))
for key, value in values.items():
if key in ['IL', 'TX', 'NY', 'VA', 'GA', 'WA', 'AZ', 'CA']:
# plt.plot(value[init_index:end_index]**3/200000, label=key, linestyle='-', linewidth=2, alpha=0.9)
plt.plot(x_range, value[init_index:end_index], label=key, linestyle='-', linewidth=2, alpha=0.9)

plt.ylabel('Carbon Intensity (gCO2/kWh)', fontsize=16)
plt.xlabel('Day', fontsize=16)
# plt.xlim(init_index/24, end_index/24)
plt.title('Average Daily Carbon Intensity in Different Locations in July', fontsize=18)
plt.grid(True, linestyle='--', alpha=0.5)

plt.tick_params(axis='x', labelsize=12, rotation=45) # Set the font size of xticks
plt.tick_params(axis='y', labelsize=12) # Set the font size of yticks
plt.legend(fontsize=11.5, ncols=8)
plt.xlim(210, 240)
plt.ylim(-1)

plt.savefig('plots/GreenDCC_ci_all_locations.pdf', bbox_inches='tight')
plt.show()

#%%
11 changes: 9 additions & 2 deletions harl/envs/sustaindc/harlsustaindc_env.py
@@ -43,7 +43,7 @@ def reset(self):
"""
self._seed += 1
self.cur_step = 0
obs = self.env.reset(seed=self._seed)
obs, infos_env = self.env.reset(seed=self._seed)

# Extract the keys from obs in the same order
agents = list(obs.keys())
@@ -210,4 +210,11 @@ def get_avail_agent_actions(self, agent_id):
Returns:
list: List of available actions.
"""
return [1] * self.action_space[agent_id].n
return [1] * self.action_space[agent_id].n

def render(self):
"""
Render the environment.
"""
self.env.render()
6 changes: 3 additions & 3 deletions harl/envs/sustaindc/sustaindc_ptzoo.py
@@ -59,8 +59,8 @@ def reset(self, seed=None, options=None):
np.random.seed(seed) # Example of setting seed, adjust based on your environment's requirements

# initial_observation should be a dictionary with agent names as keys and their observations as values
initial_observations_with_info = self.env.reset()
return initial_observations_with_info
initial_observations_with_info, infos = self.env.reset()
# The infos from the underlying env are discarded here; an empty list is returned as the infos placeholder
return initial_observations_with_info, []

def step(self, actions):
"""
@@ -92,7 +92,7 @@ def render(self, mode='human'):
Returns:
Rendered output.
"""
return self.env.render(mode=mode)
return self.env.render(mode='pygame')  # force the pygame renderer regardless of the requested mode

def close(self):
"""
20 changes: 18 additions & 2 deletions harl/runners/on_policy_base_runner.py
@@ -23,6 +23,7 @@
from harl.utils.models_tools import init_device
from harl.utils.configs_tools import init_dir, save_config
from harl.envs import LOGGER_REGISTRY
import time


class OnPolicyBaseRunner:
@@ -714,6 +715,8 @@ def dump_metrics_to_csv(self, metrics, eval_episode):
def render(self):
"""Render the model."""
print("start rendering")
last_render_time = time.time()

if self.manual_expand_dims:
# this env needs manual expansion of the num_of_parallel_envs dimension
for _ in range(self.algo_args["render"]["render_episodes"]):
@@ -768,7 +771,13 @@ def render(self):
else None
)
if self.manual_render:
self.envs.render()
# self.envs.render()
# Render every 1 second
print("Rendering...")
current_time = time.time()
if current_time - last_render_time >= 1.0:
self.envs.render() # Render the environment plot
last_render_time = current_time
if self.manual_delay:
time.sleep(0.1)
if eval_dones[0]:
@@ -777,6 +786,7 @@
else:
# this env does not need manual expansion of the num_of_parallel_envs dimension
# such as dexhands, which instantiates a parallel env of 64 pair of hands
last_render_time = time.time()
for _ in range(self.algo_args["render"]["render_episodes"]):
eval_obs, _, eval_available_actions = self.envs.reset()
eval_rnn_states = np.zeros(
@@ -817,7 +827,13 @@ def render(self):
) = self.envs.step(eval_actions)
rewards += eval_rewards[0][0][0]
if self.manual_render:
self.envs.render()
# self.envs.render()
# Render every 1 second
print("Rendering...")
current_time = time.time()
if current_time - last_render_time >= 1.0:
self.envs.render() # Render the environment plot
last_render_time = current_time
if self.manual_delay:
time.sleep(0.1)
if eval_dones[0][0]:
20 changes: 18 additions & 2 deletions harl/utils/envs_tools.py
@@ -110,8 +110,24 @@ def make_render_env(env_name, seed, env_args):
manual_delay = True # manually delay the rendering by time.sleep()
env_num = 1 # number of parallel envs

print("Can not support the " + env_name + "environment.")
raise NotImplementedError
if env_name == 'sustaindc':
from harl.envs.sustaindc.harlsustaindc_env import HARLSustainDCEnv
if 'month' in env_args:
env_args['month'] = env_args['month']
elif rank < 12:
env_args['month'] = rank % 12
else:
# 33% June (5), 33% July (6), 33% August (7)
env_args['month'] = rank % 3 + 5
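# Worked example of the assignment above: rank 12 -> month 5 (June), rank 13 -> month 6 (July),
# rank 14 -> month 7 (August), so ranks beyond 11 cycle over the three summer months.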

print("Rendering the environment with month: ", env_args['month'])
env = HARLSustainDCEnv(env_args)
env.seed(seed * 60000)

else:
print("Can not support the " + env_name + "environment.")
raise NotImplementedError

return env, manual_render, manual_expand_dims, manual_delay, env_num


Expand Down