
(v3.6.9) - Best model metrics saving in evaluations #458

Merged
merged 7 commits on Nov 15, 2024
4 changes: 3 additions & 1 deletion docs/source/pages/deep-reinforcement-learning.rst
@@ -37,7 +37,9 @@ the Weights and Biases platform. Encapsulating the evaluation environment is not
monitoring of these episodes is desired.

The data logged on the platform, in the Evaluations section, will depend on the specific logger wrapper used
and its episode summary. Thus, to obtain new metrics, the logger wrapper must be modified, not the callback.
Additionally, this callback will overwrite specific metrics for the best model obtained during the training process,
in order to preserve the metrics of the best model.

The number of episodes run in each evaluation and their frequency can be configured, and metrics from the
underlying logger can be excluded if desired. Moreover, if normalization in observation space is applied,
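
As a usage illustration, a minimal sketch of attaching this evaluation callback to a Stable-Baselines3 training loop follows; the environment id and evaluation settings are illustrative, and the exact parameter names should be checked against *LoggerEvalCallback* in sinergym/utils/callbacks.py:

.. code-block:: python

    import gymnasium as gym
    import sinergym
    from stable_baselines3 import PPO
    from sinergym.utils.callbacks import LoggerEvalCallback
    from sinergym.utils.wrappers import LoggerWrapper

    # Training and evaluation environments, both wrapped with a logger wrapper
    train_env = LoggerWrapper(gym.make('Eplus-5zone-hot-continuous-v1'))
    eval_env = LoggerWrapper(gym.make('Eplus-5zone-hot-continuous-v1'))

    # The callback evaluates the agent periodically and overwrites the
    # best_model metrics whenever a new best model is found
    eval_callback = LoggerEvalCallback(
        eval_env=eval_env,
        train_env=train_env,
        n_eval_episodes=1,
        eval_freq_episodes=2,
        verbose=1)

    model = PPO('MlpPolicy', train_env)
    model.learn(total_timesteps=10000, callback=eval_callback)
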
10 changes: 5 additions & 5 deletions docs/source/pages/wrappers.rst
@@ -8,6 +8,11 @@ The code is available in
You can create your own wrappers by inheriting from *gym.Wrapper* or one of its variants, as seen in the
`Gymnasium documentation <https://gymnasium.farama.org/tutorials/gymnasium_basics/implementing_custom_wrappers/>`__.

.. note:: For examples about how to use these wrappers, visit :ref:`Wrappers example`.

.. important:: Be careful when using several nested wrappers, since each wrapper works
   on top of the previous one. The order is flexible since *Sinergym* v3.0.5.
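
For instance, a minimal sketch of wrapper nesting (the environment id is illustrative):

.. code-block:: python

    import gymnasium as gym
    import sinergym
    from sinergym.utils.wrappers import NormalizeObservation, LoggerWrapper

    env = gym.make('Eplus-5zone-hot-continuous-v1')
    env = NormalizeObservation(env)  # applied directly over the base environment
    env = LoggerWrapper(env)         # works on top of NormalizeObservation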

***********************
MultiObjectiveReward
***********************
@@ -251,11 +256,6 @@ This wrapper adds energy cost information to the current observation.

.. warning:: This wrapper internally uses the EnergyCostLinearReward reward function independently of the reward function set when creating the environment.
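
A hypothetical usage sketch follows, assuming the wrapper is exposed as *EnergyCostWrapper* and receives an energy-cost data file; check the actual class name and signature in sinergym/utils/wrappers.py:

.. code-block:: python

    import gymnasium as gym
    import sinergym
    from sinergym.utils.wrappers import EnergyCostWrapper  # name assumed

    env = gym.make('Eplus-5zone-hot-continuous-v1')
    # Adds energy cost information to the observation; internally the reward
    # switches to EnergyCostLinearReward regardless of the env's reward function
    env = EnergyCostWrapper(env, energy_cost_data_path='energy_cost.csv')  # parameter name assumed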


**************************
DeltaTempWrapper
**************************
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@
package-mode = true
name = "sinergym"

version = "3.6.8"
version = "3.6.9"
description = "The goal of sinergym is to create an environment following OpenAI Gym interface for wrapping simulation engines for building control using deep reinforcement learning."
license = "MIT"

31 changes: 27 additions & 4 deletions sinergym/utils/callbacks.py
@@ -32,7 +32,9 @@ def __init__(
excluded_metrics: List[str] = [
'episode_num',
'length (timesteps)',
'time_elapsed (hours)'],
'time_elapsed (hours)',
'truncated',
'terminated'],
verbose: int = 1,
):
""" Callback for evaluating an agent during training process logging all important data in WandB platform if is activated. It must be wrapped with BaseLoggerWrapper child class.
@@ -78,12 +80,18 @@ def __init__(
self.evaluation_metrics = pd.DataFrame(
columns=self.evaluation_columns)

# Define custom step metrics if a WandB session is activated
if self.wandb_log:
# Define metric for evaluation as X axis
self.train_env.get_wrapper_attr('wandb_run').define_metric(
'Evaluation/*',
step_metric='Evaluation/evaluation_num')

# Define metric to save best model found (last)
self.train_env.get_wrapper_attr('wandb_run').define_metric(
'best_model/*',
step_metric='best_model/evaluation_num')
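
For reference, here is the WandB step-metric pattern in isolation, as a standalone sketch (project name and logged values are illustrative; define_metric is part of the public WandB API):

import wandb

run = wandb.init(project='sinergym-example')
# Every metric under best_model/* will use best_model/evaluation_num as X axis
run.define_metric('best_model/evaluation_num')
run.define_metric('best_model/*', step_metric='best_model/evaluation_num')
run.log({'best_model/mean_reward': -42.0, 'best_model/evaluation_num': 3})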

def _on_step(self) -> bool:

continue_training = True
@@ -127,8 +135,19 @@ def _on_event(self) -> None:
# ------------------------------ Log information ----------------------------- #

# Add evaluation summary to the evaluation metrics (CSV)
self.evaluation_metrics = self.evaluation_metrics._append(
evaluation_summary, ignore_index=True)
evaluation_summary_df = pd.DataFrame(
[evaluation_summary]).dropna(
axis=1, how="all")

if not evaluation_summary_df.empty:
evaluation_summary_df = evaluation_summary_df.reindex(
columns=self.evaluation_metrics.columns)
evaluation_summary_df = evaluation_summary_df.reset_index(
drop=True)
self.evaluation_metrics = self.evaluation_metrics.dropna(
axis=1, how="all")
self.evaluation_metrics = pd.concat(
[self.evaluation_metrics, evaluation_summary_df], ignore_index=True)
self.evaluation_metrics.to_csv(
self.save_path + '/evaluation_metrics.csv')
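
The row-append pattern above, in isolation: a minimal sketch (column names and values are illustrative) of appending an episode summary to a metrics DataFrame with pd.concat, which replaces the removed DataFrame.append and avoids the deprecation warning pandas raises when concatenating empty or all-NA frames:

import pandas as pd

metrics = pd.DataFrame(columns=['evaluation_num', 'mean_reward_mean'])
summary = {'evaluation_num': 1, 'mean_reward_mean': -42.0}

# Build a one-row frame and drop all-NA columns before concatenating
row = pd.DataFrame([summary]).dropna(axis=1, how='all')
if not row.empty:
    row = row.reindex(columns=metrics.columns).reset_index(drop=True)
    metrics = pd.concat(
        [metrics.dropna(axis=1, how='all'), row], ignore_index=True)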

@@ -172,6 +191,10 @@ def _on_event(self) -> None:
fname=self.save_path +
'/var.txt',
X=self.eval_env.get_wrapper_attr('var'))
# Save best model summary in WandB if it is active
if self.wandb_log:
self.train_env.get_wrapper_attr('_log_data')(
data={'best_model': evaluation_summary})

# Close the evaluation env and start the training env again
self.eval_env.close()
2 changes: 1 addition & 1 deletion sinergym/version.txt
@@ -1 +1 @@
3.6.8
3.6.9