Feature/optimizations #177

Merged: 3 commits, Dec 22, 2023
4 changes: 2 additions & 2 deletions howto/register_external_algorithm.md
@@ -268,7 +268,7 @@ def ext_sota_main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
# Sample an action given the observation received by the environment
normalized_obs = normalize_obs(next_obs, cfg.algo.cnn_keys.encoder, obs_keys)
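
The hunk above shows the core optimization of this PR: `timer` now receives the metric class (`SumMetric`) together with its keyword arguments instead of an already constructed `SumMetric(sync_on_compute=False)`, so the context manager can build the metric lazily and skip the allocation entirely when timers are disabled. Below is a minimal sketch of a context manager with that call shape; it is an illustration under assumptions, not sheeprl's actual `timer` implementation, and the module-level `TIMERS_DISABLED` flag is a hypothetical stand-in for however the library turns its timers off.

```python
# Minimal sketch (not sheeprl's actual code) of a timer context manager that
# accepts a torchmetrics class plus constructor kwargs, matching the new call sites.
import time
from contextlib import contextmanager
from typing import Any, Dict, Type

from torchmetrics import Metric, SumMetric

_TIMERS: Dict[str, Metric] = {}
TIMERS_DISABLED = False  # hypothetical switch; sheeprl controls this through its own config


@contextmanager
def timer(name: str, metric_cls: Type[Metric] = SumMetric, **metric_kwargs: Any):
    """Accumulate the wall-clock time spent inside the block into a named metric."""
    if TIMERS_DISABLED:
        # With the class+kwargs signature, no metric object is ever built when timing is off.
        yield
        return
    if name not in _TIMERS:
        _TIMERS[name] = metric_cls(**metric_kwargs)
    start = time.perf_counter()
    try:
        yield
    finally:
        _TIMERS[name].update(time.perf_counter() - start)


# Usage mirroring the diff above:
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
    time.sleep(0.01)  # stand-in for the policy forward pass plus the environment step
```
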
@@ -370,7 +370,7 @@ def ext_sota_main(fabric: Fabric, cfg: Dict[str, Any]):
fabric.call("on_checkpoint_coupled", fabric=fabric, ckpt_path=ckpt_path, state=state)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(agent.module, fabric, cfg, log_dir)

# Optional part in case you want to give the possibility to register your models with MLFlow
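
The second recurring change gates the final evaluation behind `cfg.algo.run_test`: rank zero still owns the test phase, but it now runs only when the configuration asks for it. A small sketch of the guard follows; the `maybe_test` helper and the inline config are illustrative only (the real flag and its default live in sheeprl's Hydra configs), and from the command line the flag would presumably be toggled with an override such as `algo.run_test=False`.

```python
# Hedged sketch: run the final test phase only on rank zero and only when the
# (assumed) algo.run_test flag is enabled. `maybe_test` is a hypothetical helper.
from omegaconf import OmegaConf

cfg = OmegaConf.create({"algo": {"run_test": True}})  # illustrative config, not sheeprl's defaults


def maybe_test(fabric, agent, cfg, log_dir, test_fn):
    # Evaluation is skipped on non-zero ranks and whenever the flag is disabled.
    if fabric.is_global_zero and cfg.algo.run_test:
        test_fn(agent, fabric, cfg, log_dir)
```
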
4 changes: 2 additions & 2 deletions howto/register_new_algorithm.md
@@ -265,7 +265,7 @@ def sota_main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
# Sample an action given the observation received by the environment
normalized_obs = normalize_obs(next_obs, cfg.algo.cnn_keys.encoder, obs_keys)
@@ -367,7 +367,7 @@ def sota_main(fabric: Fabric, cfg: Dict[str, Any]):
fabric.call("on_checkpoint_coupled", fabric=fabric, ckpt_path=ckpt_path, state=state)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(agent.module, fabric, cfg, log_dir)

# Optional part in case you want to give the possibility to register your models with MLFlow
6 changes: 3 additions & 3 deletions sheeprl/algos/dreamer_v1/dreamer_v1.py
@@ -589,7 +589,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
# Sample an action given the observation received by the environment
if (
update <= learning_starts
@@ -681,7 +681,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
# Train the agent
if update > learning_starts and updates_before_training <= 0:
# Start training
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(cfg.algo.per_rank_gradient_steps):
sample = rb.sample_tensors(
batch_size=cfg.algo.per_rank_batch_size,
@@ -775,7 +775,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(player, fabric, cfg, log_dir)

if not cfg.model_manager.disabled and fabric.is_global_zero:
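
One detail worth noting in the hunks above: the PR keeps the training timer configurable through `sync_on_compute=cfg.metric.sync_on_compute`, while the environment-interaction timer hard-codes `sync_on_compute=False`. In torchmetrics, `sync_on_compute=True` synchronizes the metric state across processes when `compute()` is called, whereas `False` reports the per-rank value. A standalone example of the flag (both settings behave the same in a single-process run):

```python
# Standalone torchmetrics example of the sync_on_compute flag used by the timers.
# In a distributed run, sync_on_compute=True reduces the summed time across ranks
# at compute() time; False keeps the value local to each rank.
import torch
from torchmetrics import SumMetric

train_time = SumMetric(sync_on_compute=True)   # cross-rank aggregate (when distributed)
env_time = SumMetric(sync_on_compute=False)    # per-rank value only

for step_seconds in (0.12, 0.08, 0.10):
    train_time.update(torch.tensor(step_seconds))
    env_time.update(torch.tensor(step_seconds))

print(train_time.compute(), env_time.compute())  # both tensor(0.3000) in a single process
```
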
6 changes: 3 additions & 3 deletions sheeprl/algos/dreamer_v2/dreamer_v2.py
@@ -631,7 +631,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
# Sample an action given the observation received by the environment
if (
update <= learning_starts
@@ -735,7 +735,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
device=fabric.device,
from_numpy=cfg.buffer.from_numpy,
)
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(next(iter(local_data.values())).shape[0]):
if per_rank_gradient_steps % cfg.algo.critic.target_network_update_freq == 0:
for cp, tcp in zip(critic.module.parameters(), target_critic.parameters()):
@@ -828,7 +828,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(player, fabric, cfg, log_dir)

if not cfg.model_manager.disabled and fabric.is_global_zero:
6 changes: 3 additions & 3 deletions sheeprl/algos/dreamer_v3/dreamer_v3.py
@@ -566,7 +566,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
# Sample an action given the observation received by the environment
if (
update <= learning_starts
@@ -678,7 +678,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
device=fabric.device,
from_numpy=cfg.buffer.from_numpy,
)
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(next(iter(local_data.values())).shape[0]):
if per_rank_gradient_steps % cfg.algo.critic.target_network_update_freq == 0:
tau = 1 if per_rank_gradient_steps == 0 else cfg.algo.critic.tau
@@ -775,7 +775,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(player, fabric, cfg, log_dir, sample_actions=True)

if not cfg.model_manager.disabled and fabric.is_global_zero:
6 changes: 3 additions & 3 deletions sheeprl/algos/droq/droq.py
@@ -79,7 +79,7 @@ def train(
)
actor_data = {k: actor_data[k][next(iter(actor_sampler))] for k in actor_data.keys()}

with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
# Update the soft-critic
for batch_idxes in critic_sampler:
critic_batch_data = {k: critic_data[k][batch_idxes] for k in critic_data.keys()}
@@ -283,7 +283,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
# Sample an action given the observation received by the environment
actions, _ = agent.actor.module(torch.from_numpy(obs).to(device))
@@ -385,7 +385,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(agent.actor.module, fabric, cfg, log_dir)

if not cfg.model_manager.disabled and fabric.is_global_zero:
6 changes: 3 additions & 3 deletions sheeprl/algos/p2e_dv1/p2e_dv1_exploration.py
@@ -629,7 +629,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
# Sample an action given the observation received by the environment
if (
update <= learning_starts
@@ -721,7 +721,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
# Train the agent
if update >= learning_starts and updates_before_training <= 0:
# Start training
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(cfg.algo.per_rank_gradient_steps):
sample = rb.sample_tensors(
batch_size=cfg.algo.per_rank_batch_size,
@@ -835,7 +835,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

envs.close()
# task test zero-shot
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
player.actor = actor_task.module
player.actor_type = "task"
test(player, fabric, cfg, log_dir, "zero-shot")
6 changes: 3 additions & 3 deletions sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py
@@ -271,7 +271,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
normalized_obs = {}
for k in obs_keys:
@@ -349,7 +349,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
if player.actor_type == "exploration":
player.actor = actor_task.module
player.actor_type = "task"
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(cfg.algo.per_rank_gradient_steps):
sample = rb.sample_tensors(
batch_size=cfg.algo.per_rank_batch_size,
@@ -452,7 +452,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):

envs.close()
# task test few-shot
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
player.actor = actor_task.module
player.actor_type = "task"
test(player, fabric, cfg, log_dir, "few-shot")
6 changes: 3 additions & 3 deletions sheeprl/algos/p2e_dv2/p2e_dv2_exploration.py
@@ -776,7 +776,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
# Sample an action given the observation received by the environment
if (
update <= learning_starts
@@ -881,7 +881,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
from_numpy=cfg.buffer.from_numpy,
)
# Start training
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(next(iter(local_data.values())).shape[0]):
if per_rank_gradient_steps % cfg.algo.critic.target_network_update_freq == 0:
for cp, tcp in zip(critic_task.module.parameters(), target_critic_task.parameters()):
@@ -1000,7 +1000,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

envs.close()
# task test zero-shot
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
player.actor = actor_task.module
player.actor_type = "task"
test(player, fabric, cfg, log_dir, "zero-shot")
6 changes: 3 additions & 3 deletions sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py
@@ -291,7 +291,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
normalized_obs = {}
for k in obs_keys:
@@ -383,7 +383,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
from_numpy=cfg.buffer.from_numpy,
)
# Start training
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(next(iter(local_data.values())).shape[0]):
if per_rank_gradient_steps % cfg.algo.critic.target_network_update_freq == 0:
for cp, tcp in zip(critic_task.module.parameters(), target_critic_task.parameters()):
@@ -484,7 +484,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):

envs.close()
# task test few-shot
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
player.actor = actor_task.module
player.actor_type = "task"
test(player, fabric, cfg, log_dir, "few-shot")
6 changes: 3 additions & 3 deletions sheeprl/algos/p2e_dv3/p2e_dv3_exploration.py
@@ -837,7 +837,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
# Sample an action given the observation received by the environment
if (
update <= learning_starts
@@ -950,7 +950,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
from_numpy=cfg.buffer.from_numpy,
)
# Start training
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(next(iter(local_data.values())).shape[0]):
if per_rank_gradient_steps % cfg.algo.critic.target_network_update_freq == 0:
tau = 1 if per_rank_gradient_steps == 0 else cfg.algo.critic.tau
@@ -1079,7 +1079,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

envs.close()
# task test zero-shot
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
player.actor = actor_task.module
player.actor_type = "task"
test(player, fabric, cfg, log_dir, "zero-shot")
6 changes: 3 additions & 3 deletions sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py
@@ -282,7 +282,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
preprocessed_obs = {}
for k, v in obs.items():
@@ -382,7 +382,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
from_numpy=cfg.buffer.from_numpy,
)
# Start training
with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
for i in range(next(iter(local_data.values())).shape[0]):
tau = 1 if per_rank_gradient_steps == 0 else cfg.algo.critic.tau
if per_rank_gradient_steps % cfg.algo.critic.target_network_update_freq == 0:
@@ -487,7 +487,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):

envs.close()
# task test few-shot
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
player.actor = actor_task.module
player.actor_type = "task"
test(player, fabric, cfg, log_dir, "few-shot")
6 changes: 3 additions & 3 deletions sheeprl/algos/ppo/ppo.py
@@ -269,7 +269,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
# Sample an action given the observation received by the environment
normalized_obs = normalize_obs(next_obs, cfg.algo.cnn_keys.encoder, obs_keys)
@@ -372,7 +372,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
# Flatten the first two dimensions: [Buffer_Size, Num_Envs]
gathered_data = {k: v.flatten(start_dim=0, end_dim=1).float() for k, v in local_data.items()}

with timer("Time/train_time", SumMetric(sync_on_compute=cfg.metric.sync_on_compute)):
with timer("Time/train_time", SumMetric, sync_on_compute=cfg.metric.sync_on_compute):
train(fabric, agent, optimizer, gathered_data, aggregator, cfg)
train_step += world_size

@@ -445,7 +445,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any]):
fabric.call("on_checkpoint_coupled", fabric=fabric, ckpt_path=ckpt_path, state=state)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(agent.module, fabric, cfg, log_dir)

if not cfg.model_manager.disabled and fabric.is_global_zero:
4 changes: 2 additions & 2 deletions sheeprl/algos/ppo/ppo_decoupled.py
@@ -191,7 +191,7 @@ def player(

# Measure environment interaction time: this considers both the model forward
# to get the action given the observation and the time taken into the environment
with timer("Time/env_interaction_time", SumMetric(sync_on_compute=False)):
with timer("Time/env_interaction_time", SumMetric, sync_on_compute=False):
with torch.no_grad():
# Sample an action given the observation received by the environment
normalized_obs = normalize_obs(next_obs, cfg.algo.cnn_keys.encoder, obs_keys)
@@ -349,7 +349,7 @@ def player(
)

envs.close()
- if fabric.is_global_zero:
+ if fabric.is_global_zero and cfg.algo.run_test:
test(agent, fabric, cfg, log_dir)

if not cfg.model_manager.disabled and fabric.is_global_zero: