From c4e0d78813a85fc7f6176ce67def781635b7b3de Mon Sep 17 00:00:00 2001
From: Cory Cornelius
Date: Mon, 12 Jun 2023 16:04:53 -0700
Subject: [PATCH 1/3] Make metric logging keys configurable

---
 mart/models/modular.py | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/mart/models/modular.py b/mart/models/modular.py
index eb5dd934..4fcfe783 100644
--- a/mart/models/modular.py
+++ b/mart/models/modular.py
@@ -36,6 +36,9 @@ def __init__(
         test_step_log=None,
         test_metrics=None,
         load_state_dict=None,
+        output_loss_key="loss",
+        output_preds_key="preds",
+        output_target_key="target",
     ):
         super().__init__()
 
@@ -88,6 +91,10 @@ def __init__(
             logger.info(f"Loading state_dict {path} for {module.__class__.__name__}...")
             module.load_state_dict(torch.load(path, map_location="cpu"))
 
+        self.output_loss_key = output_loss_key
+        self.output_preds_key = output_preds_key
+        self.output_target_key = output_target_key
+
     def configure_optimizers(self):
         config = {}
         config["optimizer"] = self.optimizer_fn(self.model)
@@ -118,19 +125,15 @@ def training_step(self, batch, batch_idx):
         for name in self.training_step_log:
             self.log(f"training/{name}", output[name])
 
-        assert "loss" in output
-        return output
-
-    def training_step_end(self, output):
         if self.training_metrics is not None:
             # Some models only return loss in the training mode.
-            if "preds" not in output or "target" not in output:
+            if self.output_preds_key not in output or self.output_target_key not in output:
                 raise ValueError(
-                    "You have specified training_metrics, but the model does not return preds and target during training. You can either nullify training_metrics or configure the model to return preds and target in the training output."
+                    f"You have specified training_metrics, but the model does not return {self.output_preds_key} or {self.output_target_key} during training. You can either nullify training_metrics or configure the model to return {self.output_preds_key} and {self.output_target_key} in the training output."
                 )
-            self.training_metrics(output["preds"], output["target"])
-        loss = output.pop("loss")
-        return loss
+            self.training_metrics(output[self.output_preds_key], output[self.output_target_key])
+
+        return output[self.output_loss_key]
 
     def training_epoch_end(self, outputs):
         if self.training_metrics is not None:
@@ -152,13 +155,9 @@ def validation_step(self, batch, batch_idx):
         for name in self.validation_step_log:
             self.log(f"validation/{name}", output[name])
 
-        return output
+        self.validation_metrics(output[self.output_preds_key], output[self.output_target_key])
 
-    def validation_step_end(self, output):
-        self.validation_metrics(output["preds"], output["target"])
-
-        # I don't know why this is required to prevent CUDA memory leak in validaiton and test. (Not required in training.)
-        output.clear()
+        return None
 
     def validation_epoch_end(self, outputs):
         metrics = self.validation_metrics.compute()
@@ -178,13 +177,9 @@ def test_step(self, batch, batch_idx):
         for name in self.test_step_log:
             self.log(f"test/{name}", output[name])
 
-        return output
-
-    def test_step_end(self, output):
-        self.test_metrics(output["preds"], output["target"])
+        self.test_metrics(output[self.output_preds_key], output[self.output_target_key])
 
-        # I don't know why this is required to prevent CUDA memory leak in validaiton and test. (Not required in training.)
-        output.clear()
+        return None
 
     def test_epoch_end(self, outputs):
         metrics = self.test_metrics.compute()

From 508798ca12d98d2ce757bcb18779b7c3fe474cdd Mon Sep 17 00:00:00 2001
From: Cory Cornelius
Date: Tue, 13 Jun 2023 14:15:36 -0700
Subject: [PATCH 2/3] cleanup

---
 mart/models/modular.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/mart/models/modular.py b/mart/models/modular.py
index 4fcfe783..d663dda3 100644
--- a/mart/models/modular.py
+++ b/mart/models/modular.py
@@ -125,6 +125,10 @@ def training_step(self, batch, batch_idx):
         for name in self.training_step_log:
             self.log(f"training/{name}", output[name])
 
+        assert "loss" in output
+        return output
+
+    def training_step_end(self, output):
         if self.training_metrics is not None:
             # Some models only return loss in the training mode.
             if self.output_preds_key not in output or self.output_target_key not in output:
@@ -132,8 +136,8 @@ def training_step(self, batch, batch_idx):
                 f"You have specified training_metrics, but the model does not return {self.output_preds_key} or {self.output_target_key} during training. You can either nullify training_metrics or configure the model to return {self.output_preds_key} and {self.output_target_key} in the training output."
             )
             self.training_metrics(output[self.output_preds_key], output[self.output_target_key])
-
-        return output[self.output_loss_key]
+        loss = output.pop(self.output_loss_key)
+        return loss
 
     def training_epoch_end(self, outputs):
         if self.training_metrics is not None:
@@ -155,9 +159,13 @@ def validation_step(self, batch, batch_idx):
         for name in self.validation_step_log:
             self.log(f"validation/{name}", output[name])
 
+        return output
+
+    def validation_step_end(self, output):
         self.validation_metrics(output[self.output_preds_key], output[self.output_target_key])
 
-        return None
+        # I don't know why this is required to prevent CUDA memory leak in validation and test. (Not required in training.)
+        output.clear()
 
     def validation_epoch_end(self, outputs):
         metrics = self.validation_metrics.compute()
@@ -177,9 +185,13 @@ def test_step(self, batch, batch_idx):
         for name in self.test_step_log:
             self.log(f"test/{name}", output[name])
 
+        return output
+
+    def test_step_end(self, output):
         self.test_metrics(output[self.output_preds_key], output[self.output_target_key])
 
-        return None
+        # I don't know why this is required to prevent CUDA memory leak in validation and test. (Not required in training.)
+        output.clear()
 
     def test_epoch_end(self, outputs):
         metrics = self.test_metrics.compute()

From fc770e81d7783edf7e0ef7bf04b4b12a25a2eaa0 Mon Sep 17 00:00:00 2001
From: Cory Cornelius
Date: Tue, 13 Jun 2023 17:12:51 -0700
Subject: [PATCH 3/3] Remove *_step_end

---
 mart/models/modular.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/mart/models/modular.py b/mart/models/modular.py
index d663dda3..4fcfe783 100644
--- a/mart/models/modular.py
+++ b/mart/models/modular.py
@@ -125,10 +125,6 @@ def training_step(self, batch, batch_idx):
         for name in self.training_step_log:
             self.log(f"training/{name}", output[name])
 
-        assert "loss" in output
-        return output
-
-    def training_step_end(self, output):
         if self.training_metrics is not None:
             # Some models only return loss in the training mode.
             if self.output_preds_key not in output or self.output_target_key not in output:
@@ -136,8 +132,8 @@ def training_step_end(self, output):
                 f"You have specified training_metrics, but the model does not return {self.output_preds_key} or {self.output_target_key} during training. You can either nullify training_metrics or configure the model to return {self.output_preds_key} and {self.output_target_key} in the training output."
             )
             self.training_metrics(output[self.output_preds_key], output[self.output_target_key])
-        loss = output.pop(self.output_loss_key)
-        return loss
+
+        return output[self.output_loss_key]
 
     def training_epoch_end(self, outputs):
         if self.training_metrics is not None:
@@ -159,13 +155,9 @@ def validation_step(self, batch, batch_idx):
         for name in self.validation_step_log:
             self.log(f"validation/{name}", output[name])
 
-        return output
-
-    def validation_step_end(self, output):
         self.validation_metrics(output[self.output_preds_key], output[self.output_target_key])
 
-        # I don't know why this is required to prevent CUDA memory leak in validation and test. (Not required in training.)
-        output.clear()
+        return None
 
     def validation_epoch_end(self, outputs):
         metrics = self.validation_metrics.compute()
@@ -185,13 +177,9 @@ def test_step(self, batch, batch_idx):
         for name in self.test_step_log:
             self.log(f"test/{name}", output[name])
 
-        return output
-
-    def test_step_end(self, output):
         self.test_metrics(output[self.output_preds_key], output[self.output_target_key])
 
-        # I don't know why this is required to prevent CUDA memory leak in validation and test. (Not required in training.)
-        output.clear()
+        return None
 
     def test_epoch_end(self, outputs):
         metrics = self.test_metrics.compute()
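
Note for reviewers: the end state of this series is that each *_step method logs, updates metrics, and returns only what the trainer needs, with the dictionary key names supplied at construction time. Below is a minimal, self-contained sketch of that configurable-key pattern. It is illustrative only: it stands in for LitModular without depending on MART or Lightning, and the "total_loss"/"logits"/"labels" key names are hypothetical.

# Illustrative sketch only -- a plain-Python stand-in for LitModular's
# configurable output keys; not MART code.


class ConfigurableKeysModule:
    def __init__(
        self,
        training_metrics=None,
        output_loss_key="loss",
        output_preds_key="preds",
        output_target_key="target",
    ):
        self.training_metrics = training_metrics
        self.output_loss_key = output_loss_key
        self.output_preds_key = output_preds_key
        self.output_target_key = output_target_key

    def training_step(self, output):
        # Mirrors the end state of this series: look up preds/target only
        # when metrics are configured, then return just the loss value.
        if self.training_metrics is not None:
            if self.output_preds_key not in output or self.output_target_key not in output:
                raise ValueError("model output is missing the configured preds/target keys")
            self.training_metrics(output[self.output_preds_key], output[self.output_target_key])
        return output[self.output_loss_key]


# A model that emits non-default key names is adapted by configuration
# rather than by renaming its outputs:
module = ConfigurableKeysModule(
    output_loss_key="total_loss",
    output_preds_key="logits",
    output_target_key="labels",
)
assert module.training_step({"total_loss": 0.5}) == 0.5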