remove TODOs and dead code

microsoft · ant0nsc · Jan 28, 2021 · Nov 17, 2020 · Nov 18, 2020 · Nov 19, 2020
commit b4da62c6b7443249df0fed5077bb1341842903f2
diff --git a/InnerEye/Common/generic_parsing.py b/InnerEye/Common/generic_parsing.py
@@ -56,7 +56,7 @@ def get_cuda_devices(self) -> List[Any]:
         from torch.cuda import device_count
         from torch import device
         if self.use_gpu:
-            return [device(type='cuda', index=ii) for ii in list(range(device_count()))]
+            return [device(type='cuda', index=i) for i in list(range(device_count()))]
         else:
             return []
 

diff --git a/InnerEye/ML/dataset/full_image_dataset.py b/InnerEye/ML/dataset/full_image_dataset.py
@@ -49,17 +49,6 @@ def collate_with_metadata(batch: List[Dict[str, Any]]) -> Dict[str, Any]:
     raise TypeError(f"Unexpected batch data: Expected a dictionary, but got: {type(elem)}")
 
 
-# TODO antonsc: Remove?
-def set_random_seed_for_dataloader_worker(worker_id: int) -> None:
-    """
-    Set the seed for the random number generators of python, numpy.
-    """
-    # Set the seeds for numpy and python random based on the offset of the worker_id and initial seed,
-    # converting the initial_seed which is a long to modulo int32 which is what numpy expects.
-    random_seed = (torch.initial_seed() + worker_id) % (2 ** 32)
-    ml_util.set_random_seed(random_seed, f"Data loader worker ({worker_id})")
-
-
 class _RepeatSampler(BatchSampler):
     """
     A batch sampler that wraps another batch sampler. It repeats the contents of that other sampler forever.

diff --git a/InnerEye/ML/lightning_models.py b/InnerEye/ML/lightning_models.py
@@ -710,7 +710,6 @@ def __init__(self, config: ScalarModelBase, *args: Any, **kwargs: Any) -> None:
         self.train_metric_computers = self.create_metric_computers()
         self.val_metric_computers = self.create_metric_computers()
 
-        # TODO antonsc: Work out how we handle mean teacher model
         # if config.compute_grad_cam:
         #     model_to_evaluate = self.train_val_params.mean_teacher_model if \
         #         config.compute_mean_teacher_model else self.train_val_params.model
@@ -841,14 +840,6 @@ def compute_and_log_metrics(self,
                 LoggingColumns.Label.value: label,
                 LoggingColumns.DataSplit.value: data_split.value
             })
-        # TODO antonsc: Find a better place for this code. We can only draw plots once all results are aggregated,
-        # maybe move to the report?
-        # if self._should_save_regression_error_plot(self.current_epoch):
-        #     error_plot_name = f"error_plot_{self.train_val_params.epoch}"
-        #     path = str(self.config.outputs_folder / f"{error_plot_name}.png")
-        #     plot_variation_error_prediction(epoch_metrics.get_labels(), epoch_metrics.get_predictions(), path)
-        #     logger = self.config.azure_loggers_train if is_training else self.config.azure_loggers_val
-        #     logger.log_image(error_plot_name, path)
 
     def training_or_validation_epoch_end(self, is_training: bool) -> None:
         """

diff --git a/InnerEye/ML/model_testing.py b/InnerEye/ML/model_testing.py
@@ -44,10 +44,6 @@
 THUMBNAILS_FOLDER = "thumbnails"
 
 
-# TODO antonsc:
-# We need to clarify if we want to keep the ability to test on multiple checkpoints
-
-
 def model_test(config: ModelConfigBase,
                data_split: ModelExecutionMode,
                checkpoint_handler: CheckpointHandler,

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
@@ -139,8 +139,7 @@ def model_train(config: ModelConfigBase,
     config.read_dataset_if_needed()
 
     # Create the trainer object. Backup the environment variables before doing that, in case we need to run a second
-    # training in the unit tests.
-    # TODO antonsc: Can we do in-situ cross validation with multiple GPUs still?
+    # training in the unit tests.
     old_environ = dict(os.environ)
     trainer, storing_logger = create_lightning_trainer(config, checkpoint_path)
 
@@ -182,7 +181,8 @@ def model_train(config: ModelConfigBase,
     logging.info("Starting training")
 
     lightning_data = TrainingAndValidationDataLightning(config)  # type: ignore
-    # TODO: Why can't we do that in the constructor?
+    # When trying to store the config object in the constructor, it does not appear to get stored at all; later
+    # references to the object simply fail. Hence, we have to set it explicitly here.
     lightning_data.config = config
     trainer.fit(lightning_model,
                 datamodule=lightning_data)

diff --git a/InnerEye/ML/pipelines/scalar_inference.py b/InnerEye/ML/pipelines/scalar_inference.py
@@ -86,7 +86,6 @@ def create_from_checkpoint(path_to_checkpoint: Path,
             logging.warning(f"Could not recover model from checkpoint path {path_to_checkpoint}")
             return None
         if config.compute_mean_teacher_model:
-            # TODO antonsc: Need to adjust that
             raise NotImplementedError("Mean teacher models not supported yet.")
         else:
             model = load_from_checkpoint_and_adjust_for_inference(config, path_to_checkpoint)

diff --git a/InnerEye/ML/run_ml.py b/InnerEye/ML/run_ml.py
@@ -258,8 +258,6 @@ def run(self) -> None:
             # train a new model if required
             if self.azure_config.train:
                 with logging_section("Model training"):
-                    # TODO antonsc: Return the ModelCheckpoint object here, with the path to the best checkpoints,
-                    # or convert it into a checkpoint_handler object
                     model_train(self.model_config, checkpoint_handler)
             else:
                 self.model_config.write_dataset_files()

diff --git a/InnerEye/ML/utils/dataset_util.py b/InnerEye/ML/utils/dataset_util.py
@@ -139,21 +139,18 @@ def __post_init__(self) -> None:
 
 
 def store_and_upload_example(dataset_example: DatasetExample,
-                             args: Optional[SegmentationModelBase],
+                             args: Optional[SegmentationModelBase] = None,
                              images_folder: Optional[Path] = None) -> None:
     """
     Stores an example input and output of the network to Nifti files.
 
     :param dataset_example: The dataset example, with image, label and prediction, that should be written.
-    :param args: configuration information to be used for normalization. TODO: This should not be optional why is this
-    assigning to example_images_folder
+    :param args: configuration information to be used for normalization.
     :param images_folder: The folder to which the result Nifti files should be written. If args is not None,
     the args.example_images_folder is used instead.
     """
 
-    folder = Path("") if images_folder is None else images_folder
-    if args is not None:
-        folder = args.example_images_folder
+    folder = images_folder or args.example_images_folder
     if folder != "" and not os.path.exists(folder):
         os.mkdir(folder)
 

diff --git a/InnerEye/ML/utils/hdf5_util.py b/InnerEye/ML/utils/hdf5_util.py
@@ -127,27 +127,3 @@ def from_file(cls: Type[T], hdf5_path: Path, load_segmentation: bool) -> T:
                    volume=volume,
                    segmentation=segmentation,
                    acquisition_date=acquisition_date)
-
-
-def load_labels(hdf5: HDF5Object) -> np.ndarray:
-    """
-    Load labels containing segmentation binary labels in one-hot-encoding.
-    :return A numpy array containing ground-truth information.
-    """
-    # For labels we are using the segmentation data provided in the HDF5 files.
-    labels = hdf5.segmentation  # 1 x N x H x W
-    n_classes = int(np.amax(labels) - np.amin(labels)) + 1
-    labels = multi_label_array_to_binary(labels, n_classes)
-    return labels.astype(HDF5ImageDataType.SEGMENTATION.value)
-
-
-def get_mask(hdf5_object: HDF5Object) -> np.ndarray:
-    """
-    TODO: Replace this with actual mask
-    :param hdf5_object:
-    :return:
-    """
-    img_shape = hdf5_object.volume.shape
-    mask = np.ones(img_shape, dtype=HDF5ImageDataType.MASK.value)
-    mask[-1, -1, -1] = 0
-    return mask
diff --git a/InnerEye/ML/utils/metrics_util.py b/InnerEye/ML/utils/metrics_util.py
@@ -125,7 +125,6 @@ def get_number_of_voxels_per_class(labels: torch.Tensor) -> torch.Tensor:
     if len(labels.shape) == 4:
         labels = labels[None, ...]
 
-    # TODO antonsc: Switch to Pytorch 1.7 and use torch.count_nonzero
     return torch.tensor(np.count_nonzero(labels.cpu().numpy(), axis=(2, 3, 4)))
 
 

diff --git a/InnerEye/ML/utils/temperature_scaling.py b/InnerEye/ML/utils/temperature_scaling.py
@@ -82,10 +82,6 @@ def eval_criterion() -> torch.Tensor:
             # zero the gradients for the next optimization step
             optimizer.zero_grad()
             loss, ece = criterion_fn(self.temperature_scale(logits), labels)
-            # TODO antonsc: re-enable logging
-            # if logger:
-            #     logger.log_to_azure_and_tensorboard("Temp_Scale_LOSS", loss.item())
-            #     logger.log_to_azure_and_tensorboard("Temp_Scale_ECE", ece.item())
             loss.backward()
             return loss
 

diff --git a/InnerEye/ML/visualizers/plot_cross_validation.py b/InnerEye/ML/visualizers/plot_cross_validation.py
@@ -322,7 +322,6 @@ def download_metrics_file(config: PlotCrossValidationConfig,
     if config.model_category == ModelCategory.Segmentation:
         if epoch is None:
             raise ValueError("Epoch must be provided in segmentation runs")
-        # TODO remove epoch arg here
         src = get_epoch_results_path(mode) / SUBJECT_METRICS_FILE_NAME
     else:
         src = Path(mode.value) / SUBJECT_METRICS_FILE_NAME

diff --git a/Tests/ML/models/architectures/sequential/test_rnn_classifier.py b/Tests/ML/models/architectures/sequential/test_rnn_classifier.py
@@ -187,7 +187,6 @@ def _get_mock_sequence_dataset(dataset_contents: Optional[str] = None) -> pd.Dat
                           (True, ImagingFeatureType.ImageAndSegmentation)])
 @pytest.mark.parametrize("combine_hidden_state", (True, False))
 @pytest.mark.parametrize("use_encoder_layer_norm", (True, False))
-# TODO antonsc: re-enable when mean teacher is back in
 @pytest.mark.parametrize("use_mean_teacher_model", (False,))
 @pytest.mark.gpu
 def test_rnn_classifier_via_config_1(use_combined_model: bool,
@@ -388,7 +387,7 @@ def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) ->
     print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
     assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
     assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
-    # TODO antonsc: put back in when temperature scaling is enabled again
+    # Issue #374: put back in when temperature scaling is enabled again
     # assert np.allclose(results.optimal_temperature_scale_values_per_checkpoint_epoch, [0.97], rtol=0.1)
 
 

diff --git a/Tests/ML/models/test_scalar_model.py b/Tests/ML/models/test_scalar_model.py
@@ -291,7 +291,7 @@ def test_scalar_metrics(has_hues: bool, is_classification: bool) -> None:
         labels = [[2.0, 2.0, 2.0], [1.0, 1.0, 1.0]]
         expected_accuracy = [0.25, 5, 0]
         accuracy_metric_key = MetricType.MEAN_SQUARED_ERROR.value
-        # TODO antonsc: We have odd values here for ExplainedVariance, and had already for r2score
+        # Issue #373: We have odd values here for ExplainedVariance, and had already for r2score
         expected_info_format_strs = [
             "MeanSquaredError: 0.2500, MeanAbsoluteError: 0.5000, ExplainedVariance: 0.0000",
             "MeanSquaredError: 5.0000, MeanAbsoluteError: 2.0000, ExplainedVariance: -19.0000",

diff --git a/Tests/ML/pipelines/test_forward_pass.py b/Tests/ML/pipelines/test_forward_pass.py
@@ -54,7 +54,6 @@ def test_use_gpu_flag(use_gpu_override: bool) -> None:
             assert config.use_gpu == use_gpu_override
 
 # @pytest.mark.azureml
-# TODO antonsc: re-enable once we have mean teacher in place again
 # def test_mean_teacher_model(test_output_dirs: OutputFolderForTests) -> None:
 #    """
 #    Test training and weight updates of the mean teacher model computation.

diff --git a/Tests/ML/test_model_testing.py b/Tests/ML/test_model_testing.py
@@ -131,7 +131,7 @@ def __init__(self) -> None:
                          [(SimpleUNet(), InferencePipeline, EnsemblePipeline),
                           (ClassificationModelForTesting(mean_teacher_model=False),
                            ScalarInferencePipeline, ScalarEnsemblePipeline),
-                          # TODO: re-enable once we have mean teacher in place again
+                          # Re-enable once we have mean teacher in place again
                           # (ClassificationModelForTesting(mean_teacher_model=True),
                           #  ScalarInferencePipeline, ScalarEnsemblePipeline)
                           ])

diff --git a/Tests/ML/test_model_train_test_and_recovery.py b/Tests/ML/test_model_train_test_and_recovery.py
@@ -19,7 +19,6 @@
 from Tests.ML.util import get_default_checkpoint_handler
 
 
-# TODO: re-enable once we have mean teacher in place again
 # @pytest.mark.parametrize("mean_teacher_model", [True, False])
 @pytest.mark.parametrize("mean_teacher_model", [False])
 def test_recover_testing_from_run_recovery(mean_teacher_model: bool,

diff --git a/Tests/ML/test_model_training.py b/Tests/ML/test_model_training.py
@@ -187,9 +187,7 @@ def assert_all_close(metric: str, expected: List[float], **kwargs: Any) -> None:
     # Logging the metric is called, but they never make it to the logger object.
     # model_training_result.get_training_metric(MetricType.SECONDS_PER_BATCH.value)
 
-    # TODO antonsc: Check that both Train and Val epoch_metrics.csv have all relevant columns and 2 rows
-
-    # TODO antonsc: enable
+    # Issue #372
     # # Test for saving of example images
     # assert train_config.example_images_folder.is_dir()
     # example_files = list(train_config.example_images_folder.rglob("*.*"))

diff --git a/Tests/ML/utils/test_io_util.py b/Tests/ML/utils/test_io_util.py
@@ -172,7 +172,7 @@ def test_save_dataset_example(test_output_dirs: OutputFolderForTests) -> None:
                                     labels=labels)
 
     images_folder = test_output_dirs.root_dir
-    store_and_upload_example(dataset_sample, None, images_folder)
+    store_and_upload_example(dataset_sample, images_folder=images_folder)
     image_from_disk = io_util.load_nifti_image(os.path.join(images_folder, "p2_e_1_image.nii.gz"))
     labels_from_disk = io_util.load_nifti_image(os.path.join(images_folder, "p2_e_1_label.nii.gz"))
     prediction_from_disk = io_util.load_nifti_image(os.path.join(images_folder, "p2_e_1_prediction.nii.gz"))