Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Add more histopathology configs #616

Merged
merged 10 commits into from
Dec 11, 2021
2 changes: 2 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs that run in AzureML.
-([#603](https://github.com/microsoft/InnerEye-DeepLearning/pull/603)) Add histopathology module
-([#614](https://github.com/microsoft/InnerEye-DeepLearning/pull/614)) Checkpoint downloading falls back to looking into AzureML if no checkpoints on disk
-([#613](https://github.com/microsoft/InnerEye-DeepLearning/pull/613)) Add additional tests for histopathology datasets

-([#616](https://github.com/microsoft/InnerEye-DeepLearning/pull/616)) Add more histopathology configs and tests

### Changed
- ([#588](https://github.com/microsoft/InnerEye-DeepLearning/pull/588)) Replace SciPy with PIL.PngImagePlugin.PngImageFile to load png files.
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datamodules/base_module.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import pickle
from enum import Enum
from pathlib import Path
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datamodules/panda_module.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import Tuple

from InnerEye.ML.Histopathology.datamodules.base_module import TilesDataModule
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datamodules/tcga_crck_module.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import Tuple, Any

from InnerEye.ML.Histopathology.datamodules.base_module import TilesDataModule
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datasets/base_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Any, Dict, Optional, Union

Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datasets/default_paths.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

PANDA_TILES_DATASET_ID = "PANDA_tiles"
TCGA_CRCK_DATASET_ID = "TCGA-CRCk"
TCGA_PRAD_DATASET_ID = "TCGA-PRAD"
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datasets/panda_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Any, Dict, Union, Optional

Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Any, Callable, Optional, Tuple, Union

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Any, Callable, Optional, Tuple, Union

import pandas as pd

from torchvision.datasets.vision import VisionDataset

from InnerEye.ML.Histopathology.datasets.base_dataset import TilesDataset
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/datasets/tcga_prad_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Any, Dict, Optional, Union

Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/models/deepmil.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
import pandas as pd
import numpy as np
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/models/encoders.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Callable, Optional, Sequence, Tuple

Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/models/transforms.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from pathlib import Path
from typing import Mapping, Sequence, Union

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import functools
import os
import logging
Expand Down
7 changes: 6 additions & 1 deletion InnerEye/ML/Histopathology/preprocessing/tiling.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

# These tiling implementations are adapted from PANDA Kaggle solutions, for example:
# https://github.com/kentaroy47/Kaggle-PANDA-1st-place-solution/blob/master/src/data_process/a00_save_tiles.py
from typing import Any, Optional, Tuple
Expand Down Expand Up @@ -25,7 +30,7 @@ def pad_for_tiling_2d(array: np.ndarray, tile_size: int, channels_first: Optiona
original array to obtain indices for the padded array.
"""
height, width = array.shape[1:] if channels_first else array.shape[:-1]
padding_h = get_1d_padding(height, tile_size)
padding_h = get_1d_padding(height, tile_size)
padding_w = get_1d_padding(width, tile_size)
padding = [padding_h, padding_w]
channels_axis = 0 if channels_first else 2
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

"""
Script to find mean and standard deviation of desired metrics from cross validation child runs.
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

"""
This script is an example of how to use the submit_to_azure_if_needed function from the hi-ml package to run the
main pre-processing function that creates tiles from slides in the PANDA dataset. The advantage of using this script
Expand Down
1 change: 1 addition & 0 deletions InnerEye/ML/Histopathology/scripts/mount_azure_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from health_azure import DatasetConfig
from health_azure.utils import get_workspace

Expand Down
7 changes: 6 additions & 1 deletion InnerEye/ML/Histopathology/utils/analysis_plot_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import numpy as np
from typing import List, Any

Expand Down Expand Up @@ -85,7 +90,7 @@ def plot_box_whisker(data_list: List[Any], column_names: List[str], show_outlier
def plot_histogram(data: List[Any], title: str = "") -> None:
"""
Plot a histogram given some data
:param data: data to be plotted
:param data: data to be plotted
:param title: plot title string
"""
plt.figure()
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/utils/download_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import os
from pathlib import Path

Expand Down
7 changes: 6 additions & 1 deletion InnerEye/ML/Histopathology/utils/layer_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import Callable, Tuple

from torch import as_tensor, device, nn, prod, rand
Expand Down Expand Up @@ -25,7 +30,7 @@ def setup_feature_extractor(pretrained_model: nn.Module,
def load_weights_to_model(weights_url: str, model: nn.Module) -> nn.Module:
"""
Load weights to the histoSSL model from the given URL
https://github.com/ozanciga/self-supervised-histopathology
https://github.com/ozanciga/self-supervised-histopathology
"""
map_location = device('cpu')
state = load_state_dict_from_url(weights_url, map_location=map_location)
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/utils/metrics_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from typing import Tuple, List, Any, Dict
import torch
import matplotlib.pyplot as plt
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/utils/naming.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

from enum import Enum

class ResultsKey(str, Enum):
Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/utils/tcga_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import pandas as pd


Expand Down
5 changes: 5 additions & 0 deletions InnerEye/ML/Histopathology/utils/viz_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

import math
import matplotlib.pyplot as plt

Expand Down
112 changes: 112 additions & 0 deletions InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------

"""BaseMIL is an abstract container defining basic functionality for running MIL experiments.
It is responsible for instantiating the encoder and full DeepMIL model. Subclasses should define
their datamodules and configure experiment-specific parameters.
"""
import os
from pathlib import Path
from typing import Type

import param
from torch import nn
from torchvision.models.resnet import resnet18

from health_azure.utils import CheckpointDownloader, get_workspace
from health_ml.networks.layers.attention_layers import AttentionLayer, GatedAttentionLayer
from InnerEye.Common import fixed_paths
from InnerEye.ML.lightning_container import LightningContainer
from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, TilesDataModule
from InnerEye.ML.Histopathology.models.deepmil import DeepMILModule
from InnerEye.ML.Histopathology.models.encoders import (HistoSSLEncoder, IdentityEncoder,
ImageNetEncoder, ImageNetSimCLREncoder,
InnerEyeSSLEncoder, TileEncoder)


class BaseMIL(LightningContainer):
    """Abstract container holding the shared setup for multiple-instance learning (MIL) experiments.

    It instantiates the tile encoder named by `encoder_type`, resolves the pooling layer class named by
    `pooling_type`, and builds the `DeepMILModule`. Subclasses must implement `cache_dir` and
    `get_data_module`.
    """
    # Model parameters:
    pooling_type: str = param.String(doc="Name of the pooling layer class to use.")
    # l_rate, weight_decay, adam_betas are already declared in OptimizerParams superclass

    # Encoder parameters:
    encoder_type: str = param.String(doc="Name of the encoder class to use.")
    tile_size: int = param.Integer(224, bounds=(1, None), doc="Tile width/height, in pixels.")
    n_channels: int = param.Integer(3, bounds=(1, None), doc="Number of channels in the tile.")

    # Data module parameters:
    batch_size: int = param.Integer(16, bounds=(1, None), doc="Number of slides to load per batch.")
    # The help text previously claimed that 0 was the default, while the declared default is 1000.
    max_bag_size: int = param.Integer(1000, bounds=(0, None),
                                      doc="Upper bound on number of tiles in each loaded bag. "
                                          "If 0, will return all samples in each bag. "
                                          "If > 0 (default: 1000), bags larger than `max_bag_size` "
                                          "will yield random subsets of instances.")
    cache_mode: CacheMode = param.ClassSelector(default=CacheMode.MEMORY, class_=CacheMode,
                                                doc="The type of caching to perform: "
                                                    "'memory' (default), 'disk', or 'none'.")
    save_precache: bool = param.Boolean(True, doc="Whether to pre-cache the entire transformed "
                                                  "dataset upfront and save it to disk.")
    # local_dataset (used as data module root_path) is declared in DatasetParams superclass

    @property
    def cache_dir(self) -> Path:
        """Directory used for caching. Must be provided by subclasses.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def setup(self) -> None:
        """Fetch the SSL checkpoint if the chosen encoder needs it, then create the encoder in eval mode.

        The encoder is stored in `self.encoder`. It is moved to the GPU when CUDA is available and is
        otherwise left on the CPU, so that the container can also be set up on CPU-only hosts.

        :raises ValueError: if `encoder_type` does not name a supported encoder class.
        """
        if self.encoder_type == InnerEyeSSLEncoder.__name__:
            self.downloader = CheckpointDownloader(
                aml_workspace=get_workspace(),
                run_id="updated_transforms:updated_transforms_1636471522_5473e3ff",
                checkpoint_filename="best_checkpoint.ckpt",
                download_dir='outputs/'
            )
            # NOTE(review): presumably the working directory is switched so that the relative
            # `download_dir` resolves against the repository root - confirm.
            os.chdir(fixed_paths.repository_root_directory())
            self.downloader.download_checkpoint_if_necessary()

        self.encoder = self.get_encoder()
        # Imported locally because only `torch.nn` is imported at module level.
        import torch
        # An unconditional `.cuda()` raises on machines without CUDA, hence the guard.
        if torch.cuda.is_available():
            self.encoder.cuda()
        self.encoder.eval()

    def get_encoder(self) -> TileEncoder:
        """Instantiate the tile encoder whose class name matches `encoder_type`.

        The `InnerEyeSSLEncoder` branch reads `self.downloader`, which is only created in `setup`.

        :return: A new encoder configured with `tile_size` and `n_channels`.
        :raises ValueError: if `encoder_type` does not name a supported encoder class.
        """
        if self.encoder_type == ImageNetEncoder.__name__:
            return ImageNetEncoder(feature_extraction_model=resnet18,
                                   tile_size=self.tile_size, n_channels=self.n_channels)

        elif self.encoder_type == ImageNetSimCLREncoder.__name__:
            return ImageNetSimCLREncoder(tile_size=self.tile_size, n_channels=self.n_channels)

        elif self.encoder_type == HistoSSLEncoder.__name__:
            return HistoSSLEncoder(tile_size=self.tile_size, n_channels=self.n_channels)

        elif self.encoder_type == InnerEyeSSLEncoder.__name__:
            return InnerEyeSSLEncoder(pl_checkpoint_path=self.downloader.local_checkpoint_path,
                                      tile_size=self.tile_size, n_channels=self.n_channels)

        else:
            raise ValueError(f"Unsupported encoder type: {self.encoder_type}")

    def get_pooling_layer(self) -> Type[nn.Module]:
        """Return the pooling layer class (not an instance) whose name matches `pooling_type`.

        :raises ValueError: if `pooling_type` does not name a supported pooling layer class.
        """
        if self.pooling_type == AttentionLayer.__name__:
            return AttentionLayer
        elif self.pooling_type == GatedAttentionLayer.__name__:
            return GatedAttentionLayer
        else:
            raise ValueError(f"Unsupported pooling type: {self.pooling_type}")

    def create_model(self) -> DeepMILModule:
        """Create the data module, store it in `self.data_module`, and build the DeepMIL model from it.

        Reads `self.encoder`, which is assigned in `setup`.

        :return: The `DeepMILModule` configured from the data module and the optimizer parameters.
        """
        self.data_module = self.get_data_module()
        # Encoding is done in the datamodule, so here we provide instead a dummy
        # no-op IdentityEncoder to be used inside the model
        return DeepMILModule(encoder=IdentityEncoder(input_dim=(self.encoder.num_encoding,)),
                             label_column=self.data_module.train_dataset.LABEL_COLUMN,
                             n_classes=self.data_module.train_dataset.N_CLASSES,
                             pooling_layer=self.get_pooling_layer(),
                             class_weights=self.data_module.class_weights,
                             l_rate=self.l_rate,
                             weight_decay=self.weight_decay,
                             adam_betas=self.adam_betas)

    def get_data_module(self) -> TilesDataModule:
        """Create the tiles data module for the experiment. Must be provided by subclasses.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
Loading