Skip to content

Commit

Permalink
Add RadImageNet to Perceptual Loss (#153)
Browse files Browse the repository at this point in the history
* Add RadImageNet component

* Add tests and update docstrings

* Add tests with 3 channels

Signed-off-by: Walter Hugo Lopez Pinaya <[email protected]>
  • Loading branch information
Warvito authored Jan 8, 2023
1 parent 34c4b29 commit e9597bb
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 8 deletions.
87 changes: 79 additions & 8 deletions generative/losses/perceptual.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@
class PerceptualLoss(nn.Module):
"""
Perceptual loss using features from pretrained deep neural networks trained. The function supports networks
pretrained on ImageNet that use the LPIPS approach from: Zhang, et al. "The unreasonable effectiveness of deep
features as a perceptual metric." https://arxiv.org/abs/1801.03924
pretrained on: ImageNet that use the LPIPS approach from Zhang, et al. "The unreasonable effectiveness of deep
features as a perceptual metric." https://arxiv.org/abs/1801.03924 ; RadImagenet from Mei, et al. "RadImageNet: An
Open Radiologic Deep Learning Research Dataset for Effective Transfer Learning"; and MedicalNet from Chen et al.
"Med3D: Transfer Learning for 3D Medical Image Analysis" .
The fake 3D implementation is based on a 2.5D approach where we calculate the 2D perceptual on slices from the
three axis.
Args:
spatial_dims: number of spatial dimensions.
network_type: {``"alex"``, ``"vgg"``, ``"squeeze"``, ``"medicalnet_resnet10_23datasets"``,
``"medicalnet_resnet50_23datasets"``}
network_type: {``"alex"``, ``"vgg"``, ``"squeeze"``, ``"radimagenet_resnet50"``,
``"medicalnet_resnet10_23datasets"``, ``"medicalnet_resnet50_23datasets"``}
Specifies the network architecture to use. Defaults to ``"alex"``.
is_fake_3d: if True use 2.5D approach for a 3D perceptual loss.
fake_3d_ratio: ratio of how many slices per axis are used in the 2.5D approach.
Expand All @@ -48,6 +51,8 @@ def __init__(
self.spatial_dims = spatial_dims
if spatial_dims == 3 and is_fake_3d is False:
self.perceptual_function = MedicalNetPerceptualComponent(net=network_type, verbose=False)
elif "radimagenet_" in network_type:
self.perceptual_function = RadImageNetPerceptualComponent(net=network_type, verbose=False)
else:
self.perceptual_function = LPIPS(
pretrained=True,
Expand Down Expand Up @@ -116,15 +121,14 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
if target.shape != input.shape:
raise ValueError(f"ground truth has differing shape ({target.shape}) from input ({input.shape})")

if self.spatial_dims == 2:
loss = self.perceptual_function(input, target)
elif self.spatial_dims == 3 and self.is_fake_3d:
if self.spatial_dims == 3 and self.is_fake_3d:
# Compute 2.5D approach
loss_sagittal = self._calculate_axis_loss(input, target, spatial_axis=2)
loss_coronal = self._calculate_axis_loss(input, target, spatial_axis=3)
loss_axial = self._calculate_axis_loss(input, target, spatial_axis=4)
loss = loss_sagittal + loss_axial + loss_coronal
if self.spatial_dims == 3 and self.is_fake_3d is False:
else:
# 2D and real 3D cases
loss = self.perceptual_function(input, target)

return torch.mean(loss)
Expand Down Expand Up @@ -194,3 +198,70 @@ def medicalnet_intensity_normalisation(volume):
mean = volume.mean()
std = volume.std()
return (volume - mean) / std


class RadImageNetPerceptualComponent(nn.Module):
"""
Component to perform the perceptual evaluation with the networks pretrained on RadImagenet (pretrained by Mei, et
al. "RadImageNet: An Open Radiologic Deep Learning Research Dataset for Effective Transfer Learning"). This class
uses torch Hub to download the networks from "Warvito/radimagenet-models".
Args:
net: {``"radimagenet_resnet50"``}
Specifies the network architecture to use. Defaults to ``"radimagenet_resnet50"``.
verbose: if false, mute messages from torch Hub load function.
"""

def __init__(
self,
net: str = "radimagenet_resnet50",
verbose: bool = False,
) -> None:
super().__init__()
self.model = torch.hub.load("Warvito/radimagenet-models", model=net, verbose=verbose)
self.eval()

def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
"""
We expect that the input is normalised between [0, 1]. Given the preprocessing performed during the training at
https://github.com/BMEII-AI/RadImageNet, we make sure that the input and target have 3 channels, reorder it from
'RGB' to 'BGR', and then remove the mean components of each input data channel. The outputs are normalised
across the channels, and we obtain the mean from the spatial dimensions (similar approach to the lpips package).
"""
# If input has just 1 channel, repeat channel to have 3 channels
if input.shape[1] == 1 and target.shape[1] == 1:
input = input.repeat(1, 3, 1, 1)
target = target.repeat(1, 3, 1, 1)

# Change order from 'RGB' to 'BGR'
input = input[:, [2, 1, 0], ...]
target = target[:, [2, 1, 0], ...]

# Subtract mean used during training
input = subtract_mean(input)
target = subtract_mean(target)

# Get model outputs
outs_input = self.model.forward(input)
outs_target = self.model.forward(target)

# Normalise through the channels
feats_input = normalize_tensor(outs_input)
feats_target = normalize_tensor(outs_target)

results = (feats_input - feats_target) ** 2
results = spatial_average(results.sum(dim=1, keepdim=True), keepdim=True)

return results


def spatial_average(x: torch.Tensor, keepdim: bool = True) -> torch.Tensor:
return x.mean([2, 3], keepdim=keepdim)


def subtract_mean(x: torch.Tensor) -> torch.Tensor:
mean = [0.406, 0.456, 0.485]
x[:, 0, :, :] -= mean[0]
x[:, 1, :, :] -= mean[1]
x[:, 2, :, :] -= mean[2]
return x
15 changes: 15 additions & 0 deletions tests/test_perceptual_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,21 @@
(2, 1, 64, 64, 64),
(2, 1, 64, 64, 64),
],
[
{"spatial_dims": 2, "network_type": "radimagenet_resnet50"},
(2, 1, 64, 64),
(2, 1, 64, 64),
],
[
{"spatial_dims": 2, "network_type": "radimagenet_resnet50"},
(2, 3, 64, 64),
(2, 3, 64, 64),
],
[
{"spatial_dims": 3, "network_type": "radimagenet_resnet50", "is_fake_3d": True, "fake_3d_ratio": 0.1},
(2, 1, 64, 64, 64),
(2, 1, 64, 64, 64),
],
[
{"spatial_dims": 3, "network_type": "medicalnet_resnet10_23datasets", "is_fake_3d": False},
(2, 1, 64, 64, 64),
Expand Down

0 comments on commit e9597bb

Please sign in to comment.