Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Request Mobilenet fpn #1999

Closed
finnickniu opened this issue Mar 20, 2020 · 4 comments · Fixed by #3253
Closed

Request Mobilenet fpn #1999

finnickniu opened this issue Mar 20, 2020 · 4 comments · Fixed by #3253

Comments

@finnickniu
Copy link

finnickniu commented Mar 20, 2020

🚀 Feature

Hi, I want to write a MobileNet FPN backbone.

Motivation

Improve MaskRCNN speed and accuracy.

Pitch

Alternatives

Additional context

Code:

/torchvision/models/detection/backbone_utils.py

from collections import OrderedDict
from torch import nn
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool

from torchvision.ops import misc as misc_nn_ops
from .._utils import IntermediateLayerGetter
from .. import resnet
from .. import mobilenet_v2

from torchvision.models import mobilenet_v2 as MobileNetV2
class BackboneWithFPN(nn.Sequential):
    """Chain a feature extractor and a feature pyramid network.

    ``backbone`` is wrapped in ``IntermediateLayerGetter`` and registered
    under the name ``body``; a ``FeaturePyramidNetwork`` that uses
    ``LastLevelMaxPool`` as its extra block is registered as ``fpn``.

    Args:
        backbone: module handed to ``IntermediateLayerGetter``.
        return_layers: forwarded to ``IntermediateLayerGetter``.
        in_channels_list: forwarded to ``FeaturePyramidNetwork``.
        out_channels: forwarded to ``FeaturePyramidNetwork`` and also
            stored on the instance as ``out_channels``.
    """

    def __init__(self, backbone, return_layers, in_channels_list, out_channels):
        # Insertion order defines execution order inside nn.Sequential.
        stages = OrderedDict()
        stages["body"] = IntermediateLayerGetter(
            backbone, return_layers=return_layers)
        stages["fpn"] = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=LastLevelMaxPool(),
        )
        super().__init__(stages)
        self.out_channels = out_channels


def resnet_fpn_backbone(backbone_name, pretrained):
    """Build a ResNet backbone wrapped with a feature pyramid network.

    Args:
        backbone_name: key looked up in ``resnet.__dict__`` to find the
            constructor to call.
        pretrained: forwarded to that constructor as ``pretrained``.

    Returns:
        A ``BackboneWithFPN`` built with 256 output channels.
    """
    build = resnet.__dict__[backbone_name]
    backbone = build(
        pretrained=pretrained,
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)

    # Freeze every parameter whose name mentions none of layer2/3/4;
    # the remaining parameters are left untouched.
    kept_tags = ('layer2', 'layer3', 'layer4')
    for name, parameter in backbone.named_parameters():
        if not any(tag in name for tag in kept_tags):
            parameter.requires_grad_(False)

    # Stage widths are inplanes // 8 scaled by 1, 2, 4 and 8.
    base_width = backbone.inplanes // 8
    in_channels_list = [base_width * factor for factor in (1, 2, 4, 8)]
    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
    return BackboneWithFPN(backbone, return_layers, in_channels_list, 256)


class FPNMobileNet(nn.Module):
    """MobileNetV2 feature stack split into four stages, ``layer1``..``layer4``.

    NOTE(review): ``self.features`` is registered as a child module ahead of
    the four stage modules. The maintainer reply in this thread names that
    registration as one cause of the reported RuntimeError and advises
    removing it; the code is kept as posted because it is the reproduction.
    """
    def __init__(self, pretrained=True):
        super().__init__()
        net = MobileNetV2(pretrained)
        # Registered as a submodule in addition to the slices taken below.
        self.features = net.features
        self.layer1= nn.Sequential(*self.features[0:4])
        self.layer2 = nn.Sequential(*self.features[4:7])
        self.layer3 = nn.Sequential(*self.features[7:11])
        self.layer4 = nn.Sequential(*self.features[11:19])
        # Freeze every parameter of the feature stack.
        for param in self.features.parameters():
            param.requires_grad = False


    def forward(self, x):

        # Bottom-up pathway: apply the four MobileNetV2 stages in sequence.
        # Channel counts below are the ones given in the maintainer reply.
        enc0 = self.layer1(x)

        enc1 = self.layer2(enc0) # 32 channels

        enc2 = self.layer3(enc1) # 64 channels

        enc3 = self.layer4(enc2) # 1280 channels

        return enc3

def mobilenet_fpn_backbone(pretrained):
    """Wrap ``FPNMobileNet`` in ``BackboneWithFPN`` with 256 output channels.

    NOTE(review): this is the version that produces the RuntimeError quoted
    in this thread; it is kept as posted.

    Args:
        pretrained: forwarded to ``FPNMobileNet``.

    Returns:
        The resulting ``BackboneWithFPN`` instance.
    """
    backbone = FPNMobileNet(pretrained)
    # Debug output: dumps the module structure to stdout.
    print(backbone)
    # freeze layers: every parameter whose name mentions none of
    # layer2/layer3/layer4 gets requires_grad set to False
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}

    # 1280 // 8 == 160, so the list below evaluates to [160, 320, 640, 1280].
    # NOTE(review): the maintainer reply gives the MobileNetV2 stage widths
    # as [24, 32, 64, 1280], so the first three entries do not match.
    in_channels_stage2 =1280 // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)

/torchvision/models/detection/mobilenet_fpn.py

from .backbone_utils import mobilenet_fpn_backbone

def fpn(pretrained=True):
    """Return the backbone built by ``mobilenet_fpn_backbone``.

    Args:
        pretrained: forwarded unchanged to ``mobilenet_fpn_backbone``.
    """
    return mobilenet_fpn_backbone(pretrained)
	

demo.py

   # Build the MobileNet FPN backbone, requesting pretrained weights,
   # and switch it to evaluation mode.
   backbone = mobilenet_fpn.fpn(True)
   backbone.eval()

   # Feed one random tensor of shape (1, 3, 100, 100) and print the output.
   x = torch.rand(1,3, 100, 100)
   out = backbone(x)
   print(out)


Bug:

"RuntimeError: Given groups=1, weight of size 32 3 3 3, expected input[1, 1280, 4, 4] to have 3 channels, but got 1280 channels instead"

@fmassa
Copy link
Member

fmassa commented Mar 20, 2020

Hi,

There are two issues with your implementation:

  • in_channels_list is not correct, and should follow the number of channels in mobilenet_v2 (which is [24, 32, 64, 1280])
  • the current implementation for _IntermediateLayerGetter is very simplistic, and doesn't handle some use cases. You should remove self.features from FPNMobileNet

Here is a working version

class FPNMobileNet(nn.Module):
    """MobileNetV2 feature extractor regrouped into four sequential stages.

    The ``features`` container of the MobileNetV2 model is sliced into
    ``layer1`` ([0:4]), ``layer2`` ([4:7]), ``layer3`` ([7:11]) and
    ``layer4`` ([11:19]), whose outputs have 24, 32, 64 and 1280 channels
    respectively. Only the four stages are registered on this module; the
    original ``features`` container is not kept as an attribute.

    Args:
        pretrained: forwarded to the ``MobileNetV2`` constructor.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        mobilenet_blocks = MobileNetV2(pretrained).features

        # Freeze every parameter of the feature stack; the stage modules
        # below share these same parameters.
        for weight in mobilenet_blocks.parameters():
            weight.requires_grad = False

        self.layer1 = nn.Sequential(*mobilenet_blocks[0:4])
        self.layer2 = nn.Sequential(*mobilenet_blocks[4:7])
        self.layer3 = nn.Sequential(*mobilenet_blocks[7:11])
        self.layer4 = nn.Sequential(*mobilenet_blocks[11:19])

    def forward(self, x):
        """Run ``x`` through the four stages in order; return the last output."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return x


def mobilenet_fpn_backbone(pretrained):
    """Build an ``FPNMobileNet`` wrapped with a feature pyramid network.

    Args:
        pretrained: forwarded to ``FPNMobileNet``.

    Returns:
        A ``BackboneWithFPN`` fed by ``layer1``..``layer4`` (input widths
        24, 32, 64 and 1280) and producing 256 output channels.
    """
    backbone = FPNMobileNet(pretrained)

    # Freeze every parameter whose name mentions none of layer2/3/4.
    kept_tags = ('layer2', 'layer3', 'layer4')
    for name, parameter in backbone.named_parameters():
        if any(tag in name for tag in kept_tags):
            continue
        parameter.requires_grad_(False)

    stage_names = ('layer1', 'layer2', 'layer3', 'layer4')
    return_layers = {stage: index for index, stage in enumerate(stage_names)}
    in_channels_list = [24, 32, 64, 1280]
    return BackboneWithFPN(backbone, return_layers, in_channels_list, 256)

@finnickniu
Copy link
Author

It works, many thanks!

@lucasjinreal
Copy link

@fmassa Hi, I'd like to ask: if I want to extract certain layers using _IntermediateLayerGetter, should I use only the index of that layer, or a slice such as [:n]?

for example:

In mobilenetv2, to take the 4th layer, should it be features[4] or features[:4]?
What's the difference? And if I want to take the 7th layer, should it be features[4:7] or features[7]?

@shibefore
Copy link

    self.layer1= nn.Sequential(*features[0:4])
    self.layer2 = nn.Sequential(*features[4:7])
    self.layer3 = nn.Sequential(*features[7:11])
    self.layer4 = nn.Sequential(*features[11:19])
	
	or :
	
    self.layer1= nn.Sequential(*features[0:4])
    self.layer2 = nn.Sequential(*features[4:7])
    self.layer3 = nn.Sequential(*features[7:14])
    self.layer4 = nn.Sequential(*features[14:18])		
	
	as *features[19] has too many channels

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging a pull request may close this issue.

4 participants