Request Mobilenet fpn #1999

finnickniu · 2020-03-20T08:34:56Z

🚀 Feature

Hi, I want to write a MobileNet FPN backbone.

Motivation

Improve MaskRCNN speed and accuracy.

Pitch

Alternatives

Additional context

Code:

/torchvision/models/detection/backbone_utils.py

from collections import OrderedDict
from torch import nn
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool

from torchvision.ops import misc as misc_nn_ops
from .._utils import IntermediateLayerGetter
from .. import resnet
from .. import mobilenet_v2

from torchvision.models import mobilenet_v2 as MobileNetV2
class BackboneWithFPN(nn.Sequential):
    """Chain a feature-extracting body with a feature pyramid network.

    The module is an ``nn.Sequential`` with two named stages, in this order:

    * ``body`` -- an ``IntermediateLayerGetter`` built from ``backbone`` and
      ``return_layers``;
    * ``fpn`` -- a ``FeaturePyramidNetwork`` that uses ``LastLevelMaxPool``
      as its extra block.

    Arguments:
        backbone: module handed to ``IntermediateLayerGetter``.
        return_layers: forwarded as the ``return_layers`` argument of
            ``IntermediateLayerGetter``.
        in_channels_list: forwarded as ``in_channels_list`` of the FPN.
        out_channels: forwarded as ``out_channels`` of the FPN and also
            stored on the instance as ``self.out_channels``.
    """

    def __init__(self, backbone, return_layers, in_channels_list, out_channels):
        # Insertion order defines the execution order of the Sequential.
        stages = OrderedDict()
        stages["body"] = IntermediateLayerGetter(
            backbone, return_layers=return_layers)
        stages["fpn"] = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=LastLevelMaxPool(),
        )
        super().__init__(stages)
        self.out_channels = out_channels


def resnet_fpn_backbone(backbone_name, pretrained):
    """Build a ResNet body wrapped in ``BackboneWithFPN``.

    Arguments:
        backbone_name: key looked up in ``resnet.__dict__`` to find the
            constructor of the ResNet variant.
        pretrained: forwarded to that constructor as ``pretrained``.

    Returns:
        A ``BackboneWithFPN`` built with 256 output channels that taps
        ``layer1`` .. ``layer4`` of the ResNet.
    """
    build_resnet = resnet.__dict__[backbone_name]
    backbone = build_resnet(
        pretrained=pretrained,
        norm_layer=misc_nn_ops.FrozenBatchNorm2d,
    )

    # Freeze every parameter whose name mentions none of these stages.
    trainable_stages = ('layer2', 'layer3', 'layer4')
    for param_name, param in backbone.named_parameters():
        if not any(stage in param_name for stage in trainable_stages):
            param.requires_grad_(False)

    # 'layer1' -> 0, 'layer2' -> 1, 'layer3' -> 2, 'layer4' -> 3
    return_layers = {'layer%d' % (idx + 1): idx for idx in range(4)}

    # Channel widths are derived from the backbone's ``inplanes`` attribute:
    # one eighth of it, then doubled for each following stage.
    base_width = backbone.inplanes // 8
    in_channels_list = [base_width * factor for factor in (1, 2, 4, 8)]

    return BackboneWithFPN(backbone, return_layers, in_channels_list, 256)


class FPNMobileNet(nn.Module):
    """MobileNetV2 feature stack regrouped into four stages (as reported).

    This is the version from the original report. It keeps the whole
    ``features`` stack as ``self.features`` and additionally exposes slices
    of it as ``layer1`` .. ``layer4``.

    NOTE: the maintainer's reply later in this thread says ``self.features``
    should be removed, because the intermediate-layer getter used by
    ``BackboneWithFPN`` does not handle this layout; the reported
    ``RuntimeError`` comes from running the code in this form.

    Arguments:
        pretrained: passed positionally to ``MobileNetV2``.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        net = MobileNetV2(pretrained)
        # The full feature stack stays on the module next to the stage slices.
        self.features = net.features
        self.layer1= nn.Sequential(*self.features[0:4])
        self.layer2 = nn.Sequential(*self.features[4:7])
        self.layer3 = nn.Sequential(*self.features[7:11])
        self.layer4 = nn.Sequential(*self.features[11:19])
        # Mark every parameter of the feature stack as not requiring gradients.
        for param in self.features.parameters():
            param.requires_grad = False


    def forward(self, x):
        """Run ``x`` through ``layer1`` .. ``layer4`` and return the last output."""

        # Bottom-up pathway; the per-stage channel counts noted below are the
        # ones given in the corrected version later in this thread.
        enc0 = self.layer1(x) # 24

        enc1 = self.layer2(enc0) # 32

        enc2 = self.layer3(enc1) # 64

        enc3 = self.layer4(enc2) # 1280

        return enc3

def mobilenet_fpn_backbone(pretrained):
    """Wrap ``FPNMobileNet`` in ``BackboneWithFPN`` (as reported).

    This is the version from the original report; the maintainer's reply
    later in this thread states that its ``in_channels_list`` is incorrect.

    Arguments:
        pretrained: forwarded to ``FPNMobileNet``.

    Returns:
        A ``BackboneWithFPN`` built with 256 output channels.
    """
    backbone = FPNMobileNet(pretrained)
    # Debug output: dumps the module structure to stdout.
    print(backbone)
    # freeze layers
    # Any parameter whose name mentions none of layer2/layer3/layer4 is frozen.
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}

    # 1280 // 8 == 160, so the list below is [160, 320, 640, 1280].
    # NOTE: per the maintainer's reply the list should instead follow
    # mobilenet_v2's channels, [24, 32, 64, 1280].
    in_channels_stage2 =1280 // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)

/torchvision/models/detection/mobilenet_fpn.py

from .backbone_utils import mobilenet_fpn_backbone

def fpn(pretrained = True):
    """Return the backbone produced by ``mobilenet_fpn_backbone``.

    Arguments:
        pretrained: forwarded unchanged to ``mobilenet_fpn_backbone``.
    """
    return mobilenet_fpn_backbone(pretrained)

demo.py

   # Build the backbone with pretrained=True and switch it to eval mode.
   backbone = mobilenet_fpn.fpn(True).eval()

   # Feed one random tensor of shape (1, 3, 100, 100) and print the result.
   x = torch.rand(1, 3, 100, 100)
   out = backbone(x)
   print(out)

Bug:

"RuntimeError: Given groups=1, weight of size 32 3 3 3, expected input[1, 1280, 4, 4] to have 3 channels, but got 1280 channels instead"

The text was updated successfully, but these errors were encountered:

fmassa · 2020-03-20T10:00:33Z

Hi,

There are two issues with your implementation:

1. `in_channels_list` is not correct; it should follow the number of channels in mobilenet_v2 (which is [24, 32, 64, 1280]).
2. The current implementation of `_IntermediateLayerGetter` is very simplistic and doesn't handle some use cases. You should remove `self.features` from `FPNMobileNet`.

Here is a working version

class FPNMobileNet(nn.Module):
    """MobileNetV2 feature stack regrouped into four named stages.

    The ``features`` stack of the MobileNetV2 model is sliced into
    ``layer1`` .. ``layer4`` (indices 0:4, 4:7, 7:11 and 11:19). Only these
    four stages are stored on the module; the ``features`` container itself
    is not kept as an attribute. Every parameter of the feature stack has
    ``requires_grad`` set to ``False``.

    Arguments:
        pretrained: passed positionally to ``MobileNetV2``.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        features = MobileNetV2(pretrained).features

        # Mark all feature-stack parameters as not requiring gradients.
        for weight in features.parameters():
            weight.requires_grad = False

        # (attribute name, slice start, slice stop); stages are registered
        # in this order.
        stage_bounds = (
            ("layer1", 0, 4),
            ("layer2", 4, 7),
            ("layer3", 7, 11),
            ("layer4", 11, 19),
        )
        for stage_name, start, stop in stage_bounds:
            setattr(self, stage_name, nn.Sequential(*features[start:stop]))

    def forward(self, x):
        """Run ``x`` through the four stages in order; return the last output."""
        # Bottom-up pathway; stage outputs have 24, 32, 64 and 1280 channels.
        out = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        return out


def mobilenet_fpn_backbone(pretrained):
    """Wrap ``FPNMobileNet`` in ``BackboneWithFPN``.

    Arguments:
        pretrained: forwarded to ``FPNMobileNet``.

    Returns:
        A ``BackboneWithFPN`` that taps ``layer1`` .. ``layer4`` with input
        channels [24, 32, 64, 1280] and 256 output channels.
    """
    backbone = FPNMobileNet(pretrained)

    # Freeze every parameter whose name mentions none of these stages.
    trainable_stages = ('layer2', 'layer3', 'layer4')
    for param_name, param in backbone.named_parameters():
        if any(stage in param_name for stage in trainable_stages):
            continue
        param.requires_grad_(False)

    # 'layer1' -> 0, 'layer2' -> 1, 'layer3' -> 2, 'layer4' -> 3
    return_layers = {'layer%d' % (idx + 1): idx for idx in range(4)}

    # Output channels of layer1..layer4, in order.
    stage_channels = [24, 32, 64, 1280]

    return BackboneWithFPN(backbone, return_layers, stage_channels, 256)

finnickniu · 2020-03-20T16:55:43Z

It works, many thanks!

lucasjinreal · 2020-11-26T05:15:17Z

@fmassa Hi, I'd like to ask: if I want to extract certain layers using _IntermediateLayerGetter, should I use only the index of that layer, or a slice such as [:n]?

for example:

In mobilenetv2, to take the 4th layer, should it be features[4] or features[:4]?
What's the difference? Or, if I want to take the 7th layer, should it be features[4:7] or features[7]?

shibefore · 2020-11-30T07:22:49Z

    # Stage split from the working version above: four consecutive slices of
    # `features`, the last one running through index 18.
    self.layer1= nn.Sequential(*features[0:4])
    self.layer2 = nn.Sequential(*features[4:7])
    self.layer3 = nn.Sequential(*features[7:11])
    self.layer4 = nn.Sequential(*features[11:19])
	
	or :
	
    # Alternative split: layer3 is extended through index 13 and layer4 stops
    # at index 17, so the entry at index 18 is left out.
    # NOTE(review): the in_channels_list given to the FPN presumably has to be
    # adjusted to this split's stage outputs -- confirm before using.
    self.layer1= nn.Sequential(*features[0:4])
    self.layer2 = nn.Sequential(*features[4:7])
    self.layer3 = nn.Sequential(*features[7:14])
    self.layer4 = nn.Sequential(*features[14:18])		
	
	since the last layer included by features[11:19] has too many channels (1280)

fmassa closed this as completed Mar 20, 2020

fmassa added module: models question topic: feature extraction topic: object detection labels Mar 20, 2020

datumbox mentioned this issue Jan 6, 2021

RetinaNet with MobileNetV3 FPN backbone #3223

Merged

oke-aditya mentioned this issue Jan 17, 2021

Are new models planned to be added? #2707

Open

37 tasks

datumbox mentioned this issue Jan 18, 2021

Add MobileNetV3 architecture for Detection #3253

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Request Mobilenet fpn #1999

Request Mobilenet fpn #1999

finnickniu commented Mar 20, 2020 •

edited

Loading

fmassa commented Mar 20, 2020

finnickniu commented Mar 20, 2020

lucasjinreal commented Nov 26, 2020

shibefore commented Nov 30, 2020

Request Mobilenet fpn #1999

Request Mobilenet fpn #1999

Comments

finnickniu commented Mar 20, 2020 • edited Loading

🚀 Feature

Motivation

Pitch

Alternatives

Additional context

Code:

Bug:

fmassa commented Mar 20, 2020

finnickniu commented Mar 20, 2020

lucasjinreal commented Nov 26, 2020

shibefore commented Nov 30, 2020

finnickniu commented Mar 20, 2020 •

edited

Loading