From 37a9ee5b3aead821dc1f795ec9274ccbeea695bb Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 11:03:37 +0100 Subject: [PATCH] Add EfficientNet Architecture in TorchVision (#4293) * Adding code skeleton * Adding MBConvConfig. * Extend SqueezeExcitation to support custom min_value and activation. * Implement MBConv. * Replace stochastic_depth with operator. * Adding the rest of the EfficientNet implementation * Update torchvision/models/efficientnet.py * Replacing 1st activation of SE with SiLU. * Adding efficientnet_b3. * Replace mobilenetv3 assets with custom. * Switch to standard sigmoid and reconfiguring BN. * Reconfiguration of efficientnet. * Add repr * Add weights. * Update weights. * Adding B5-B7 weights. * Update docs and hubconf. * Fix doc link. * Fix typo on comment. --- docs/source/models.rst | 43 +- hubconf.py | 2 + references/classification/README.md | 6 + references/classification/presets.py | 6 +- references/classification/train.py | 17 +- ...odelTester.test_efficientnet_b0_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b1_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b2_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b3_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b4_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b5_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b6_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b7_expect.pkl | Bin 0 -> 939 bytes torchvision/models/__init__.py | 1 + torchvision/models/efficientnet.py | 369 ++++++++++++++++++ torchvision/ops/stochastic_depth.py | 4 +- 16 files changed, 441 insertions(+), 7 deletions(-) create mode 100644 test/expect/ModelTester.test_efficientnet_b0_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b1_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b2_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b3_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b4_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b5_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b6_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b7_expect.pkl create mode 100644 torchvision/models/efficientnet.py diff --git a/docs/source/models.rst b/docs/source/models.rst index b9bff7a36e8..64ca69f47ae 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -27,6 +27,7 @@ architectures for image classification: - `ResNeXt`_ - `Wide ResNet`_ - `MNASNet`_ +- `EfficientNet`_ You can construct a model with random weights by calling its constructor: @@ -47,6 +48,14 @@ You can construct a model with random weights by calling its constructor: resnext50_32x4d = models.resnext50_32x4d() wide_resnet50_2 = models.wide_resnet50_2() mnasnet = models.mnasnet1_0() + efficientnet_b0 = models.efficientnet_b0() + efficientnet_b1 = models.efficientnet_b1() + efficientnet_b2 = models.efficientnet_b2() + efficientnet_b3 = models.efficientnet_b3() + efficientnet_b4 = models.efficientnet_b4() + efficientnet_b5 = models.efficientnet_b5() + efficientnet_b6 = models.efficientnet_b6() + efficientnet_b7 = models.efficientnet_b7() We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`. 
These can be constructed by passing ``pretrained=True``:
@@ -68,6 +77,14 @@ These can be constructed by passing ``pretrained=True``:
     resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
     wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
     mnasnet = models.mnasnet1_0(pretrained=True)
+    efficientnet_b0 = models.efficientnet_b0(pretrained=True)
+    efficientnet_b1 = models.efficientnet_b1(pretrained=True)
+    efficientnet_b2 = models.efficientnet_b2(pretrained=True)
+    efficientnet_b3 = models.efficientnet_b3(pretrained=True)
+    efficientnet_b4 = models.efficientnet_b4(pretrained=True)
+    efficientnet_b5 = models.efficientnet_b5(pretrained=True)
+    efficientnet_b6 = models.efficientnet_b6(pretrained=True)
+    efficientnet_b7 = models.efficientnet_b7(pretrained=True)
 
 Instancing a pre-trained model will download its weights to a cache directory.
 This directory can be set using the `TORCH_MODEL_ZOO` environment variable. See
@@ -113,7 +130,10 @@ Unfortunately, the concrete `subset` that was used is lost. For more information
 see `this discussion `_ or `these experiments `_.
 
-ImageNet 1-crop error rates (224x224)
+The sizes of the EfficientNet models depend on the variant. For the exact input sizes
+`check here `_
+
+ImageNet 1-crop error rates
 
 ================================  =============  =============
 Model                             Acc@1          Acc@5
@@ -151,6 +171,14 @@ Wide ResNet-50-2                  78.468         94.086
 Wide ResNet-101-2                 78.848         94.284
 MNASNet 1.0                       73.456         91.510
 MNASNet 0.5                       67.734         87.490
+EfficientNet-B0                   77.692         93.532
+EfficientNet-B1                   78.642         94.186
+EfficientNet-B2                   80.608         95.310
+EfficientNet-B3                   82.008         96.054
+EfficientNet-B4                   83.384         96.594
+EfficientNet-B5                   83.444         96.628
+EfficientNet-B6                   84.008         96.916
+EfficientNet-B7                   84.122         96.908
 ================================  =============  =============
 
@@ -166,6 +194,7 @@ MNASNet 0.5                       67.734         87.490
 .. _MobileNetV3: https://arxiv.org/abs/1905.02244
 .. _ResNeXt: https://arxiv.org/abs/1611.05431
 .. _MNASNet: https://arxiv.org/abs/1807.11626
+.. _EfficientNet: https://arxiv.org/abs/1905.11946
 
 .. currentmodule:: torchvision.models
@@ -267,6 +296,18 @@ MNASNet
 .. autofunction:: mnasnet1_0
 .. autofunction:: mnasnet1_3
 
+EfficientNet
+------------
+
+.. autofunction:: efficientnet_b0
+.. autofunction:: efficientnet_b1
+.. autofunction:: efficientnet_b2
+.. autofunction:: efficientnet_b3
+.. autofunction:: efficientnet_b4
+.. autofunction:: efficientnet_b5
+.. autofunction:: efficientnet_b6
+.. autofunction:: efficientnet_b7
+
 Quantized Models
 ----------------
diff --git a/hubconf.py b/hubconf.py
index 097759bdd89..2bff6850525 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -15,6 +15,8 @@
 from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small
 from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, \
     mnasnet1_3
+from torchvision.models.efficientnet import efficientnet_b0, efficientnet_b1, efficientnet_b2, \
+    efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7
 
 # segmentation
 from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, \
diff --git a/references/classification/README.md b/references/classification/README.md
index e0b7f210175..210a63c0bca 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -68,6 +68,12 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
 and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
+### EfficientNet
+
+The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
+
+The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564).
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for PyTorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex).
diff --git a/references/classification/presets.py b/references/classification/presets.py
index 6bb389ba8db..ce5a6fe414f 100644
--- a/references/classification/presets.py
+++ b/references/classification/presets.py
@@ -1,4 +1,5 @@
 from torchvision.transforms import autoaugment, transforms
+from torchvision.transforms.functional import InterpolationMode
 
 
 class ClassificationPresetTrain:
@@ -24,10 +25,11 @@ def __call__(self, img):
 
 
 class ClassificationPresetEval:
-    def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
+    def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
+                 interpolation=InterpolationMode.BILINEAR):
 
         self.transforms = transforms.Compose([
-            transforms.Resize(resize_size),
+            transforms.Resize(resize_size, interpolation=interpolation),
             transforms.CenterCrop(crop_size),
             transforms.ToTensor(),
             transforms.Normalize(mean=mean, std=std),
diff --git a/references/classification/train.py b/references/classification/train.py
index b4e9d274662..9ba99b3dc54 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -6,6 +6,7 @@
 import torch.utils.data
 from torch import nn
 import torchvision
+from torchvision.transforms.functional import InterpolationMode
 
 import presets
 import utils
@@ -82,7 +83,18 @@ def _get_cache_path(filepath):
 def load_data(traindir, valdir, args):
     # Data loading code
     print("Loading data")
-    resize_size, crop_size = (342, 299) if args.model == 'inception_v3' else (256, 224)
+    resize_size, crop_size = 256, 224
+    interpolation = InterpolationMode.BILINEAR
+    if args.model == 'inception_v3':
+        resize_size, crop_size = 342, 299
+    elif args.model.startswith('efficientnet_'):
+        sizes = {
+            'b0': (256, 224), 'b1': (256, 240), 'b2': (288, 288), 'b3': (320, 300),
+            'b4': (384, 380), 'b5': (456, 456), 'b6': (528, 528), 'b7': (600, 600),
+        }
+        e_type = args.model.replace('efficientnet_', '')
+        resize_size, crop_size = sizes[e_type]
+        interpolation = InterpolationMode.BICUBIC
 
     print("Loading training data")
     st = time.time()
@@ -113,7 +125,8 @@ def load_data(traindir, valdir, args):
     else:
         dataset_test = torchvision.datasets.ImageFolder(
             valdir,
-            presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size))
+            presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size,
+                                             interpolation=interpolation))
 
         if args.cache_dataset:
             print("Saving dataset_test to {}".format(cache_path))
             utils.mkdir(os.path.dirname(cache_path))
diff --git a/test/expect/ModelTester.test_efficientnet_b0_expect.pkl b/test/expect/ModelTester.test_efficientnet_b0_expect.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1de871ce0fbea9ddbab7e315b05f864bc5f6fa53
GIT binary patch
[base85 payloads omitted: eight new 939-byte expect files, ModelTester.test_efficientnet_b0_expect.pkl through ModelTester.test_efficientnet_b7_expect.pkl]
[one-line hunk adding the efficientnet import to torchvision/models/__init__.py omitted]
diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
new file mode 100644
--- /dev/null
+++ b/torchvision/models/efficientnet.py
[module header omitted: imports, __all__ and the model_urls mapping]
+class SqueezeExcitation(nn.Module):
+    def __init__(self, input_channels: int, squeeze_channels: int) -> None:
+        super().__init__()
+        self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
+        self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
+
+    def _scale(self, input: Tensor) -> Tensor:
+        scale = F.adaptive_avg_pool2d(input, 1)
+        scale = self.fc1(scale)
+        scale = F.silu(scale, inplace=True)
+        scale = self.fc2(scale)
+        return scale.sigmoid()
+
+    def forward(self, input: Tensor) -> Tensor:
+        scale = self._scale(input)
+        return scale * input
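+
+
+# Illustrative sketch (not part of the module API): the SE block squeezes the
+# spatial dimensions to 1x1, gates the channels through fc1 -> SiLU -> fc2 ->
+# sigmoid, and rescales the input, so output and input shapes match:
+#
+#     se = SqueezeExcitation(input_channels=64, squeeze_channels=16)
+#     x = torch.rand(2, 64, 32, 32)
+#     assert se(x).shape == x.shape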
+
+
+class MBConvConfig:
+    # Stores information listed at Table 1 of the EfficientNet paper
+    def __init__(self,
+                 expand_ratio: float, kernel: int, stride: int,
+                 input_channels: int, out_channels: int, num_layers: int,
+                 width_mult: float, depth_mult: float) -> None:
+        self.expand_ratio = expand_ratio
+        self.kernel = kernel
+        self.stride = stride
+        self.input_channels = self.adjust_channels(input_channels, width_mult)
+        self.out_channels = self.adjust_channels(out_channels, width_mult)
+        self.num_layers = self.adjust_depth(num_layers, depth_mult)
+
+    def __repr__(self) -> str:
+        s = self.__class__.__name__ + '('
+        s += 'expand_ratio={expand_ratio}'
+        s += ', kernel={kernel}'
+        s += ', stride={stride}'
+        s += ', input_channels={input_channels}'
+        s += ', out_channels={out_channels}'
+        s += ', num_layers={num_layers}'
+        s += ')'
+        return s.format(**self.__dict__)
+
+    @staticmethod
+    def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int:
+        return _make_divisible(channels * width_mult, 8, min_value)
+
+    @staticmethod
+    def adjust_depth(num_layers: int, depth_mult: float) -> int:
+        return int(math.ceil(num_layers * depth_mult))
+
+
+class MBConv(nn.Module):
+    def __init__(self, cnf: MBConvConfig, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module],
+                 se_layer: Callable[..., nn.Module] = SqueezeExcitation) -> None:
+        super().__init__()
+
+        if not (1 <= cnf.stride <= 2):
+            raise ValueError('illegal stride value')
+
+        self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
+
+        layers: List[nn.Module] = []
+        activation_layer = nn.SiLU
+
+        # expand
+        expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
+        if expanded_channels != cnf.input_channels:
+            layers.append(ConvBNActivation(cnf.input_channels, expanded_channels, kernel_size=1,
+                                           norm_layer=norm_layer, activation_layer=activation_layer))
+
+        # depthwise
+        layers.append(ConvBNActivation(expanded_channels, expanded_channels, kernel_size=cnf.kernel,
+                                       stride=cnf.stride, groups=expanded_channels,
+                                       norm_layer=norm_layer, activation_layer=activation_layer))
+
+        # squeeze and excitation
+        squeeze_channels = max(1, cnf.input_channels // 4)
+        layers.append(se_layer(expanded_channels, squeeze_channels))
+
+        # project
+        layers.append(ConvBNActivation(expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer,
+                                       activation_layer=nn.Identity))
+
+        self.block = nn.Sequential(*layers)
+        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
+        self.out_channels = cnf.out_channels
+
+    def forward(self, input: Tensor) -> Tensor:
+        result = self.block(input)
+        if self.use_res_connect:
+            result = self.stochastic_depth(result)
+            result += input
+        return result
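+
+
+# Illustrative sketch (not part of the module API): the compound-scaling
+# arithmetic of MBConvConfig. With the B2 multipliers (width_mult=1.1,
+# depth_mult=1.2), channel counts are scaled and rounded to multiples of 8
+# by _make_divisible, while layer counts are scaled with ceil:
+#
+#     cnf = MBConvConfig(6, 3, 2, 16, 24, 2, width_mult=1.1, depth_mult=1.2)
+#     assert (cnf.input_channels, cnf.out_channels, cnf.num_layers) == (16, 24, 3)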
+
+
+class EfficientNet(nn.Module):
+    def __init__(
+            self,
+            inverted_residual_setting: List[MBConvConfig],
+            dropout: float,
+            stochastic_depth_prob: float = 0.2,
+            num_classes: int = 1000,
+            block: Optional[Callable[..., nn.Module]] = None,
+            norm_layer: Optional[Callable[..., nn.Module]] = None,
+            **kwargs: Any
+    ) -> None:
+        """
+        EfficientNet main class
+
+        Args:
+            inverted_residual_setting (List[MBConvConfig]): Network structure
+            dropout (float): The dropout probability
+            stochastic_depth_prob (float): The stochastic depth probability
+            num_classes (int): Number of classes
+            block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block
+            norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
+        """
+        super().__init__()
+
+        if not inverted_residual_setting:
+            raise ValueError("The inverted_residual_setting should not be empty")
+        elif not (isinstance(inverted_residual_setting, Sequence) and
+                  all([isinstance(s, MBConvConfig) for s in inverted_residual_setting])):
+            raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")
+
+        if block is None:
+            block = MBConv
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        layers: List[nn.Module] = []
+
+        # building first layer
+        firstconv_output_channels = inverted_residual_setting[0].input_channels
+        layers.append(ConvBNActivation(3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer,
+                                       activation_layer=nn.SiLU))
+
+        # building inverted residual blocks
+        total_stage_blocks = sum([cnf.num_layers for cnf in inverted_residual_setting])
+        stage_block_id = 0
+        for cnf in inverted_residual_setting:
+            stage: List[nn.Module] = []
+            for _ in range(cnf.num_layers):
+                # copy to avoid modifications. shallow copy is enough
+                block_cnf = copy.copy(cnf)
+
+                # overwrite info if not the first conv in the stage
+                if stage:
+                    block_cnf.input_channels = block_cnf.out_channels
+                    block_cnf.stride = 1
+
+                # adjust stochastic depth probability based on the depth of the stage block
+                sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks
+
+                stage.append(block(block_cnf, sd_prob, norm_layer))
+                stage_block_id += 1
+
+            layers.append(nn.Sequential(*stage))
+
+        # building last several layers
+        lastconv_input_channels = inverted_residual_setting[-1].out_channels
+        lastconv_output_channels = 4 * lastconv_input_channels
+        layers.append(ConvBNActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1,
+                                       norm_layer=norm_layer, activation_layer=nn.SiLU))
+
+        self.features = nn.Sequential(*layers)
+        self.avgpool = nn.AdaptiveAvgPool2d(1)
+        self.classifier = nn.Sequential(
+            nn.Dropout(p=dropout, inplace=True),
+            nn.Linear(lastconv_output_channels, num_classes),
+        )
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.ones_(m.weight)
+                nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Linear):
+                init_range = 1.0 / math.sqrt(m.out_features)
+                nn.init.uniform_(m.weight, -init_range, init_range)
+                nn.init.zeros_(m.bias)
+
+    def _forward_impl(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+
+        x = self.classifier(x)
+
+        return x
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self._forward_impl(x)
+
+
+def _efficientnet_conf(width_mult: float, depth_mult: float, **kwargs: Any) -> List[MBConvConfig]:
+    bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult)
+    inverted_residual_setting = [
+        bneck_conf(1, 3, 1, 32, 16, 1),
+        bneck_conf(6, 3, 2, 16, 24, 2),
+        bneck_conf(6, 5, 2, 24, 40, 2),
+        bneck_conf(6, 3, 2, 40, 80, 3),
+        bneck_conf(6, 5, 1, 80, 112, 3),
+        bneck_conf(6, 5, 2, 112, 192, 4),
+        bneck_conf(6, 3, 1, 192, 320, 1),
+    ]
+    return inverted_residual_setting
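+
+
+# Illustrative sketch (not part of the module API): a B0-sized network built
+# straight from the baseline configuration (width_mult=depth_mult=1.0 keeps
+# the Table 1 values unchanged):
+#
+#     setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.0)
+#     model = EfficientNet(setting, dropout=0.2)
+#     logits = model(torch.rand(1, 3, 224, 224))  # -> shape (1, 1000)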
+
+
+def _efficientnet_model(
+    arch: str,
+    inverted_residual_setting: List[MBConvConfig],
+    dropout: float,
+    pretrained: bool,
+    progress: bool,
+    **kwargs: Any
+) -> EfficientNet:
+    model = EfficientNet(inverted_residual_setting, dropout, **kwargs)
+    if pretrained:
+        if model_urls.get(arch, None) is None:
+            raise ValueError("No checkpoint is available for model type {}".format(arch))
+        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
+        model.load_state_dict(state_dict)
+    return model
+
+
+def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B0 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.0, **kwargs)
+    return _efficientnet_model("efficientnet_b0", inverted_residual_setting, 0.2, pretrained, progress, **kwargs)
+
+
+def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B1 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.1, **kwargs)
+    return _efficientnet_model("efficientnet_b1", inverted_residual_setting, 0.2, pretrained, progress, **kwargs)
+
+
+def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B2 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.1, depth_mult=1.2, **kwargs)
+    return _efficientnet_model("efficientnet_b2", inverted_residual_setting, 0.3, pretrained, progress, **kwargs)
+
+
+def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B3 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.2, depth_mult=1.4, **kwargs)
+    return _efficientnet_model("efficientnet_b3", inverted_residual_setting, 0.3, pretrained, progress, **kwargs)
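+
+
+# Illustrative sketch (not part of the module API): evaluating a pretrained
+# variant with the per-variant preprocessing wired up in
+# references/classification/train.py (B3 uses a 320px bicubic resize and a
+# 300px center crop):
+#
+#     from torchvision import transforms
+#     model = efficientnet_b3(pretrained=True).eval()
+#     preprocess = transforms.Compose([
+#         transforms.Resize(320, interpolation=transforms.InterpolationMode.BICUBIC),
+#         transforms.CenterCrop(300),
+#         transforms.ToTensor(),
+#         transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+#     ])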
+
+
+def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B4 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.4, depth_mult=1.8, **kwargs)
+    return _efficientnet_model("efficientnet_b4", inverted_residual_setting, 0.4, pretrained, progress, **kwargs)
+
+
+def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B5 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.6, depth_mult=2.2, **kwargs)
+    return _efficientnet_model("efficientnet_b5", inverted_residual_setting, 0.4, pretrained, progress,
+                               norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs)
+
+
+def efficientnet_b6(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B6 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.8, depth_mult=2.6, **kwargs)
+    return _efficientnet_model("efficientnet_b6", inverted_residual_setting, 0.5, pretrained, progress,
+                               norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs)
+
+
+def efficientnet_b7(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B7 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=2.0, depth_mult=3.1, **kwargs)
+    return _efficientnet_model("efficientnet_b7", inverted_residual_setting, 0.5, pretrained, progress,
+                               norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs)
diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py
index f3338242a76..0b95e7cca67 100644
--- a/torchvision/ops/stochastic_depth.py
+++ b/torchvision/ops/stochastic_depth.py
@@ -22,12 +22,12 @@ def stochastic_depth(input: Tensor, p: float, mode: str, training: bool = True)
     """
     if p < 0.0 or p > 1.0:
         raise ValueError("drop probability has to be between 0 and 1, but got {}".format(p))
+    if mode not in ["batch", "row"]:
+        raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode))
     if not training or p == 0.0:
         return input
 
     survival_rate = 1.0 - p
-    if mode not in ["batch", "row"]:
-        raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode))
     size = [1] * input.ndim
     if mode == "row":
         size[0] = input.shape[0]
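
For reference, a minimal sketch of the operator touched by the final hunk (illustrative only; it assumes the public `torchvision.ops.stochastic_depth` shown above):

    import torch
    from torchvision.ops import stochastic_depth

    x = torch.ones(4, 3, 8, 8)
    # mode="row" zeroes whole samples independently with probability p and
    # rescales survivors by 1 / (1 - p); mode="batch" keeps or drops the
    # batch as a whole. With training=False the input passes through unchanged.
    out = stochastic_depth(x, p=0.5, mode="row", training=True)
    assert out.shape == x.shape
    assert stochastic_depth(x, p=0.5, mode="row", training=False).equal(x)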