From 37a9ee5b3aead821dc1f795ec9274ccbeea695bb Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 11:03:37 +0100 Subject: [PATCH] Add EfficientNet Architecture in TorchVision (#4293) * Adding code skeleton * Adding MBConvConfig. * Extend SqueezeExcitation to support custom min_value and activation. * Implement MBConv. * Replace stochastic_depth with operator. * Adding the rest of the EfficientNet implementation * Update torchvision/models/efficientnet.py * Replacing 1st activation of SE with SiLU. * Adding efficientnet_b3. * Replace mobilenetv3 assets with custom. * Switch to standard sigmoid and reconfiguring BN. * Reconfiguration of efficientnet. * Add repr * Add weights. * Update weights. * Adding B5-B7 weights. * Update docs and hubconf. * Fix doc link. * Fix typo on comment. --- docs/source/models.rst | 43 +- hubconf.py | 2 + references/classification/README.md | 6 + references/classification/presets.py | 6 +- references/classification/train.py | 17 +- ...odelTester.test_efficientnet_b0_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b1_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b2_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b3_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b4_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b5_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b6_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b7_expect.pkl | Bin 0 -> 939 bytes torchvision/models/__init__.py | 1 + torchvision/models/efficientnet.py | 369 ++++++++++++++++++ torchvision/ops/stochastic_depth.py | 4 +- 16 files changed, 441 insertions(+), 7 deletions(-) create mode 100644 test/expect/ModelTester.test_efficientnet_b0_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b1_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b2_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b3_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b4_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b5_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b6_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b7_expect.pkl create mode 100644 torchvision/models/efficientnet.py diff --git a/docs/source/models.rst b/docs/source/models.rst index b9bff7a36e8..64ca69f47ae 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -27,6 +27,7 @@ architectures for image classification: - `ResNeXt`_ - `Wide ResNet`_ - `MNASNet`_ +- `EfficientNet`_ You can construct a model with random weights by calling its constructor: @@ -47,6 +48,14 @@ You can construct a model with random weights by calling its constructor: resnext50_32x4d = models.resnext50_32x4d() wide_resnet50_2 = models.wide_resnet50_2() mnasnet = models.mnasnet1_0() + efficientnet_b0 = models.efficientnet_b0() + efficientnet_b1 = models.efficientnet_b1() + efficientnet_b2 = models.efficientnet_b2() + efficientnet_b3 = models.efficientnet_b3() + efficientnet_b4 = models.efficientnet_b4() + efficientnet_b5 = models.efficientnet_b5() + efficientnet_b6 = models.efficientnet_b6() + efficientnet_b7 = models.efficientnet_b7() We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`. 
These can be constructed by passing ``pretrained=True``:
@@ -68,6 +77,14 @@ These can be constructed by passing ``pretrained=True``:
     resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
     wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
     mnasnet = models.mnasnet1_0(pretrained=True)
+    efficientnet_b0 = models.efficientnet_b0(pretrained=True)
+    efficientnet_b1 = models.efficientnet_b1(pretrained=True)
+    efficientnet_b2 = models.efficientnet_b2(pretrained=True)
+    efficientnet_b3 = models.efficientnet_b3(pretrained=True)
+    efficientnet_b4 = models.efficientnet_b4(pretrained=True)
+    efficientnet_b5 = models.efficientnet_b5(pretrained=True)
+    efficientnet_b6 = models.efficientnet_b6(pretrained=True)
+    efficientnet_b7 = models.efficientnet_b7(pretrained=True)
 
 Instancing a pre-trained model will download its weights to a cache directory.
 This directory can be set using the `TORCH_MODEL_ZOO` environment variable. See
@@ -113,7 +130,10 @@ Unfortunately, the concrete `subset` that was used is lost. For more information
 see `this discussion `_ or `these experiments `_.
 
-ImageNet 1-crop error rates (224x224)
+The sizes of the EfficientNet models depend on the variant. For the exact input sizes
+`check here `_
+
+ImageNet 1-crop error rates
 
 ================================  =============  =============
 Model                             Acc@1          Acc@5
@@ -151,6 +171,14 @@ Wide ResNet-50-2                  78.468         94.086
 Wide ResNet-101-2                 78.848         94.284
 MNASNet 1.0                       73.456         91.510
 MNASNet 0.5                       67.734         87.490
+EfficientNet-B0                   77.692         93.532
+EfficientNet-B1                   78.642         94.186
+EfficientNet-B2                   80.608         95.310
+EfficientNet-B3                   82.008         96.054
+EfficientNet-B4                   83.384         96.594
+EfficientNet-B5                   83.444         96.628
+EfficientNet-B6                   84.008         96.916
+EfficientNet-B7                   84.122         96.908
 ================================  =============  =============
 
@@ -166,6 +194,7 @@ MNASNet 0.5                       67.734         87.490
 .. _MobileNetV3: https://arxiv.org/abs/1905.02244
 .. _ResNeXt: https://arxiv.org/abs/1611.05431
 .. _MNASNet: https://arxiv.org/abs/1807.11626
+.. _EfficientNet: https://arxiv.org/abs/1905.11946
 
 .. currentmodule:: torchvision.models
@@ -267,6 +296,18 @@ MNASNet
 .. autofunction:: mnasnet1_0
 .. autofunction:: mnasnet1_3
 
+EfficientNet
+------------
+
+.. autofunction:: efficientnet_b0
+.. autofunction:: efficientnet_b1
+.. autofunction:: efficientnet_b2
+.. autofunction:: efficientnet_b3
+.. autofunction:: efficientnet_b4
+.. autofunction:: efficientnet_b5
+.. autofunction:: efficientnet_b6
+.. autofunction:: efficientnet_b7
+
 Quantized Models
 ----------------
diff --git a/hubconf.py b/hubconf.py
index 097759bdd89..2bff6850525 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -15,6 +15,8 @@
 from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small
 from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, \
     mnasnet1_3
+from torchvision.models.efficientnet import efficientnet_b0, efficientnet_b1, efficientnet_b2, \
+    efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7
 
 # segmentation
 from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, \
diff --git a/references/classification/README.md b/references/classification/README.md
index e0b7f210175..210a63c0bca 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -68,6 +68,12 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
 and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
+### EfficientNet
+
+The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
+
+The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564).
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for PyTorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex).
diff --git a/references/classification/presets.py b/references/classification/presets.py
index 6bb389ba8db..ce5a6fe414f 100644
--- a/references/classification/presets.py
+++ b/references/classification/presets.py
@@ -1,4 +1,5 @@
 from torchvision.transforms import autoaugment, transforms
+from torchvision.transforms.functional import InterpolationMode
 
 
 class ClassificationPresetTrain:
@@ -24,10 +25,11 @@ def __call__(self, img):
 
 
 class ClassificationPresetEval:
-    def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
+    def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
+                 interpolation=InterpolationMode.BILINEAR):
 
         self.transforms = transforms.Compose([
-            transforms.Resize(resize_size),
+            transforms.Resize(resize_size, interpolation=interpolation),
             transforms.CenterCrop(crop_size),
             transforms.ToTensor(),
             transforms.Normalize(mean=mean, std=std),
diff --git a/references/classification/train.py b/references/classification/train.py
index b4e9d274662..9ba99b3dc54 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -6,6 +6,7 @@
 import torch.utils.data
 from torch import nn
 import torchvision
+from torchvision.transforms.functional import InterpolationMode
 
 import presets
 import utils
@@ -82,7 +83,18 @@ def _get_cache_path(filepath):
 def load_data(traindir, valdir, args):
     # Data loading code
     print("Loading data")
-    resize_size, crop_size = (342, 299) if args.model == 'inception_v3' else (256, 224)
+    resize_size, crop_size = 256, 224
+    interpolation = InterpolationMode.BILINEAR
+    if args.model == 'inception_v3':
+        resize_size, crop_size = 342, 299
+    elif args.model.startswith('efficientnet_'):
+        sizes = {
+            'b0': (256, 224), 'b1': (256, 240), 'b2': (288, 288), 'b3': (320, 300),
+            'b4': (384, 380), 'b5': (456, 456), 'b6': (528, 528), 'b7': (600, 600),
+        }
+        e_type = args.model.replace('efficientnet_', '')
+        resize_size, crop_size = sizes[e_type]
+        interpolation = InterpolationMode.BICUBIC
 
     print("Loading training data")
     st = time.time()
@@ -113,7 +125,8 @@ def load_data(traindir, valdir, args):
     else:
         dataset_test = torchvision.datasets.ImageFolder(
             valdir,
-            presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size))
+            presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size,
+                                             interpolation=interpolation))
 
         if args.cache_dataset:
             print("Saving dataset_test to {}".format(cache_path))
             utils.mkdir(os.path.dirname(cache_path))
diff --git a/test/expect/ModelTester.test_efficientnet_b0_expect.pkl b/test/expect/ModelTester.test_efficientnet_b0_expect.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1de871ce0fbea9ddbab7e315b05f864bc5f6fa53
GIT binary patch
[base85 payloads omitted: eight new 939-byte expect files, ModelTester.test_efficientnet_b0_expect.pkl through ModelTester.test_efficientnet_b7_expect.pkl]
[one-line hunk adding the efficientnet import to torchvision/models/__init__.py omitted]
diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
new file mode 100644
--- /dev/null
+++ b/torchvision/models/efficientnet.py
[module header omitted: imports, __all__ and the model_urls mapping]
+class SqueezeExcitation(nn.Module):
+    def __init__(self, input_channels: int, squeeze_channels: int) -> None:
+        super().__init__()
+        self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
+        self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
+
+    def _scale(self, input: Tensor) -> Tensor:
+        scale = F.adaptive_avg_pool2d(input, 1)
+        scale = self.fc1(scale)
+        scale = F.silu(scale, inplace=True)
+        scale = self.fc2(scale)
+        return scale.sigmoid()
+
+    def forward(self, input: Tensor) -> Tensor:
+        scale = self._scale(input)
+        return scale * input
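+
+
+# Illustrative sketch (not part of the module API): the SE block squeezes the
+# spatial dimensions to 1x1, gates the channels through fc1 -> SiLU -> fc2 ->
+# sigmoid, and rescales the input, so output and input shapes match:
+#
+#     se = SqueezeExcitation(input_channels=64, squeeze_channels=16)
+#     x = torch.rand(2, 64, 32, 32)
+#     assert se(x).shape == x.shape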
+
+
+class MBConvConfig:
+    # Stores information listed at Table 1 of the EfficientNet paper
+    def __init__(self,
+                 expand_ratio: float, kernel: int, stride: int,
+                 input_channels: int, out_channels: int, num_layers: int,
+                 width_mult: float, depth_mult: float) -> None:
+        self.expand_ratio = expand_ratio
+        self.kernel = kernel
+        self.stride = stride
+        self.input_channels = self.adjust_channels(input_channels, width_mult)
+        self.out_channels = self.adjust_channels(out_channels, width_mult)
+        self.num_layers = self.adjust_depth(num_layers, depth_mult)
+
+    def __repr__(self) -> str:
+        s = self.__class__.__name__ + '('
+        s += 'expand_ratio={expand_ratio}'
+        s += ', kernel={kernel}'
+        s += ', stride={stride}'
+        s += ', input_channels={input_channels}'
+        s += ', out_channels={out_channels}'
+        s += ', num_layers={num_layers}'
+        s += ')'
+        return s.format(**self.__dict__)
+
+    @staticmethod
+    def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int:
+        return _make_divisible(channels * width_mult, 8, min_value)
+
+    @staticmethod
+    def adjust_depth(num_layers: int, depth_mult: float) -> int:
+        return int(math.ceil(num_layers * depth_mult))
+
+
+class MBConv(nn.Module):
+    def __init__(self, cnf: MBConvConfig, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module],
+                 se_layer: Callable[..., nn.Module] = SqueezeExcitation) -> None:
+        super().__init__()
+
+        if not (1 <= cnf.stride <= 2):
+            raise ValueError('illegal stride value')
+
+        self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
+
+        layers: List[nn.Module] = []
+        activation_layer = nn.SiLU
+
+        # expand
+        expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
+        if expanded_channels != cnf.input_channels:
+            layers.append(ConvBNActivation(cnf.input_channels, expanded_channels, kernel_size=1,
+                                           norm_layer=norm_layer, activation_layer=activation_layer))
+
+        # depthwise
+        layers.append(ConvBNActivation(expanded_channels, expanded_channels, kernel_size=cnf.kernel,
+                                       stride=cnf.stride, groups=expanded_channels,
+                                       norm_layer=norm_layer, activation_layer=activation_layer))
+
+        # squeeze and excitation
+        squeeze_channels = max(1, cnf.input_channels // 4)
+        layers.append(se_layer(expanded_channels, squeeze_channels))
+
+        # project
+        layers.append(ConvBNActivation(expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer,
+                                       activation_layer=nn.Identity))
+
+        self.block = nn.Sequential(*layers)
+        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
+        self.out_channels = cnf.out_channels
+
+    def forward(self, input: Tensor) -> Tensor:
+        result = self.block(input)
+        if self.use_res_connect:
+            result = self.stochastic_depth(result)
+            result += input
+        return result
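+
+
+# Illustrative sketch (not part of the module API): the compound-scaling
+# arithmetic of MBConvConfig. With the B2 multipliers (width_mult=1.1,
+# depth_mult=1.2), channel counts are scaled and rounded to multiples of 8
+# by _make_divisible, while layer counts are scaled with ceil:
+#
+#     cnf = MBConvConfig(6, 3, 2, 16, 24, 2, width_mult=1.1, depth_mult=1.2)
+#     assert (cnf.input_channels, cnf.out_channels, cnf.num_layers) == (16, 24, 3)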
+
+
+class EfficientNet(nn.Module):
+    def __init__(
+            self,
+            inverted_residual_setting: List[MBConvConfig],
+            dropout: float,
+            stochastic_depth_prob: float = 0.2,
+            num_classes: int = 1000,
+            block: Optional[Callable[..., nn.Module]] = None,
+            norm_layer: Optional[Callable[..., nn.Module]] = None,
+            **kwargs: Any
+    ) -> None:
+        """
+        EfficientNet main class
+
+        Args:
+            inverted_residual_setting (List[MBConvConfig]): Network structure
+            dropout (float): The dropout probability
+            stochastic_depth_prob (float): The stochastic depth probability
+            num_classes (int): Number of classes
+            block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block
+            norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
+        """
+        super().__init__()
+
+        if not inverted_residual_setting:
+            raise ValueError("The inverted_residual_setting should not be empty")
+        elif not (isinstance(inverted_residual_setting, Sequence) and
+                  all([isinstance(s, MBConvConfig) for s in inverted_residual_setting])):
+            raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")
+
+        if block is None:
+            block = MBConv
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        layers: List[nn.Module] = []
+
+        # building first layer
+        firstconv_output_channels = inverted_residual_setting[0].input_channels
+        layers.append(ConvBNActivation(3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer,
+                                       activation_layer=nn.SiLU))
+
+        # building inverted residual blocks
+        total_stage_blocks = sum([cnf.num_layers for cnf in inverted_residual_setting])
+        stage_block_id = 0
+        for cnf in inverted_residual_setting:
+            stage: List[nn.Module] = []
+            for _ in range(cnf.num_layers):
+                # copy to avoid modifications. shallow copy is enough
+                block_cnf = copy.copy(cnf)
+
+                # overwrite info if not the first conv in the stage
+                if stage:
+                    block_cnf.input_channels = block_cnf.out_channels
+                    block_cnf.stride = 1
+
+                # adjust stochastic depth probability based on the depth of the stage block
+                sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks
+
+                stage.append(block(block_cnf, sd_prob, norm_layer))
+                stage_block_id += 1
+
+            layers.append(nn.Sequential(*stage))
+
+        # building last several layers
+        lastconv_input_channels = inverted_residual_setting[-1].out_channels
+        lastconv_output_channels = 4 * lastconv_input_channels
+        layers.append(ConvBNActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1,
+                                       norm_layer=norm_layer, activation_layer=nn.SiLU))
+
+        self.features = nn.Sequential(*layers)
+        self.avgpool = nn.AdaptiveAvgPool2d(1)
+        self.classifier = nn.Sequential(
+            nn.Dropout(p=dropout, inplace=True),
+            nn.Linear(lastconv_output_channels, num_classes),
+        )
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.ones_(m.weight)
+                nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Linear):
+                init_range = 1.0 / math.sqrt(m.out_features)
+                nn.init.uniform_(m.weight, -init_range, init_range)
+                nn.init.zeros_(m.bias)
+
+    def _forward_impl(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+
+        x = self.classifier(x)
+
+        return x
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self._forward_impl(x)
+
+
+def _efficientnet_conf(width_mult: float, depth_mult: float, **kwargs: Any) -> List[MBConvConfig]:
+    bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult)
+    inverted_residual_setting = [
+        bneck_conf(1, 3, 1, 32, 16, 1),
+        bneck_conf(6, 3, 2, 16, 24, 2),
+        bneck_conf(6, 5, 2, 24, 40, 2),
+        bneck_conf(6, 3, 2, 40, 80, 3),
+        bneck_conf(6, 5, 1, 80, 112, 3),
+        bneck_conf(6, 5, 2, 112, 192, 4),
+        bneck_conf(6, 3, 1, 192, 320, 1),
+    ]
+    return inverted_residual_setting
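+
+
+# Illustrative sketch (not part of the module API): a B0-sized network built
+# straight from the baseline configuration (width_mult=depth_mult=1.0 keeps
+# the Table 1 values unchanged):
+#
+#     setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.0)
+#     model = EfficientNet(setting, dropout=0.2)
+#     logits = model(torch.rand(1, 3, 224, 224))  # -> shape (1, 1000)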
+
+
+def _efficientnet_model(
+    arch: str,
+    inverted_residual_setting: List[MBConvConfig],
+    dropout: float,
+    pretrained: bool,
+    progress: bool,
+    **kwargs: Any
+) -> EfficientNet:
+    model = EfficientNet(inverted_residual_setting, dropout, **kwargs)
+    if pretrained:
+        if model_urls.get(arch, None) is None:
+            raise ValueError("No checkpoint is available for model type {}".format(arch))
+        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
+        model.load_state_dict(state_dict)
+    return model
+
+
+def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B0 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.0, **kwargs)
+    return _efficientnet_model("efficientnet_b0", inverted_residual_setting, 0.2, pretrained, progress, **kwargs)
+
+
+def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B1 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.1, **kwargs)
+    return _efficientnet_model("efficientnet_b1", inverted_residual_setting, 0.2, pretrained, progress, **kwargs)
+
+
+def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B2 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.1, depth_mult=1.2, **kwargs)
+    return _efficientnet_model("efficientnet_b2", inverted_residual_setting, 0.3, pretrained, progress, **kwargs)
+
+
+def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B3 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.2, depth_mult=1.4, **kwargs)
+    return _efficientnet_model("efficientnet_b3", inverted_residual_setting, 0.3, pretrained, progress, **kwargs)
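+
+
+# Illustrative sketch (not part of the module API): evaluating a pretrained
+# variant with the per-variant preprocessing wired up in
+# references/classification/train.py (B3 uses a 320px bicubic resize and a
+# 300px center crop):
+#
+#     from torchvision import transforms
+#     model = efficientnet_b3(pretrained=True).eval()
+#     preprocess = transforms.Compose([
+#         transforms.Resize(320, interpolation=transforms.InterpolationMode.BICUBIC),
+#         transforms.CenterCrop(300),
+#         transforms.ToTensor(),
+#         transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+#     ])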
+
+
+def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B4 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.4, depth_mult=1.8, **kwargs)
+    return _efficientnet_model("efficientnet_b4", inverted_residual_setting, 0.4, pretrained, progress, **kwargs)
+
+
+def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B5 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.6, depth_mult=2.2, **kwargs)
+    return _efficientnet_model("efficientnet_b5", inverted_residual_setting, 0.4, pretrained, progress,
+                               norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs)
+
+
+def efficientnet_b6(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B6 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=1.8, depth_mult=2.6, **kwargs)
+    return _efficientnet_model("efficientnet_b6", inverted_residual_setting, 0.5, pretrained, progress,
+                               norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs)
+
+
+def efficientnet_b7(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNet B7 architecture from
+    `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    inverted_residual_setting = _efficientnet_conf(width_mult=2.0, depth_mult=3.1, **kwargs)
+    return _efficientnet_model("efficientnet_b7", inverted_residual_setting, 0.5, pretrained, progress,
+                               norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs)
diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py
index f3338242a76..0b95e7cca67 100644
--- a/torchvision/ops/stochastic_depth.py
+++ b/torchvision/ops/stochastic_depth.py
@@ -22,12 +22,12 @@ def stochastic_depth(input: Tensor, p: float, mode: str, training: bool = True)
     """
     if p < 0.0 or p > 1.0:
         raise ValueError("drop probability has to be between 0 and 1, but got {}".format(p))
+    if mode not in ["batch", "row"]:
+        raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode))
     if not training or p == 0.0:
         return input
 
     survival_rate = 1.0 - p
-    if mode not in ["batch", "row"]:
-        raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode))
     size = [1] * input.ndim
     if mode == "row":
         size[0] = input.shape[0]
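
For reference, a minimal sketch of the operator touched by the final hunk (illustrative only; it assumes the public `torchvision.ops.stochastic_depth` shown above):

    import torch
    from torchvision.ops import stochastic_depth

    x = torch.ones(4, 3, 8, 8)
    # mode="row" zeroes whole samples independently with probability p and
    # rescales survivors by 1 / (1 - p); mode="batch" keeps or drops the
    # batch as a whole. With training=False the input passes through unchanged.
    out = stochastic_depth(x, p=0.5, mode="row", training=True)
    assert out.shape == x.shape
    assert stochastic_depth(x, p=0.5, mode="row", training=False).equal(x)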