Drop support for num gs parametrization in anynet (#40)
Summary: Pull Request resolved: #40

Reviewed By: vedanuj

Differential Revision: D20983462

Pulled By: rajprateek

fbshipit-source-id: 0019c9fbc179b301b8ca4b2732d5454caf51b902
ir413 authored and facebook-github-bot committed Apr 11, 2020
1 parent a86c9d5 commit 708d429
Showing 15 changed files with 55 additions and 59 deletions.
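
This change replaces the old two-knob scheme (ANYNET.GROUPS plus the ANYNET.GW_PARAM flag, which reinterpreted the same list as either group counts or group widths) with a single ANYNET.GROUP_WS list of per-stage group widths; the group count is now always derived from the bottleneck width. A minimal sketch of the derivation (the function name is illustrative, not from the repo):

```python
def num_groups(w_out, bm, gw):
    """Group count used by the bottleneck transform (sketch)."""
    w_b = int(round(w_out * bm))  # bottleneck width
    return w_b // gw              # groups = bottleneck width // group width

# ResNet-style stage -> 1 group; ResNeXt 32x4d-style stage -> 32 groups
assert num_groups(256, 0.25, 64) == 1
assert num_groups(256, 0.5, 4) == 32
```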
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/R-101-1x64d_step_1gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.25, 0.25, 0.25, 0.25]
-  GROUPS: [1, 1, 1, 1]
+  GROUP_WS: [64, 128, 256, 512]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
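
The R-* configs stay functionally identical: with BOT_MULS of 0.25 the per-stage bottleneck widths are [64, 128, 256, 512], so setting each group width equal to its bottleneck width yields one group per stage, exactly as the old GROUPS: [1, 1, 1, 1]. A quick illustrative check:

```python
widths, bot_muls = [256, 512, 1024, 2048], [0.25] * 4
group_ws = [64, 128, 256, 512]
for w, bm, gw in zip(widths, bot_muls, group_ws):
    w_b = int(round(w * bm))  # bottleneck width per stage
    assert w_b // gw == 1     # matches the old GROUPS: [1, 1, 1, 1]
```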
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/R-101-1x64d_step_2gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.25, 0.25, 0.25, 0.25]
-  GROUPS: [1, 1, 1, 1]
+  GROUP_WS: [64, 128, 256, 512]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/R-101-1x64d_step_8gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.25, 0.25, 0.25, 0.25]
-  GROUPS: [1, 1, 1, 1]
+  GROUP_WS: [64, 128, 256, 512]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/R-50-1x64d_step_1gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.25, 0.25, 0.25, 0.25]
-  GROUPS: [1, 1, 1, 1]
+  GROUP_WS: [64, 128, 256, 512]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/R-50-1x64d_step_2gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.25, 0.25, 0.25, 0.25]
-  GROUPS: [1, 1, 1, 1]
+  GROUP_WS: [64, 128, 256, 512]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/R-50-1x64d_step_8gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.25, 0.25, 0.25, 0.25]
-  GROUPS: [1, 1, 1, 1]
+  GROUP_WS: [64, 128, 256, 512]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/X-101-32x4d_step_1gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.5, 0.5, 0.5, 0.5]
-  GROUPS: [32, 32, 32, 32]
+  GROUP_WS: [4, 8, 16, 32]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
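
The X-* (32x4d) configs are likewise equivalent: with BOT_MULS of 0.5 the bottleneck widths are [128, 256, 512, 1024], and group widths of [4, 8, 16, 32] recover 32 groups in every stage. Illustrative check:

```python
widths, bot_muls = [256, 512, 1024, 2048], [0.5] * 4
group_ws = [4, 8, 16, 32]
for w, bm, gw in zip(widths, bot_muls, group_ws):
    w_b = int(round(w * bm))  # bottleneck width per stage
    assert w_b // gw == 32    # matches the old GROUPS: [32, 32, 32, 32]
```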
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/X-101-32x4d_step_2gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.5, 0.5, 0.5, 0.5]
-  GROUPS: [32, 32, 32, 32]
+  GROUP_WS: [4, 8, 16, 32]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/X-101-32x4d_step_8gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.5, 0.5, 0.5, 0.5]
-  GROUPS: [32, 32, 32, 32]
+  GROUP_WS: [4, 8, 16, 32]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/X-50-32x4d_step_1gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.5, 0.5, 0.5, 0.5]
-  GROUPS: [32, 32, 32, 32]
+  GROUP_WS: [4, 8, 16, 32]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/X-50-32x4d_step_2gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.5, 0.5, 0.5, 0.5]
-  GROUPS: [32, 32, 32, 32]
+  GROUP_WS: [4, 8, 16, 32]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
2 changes: 1 addition & 1 deletion configs/imagenet/anynet/X-50-32x4d_step_8gpu.yaml
@@ -9,7 +9,7 @@ ANYNET:
   WIDTHS: [256, 512, 1024, 2048]
   STRIDES: [1, 2, 2, 2]
   BOT_MULS: [0.5, 0.5, 0.5, 0.5]
-  GROUPS: [32, 32, 32, 32]
+  GROUP_WS: [4, 8, 16, 32]
 BN:
   ZERO_INIT_FINAL_GAMMA: True
 OPTIM:
7 changes: 2 additions & 5 deletions pycls/core/config.py
@@ -83,11 +83,8 @@
 # Bottleneck multipliers for each stage (applies to bottleneck block)
 _C.ANYNET.BOT_MULS = []
 
-# Group width parametrization (number of groups otherwise)
-_C.ANYNET.GW_PARAM = False
-
-# Group param for each stage (number of groups or group width)
-_C.ANYNET.GROUPS = []
+# Group widths for each stage (applies to bottleneck block)
+_C.ANYNET.GROUP_WS = []
 
 # Whether SE is enabled for res_bottleneck_block
 _C.ANYNET.SE_ENABLED = False
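
Configs written against the dropped keys can be migrated mechanically: with GW_PARAM False, GROUPS held group counts, so the new group width is the bottleneck width divided by the old count; with GW_PARAM True, GROUPS already held group widths. A one-off converter sketch (not part of this repo):

```python
def migrate_groups(widths, bot_muls, groups, gw_param):
    """Convert the removed (GROUPS, GW_PARAM) pair to GROUP_WS."""
    if gw_param:
        return list(groups)  # values were already group widths
    w_bs = [int(round(w * bm)) for w, bm in zip(widths, bot_muls)]
    return [w_b // g for w_b, g in zip(w_bs, groups)]

# R-101: GROUPS [1, 1, 1, 1] -> GROUP_WS [64, 128, 256, 512]
assert migrate_groups([256, 512, 1024, 2048], [0.25] * 4, [1] * 4, False) == [64, 128, 256, 512]
```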
48 changes: 23 additions & 25 deletions pycls/models/anynet.py
@@ -82,9 +82,9 @@ def forward(self, x):
 class VanillaBlock(nn.Module):
     """Vanilla block: [3x3 conv, BN, Relu] x2"""
 
-    def __init__(self, w_in, w_out, stride, bm=None, g=None, gw=None):
+    def __init__(self, w_in, w_out, stride, bm=None, gw=None):
         assert (
-            bm is None and g is None and gw is None
+            bm is None and gw is None
         ), "Vanilla block does not support bm and g options"
         super(VanillaBlock, self).__init__()
         self._construct(w_in, w_out, stride)
@@ -135,9 +135,9 @@ def forward(self, x):
 class ResBasicBlock(nn.Module):
     """Residual basic block: x + F(x), F = basic transform"""
 
-    def __init__(self, w_in, w_out, stride, bm=None, g=None, gw=None):
+    def __init__(self, w_in, w_out, stride, bm=None, gw=None):
         assert (
-            bm is None and g is None and gw is None
+            bm is None and gw is None
         ), "Basic transform does not support bm and g options"
         super(ResBasicBlock, self).__init__()
         self._construct(w_in, w_out, stride)
@@ -168,15 +168,15 @@ def forward(self, x):
 class BottleneckTransform(nn.Module):
     """Bottlenect transformation: 1x1, 3x3, 1x1"""
 
-    def __init__(self, w_in, w_out, stride, bm, g, gw):
+    def __init__(self, w_in, w_out, stride, bm, gw):
         super(BottleneckTransform, self).__init__()
-        self._construct(w_in, w_out, stride, bm, g, gw)
+        self._construct(w_in, w_out, stride, bm, gw)
 
-    def _construct(self, w_in, w_out, stride, bm, g, gw):
+    def _construct(self, w_in, w_out, stride, bm, gw):
         # Compute the bottleneck width
         w_b = int(round(w_out * bm))
         # Compute the number of groups
-        num_gs = w_b // g if gw else g
+        num_gs = w_b // gw
         # 1x1, BN, ReLU
         self.a = nn.Conv2d(w_in, w_b, kernel_size=1, stride=1, padding=0, bias=False)
         self.a_bn = nn.BatchNorm2d(w_b, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
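
With num_gs in hand, the middle of the transform (elided from this hunk) applies a 3x3 grouped convolution over the bottleneck width. A self-contained shape check, assuming PyTorch, with values chosen to mirror the R-50 stage-1 setting:

```python
import torch
import torch.nn as nn

w_out, bm, gw = 256, 0.25, 64
w_b = int(round(w_out * bm))  # bottleneck width: 64
num_gs = w_b // gw            # number of groups: 1

# 3x3 grouped conv over the bottleneck width (the layer this hunk elides)
b = nn.Conv2d(w_b, w_b, kernel_size=3, stride=1, padding=1, groups=num_gs, bias=False)
x = torch.randn(2, w_b, 56, 56)
assert b(x).shape == (2, w_b, 56, 56)
```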
@@ -207,22 +207,22 @@ def forward(self, x):
 class ResBottleneckBlock(nn.Module):
     """Residual bottleneck block: x + F(x), F = bottleneck transform"""
 
-    def __init__(self, w_in, w_out, stride, bm=1.0, g=1, gw=False):
+    def __init__(self, w_in, w_out, stride, bm=1.0, gw=1):
         super(ResBottleneckBlock, self).__init__()
-        self._construct(w_in, w_out, stride, bm, g, gw)
+        self._construct(w_in, w_out, stride, bm, gw)
 
     def _add_skip_proj(self, w_in, w_out, stride):
         self.proj = nn.Conv2d(
             w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
         )
         self.bn = nn.BatchNorm2d(w_out, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
 
-    def _construct(self, w_in, w_out, stride, bm, g, gw):
+    def _construct(self, w_in, w_out, stride, bm, gw):
         # Use skip connection with projection if shape changes
         self.proj_block = (w_in != w_out) or (stride != 1)
         if self.proj_block:
             self._add_skip_proj(w_in, w_out, stride)
-        self.f = BottleneckTransform(w_in, w_out, stride, bm, g, gw)
+        self.f = BottleneckTransform(w_in, w_out, stride, bm, gw)
         self.relu = nn.ReLU(cfg.MEM.RELU_INPLACE)
 
     def forward(self, x):
@@ -301,19 +301,19 @@ def forward(self, x):
 class AnyStage(nn.Module):
     """AnyNet stage (sequence of blocks w/ the same output shape)."""
 
-    def __init__(self, w_in, w_out, stride, d, block_fun, bm, g, gw):
+    def __init__(self, w_in, w_out, stride, d, block_fun, bm, gw):
         super(AnyStage, self).__init__()
-        self._construct(w_in, w_out, stride, d, block_fun, bm, g, gw)
+        self._construct(w_in, w_out, stride, d, block_fun, bm, gw)
 
-    def _construct(self, w_in, w_out, stride, d, block_fun, bm, g, gw):
+    def _construct(self, w_in, w_out, stride, d, block_fun, bm, gw):
         # Construct the blocks
         for i in range(d):
             # Stride and w_in apply to the first block of the stage
             b_stride = stride if i == 0 else 1
             b_w_in = w_in if i == 0 else w_out
             # Construct the block
             self.add_module(
-                "b{}".format(i + 1), block_fun(b_w_in, w_out, b_stride, bm, g, gw)
+                "b{}".format(i + 1), block_fun(b_w_in, w_out, b_stride, bm, gw)
             )
 
     def forward(self, x):
@@ -336,8 +336,7 @@ def __init__(self, **kwargs):
                 ws=kwargs["ws"],
                 ss=kwargs["ss"],
                 bms=kwargs["bms"],
-                gs=kwargs["gs"],
-                gw=kwargs["gw"],
+                gws=kwargs["gws"],
                 nc=kwargs["nc"],
             )
         else:
@@ -349,28 +348,27 @@ def __init__(self, **kwargs):
                 ws=cfg.ANYNET.WIDTHS,
                 ss=cfg.ANYNET.STRIDES,
                 bms=cfg.ANYNET.BOT_MULS,
-                gs=cfg.ANYNET.GROUPS,
-                gw=cfg.ANYNET.GW_PARAM,
+                gws=cfg.ANYNET.GROUP_WS,
                 nc=cfg.MODEL.NUM_CLASSES,
             )
         self.apply(nu.init_weights)
 
-    def _construct(self, stem_type, stem_w, block_type, ds, ws, ss, bms, gs, gw, nc):
+    def _construct(self, stem_type, stem_w, block_type, ds, ws, ss, bms, gws, nc):
         logger.info("Constructing AnyNet: ds={}, ws={}".format(ds, ws))
         # Generate dummy bot muls and gs for models that do not use them
         bms = bms if bms else [1.0 for _d in ds]
-        gs = gs if gs else [1 for _d in ds]
+        gws = gws if gws else [1 for _d in ds]
         # Group params by stage
-        stage_params = list(zip(ds, ws, ss, bms, gs))
+        stage_params = list(zip(ds, ws, ss, bms, gws))
         # Construct the stem
         stem_fun = get_stem_fun(stem_type)
         self.stem = stem_fun(3, stem_w)
         # Construct the stages
         block_fun = get_block_fun(block_type)
         prev_w = stem_w
-        for i, (d, w, s, bm, g) in enumerate(stage_params):
+        for i, (d, w, s, bm, gw) in enumerate(stage_params):
             self.add_module(
-                "s{}".format(i + 1), AnyStage(prev_w, w, s, d, block_fun, bm, g, gw)
+                "s{}".format(i + 1), AnyStage(prev_w, w, s, d, block_fun, bm, gw)
             )
             prev_w = w
         # Construct the head
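
After the change, each stage is described by a (depth, width, stride, bottleneck multiplier, group width) tuple, and the GW_PARAM branch is gone entirely. An illustrative view of the per-stage parameters for an X-50-32x4d-style model:

```python
ds, ws, ss = [3, 4, 6, 3], [256, 512, 1024, 2048], [1, 2, 2, 2]
bms, gws = [0.5] * 4, [4, 8, 16, 32]
stage_params = list(zip(ds, ws, ss, bms, gws))
# each tuple parametrizes one AnyStage: (d, w, s, bm, gw)
assert stage_params[0] == (3, 256, 1, 0.5, 4)
```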
35 changes: 18 additions & 17 deletions pycls/models/regnet.py
@@ -33,8 +33,8 @@ def adjust_ws_gs_comp(ws, bms, gs):
 def get_stages_from_blocks(ws, rs):
     """Gets ws/ds of network at each stage from per block values."""
     ts = [
-        w != wp or r != rp for w, wp, r, rp in
-        zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
+        w != wp or r != rp
+        for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
     ]
     s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
     s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
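
For reference, get_stages_from_blocks collapses per-block values into per-stage widths and depths by flagging the blocks where the (w, r) pair changes. A worked example with hypothetical per-block widths passed as both arguments, as the RegNet constructor does below:

```python
import numpy as np

b_ws = [32, 32, 64, 64, 64, 128]  # hypothetical per-block widths
ts = [
    w != wp or r != rp
    for w, wp, r, rp in zip(b_ws + [0], [0] + b_ws, b_ws + [0], [0] + b_ws)
]
s_ws = [w for w, t in zip(b_ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
assert (s_ws, s_ds) == ([32, 64, 128], [2, 3, 1])
```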
@@ -64,22 +64,23 @@ def __init__(self):
         # Convert to per stage format
         ws, ds = get_stages_from_blocks(b_ws, b_ws)
         # Generate group widths and bot muls
-        gs = [cfg.REGNET.GROUP_W for _ in range(num_s)]
+        gws = [cfg.REGNET.GROUP_W for _ in range(num_s)]
         bms = [cfg.REGNET.BOT_MUL for _ in range(num_s)]
-        # Adjust the compatibility of ws and gs
-        ws, gs = adjust_ws_gs_comp(ws, bms, gs)
+        # Adjust the compatibility of ws and gws
+        ws, gws = adjust_ws_gs_comp(ws, bms, gws)
         # Use the same stride for each stage
         ss = [cfg.REGNET.STRIDE for _ in range(num_s)]
         # Construct the model
-        super(RegNet, self).__init__(**{
-            "stem_type": cfg.REGNET.STEM_TYPE,
-            "stem_w": cfg.REGNET.STEM_W,
-            "block_type": cfg.REGNET.BLOCK_TYPE,
-            "ss": ss,
-            "ds": ds,
-            "ws": ws,
-            "bms": bms,
-            "gs": gs,
-            "gw": cfg.REGNET.GW_PARAM,
-            "nc": cfg.MODEL.NUM_CLASSES,
-        })
+        super(RegNet, self).__init__(
+            **{
+                "stem_type": cfg.REGNET.STEM_TYPE,
+                "stem_w": cfg.REGNET.STEM_W,
+                "block_type": cfg.REGNET.BLOCK_TYPE,
+                "ss": ss,
+                "ds": ds,
+                "ws": ws,
+                "bms": bms,
+                "gws": gws,
+                "nc": cfg.MODEL.NUM_CLASSES,
+            }
+        )
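
adjust_ws_gs_comp (visible only in the hunk header above) keeps widths and group widths mutually compatible: each group width is capped at its stage's bottleneck width, and each bottleneck width is rounded to a multiple of the group width. A sketch of that contract (an assumption about the elided body, not a verbatim copy):

```python
def adjust_ws_gs_comp(ws, bms, gws):
    """Adjust per-stage widths and group widths for compatibility (sketch)."""
    ws_bot = [int(w * b) for w, b in zip(ws, bms)]          # bottleneck widths
    gws = [min(g, w_bot) for g, w_bot in zip(gws, ws_bot)]  # cap gw at w_b
    # round each bottleneck width to the nearest positive multiple of its gw
    ws_bot = [max(int(round(w_bot / g)) * g, g) for w_bot, g in zip(ws_bot, gws)]
    ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]  # back to block widths
    return ws, gws
```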
