Skip to content

Commit

Permalink
release rsb_a3 models
Browse files Browse the repository at this point in the history
  • Loading branch information
Lupin1998 committed Feb 26, 2023
1 parent d5b4a94 commit cb7ae3b
Show file tree
Hide file tree
Showing 25 changed files with 961 additions and 38 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# ConvMixer-768/d32 on ImageNet-1k with the RSB A3 recipe:
# 160x160 input, total batch 2048, LAMB + BCE mixup/cutmix loss, 100 epochs.
_base_ = [
    '../../_base_/models/convmixer/convmixer_768_d32.py',
    '../../_base_/datasets/imagenet/rsb_a3_sz160_8xbs256.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(
    alpha=[0.1, 1.0,],  # RSB A3: mixup alpha=0.1, cutmix alpha=1.0
    mix_mode=["mixup", "cutmix",],
    head=dict(
        type='ClsMixupHead',
        loss=dict(type='CrossEntropyLoss',  # mixup BCE loss (one-hot encoding)
                  use_soft=False, use_sigmoid=True, loss_weight=1.0),
        with_avg_pool=True, multi_label=True, two_hot=False,
        in_channels=768, num_classes=1000),
)

# data
data = dict(imgs_per_gpu=256, workers_per_gpu=12)

# additional hooks
update_interval = 1  # 256 x 8gpus x 1 accumulates = bs2048

# optimizer
optimizer = dict(
    type='LAMB', lr=0.006, weight_decay=0.02,
    paramwise_options={
        # No weight decay on norm layers, biases, and layer-scale params.
        # NOTE: raw string fixes the invalid '\d' escape (SyntaxError from
        # Python 3.12 on); the runtime regex string itself is unchanged.
        r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'layer_scale': dict(weight_decay=0.),
    })

# fp16
use_fp16 = True
fp16 = dict(type='mmcv', loss_scale='dynamic')
optimizer_config = dict(
    grad_clip=dict(max_norm=5.0), update_interval=update_interval)

# lr scheduler
lr_config = dict(
    policy='CosineAnnealing',
    by_epoch=False, min_lr=1e-6,
    warmup='linear',
    warmup_iters=5, warmup_by_epoch=True,  # warmup for 5 epochs
    warmup_ratio=1e-5,
)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
3 changes: 3 additions & 0 deletions configs/classification/imagenet/convnext/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ This page is based on documents in [MMClassification](https://github.com/open-mm
| ConvNeXt-B\* | From scratch | 88.59 | 15.36 | 83.85 | 96.74 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/convnext/convnext_base_8xb128_accu4_fp16_ep300.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
| ConvNeXt-L\* | From scratch | 197.77 | 34.37 | 84.30 | 96.89 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/convnext/convnext_large_8xb64_accu8_fp16_ep300.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
| ConvNeXt-XL\* | ImageNet-21k | 350.20 | 60.93 | 86.97 | 98.20 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/convnext/convnext_xlarge_8xb64_accu8_fp16_ep300.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
| ConvNeXt-T (A3) | From scratch | 28.59 | 4.46 | 78.82 | 94.20 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/convnext/convnext_tiny_rsb_a3_sz160_8xb256_ep100.py) | [model](https://github.com/Westlake-AI/openmixup/releases/download/rsb-a3-weights/convnext_tiny_rsb_a3_sz160_8xb256_ep100.pth) \| [log](https://github.com/Westlake-AI/openmixup/releases/download/rsb-a3-weights/convnext_tiny_rsb_a3_sz160_8xb256_ep100.log.json) |
| ConvNeXt-S (A3) | From scratch | 50.22 | 8.69 | 80.10 | 94.91 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/convnext/convnext_small_rsb_a3_sz160_8xb256_ep100.py) | [model](https://github.com/Westlake-AI/openmixup/releases/download/rsb-a3-weights/convnext_small_rsb_a3_sz160_8xb256_ep100.pth) \| [log](https://github.com/Westlake-AI/openmixup/releases/download/rsb-a3-weights/convnext_small_rsb_a3_sz160_8xb256_ep100.log.json) |
| ConvNeXt-B (A3) | From scratch | 88.59 | 15.36 | 80.90 | 95.30 | [config](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/convnext/convnext_base_rsb_a3_sz160_8xb256_ep100.py) | [model](https://github.com/Westlake-AI/openmixup/releases/download/rsb-a3-weights/convnext_base_rsb_a3_sz160_8xb256_ep100.pth) \| [log](https://github.com/Westlake-AI/openmixup/releases/download/rsb-a3-weights/convnext_base_rsb_a3_sz160_8xb256_ep100.log.json) |

We follow the original training setting provided by the [official repo](https://github.com/facebookresearch/ConvNeXt). *Models marked with \* are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt).* We also reproduce the performance of ConvNeXt-T in [mixup](https://github.com/Westlake-AI/openmixup/tree/main/configs/classification/imagenet/mixups/).

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# LITv2-Base on ImageNet-1k with the RSB A3 recipe:
# 160x160 input, total batch 2048, LAMB + BCE mixup/cutmix loss, 100 epochs.
_base_ = [
    '../../_base_/models/lit_v2/lit_v2_base.py',
    '../../_base_/datasets/imagenet/rsb_a3_sz160_8xbs256.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(
    alpha=[0.1, 1.0,],  # RSB A3: mixup alpha=0.1, cutmix alpha=1.0
    mix_mode=["mixup", "cutmix",],
    backbone=dict(
        type='LIT',
        arch='base',
        drop_path_rate=0.4,
        alpha=0.9,  # HiLo attention high/low-frequency split ratio
        window_size=[0, 0, 2, 1],
        attention_types=[None, None, "HiLo", "HiLo"],
        init_values=1e-6,
    ),
    head=dict(
        type='ClsMixupHead',
        loss=dict(type='CrossEntropyLoss',  # mixup BCE loss (one-hot encoding)
                  use_soft=False, use_sigmoid=True, loss_weight=1.0),
        multi_label=True, two_hot=False,
        with_avg_pool=True,
        in_channels=1024, num_classes=1000,
        init_cfg=[
            dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
        ]),
)

# data
data = dict(imgs_per_gpu=256, workers_per_gpu=12)

# additional hooks
update_interval = 1  # 256 x 8gpus x 1 accumulates = bs2048

# optimizer
optimizer = dict(
    type='LAMB', lr=0.008, weight_decay=0.02,
    paramwise_options={
        # No weight decay on norm layers, biases, and gamma (layer scale);
        # deformable-conv offsets use a 10x smaller learning rate.
        # NOTE: raw string fixes the invalid '\d' escape (SyntaxError from
        # Python 3.12 on); the runtime regex string itself is unchanged.
        r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'gamma': dict(weight_decay=0.),
        'offset': dict(lr_mul=0.1),
    })

# fp16
use_fp16 = True
fp16 = dict(type='mmcv', loss_scale='dynamic')
optimizer_config = dict(
    grad_clip=dict(max_norm=5.0), update_interval=update_interval)

# lr scheduler
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.,
    warmup='linear',
    warmup_iters=5, warmup_by_epoch=True,  # warmup 5 epochs.
    warmup_ratio=1e-6,
    by_epoch=False
)

# validation hook
evaluation = dict(
    initial=True,
    interval=1,
    imgs_per_gpu=25,  # dconv im2col_step
    workers_per_gpu=4,
    eval_param=dict(topk=(1, 5)))

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
_base_ = [
'../../_base_/models/lit_v2/lit_v2_medium.py',
'../../_base_/datasets/imagenet/rsb_a3_sz160_8xbs256.py',
'../../_base_/default_runtime.py',
]

# model settings
model = dict(
alpha=[0.1, 1.0,], # RSB A3
mix_mode=["mixup", "cutmix",],
head=dict(
type='ClsMixupHead',
loss=dict(type='CrossEntropyLoss', # mixup BCE loss (one-hot encoding)
use_soft=False, use_sigmoid=True, loss_weight=1.0),
multi_label=True, two_hot=False,
with_avg_pool=True,
in_channels=768, num_classes=1000,
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
]),
)

# data
data = dict(imgs_per_gpu=256, workers_per_gpu=12)

# additional hooks
update_interval = 1 # 256 x 8gpus x 1 accumulates = bs2048

# optimizer
optimizer = dict(
type='LAMB', lr=0.008, weight_decay=0.02,
paramwise_options={
'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
'norm': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'gamma': dict(weight_decay=0.),
'offset': dict(lr_mul=0.1),
})

# fp16
use_fp16 = True
fp16 = dict(type='mmcv', loss_scale='dynamic')
optimizer_config = dict(
grad_clip=dict(max_norm=5.0), update_interval=update_interval)

# lr scheduler
lr_config = dict(
policy='CosineAnnealing',
min_lr=0.,
warmup='linear',
warmup_iters=5, warmup_by_epoch=True, # warmup 5 epochs.
warmup_ratio=1e-6,
by_epoch=False
)

# validation hook
evaluation = dict(
initial=True,
interval=1,
imgs_per_gpu=25, # dconv im2col_step
workers_per_gpu=4,
eval_param=dict(topk=(1, 5)))

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
_base_ = [
'../../_base_/models/lit_v2/lit_v2_small.py',
'../../_base_/datasets/imagenet/rsb_a3_sz160_8xbs256.py',
'../../_base_/default_runtime.py',
]

# model settings
model = dict(
alpha=[0.1, 1.0,], # RSB A3
mix_mode=["mixup", "cutmix",],
head=dict(
type='ClsMixupHead',
loss=dict(type='CrossEntropyLoss', # mixup BCE loss (one-hot encoding)
use_soft=False, use_sigmoid=True, loss_weight=1.0),
multi_label=True, two_hot=False,
with_avg_pool=True,
in_channels=768, num_classes=1000,
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
]),
)

# data
data = dict(imgs_per_gpu=256, workers_per_gpu=12)

# additional hooks
update_interval = 1 # 256 x 8gpus x 1 accumulates = bs2048

# optimizer
optimizer = dict(
type='LAMB', lr=0.008, weight_decay=0.02,
paramwise_options={
'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
'norm': dict(weight_decay=0.),
'bias': dict(weight_decay=0.),
'gamma': dict(weight_decay=0.),
'offset': dict(lr_mul=0.1),
})

# fp16
use_fp16 = True
fp16 = dict(type='mmcv', loss_scale='dynamic')
optimizer_config = dict(
grad_clip=dict(max_norm=5.0), update_interval=update_interval)

# lr scheduler
lr_config = dict(
policy='CosineAnnealing',
min_lr=0.,
warmup='linear',
warmup_iters=5, warmup_by_epoch=True, # warmup 5 epochs.
warmup_ratio=1e-6,
by_epoch=False
)

# validation hook
evaluation = dict(
initial=True,
interval=1,
imgs_per_gpu=25, # dconv im2col_step
workers_per_gpu=4,
eval_param=dict(topk=(1, 5)))

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
Loading

0 comments on commit cb7ae3b

Please sign in to comment.