Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Improvement] Add metafile, readme and converted models for Mlp-Mixer #539

Merged
merged 6 commits into from
Nov 24, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions configs/_base_/datasets/imagenet_bs64_mixer_224.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# dataset settings
dataset_type = 'ImageNet'

# Mean/std of 127.5 map uint8 pixels to [-1, 1], matching timm's Mlp-Mixer
# preprocessing, see
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py
img_norm_cfg = dict(
    mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)

# training is not supported for now
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', size=224, backend='cv2'),
    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='ToTensor', keys=['gt_label']),
    dict(type='Collect', keys=['img', 'gt_label'])
]

# Resize the short edge to 256 (keep aspect ratio), then center-crop to 224.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='Resize', size=(256, -1), backend='cv2', interpolation='bicubic'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='Collect', keys=['img'])
]

data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        data_prefix='data/imagenet/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_prefix='data/imagenet/val',
        ann_file='data/imagenet/meta/val.txt',
        pipeline=test_pipeline),
    test=dict(
        # replace `data/val` with `data/test` for standard test
        type=dataset_type,
        data_prefix='data/imagenet/val',
        ann_file='data/imagenet/meta/val.txt',
        pipeline=test_pipeline))

evaluation = dict(interval=10, metric='accuracy')
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
type='ImageClassifier',
backbone=dict(
type='MlpMixer',
arch='b',
arch='l',
img_size=224,
patch_size=32,
patch_size=16,
drop_rate=0.1,
init_cfg=[
dict(
Expand All @@ -18,7 +18,7 @@
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
),
Expand Down
5 changes: 5 additions & 0 deletions configs/mixer/mlp_mixer_inf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
_base_ = [
'../_base_/models/mlp_mixer_b16.py',
'../_base_/datasets/imagenet_bs64_mixer_224.py',
'../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py'
]
50 changes: 50 additions & 0 deletions configs/mlp_mixer/metafile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
Collections:
  - Name: Mlp-Mixer
    Metadata:
      Training Data: ImageNet-1k
      Architecture:
        - MLP
        - Layer Normalization
        - Dropout
    Paper:
      URL: https://arxiv.org/abs/2105.01601
      Title: "MLP-Mixer: An all-MLP Architecture for Vision"
    README: configs/mlp_mixer/README.md
    # Code:
    #   URL: # todo
    #   Version: # todo

Models:
  # NOTE(review): the names previously carried a stray leading '-'
  # ("-mixer_..."), which looks like a diff artifact; dropped here.
  - Name: mixer_b16_224_3rdparty_64xb64_in1k
    Metadata:
      FLOPs: 12610000000  # 12.61 G
      Parameters: 59880000  # 59.88 M
    In Collection: Mlp-Mixer
    Results:
      - Dataset: ImageNet-1k
        Metrics:
          Top 1 Accuracy: 76.68
          Top 5 Accuracy: 92.25
        Task: Image Classification
    # Weights: # todo
    Converted From:
      Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_b16_224-76587d61.pth
      Code: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py#L70
    Config: configs/mlp_mixer/mlp_mixer-base_p16_64xb64_in1k.py

  - Name: mixer_l16_224_3rdparty_64xb64_in1k
    Metadata:
      FLOPs: 44570000000  # 44.57 G
      Parameters: 208200000  # 208.2 M
    In Collection: Mlp-Mixer
    Results:
      - Dataset: ImageNet-1k
        Metrics:
          Top 1 Accuracy: 72.34
          Top 5 Accuracy: 88.02
        Task: Image Classification
    # Weights: # todo
    Converted From:
      # Was jx_mixer_b16_224_in21k-617b3de2.pth (a base/16 ImageNet-21k
      # checkpoint) — wrong source for the large/16 in1k model; use the
      # mixer_l16_224 checkpoint from the same timm release instead.
      Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_l16_224-92f9adc4.pth
      Code: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py#L73
    Config: configs/mlp_mixer/mlp_mixer-large_p16_64xb64_in1k.py
2 changes: 1 addition & 1 deletion configs/mlp_mixer/mlp_mixer-base_p16_64xb64_in1k.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
_base_ = [
'../_base_/models/mlp_mixer_base_patch16.py',
'../_base_/datasets/imagenet_bs64_pil_resize.py',
'../_base_/datasets/imagenet_bs64_mixer_224.py',
'../_base_/schedules/imagenet_bs4096_AdamW.py',
'../_base_/default_runtime.py',
]
6 changes: 0 additions & 6 deletions configs/mlp_mixer/mlp_mixer-base_p32_64xb64_in1k.py

This file was deleted.

6 changes: 6 additions & 0 deletions configs/mlp_mixer/mlp_mixer-large_p16_64xb64_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Mlp-Mixer large/16 on ImageNet-1k, composed from base configs
# (model, Mixer dataset pipeline, AdamW schedule, default runtime).
# The "64xb64" in the filename presumably means 64 GPUs x 64 imgs/gpu —
# the effective batch size is set by the base configs, not here.
_base_ = [
    '../_base_/models/mlp_mixer_large_patch16.py',
    '../_base_/datasets/imagenet_bs64_mixer_224.py',
    '../_base_/schedules/imagenet_bs4096_AdamW.py',
    '../_base_/default_runtime.py',
]
14 changes: 9 additions & 5 deletions mmcls/models/backbones/mlp_mixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@


class MixerBlock(BaseModule):
"""Implements mixer block in MLP Mixer.
"""Mlp-Mixer basic block.

Basic module of `MLP-Mixer: An all-MLP Architecture for Vision
<https://arxiv.org/pdf/2105.01601.pdf>`_

Args:
num_tokens (int): The number of patched tokens
Expand Down Expand Up @@ -96,13 +99,14 @@ def forward(self, x):

@BACKBONES.register_module()
class MlpMixer(BaseBackbone):
"""Mlp Mixer.
"""Mlp-Mixer backbone.

Pytorch implementation of `MLP-Mixer: An all-MLP Architecture for Vision
<https://arxiv.org/pdf/2105.01601.pdf>`_

A PyTorch implement of : `MLP-Mixer: An all-MLP Architecture for Vision` -
https://arxiv.org/abs/2105.01601
Args:
arch (str | dict): MLP Mixer architecture
Default: 'b'.
Defaults to 'b'.
img_size (int | tuple): Input image size.
patch_size (int | tuple): The patch size.
out_indices (Sequence | int): Output from which layer.
Expand Down
57 changes: 57 additions & 0 deletions tools/convert_models/mlpmixer_to_mmcls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import argparse
from pathlib import Path

import torch


def convert_weights(weight):
    """Weight Converter.

    Converts the weights from timm to mmcls

    Args:
        weight (dict): weight dict from timm

    Returns: converted weight dict for mmcls
    """
    # timm -> mmcls key-fragment substitutions. Insertion order matters:
    # 'norm1'/'norm2' are rewritten to 'ln1'/'ln2' before the bare 'norm.'
    # rule runs, so only the final model-level norm matches 'norm.'.
    key_map = {
        'stem': 'patch_embed',
        'proj': 'projection',
        'mlp_tokens.fc1': 'token_mix.layers.0.0',
        'mlp_tokens.fc2': 'token_mix.layers.1',
        'mlp_channels.fc1': 'channel_mix.layers.0.0',
        'mlp_channels.fc2': 'channel_mix.layers.1',
        'norm1': 'ln1',
        'norm2': 'ln2',
        'norm.': 'ln1.',
        'blocks': 'layers'
    }

    state_dict = dict()
    for name, tensor in weight.items():
        new_name = name
        for old_frag, new_frag in key_map.items():
            if old_frag in new_name:
                new_name = new_name.replace(old_frag, new_frag)
        # Classifier weights go under the mmcls head; everything else
        # belongs to the backbone.
        if new_name.startswith('head.'):
            state_dict['head.fc.' + new_name[len('head.'):]] = tensor
        else:
            state_dict['backbone.' + new_name] = tensor

    return {'meta': dict(), 'state_dict': state_dict}


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert model keys')
    # The source checkpoint comes from timm (pytorch-image-models);
    # the old help text wrongly said "detectron".
    parser.add_argument('src', help='src timm model path')
    parser.add_argument('dst', help='save path')
    args = parser.parse_args()

    dst = Path(args.dst)
    if dst.suffix != '.pth':
        # Was `exit()`, which exits with status 0 and makes this error
        # path look like success to callers/scripts; exit non-zero.
        print('The path should contain the name of the pth format file.')
        raise SystemExit(1)
    dst.parent.mkdir(parents=True, exist_ok=True)

    # map_location='cpu' so conversion does not require a GPU.
    original_model = torch.load(args.src, map_location='cpu')
    converted_model = convert_weights(original_model)
    torch.save(converted_model, args.dst)