-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmodels.py
76 lines (59 loc) · 2.72 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# SeiT++
# Copyright (c) 2024-present NAVER Cloud Corp.
# CC BY-NC 4.0 (https://creativecommons.org/licenses/by-nc/4.0/)
import torch
import torch.nn as nn
from functools import partial
from timm.models.vision_transformer import VisionTransformer, _cfg
from timm.models.registry import register_model
class TokenVisionTransformer(VisionTransformer):
    """VisionTransformer subclass with a replaceable patch projection and optional mean pooling.

    When ``global_pool`` is truthy, ``forward_features`` averages every token
    except the class token and then applies ``self.norm``; otherwise it applies
    ``self.norm`` to the whole sequence and keeps only the class token.
    """

    def __init__(self, global_pool=False, *args, **kwargs):
        """Initialise the parent model and record the pooling mode.

        Args:
            global_pool: Selects which pooling branch ``forward_features`` takes.
            *args: Forwarded unchanged to ``VisionTransformer.__init__``.
            **kwargs: Forwarded unchanged to ``VisionTransformer.__init__``.
        """
        super().__init__(*args, **kwargs)
        # Set only after the parent constructor has run.
        self.global_pool = global_pool

    def reset_patch_embed_conv(self, *args, **kwargs):
        """Replace ``self.patch_embed.proj`` with a newly constructed ``nn.Conv2d``.

        All arguments are passed straight to ``nn.Conv2d``. The previous
        projection module is discarded; nothing is copied from it.
        """
        new_projection = nn.Conv2d(*args, **kwargs)
        self.patch_embed.proj = new_projection

    def forward_features(self, x):
        """Run the encoder on ``x`` and return the pooled output of ``self.pre_logits``."""
        tokens = self.patch_embed(x)
        batch_size = tokens.shape[0]

        # stole cls_tokens impl from Phil Wang, thanks
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        # The class token is placed first, so it sits at index 0 along dim 1.
        tokens = torch.cat((cls_tokens, tokens), dim=1)

        tokens = self.pos_drop(tokens + self.pos_embed)
        tokens = self.blocks(tokens)

        if not self.global_pool:
            # Normalise the full sequence, then keep just the class token.
            pooled = self.norm(tokens)[:, 0]
            return self.pre_logits(pooled)

        # Average everything after the class token, then normalise the mean.
        pooled = self.norm(tokens[:, 1:, :].mean(dim=1))
        return self.pre_logits(pooled)
@register_model
def deit_small_token_32(pretrained=False, **kwargs):
    """Build a ``TokenVisionTransformer`` with width 384 and 6 heads for 32-channel input.

    Args:
        pretrained: Accepted for signature compatibility; not used in this function.
        **kwargs: Extra keyword arguments forwarded to ``TokenVisionTransformer``.

    Returns:
        The constructed model, with ``default_cfg`` set and its patch
        projection replaced by a 4x4, stride-2, padding-1 convolution.
    """
    input_channels = 32
    width = 384

    model = TokenVisionTransformer(
        global_pool=True,
        in_chans=input_channels,
        patch_size=2,
        embed_dim=width,
        depth=12,
        num_heads=6,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs,
    )
    model.default_cfg = _cfg()
    # Swap the projection built by the constructor for a 4x4 / stride-2 conv.
    model.reset_patch_embed_conv(
        input_channels, width, kernel_size=(4, 4), stride=2, padding=1
    )
    return model
@register_model
def deit_base_token_32(pretrained=False, **kwargs):
    """Build a ``TokenVisionTransformer`` with width 768 and 12 heads for 32-channel input.

    Args:
        pretrained: Accepted for signature compatibility; not used in this function.
        **kwargs: Extra keyword arguments forwarded to ``TokenVisionTransformer``.

    Returns:
        The constructed model, with ``default_cfg`` set and its patch
        projection replaced by a 4x4, stride-2, padding-1 convolution.
    """
    input_channels = 32
    width = 768

    model = TokenVisionTransformer(
        global_pool=True,
        in_chans=input_channels,
        patch_size=2,
        embed_dim=width,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs,
    )
    model.default_cfg = _cfg()
    # Swap the projection built by the constructor for a 4x4 / stride-2 conv.
    model.reset_patch_embed_conv(
        input_channels, width, kernel_size=(4, 4), stride=2, padding=1
    )
    return model
@register_model
def deit_small_token_4(pretrained=False, **kwargs):
    """Build a ``TokenVisionTransformer`` with width 384 and 6 heads for 4-channel input.

    Args:
        pretrained: Accepted for signature compatibility; not used in this function.
        **kwargs: Extra keyword arguments forwarded to ``TokenVisionTransformer``.

    Returns:
        The constructed model, with ``default_cfg`` set and its patch
        projection replaced by a 4x4, stride-2, padding-1 convolution.
    """
    input_channels = 4
    width = 384

    model = TokenVisionTransformer(
        global_pool=True,
        in_chans=input_channels,
        patch_size=2,
        embed_dim=width,
        depth=12,
        num_heads=6,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs,
    )
    model.default_cfg = _cfg()
    # Swap the projection built by the constructor for a 4x4 / stride-2 conv.
    model.reset_patch_embed_conv(
        input_channels, width, kernel_size=(4, 4), stride=2, padding=1
    )
    return model
@register_model
def deit_base_token_4(pretrained=False, **kwargs):
    """Build a ``TokenVisionTransformer`` with width 768 and 12 heads for 4-channel input.

    Args:
        pretrained: Accepted for signature compatibility; not used in this function.
        **kwargs: Extra keyword arguments forwarded to ``TokenVisionTransformer``.

    Returns:
        The constructed model, with ``default_cfg`` set and its patch
        projection replaced by a 4x4, stride-2, padding-1 convolution.
    """
    input_channels = 4
    width = 768

    model = TokenVisionTransformer(
        global_pool=True,
        in_chans=input_channels,
        patch_size=2,
        embed_dim=width,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs,
    )
    model.default_cfg = _cfg()
    # Swap the projection built by the constructor for a 4x4 / stride-2 conv.
    model.reset_patch_embed_conv(
        input_channels, width, kernel_size=(4, 4), stride=2, padding=1
    )
    return model