import math
from bisect import bisect_right

import torch
import torch.nn as nn
import torch.nn.functional as F

# Only names actually defined in this file are exported.
__all__ = ['cal_param_size', 'cal_multi_adds', 'AverageMeter', 'correct_num',
           'DistillKL', 'adjust_lr']

def cal_param_size(model):
    """Return the total number of parameters in `model`."""
    return sum(p.numel() for p in model.parameters())
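
# Usage sketch (torchvision's resnet18 is illustrative; any nn.Module works):
#   import torchvision
#   model = torchvision.models.resnet18(num_classes=100)
#   print('Params: %.2fM' % (cal_param_size(model) / 1e6))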

# Running multiply-add counter, filled in by measure_layer() during a
# hooked forward pass (see cal_multi_adds below).
count_ops = 0


def measure_layer(layer, x, multi_add=1):
    """Add the multiply-adds of a single layer on input `x` to `count_ops`."""
    global count_ops
    delta_ops = 0
    type_name = str(layer)[:str(layer).find('(')].strip()

    if type_name == 'Conv2d':
        # Dilation is not accounted for in the output-size computation.
        out_h = (x.size(2) + 2 * layer.padding[0] - layer.kernel_size[0]) // \
            layer.stride[0] + 1
        out_w = (x.size(3) + 2 * layer.padding[1] - layer.kernel_size[1]) // \
            layer.stride[1] + 1
        delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * \
            layer.kernel_size[1] * out_h * out_w // layer.groups * multi_add
    elif type_name == 'Linear':
        # Bias terms are ignored; only weight multiply-adds are counted.
        delta_ops = layer.weight.numel() * multi_add

    count_ops += delta_ops
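
# Worked example of the Conv2d count above (assumed values, for illustration):
# a 3x3 convolution mapping 3 -> 16 channels over a 32x32 input with stride 1
# and padding 1 gives out_h = out_w = (32 + 2*1 - 3)//1 + 1 = 32, so
# delta_ops = 3 * 16 * 3 * 3 * 32 * 32 // 1 = 442,368 multiply-adds.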

def is_leaf(module):
    return sum(1 for _ in module.children()) == 0


def should_measure(module):
    return is_leaf(module)

def cal_multi_adds(model, shape=(1, 3, 32, 32)):
    """Estimate the multiply-adds of one forward pass on an input of `shape`."""
    global count_ops
    count_ops = 0
    data = torch.zeros(shape)

    def new_forward(m):
        def lambda_forward(x):
            measure_layer(m, x)
            return m.old_forward(x)
        return lambda_forward

    def modify_forward(model):
        # Wrap every leaf module's forward so it records its ops before running.
        for child in model.children():
            if should_measure(child):
                child.old_forward = child.forward
                child.forward = new_forward(child)
            else:
                modify_forward(child)

    def restore_forward(model):
        for child in model.children():
            if is_leaf(child) and hasattr(child, 'old_forward'):
                child.forward = child.old_forward
                child.old_forward = None
            else:
                restore_forward(child)

    modify_forward(model)
    model.forward(data)
    restore_forward(model)
    return count_ops
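
# Usage sketch with a hypothetical two-layer CNN (not part of this file):
#   net = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
#                       nn.Flatten(), nn.Linear(16 * 32 * 32, 10))
#   print('MAdds: %.2fM' % (cal_multi_adds(net, (1, 3, 32, 32)) / 1e6))
# Expected: 3*16*3*3*32*32 + 16*32*32*10 = 606,208 multiply-adds (~0.61M).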

class AverageMeter(object):
    """Computes and stores the average and current value."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
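
# Usage sketch (the loss values and batch size are made up):
#   losses = AverageMeter('Loss', ':.4f')
#   losses.update(0.9, n=128)   # mean loss 0.9 over a 128-sample batch
#   losses.update(0.7, n=128)
#   print(losses)               # -> "Loss 0.7000 (0.8000)"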

def correct_num(output, target, topk=(1,)):
    """Return the number of correct predictions for each k in `topk`."""
    maxk = max(topk)
    _, pred = output.topk(maxk, 1, True, True)
    correct = pred.eq(target.view(-1, 1).expand_as(pred))
    res = []
    for k in topk:
        # A sample counts as correct if the target is among its top-k predictions.
        correct_k = correct[:, :k].float().sum()
        res.append(correct_k)
    return res
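
# Usage sketch (random logits, so the counts themselves are arbitrary):
#   output = torch.randn(64, 100)             # logits: 64 samples, 100 classes
#   target = torch.randint(0, 100, (64,))
#   top1, top5 = correct_num(output, target, topk=(1, 5))
#   print('top-1 acc: %.2f%%' % (100. * top1.item() / target.size(0)))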

class DistillKL(nn.Module):
    """KL-divergence loss from "Distilling the Knowledge in a Neural Network"."""

    def __init__(self, T):
        super(DistillKL, self).__init__()
        self.T = T  # softmax temperature

    def forward(self, y_s, y_t):
        p_s = F.log_softmax(y_s / self.T, dim=1)
        p_t = F.softmax(y_t / self.T, dim=1)
        # T**2 keeps gradient magnitudes comparable across temperatures.
        loss = F.kl_div(p_s, p_t, reduction='batchmean') * (self.T ** 2)
        return loss
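
# Usage sketch (T=4 is a common choice in the KD literature, assumed here):
#   criterion_kd = DistillKL(T=4.0)
#   logits_s = torch.randn(8, 100)   # student logits
#   logits_t = torch.randn(8, 100)   # teacher logits
#   loss = criterion_kd(logits_s, logits_t)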

def adjust_lr(optimizer, epoch, args):
    """Set the learning rate for `epoch` per args.lr_type and return it."""
    cur_lr = 0.
    if args.lr_type == 'multistep':
        # Decay by 10x at each milestone epoch.
        cur_lr = args.init_lr * 0.1 ** bisect_right(args.milestones, epoch)
    elif args.lr_type == 'cosine':
        # Cosine annealing from init_lr down to 0 over args.epochs.
        cur_lr = args.init_lr * 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = cur_lr
    return cur_lr
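
# Usage sketch (the hyperparameters below are assumptions, not repo defaults):
#   from argparse import Namespace
#   args = Namespace(lr_type='multistep', init_lr=0.1,
#                    milestones=[150, 225], epochs=300)
#   optimizer = torch.optim.SGD(model.parameters(), lr=args.init_lr)
#   for epoch in range(args.epochs):
#       cur_lr = adjust_lr(optimizer, epoch, args)  # 0.1 -> 0.01 -> 0.001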