create_swa_model.py
"""Create SWA model.
- Author: Hyunwook Kim
- Contact: [email protected]
"""
import argparse
import gc
import glob
import os
import torch
import tqdm


def get_parser() -> argparse.Namespace:
    """Get argument parser."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--model_dir",
        type=str,
        default="",
        required=True,
        help="directory of trained models to apply SWA",
    )
    parser.add_argument(
        "--swa_model_name", type=str, default="swa.pt", help="file name of SWA model"
    )
    parser.add_argument(
        "--best_num",
        type=int,
        default=5,
        help="the number of trained models to apply SWA",
    )
    return parser.parse_args()


def create_swa_model(model_dir: str, model_name: str, best_num: int) -> None:
    """Save mean weights to apply SWA.

    This function was implemented with reference to:
    https://github.com/hyz-xmaster/swa_object_detection/blob/master/swa/get_swa_model.py

    Args:
        model_dir: directory of trained models to apply SWA
        model_name: file name of SWA model
        best_num: the number of trained models to apply SWA
    """
    # Load trained models and select best models
    model_paths = glob.glob(f"{model_dir}/epoch_*.pt")
    models_with_map50 = []
    for model_path in model_paths:
        model = torch.load(model_path)
        map50 = model["mAP50"]
        models_with_map50.append((map50, model_path))
        del model
        gc.collect()
    models_with_map50.sort(reverse=True)
    models = [
        torch.load(p) for i, (_, p) in enumerate(models_with_map50) if i < best_num
    ]

    # Extract model information
    model_num = len(models)
    model_type = "ema" if models[-1].get("ema") else "model"
    state_dict = models[-1][model_type].state_dict()
    model_keys = state_dict.keys()
    new_state_dict = state_dict.copy()

    # Apply SWA and save SWA model
    swa_model = models[-1]
    for key in tqdm.tqdm(model_keys, "Apply SWA"):
        sum_weight = 0.0
        for m in models:
            sum_weight += m[model_type].state_dict()[key]
        avg_weight = sum_weight / model_num
        new_state_dict[key] = avg_weight
    swa_model[model_type].load_state_dict(new_state_dict)
    save_dir = os.path.join(model_dir, model_name)
    torch.save(swa_model, save_dir)
    print(f"{model_dir}/{model_name} was saved.")


if __name__ == "__main__":
    args = get_parser()
    create_swa_model(
        model_dir=args.model_dir, model_name=args.swa_model_name, best_num=args.best_num
    )
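
# Usage sketch: the checkpoint layout is assumed from the glob pattern and key lookups
# above, i.e. --model_dir holds files named epoch_*.pt, each a dict with an "mAP50"
# value and a "model" (or "ema") module. The directory path below is a hypothetical
# placeholder:
#
#     python create_swa_model.py --model_dir path/to/weights --best_num 5 \
#         --swa_model_name swa.pt
#
# The averaged checkpoint is then saved as path/to/weights/swa.pt.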