# ek100_SGEAR_vit_AR.txt
#LOSS weights
train.train_one_epoch_fn.loss_wts.feat=1.0
train.train_one_epoch_fn.loss_wts.past_cls_action=1.0
train.train_one_epoch_fn.loss_wts.cls_action=1.0
train.train_one_epoch_fn.loss_wts.global_simContrast=4.0 #Semantic loss
train.train_one_epoch_fn.loss_wts.target_distance=1.0 #regularization loss
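# Assumption (not stated in the file): the total training loss is presumably the weighted sum of the terms above, i.e.
# total = 1.0*feat + 1.0*past_cls_action + 1.0*cls_action + 4.0*global_simContrast + 1.0*target_distance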
train.init_from_model=[[backbone.model,${cwd}/DATA/pretrained/TIMM/vit_base384_patch16_in21kftin1k.pth]]
test_only=true
train.batch_size=3
eval.batch_size=3
train.num_epochs=50
train.train_one_epoch_fn.save_freq=1
pre_train=false # refers to pretraining from previous tasks on S-GEAR
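# Note: with test_only=true this config runs in evaluation mode only; to train, presumably set
# test_only=false, after which the epoch/optimizer settings here apply.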
opt.lr_wd=[[__all__,0.0001,0.00001]]
opt.bias_bn_wd_scale=1.0
opt/optimizer=sgd
opt/scheduler=cosine
opt.warmup.num_epochs=20
opt.scheduler.num_epochs=30
opt.accumulate=0
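# Assumption: lr_wd takes [param_group, lr, weight_decay] triples, so [[__all__,0.0001,0.00001]]
# presumably sets lr=1e-4 and weight decay=1e-5 for all parameters; accumulate=0 presumably
# disables gradient accumulation. Warmup (20 epochs) plus the cosine schedule (30 epochs)
# matches train.num_epochs=50.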
# Deprecated, not recommended
+model.detector=false
+model.detector_dim=2048
+model.freeze_detector=false
+model.pretrained_detector=false
+model.pretrained_detector_weights=${cwd}/DATA/pretrained/TIMM/res_obj_det.pth
# Mixup at raw-frame level (not recommended)
train.use_mixup=false
train.mixup_alpha=0.8
train.label_smoothing=0.1
# Pre-extracted object features
+model.use_object=false
+model.inp_multimodal_dim=2400
model.multimodal=false
+model.num_modalities=1
# Use only in the case of extracted_features
+model.feature_mixup=false
+model.transform_features=false
+model.extracted_features=false
#Freeze weights
+model.freeze_prototypes=false #gamma refers to prototypes
+model.freeze_future_predictor=false
model.freeze_weights=false
+model.train_cls=true
#Backbone setting
+model.reg=false
model/backbone=vit_b_384
model.backbone_last_n_modules_to_drop=0
model.backbone_dim=768
model.intermediate_featdim=768
model.cls_token=true
model.backbone_add_ts_blocks=2
+model.prot_attn=true
model/temporal_aggregator=identity
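# Backbone note: vit_b_384 is ViT-Base/16 at 384x384 input with hidden dim 768 (hence
# backbone_dim/intermediate_featdim=768); the init_from_model checkpoint above is the TIMM
# ImageNet-21k-pretrained, ImageNet-1k-finetuned variant. backbone_add_ts_blocks=2 presumably
# appends two extra temporal blocks, and the identity temporal_aggregator presumably passes the
# per-frame features straight to the future predictor below.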
#Temporal encoder
model/future_predictor=transformer
+model.future_steps=0 #autoregressively predicts future after future_steps
model.dropout=0.2
+model.future_predictor.n_head=4
+model.future_predictor.n_layer=6
+model.future_predictor.output_len=1
+model.future_predictor.inter_dim=2048
+model.future_predictor.return_past_too=true
+model.future_predictor.future_pred_loss={_target_: torch.nn.MSELoss}
+model.future_predictor.future_pred_loss_wt=1.0
+model.future_predictor.avg_last_n=1
model.classifier_on_past=true
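# Future predictor note: a 6-layer, 4-head transformer (inter_dim=2048, presumably the
# feed-forward width) regresses future features with torch.nn.MSELoss (weight 1.0);
# return_past_too=true presumably also returns the past positions so that classifier_on_past=true
# can classify them as well.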
#DATA setup
data_train.num_frames=10
model.tmp_frames=${data_train.num_frames}
data_train.frame_rate=2
data_train.subclips.num_frames=1
data_train.subclips.stride=1
data_eval.num_frames=${data_train.num_frames}
data_eval.frame_rate=${data_train.frame_rate}
data_eval.subclips.num_frames=${data_train.subclips.num_frames}
data_eval.subclips.stride=${data_train.subclips.stride}
data_train.mean=[0.5, 0.5, 0.5]
data_train.std=[0.5, 0.5, 0.5]
data_eval.mean=${data_train.mean}
data_eval.std=${data_train.std}
data_eval.eval_num_crops=3
data_eval.eval_flip_crops=false
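# Clip setup (assumption): 10 frames at 2 fps gives a 5 s observed window, with single-frame
# subclips (stride 1); mean/std of 0.5 is the usual TIMM ViT normalization; evaluation presumably
# averages 3 spatial crops, without flipped copies.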
dataset@dataset_train=epic_kitchens100/anticipation_train
dataset@dataset_eval=epic_kitchens100/anticipation_val
dataset_train.sample_strategy=last_clip
dataset_eval.sample_strategy=last_clip
dataset_train.conv_to_anticipate_fn.tau_a=-1
dataset_train.conv_to_anticipate_fn.tau_o=5
dataset_eval.conv_to_anticipate_fn.tau_a=-1
dataset_eval.conv_to_anticipate_fn.tau_o=5
dataset.epic_kitchens100.common.label_type=action
+dataset_train.conv_to_anticipate_fn.drop_style=correct
+dataset_eval.conv_to_anticipate_fn.drop_style=correct
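# Anticipation setup: EPIC-KITCHENS-100 train/val anticipation splits with action (verb+noun)
# labels. tau_o=5 is presumably the observed duration in seconds; tau_a=-1 looks like a sentinel
# falling back to the dataset's default anticipation gap (an assumption, not verified).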
data_train.scale_h=384-425
data_train.scale_w=-1
data_train.crop_size=384
data_eval.scale_h=388
data_eval.scale_w=-1
data_eval.crop_size=384
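# Spatial resizing (assumption): scale_h=384-425 with scale_w=-1 presumably resizes the shorter
# side to a random value in [384, 425] while keeping the aspect ratio, followed by a 384x384
# crop; evaluation resizes to 388 and crops 384.
# Example usage (hypothetical launcher/path; check the repo README for the actual entry point):
#   python launch.py -c expts/ek100_SGEAR_vit_AR.txt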