-
Notifications
You must be signed in to change notification settings - Fork 239
/
Copy pathmatches.py
109 lines (91 loc) · 9.49 KB
/
matches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Three "attention" feature matches scored with the "attention_mse" loss.
# layer_T 4/8/12 is paired with layer_S 1/2/3 (presumably T = teacher and
# S = student -- confirm against the code that consumes these configs).
L3_attention_mse = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_mse", "weight": 1}
    for layer_t, layer_s in ((4, 1), (8, 2), (12, 3))
]
# Three "attention" feature matches scored with the "attention_ce" loss.
# layer_T 4/8/12 is paired with layer_S 1/2/3 (presumably T = teacher and
# S = student -- confirm against the code that consumes these configs).
L3_attention_ce = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_ce", "weight": 1}
    for layer_t, layer_s in ((4, 1), (8, 2), (12, 3))
]
# Three "attention" feature matches scored with the "attention_mse_sum" loss.
# layer_T 4/8/12 is paired with layer_S 1/2/3 (presumably T = teacher and
# S = student -- confirm against the code that consumes these configs).
L3_attention_mse_sum = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_mse_sum", "weight": 1}
    for layer_t, layer_s in ((4, 1), (8, 2), (12, 3))
]
# Three "attention" feature matches scored with the "attention_ce_mean" loss.
# layer_T 4/8/12 is paired with layer_S 1/2/3 (presumably T = teacher and
# S = student -- confirm against the code that consumes these configs).
L3_attention_ce_mean = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_ce_mean", "weight": 1}
    for layer_t, layer_s in ((4, 1), (8, 2), (12, 3))
]
# Four "hidden" feature matches scored with the "mmd" loss. Each layer index
# is given as a two-element list repeating the same value: layer_T
# [0,0]/[4,4]/[8,8]/[12,12] against layer_S [0,0]/[1,1]/[2,2]/[3,3].
# A new list object is built for every entry, so entries share no state.
L3_hidden_smmd = [
    {"layer_T": [layer_t, layer_t], "layer_S": [layer_s, layer_s],
     "feature": "hidden", "loss": "mmd", "weight": 1}
    for layer_t, layer_s in ((0, 0), (4, 1), (8, 2), (12, 3))
]
# Four "hidden" feature matches scored with the "hidden_mse" loss, pairing
# layer_T 0/4/8/12 with layer_S 0/1/2/3. Every entry carries its own
# "proj" list ["linear", 384, 768] (presumably a linear projection between
# hidden sizes 384 and 768 -- confirm against the consumer).
L3n_hidden_mse = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "hidden",
     "loss": "hidden_mse", "weight": 1, "proj": ["linear", 384, 768]}
    for layer_t, layer_s in ((0, 0), (4, 1), (8, 2), (12, 3))
]
# Four "hidden" feature matches scored with the "hidden_mse" loss, pairing
# layer_T 0/4/8/12 with layer_S 0/1/2/3. No "proj" entry is specified.
L3_hidden_mse = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "hidden",
     "loss": "hidden_mse", "weight": 1}
    for layer_t, layer_s in ((0, 0), (4, 1), (8, 2), (12, 3))
]
# Four "hidden" feature matches scored with the "hidden_mse" loss, pairing
# layer_T 0/4/8/12 with layer_S 0/1/2/3. Every entry carries its own
# "proj" list ["linear", 1024, 768] (presumably a linear projection between
# hidden sizes 1024 and 768 -- confirm against the consumer).
L3l_hidden_mse = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "hidden",
     "loss": "hidden_mse", "weight": 1, "proj": ["linear", 1024, 768]}
    for layer_t, layer_s in ((0, 0), (4, 1), (8, 2), (12, 3))
]
#######################L4################
# Four "attention" feature matches scored with the "attention_mse" loss.
# layer_T 3/6/9/12 is paired with layer_S 1/2/3/4 (presumably T = teacher
# and S = student -- confirm against the code that consumes these configs).
L4_attention_mse = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_mse", "weight": 1}
    for layer_t, layer_s in ((3, 1), (6, 2), (9, 3), (12, 4))
]
# Four "attention" feature matches scored with the "attention_ce" loss.
# layer_T 3/6/9/12 is paired with layer_S 1/2/3/4 (presumably T = teacher
# and S = student -- confirm against the code that consumes these configs).
L4_attention_ce = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_ce", "weight": 1}
    for layer_t, layer_s in ((3, 1), (6, 2), (9, 3), (12, 4))
]
# Four "attention" feature matches scored with the "attention_mse_sum" loss.
# layer_T 3/6/9/12 is paired with layer_S 1/2/3/4 (presumably T = teacher
# and S = student -- confirm against the code that consumes these configs).
L4_attention_mse_sum = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_mse_sum", "weight": 1}
    for layer_t, layer_s in ((3, 1), (6, 2), (9, 3), (12, 4))
]
# Four "attention" feature matches scored with the "attention_ce_mean" loss.
# layer_T 3/6/9/12 is paired with layer_S 1/2/3/4 (presumably T = teacher
# and S = student -- confirm against the code that consumes these configs).
L4_attention_ce_mean = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "attention",
     "loss": "attention_ce_mean", "weight": 1}
    for layer_t, layer_s in ((3, 1), (6, 2), (9, 3), (12, 4))
]
# Five "hidden" feature matches scored with the "mmd" loss. Each layer index
# is given as a two-element list repeating the same value: layer_T
# [0,0]/[3,3]/[6,6]/[9,9]/[12,12] against layer_S [0,0]/[1,1]/[2,2]/[3,3]/[4,4].
# A new list object is built for every entry, so entries share no state.
L4_hidden_smmd = [
    {"layer_T": [layer_t, layer_t], "layer_S": [layer_s, layer_s],
     "feature": "hidden", "loss": "mmd", "weight": 1}
    for layer_t, layer_s in ((0, 0), (3, 1), (6, 2), (9, 3), (12, 4))
]
# Five "hidden" feature matches scored with the "hidden_mse" loss, pairing
# layer_T 0/3/6/9/12 with layer_S 0/1/2/3/4. Every entry carries its own
# "proj" list ["linear", 312, 768] (presumably a linear projection between
# hidden sizes 312 and 768 -- confirm against the consumer).
L4t_hidden_mse = [
    {"layer_T": layer_t, "layer_S": layer_s, "feature": "hidden",
     "loss": "hidden_mse", "weight": 1, "proj": ["linear", 312, 768]}
    for layer_t, layer_s in ((0, 0), (3, 1), (6, 2), (9, 3), (12, 4))
]
# Seven "hidden" feature matches scored with the "mmd" loss, one for every
# even layer index from 0 through 12. layer_T and layer_S use the same index,
# each written as its own two-element list repeating that index.
small_hidden_smmd = [
    {"layer_T": [layer, layer], "layer_S": [layer, layer],
     "feature": "hidden", "loss": "mmd", "weight": 1}
    for layer in range(0, 13, 2)
]
# Seven "hidden" feature matches scored with the "hidden_mse" loss, one for
# every even layer index from 0 through 12 (layer_T == layer_S). Every entry
# carries its own "proj" list ["linear", 256, 768] (presumably a linear
# projection between hidden sizes 256 and 768 -- confirm against the consumer).
small_hidden_mse = [
    {"layer_T": layer, "layer_S": layer, "feature": "hidden",
     "loss": "hidden_mse", "weight": 1, "proj": ["linear", 256, 768]}
    for layer in range(0, 13, 2)
]
# Six "hidden" feature matches scored with the "mmd" loss, one for every
# even layer index from 2 through 12 (index 0 is not included). layer_T and
# layer_S use the same index, each written as its own two-element list.
es_hidden_smmd = [
    {"layer_T": [layer, layer], "layer_S": [layer, layer],
     "feature": "hidden", "loss": "mmd", "weight": 1}
    for layer in range(2, 13, 2)
]
# Six "hidden" feature matches scored with the "hidden_mse" loss, one for
# every even layer index from 2 through 12 (layer_T == layer_S; index 0 is
# not included). Every entry carries its own "proj" list ["linear", 256, 768]
# (presumably a linear projection between hidden sizes 256 and 768 -- confirm
# against the consumer).
es_hidden_mse = [
    {"layer_T": layer, "layer_S": layer, "feature": "hidden",
     "loss": "hidden_mse", "weight": 1, "proj": ["linear", 256, 768]}
    for layer in range(2, 13, 2)
]
# Registry that maps each configuration's name (as a string) to the
# module-level list bound to that same name, so a configuration can be
# looked up by name at runtime.
matches = {
    # "L3*" configurations
    "L3_attention_mse": L3_attention_mse,
    "L3_attention_mse_sum": L3_attention_mse_sum,
    "L3_attention_ce": L3_attention_ce,
    "L3_attention_ce_mean": L3_attention_ce_mean,
    "L3n_hidden_mse": L3n_hidden_mse,
    "L3_hidden_smmd": L3_hidden_smmd,
    "L3_hidden_mse": L3_hidden_mse,
    "L3l_hidden_mse": L3l_hidden_mse,
    # "L4*" configurations
    "L4_attention_mse": L4_attention_mse,
    "L4_attention_mse_sum": L4_attention_mse_sum,
    "L4_attention_ce": L4_attention_ce,
    "L4_attention_ce_mean": L4_attention_ce_mean,
    "L4t_hidden_mse": L4t_hidden_mse,
    "L4_hidden_smmd": L4_hidden_smmd,
    # "small_*" configurations
    "small_hidden_mse": small_hidden_mse,
    "small_hidden_smmd": small_hidden_smmd,
    # "es_*" configurations
    "es_hidden_mse": es_hidden_mse,
    "es_hidden_smmd": es_hidden_smmd,
}