# vizwiz2_classification.yaml
# Model settings. Everything below belongs to the `topdown_vqa` entry, which
# is also published as the YAML anchor `&topdown_vqa` so it can be aliased.
model_config:
  topdown_vqa: &topdown_vqa
    # `${env.data_dir}` points at the `data_dir` value under `env`, which is
    # not defined in this file.
    model_data_dir: ${env.data_dir}
    losses:
      - type: vizwiz_baseline_loss
    text_embedding:
      embedding_dim: 300
    lstm:
      # Same value as text_embedding.embedding_dim above.
      input_size: 300
      hidden_size: 512
      bidirectional: false
      batch_first: true
    classifier:
      type: mlp
      params:
        # NOTE(review): in_dim has to equal the width of the vector the model
        # passes to the classifier -- confirm against the model code.
        in_dim: 1024
        out_dim: 2
        # NOTE(review): assumed to be a parameter of the mlp classifier --
        # confirm against the classifier's accepted params.
        num_layers: 2
# Dataset settings for the `vizwiz` dataset: where data lives, which feature
# and annotation files back each split, and how text/answers are processed.
dataset_config:
  vizwiz:
    # `${env.data_dir}` points at the `data_dir` value under `env`, which is
    # not defined in this file. Relative paths below are presumably resolved
    # against this directory -- confirm against the dataset loader.
    data_dir: ${env.data_dir}/datasets
    depth_first: false
    fast_read: false
    # Pre-extracted features are used instead of raw images.
    use_images: false
    use_features: true
    zoo_requirements:
      - vizwiz.v2020
    features:
      train:
        - vizwiz2/train_features
      val:
        - vizwiz2/val_features
      # The test split points at the validation features.
      test:
        - vizwiz2/val_features
    annotations:
      train:
        - vizwiz2/annotations_classification/imdb_vizwiz_train.npy
      val:
        - vizwiz2/annotations_classification/imdb_vizwiz_val.npy
      # The test split points at the validation annotations.
      test:
        - vizwiz2/annotations_classification/imdb_vizwiz_val.npy
    max_features: 100
    processors:
      text_processor:
        type: vocab
        params:
          max_length: 14
          vocab:
            type: random
            vocab_file: vizwiz2/extras/vocabs/vizwiz_vocab.txt
          preprocessor:
            type: simple_sentence
            params: {}
      answer_processor:
        type: vqa_answer
        params:
          vocab_file: vizwiz2/extras/vocabs/answers_vizwiz_classification.txt
          preprocessor:
            type: simple_word
            params: {}
          num_answers: 1
    return_features_info: true
    # Return OCR information
    use_ocr: false
    # Return spatial information of OCR tokens if present
    use_ocr_info: false
# Optimizer selection; `params` holds the arguments for the chosen type.
optimizer:
  type: adam_w
  params:
    lr: 0.001
# Metrics to report during evaluation, listed by their registered names.
evaluation:
  metrics:
    - vqa_accuracy
    - vizwiz_baseline_ap
    - vizwiz_baseline_f1
    - vizwiz_baseline_rec_f1
    - vizwiz_baseline_rec_ap
# Training-loop settings.
training:
  batch_size: 64
  # No learning-rate scheduler is enabled.
  lr_scheduler: false
  max_updates: 10000
  early_stop:
    # Monitored value; `minimize: false` means it is not treated as a
    # quantity to minimize.
    criteria: vizwiz/vqa_accuracy
    minimize: false