forked from Yuanhy1997/SeqDiffuSeq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
args_utils.py
156 lines (139 loc) · 3.98 KB
/
args_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import argparse
def create_argparser():
    """Build the argparse parser used for training and decoding.

    Default values are collected from several groups and merged in a fixed
    order; on key collisions the later group wins (e.g. ``decoding_defaults``
    overrides ``batch_size`` from the base group), so the merge order below
    must not be changed.
    """
    base_defaults = dict(
        data_dir="",
        src='src',
        tgt='tgt',
        schedule_sampler="uniform",
        lr=1e-4,
        weight_decay=0.0,
        lr_anneal_steps=30000,
        warmup=0,
        batch_size=1,
        microbatch=-1,  # -1 disables microbatches
        ema_rate="0.9999",  # comma-separated list of EMA values
        log_interval=50,
        save_interval=25000,
        resume_checkpoint="",
        use_fp16=False,
        fp16_scale_growth=1e-3,
        seed=101,
        gradient_clipping=-1.0,
        eval_interval=2000,
        checkpoint_path="diff_models",
        train_txt_path="data/quotes_train.txt",
        val_txt_path="data/quotes_valid.txt",
        dataset="",
        notes="",
    )
    text_defaults = dict(
        modality="text",
        emb_scale_factor=1.0,
        in_channel=16,
        out_channel=16,
        noise_level=0.0,
        cache_mode="no",
        use_bert_tokenizer="no",
        padding_mode="block",
        preprocessing_num_workers=1,
        tok_thresh=150
    )
    guided_generation_defaults = dict(
        classifier_num_epochs=15
    )
    # Later groups deliberately overwrite earlier ones on shared keys.
    merged = dict(base_defaults)
    for group in (
        model_and_diffusion_defaults(),
        text_defaults,
        guided_generation_defaults,
        decoding_defaults(),
        additional_args_for_translation(),
    ):
        merged.update(group)
    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", action="store_true")
    add_dict_to_argparser(parser, merged)
    return parser
def additional_args_for_translation():
    """Return the extra default options used for translation experiments
    (tokenizer selection and generate-by-q/mix sampling knobs)."""
    translation_defaults = {
        "pretrained_tokenizer": None,
        "sequence_len_src": 64,
        "use_pretrained_tokenizer": False,
        "generate_by_q": False,
        "generate_by_mix": False,
        "generate_by_mix_prob": 0.0,
        "generate_by_mix_part": 1.0,
    }
    return translation_defaults
def model_and_diffusion_defaults():
    """Return the default hyperparameters for the text-diffusion model
    (transformer architecture sizes) and the diffusion process itself."""
    return {
        # transformer architecture
        "encoder_layers": 6,
        "decoder_layers": 6,
        "sequence_len": 64,
        "num_channels": 16,
        "num_heads": 4,
        "dropout": 0.0,
        # diffusion process
        "learn_sigma": False,
        "sigma_small": False,
        "class_cond": False,
        "diffusion_steps": 10000,
        "noise_schedule": "linear",
        "timestep_respacing": "",
        "use_kl": False,
        "predict_xstart": False,
        "rescale_timesteps": True,
        "rescale_learned_sigmas": True,
        "use_checkpoint": False,
        # model I/O and embeddings
        "model_arch": "transformer",
        "in_channel": 16,
        "out_channel": 16,
        "vocab_size": 66,
        "config_name": "bert-base-uncased",
        "logits_mode": 1,
        "training_mode": "diffusion-lm",
        "init_pretrained": False,
        "freeze_embeddings": False,
        "use_pretrained_embeddings": True,
        "load_ckpt": None,
        "loss_update_granu": None,
        "schedule_update_stride": 0,
    }
def decoding_defaults():
    """Return the default options used at sampling/decoding time."""
    return {
        "num_samples": 50,
        "top_p": 0.9,
        "out_dir": "",
        "model_name_or_path": "",
        "checkpoint_path": "",
        "use_ddim": False,
        "clip_denoised": False,
        "batch_size": 64,
        "mbr_sample": 1,
        "verbose": "yes",
        "clamp": "clamp",
        "preprocessing_num_workers": 1,
        "emb_scale_factor": 1.0,
        "classifier_path": "",
        "time_schedule_path": '',
        "comment": '',
    }
def add_dict_to_argparser(parser, default_dict):
    """Register one ``--<key>`` option per dict entry on *parser*.

    The option's type is inferred from the default value: ``None`` defaults
    parse as ``str``, booleans parse through :func:`str2bool` (so ``--flag
    yes``/``no`` work on the command line), everything else uses its own type.
    """
    for key, default in default_dict.items():
        if default is None:
            arg_type = str
        elif isinstance(default, bool):
            arg_type = str2bool
        else:
            arg_type = type(default)
        parser.add_argument(f"--{key}", default=default, type=arg_type)
def args_to_dict(args, keys):
    """Return a dict mapping each name in *keys* to its attribute on *args*."""
    selected = {}
    for key in keys:
        selected[key] = getattr(args, key)
    return selected
def str2bool(v):
    """Parse a CLI token into a bool; pass real bools through unchanged.

    https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse
    """
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    # argparse reports ArgumentTypeError as a clean usage error
    raise argparse.ArgumentTypeError("boolean value expected")