-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfigs.yaml
158 lines (147 loc) · 4.07 KB
/
configs.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Baseline configuration. The other sections of this file (`minedojo`,
# `debug`) repeat a subset of these keys with different values; presumably
# the loader layers a selected section on top of this one -- confirm in the
# config-loading code.
#
# Conventions used below:
#   * Booleans are written `true` / `false`.
#   * Nulls are written `null`. YAML has no `None` keyword: a bare `None`
#     loads as the 4-character string "None".
#   * Scientific notation carries an explicit mantissa dot and a signed
#     exponent (`1.0e+6`, `3.0e-4`) so that YAML 1.1 loaders such as PyYAML
#     resolve it as a float instead of a string. YAML 1.2 loaders read the
#     same value either way.
defaults:
  # General
  use_wandb: false
  wandb_key: null
  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: false
  steps: 1.0e+6
  parallel: false
  eval_every: 20000
  eval_episode_num: 10
  log_every: 20000
  reset_every: 0
  device: 'cuda:0'
  compile: true
  precision: 32
  debug: false
  video_pred_log: true

  # Environment
  task: 'minedojo'
  size: [64, 64]
  envs: 1
  action_repeat: 1
  time_limit: 1000
  grayscale: false
  prefill: 2500
  reward_EMA: true

  # Model
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 32
  dyn_rec_depth: 1
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  # NOTE(review): `concentration_score_head` is configured further down but
  # has no matching entry in this list -- confirm that is intentional.
  grad_heads:
    - 'decoder'
    - 'reward'
    - 'end'
    - 'intrinsic'
    - 'jump'
    - 'jumping_steps'
    - 'accumulated_reward'
  units: 512
  act: 'SiLU'
  norm: true
  encoder:
    # Presumably a regex over observation keys; '$^' would then select no
    # MLP inputs by default -- verify against the encoder code.
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: true
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 5
    mlp_units: 1024
    symlog_inputs: true
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: true
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 5
    mlp_units: 1024
    cnn_sigmoid: false
    image_dist: 'mse'
    vector_dist: 'symlog_mse'
    outscale: 1.0
  actor:
    layers: 2
    dist: 'normal'
    entropy: 3.0e-4
    unimix_ratio: 0.01
    std: 'learned'
    min_std: 0.1
    max_std: 1.0
    temp: 0.1
    lr: 3.0e-5
    eps: 1.0e-5
    grad_clip: 100.0
    outscale: 1.0
  critic:
    layers: 2
    dist: 'symlog_disc'
    slow_target: true
    slow_target_update: 1
    slow_target_fraction: 0.02
    lr: 3.0e-5
    eps: 1.0e-5
    grad_clip: 100.0
    outscale: 0.0
  reward_head:
    layers: 2
    dist: 'symlog_disc'
    loss_scale: 1.0
    outscale: 0.0
  end_head:
    layers: 2
    loss_scale: 1.0
    outscale: 1.0
  jump_head:
    layers: 2
    loss_scale: 1.0
    outscale: 1.0
  concentration_score_head:
    layers: 2
    dist: 'symlog_disc'
    loss_scale: 1.0
    outscale: 0.0
  intrinsic_head:
    layers: 2
    dist: 'symlog_disc'
    loss_scale: 1.0
    outscale: 0.0
  jumping_steps_head:
    layers: 2
    dist: 'symlog_disc'
    loss_scale: 1.0
    outscale: 0.0
  accumulated_reward_head:
    layers: 2
    dist: 'symlog_disc'
    loss_scale: 1.0
    outscale: 0.0
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  img_weight: 1.0
  jmp_weight: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: 'learned'

  # Training
  batch_size: 16
  batch_length: 32
  train_ratio: 512
  pretrain: 100
  model_lr: 1.0e-4
  opt_eps: 1.0e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: 'adam'

  # Behavior.
  discount: 0.997
  discount_lambda: 0.95
  jump_prob: 0.7
  jump_prob_decay: 0.9997
  imag_horizon: 15
  max_imag_sequences_num: 1024
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  eval_state_mean: false

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: true
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: false
# MineDojo-specific settings. Several keys here also appear under `defaults`
# with different (or identical) values; nested mappings list only a subset
# of the keys their `defaults` counterparts define. How the two sections are
# combined is decided by the loader, which is not part of this file.
minedojo:
  long_term_branch_weight: 1.0
  gaussian_reward_weight: 1.0
  gaussian_sigma_weight: 0.5
  # rollout
  is_random: true
  agent_checkpoint_dir: ''
  rollout_image_num: 2000
  unet_batch_size: 64
  results_dir: './results'
  name: 'test'
  target_item: 'log'
  # Explicit mantissa dot and signed exponent so YAML 1.1 loaders (e.g.
  # PyYAML) resolve this as a float rather than the string "1e6".
  steps: 1.0e+6
  parallel: false
  envs: 1
  eval_episode_num: 3
  eval_every: 10000
  action_repeat: 1
  train_ratio: 16
  video_pred_log: true
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  encoder:
    mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward'
    cnn_keys: '^(image|heatmap)$'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  decoder:
    mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath'
    cnn_keys: '^(image|heatmap)$'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  actor:
    layers: 5
    dist: 'onehot'
    std: 'none'
  # NOTE(review): `defaults` defines a `critic` mapping but no `value`
  # mapping, so this entry has no default counterpart. If it is meant to give
  # the critic 5 layers like the heads below, it should be spelled `critic`.
  # Left unchanged because renaming would alter the critic network size and
  # could invalidate existing checkpoints -- confirm against the code that
  # reads this config.
  value:
    layers: 5
  reward_head:
    layers: 5
  intrinsic_head:
    layers: 5
  jumping_steps_head:
    layers: 5
  accumulated_reward_head:
    layers: 5
  end_head:
    layers: 5
  jump_head:
    layers: 5
  imag_gradient: 'reinforce'
  break_speed: 100.0
  time_limit: 36000
# Reduced-size settings: every value below is smaller than its counterpart
# under `defaults` (pretrain 100 -> 1, prefill 2500 -> 1, batch_size 16 -> 10,
# batch_length 32 -> 20), and the `debug` flag is switched on.
# The section name and the inner flag are both called `debug`; the nesting
# keeps them distinct keys.
debug:
  debug: true
  pretrain: 1
  prefill: 1
  batch_size: 10
  batch_length: 20