configs.yaml

defaults:

  use_wandb: False
  wandb_key: None
  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: False
  steps: 1e6
  parallel: False
  eval_every: 20000
  eval_episode_num: 10
  log_every: 20000
  reset_every: 0
  device: 'cuda:0'
  compile: True
  precision: 32
  debug: False
  video_pred_log: True

  # Environment
  task: 'minedojo'
  size: [64, 64]
  envs: 1
  action_repeat: 1
  time_limit: 1000
  grayscale: False
  prefill: 2500
  reward_EMA: True

  # Model
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 32
  dyn_rec_depth: 1
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['decoder', 'reward', 'end', 'intrinsic', 'jump', 'jumping_steps', 'accumulated_reward']
  units: 512
  act: 'SiLU'
  norm: True
  encoder:
    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: True}
  decoder:
    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
  actor:
    {layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, std: 'learned', min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
  critic:
    {layers: 2, dist: 'symlog_disc', slow_target: True, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
  reward_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  end_head:
    {layers: 2, loss_scale: 1.0, outscale: 1.0}
  jump_head:
    {layers: 2, loss_scale: 1.0, outscale: 1.0}
  concentration_score_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  intrinsic_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  jumping_steps_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  accumulated_reward_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  img_weight: 1.0
  jmp_weight: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: 'learned'

  # Training
  batch_size: 16
  batch_length: 32
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: 'adam'

  # Behavior.
  discount: 0.997
  discount_lambda: 0.95
  jump_prob: 0.7
  jump_prob_decay: 0.9997
  imag_horizon: 15
  max_imag_sequences_num: 1024
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  eval_state_mean: False

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: False

minedojo:

  long_term_branch_weight: 1.0
  gaussian_reward_weight: 1.0
  gaussian_sigma_weight: 0.5

  # rollout
  is_random: True
  agent_checkpoint_dir: ""
  rollout_image_num: 2000

  unet_batch_size: 64
  results_dir: "./results"
  name: "test"
  target_item: "log"
  steps: 1e6
  parallel: False
  envs: 1
  eval_episode_num: 3
  eval_every: 10000
  action_repeat: 1
  train_ratio: 16
  video_pred_log: true
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward', cnn_keys: '^(image|heatmap)$', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: '^(image|heatmap)$', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  actor: {layers: 5, dist: 'onehot', std: 'none'}
  value: {layers: 5}
  reward_head: {layers: 5}
  intrinsic_head: {layers: 5}
  jumping_steps_head: {layers: 5}
  accumulated_reward_head: {layers: 5}
  end_head: {layers: 5}
  jump_head: {layers: 5}
  imag_gradient: 'reinforce'
  break_speed: 100.0
  time_limit: 36000

debug:
  debug: True
  pretrain: 1
  prefill: 1
  batch_size: 10
  batch_length: 20