# default_config_pendulum_td3_opt.yaml
env_name: !!str Pendulum-v0
device: !!str cuda # torch device (cuda:0 or cpu)
render_env: !!bool False # render environment
agents:
  td3:
    train_episodes: !!int 70 # maximum number of episodes to optimize
    test_episodes: !!int 10 # maximum number of episodes to evaluate
    init_episodes: !!int 20 # number of episodes to fill the replay buffer
    batch_size: !!int 233 # batch size when running a policy update step
    gamma: !!float 0.944 # discount factor
    lr: !!float 0.00352 # learning rate
    tau: !!float 0.0464 # target network update rate
    policy_delay: !!int 1 # frequency of delayed policy updates
    rb_size: !!int 1000000 # size of the replay buffer
    same_action_num: !!int 1 # how often to perform the same action subsequently
    activation_fn: !!str leakyrelu # activation function for actor/critic ('tanh', 'relu', 'leakyrelu' or 'prelu')
    hidden_size: !!int 134 # size of the actor/critic hidden layer
    hidden_layer: !!int 2 # number of hidden layers
    action_std: !!float 0.0571 # action noise standard deviation
    policy_std: !!float 0.102 # policy noise standard deviation
    policy_std_clip: !!float 0.250 # clipping range for the policy noise
    print_rate: !!int 100 # update rate of avg meters
    early_out_num: !!int 5 # based on how many training episodes shall an early out happen
    early_out_virtual_diff: !!float 0.0478 # performance difference for an early out for virtual envs
envs:
  Pendulum-v0:
    solved_reward: !!float -300 # used for early out in RL agent training
    max_steps: !!int 200 # maximum number of steps per episode
    activation_fn: !!str leakyrelu # activation function of the virtual environment
    hidden_size: !!int 32 # size of the hidden layer of the virtual environment
    hidden_layer: !!int 2 # number of hidden layers of the virtual environment
    info_dim: !!int 0 # additional information dimension from step function
    reward_env_type: !!int 2 # type of reward shaping function