# default_config_pendulum_td3_opt.yaml
env_name: !!str Pendulum-v0
device: !!str cuda # torch device (cuda:0 or cpu)
render_env: !!bool False # render environment
agents:
  td3:
    train_episodes: !!int 70 # maximum number of episodes to optimize
    test_episodes: !!int 10 # maximum number of episodes to evaluate
    init_episodes: !!int 20 # number of episodes to fill the replay buffer
    batch_size: !!int 233 # batch size when running a policy update step
    gamma: !!float 0.944 # discount factor
    lr: !!float 0.00352 # learning rate
    tau: !!float 0.0464 # target network update rate
    policy_delay: !!int 1 # frequency of delayed policy updates
    rb_size: !!int 1000000 # size of the replay buffer
    same_action_num: !!int 1 # how often to perform the same action subsequently
    activation_fn: !!str leakyrelu # activation function for actor/critic ('tanh', 'relu', 'leakyrelu' or 'prelu')
    hidden_size: !!int 134 # size of the actor/critic hidden layer
    hidden_layer: !!int 2 # number of hidden layers
    action_std: !!float 0.0571 # action noise standard deviation
    policy_std: !!float 0.102 # policy noise standard deviation
    policy_std_clip: !!float 0.250 # clipping range for the policy noise
    print_rate: !!int 100 # update rate of avg meters
    early_out_num: !!int 5 # based on how many training episodes shall an early out happen
    early_out_virtual_diff: !!float 0.0478 # performance difference for an early out for virtual envs
envs:
  Pendulum-v0:
    solved_reward: !!float -300 # used for early out in RL agent training
    max_steps: !!int 200 # maximum number of steps per episode
    activation_fn: !!str leakyrelu # activation function of the virtual environment
    hidden_size: !!int 32 # size of the hidden layer of the virtual environment
    hidden_layer: !!int 2 # number of hidden layers of the virtual environment
    info_dim: !!int 0 # additional information dimension from step function
    reward_env_type: !!int 2 # type of reward shaping function