Skip to content

Commit

Permalink
chore(exp): fix CompOscillator learning length (#424)
Browse files Browse the repository at this point in the history
This commit changed the CompOscillator learning length from 1e5 to 2e5.
This was done to deal with an inconsistency in Han et al.'s reserach.
  • Loading branch information
rickstaa authored Mar 7, 2024
1 parent 35a4d69 commit 88e2ff1
Show file tree
Hide file tree
Showing 36 changed files with 330 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
env_name: "stable_gym:OscillatorComplicated-v1"
ac_kwargs:
hidden_sizes:
actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
critic: [256, 256, 16] # NOTE: Using [256, 256, 16] for consistency with the article.
activation:
actor: "nn.ReLU"
critic: "nn.ReLU"
output_activation:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_labda: "3e-4"
lr_a_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_labda_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_labda_decay_type: "constant"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 26000 # 49672 858762 388389 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_extra_seeds_short"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
env_name: "stable_gym:OscillatorComplicated-v1"
ac_kwargs:
hidden_sizes:
actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
critic: [256, 256, 16] # NOTE: Using [256, 256, 16] for consistency with the article.
activation:
actor: "nn.ReLU"
critic: "nn.ReLU"
output_activation:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_labda: "3e-4"
lr_a_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_labda_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_labda_decay_type: "constant"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 388389 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_extra_seeds_short"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
env_name: "stable_gym:OscillatorComplicated-v1"
ac_kwargs:
hidden_sizes:
actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
critic: [256, 256, 16] # NOTE: Using [256, 256, 16] for consistency with the article.
activation:
actor: "nn.ReLU"
critic: "nn.ReLU"
output_activation:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_labda: "3e-4"
lr_a_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_labda_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_labda_decay_type: "constant"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 408660 # 26000 49672 858762 388389 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_extra_seeds_short"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
env_name: "stable_gym:OscillatorComplicated-v1"
ac_kwargs:
hidden_sizes:
actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
critic: [256, 256, 16] # NOTE: Using [256, 256, 16] for consistency with the article.
activation:
actor: "nn.ReLU"
critic: "nn.ReLU"
output_activation:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_labda: "3e-4"
lr_a_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_labda_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_labda_decay_type: "constant"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 49672 # 858762 388389 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_extra_seeds_short"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
env_name: "stable_gym:OscillatorComplicated-v1"
ac_kwargs:
hidden_sizes:
actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
critic: [256, 256, 16] # NOTE: Using [256, 256, 16] for consistency with the article.
activation:
actor: "nn.ReLU"
critic: "nn.ReLU"
output_activation:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_labda: "3e-4"
lr_a_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_labda_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_labda_decay_type: "constant"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 858762 # 388389 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_extra_seeds_short"
device: "gpu:1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
alg_name: lac
exp_name: han2020_reproduction_lac_oscillator_complicated_alpha3_tune_exp_short
env_name: "stable_gym:OscillatorComplicated-v1"
ac_kwargs:
hidden_sizes:
actor: [256, 256] # NOTE: Using [256, 256] for consistency with the article.
critic: [256, 256, 16] # NOTE: Using [256, 256, 16] for consistency with the article.
activation:
actor: "nn.ReLU"
critic: "nn.ReLU"
output_activation:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
steps_per_epoch: 2048
start_steps: 0
update_every: 100
update_after: 1000
steps_per_update: 80
num_test_episodes: 10
alpha: 2.0
alpha3: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 # NOTE: Tuning alpha3.
labda: 0.99 # NOTE: Decreased from 1.0 to 0.99 for stability.
# gamma: 0.995 # NOTE: Not used for finite horizon tasks.
polyak: 0.995
adaptive_temperature: True
lr_a: "1e-4"
lr_c: "3e-4"
lr_alpha: "1e-4"
lr_labda: "3e-4"
lr_a_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_c_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_alpha_final: "1e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_labda_final: "3e-9" # NOTE: Not in Han et al. 2020, but aligned with original code.
lr_a_decay_type: "linear"
lr_c_decay_type: "linear"
lr_alpha_decay_type: "linear"
lr_labda_decay_type: "constant"
lr_decay_ref: "step"
batch_size: 256
replay_size: "int(1e6)"
horizon_length: 5
seed: 408660 26000 49672 858762 388389 # NOTE: Using 5 seeds for tuning.
save_freq: 10
use_wandb: True
wandb_group: "han2020_reproduction_extra_seeds_short"
device: "gpu:1"
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ac_kwargs:
actor: "nn.ReLU"
opt_type: "minimize"
max_ep_len: 400
epochs: 49
epochs: 98
steps_per_epoch: 2048
start_steps: 0
update_every: 100
Expand Down
Loading

0 comments on commit 88e2ff1

Please sign in to comment.