-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtuner_sac_ingenuity.py
75 lines (66 loc) · 2.27 KB
/
tuner_sac_ingenuity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import ray
from ray import tune
import os
from trainer_sac import _train
from skrl.agents.torch.sac import SAC_DEFAULT_CONFIG
from skrl.resources.preprocessors.torch import RunningStandardScaler
def trainer(tuner):
    """Run one SAC training trial from a Ray Tune hyperparameter sample.

    Builds a configuration dict on top of ``SAC_DEFAULT_CONFIG`` and hands it
    to ``_train``. Nothing is returned.

    Args:
        tuner: Mapping with the sampled values. The keys read here are
            ``id``, ``grad_clip``, ``tau``, ``alpha``, ``lr``, ``loading``,
            ``num_envs``, ``timesteps``, ``path`` and ``task_name``.

    Raises:
        KeyError: If any of the keys listed above is missing from ``tuner``.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    trial_id = tuner['id']  # renamed locally so the builtin ``id`` is not shadowed
    grad_clip = tuner['grad_clip']
    tau = tuner['tau']
    alpha = tuner['alpha']
    lr = tuner['lr']
    loading = tuner['loading']
    num_envs = tuner['num_envs']
    timesteps = tuner['timesteps']
    path = tuner['path']
    task_name = tuner['task_name']

    # Batch size is ``loading`` divided by the number of environments, truncated.
    # NOTE(review): ``loading`` presumably is the total sample budget per update
    # across all environments - confirm against trainer_sac._train.
    bs = int(loading / num_envs)

    # Plain concatenation: ``path`` is expected to end with a separator.
    description = path + str(trial_id)

    # rewrite base config
    # configure and instantiate the agent (visit its documentation to see all the options)
    # https://skrl.readthedocs.io/en/latest/api/agents/sac.html#configuration-and-hyperparameters
    #
    # Deep copy on purpose: ``dict.copy()`` is shallow, so the nested
    # ``cfg["experiment"]`` dict would be the very same object held by the
    # module-level default and the writes below would leak into it.
    cfg = copy.deepcopy(SAC_DEFAULT_CONFIG)

    # Values coming from the tuner sample.
    cfg["task_name"] = task_name
    cfg["polyak"] = tau
    cfg["initial_entropy_value"] = alpha
    cfg["grad_norm_clip"] = grad_clip
    cfg["actor_learning_rate"] = lr
    cfg["critic_learning_rate"] = lr  # actor and critic share one learning rate
    cfg["batch_size"] = bs
    cfg["num_envs"] = num_envs
    cfg["timesteps"] = timesteps
    cfg["experiment"]["directory"] = description

    # --------
    # Settings held fixed for every trial.
    cfg["gradient_steps"] = 1
    cfg["discount_factor"] = 0.99
    cfg["random_timesteps"] = 100
    cfg["learning_starts"] = 100
    cfg["state_preprocessor"] = RunningStandardScaler
    cfg["memory_size"] = 15000
    cfg["experiment"]["write_interval"] = 5000
    # Checkpoint interval equals the total number of timesteps.
    cfg["experiment"]["checkpoint_interval"] = timesteps

    _train(cfg)
# ====================================
def main():
    """Initialise Ray with one GPU and launch the SAC grid search.

    Every hyperparameter is a grid-search axis. Only ``id`` has more than one
    candidate value, so the grid expands to two trials, each requesting
    2 CPUs and half a GPU.
    """
    ray.init(num_gpus=1)

    # Candidate values per hyperparameter, in the order they are handed to Tune.
    grid_values = {
        "task_name": ["Ingenuity"],
        "grad_clip": [0],
        "tau": [0.0025],
        "alpha": [0.1],
        "lr": [3e-4],
        "loading": [131072],
        "num_envs": [128],
        "timesteps": [500000],
        "id": [0, 1],
        "path": ["/workspace/skrl/runs/results_ingenuity/sac/"],
    }
    search_space = {
        key: tune.grid_search(candidates)
        for key, candidates in grid_values.items()
    }

    per_trial_resources = {'cpu': 2, 'gpu': 0.5}
    tune.run(
        trainer,
        num_samples=1,
        resources_per_trial=per_trial_resources,
        config=search_space,
    )
# Launch the sweep only when executed as a script, not when imported.
if __name__ == "__main__":
    main()