run.py · 154 lines (139 loc) · 6.93 KB
import argparse
import gym
import numpy as np
import safety_gym
import torch
import time
from tqdm import tqdm
from mbrl import SafeMPC, RegressionModelEnsemble, CostModel
from utils.logx import EpochLogger
from utils.run_utils import setup_logger_kwargs, combined_shape, load_config, seed_torch
from utils.env_utils import SafetyGymEnv

DEFAULT_ENV_CONFIG_POINT = dict(
    action_repeat=3,
    max_episode_length=300,
    use_dist_reward=False,
    stack_obs=False,
)

DEFAULT_ENV_CONFIG_CAR = dict(
    action_repeat=2,
    max_episode_length=150,
    use_dist_reward=False,
    stack_obs=False,
)
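
# Overall flow of run() below: a batch of random episodes is collected first to
# seed the dynamics and cost data buffers, both models are fit on that data, and
# then each epoch alternates between collecting MPC-controlled episodes and
# refitting the models on the enlarged buffers.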
def run(logger, config, args):
    if args.robot.lower() == "point":
        env_config = DEFAULT_ENV_CONFIG_POINT
    elif args.robot.lower() == "car":
        env_config = DEFAULT_ENV_CONFIG_CAR
    else:
        raise ValueError("unsupported robot: %s (use `point` or `car`)" % args.robot)
    env = SafetyGymEnv(robot=args.robot, task="goal", level=args.level, seed=args.seed, config=env_config)

    # MPC, cost model, and dynamics model config
    mpc_config = config['mpc_config']
    mpc_config["optimizer"] = args.optimizer.upper()
    cost_config = config['cost_config']
    dynamic_config = config['dynamic_config']
    if args.load is not None:
        dynamic_config["load"] = True
        dynamic_config["load_folder"] = args.load
        cost_config["load"] = True
        cost_config["load_folder"] = args.load
    if args.save:
        dynamic_config["save"] = True
        dynamic_config["save_folder"] = logger.output_dir
        cost_config["save"] = True
        cost_config["save_folder"] = logger.output_dir
    config["arguments"] = vars(args)
    logger.save_config(config)

    state_dim, action_dim = env.observation_size, env.action_size
    if args.ensemble > 0:
        dynamic_config["n_ensembles"] = args.ensemble
    dynamic_model = RegressionModelEnsemble(state_dim + action_dim, state_dim, config=dynamic_config)
    cost_model = CostModel(env, cost_config)
    mpc_controller = SafeMPC(env, mpc_config, cost_model=cost_model, n_ensembles=dynamic_config["n_ensembles"])
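
    # Data conventions used below: the dynamics model is trained on inputs
    # x = concat(obs, action) with targets y = obs_next, and the cost model is
    # trained on a binary label (1 if the step incurred any safety cost, else 0)
    # predicted from obs_next. SafeMPC presumably plans action sequences over the
    # dynamics ensemble and uses the cost model to penalize or reject unsafe
    # candidates; see the mbrl package for the exact planning logic.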
    # Prepare a randomly collected dataset
    start_time = time.time()
    pretrain_episodes = 1000 if args.load is None else 10
    pretrain_max_step = 50
    print("collecting random episodes...")
    data_num = 0
    for epi in tqdm(range(pretrain_episodes)):
        obs = env.reset()
        done = False
        i = 0
        while not done and i < pretrain_max_step:
            action = env.action_space.sample()
            obs_next, reward, done, info = env.step(action)
            if not info["goal_met"] and not done:  # otherwise the goal position will change
                x, y = np.concatenate((obs, action)), obs_next
                dynamic_model.add_data_point(x, y)
                cost = 1 if info["cost"] > 0 else 0
                cost_model.add_data_point(obs_next, cost)
                data_num += 1
            i += 1
            obs = obs_next
    print("Finished collecting %i data points" % data_num)
    # Fit the dynamics and cost models on the random data
    # (skipped when pretrained models were loaded with --load)
    if args.load is None:
        dynamic_model.reset_model()
        print("resetting model")
        dynamic_model.fit(use_data_buf=True, normalize=True)
        cost_model.fit()
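
    # Each epoch collects `args.episode` episodes with the MPC controller and then
    # refits both models on the growing buffers; with --test the refit is skipped.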
    # Main loop: collect experience in env and update/log each epoch
    total_len = 0  # total environment interactions
    total_epi = 0
    for epoch in tqdm(range(args.epoch)):  # update models once per epoch
        for test_episode in range(args.episode):
            obs, ep_ret, ep_cost, done = env.reset(), 0, 0, False
            mpc_controller.reset()
            if args.render:
                env.render()
            while not done:
                action = np.squeeze(np.array([mpc_controller.act(model=dynamic_model, state=obs)]))
                obs_next, reward, done, info = env.step(action)
                if args.render:
                    env.render()
                ep_ret += reward
                total_len += 1
                ep_cost += info["cost"]
                if not info["goal_met"] and not done:
                    x = np.concatenate((obs, action))
                    y = obs_next  # - obs (absolute next state, not a delta)
                    dynamic_model.add_data_point(x, y)
                    cost = 1 if info["cost"] > 0 else 0
                    cost_model.add_data_point(obs_next, cost)
                obs = obs_next
            logger.store(EpRet=ep_ret, EpCost=ep_cost)
            logger.log_tabular('Epoch', epoch)
            logger.log_tabular('Episode', total_epi)
            logger.log_tabular('EpRet', average_only=True)  # with_min_and_max=False
            logger.log_tabular('EpCost', average_only=True)
            logger.log_tabular('TotalEnvInteracts', total_len)
            logger.log_tabular('Time', time.time() - start_time)
            logger.dump_tabular()
            total_epi += 1
        # refit the models on all collected data
        if not args.test:
            dynamic_model.fit(use_data_buf=True, normalize=True)
            cost_model.fit()
    env.close()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--robot', type=str, default='point', help="robot model, selected from `point` or `car`")
    parser.add_argument('--level', type=int, default=1, help="environment difficulty, selected from `1` or `2`, where `2` is more difficult than `1`")
    parser.add_argument('--epoch', type=int, default=60, help="maximum number of training epochs")
    parser.add_argument('--episode', type=int, default=10, help="number of episodes of data to collect in each epoch")
    parser.add_argument('--render', '-r', action='store_true', help="render the environment")
    parser.add_argument('--test', '-t', action='store_true', help="test the performance of pretrained models without training")
    parser.add_argument('--seed', '-s', type=int, default=1, help="seed for Gym, PyTorch and NumPy")
    parser.add_argument('--dir', '-d', type=str, default='./data/', help="directory in which to save the logging information")
    parser.add_argument('--name', '-n', type=str, default='test', help="name of the experiment, used as the folder name for saved data")
    parser.add_argument('--save', action='store_true', help="save the trained dynamics model, data buffer, and cost model")
    parser.add_argument('--load', type=str, default=None, help="load the trained dynamics model, data buffer, and cost model from a specified directory")
    parser.add_argument('--ensemble', type=int, default=0, help="number of model ensembles; if greater than 0, this overrides the default ensemble number in config.yml")
    parser.add_argument('--optimizer', '-o', type=str, default="rce", help="MPC optimizer, selected from `rce`, `cem`, or `random`")
    parser.add_argument('--config', '-c', type=str, default='./config.yml', help="path to the configuration file for the models")
    args = parser.parse_args()

    logger_kwargs = setup_logger_kwargs(args.name, args.seed, args.dir)
    logger = EpochLogger(**logger_kwargs)
    config = load_config(args.config)
    run(logger, config, args)
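
# Example invocations (assuming safety-gym is installed and config.yml sits in the
# working directory; the names and paths below are illustrative, not from the repo):
#   python run.py --robot point --level 1 --optimizer rce --name point_goal1 --save
#   python run.py --robot car --level 2 --episode 5 --render
#   python run.py --robot point --level 1 --load ./data/point_goal1 --test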