# real_robot_torquesweep.py (forked from montrealrobotics/active-domainrand)
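#
# Sweeps saved DDPG policies ('baseline', 'usdr', 'adr') across a range of
# motor torque settings on the real ErgoReacher robot, logging rewards,
# distances, transitions, and camera frames to a single HDF5 file.
# Usage sketch:
#   python real_robot_torquesweep.py --nepisodes 25 --torques 25 50 100 200 400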
import h5py
import matplotlib
matplotlib.use('Agg')
import time
import numpy as np
import gym
import argparse
import os
import os.path as osp
from tqdm import tqdm, trange
from common.agents.ddpg_actor import DDPGActor
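# Imported for their side effects: registering the Ergo robot environments
# (including ErgoReacher-Live-v1) with gym.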
import poppy_helpers
import gym_ergojr
parser = argparse.ArgumentParser(description='Real Robot Experiment Driver')
parser.add_argument('--nepisodes', type=int, default=25, help='Number of trials per *seed*')
parser.add_argument('--torques', type=int, nargs='+', default=[25, 50, 100, 200, 400],
                    help='torque settings to iterate')
parser.add_argument('--experiment-prefix', type=str, default='real', help='Prefix to append to logs')
parser.add_argument('--log-dir', type=str, default='results/real-robot', help='Log Directory Prefix')
parser.add_argument('--model-dir', type=str, default='saved-models/real-robot', help='Model Directory Prefix')
parser.add_argument('--cont', type=str, default='190329-180631', help='To continue existing file, enter timestamp here')
args = parser.parse_args()
# Start a fresh log file, or reopen an existing one to resume an interrupted
# run. Note: --cont defaults to a past timestamp, so pass --cont '' to start fresh.
if len(args.cont) == 0:
    TIMESTAMP = time.strftime("%y%m%d-%H%M%S")
    file_flag = "w"
else:
    TIMESTAMP = args.cont
    file_flag = "r+"
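# All results for a run live in one HDF5 file, named by prefix and timestamp.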
file_path = "{}/{}-{}.hdf5".format(args.log_dir, args.experiment_prefix, TIMESTAMP)
MAX_EPISODE_STEPS = 100
EPISODES = args.nepisodes
TORQUES = args.torques
# Policies to look for
policies = ['baseline', 'usdr', 'adr']
env = gym.make('ErgoReacher-Live-v1')
# env = gym.make('ErgoReacher-Graphical-Simple-Halfdisk-v1')
img_buffer = []
if not osp.exists(args.log_dir):
    os.makedirs(args.log_dir)
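# HDF5 layout: one group per approach; every dataset is indexed by
# (model, torque setting, episode, step).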
with h5py.File(file_path, file_flag) as f:
    for policy_type in tqdm(policies, desc="approaches"):
        # Count the saved checkpoints first; the dataset shapes depend on it.
        model_path = osp.join(args.model_dir, policy_type)
        no_models = len(os.listdir(model_path))
        if policy_type not in f:  # if the file doesn't have these tables yet
            log_group = f.create_group(policy_type)
            rewards = log_group.create_dataset("rewards", (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS),
                                               dtype=np.float32)
            distances = log_group.create_dataset("distances", (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS),
                                                 dtype=np.float32)
            trajectories = log_group.create_dataset("trajectories",
                                                    (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS, 24),
                                                    dtype=np.float32)
            imgs = log_group.create_dataset("images",
                                            (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS, 480, 640, 3),
                                            dtype=np.uint8, compression="lzf")
        else:  # if the tables already exist, grab their pointers
            rewards = f.get("/{}/{}".format(policy_type, "rewards"))
            distances = f.get("/{}/{}".format(policy_type, "distances"))
            trajectories = f.get("/{}/{}".format(policy_type, "trajectories"))
            imgs = f.get("/{}/{}".format(policy_type, "images"))
        tqdm.write('Starting analysis of {}'.format(policy_type))
        # Sorted so model_idx maps to the same checkpoint across resumed runs.
        for model_idx, actorpth in enumerate(tqdm(sorted(os.listdir(model_path)), desc="models....")):
            # Load the saved actor network for this checkpoint.
            agent_policy = DDPGActor(
                state_dim=env.observation_space.shape[0],
                action_dim=env.action_space.shape[0],
                agent_name='real-{}'.format(policy_type),
                load_agent=True,
                model_path=osp.join(model_path, actorpth)
            )
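            # Sweep every torque setting, running EPISODES rollouts at each.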
            for torque_idx, torque in enumerate(tqdm(TORQUES, desc="torques...")):
                for ep_num in trange(EPISODES, desc="episodes.."):
                    # Resume support: only run episodes whose trajectory rows
                    # in the log file are still all zeros.
                    non_zero_steps = np.count_nonzero(trajectories[model_idx, torque_idx, ep_num], axis=1)
                    if np.count_nonzero(non_zero_steps) == 0:
                        obs = env.reset()
                        env.unwrapped.setSpeed(torque)
                        cumulative = 0
                        counter = 0
                        img_buffer = []
                        # Fixed-length rollout; the env's done flag is ignored.
                        while counter < MAX_EPISODE_STEPS:
                            action = agent_policy.select_action(obs)
                            nobs, reward, _, misc = env.step(action)
                            cumulative += reward
                            # Log the transition as concatenated (obs, action, next obs).
                            trajectories[model_idx, torque_idx, ep_num, counter, :] = np.concatenate(
                                [obs, action, nobs])
                            rewards[model_idx, torque_idx, ep_num, counter] = reward
                            distances[model_idx, torque_idx, ep_num, counter] = misc["distance"]
                            img_buffer.append(np.copy(misc["img"]))
                            obs = np.copy(nobs)
                            counter += 1
                        # Write this episode's buffered camera frames in one shot.
                        imgs[model_idx, torque_idx, ep_num, :counter, :, :, :] = img_buffer
                        # tqdm.write('Episode: {}, Reward: {}'.format(ep_num, cumulative))
            # write to disk after every model run
            f.flush()
# Reset the robot to its rest pose once the sweep is done.
env.reset()