# test.py
import numpy as np
import torch
import common.utils
from procgen import ProcgenEnv
from baselines.common.vec_env import (
    VecExtractDictObs,
    VecMonitor,
    VecNormalize
)
from common.envs import VecPyTorchProcgen, TransposeImageProcgen

def evaluate(args, actor_critic, device, num_processes=1, aug_id=None):
    actor_critic.eval()

    # Sample levels from the full distribution (num_levels=0 means unlimited).
    venv = ProcgenEnv(num_envs=num_processes, env_name=args.env_name,
                      num_levels=0, start_level=0,
                      distribution_mode=args.distribution_mode)
    venv = VecExtractDictObs(venv, "rgb")
    venv = VecMonitor(venv=venv, filename=None, keep_buf=100)
    venv = VecNormalize(venv=venv, ob=False)
    eval_envs = VecPyTorchProcgen(venv, device)

    eval_episode_rewards = []

    obs = eval_envs.reset()
    eval_recurrent_hidden_states = torch.zeros(
        num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.ones(num_processes, 1, device=device)

    # Roll out the policy until 10 evaluation episodes have finished.
    while len(eval_episode_rewards) < 10:
        with torch.no_grad():
            # Optionally apply the identity augmentation before acting.
            if aug_id:
                obs = aug_id(obs)

            if args.algo == "sar":
                # The "sar" policy's act() returns extra outputs
                # (value, log-prob, logits) that are unused during evaluation.
                value, action, action_log_prob, logits, recurrent_hidden_states, \
                    _, _ = actor_critic.act(obs, eval_recurrent_hidden_states,
                                            eval_masks, deterministic=False)
            else:
                _, action, _, _, eval_recurrent_hidden_states = actor_critic.act(
                    obs, eval_recurrent_hidden_states, eval_masks,
                    deterministic=False)

        obs, _, done, infos = eval_envs.step(action)

        # Zero the recurrent mask for environments whose episode just ended.
        eval_masks = torch.tensor(
            [[0.0] if done_ else [1.0] for done_ in done],
            dtype=torch.float32,
            device=device)

        # VecMonitor reports the finished episode's return under info['episode']['r'].
        for info in infos:
            if 'episode' in info.keys():
                eval_episode_rewards.append(info['episode']['r'])

    eval_envs.close()

    print("Last {} test episodes: mean/median reward {:.1f}/{:.1f}\n"
          .format(len(eval_episode_rewards),
                  np.mean(eval_episode_rewards), np.median(eval_episode_rewards)))

    return eval_episode_rewards
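

# ---------------------------------------------------------------------------
# Illustrative usage sketch (an assumption, not part of the original script):
# evaluate() only reads args.env_name, args.distribution_mode, and args.algo,
# so a minimal Namespace is enough. The checkpoint path "trained_model.pt" and
# the way the policy was saved are placeholders; the loaded object must expose
# .eval(), .act(), and .recurrent_hidden_state_size as assumed above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from argparse import Namespace

    args = Namespace(env_name="coinrun", distribution_mode="easy", algo="ppo")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Hypothetical: load a trained actor-critic saved with torch.save().
    actor_critic = torch.load("trained_model.pt", map_location=device)

    rewards = evaluate(args, actor_critic, device)
    print("Mean test reward: {:.2f}".format(np.mean(rewards)))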