diff --git a/dizoo/gym_hybrid/envs/gym-hybrid/gym_hybrid/environments.py b/dizoo/gym_hybrid/envs/gym-hybrid/gym_hybrid/environments.py index bbbb15c994..9716bc4484 100644 --- a/dizoo/gym_hybrid/envs/gym-hybrid/gym_hybrid/environments.py +++ b/dizoo/gym_hybrid/envs/gym-hybrid/gym_hybrid/environments.py @@ -171,7 +171,7 @@ def distance(self) -> float: @staticmethod def get_distance(x1: float, y1: float, x2: float, y2: float) -> float: - return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2)) + return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2)).item() def render(self, mode='human'): screen_width = 400 @@ -397,7 +397,7 @@ def distance(self) -> float: @staticmethod def get_distance(x1: float, y1: float, x2: float, y2: float) -> float: - return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2)) + return np.sqrt(((x1 - x2) ** 2) + ((y1 - y2) ** 2)).item() def close(self): if self.viewer: diff --git a/dizoo/gym_hybrid/envs/gym_hybrid_env.py b/dizoo/gym_hybrid/envs/gym_hybrid_env.py index bc0f544374..9f02925d1a 100644 --- a/dizoo/gym_hybrid/envs/gym_hybrid_env.py +++ b/dizoo/gym_hybrid/envs/gym_hybrid_env.py @@ -88,18 +88,6 @@ def step(self, action: Dict) -> BaseEnvTimestep: if self._save_replay: self._frames.append(self._env.render(mode='rgb_array')) obs, rew, done, info = self._env.step(action) - self._eval_episode_return += rew - if done: - info['eval_episode_return'] = self._eval_episode_return - if self._save_replay: - if self._env_id == 'HardMove-v0': - self._env_id = f'hardmove_n{self._cfg.num_actuators}' - path = os.path.join( - self._replay_path, '{}_episode_{}.gif'.format(self._env_id, self._save_replay_count) - ) - self.display_frames_as_gif(self._frames, path) - self._frames = [] - self._save_replay_count += 1 obs = to_ndarray(obs) if isinstance(obs, list): # corner case @@ -114,6 +102,18 @@ def step(self, action: Dict) -> BaseEnvTimestep: if isinstance(rew, list): rew = rew[0] assert isinstance(rew, np.ndarray) and rew.shape == (1, ) + self._eval_episode_return += rew.item() + if done: + info['eval_episode_return'] = self._eval_episode_return + if self._save_replay: + if self._env_id == 'HardMove-v0': + self._env_id = f'hardmove_n{self._cfg.num_actuators}' + path = os.path.join( + self._replay_path, '{}_episode_{}.gif'.format(self._env_id, self._save_replay_count) + ) + self.display_frames_as_gif(self._frames, path) + self._frames = [] + self._save_replay_count += 1 info['action_args_mask'] = np.array([[1, 0], [0, 1], [0, 0]]) return BaseEnvTimestep(obs, rew, done, info)