From 916f4b7bbb690b51010db0bc822b8c0c90a97521 Mon Sep 17 00:00:00 2001 From: Benjamin Piwowarski Date: Tue, 27 Aug 2024 17:33:25 +0200 Subject: [PATCH] updates --- CHANGELOG.md | 11 +++++++++-- README.md | 7 ++++--- src/pystk2_gymnasium/envs.py | 20 +++++++++++++------- src/pystk2_gymnasium/stk_wrappers.py | 11 ++++++++++- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bba8a25..5eb9d94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,19 @@ +# Version 0.4.5 + +- `center_path_distance` is now relative (to indicate left/right of the path) +- `center_path` gives the vector to the center of the track +- Changed the reward to give some importance to the final ranking +- Better regression tests + # Version 0.4.2 -- Fix in formula for `distance_center_path` +- Fix in formula for `center_path_distance` # Version 0.4.0 - Multi-agent environment - Use polar representation instead of coordinates (except for the "full" environment) - Only two base environments (multi/mono-agent) and wrappers for the rest: this allows races to be organized with different set of wrappers (depending on the agent) -- Added `distance_center_path` +- Added `center_path_distance` - Allow to change player name and camera mode - breaking: Agent spec is used for mono-kart environments diff --git a/README.md b/README.md index 69075a0..648d659 100644 --- a/README.md +++ b/README.md @@ -142,19 +142,20 @@ up): - `distance_down_track`: The distance from the start - `energy`: remaining collected energy - `front`: front of the kart (3D vector) -- `items_position`: position of the items (3D vectors) - `attachment`: the item attached to the kart (bonus box, banana, nitro/big, nitro/small, bubble gum, easter egg) - `attachment_time_left`: how much time the attachment will be kept +- `items_position`: position of the items (3D vectors) - `items_type`: type of the item - `jumping`: is the kart jumping - `karts_position`: position of other karts, beginning 
with the ones in front - `max_steer_angle` the max angle of the steering (given the current speed) -- `distance_center_path`: distance to the center of the path -- `paths_distance`: the distance of the paths +- `center_path_distance`: distance to the center of the path +- `center_path`: vector to the center of the path - `paths_start`, `paths_end`, `paths_width`: 3D vectors to the paths start and end, and vector of their widths (scalar). The paths are sorted so that the first element of the array is the current one. +- `paths_distance`: the distance of the paths starts and ends (vector of dimension 2) - `powerup`: collected power-up - `shield_time` - `skeed_factor` diff --git a/src/pystk2_gymnasium/envs.py b/src/pystk2_gymnasium/envs.py index a9993bc..34fce9d 100644 --- a/src/pystk2_gymnasium/envs.py +++ b/src/pystk2_gymnasium/envs.py @@ -52,9 +52,12 @@ def kart_observation_space(use_ai: bool): float("-inf"), float("inf"), dtype=np.float32, shape=(3,) ), "max_steer_angle": spaces.Box(-1, 1, dtype=np.float32, shape=(1,)), - "distance_down_track": spaces.Box(0.0, float("inf")), - "distance_center_path": spaces.Box( - 0, float("inf"), dtype=np.float32, shape=(1,) + "distance_down_track": spaces.Box(-float("inf"), float("inf")), + "center_path_distance": spaces.Box( + float("-inf"), float("inf"), dtype=np.float32, shape=(1,) + ), + "center_path": spaces.Box( + -float("inf"), float("inf"), dtype=np.float32, shape=(3,) ), "front": spaces.Box( -float("inf"), float("inf"), dtype=np.float32, shape=(3,) @@ -323,10 +326,11 @@ def sort_closest(positions, *lists): start, end = kartview(self.track.path_nodes[path_ix][0]), kartview( self.track.path_nodes[path_ix][1] ) + s_e = start - end - distance_center_path = np.linalg.norm( - start - np.dot(s_e, start) * s_e / np.linalg.norm(s_e) ** 2 - ) + x_orth = np.dot(s_e, start) * s_e / np.linalg.norm(s_e) ** 2 - start + + center_path_distance = np.linalg.norm(x_orth) * np.sign(x_orth[0]) # Add action if using AI bot # (this corresponds 
to the action before the observation) @@ -363,9 +367,11 @@ def sort_closest(positions, *lists): "distance_down_track": np.array( [kart.distance_down_track], dtype=np.float32 ), - "distance_center_path": np.array([distance_center_path], dtype=np.float32), "velocity": kart.velocity_lc, "front": kartview(kart.front), + # path center + "center_path_distance": np.array([center_path_distance], dtype=np.float32), + "center_path": np.array(x_orth), # Items (kart point of view) "items_position": tuple(items_position), "items_type": tuple(items_type), diff --git a/src/pystk2_gymnasium/stk_wrappers.py b/src/pystk2_gymnasium/stk_wrappers.py index c444ea9..68098de 100644 --- a/src/pystk2_gymnasium/stk_wrappers.py +++ b/src/pystk2_gymnasium/stk_wrappers.py @@ -22,8 +22,10 @@ class PolarObservations(gym.ObservationWrapper): output: (angle in the ZX plane, angle in the ZY plane, distance) """ - #: Keys to transform + #: Keys to transform (batch) KEYS = ["items_position", "karts_position", "paths_start", "paths_end"] + #: Keys to transform (single) + SIMPLE_KEYS = ["center_path"] def __init__(self, env: gym.Env, **kwargs): super().__init__(env, **kwargs) @@ -32,6 +34,13 @@ def observation(self, obs): # Shallow copy obs = {**obs} + for key in PolarObservations.SIMPLE_KEYS: + v = obs[key] + distance = np.linalg.norm(v, axis=None) + angle_zx = np.arctan2(v[0], v[2]) + angle_zy = np.arctan2(v[1], v[2]) + v[:] = angle_zx, angle_zy, distance + for key in PolarObservations.KEYS: v = obs[key] distance = np.linalg.norm(v, axis=1)