From 916f4b7bbb690b51010db0bc822b8c0c90a97521 Mon Sep 17 00:00:00 2001
From: Benjamin Piwowarski <benjamin@piwowarski.fr>
Date: Tue, 27 Aug 2024 17:33:25 +0200
Subject: [PATCH] updates

---
 CHANGELOG.md                         | 11 +++++++++--
 README.md                            |  7 ++++---
 src/pystk2_gymnasium/envs.py         | 20 +++++++++++++-------
 src/pystk2_gymnasium/stk_wrappers.py | 11 ++++++++++-
 4 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bba8a25..5eb9d94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,12 +1,19 @@
+# Version 0.4.5
+
+- `center_path_distance` is now signed (the sign indicates whether the kart is left or right of the path)
+- `center_path` gives the vector to the center of the track
+- Changed the reward to give some importance to the final ranking
+- Better regression tests
+
 # Version 0.4.2
 
-- Fix in formula for `distance_center_path`
+- Fix in formula for `center_path_distance`
 
 # Version 0.4.0
 
 - Multi-agent environment
 - Use polar representation instead of coordinates (except for the "full" environment)
 - Only two base environments (multi/mono-agent) and wrappers for the rest: this allows races to be organized with different set of wrappers (depending on the agent)
-- Added `distance_center_path`
+- Added `center_path_distance`
 - Allow to change player name and camera mode
 - breaking: Agent spec is used for mono-kart environments
diff --git a/README.md b/README.md
index 69075a0..648d659 100644
--- a/README.md
+++ b/README.md
@@ -142,19 +142,20 @@ up):
 - `distance_down_track`: The distance from the start
 - `energy`: remaining collected energy
 - `front`: front of the kart (3D vector)
-- `items_position`: position of the items (3D vectors)
 - `attachment`: the item attached to the kart (bonus box, banana, nitro/big,
   nitro/small, bubble gum, easter egg)
 - `attachment_time_left`: how much time the attachment will be kept
+- `items_position`: position of the items (3D vectors)
 - `items_type`: type of the item
 - `jumping`: is the kart jumping
 - `karts_position`: position of other karts, beginning with the ones in front
 - `max_steer_angle` the max angle of the steering (given the current speed)
-- `distance_center_path`: distance to the center of the path
-- `paths_distance`: the distance of the paths
+- `center_path_distance`: signed distance to the center of the path (the sign indicates the left/right side)
+- `center_path`: vector to the center of the path
 - `paths_start`, `paths_end`, `paths_width`: 3D vectors to the paths start and
   end, and vector of their widths (scalar). The paths are sorted so that the
   first element of the array is the current one.
+- `paths_distance`: the distance of the paths starts and ends (vector of dimension 2)
 - `powerup`: collected power-up
 - `shield_time`
 - `skeed_factor`
diff --git a/src/pystk2_gymnasium/envs.py b/src/pystk2_gymnasium/envs.py
index a9993bc..34fce9d 100644
--- a/src/pystk2_gymnasium/envs.py
+++ b/src/pystk2_gymnasium/envs.py
@@ -52,9 +52,12 @@ def kart_observation_space(use_ai: bool):
                 float("-inf"), float("inf"), dtype=np.float32, shape=(3,)
             ),
             "max_steer_angle": spaces.Box(-1, 1, dtype=np.float32, shape=(1,)),
-            "distance_down_track": spaces.Box(0.0, float("inf")),
-            "distance_center_path": spaces.Box(
-                0, float("inf"), dtype=np.float32, shape=(1,)
+            "distance_down_track": spaces.Box(-float("inf"), float("inf")),
+            "center_path_distance": spaces.Box(
+                float("-inf"), float("inf"), dtype=np.float32, shape=(1,)
+            ),
+            "center_path": spaces.Box(
+                -float("inf"), float("inf"), dtype=np.float32, shape=(3,)
             ),
             "front": spaces.Box(
                 -float("inf"), float("inf"), dtype=np.float32, shape=(3,)
@@ -323,10 +326,11 @@ def sort_closest(positions, *lists):
         start, end = kartview(self.track.path_nodes[path_ix][0]), kartview(
             self.track.path_nodes[path_ix][1]
         )
+
         s_e = start - end
-        distance_center_path = np.linalg.norm(
-            start - np.dot(s_e, start) * s_e / np.linalg.norm(s_e) ** 2
-        )
+        x_orth = np.dot(s_e, start) * s_e / np.linalg.norm(s_e) ** 2 - start
+
+        center_path_distance = np.linalg.norm(x_orth) * np.sign(x_orth[0])
 
         # Add action if using AI bot
         # (this corresponds to the action before the observation)
@@ -363,9 +367,11 @@ def sort_closest(positions, *lists):
             "distance_down_track": np.array(
                 [kart.distance_down_track], dtype=np.float32
             ),
-            "distance_center_path": np.array([distance_center_path], dtype=np.float32),
             "velocity": kart.velocity_lc,
             "front": kartview(kart.front),
+            # path center
+            "center_path_distance": np.array([center_path_distance], dtype=np.float32),
+            "center_path": np.array(x_orth),
             # Items (kart point of view)
             "items_position": tuple(items_position),
             "items_type": tuple(items_type),
diff --git a/src/pystk2_gymnasium/stk_wrappers.py b/src/pystk2_gymnasium/stk_wrappers.py
index c444ea9..68098de 100644
--- a/src/pystk2_gymnasium/stk_wrappers.py
+++ b/src/pystk2_gymnasium/stk_wrappers.py
@@ -22,8 +22,10 @@ class PolarObservations(gym.ObservationWrapper):
     output: (angle in the ZX plane, angle in the ZY plane, distance)
     """
 
-    #: Keys to transform
+    #: Keys to transform (batch)
     KEYS = ["items_position", "karts_position", "paths_start", "paths_end"]
+    #: Keys to transform (single)
+    SIMPLE_KEYS = ["center_path"]
 
     def __init__(self, env: gym.Env, **kwargs):
         super().__init__(env, **kwargs)
@@ -32,6 +34,13 @@ def observation(self, obs):
         # Shallow copy
         obs = {**obs}
 
+        for key in PolarObservations.SIMPLE_KEYS:
+            v = obs[key]
+            distance = np.linalg.norm(v, axis=None)
+            angle_zx = np.arctan2(v[0], v[2])
+            angle_zy = np.arctan2(v[1], v[2])
+            v[:] = angle_zx, angle_zy, distance
+
         for key in PolarObservations.KEYS:
             v = obs[key]
             distance = np.linalg.norm(v, axis=1)