Added SoccerScoreGoal-v0 with updated reward function.
cycraig committed Mar 23, 2019
1 parent b9bf721 commit a65f514
Showing 6 changed files with 264 additions and 20 deletions.
27 changes: 22 additions & 5 deletions README.md
@@ -1,24 +1,41 @@
# gym-soccer

The [Soccer environment](https://github.com/LARG/HFO) is a multiagent
-domain featuring continuous state and action spaces. Currently,
-several tasks are supported:
+domain featuring continuous state and action spaces.

-## Soccer
+## Changes

Several changes have been made to more closely reflect the setup used by [[Hausknecht & Stone 2016]](https://arxiv.org/abs/1511.04143):

- The number of steps without touching the ball before ending an episode has been reduced to 100.
- The reward function has been updated to match the one used in their code (https://github.com/mhauskn/dqn-hfo). Specifically, the negative reward for the distance between the ball and the goal only activates once the agent has possession of the ball. A separate environment, `SoccerScoreGoal-v0`, has been created with this change; it is identical to `SoccerEmptyGoal-v0` except for the reward function.
- The state of the environment is returned after each step (useful for counting the number of goals).
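A minimal sketch of the possession-gated shaping described above (illustrative coefficients and a hypothetical helper signature; the actual implementation lives in `gym_soccer/envs/soccer_score_goal.py`):

```python
def score_goal_reward(d_agent_ball_prev, d_agent_ball,
                      d_ball_goal_prev, d_ball_goal,
                      had_possession, scored):
    """Illustrative sketch of a possession-gated reward; not the exact repo code."""
    # Always reward moving toward the ball.
    r = d_agent_ball_prev - d_agent_ball
    # The ball-to-goal term only counts once the agent has touched the ball.
    if had_possession:
        r += 3.0 * (d_ball_goal_prev - d_ball_goal)
    # Large bonus for actually scoring.
    if scored:
        r += 5.0
    return r
```

Before the agent's first touch, moving the ball toward the goal (e.g. by luck) earns nothing, which avoids penalizing the agent for ball position it does not yet control.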
## Tasks

There are several tasks supported at the moment:

### Soccer

The soccer task initializes a single offensive agent on the field and rewards +1 for scoring a goal and 0 otherwise. In order to score a goal, the agent will need to know how to approach the ball and kick towards the goal. The sparse nature of the goal reward makes this task very difficult to accomplish.

-## SoccerEmptyGoal
+### SoccerEmptyGoal

The SoccerEmptyGoal task features a more informative reward signal than the Soccer task. As before, the objective is to score a goal. However, SoccerEmptyGoal rewards the agent for approaching the ball and moving the ball towards the goal. These frequent rewards make the task much more accessible.

-## SoccerAgainstKeeper
+### SoccerAgainstKeeper

The objective of the SoccerAgainstKeeper task is to score against a goal keeper. The agent is rewarded for moving the ball towards the goal and for scoring a goal. The goal keeper uses a hand-coded policy developed by the Helios RoboCup team. The difficulty in this task is learning how to shoot around the goal keeper.


# Installation

```bash
cd gym-soccer
pip install -e .
```

or

```bash
pip install -e git+https://github.com/cycraig/gym-soccer#egg=gym_soccer
```
10 changes: 9 additions & 1 deletion gym_soccer/__init__.py
@@ -19,9 +19,17 @@
nondeterministic = True,
)

register(
id='SoccerScoreGoal-v0',
entry_point='gym_soccer.envs:SoccerScoreGoalEnv',
timestep_limit=1000,
reward_threshold=10.0,
nondeterministic = True,
)

register(
id='SoccerAgainstKeeper-v0',
-entry_point='gym.envs:SoccerAgainstKeeperEnv',
+entry_point='gym_soccer.envs:SoccerAgainstKeeperEnv',
timestep_limit=1000,
reward_threshold=8.0,
nondeterministic = True,
1 change: 1 addition & 0 deletions gym_soccer/envs/__init__.py
@@ -1,3 +1,4 @@
from gym_soccer.envs.soccer_env import SoccerEnv
from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv
from gym_soccer.envs.soccer_against_keeper import SoccerAgainstKeeperEnv
from gym_soccer.envs.soccer_score_goal import SoccerScoreGoalEnv
3 changes: 2 additions & 1 deletion gym_soccer/envs/soccer_against_keeper.py
@@ -1,9 +1,10 @@
import logging
-from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv
+from gym_soccer.envs.soccer_score_goal import SoccerScoreGoalEnv

logger = logging.getLogger(__name__)

-class SoccerAgainstKeeperEnv(SoccerEmptyGoalEnv):
+class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
"""
SoccerAgainstKeeper initializes the agent most of the way down the
field with the ball and tasks it with scoring on a keeper.
96 changes: 83 additions & 13 deletions gym_soccer/envs/soccer_env.py
@@ -1,17 +1,35 @@
import os, subprocess, time, signal
import numpy as np
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding

import socket
from contextlib import closing

try:
import hfo_py
except ImportError as e:
-raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e))
+raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].')".format(e))

import logging
logger = logging.getLogger(__name__)

def find_free_port():
"""Find a random free port. Does not guarantee that the port will still be free after return.
Note: HFO takes three consecutive port numbers, this only checks one.
Source: https://github.com/crowdAI/marLo/blob/master/marlo/utils.py
:rtype: `int`
"""

with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0))
return s.getsockname()[1]


class SoccerEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human']}

@@ -20,19 +38,29 @@ def __init__(self):
self.server_process = None
self.server_port = None
self.hfo_path = hfo_py.get_hfo_path()
print(self.hfo_path)
self._configure_environment()
self.env = hfo_py.HFOEnvironment()
-self.env.connectToServer(config_dir=hfo_py.get_config_path())
+self.env.connectToServer(config_dir=hfo_py.get_config_path(), server_port=self.server_port)
print("Shape =",self.env.getStateSize())
self.observation_space = spaces.Box(low=-1, high=1,
-shape=(self.env.getStateSize()))
+shape=((self.env.getStateSize(),)), dtype=np.float32)
# Action space omits the Tackle/Catch actions, which are useful on defense
low0 = np.array([0, -180], dtype=np.float32)
high0 = np.array([100, 180], dtype=np.float32)
low1 = np.array([-180], dtype=np.float32)
high1 = np.array([180], dtype=np.float32)
low2 = np.array([0, -180], dtype=np.float32)
high2 = np.array([100, 180], dtype=np.float32)
low3 = np.array([-180], dtype=np.float32)
high3 = np.array([180], dtype=np.float32)
self.action_space = spaces.Tuple((spaces.Discrete(3),
-spaces.Box(low=0, high=100, shape=1),
-spaces.Box(low=-180, high=180, shape=1),
-spaces.Box(low=-180, high=180, shape=1),
-spaces.Box(low=0, high=100, shape=1),
-spaces.Box(low=-180, high=180, shape=1)))
+spaces.Box(low=low0, high=high0, dtype=np.float32),
+spaces.Box(low=low1, high=high1, dtype=np.float32),
+spaces.Box(low=low2, high=high2, dtype=np.float32)))

self.status = hfo_py.IN_GAME
self._seed = -1

def __del__(self):
self.env.act(hfo_py.QUIT)
@@ -50,9 +78,11 @@ def _configure_environment(self):
self._start_hfo_server()

def _start_hfo_server(self, frames_per_trial=500,
-untouched_time=100, offense_agents=1,
+#untouched_time=1000,
+untouched_time=100,
+offense_agents=1,
defense_agents=0, offense_npcs=0,
-defense_npcs=0, sync_mode=True, port=6000,
+defense_npcs=0, sync_mode=True, port=None,
offense_on_ball=0, fullstate=True, seed=-1,
ball_x_min=0.0, ball_x_max=0.2,
verbose=False, log_game=False,
@@ -75,13 +105,26 @@ def _start_hfo_server(self, frames_per_trial=500,
log_game: Enable game logging. Logs can be used for replay + visualization.
log_dir: Directory to place game logs (*.rcg).
"""
if port is None:
port = find_free_port()
self.server_port = port
cmd = self.hfo_path + \
'''cmd = self.hfo_path + \
" --headless --frames-per-trial %i --untouched-time %i --offense-agents %i"\
" --defense-agents %i --offense-npcs %i --defense-npcs %i"\
" --port %i --offense-on-ball %i --seed %i --ball-x-min %f"\
" --ball-x-max %f --log-dir %s"\
% (frames_per_trial, untouched_time,
offense_agents,
defense_agents, offense_npcs, defense_npcs, port,
offense_on_ball, seed, ball_x_min, ball_x_max,
log_dir)'''
cmd = self.hfo_path + \
" --headless --frames-per-trial %i --offense-agents %i"\
" --defense-agents %i --offense-npcs %i --defense-npcs %i"\
" --port %i --offense-on-ball %i --seed %i --ball-x-min %f"\
" --ball-x-max %f --log-dir %s"\
% (frames_per_trial, untouched_time, offense_agents,
% (frames_per_trial,
offense_agents,
defense_agents, offense_npcs, defense_npcs, port,
offense_on_ball, seed, ball_x_min, ball_x_max,
log_dir)
@@ -109,7 +152,7 @@ def _step(self, action):
reward = self._get_reward()
ob = self.env.getState()
episode_over = self.status != hfo_py.IN_GAME
-return ob, reward, episode_over, {}
+return ob, reward, episode_over, {'status': STATUS_LOOKUP[self.status]}

def _take_action(self, action):
""" Converts the action space into an HFO action. """
@@ -139,6 +182,9 @@ def _reset(self):
while self.status != hfo_py.IN_GAME:
self.env.act(hfo_py.NOOP)
self.status = self.env.step()
# prevent infinite output when server dies
if self.status == hfo_py.SERVER_DOWN:
raise ServerDownException("HFO server down!")
return self.env.getState()

def _render(self, mode='human', close=False):
@@ -149,6 +195,21 @@ def _render(self, mode='human', close=False):
else:
if self.viewer is None:
self._start_viewer()

def close(self):
if self.server_process is not None:
try:
os.kill(self.server_process.pid, signal.SIGKILL)
except Exception:
pass


class ServerDownException(Exception):
"""
Custom error so agents can catch it and exit cleanly if the server dies unexpectedly.
"""
pass


ACTION_LOOKUP = {
0 : hfo_py.DASH,
@@ -157,3 +218,12 @@ def _render(self, mode='human', close=False):
3 : hfo_py.TACKLE, # Used on defense to slide tackle the ball
4 : hfo_py.CATCH, # Used only by goalie to catch the ball
}

STATUS_LOOKUP = {
hfo_py.IN_GAME: 'IN_GAME',
hfo_py.SERVER_DOWN: 'SERVER_DOWN',
hfo_py.GOAL: 'GOAL',
hfo_py.OUT_OF_BOUNDS: 'OUT_OF_BOUNDS',
hfo_py.OUT_OF_TIME: 'OUT_OF_TIME',
hfo_py.CAPTURED_BY_DEFENSE: 'CAPTURED_BY_DEFENSE',
}
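With `_step` now returning the match status in the info dict, goals can be tallied over a run. A small illustrative helper (not part of this commit), assuming the status strings from `STATUS_LOOKUP` above:

```python
from collections import Counter

def tally_statuses(infos):
    """Count episode outcomes from the info dicts returned by env.step()."""
    counts = Counter(info['status'] for info in infos)
    return counts['GOAL'], counts

# Example with hand-written info dicts:
goals, counts = tally_statuses([
    {'status': 'IN_GAME'}, {'status': 'GOAL'},
    {'status': 'OUT_OF_TIME'}, {'status': 'GOAL'},
])
# goals is 2 here
```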