diff --git a/README.md b/README.md
index 9eae9c4..da2ac71 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,41 @@
 # gym-soccer
 
 The [Soccer environment](https://github.com/LARG/HFO) is a multiagent
-domain featuring continuous state and action spaces. Currently,
-several tasks are supported:
+domain featuring continuous state and action spaces.
 
-## Soccer
+## Changes
+
+Several changes have been made to more closely reflect the setup used by [[Hausknecht & Stone 2016]](https://arxiv.org/abs/1511.04143):
+
+- The number of steps the agent may go without touching the ball before the episode ends has been reduced to 100.
+- The reward function has been updated to reflect the one used in their code (https://github.com/mhauskn/dqn-hfo). Specifically, the negative reward for the distance between the ball and the goal is only applied once the agent is in possession of the ball. A separate environment, `SoccerScoreGoal-v0`, has been created with this change; it is the same as `SoccerEmptyGoal-v0` except for the reward function.
+- The status of the environment (e.g. `GOAL`, `OUT_OF_TIME`) is returned in the `info` dictionary after each step (useful for counting the number of goals).
+
+## Tasks
+
+Several tasks are supported at the moment:
+
+### Soccer
 
 The soccer task initializes a single offensive agent on the field and
 rewards +1 for scoring a goal and 0 otherwise. In order to score a
 goal, the agent will need to know how to approach the ball and kick
 towards the goal. The sparse nature of the goal reward makes this task
 very difficult to accomplish.
 
-## SoccerEmptyGoal
+### SoccerEmptyGoal
 
 The SoccerEmptyGoal task features a more informative reward signal
 than the Soccer task. As before, the objective is to score a goal.
 However, SoccerEmtpyGoal rewards the agent for approaching the ball
 and moving the ball towards the goal. These frequent rewards make the
 task much more accessible.
 
-## SoccerAgainstKeeper
+### SoccerAgainstKeeper
 
 The objective of the SoccerAgainstKeeper task is to score against a
 goal keeper. The agent is rewarded for moving the ball towards the
 goal and for scoring a goal. The goal keeper uses a hand-coded policy
 developed by the Helios RoboCup team. The difficulty in this task is
 learning how to shoot around the goal keeper.
 
+
 # Installation
 
 ```bash
 cd gym-soccer
 pip install -e .
 ```
+
+or
+
+```bash
+pip install -e git+https://github.com/cycraig/gym-soccer#egg=gym_soccer
+```
\ No newline at end of file
diff --git a/gym_soccer/__init__.py b/gym_soccer/__init__.py
index a682811..5f2ccbf 100644
--- a/gym_soccer/__init__.py
+++ b/gym_soccer/__init__.py
@@ -19,9 +19,17 @@
     nondeterministic = True,
 )
 
+register(
+    id='SoccerScoreGoal-v0',
+    entry_point='gym_soccer.envs:SoccerScoreGoalEnv',
+    timestep_limit=1000,
+    reward_threshold=10.0,
+    nondeterministic = True,
+)
+
 register(
     id='SoccerAgainstKeeper-v0',
-    entry_point='gym.envs:SoccerAgainstKeeperEnv',
+    entry_point='gym_soccer.envs:SoccerAgainstKeeperEnv',
     timestep_limit=1000,
     reward_threshold=8.0,
     nondeterministic = True,
diff --git a/gym_soccer/envs/__init__.py b/gym_soccer/envs/__init__.py
index 9bd1a76..61907a4 100644
--- a/gym_soccer/envs/__init__.py
+++ b/gym_soccer/envs/__init__.py
@@ -1,3 +1,4 @@
 from gym_soccer.envs.soccer_env import SoccerEnv
 from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv
 from gym_soccer.envs.soccer_against_keeper import SoccerAgainstKeeperEnv
+from gym_soccer.envs.soccer_score_goal import SoccerScoreGoalEnv
\ No newline at end of file
diff --git a/gym_soccer/envs/soccer_against_keeper.py b/gym_soccer/envs/soccer_against_keeper.py
index 3912b01..6d80cf1 100644
--- a/gym_soccer/envs/soccer_against_keeper.py
+++ b/gym_soccer/envs/soccer_against_keeper.py
@@ -1,9 +1,10 @@
 import logging
 from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv
+from gym_soccer.envs.soccer_score_goal import SoccerScoreGoalEnv
 
 logger = logging.getLogger(__name__)
 
-class SoccerAgainstKeeperEnv(SoccerEmptyGoalEnv):
+class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
     """ SoccerAgainstKeeper initializes the agent most of the way down the
     field with the ball and tasks it with scoring on a keeper.
 
diff --git a/gym_soccer/envs/soccer_env.py b/gym_soccer/envs/soccer_env.py
index 9c58c24..dc84a47 100644
--- a/gym_soccer/envs/soccer_env.py
+++ b/gym_soccer/envs/soccer_env.py
@@ -1,17 +1,35 @@
 import os, subprocess, time, signal
+import numpy as np
 import gym
 from gym import error, spaces
 from gym import utils
 from gym.utils import seeding
+import socket
+from contextlib import closing
+
 try:
     import hfo_py
 except ImportError as e:
-    raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e))
+    raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].')".format(e))
 
 import logging
 logger = logging.getLogger(__name__)
 
+def find_free_port():
+    """Find a random free port. Does not guarantee that the port will still be free after return.
+    Note: HFO takes three consecutive port numbers, this only checks one.
+
+    Source: https://github.com/crowdAI/marLo/blob/master/marlo/utils.py
+
+    :rtype: `int`
+    """
+
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.bind(('', 0))
+        return s.getsockname()[1]
+
+
 class SoccerEnv(gym.Env, utils.EzPickle):
     metadata = {'render.modes': ['human']}
 
@@ -20,19 +38,29 @@ def __init__(self):
         self.server_process = None
         self.server_port = None
         self.hfo_path = hfo_py.get_hfo_path()
+        print(self.hfo_path)
         self._configure_environment()
         self.env = hfo_py.HFOEnvironment()
-        self.env.connectToServer(config_dir=hfo_py.get_config_path())
+        self.env.connectToServer(config_dir=hfo_py.get_config_path(), server_port=self.server_port)
+        print("Shape =", self.env.getStateSize())
         self.observation_space = spaces.Box(low=-1, high=1,
-                                            shape=(self.env.getStateSize()))
+                                            shape=(self.env.getStateSize(),), dtype=np.float32)
         # Action space omits the Tackle/Catch actions, which are useful on defense
+        low0 = np.array([0, -180], dtype=np.float32)
+        high0 = np.array([100, 180], dtype=np.float32)
+        low1 = np.array([-180], dtype=np.float32)
+        high1 = np.array([180], dtype=np.float32)
+        low2 = np.array([0, -180], dtype=np.float32)
+        high2 = np.array([100, 180], dtype=np.float32)
+        low3 = np.array([-180], dtype=np.float32)
+        high3 = np.array([180], dtype=np.float32)
         self.action_space = spaces.Tuple((spaces.Discrete(3),
-                                          spaces.Box(low=0, high=100, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1),
-                                          spaces.Box(low=0, high=100, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1)))
+                                          spaces.Box(low=low0, high=high0, dtype=np.float32),
+                                          spaces.Box(low=low1, high=high1, dtype=np.float32),
+                                          spaces.Box(low=low2, high=high2, dtype=np.float32)))
+        self.status = hfo_py.IN_GAME
+        self._seed = -1
 
     def __del__(self):
         self.env.act(hfo_py.QUIT)
@@ -50,9 +78,11 @@ def _configure_environment(self):
         self._start_hfo_server()
 
     def _start_hfo_server(self, frames_per_trial=500,
-                          untouched_time=100, offense_agents=1,
+                          #untouched_time=1000,
+                          untouched_time=100,
+                          offense_agents=1,
                           defense_agents=0, offense_npcs=0,
-                          defense_npcs=0, sync_mode=True, port=6000,
+                          defense_npcs=0, sync_mode=True, port=None,
                           offense_on_ball=0, fullstate=True, seed=-1,
                           ball_x_min=0.0, ball_x_max=0.2,
                           verbose=False, log_game=False,
@@ -75,13 +105,26 @@ def _start_hfo_server(self, frames_per_trial=500,
            log_game: Enable game logging. Logs can be used for replay + visualization.
            log_dir: Directory to place game logs (*.rcg).
""" + if port is None: + port = find_free_port() self.server_port = port - cmd = self.hfo_path + \ + '''cmd = self.hfo_path + \ " --headless --frames-per-trial %i --untouched-time %i --offense-agents %i"\ + " --defense-agents %i --offense-npcs %i --defense-npcs %i"\ + " --port %i --offense-on-ball %i --seed %i --ball-x-min %f"\ + " --ball-x-max %f --log-dir %s"\ + % (frames_per_trial, untouched_time, + offense_agents, + defense_agents, offense_npcs, defense_npcs, port, + offense_on_ball, seed, ball_x_min, ball_x_max, + log_dir)''' + cmd = self.hfo_path + \ + " --headless --frames-per-trial %i --offense-agents %i"\ " --defense-agents %i --offense-npcs %i --defense-npcs %i"\ " --port %i --offense-on-ball %i --seed %i --ball-x-min %f"\ " --ball-x-max %f --log-dir %s"\ - % (frames_per_trial, untouched_time, offense_agents, + % (frames_per_trial, + offense_agents, defense_agents, offense_npcs, defense_npcs, port, offense_on_ball, seed, ball_x_min, ball_x_max, log_dir) @@ -109,7 +152,7 @@ def _step(self, action): reward = self._get_reward() ob = self.env.getState() episode_over = self.status != hfo_py.IN_GAME - return ob, reward, episode_over, {} + return ob, reward, episode_over, {'status': STATUS_LOOKUP[self.status]} def _take_action(self, action): """ Converts the action space into an HFO action. """ @@ -139,6 +182,9 @@ def _reset(self): while self.status != hfo_py.IN_GAME: self.env.act(hfo_py.NOOP) self.status = self.env.step() + # prevent infinite output when server dies + if self.status == hfo_py.SERVER_DOWN: + raise ServerDownException("HFO server down!") return self.env.getState() def _render(self, mode='human', close=False): @@ -149,6 +195,21 @@ def _render(self, mode='human', close=False): else: if self.viewer is None: self._start_viewer() + + def close(self): + if self.server_process is not None: + try: + os.kill(self.server_process.pid, signal.SIGKILL) + except Exception: + pass + + +class ServerDownException(Exception): + """ + Custom error so agents can catch it and exit cleanly if the server dies unexpectedly. + """ + pass + ACTION_LOOKUP = { 0 : hfo_py.DASH, @@ -157,3 +218,12 @@ def _render(self, mode='human', close=False): 3 : hfo_py.TACKLE, # Used on defense to slide tackle the ball 4 : hfo_py.CATCH, # Used only by goalie to catch the ball } + +STATUS_LOOKUP = { + hfo_py.IN_GAME: 'IN_GAME', + hfo_py.SERVER_DOWN: 'SERVER_DOWN', + hfo_py.GOAL: 'GOAL', + hfo_py.OUT_OF_BOUNDS: 'OUT_OF_BOUNDS', + hfo_py.OUT_OF_TIME: 'OUT_OF_TIME', + hfo_py.CAPTURED_BY_DEFENSE: 'CAPTURED_BY_DEFENSE', +} diff --git a/gym_soccer/envs/soccer_score_goal.py b/gym_soccer/envs/soccer_score_goal.py new file mode 100644 index 0000000..0d3c841 --- /dev/null +++ b/gym_soccer/envs/soccer_score_goal.py @@ -0,0 +1,147 @@ +import logging +import math +import numpy as np +from gym import spaces +from gym_soccer.envs.soccer_env import SoccerEnv, ACTION_LOOKUP +from gym_soccer.envs.soccer_empty_goal import SoccerEmptyGoalEnv + +try: + import hfo_py +except ImportError as e: + raise error.DependencyNotInstalled("{}. (HINT: you can install HFO dependencies with 'pip install gym[soccer].)'".format(e)) + +logger = logging.getLogger(__name__) + +class SoccerScoreGoalEnv(SoccerEmptyGoalEnv): + """ + SoccerScoreGoal is the same task as SoccerEmptyGoal, which tasks the + agent with approaching the ball, dribbling, and scoring a goal. Rewards + are given as the agent nears the ball, kicks the ball towards the goal, + and scores a goal. 
+
+    The difference is that the reward structure is altered to be consistent
+    with the Hausknecht & Stone paper: "Deep Reinforcement Learning in
+    Parameterized Action Space".
+
+    """
+    def __init__(self):
+        super(SoccerScoreGoalEnv, self).__init__()
+        # dash, turn, kick, tackle
+        low0 = np.array([0, -180], dtype=np.float32)  # meant to be 0, not -100! (according to original soccer env and dqn-hfo inverting gradients)
+        high0 = np.array([100, 180], dtype=np.float32)
+        low1 = np.array([-180], dtype=np.float32)
+        high1 = np.array([180], dtype=np.float32)
+        low2 = np.array([0, -180], dtype=np.float32)
+        high2 = np.array([100, 180], dtype=np.float32)
+        low3 = np.array([-180], dtype=np.float32)
+        high3 = np.array([180], dtype=np.float32)
+        self.action_space = spaces.Tuple((spaces.Discrete(3),
+                                          spaces.Box(low=low0, high=high0, dtype=np.float32),
+                                          spaces.Box(low=low1, high=high1, dtype=np.float32),
+                                          spaces.Box(low=low2, high=high2, dtype=np.float32)))#,
+                                          #spaces.Box(low=low3, high=high3)))
+
+        self.unum = self.env.getUnum()  # uniform number (identifier) of our lone agent
+        print("UNUM =", self.unum)
+
+    '''def _take_action(self, action):
+        """ Converts the action space into an HFO action. """
+        action_type = ACTION_LOOKUP[action[0]]
+        if action_type == hfo_py.DASH:
+            self.env.act(action_type, action[1], action[2])
+        elif action_type == hfo_py.TURN:
+            self.env.act(action_type, action[3])
+        elif action_type == hfo_py.KICK:
+            self.env.act(action_type, action[4], action[5])
+        elif action_type == hfo_py.TACKLE:
+            self.env.act(action_type, action[6])
+        else:
+            print('Unrecognized action %d' % action_type)
+            self.env.act(hfo_py.NOOP)'''
+
+    def _get_reward(self):
+        """
+        Agent is rewarded for minimizing the distance between itself and
+        the ball, minimizing the distance between the ball and the goal,
+        and scoring a goal.
+        """
+        current_state = self.env.getState()
+        #print("State =", current_state)
+        #print("len State =", len(current_state))
+        ball_proximity = current_state[53]
+        goal_proximity = current_state[15]
+        ball_dist = 1.0 - ball_proximity
+        goal_dist = 1.0 - goal_proximity
+        kickable = current_state[12]
+        ball_ang_sin_rad = current_state[51]
+        ball_ang_cos_rad = current_state[52]
+        ball_ang_rad = math.acos(ball_ang_cos_rad)
+        if ball_ang_sin_rad < 0:
+            ball_ang_rad *= -1.
+        goal_ang_sin_rad = current_state[13]
+        goal_ang_cos_rad = current_state[14]
+        goal_ang_rad = math.acos(goal_ang_cos_rad)
+        if goal_ang_sin_rad < 0:
+            goal_ang_rad *= -1.
+        alpha = max(ball_ang_rad, goal_ang_rad) - min(ball_ang_rad, goal_ang_rad)
+        ball_dist_goal = math.sqrt(ball_dist*ball_dist + goal_dist*goal_dist -
+                                   2.*ball_dist*goal_dist*math.cos(alpha))
+        # Compute the difference in ball proximity from the last step
+        if not self.first_step:
+            ball_prox_delta = ball_proximity - self.old_ball_prox
+            kickable_delta = kickable - self.old_kickable
+            ball_dist_goal_delta = ball_dist_goal - self.old_ball_dist_goal
+        self.old_ball_prox = ball_proximity
+        self.old_kickable = kickable
+        self.old_ball_dist_goal = ball_dist_goal
+        #print(self.env.playerOnBall())
+        #print(self.env.playerOnBall().unum)
+        #print(self.env.getUnum())
+        reward = 0
+        if not self.first_step:
+            '''# Reward the agent for moving towards the ball
+            reward += ball_prox_delta
+            if kickable_delta > 0 and not self.got_kickable_reward:
+                reward += 1.
+                self.got_kickable_reward = True
+            # Reward the agent for kicking towards the goal
+            reward += 0.6 * -ball_dist_goal_delta
+            # Reward the agent for scoring
+            if self.status == hfo_py.GOAL:
+                reward += 5.0'''
+            '''reward = self.__move_to_ball_reward(kickable_delta, ball_prox_delta) + \
+                3. * self.__kick_to_goal_reward(ball_dist_goal_delta) + \
+                self.__EOT_reward()'''
+            mtb = self.__move_to_ball_reward(kickable_delta, ball_prox_delta)
+            ktg = 3. * self.__kick_to_goal_reward(ball_dist_goal_delta)
+            eot = self.__EOT_reward()
+            reward = mtb + ktg + eot
+            #print("mtb: %.06f ktg: %.06f eot: %.06f" % (mtb, ktg, eot))
+
+        self.first_step = False
+        #print("r =", reward)
+        return reward
+
+    def __move_to_ball_reward(self, kickable_delta, ball_prox_delta):
+        reward = 0.
+        if self.env.playerOnBall().unum < 0 or self.env.playerOnBall().unum == self.unum:
+            reward += ball_prox_delta
+        if kickable_delta >= 1 and not self.got_kickable_reward:
+            reward += 1.
+            self.got_kickable_reward = True
+        return reward
+
+    def __kick_to_goal_reward(self, ball_dist_goal_delta):
+        if self.env.playerOnBall().unum == self.unum:
+            return -ball_dist_goal_delta
+        elif self.got_kickable_reward:
+            return 0.2 * -ball_dist_goal_delta
+        return 0.
+
+    def __EOT_reward(self):
+        if self.status == hfo_py.GOAL:
+            return 5.
+        #elif self.status == hfo_py.CAPTURED_BY_DEFENSE:
+        #    return -1.
+        return 0.
+
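For reference, the `ball_dist_goal` quantity computed in `_get_reward` above is a law-of-cosines estimate of the distance between the ball and the goal, built from the agent-centric proximity and angle features of the HFO state vector (with `ball_dist = 1 - ball_proximity` and `goal_dist = 1 - goal_proximity`):

```latex
d_{\text{ball,goal}} = \sqrt{d_{\text{ball}}^{2} + d_{\text{goal}}^{2}
                             - 2\, d_{\text{ball}}\, d_{\text{goal}} \cos\alpha},
\qquad
\alpha = \lvert \theta_{\text{ball}} - \theta_{\text{goal}} \rvert
```

The per-step reward is then `mtb + 3 * ktg + eot`: the move-to-ball term, three times the decrease in this ball-to-goal distance (scaled down to a factor of 0.2 once the agent has earned the kickable bonus but is no longer on the ball), and a +5 bonus when the episode ends in a goal.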
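Below is a minimal usage sketch of the new environment and the `info['status']` field added to `_step`. It assumes `hfo_py` and the HFO server are installed and the pre-0.26 Gym API that this code targets; the flat action layout follows the unchanged `_take_action` in `soccer_env.py` (action type, then dash power, dash degrees, turn degrees, kick power, kick degrees), and the constant dash action is purely illustrative.

```python
import gym
import gym_soccer  # noqa: F401 -- importing registers SoccerScoreGoal-v0 and the other tasks

env = gym.make('SoccerScoreGoal-v0')   # launches an HFO server on a free port

goals = 0
obs = env.reset()
for _ in range(500):
    # Flat action: [action_type, dash_power, dash_degrees, turn_degrees, kick_power, kick_degrees]
    action = [0, 20.0, 0.0, 0.0, 0.0, 0.0]   # DASH forward at low power
    obs, reward, done, info = env.step(action)
    if info['status'] == 'GOAL':             # status string exposed by this patch
        goals += 1
    if done:
        obs = env.reset()
print("Goals scored:", goals)
env.close()                                  # kills the background HFO server process
```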