From d1f962ab7e7fb624b5e68b63a44c28e259f0e14c Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 2 May 2020 19:19:36 -0700
Subject: [PATCH 1/3] Add the appropriate reward back to the grid benchmark

---
 flow/benchmarks/grid0.py        |  4 ++--
 flow/benchmarks/grid1.py        |  4 ++--
 flow/envs/__init__.py           |  3 ++-
 flow/envs/traffic_light_grid.py | 11 +++++++++++
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/flow/benchmarks/grid0.py b/flow/benchmarks/grid0.py
index 1655c3b3c..5c4ee5349 100644
--- a/flow/benchmarks/grid0.py
+++ b/flow/benchmarks/grid0.py
@@ -4,7 +4,7 @@
 - **Observation Dimension**: (339, )
 - **Horizon**: 400 steps
 """
-from flow.envs import TrafficLightGridPOEnv
+from flow.envs import TrafficLightGridBenchmarkEnv
 from flow.networks import TrafficLightGridNetwork
 from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams, \
     InFlows, SumoCarFollowingParams
@@ -68,7 +68,7 @@
     exp_tag="grid_0",
 
     # name of the flow environment the experiment is running on
-    env_name=TrafficLightGridPOEnv,
+    env_name=TrafficLightGridBenchmarkEnv,
 
     # name of the network class the experiment is running on
     network=TrafficLightGridNetwork,
diff --git a/flow/benchmarks/grid1.py b/flow/benchmarks/grid1.py
index ec2a27454..83055adfd 100644
--- a/flow/benchmarks/grid1.py
+++ b/flow/benchmarks/grid1.py
@@ -4,7 +4,7 @@
 - **Observation Dimension**: (915, )
 - **Horizon**: 400 steps
 """
-from flow.envs import TrafficLightGridPOEnv
+from flow.envs import TrafficLightGridBenchmarkEnv
 from flow.networks import TrafficLightGridNetwork
 from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams, \
     InFlows, SumoCarFollowingParams
@@ -68,7 +68,7 @@
     exp_tag="grid_1",
 
     # name of the flow environment the experiment is running on
-    env_name=TrafficLightGridPOEnv,
+    env_name=TrafficLightGridBenchmarkEnv,
 
     # name of the network class the experiment is running on
     network=TrafficLightGridNetwork,
diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py
index 5befe6a33..611ed3d9a 100755
--- a/flow/envs/__init__.py
+++ b/flow/envs/__init__.py
@@ -4,7 +4,7 @@
 from flow.envs.bottleneck import BottleneckAccelEnv, BottleneckEnv, \
     BottleneckDesiredVelocityEnv
 from flow.envs.traffic_light_grid import TrafficLightGridEnv, \
-    TrafficLightGridPOEnv, TrafficLightGridTestEnv
+    TrafficLightGridPOEnv, TrafficLightGridTestEnv, TrafficLightGridBenchmarkEnv
 from flow.envs.ring.lane_change_accel import LaneChangeAccelEnv, \
     LaneChangeAccelPOEnv
 from flow.envs.ring.accel import AccelEnv
@@ -33,6 +33,7 @@
     'WaveAttenuationPOEnv',
     'TrafficLightGridEnv',
     'TrafficLightGridPOEnv',
+    'TrafficLightGridBenchmarkEnv',
     'BottleneckDesiredVelocityEnv',
     'TestEnv',
     'BayBridgeEnv',
diff --git a/flow/envs/traffic_light_grid.py b/flow/envs/traffic_light_grid.py
index 53391a329..8be0cb8a5 100644
--- a/flow/envs/traffic_light_grid.py
+++ b/flow/envs/traffic_light_grid.py
@@ -731,6 +731,17 @@ def additional_command(self):
         [self.k.vehicle.set_observed(veh_id) for veh_id in self.observed_ids]
 
 
+class TrafficLightGridBenchmarkEnv(TrafficLightGridPOEnv):
+    """Class used for the benchmarks in `Benchmarks for reinforcement learning in mixed-autonomy traffic`."""
+
+    def compute_reward(self, rl_actions, **kwargs):
+        """Return -min_delay_unscaled if evaluating, else desired_velocity."""
+        if self.env_params.evaluate:
+            return - rewards.min_delay_unscaled(self)
+        else:
+            return rewards.desired_velocity(self)
+
+
 class TrafficLightGridTestEnv(TrafficLightGridEnv):
     """
     Class for use in testing.

From 2bf6b217bb654315fa2a6f2d8b935151d9ebb554 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 2 May 2020 19:23:44 -0700
Subject: [PATCH 2/3] Put the bottleneck in a congested regime

---
 flow/benchmarks/bottleneck0.py | 2 +-
 flow/benchmarks/bottleneck1.py | 2 +-
 flow/benchmarks/bottleneck2.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flow/benchmarks/bottleneck0.py b/flow/benchmarks/bottleneck0.py
index b0e86844c..b07947ad7 100644
--- a/flow/benchmarks/bottleneck0.py
+++ b/flow/benchmarks/bottleneck0.py
@@ -66,7 +66,7 @@
 }
 
 # flow rate
-flow_rate = 2000 * SCALING
+flow_rate = 2500 * SCALING
 
 # percentage of flow coming out of each lane
 inflow = InFlows()
diff --git a/flow/benchmarks/bottleneck1.py b/flow/benchmarks/bottleneck1.py
index 26ae6527a..9c8d9c192 100644
--- a/flow/benchmarks/bottleneck1.py
+++ b/flow/benchmarks/bottleneck1.py
@@ -66,7 +66,7 @@
 }
 
 # flow rate
-flow_rate = 2000 * SCALING
+flow_rate = 2500 * SCALING
 
 # percentage of flow coming out of each lane
 inflow = InFlows()
diff --git a/flow/benchmarks/bottleneck2.py b/flow/benchmarks/bottleneck2.py
index 5052b3b88..4651d448b 100644
--- a/flow/benchmarks/bottleneck2.py
+++ b/flow/benchmarks/bottleneck2.py
@@ -66,7 +66,7 @@
 }
 
 # flow rate
-flow_rate = 2000 * SCALING
+flow_rate = 2500 * SCALING
 
 # percentage of flow coming out of each lane
 inflow = InFlows()

From a26c4c68118fce3ae6cae4387ebb805af6fce4a9 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 2 May 2020 19:25:03 -0700
Subject: [PATCH 3/3] Update README bottleneck inflows to match the congested regime

---
 flow/benchmarks/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flow/benchmarks/README.md b/flow/benchmarks/README.md
index 963ad5b70..bbcba9414 100644
--- a/flow/benchmarks/README.md
+++ b/flow/benchmarks/README.md
@@ -38,12 +38,12 @@ inflow = 300 veh/hour/lane S=(915,), A=(25,), T=400.
 this problem is to learn to avoid the *capacity drop* that is characteristic to 
 bottleneck structures in transportation networks, and maximize the total 
 outflow in a mixed-autonomy setting. 
-- `flow.benchmarks.bottleneck0` 4 lanes, inflow = 1900 veh/hour, 10% CAV 
+- `flow.benchmarks.bottleneck0` 4 lanes, inflow = 2500 veh/hour, 10% CAV
 penetration, no vehicles are allowed to lane change, S=(141,), A=(20,), T=1000.
-- `flow.benchmarks.bottleneck1` 4 lanes, inflow = 1900 veh/hour, 10% CAV 
+- `flow.benchmarks.bottleneck1` 4 lanes, inflow = 2500 veh/hour, 10% CAV
 penetration, the human drivers follow the standard lane changing model in the 
 simulator, S=(141,), A=(20,), T=1000.
-- `flow.benchmarks.bottleneck2` 8 lanes, inflow = 3800 veh/hour, 10% CAV 
+- `flow.benchmarks.bottleneck2` 8 lanes, inflow = 5000 veh/hour, 10% CAV
 penetration, no vehicles are allowed to lane change, S=(281,), A=(40,), T=1000.
 
 ## Training on Custom Algorithms