diff --git a/.gitignore b/.gitignore
index f7cf314d3..4970c7a2d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 pytest/
 *.pytest_cache/
 .coverage
+.coverage.*
 pytest-coverage.txt
 wandb/
 
diff --git a/tests/algos/__snapshots__/test_algos.ambr b/tests/algos/__snapshots__/test_algos.ambr
index a5bfb8b5b..34f14877e 100644
--- a/tests/algos/__snapshots__/test_algos.ambr
+++ b/tests/algos/__snapshots__/test_algos.ambr
@@ -77,17 +77,17 @@
   )
 # ---
 # name: TestTorchAlgos.test_reproducibility[stable_learning_control.algos.pytorch.sac.sac].1
-  -0.3054710924625397
+  -0.17646123468875885
 # ---
 # name: TestTorchAlgos.test_reproducibility[stable_learning_control.algos.pytorch.sac.sac].2
-  -0.5438500046730042
+  -0.7749449014663696
 # ---
 # name: TestTorchAlgos.test_reproducibility[stable_learning_control.algos.pytorch.sac.sac].3
-  -0.5551561713218689
+  -0.4696856439113617
 # ---
 # name: TestTorchAlgos.test_reproducibility[stable_learning_control.algos.pytorch.sac.sac].4
-  0.7831009030342102
+  0.8471362590789795
 # ---
 # name: TestTorchAlgos.test_reproducibility[stable_learning_control.algos.pytorch.sac.sac].5
-  -0.43937113881111145
+  -0.3496301770210266
 # ---
diff --git a/tests/algos/gpu/__snapshots__/test_algos_gpu.ambr b/tests/algos/gpu/__snapshots__/test_algos_gpu.ambr
index 8dd58fc71..80921b8d3 100644
--- a/tests/algos/gpu/__snapshots__/test_algos_gpu.ambr
+++ b/tests/algos/gpu/__snapshots__/test_algos_gpu.ambr
@@ -77,17 +77,17 @@
   )
 # ---
 # name: TestTorchAlgosGPU.test_reproducibility[gpu-stable_learning_control.algos.pytorch.sac.sac].1
-  -0.7098895311355591
+  -0.5978871583938599
 # ---
 # name: TestTorchAlgosGPU.test_reproducibility[gpu-stable_learning_control.algos.pytorch.sac.sac].2
-  -0.1639314442873001
+  -0.613029956817627
 # ---
 # name: TestTorchAlgosGPU.test_reproducibility[gpu-stable_learning_control.algos.pytorch.sac.sac].3
-  -0.7671857476234436
+  -0.7007529735565186
 # ---
 # name: TestTorchAlgosGPU.test_reproducibility[gpu-stable_learning_control.algos.pytorch.sac.sac].4
-  0.415353924036026
+  0.7336636781692505
 # ---
 # name: TestTorchAlgosGPU.test_reproducibility[gpu-stable_learning_control.algos.pytorch.sac.sac].5
-  -0.8201737403869629
+  -0.8068016171455383
 # ---
diff --git a/tests/algos/gpu/test_algos_gpu.py b/tests/algos/gpu/test_algos_gpu.py
index b1c968782..af7728220 100644
--- a/tests/algos/gpu/test_algos_gpu.py
+++ b/tests/algos/gpu/test_algos_gpu.py
@@ -20,21 +20,26 @@
 @pytest.mark.parametrize("algo", ALGOS)
 @pytest.mark.parametrize("device", ["gpu"])
 class TestTorchAlgosGPU:
-    env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
+    @pytest.fixture
+    def env(self):
+        """Create Pendulum environment."""
+        env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
 
-    # Seed the environment.
-    env.np_random, seed = seeding.np_random(0)
-    env.action_space.seed(0)
-    env.observation_space.seed(0)
+        # Seed the environment.
+        env.np_random, _ = seeding.np_random(0)
+        env.action_space.seed(0)
+        env.observation_space.seed(0)
 
-    def test_reproducibility(self, algo, device, snapshot):
+        return env
+
+    def test_reproducibility(self, algo, device, snapshot, env):
         """Checks if the algorithm is still working as expected."""
         # Import the algorithm run function.
         run = getattr(importlib.import_module(algo), algo.split(".")[-1])
 
         # Run the algorithm.
         trained_policy, replay_buffer = run(
-            lambda: self.env,
+            lambda: env,
             seed=0,
             epochs=1,
             update_after=400,
@@ -48,5 +53,5 @@ def test_reproducibility(self, algo, device, snapshot):
 
         # Test if the actions returned by the policy are the same.
         for _ in range(5):
-            action = trained_policy.get_action(self.env.observation_space.sample())
+            action = trained_policy.get_action(env.observation_space.sample())
             assert action == snapshot
diff --git a/tests/algos/test_algos.py b/tests/algos/test_algos.py
index 7124ccea0..01a564e38 100644
--- a/tests/algos/test_algos.py
+++ b/tests/algos/test_algos.py
@@ -19,21 +19,26 @@
 
 @pytest.mark.parametrize("algo", ALGOS)
 class TestTorchAlgos:
-    env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
+    @pytest.fixture
+    def env(self):
+        """Create Pendulum environment."""
+        env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
 
-    # Seed the environment.
-    env.np_random, seed = seeding.np_random(0)
-    env.action_space.seed(0)
-    env.observation_space.seed(0)
+        # Seed the environment.
+        env.np_random, _ = seeding.np_random(0)
+        env.action_space.seed(0)
+        env.observation_space.seed(0)
 
-    def test_reproducibility(self, algo, snapshot):
+        return env
+
+    def test_reproducibility(self, algo, snapshot, env):
         """Checks if the algorithm is still working as expected."""
         # Import the algorithm run function.
         run = getattr(importlib.import_module(algo), algo.split(".")[-1])
 
         # Run the algorithm.
         trained_policy, replay_buffer = run(
-            lambda: self.env,
+            lambda: env,
             seed=0,
             epochs=1,
             update_after=400,
@@ -47,5 +52,5 @@ def test_reproducibility(self, algo, snapshot):
 
         # Test if the actions returned by the policy are the same.
         for _ in range(5):
-            action = trained_policy.get_action(self.env.observation_space.sample())
+            action = trained_policy.get_action(env.observation_space.sample())
             assert action == snapshot
diff --git a/tests/algos/tf2/gpu/test_tf2_algos_gpu.py b/tests/algos/tf2/gpu/test_tf2_algos_gpu.py
index d36002d42..567669127 100644
--- a/tests/algos/tf2/gpu/test_tf2_algos_gpu.py
+++ b/tests/algos/tf2/gpu/test_tf2_algos_gpu.py
@@ -21,14 +21,19 @@
 @pytest.mark.parametrize("algo", ALGOS)
 @pytest.mark.parametrize("device", ["gpu"])
 class TestTF2AlgosGPU:
-    env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
+    @pytest.fixture
+    def env(self):
+        """Create Pendulum environment."""
+        env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
 
-    # Seed the environment.
-    env.np_random, seed = seeding.np_random(0)
-    env.action_space.seed(0)
-    env.observation_space.seed(0)
+        # Seed the environment.
+        env.np_random, _ = seeding.np_random(0)
+        env.action_space.seed(0)
+        env.observation_space.seed(0)
 
-    def test_reproducibility(self, algo, device, snapshot):
+        return env
+
+    def test_reproducibility(self, algo, device, snapshot, env):
         """Checks if the algorithm is still working as expected."""
         # Check if TensorFlow is available.
         if not importlib.util.find_spec("tensorflow"):
@@ -45,7 +50,7 @@ def test_reproducibility(self, algo, device, snapshot):
 
         # Run the algorithm.
         trained_policy, replay_buffer = run(
-            lambda: self.env,
+            lambda: env,
             seed=0,
             epochs=1,
             update_after=400,
@@ -59,5 +64,5 @@ def test_reproducibility(self, algo, device, snapshot):
 
         # Test if the actions returned by the policy are the same.
         for _ in range(5):
-            action = trained_policy.get_action(self.env.observation_space.sample())
+            action = trained_policy.get_action(env.observation_space.sample())
             assert action.numpy() == snapshot
diff --git a/tests/algos/tf2/test_tf2_algos.py b/tests/algos/tf2/test_tf2_algos.py
index f60ed740a..48ef036a6 100644
--- a/tests/algos/tf2/test_tf2_algos.py
+++ b/tests/algos/tf2/test_tf2_algos.py
@@ -19,14 +19,19 @@
 
 @pytest.mark.parametrize("algo", ALGOS)
 class TestTF2Algos:
-    env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
+    @pytest.fixture
+    def env(self):
+        """Create Pendulum environment."""
+        env = gym.make("Pendulum-v1")  # Used because it is a simple environment.
 
-    # Seed the environment.
-    env.np_random, seed = seeding.np_random(0)
-    env.action_space.seed(0)
-    env.observation_space.seed(0)
+        # Seed the environment.
+        env.np_random, _ = seeding.np_random(0)
+        env.action_space.seed(0)
+        env.observation_space.seed(0)
 
-    def test_reproducibility(self, algo, snapshot):
+        return env
+
+    def test_reproducibility(self, algo, snapshot, env):
         """Checks if the algorithm is still working as expected."""
         # Check if TensorFlow is available.
         if not importlib.util.find_spec("tensorflow"):
@@ -39,7 +44,7 @@ def test_reproducibility(self, algo, snapshot):
 
         # Run the algorithm.
         trained_policy, replay_buffer = run(
-            lambda: self.env,
+            lambda: env,
             seed=0,
             epochs=1,
             update_after=400,
@@ -53,5 +58,5 @@ def test_reproducibility(self, algo, snapshot):
 
         # Test if the actions returned by the policy are the same.
         for _ in range(5):
-            action = trained_policy.get_action(self.env.observation_space.sample())
+            action = trained_policy.get_action(env.observation_space.sample())
             assert action.numpy() == snapshot
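
Note: all four test modules receive the same refactor. The class attribute env was created and seeded once at module import time and then shared and mutated by every parametrized test; it becomes a per-test pytest fixture, so each test gets a freshly created and freshly seeded environment, which is why the SAC snapshot values change. Below is a minimal standalone sketch of the pattern, assuming gymnasium imported as gym (as in the repository's tests); the class and test names here are hypothetical:

    import gymnasium as gym
    import pytest
    from gymnasium.utils import seeding


    class TestEnvFixturePattern:
        @pytest.fixture
        def env(self):
            """Create a freshly created, freshly seeded environment per test."""
            env = gym.make("Pendulum-v1")
            env.np_random, _ = seeding.np_random(0)  # Discard the returned seed value.
            env.action_space.seed(0)
            env.observation_space.seed(0)
            return env

        def test_sampling_is_deterministic(self, env):
            """An identically seeded space yields the same first sample."""
            other = gym.make("Pendulum-v1")
            other.observation_space.seed(0)
            assert (env.observation_space.sample() == other.observation_space.sample()).all()

A side benefit of the fixture is that gym.make no longer runs in class-body scope, so merely collecting (or skipping) the tests no longer constructs an environment.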