diff --git a/tests/test_eval.py b/tests/test_eval.py
index a62c0a7d..e3d15561 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -12,8 +12,6 @@ def test_eval_cli():
     result = runner.invoke(
         main,
         [
-            "--model",
-            "openai/gpt-4o",
             *test_set,
         ],
     )
@@ -23,8 +21,8 @@ def test_eval_cli():
     assert "correct output" in result.output
 
 
+# NOTE: keeping this test yields better coverage than test_eval_cli above; the reason is not yet understood.
 @pytest.mark.slow
-@pytest.mark.eval
 def test_eval(test):
     """
     This test will be run for each eval in the tests list.