From 8294d5f8a77a5cccc452056874e6a10255981f9c Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 08:12:18 +0000 Subject: [PATCH 01/52] Create parallel testsuite using pytest Adds a dependency for pytest and configurates it to search for files following the pytest_* scheme. After the transition to pytest is completed and the nose-based testsuite can be safely removed, it is probably a good idea to rename the pytest-based testsuite back to the current test_* scheme. While having both testsuites at the same time causes temporary code duplication, it makes the transition process easier and allows quick three-way diffs between the main branch and the, on this branch, unmodified tests (via git diff) and the changes specific to pytest (via regular diff on this branch). --- pytest.ini | 9 +++++++++ setup.cfg | 1 + 2 files changed, 10 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..ef01c3910 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,9 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2024 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +[pytest] +python_files = pytest_*.py \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 76c30633a..219c3fd86 100644 --- a/setup.cfg +++ b/setup.cfg @@ -61,6 +61,7 @@ zip_safe = True [options.extras_require] dev = nose >= 1.0 + pytest lxml systemd = pystemd >= 0.7.0 From 898cf571125e07e06030c4a53269d2f86eb4712a Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 08:31:36 +0000 Subject: [PATCH 02/52] Exclude pytest_* files from ruff wildcard import warning The same already applies to the existing nose-based testsuite - using wildcard imports shortens test code significantly --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 87d2cbaf7..22c6b0189 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ exclude = [ # TODO 'benchexec/tools', '**/test_*.py', + '**/pytest_*.py', '**/test_*/**.py', ] @@ -69,3 +70,6 @@ ignore = [ # wildcard imports significantly shorten test code, 'F405', ] +'benchexec/pytest*.py' = [ + 'F405', +] From a4cc9cbf645e395adac905bb5775ee48c50f84e0 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 08:47:36 +0000 Subject: [PATCH 03/52] Duplicated test_result.py --- benchexec/pytest_result.py | 952 +++++++++++++++++++++++++++++++++++++ 1 file changed, 952 insertions(+) create mode 100644 benchexec/pytest_result.py diff --git a/benchexec/pytest_result.py b/benchexec/pytest_result.py new file mode 100644 index 000000000..157247b62 --- /dev/null +++ b/benchexec/pytest_result.py @@ -0,0 +1,952 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import sys +import tempfile +import unittest + +from benchexec.result import * # noqa: F403 @UnusedWildImport everything is tested +from benchexec.result import ( + _SCORE_CORRECT_FALSE, + _SCORE_CORRECT_TRUE, + _SCORE_WRONG_TRUE, + _SCORE_WRONG_FALSE, +) + +sys.dont_write_bytecode = True # prevent creation of .pyc files + + +class TestExpectedResult(unittest.TestCase): + def test_via_string(self): + def test(result, subproperty): + expected_result = ExpectedResult(result, subproperty) + self.assertEqual( + 
ExpectedResult.from_str(str(expected_result)), expected_result + ) + + test(None, None) + test(True, None) + test(False, None) + test(True, "foo") + test(False, "foo") + + def test_via_instance(self): + def test(s): + self.assertEqual(str(ExpectedResult.from_str(s)), s) + + test("") + test("true") + test("false") + test("true(foo)") + test("false(foo)") + + def test_invalid_string(self): + def test(s): + with self.assertRaises(ValueError, msg=f"for '{s}'"): + ExpectedResult.from_str(s) + + test("foo") + test("unknown") + test("true()") + + +class TestResult(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.longMessage = True + logging.disable(logging.CRITICAL) + + def expected_result(self, result, subcategory=None): + return {"dummy.prp": ExpectedResult(result, subcategory)} + + prop_call = Property("dummy.prp", True, "unreach-call") + prop_deadlock = Property("dummy.prp", True, "no-deadlock") + prop_memcleanup = Property("dummy.prp", True, "valid-memcleanup") + prop_memsafety = Property("dummy.prp", True, "valid-memsafety") + prop_overflow = Property("dummy.prp", True, "no-overflow") + prop_termination = Property("dummy.prp", True, "termination") + prop_sat = Property("dummy.prp", False, "satisfiable") + + def _test_Property_from_file(self, content, is_svcomp): + with tempfile.NamedTemporaryFile( + mode="wt", prefix="BenchExec_test_result", suffix=".prp" + ) as temp_file: + temp_file.write(content) + temp_file.flush() + filename = temp_file.name + + self.assertEqual( + Property( + filename=filename, + is_svcomp=is_svcomp, + name=os.path.splitext(os.path.basename(filename))[0], + ), + Property.create(filename), + msg="different result for property file with content\n" + content, + ) + + def test_Property_from_non_standard_file(self): + self._test_Property_from_file("", False) + self._test_Property_from_file(" ", False) + self._test_Property_from_file(" CHECK( init(main()), LTL(G p) )", False) + self._test_Property_from_file("test property", False) + self._test_Property_from_file("CHECK( init(main()), LTL(G p) )\ntest", False) + + def test_Property_from_sv_comp_file(self): + self._test_Property_from_file("CHECK( init(main()), LTL(G p) )", True) + self._test_Property_from_file( + "CHECK( init(main()), LTL(G p) )\n\nCHECK( init(main()), LTL(F end) )", True + ) + self._test_Property_from_file( + "CHECK( init(main()), LTL(G valid-free) )\nCHECK( init(main()), LTL(G valid-deref) )", + True, + ) + self._test_Property_from_file( + "CHECK( init(main()), LTL(G valid-free) and LTL(G valid-deref) )", True + ) + + def test_Property_max_score_not_available(self): + self.assertEqual(0, self.prop_call.max_score(ExpectedResult(None, None))) + self.assertEqual(None, self.prop_call.max_score(None)) + + def test_Property_max_score_smt(self): + self.assertEqual(None, self.prop_sat.max_score(ExpectedResult(True, None))) + self.assertEqual(None, self.prop_sat.max_score(ExpectedResult(False, None))) + + def test_Property_max_score_svcomp(self): + self.assertEqual( + _SCORE_CORRECT_TRUE, self.prop_call.max_score(ExpectedResult(True, None)) + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, self.prop_call.max_score(ExpectedResult(False, None)) + ) + + self.assertEqual( + _SCORE_CORRECT_TRUE, + self.prop_memsafety.max_score(ExpectedResult(True, None)), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_memsafety.max_score(ExpectedResult(False, None)), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_memsafety.max_score(ExpectedResult(False, "valid-free")), + ) + + def 
test_Property_compute_score_not_available(self): + self.assertEqual( + 0, self.prop_call.compute_score(CATEGORY_MISSING, RESULT_TRUE_PROP) + ) + self.assertEqual( + 0, self.prop_call.compute_score(CATEGORY_ERROR, RESULT_TRUE_PROP) + ) + self.assertEqual( + 0, self.prop_call.compute_score(CATEGORY_UNKNOWN, RESULT_TRUE_PROP) + ) + + def test_Property_compute_score_smt(self): + self.assertIsNone( + self.prop_sat.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP) + ) + self.assertIsNone(self.prop_sat.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP)) + + def test_Property_compute_score_svcomp(self): + self.assertEqual( + _SCORE_CORRECT_TRUE, + self.prop_call.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_call.compute_score(CATEGORY_CORRECT, RESULT_FALSE_REACH), + ) + self.assertEqual( + _SCORE_CORRECT_TRUE, + self.prop_memsafety.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_memsafety.compute_score(CATEGORY_CORRECT, RESULT_FALSE_MEMTRACK), + ) + self.assertEqual( + _SCORE_CORRECT_TRUE, + self.prop_termination.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_termination.compute_score( + CATEGORY_CORRECT, RESULT_FALSE_TERMINATION + ), + ) + self.assertEqual( + _SCORE_CORRECT_TRUE, + self.prop_overflow.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_overflow.compute_score(CATEGORY_CORRECT, RESULT_FALSE_OVERFLOW), + ) + self.assertEqual( + _SCORE_CORRECT_TRUE, + self.prop_deadlock.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_CORRECT_FALSE, + self.prop_deadlock.compute_score(CATEGORY_CORRECT, RESULT_FALSE_DEADLOCK), + ) + + self.assertEqual( + _SCORE_WRONG_FALSE, + self.prop_call.compute_score(CATEGORY_WRONG, RESULT_FALSE_REACH), + ) + self.assertEqual( + _SCORE_WRONG_TRUE, + self.prop_call.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_WRONG_FALSE, + self.prop_memsafety.compute_score(CATEGORY_WRONG, RESULT_FALSE_MEMTRACK), + ) + self.assertEqual( + _SCORE_WRONG_TRUE, + self.prop_memsafety.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_WRONG_FALSE, + self.prop_memsafety.compute_score(CATEGORY_WRONG, RESULT_FALSE_DEREF), + ) + self.assertEqual( + _SCORE_WRONG_FALSE, + self.prop_termination.compute_score( + CATEGORY_WRONG, RESULT_FALSE_TERMINATION + ), + ) + self.assertEqual( + _SCORE_WRONG_TRUE, + self.prop_termination.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_WRONG_FALSE, + self.prop_overflow.compute_score(CATEGORY_WRONG, RESULT_FALSE_OVERFLOW), + ) + self.assertEqual( + _SCORE_WRONG_TRUE, + self.prop_overflow.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + ) + self.assertEqual( + _SCORE_WRONG_FALSE, + self.prop_deadlock.compute_score(CATEGORY_WRONG, RESULT_FALSE_OVERFLOW), + ) + self.assertEqual( + _SCORE_WRONG_TRUE, + self.prop_deadlock.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + ) + + def test_result_classification(self): + self.assertEqual(RESULT_CLASS_TRUE, get_result_classification(RESULT_TRUE_PROP)) + + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_REACH) + ) + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_DEREF) + ) + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_FREE) + ) + self.assertEqual( 
+ RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_MEMTRACK) + ) + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_TERMINATION) + ) + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_OVERFLOW) + ) + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_PROP) + ) + self.assertEqual( + RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_PROP + "(test)") + ) + + self.assertEqual(RESULT_CLASS_OTHER, get_result_classification(RESULT_DONE)) + self.assertEqual(RESULT_CLASS_OTHER, get_result_classification(RESULT_UNKNOWN)) + self.assertEqual(RESULT_CLASS_OTHER, get_result_classification("KILLED")) + self.assertEqual(RESULT_CLASS_OTHER, get_result_classification("TIMEOUT")) + self.assertEqual(RESULT_CLASS_OTHER, get_result_classification("")) + + def test_result_category_true(self): + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_TRUE_PROP, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memcleanup] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_memcleanup] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_deadlock] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_deadlock] + ), + ) + + test_prop = Property("dummy.prp", True, "test prop") + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [test_prop] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [test_prop] + ), + ) + + test_prop = Property("dummy.prp", True, "test prop") + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [test_prop] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(False, "a"), RESULT_TRUE_PROP, [test_prop] + ), + ) + + def test_result_category_false(self): + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_REACH, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_REACH, 
[self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_DEREF, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_FREE, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_MEMTRACK, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False, "valid-deref"), + RESULT_FALSE_DEREF, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False, "valid-free"), + RESULT_FALSE_FREE, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_FALSE_MEMTRACK, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "valid-deref"), + RESULT_FALSE_FREE, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "valid-free"), + RESULT_FALSE_MEMTRACK, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_FALSE_DEREF, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), + RESULT_FALSE_TERMINATION, + [self.prop_termination], + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), + RESULT_FALSE_TERMINATION, + [self.prop_termination], + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_OVERFLOW, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] + ), + ) + + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_deadlock] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_deadlock] + ), + ) + + test_prop = Property("dummy.prp", True, "test prop") + 
self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [test_prop] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [test_prop] + ), + ) + # arbitrary subproperties allowed if property does not specify one + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False), RESULT_FALSE_PROP + "(a)", [test_prop] + ), + ) + + test_prop = Property("dummy.prp", True, "test prop") + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [test_prop] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP + "(a)", [test_prop] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + self.expected_result(False, "a"), RESULT_FALSE_PROP + "(a)", [test_prop] + ), + ) + + def test_result_category_different_false_result(self): + expected_result_false = self.expected_result(False) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_call] + ), + ) + + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_termination] + ), + ) + + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_sat] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_sat] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_sat] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_sat] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_sat] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_PROP, [self.prop_sat] + ), + ) + + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_overflow] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_overflow] + ), + ) + + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_deadlock] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_deadlock] + ), + ) + self.assertEqual( + CATEGORY_CORRECT, + get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_deadlock] + ), + ) 
+ + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_REACH, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_WRONG, + get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "valid-deref"), + RESULT_FALSE_PROP, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "valid-free"), + RESULT_FALSE_PROP, + [self.prop_memsafety], + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_FALSE_PROP, + [self.prop_memsafety], + ), + ) + + test_prop = Property("dummy.prp", True, "test prop") + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(False, "a"), RESULT_FALSE_PROP, [test_prop] + ), + ) + + def test_result_category_no_property(self): + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(False), RESULT_TRUE_PROP, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(False, "valid-memtrack.c"), RESULT_TRUE_PROP, [] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(False), RESULT_TRUE_PROP, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(False), RESULT_TRUE_PROP, []), + ) + + def test_result_category_no_expected_result(self): + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_TRUE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_FALSE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_TRUE_PROP, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_FALSE_FREE, [self.prop_memsafety] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_TRUE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_FALSE_PROP, [self.prop_termination] + ), + ) + + self.assertEqual( + CATEGORY_MISSING, + get_result_category({}, RESULT_TRUE_PROP, [self.prop_call]), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category({}, RESULT_FALSE_PROP, [self.prop_call]), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category({}, RESULT_TRUE_PROP, [self.prop_memsafety]), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category({}, RESULT_FALSE_FREE, [self.prop_memsafety]), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category({}, 
RESULT_TRUE_PROP, [self.prop_termination]), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category({}, RESULT_FALSE_PROP, [self.prop_termination]), + ) + + def test_result_category_different_property(self): + def other_expected_result(result, subcategory=None): + return {"different-file.prp": ExpectedResult(result, subcategory)} + + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + other_expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + other_expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + other_expected_result(False, "valid-memtrack"), + RESULT_TRUE_PROP, + [self.prop_call], + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + other_expected_result(False), RESULT_TRUE_PROP, [self.prop_call] + ), + ) + + def test_result_category_other(self): + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(True), RESULT_DONE, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category(self.expected_result(True), RESULT_DONE, []), + ) + self.assertEqual( + CATEGORY_MISSING, + get_result_category( + self.expected_result(None), RESULT_DONE, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(True), RESULT_UNKNOWN, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category(self.expected_result(True), RESULT_UNKNOWN, []), + ) + self.assertEqual( + CATEGORY_UNKNOWN, + get_result_category( + self.expected_result(None), RESULT_UNKNOWN, [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_ERROR, + get_result_category(self.expected_result(True), "KILLED", [self.prop_call]), + ) + self.assertEqual( + CATEGORY_ERROR, + get_result_category( + self.expected_result(True), "TIMEOUT", [self.prop_call] + ), + ) + self.assertEqual( + CATEGORY_ERROR, + get_result_category(self.expected_result(True), "", [self.prop_call]), + ) From f72b39db85e348c05e7c267882e032ce63f7c287 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 09:59:11 +0000 Subject: [PATCH 04/52] Transitioned test_result to pytest --- benchexec/pytest_result.py | 1183 +++++++++++++----------------------- 1 file changed, 422 insertions(+), 761 deletions(-) diff --git a/benchexec/pytest_result.py b/benchexec/pytest_result.py index 157247b62..cc55d5909 100644 --- a/benchexec/pytest_result.py +++ b/benchexec/pytest_result.py @@ -1,14 +1,14 @@ # This file is part of BenchExec, a framework for reliable benchmarking: # https://github.com/sosy-lab/benchexec # -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# SPDX-FileCopyrightText: 2007-2024 Dirk Beyer # # SPDX-License-Identifier: Apache-2.0 import logging import sys import tempfile -import unittest +import pytest from benchexec.result import * # noqa: F403 @UnusedWildImport everything is tested from benchexec.result import ( @@ -21,13 +21,11 @@ sys.dont_write_bytecode = True # prevent creation of .pyc files -class TestExpectedResult(unittest.TestCase): +class TestExpectedResult: def test_via_string(self): def test(result, subproperty): 
expected_result = ExpectedResult(result, subproperty) - self.assertEqual( - ExpectedResult.from_str(str(expected_result)), expected_result - ) + assert ExpectedResult.from_str(str(expected_result)) == expected_result test(None, None) test(True, None) @@ -37,7 +35,7 @@ def test(result, subproperty): def test_via_instance(self): def test(s): - self.assertEqual(str(ExpectedResult.from_str(s)), s) + assert str(ExpectedResult.from_str(s)) == s test("") test("true") @@ -47,20 +45,22 @@ def test(s): def test_invalid_string(self): def test(s): - with self.assertRaises(ValueError, msg=f"for '{s}'"): + with pytest.raises(ValueError) as exc_info: ExpectedResult.from_str(s) + assert str(exc_info.value) == f"Not a valid expected verdict: {s}" test("foo") test("unknown") test("true()") -class TestResult(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - logging.disable(logging.CRITICAL) +@pytest.fixture(scope="class") +def disable_non_critical_logging(): + logging.disable(logging.CRITICAL) + +@pytest.mark.usefixtures("disable_non_critical_logging") +class TestResult: def expected_result(self, result, subcategory=None): return {"dummy.prp": ExpectedResult(result, subcategory)} @@ -80,15 +80,13 @@ def _test_Property_from_file(self, content, is_svcomp): temp_file.flush() filename = temp_file.name - self.assertEqual( - Property( - filename=filename, - is_svcomp=is_svcomp, - name=os.path.splitext(os.path.basename(filename))[0], - ), - Property.create(filename), - msg="different result for property file with content\n" + content, - ) + assert Property( + filename=filename, + is_svcomp=is_svcomp, + name=os.path.splitext(os.path.basename(filename))[0], + ) == Property.create( + filename + ), f"different result for property file with content\n{ content }" def test_Property_from_non_standard_file(self): self._test_Property_from_file("", False) @@ -111,842 +109,505 @@ def test_Property_from_sv_comp_file(self): ) def test_Property_max_score_not_available(self): - self.assertEqual(0, self.prop_call.max_score(ExpectedResult(None, None))) - self.assertEqual(None, self.prop_call.max_score(None)) + assert 0 == self.prop_call.max_score(ExpectedResult(None, None)) + assert None is self.prop_call.max_score(None) def test_Property_max_score_smt(self): - self.assertEqual(None, self.prop_sat.max_score(ExpectedResult(True, None))) - self.assertEqual(None, self.prop_sat.max_score(ExpectedResult(False, None))) + assert None is self.prop_sat.max_score(ExpectedResult(True, None)) + assert None is self.prop_sat.max_score(ExpectedResult(False, None)) def test_Property_max_score_svcomp(self): - self.assertEqual( - _SCORE_CORRECT_TRUE, self.prop_call.max_score(ExpectedResult(True, None)) + assert _SCORE_CORRECT_TRUE == self.prop_call.max_score( + ExpectedResult(True, None) ) - self.assertEqual( - _SCORE_CORRECT_FALSE, self.prop_call.max_score(ExpectedResult(False, None)) + assert _SCORE_CORRECT_FALSE == self.prop_call.max_score( + ExpectedResult(False, None) ) - self.assertEqual( - _SCORE_CORRECT_TRUE, - self.prop_memsafety.max_score(ExpectedResult(True, None)), + assert _SCORE_CORRECT_TRUE == self.prop_memsafety.max_score( + ExpectedResult(True, None) ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_memsafety.max_score(ExpectedResult(False, None)), + assert _SCORE_CORRECT_FALSE == self.prop_memsafety.max_score( + ExpectedResult(False, None) ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_memsafety.max_score(ExpectedResult(False, "valid-free")), + assert _SCORE_CORRECT_FALSE == 
self.prop_memsafety.max_score( + ExpectedResult(False, "valid-free") ) def test_Property_compute_score_not_available(self): - self.assertEqual( - 0, self.prop_call.compute_score(CATEGORY_MISSING, RESULT_TRUE_PROP) - ) - self.assertEqual( - 0, self.prop_call.compute_score(CATEGORY_ERROR, RESULT_TRUE_PROP) - ) - self.assertEqual( - 0, self.prop_call.compute_score(CATEGORY_UNKNOWN, RESULT_TRUE_PROP) - ) + assert 0 == self.prop_call.compute_score(CATEGORY_MISSING, RESULT_TRUE_PROP) + assert 0 == self.prop_call.compute_score(CATEGORY_ERROR, RESULT_TRUE_PROP) + assert 0 == self.prop_call.compute_score(CATEGORY_UNKNOWN, RESULT_TRUE_PROP) def test_Property_compute_score_smt(self): - self.assertIsNone( - self.prop_sat.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP) - ) - self.assertIsNone(self.prop_sat.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP)) + assert None is self.prop_sat.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP) + assert None is self.prop_sat.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP) def test_Property_compute_score_svcomp(self): - self.assertEqual( - _SCORE_CORRECT_TRUE, - self.prop_call.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + assert _SCORE_CORRECT_TRUE == self.prop_call.compute_score( + CATEGORY_CORRECT, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_call.compute_score(CATEGORY_CORRECT, RESULT_FALSE_REACH), + assert _SCORE_CORRECT_FALSE == self.prop_call.compute_score( + CATEGORY_CORRECT, RESULT_FALSE_REACH ) - self.assertEqual( - _SCORE_CORRECT_TRUE, - self.prop_memsafety.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + assert _SCORE_CORRECT_TRUE == self.prop_memsafety.compute_score( + CATEGORY_CORRECT, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_memsafety.compute_score(CATEGORY_CORRECT, RESULT_FALSE_MEMTRACK), + assert _SCORE_CORRECT_FALSE == self.prop_memsafety.compute_score( + CATEGORY_CORRECT, RESULT_FALSE_MEMTRACK ) - self.assertEqual( - _SCORE_CORRECT_TRUE, - self.prop_termination.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + assert _SCORE_CORRECT_TRUE == self.prop_termination.compute_score( + CATEGORY_CORRECT, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_termination.compute_score( - CATEGORY_CORRECT, RESULT_FALSE_TERMINATION - ), + assert _SCORE_CORRECT_FALSE == self.prop_termination.compute_score( + CATEGORY_CORRECT, RESULT_FALSE_TERMINATION ) - self.assertEqual( - _SCORE_CORRECT_TRUE, - self.prop_overflow.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + assert _SCORE_CORRECT_TRUE == self.prop_overflow.compute_score( + CATEGORY_CORRECT, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_overflow.compute_score(CATEGORY_CORRECT, RESULT_FALSE_OVERFLOW), + assert _SCORE_CORRECT_FALSE == self.prop_overflow.compute_score( + CATEGORY_CORRECT, RESULT_FALSE_OVERFLOW ) - self.assertEqual( - _SCORE_CORRECT_TRUE, - self.prop_deadlock.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP), + assert _SCORE_CORRECT_TRUE == self.prop_deadlock.compute_score( + CATEGORY_CORRECT, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_CORRECT_FALSE, - self.prop_deadlock.compute_score(CATEGORY_CORRECT, RESULT_FALSE_DEADLOCK), + assert _SCORE_CORRECT_FALSE == self.prop_deadlock.compute_score( + CATEGORY_CORRECT, RESULT_FALSE_DEADLOCK ) - self.assertEqual( - _SCORE_WRONG_FALSE, - self.prop_call.compute_score(CATEGORY_WRONG, RESULT_FALSE_REACH), + assert _SCORE_WRONG_FALSE == self.prop_call.compute_score( + CATEGORY_WRONG, RESULT_FALSE_REACH ) - 
self.assertEqual( - _SCORE_WRONG_TRUE, - self.prop_call.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + assert _SCORE_WRONG_TRUE == self.prop_call.compute_score( + CATEGORY_WRONG, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_WRONG_FALSE, - self.prop_memsafety.compute_score(CATEGORY_WRONG, RESULT_FALSE_MEMTRACK), + assert _SCORE_WRONG_FALSE == self.prop_memsafety.compute_score( + CATEGORY_WRONG, RESULT_FALSE_MEMTRACK ) - self.assertEqual( - _SCORE_WRONG_TRUE, - self.prop_memsafety.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + assert _SCORE_WRONG_TRUE == self.prop_memsafety.compute_score( + CATEGORY_WRONG, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_WRONG_FALSE, - self.prop_memsafety.compute_score(CATEGORY_WRONG, RESULT_FALSE_DEREF), + assert _SCORE_WRONG_FALSE == self.prop_memsafety.compute_score( + CATEGORY_WRONG, RESULT_FALSE_DEREF ) - self.assertEqual( - _SCORE_WRONG_FALSE, - self.prop_termination.compute_score( - CATEGORY_WRONG, RESULT_FALSE_TERMINATION - ), + assert _SCORE_WRONG_FALSE == self.prop_termination.compute_score( + CATEGORY_WRONG, RESULT_FALSE_TERMINATION ) - self.assertEqual( - _SCORE_WRONG_TRUE, - self.prop_termination.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + assert _SCORE_WRONG_TRUE == self.prop_termination.compute_score( + CATEGORY_WRONG, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_WRONG_FALSE, - self.prop_overflow.compute_score(CATEGORY_WRONG, RESULT_FALSE_OVERFLOW), + assert _SCORE_WRONG_FALSE == self.prop_overflow.compute_score( + CATEGORY_WRONG, RESULT_FALSE_OVERFLOW ) - self.assertEqual( - _SCORE_WRONG_TRUE, - self.prop_overflow.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + assert _SCORE_WRONG_TRUE == self.prop_overflow.compute_score( + CATEGORY_WRONG, RESULT_TRUE_PROP ) - self.assertEqual( - _SCORE_WRONG_FALSE, - self.prop_deadlock.compute_score(CATEGORY_WRONG, RESULT_FALSE_OVERFLOW), + assert _SCORE_WRONG_FALSE == self.prop_deadlock.compute_score( + CATEGORY_WRONG, RESULT_FALSE_OVERFLOW ) - self.assertEqual( - _SCORE_WRONG_TRUE, - self.prop_deadlock.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP), + assert _SCORE_WRONG_TRUE == self.prop_deadlock.compute_score( + CATEGORY_WRONG, RESULT_TRUE_PROP ) def test_result_classification(self): - self.assertEqual(RESULT_CLASS_TRUE, get_result_classification(RESULT_TRUE_PROP)) + assert RESULT_CLASS_TRUE == get_result_classification(RESULT_TRUE_PROP) + + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_REACH) + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_DEREF) + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_FREE) + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_MEMTRACK) + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_TERMINATION) + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_OVERFLOW) + assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_PROP) + assert RESULT_CLASS_FALSE == get_result_classification( + RESULT_FALSE_PROP + "(test)" + ) + + assert RESULT_CLASS_OTHER == get_result_classification(RESULT_DONE) + assert RESULT_CLASS_OTHER == get_result_classification(RESULT_UNKNOWN) + assert RESULT_CLASS_OTHER == get_result_classification("KILLED") + assert RESULT_CLASS_OTHER == get_result_classification("TIMEOUT") + assert RESULT_CLASS_OTHER == get_result_classification("") - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_REACH) + def test_result_category_true(self): + assert CATEGORY_CORRECT == get_result_category( + 
self.expected_result(True), RESULT_TRUE_PROP, [self.prop_call] ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_DEREF) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_call] ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_FREE) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memsafety] ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_MEMTRACK) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_TRUE_PROP, + [self.prop_memsafety], ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_TERMINATION) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memcleanup] ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_OVERFLOW) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_memcleanup] ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_PROP) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] ) - self.assertEqual( - RESULT_CLASS_FALSE, get_result_classification(RESULT_FALSE_PROP + "(test)") + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] ) - - self.assertEqual(RESULT_CLASS_OTHER, get_result_classification(RESULT_DONE)) - self.assertEqual(RESULT_CLASS_OTHER, get_result_classification(RESULT_UNKNOWN)) - self.assertEqual(RESULT_CLASS_OTHER, get_result_classification("KILLED")) - self.assertEqual(RESULT_CLASS_OTHER, get_result_classification("TIMEOUT")) - self.assertEqual(RESULT_CLASS_OTHER, get_result_classification("")) - - def test_result_category_true(self): - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_TRUE_PROP, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memcleanup] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_memcleanup] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, 
[self.prop_deadlock] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_deadlock] - ), + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_overflow] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_overflow] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [self.prop_deadlock] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [self.prop_deadlock] ) test_prop = Property("dummy.prp", True, "test prop") - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [test_prop] - ), + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [test_prop] ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [test_prop] - ), + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [test_prop] ) test_prop = Property("dummy.prp", True, "test prop") - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [test_prop] - ), + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [test_prop] ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(False, "a"), RESULT_TRUE_PROP, [test_prop] - ), + assert CATEGORY_WRONG == get_result_category( + self.expected_result(False, "a"), RESULT_TRUE_PROP, [test_prop] ) def test_result_category_false(self): - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_REACH, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_REACH, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_DEREF, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_FREE, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_MEMTRACK, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False, "valid-deref"), - RESULT_FALSE_DEREF, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False, "valid-free"), - RESULT_FALSE_FREE, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_FALSE_MEMTRACK, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "valid-deref"), - RESULT_FALSE_FREE, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "valid-free"), - RESULT_FALSE_MEMTRACK, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_FALSE_DEREF, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - 
self.expected_result(True), - RESULT_FALSE_TERMINATION, - [self.prop_termination], - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), - RESULT_FALSE_TERMINATION, - [self.prop_termination], - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_OVERFLOW, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] - ), - ) - - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_deadlock] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_deadlock] - ), + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_REACH, [self.prop_call] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_REACH, [self.prop_call] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_DEREF, [self.prop_memsafety] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_FREE, [self.prop_memsafety] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_MEMTRACK, [self.prop_memsafety] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False, "valid-deref"), + RESULT_FALSE_DEREF, + [self.prop_memsafety], + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False, "valid-free"), + RESULT_FALSE_FREE, + [self.prop_memsafety], + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_FALSE_MEMTRACK, + [self.prop_memsafety], + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "valid-deref"), + RESULT_FALSE_FREE, + [self.prop_memsafety], + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "valid-free"), + RESULT_FALSE_MEMTRACK, + [self.prop_memsafety], + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_FALSE_DEREF, + [self.prop_memsafety], + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), + 
RESULT_FALSE_TERMINATION, + [self.prop_termination], + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), + RESULT_FALSE_TERMINATION, + [self.prop_termination], + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_overflow] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_OVERFLOW, [self.prop_overflow] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] + ) + + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_call] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_call] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_termination] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_termination] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_overflow] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_overflow] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_deadlock] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [self.prop_deadlock] ) test_prop = Property("dummy.prp", True, "test prop") - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [test_prop] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [test_prop] - ), + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [test_prop] + ) + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_PROP, [test_prop] ) # arbitrary subproperties allowed if property does not specify one - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False), RESULT_FALSE_PROP + "(a)", [test_prop] - ), + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False), RESULT_FALSE_PROP + "(a)", [test_prop] ) test_prop = Property("dummy.prp", True, "test prop") - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [test_prop] - ), + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [test_prop] ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP + "(a)", [test_prop] - ), + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP + "(a)", [test_prop] ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - self.expected_result(False, "a"), RESULT_FALSE_PROP + "(a)", [test_prop] - ), + assert CATEGORY_CORRECT == get_result_category( + self.expected_result(False, "a"), RESULT_FALSE_PROP + "(a)", [test_prop] ) def test_result_category_different_false_result(self): expected_result_false = self.expected_result(False) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - 
expected_result_false, RESULT_FALSE_DEREF, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_call] - ), - ) - - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_termination] - ), - ) - - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_sat] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_sat] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_sat] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_sat] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_sat] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_PROP, [self.prop_sat] - ), - ) - - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_overflow] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_overflow] - ), - ) - - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_deadlock] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_deadlock] - ), - ) - self.assertEqual( - CATEGORY_CORRECT, - get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_deadlock] - ), - ) - - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_REACH, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_WRONG, - get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "valid-deref"), - RESULT_FALSE_PROP, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "valid-free"), - RESULT_FALSE_PROP, - [self.prop_memsafety], - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_FALSE_PROP, - [self.prop_memsafety], - ), + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_call] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, 
RESULT_FALSE_TERMINATION, [self.prop_call] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_call] + ) + + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_termination] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_termination] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_termination] + ) + + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_sat] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_sat] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_sat] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_sat] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_sat] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_PROP, [self.prop_sat] + ) + + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_overflow] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_overflow] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_overflow] + ) + + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_REACH, [self.prop_deadlock] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_DEREF, [self.prop_deadlock] + ) + assert CATEGORY_CORRECT == get_result_category( + expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_deadlock] + ) + + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_call] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_REACH, [self.prop_termination] + ) + assert CATEGORY_WRONG == get_result_category( + self.expected_result(True), RESULT_FALSE_PROP, [self.prop_memsafety] + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "valid-deref"), + RESULT_FALSE_PROP, + [self.prop_memsafety], + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "valid-free"), + RESULT_FALSE_PROP, + [self.prop_memsafety], + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "valid-memtrack"), + RESULT_FALSE_PROP, + [self.prop_memsafety], ) test_prop = Property("dummy.prp", True, "test prop") - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(False, "a"), RESULT_FALSE_PROP, [test_prop] - ), + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(False, "a"), RESULT_FALSE_PROP, [test_prop] ) def test_result_category_no_property(self): - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(False), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(False), 
RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(False, "valid-memtrack.c"), RESULT_TRUE_PROP, [] - ), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(False, "valid-memtrack.c"), RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(False), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(True), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(True), RESULT_TRUE_PROP, [] ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(False), RESULT_TRUE_PROP, []), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(False), RESULT_TRUE_PROP, [] ) def test_result_category_no_expected_result(self): - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_TRUE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_FALSE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_TRUE_PROP, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_FALSE_FREE, [self.prop_memsafety] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_TRUE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_FALSE_PROP, [self.prop_termination] - ), - ) - - self.assertEqual( - CATEGORY_MISSING, - get_result_category({}, RESULT_TRUE_PROP, [self.prop_call]), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category({}, RESULT_FALSE_PROP, [self.prop_call]), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category({}, RESULT_TRUE_PROP, [self.prop_memsafety]), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category({}, RESULT_FALSE_FREE, [self.prop_memsafety]), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category({}, RESULT_TRUE_PROP, [self.prop_termination]), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category({}, RESULT_FALSE_PROP, [self.prop_termination]), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_TRUE_PROP, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_FALSE_PROP, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_TRUE_PROP, [self.prop_memsafety] + ) + assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_FALSE_FREE, [self.prop_memsafety] + ) + assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_TRUE_PROP, [self.prop_termination] + ) + 
assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_FALSE_PROP, [self.prop_termination] + ) + + assert CATEGORY_MISSING == get_result_category( + {}, RESULT_TRUE_PROP, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + {}, RESULT_FALSE_PROP, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + {}, RESULT_TRUE_PROP, [self.prop_memsafety] + ) + assert CATEGORY_MISSING == get_result_category( + {}, RESULT_FALSE_FREE, [self.prop_memsafety] + ) + assert CATEGORY_MISSING == get_result_category( + {}, RESULT_TRUE_PROP, [self.prop_termination] + ) + assert CATEGORY_MISSING == get_result_category( + {}, RESULT_FALSE_PROP, [self.prop_termination] ) def test_result_category_different_property(self): def other_expected_result(result, subcategory=None): return {"different-file.prp": ExpectedResult(result, subcategory)} - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - other_expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - other_expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - other_expected_result(False, "valid-memtrack"), - RESULT_TRUE_PROP, - [self.prop_call], - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - other_expected_result(False), RESULT_TRUE_PROP, [self.prop_call] - ), + assert CATEGORY_MISSING == get_result_category( + other_expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] + ) + assert CATEGORY_MISSING == get_result_category( + other_expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] + ) + assert CATEGORY_MISSING == get_result_category( + other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + other_expected_result(False, "valid-memtrack"), + RESULT_TRUE_PROP, + [self.prop_call], + ) + assert CATEGORY_MISSING == get_result_category( + other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + other_expected_result(False), RESULT_TRUE_PROP, [self.prop_call] ) def test_result_category_other(self): - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(True), RESULT_DONE, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category(self.expected_result(True), RESULT_DONE, []), - ) - self.assertEqual( - CATEGORY_MISSING, - get_result_category( - self.expected_result(None), RESULT_DONE, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(True), RESULT_UNKNOWN, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category(self.expected_result(True), RESULT_UNKNOWN, []), - ) - self.assertEqual( - CATEGORY_UNKNOWN, - get_result_category( - self.expected_result(None), RESULT_UNKNOWN, [self.prop_call] - ), - ) - self.assertEqual( - CATEGORY_ERROR, - get_result_category(self.expected_result(True), "KILLED", [self.prop_call]), - ) - self.assertEqual( - CATEGORY_ERROR, - get_result_category( - self.expected_result(True), "TIMEOUT", [self.prop_call] 
- ), - ) - self.assertEqual( - CATEGORY_ERROR, - get_result_category(self.expected_result(True), "", [self.prop_call]), + assert CATEGORY_MISSING == get_result_category( + self.expected_result(True), RESULT_DONE, [self.prop_call] + ) + assert CATEGORY_MISSING == get_result_category( + self.expected_result(True), RESULT_DONE, [] + ) + assert CATEGORY_MISSING == get_result_category( + self.expected_result(None), RESULT_DONE, [self.prop_call] + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(True), RESULT_UNKNOWN, [self.prop_call] + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(True), RESULT_UNKNOWN, [] + ) + assert CATEGORY_UNKNOWN == get_result_category( + self.expected_result(None), RESULT_UNKNOWN, [self.prop_call] + ) + assert CATEGORY_ERROR == get_result_category( + self.expected_result(True), "KILLED", [self.prop_call] + ) + assert CATEGORY_ERROR == get_result_category( + self.expected_result(True), "TIMEOUT", [self.prop_call] + ) + assert CATEGORY_ERROR == get_result_category( + self.expected_result(True), "", [self.prop_call] ) From 9dd880e4153119a74f723ac4792cf3e7cc7a3f02 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 18:20:09 +0000 Subject: [PATCH 05/52] Duplicated test_analyze_run_result.py --- benchexec/pytest_analyze_run_result.py | 221 +++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 benchexec/pytest_analyze_run_result.py diff --git a/benchexec/pytest_analyze_run_result.py b/benchexec/pytest_analyze_run_result.py new file mode 100644 index 000000000..4c9461659 --- /dev/null +++ b/benchexec/pytest_analyze_run_result.py @@ -0,0 +1,221 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import sys +import unittest +import types + +from benchexec.util import ProcessExitCode +from benchexec.model import Run +from benchexec.result import ( + RESULT_FALSE_REACH, + RESULT_ERROR, + RESULT_UNKNOWN, + RESULT_TRUE_PROP, +) +from benchexec.tools.template import BaseTool + +sys.dont_write_bytecode = True # prevent creation of .pyc files + +normal_result = ProcessExitCode(raw=0, value=0, signal=None) + + +class TestResult(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.longMessage = True + logging.disable(logging.CRITICAL) + + def create_run(self, info_result=RESULT_UNKNOWN): + runSet = types.SimpleNamespace() + runSet.log_folder = "." + runSet.result_files_folder = "." + runSet.options = [] + runSet.real_name = None + runSet.propertytag = None + runSet.benchmark = lambda: None + runSet.benchmark.base_dir = "." 
+ runSet.benchmark.benchmark_file = "Test.xml" + runSet.benchmark.columns = [] + runSet.benchmark.name = "Test" + runSet.benchmark.instance = "Test" + runSet.benchmark.rlimits = {} + runSet.benchmark.tool = BaseTool() + + def determine_result(run): + return info_result + + runSet.benchmark.tool.determine_result = determine_result + + run = Run( + identifier="test.c", + sourcefiles=["test.c"], + task_options=None, + fileOptions=[], + runSet=runSet, + ) + run._cmdline = ["dummy.bin", "test.c"] + return run + + def test_simple(self): + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual(RESULT_UNKNOWN, run._analyze_result(normal_result, "", None)) + + run = self.create_run(info_result=RESULT_TRUE_PROP) + self.assertEqual(RESULT_TRUE_PROP, run._analyze_result(normal_result, "", None)) + + run = self.create_run(info_result=RESULT_FALSE_REACH) + self.assertEqual( + RESULT_FALSE_REACH, run._analyze_result(normal_result, "", None) + ) + + def test_timeout(self): + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "cputime")) + self.assertEqual( + "TIMEOUT", run._analyze_result(normal_result, "", "cputime-soft") + ) + self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "walltime")) + + run = self.create_run(info_result=RESULT_TRUE_PROP) + self.assertEqual( + f"TIMEOUT ({RESULT_TRUE_PROP})", + run._analyze_result(normal_result, "", "cputime"), + ) + + run = self.create_run(info_result=RESULT_FALSE_REACH) + self.assertEqual( + f"TIMEOUT ({RESULT_FALSE_REACH})", + run._analyze_result(normal_result, "", "cputime"), + ) + + run = self.create_run(info_result="SOME OTHER RESULT") + self.assertEqual( + "TIMEOUT (SOME OTHER RESULT)", + run._analyze_result(normal_result, "", "cputime"), + ) + + run = self.create_run(info_result=RESULT_ERROR) + self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "cputime")) + + run = self.create_run(info_result=RESULT_ERROR) + run._is_timeout = lambda: True + self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", None)) + + def test_out_of_memory(self): + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual( + "OUT OF MEMORY", run._analyze_result(normal_result, "", "memory") + ) + + run = self.create_run(info_result=RESULT_TRUE_PROP) + self.assertEqual( + f"OUT OF MEMORY ({RESULT_TRUE_PROP})", + run._analyze_result(normal_result, "", "memory"), + ) + + run = self.create_run(info_result=RESULT_FALSE_REACH) + self.assertEqual( + f"OUT OF MEMORY ({RESULT_FALSE_REACH})", + run._analyze_result(normal_result, "", "memory"), + ) + + run = self.create_run(info_result="SOME OTHER RESULT") + self.assertEqual( + "OUT OF MEMORY (SOME OTHER RESULT)", + run._analyze_result(normal_result, "", "memory"), + ) + + run = self.create_run(info_result=RESULT_ERROR) + self.assertEqual( + "OUT OF MEMORY", run._analyze_result(normal_result, "", "memory") + ) + + def test_timeout_and_out_of_memory(self): + run = self.create_run(info_result=RESULT_UNKNOWN) + run._is_timeout = lambda: True + self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "memory")) + + run = self.create_run(info_result=RESULT_TRUE_PROP) + run._is_timeout = lambda: True + self.assertEqual( + f"TIMEOUT ({RESULT_TRUE_PROP})", + run._analyze_result(normal_result, "", "memory"), + ) + + run = self.create_run(info_result=RESULT_FALSE_REACH) + run._is_timeout = lambda: True + self.assertEqual( + f"TIMEOUT ({RESULT_FALSE_REACH})", + run._analyze_result(normal_result, "", "memory"), + 
) + + run = self.create_run(info_result="SOME OTHER RESULT") + run._is_timeout = lambda: True + self.assertEqual( + "TIMEOUT (SOME OTHER RESULT)", + run._analyze_result(normal_result, "", "memory"), + ) + + run = self.create_run(info_result=RESULT_ERROR) + run._is_timeout = lambda: True + self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "memory")) + + def test_returnsignal(self): + def signal(sig): + """Encode a signal as it would be returned by os.wait""" + return ProcessExitCode(raw=sig, value=None, signal=sig) + + run = self.create_run(info_result=RESULT_ERROR) + self.assertEqual("TIMEOUT", run._analyze_result(signal(9), "", "cputime")) + + run = self.create_run(info_result=RESULT_ERROR) + self.assertEqual("OUT OF MEMORY", run._analyze_result(signal(9), "", "memory")) + + run = self.create_run(info_result=RESULT_TRUE_PROP) + self.assertEqual(RESULT_TRUE_PROP, run._analyze_result(signal(9), "", None)) + + run = self.create_run(info_result=RESULT_FALSE_REACH) + self.assertEqual(RESULT_FALSE_REACH, run._analyze_result(signal(9), "", None)) + + run = self.create_run(info_result="SOME OTHER RESULT") + self.assertEqual("SOME OTHER RESULT", run._analyze_result(signal(9), "", None)) + + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual("KILLED BY SIGNAL 9", run._analyze_result(signal(9), "", None)) + + def test_exitcode(self): + def returnvalue(value): + """Encode an exit of aprogram as it would be returned by os.wait""" + return ProcessExitCode(raw=value << 8, value=value, signal=None) + + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual("TIMEOUT", run._analyze_result(returnvalue(1), "", "cputime")) + + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual( + "OUT OF MEMORY", run._analyze_result(returnvalue(1), "", "memory") + ) + + run = self.create_run(info_result=RESULT_TRUE_PROP) + self.assertEqual( + RESULT_TRUE_PROP, run._analyze_result(returnvalue(1), "", None) + ) + + run = self.create_run(info_result=RESULT_FALSE_REACH) + self.assertEqual( + RESULT_FALSE_REACH, run._analyze_result(returnvalue(1), "", None) + ) + + run = self.create_run(info_result="SOME OTHER RESULT") + self.assertEqual( + "SOME OTHER RESULT", run._analyze_result(returnvalue(1), "", None) + ) + + run = self.create_run(info_result=RESULT_UNKNOWN) + self.assertEqual(RESULT_UNKNOWN, run._analyze_result(returnvalue(1), "", None)) From 569b6d829453f0b52caa5f70a0c1d4bfb982f948 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 19:58:32 +0000 Subject: [PATCH 06/52] Transitioned test_analyze_run_result to pytest --- benchexec/pytest_analyze_run_result.py | 122 ++++++++++--------------- 1 file changed, 49 insertions(+), 73 deletions(-) diff --git a/benchexec/pytest_analyze_run_result.py b/benchexec/pytest_analyze_run_result.py index 4c9461659..16c6a18aa 100644 --- a/benchexec/pytest_analyze_run_result.py +++ b/benchexec/pytest_analyze_run_result.py @@ -7,7 +7,7 @@ import logging import sys -import unittest +import pytest import types from benchexec.util import ProcessExitCode @@ -25,12 +25,13 @@ normal_result = ProcessExitCode(raw=0, value=0, signal=None) -class TestResult(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - logging.disable(logging.CRITICAL) +@pytest.fixture(scope="class") +def disable_non_critical_logging(): + logging.disable(logging.CRITICAL) + +@pytest.mark.usefixtures("disable_non_critical_logging") +class TestResult: def create_run(self, info_result=RESULT_UNKNOWN): runSet = 
types.SimpleNamespace() runSet.log_folder = "." @@ -64,107 +65,90 @@ def determine_result(run): def test_simple(self): run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual(RESULT_UNKNOWN, run._analyze_result(normal_result, "", None)) + assert RESULT_UNKNOWN == run._analyze_result(normal_result, "", None) run = self.create_run(info_result=RESULT_TRUE_PROP) - self.assertEqual(RESULT_TRUE_PROP, run._analyze_result(normal_result, "", None)) + assert RESULT_TRUE_PROP == run._analyze_result(normal_result, "", None) run = self.create_run(info_result=RESULT_FALSE_REACH) - self.assertEqual( - RESULT_FALSE_REACH, run._analyze_result(normal_result, "", None) - ) + assert RESULT_FALSE_REACH == run._analyze_result(normal_result, "", None) def test_timeout(self): run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "cputime")) - self.assertEqual( - "TIMEOUT", run._analyze_result(normal_result, "", "cputime-soft") - ) - self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "walltime")) + assert "TIMEOUT" == run._analyze_result(normal_result, "", "cputime") + assert "TIMEOUT" == run._analyze_result(normal_result, "", "cputime-soft") + assert "TIMEOUT" == run._analyze_result(normal_result, "", "walltime") run = self.create_run(info_result=RESULT_TRUE_PROP) - self.assertEqual( - f"TIMEOUT ({RESULT_TRUE_PROP})", - run._analyze_result(normal_result, "", "cputime"), + assert f"TIMEOUT ({RESULT_TRUE_PROP})" == run._analyze_result( + normal_result, "", "cputime" ) run = self.create_run(info_result=RESULT_FALSE_REACH) - self.assertEqual( - f"TIMEOUT ({RESULT_FALSE_REACH})", - run._analyze_result(normal_result, "", "cputime"), + assert f"TIMEOUT ({RESULT_FALSE_REACH})" == run._analyze_result( + normal_result, "", "cputime" ) run = self.create_run(info_result="SOME OTHER RESULT") - self.assertEqual( - "TIMEOUT (SOME OTHER RESULT)", - run._analyze_result(normal_result, "", "cputime"), + assert "TIMEOUT (SOME OTHER RESULT)" == run._analyze_result( + normal_result, "", "cputime" ) run = self.create_run(info_result=RESULT_ERROR) - self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "cputime")) + assert "TIMEOUT" == run._analyze_result(normal_result, "", "cputime") run = self.create_run(info_result=RESULT_ERROR) run._is_timeout = lambda: True - self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", None)) + assert "TIMEOUT" == run._analyze_result(normal_result, "", None) def test_out_of_memory(self): run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual( - "OUT OF MEMORY", run._analyze_result(normal_result, "", "memory") - ) + assert "OUT OF MEMORY" == run._analyze_result(normal_result, "", "memory") run = self.create_run(info_result=RESULT_TRUE_PROP) - self.assertEqual( - f"OUT OF MEMORY ({RESULT_TRUE_PROP})", - run._analyze_result(normal_result, "", "memory"), + assert f"OUT OF MEMORY ({RESULT_TRUE_PROP})" == run._analyze_result( + normal_result, "", "memory" ) run = self.create_run(info_result=RESULT_FALSE_REACH) - self.assertEqual( - f"OUT OF MEMORY ({RESULT_FALSE_REACH})", - run._analyze_result(normal_result, "", "memory"), + assert f"OUT OF MEMORY ({RESULT_FALSE_REACH})" == run._analyze_result( + normal_result, "", "memory" ) run = self.create_run(info_result="SOME OTHER RESULT") - self.assertEqual( - "OUT OF MEMORY (SOME OTHER RESULT)", - run._analyze_result(normal_result, "", "memory"), + assert "OUT OF MEMORY (SOME OTHER RESULT)" == run._analyze_result( + normal_result, "", 
"memory" ) run = self.create_run(info_result=RESULT_ERROR) - self.assertEqual( - "OUT OF MEMORY", run._analyze_result(normal_result, "", "memory") - ) + assert "OUT OF MEMORY" == run._analyze_result(normal_result, "", "memory") def test_timeout_and_out_of_memory(self): run = self.create_run(info_result=RESULT_UNKNOWN) run._is_timeout = lambda: True - self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "memory")) + assert "TIMEOUT" == run._analyze_result(normal_result, "", "memory") run = self.create_run(info_result=RESULT_TRUE_PROP) run._is_timeout = lambda: True - self.assertEqual( - f"TIMEOUT ({RESULT_TRUE_PROP})", - run._analyze_result(normal_result, "", "memory"), + assert f"TIMEOUT ({RESULT_TRUE_PROP})" == run._analyze_result( + normal_result, "", "memory" ) run = self.create_run(info_result=RESULT_FALSE_REACH) run._is_timeout = lambda: True - self.assertEqual( - f"TIMEOUT ({RESULT_FALSE_REACH})", - run._analyze_result(normal_result, "", "memory"), + assert f"TIMEOUT ({RESULT_FALSE_REACH})" == run._analyze_result( + normal_result, "", "memory" ) run = self.create_run(info_result="SOME OTHER RESULT") run._is_timeout = lambda: True - self.assertEqual( - "TIMEOUT (SOME OTHER RESULT)", - run._analyze_result(normal_result, "", "memory"), + assert "TIMEOUT (SOME OTHER RESULT)" == run._analyze_result( + normal_result, "", "memory" ) run = self.create_run(info_result=RESULT_ERROR) run._is_timeout = lambda: True - self.assertEqual("TIMEOUT", run._analyze_result(normal_result, "", "memory")) + assert "TIMEOUT" == run._analyze_result(normal_result, "", "memory") def test_returnsignal(self): def signal(sig): @@ -172,22 +156,22 @@ def signal(sig): return ProcessExitCode(raw=sig, value=None, signal=sig) run = self.create_run(info_result=RESULT_ERROR) - self.assertEqual("TIMEOUT", run._analyze_result(signal(9), "", "cputime")) + assert "TIMEOUT" == run._analyze_result(signal(9), "", "cputime") run = self.create_run(info_result=RESULT_ERROR) - self.assertEqual("OUT OF MEMORY", run._analyze_result(signal(9), "", "memory")) + assert "OUT OF MEMORY" == run._analyze_result(signal(9), "", "memory") run = self.create_run(info_result=RESULT_TRUE_PROP) - self.assertEqual(RESULT_TRUE_PROP, run._analyze_result(signal(9), "", None)) + assert RESULT_TRUE_PROP == run._analyze_result(signal(9), "", None) run = self.create_run(info_result=RESULT_FALSE_REACH) - self.assertEqual(RESULT_FALSE_REACH, run._analyze_result(signal(9), "", None)) + assert RESULT_FALSE_REACH == run._analyze_result(signal(9), "", None) run = self.create_run(info_result="SOME OTHER RESULT") - self.assertEqual("SOME OTHER RESULT", run._analyze_result(signal(9), "", None)) + assert "SOME OTHER RESULT" == run._analyze_result(signal(9), "", None) run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual("KILLED BY SIGNAL 9", run._analyze_result(signal(9), "", None)) + assert "KILLED BY SIGNAL 9" == run._analyze_result(signal(9), "", None) def test_exitcode(self): def returnvalue(value): @@ -195,27 +179,19 @@ def returnvalue(value): return ProcessExitCode(raw=value << 8, value=value, signal=None) run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual("TIMEOUT", run._analyze_result(returnvalue(1), "", "cputime")) + assert "TIMEOUT" == run._analyze_result(returnvalue(1), "", "cputime") run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual( - "OUT OF MEMORY", run._analyze_result(returnvalue(1), "", "memory") - ) + assert "OUT OF MEMORY" == run._analyze_result(returnvalue(1), "", "memory") run = 
self.create_run(info_result=RESULT_TRUE_PROP) - self.assertEqual( - RESULT_TRUE_PROP, run._analyze_result(returnvalue(1), "", None) - ) + assert RESULT_TRUE_PROP == run._analyze_result(returnvalue(1), "", None) run = self.create_run(info_result=RESULT_FALSE_REACH) - self.assertEqual( - RESULT_FALSE_REACH, run._analyze_result(returnvalue(1), "", None) - ) + assert RESULT_FALSE_REACH == run._analyze_result(returnvalue(1), "", None) run = self.create_run(info_result="SOME OTHER RESULT") - self.assertEqual( - "SOME OTHER RESULT", run._analyze_result(returnvalue(1), "", None) - ) + assert "SOME OTHER RESULT" == run._analyze_result(returnvalue(1), "", None) run = self.create_run(info_result=RESULT_UNKNOWN) - self.assertEqual(RESULT_UNKNOWN, run._analyze_result(returnvalue(1), "", None)) + assert RESULT_UNKNOWN == run._analyze_result(returnvalue(1), "", None) From 469ac3cd11608c1e7ea596e4095cefd6c01ea197 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 9 May 2024 20:02:01 +0000 Subject: [PATCH 07/52] Duplicated pytest_benchmark_definition.py --- benchexec/pytest_benchmark_definition.py | 159 +++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 benchexec/pytest_benchmark_definition.py diff --git a/benchexec/pytest_benchmark_definition.py b/benchexec/pytest_benchmark_definition.py new file mode 100644 index 000000000..221de868e --- /dev/null +++ b/benchexec/pytest_benchmark_definition.py @@ -0,0 +1,159 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import collections +import os +import tempfile +import unittest +from unittest.mock import patch +import yaml + +from benchexec.model import Benchmark +import benchexec.result +import benchexec.util as util + +here = os.path.dirname(__file__) +base_dir = os.path.join(here, "..") +test_dir = os.path.join(base_dir, "test", "tasks") + +DummyConfig = collections.namedtuple( + "DummyConfig", + [ + "name", + "output_path", + "container", + "timelimit", + "walltimelimit", + "memorylimit", + "corelimit", + "num_of_threads", + "selected_run_definitions", + "selected_sourcefile_sets", + "description_file", + ], +)(None, "test", False, None, None, None, None, None, None, None, None) + +ALL_TEST_TASKS = { + "false_other_sub_task.yml": "other_subproperty", + "false_sub_task.yml": "sub", + "false_sub2_task.yml": "sub2", + "false_task.yml": "expected_verdict: false", + "true_task.yml": "expected_verdict: true", + "unknown_task.yml": "", +} + + +def mock_expand_filename_pattern(pattern, base_dir): + if pattern == "*.yml": + return list(ALL_TEST_TASKS.keys()) + ["other_task.yml"] + return [pattern] + + +def mock_load_task_def_file(f): + content = util.read_file(os.path.join(test_dir, f)) + return yaml.safe_load(content) + + +def mock_property_create(property_file): + assert property_file == "test.prp" + return benchexec.result.Property("test.prp", False, "test") + + +class TestBenchmarkDefinition(unittest.TestCase): + """ + Unit tests for reading benchmark definitions, + testing mostly the classes from benchexec.model. 
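+    File-name expansion, task-definition loading, and property parsing
+    are patched with the mock_* helpers defined above.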
+ """ + + @classmethod + def setUpClass(cls): + cls.longMessage = True + + @patch("benchexec.model.load_task_definition_file", new=mock_load_task_def_file) + @patch("benchexec.result.Property.create", new=mock_property_create) + @patch("benchexec.util.expand_filename_pattern", new=mock_expand_filename_pattern) + @patch("os.path.samefile", new=lambda a, b: a == b) + def parse_benchmark_definition(self, content): + with tempfile.NamedTemporaryFile( + prefix="BenchExec_test_benchmark_definition_", suffix=".xml", mode="w+" + ) as temp: + temp.write(content) + temp.flush() + + # Because we mocked everything that accesses the file system, + # we can parse the benchmark definition although task files do not exist. + return Benchmark(temp.name, DummyConfig, util.read_local_time()) + + def check_task_filter(self, filter_attr, expected): + # The following three benchmark definitions are equivalent, we check each. + benchmark_definitions = [ + """ + + test.prp + *.yml + + + """, + """ + + + test.prp + *.yml + + + + """, + """ + + + *.yml + + + test.prp + + + """, + ] + + for bench_def in benchmark_definitions: + benchmark = self.parse_benchmark_definition(bench_def.format(filter_attr)) + run_ids = [run.identifier for run in benchmark.run_sets[0].runs] + self.assertListEqual(run_ids, sorted(expected)) + + def test_expected_verdict_no_filter(self): + self.check_task_filter("", ALL_TEST_TASKS.keys()) + + def test_expected_verdict_true_filter(self): + self.check_task_filter('expectedverdict="true"', ["true_task.yml"]) + + def test_expected_verdict_false_filter(self): + false_tasks = [f for f in ALL_TEST_TASKS.keys() if f.startswith("false")] + self.check_task_filter('expectedverdict="false"', false_tasks) + + def test_expected_verdict_false_subproperty_filter(self): + self.check_task_filter('expectedverdict="false(sub)"', ["false_sub_task.yml"]) + + def test_expected_verdict_unknown_filter(self): + self.check_task_filter('expectedverdict="unknown"', ["unknown_task.yml"]) + + def test_expected_verdict_false_subproperties_filter(self): + benchmark_definition = """ + + + test.prp + *.yml + + + test.prp + *.yml + + + + """ + benchmark = self.parse_benchmark_definition(benchmark_definition) + run_ids = [run.identifier for run in benchmark.run_sets[0].runs] + self.assertListEqual(run_ids, ["false_sub_task.yml", "false_sub2_task.yml"]) From 38daff118a2e947d113fb68e87179e476fb2d8e1 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 10 May 2024 05:59:18 +0000 Subject: [PATCH 08/52] Add dependency for pytest-mock --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 219c3fd86..7daa27de4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,6 +62,7 @@ zip_safe = True dev = nose >= 1.0 pytest + pytest-mock lxml systemd = pystemd >= 0.7.0 From c70b08e2f7f4614784b5f3f52724b9edeaf02e44 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 10 May 2024 08:37:20 +0000 Subject: [PATCH 09/52] Transitioned test_benchmark_definition to pytest --- benchexec/pytest_benchmark_definition.py | 37 +++++++++++++++--------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/benchexec/pytest_benchmark_definition.py b/benchexec/pytest_benchmark_definition.py index 221de868e..ae45b9ac6 100644 --- a/benchexec/pytest_benchmark_definition.py +++ b/benchexec/pytest_benchmark_definition.py @@ -8,8 +8,7 @@ import collections import os import tempfile -import unittest -from unittest.mock import patch +import pytest import yaml from benchexec.model import Benchmark @@ -63,20 +62,25 @@ def 
mock_property_create(property_file): return benchexec.result.Property("test.prp", False, "test") -class TestBenchmarkDefinition(unittest.TestCase): +@pytest.fixture() +def apply_mocks(mocker): + mocker.patch( + "benchexec.model.load_task_definition_file", new=mock_load_task_def_file + ) + mocker.patch("benchexec.result.Property.create", new=mock_property_create) + mocker.patch( + "benchexec.util.expand_filename_pattern", new=mock_expand_filename_pattern + ) + mocker.patch("os.path.samefile", new=lambda a, b: a == b) + + +class TestBenchmarkDefinition: """ Unit tests for reading benchmark definitions, testing mostly the classes from benchexec.model. """ - @classmethod - def setUpClass(cls): - cls.longMessage = True - - @patch("benchexec.model.load_task_definition_file", new=mock_load_task_def_file) - @patch("benchexec.result.Property.create", new=mock_property_create) - @patch("benchexec.util.expand_filename_pattern", new=mock_expand_filename_pattern) - @patch("os.path.samefile", new=lambda a, b: a == b) + @pytest.mark.usefixtures("apply_mocks") def parse_benchmark_definition(self, content): with tempfile.NamedTemporaryFile( prefix="BenchExec_test_benchmark_definition_", suffix=".xml", mode="w+" @@ -88,6 +92,7 @@ def parse_benchmark_definition(self, content): # we can parse the benchmark definition although task files do not exist. return Benchmark(temp.name, DummyConfig, util.read_local_time()) + @pytest.mark.usefixtures("apply_mocks") def check_task_filter(self, filter_attr, expected): # The following three benchmark definitions are equivalent, we check each. benchmark_definitions = [ @@ -122,24 +127,30 @@ def check_task_filter(self, filter_attr, expected): for bench_def in benchmark_definitions: benchmark = self.parse_benchmark_definition(bench_def.format(filter_attr)) run_ids = [run.identifier for run in benchmark.run_sets[0].runs] - self.assertListEqual(run_ids, sorted(expected)) + assert run_ids == sorted(expected) + @pytest.mark.usefixtures("apply_mocks") def test_expected_verdict_no_filter(self): self.check_task_filter("", ALL_TEST_TASKS.keys()) + @pytest.mark.usefixtures("apply_mocks") def test_expected_verdict_true_filter(self): self.check_task_filter('expectedverdict="true"', ["true_task.yml"]) + @pytest.mark.usefixtures("apply_mocks") def test_expected_verdict_false_filter(self): false_tasks = [f for f in ALL_TEST_TASKS.keys() if f.startswith("false")] self.check_task_filter('expectedverdict="false"', false_tasks) + @pytest.mark.usefixtures("apply_mocks") def test_expected_verdict_false_subproperty_filter(self): self.check_task_filter('expectedverdict="false(sub)"', ["false_sub_task.yml"]) + @pytest.mark.usefixtures("apply_mocks") def test_expected_verdict_unknown_filter(self): self.check_task_filter('expectedverdict="unknown"', ["unknown_task.yml"]) + @pytest.mark.usefixtures("apply_mocks") def test_expected_verdict_false_subproperties_filter(self): benchmark_definition = """ @@ -156,4 +167,4 @@ def test_expected_verdict_false_subproperties_filter(self): """ benchmark = self.parse_benchmark_definition(benchmark_definition) run_ids = [run.identifier for run in benchmark.run_sets[0].runs] - self.assertListEqual(run_ids, ["false_sub_task.yml", "false_sub2_task.yml"]) + assert run_ids == ["false_sub_task.yml", "false_sub2_task.yml"] From 6ede8492658b125eada033833f7dfec6c33c29da Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 10 May 2024 08:40:02 +0000 Subject: [PATCH 10/52] Duplicated test_core_assignment.py --- benchexec/pytest_core_assignment.py | 701 
++++++++++++++++++++++++++++ 1 file changed, 701 insertions(+) create mode 100644 benchexec/pytest_core_assignment.py diff --git a/benchexec/pytest_core_assignment.py b/benchexec/pytest_core_assignment.py new file mode 100644 index 000000000..4e6d14adb --- /dev/null +++ b/benchexec/pytest_core_assignment.py @@ -0,0 +1,701 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import itertools +import logging +import sys +import unittest +import math + +from benchexec.resources import _get_cpu_cores_per_run0 + +sys.dont_write_bytecode = True # prevent creation of .pyc files + + +def lrange(start, end): + return list(range(start, end)) + + +class TestCpuCoresPerRun(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.longMessage = True + logging.disable(logging.CRITICAL) + + def assertValid(self, coreLimit, num_of_threads, expectedResult=None): + result = _get_cpu_cores_per_run0( + coreLimit, num_of_threads, self.use_ht, *self.machine() + ) + if expectedResult: + self.assertEqual( + expectedResult, + result, + f"Incorrect result for {coreLimit} cores and {num_of_threads} threads.", + ) + + def assertInvalid(self, coreLimit, num_of_threads): + self.assertRaises( + SystemExit, + _get_cpu_cores_per_run0, + coreLimit, + num_of_threads, + self.use_ht, + *self.machine(), + ) + + def machine(self): + """Create the necessary parameters of _get_cpu_cores_per_run0 for a specific machine.""" + core_count = self.cpus * self.cores + allCpus = range(core_count) + cores_of_package = {} + ht_spread = core_count // 2 + for package in range(self.cpus): + start = package * self.cores // (2 if self.ht else 1) + end = (package + 1) * self.cores // (2 if self.ht else 1) + cores_of_package[package] = lrange(start, end) + if self.ht: + cores_of_package[package].extend( + range(start + ht_spread, end + ht_spread) + ) + siblings_of_core = {} + for core in allCpus: + siblings_of_core[core] = [core] + if self.ht: + for core in allCpus: + siblings_of_core[core].append((core + ht_spread) % core_count) + siblings_of_core[core].sort() + return allCpus, cores_of_package, siblings_of_core + + def test_singleThread(self): + # test all possible coreLimits for a single thread + core_count = self.cpus * self.cores + if self.ht: + # Creates list alternating between real core and hyper-threading core + singleThread_assignment = list( + itertools.chain( + *zip(range(core_count // 2), range(core_count // 2, core_count)) + ) + ) + else: + singleThread_assignment = lrange(0, core_count) + if not self.use_ht and self.ht: + core_count = (self.cpus * self.cores) // 2 + singleThread_assignment = lrange(0, core_count) + + for coreLimit in range(1, core_count + 1): + self.assertValid( + coreLimit, 1, [sorted(singleThread_assignment[:coreLimit])] + ) + self.assertInvalid(core_count + 1, 1) + + # expected order in which cores are used for runs with coreLimit==1/2/3/4/8, used by the following tests + # these fields should be filled in by subclasses to activate the corresponding tests + # (same format as the expected return value by _get_cpu_cores_per_run) + oneCore_assignment = None + twoCore_assignment = None + threeCore_assignment = None + fourCore_assignment = None + eightCore_assignment = None + use_ht = True + + def test_oneCorePerRun(self): + # test all possible numOfThread values for runs with one core + maxThreads = self.cpus * self.cores + if not self.use_ht 
and self.ht: + maxThreads = (self.cpus * self.cores) // 2 + self.assertInvalid(1, maxThreads + 1) + if not self.oneCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in range(1, maxThreads + 1): + self.assertValid( + 1, num_of_threads, self.oneCore_assignment[:num_of_threads] + ) + + def test_twoCoresPerRun(self): + # test all possible numOfThread values for runs with two cores + maxThreads = self.cpus * (self.cores // 2) + if not self.use_ht and self.ht: + maxThreads = self.cpus * (self.cores // 4) + if maxThreads == 0: + # Test for runs that are split over cpus + cpus_per_run = int(math.ceil(2 / (self.cores // 2))) + maxThreads = self.cpus // cpus_per_run + self.assertInvalid(2, maxThreads + 1) + if not self.twoCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in range(1, maxThreads + 1): + self.assertValid( + 2, num_of_threads, self.twoCore_assignment[:num_of_threads] + ) + + def test_threeCoresPerRun(self): + # test all possible numOfThread values for runs with three cores + maxThreads = self.cpus * (self.cores // 3) + if not self.use_ht and self.ht: + maxThreads = self.cpus * (self.cores // 6) + if maxThreads == 0: + # Test for runs that are split over cpus + cpus_per_run = int(math.ceil(3 / (self.cores // 2))) + maxThreads = self.cpus // cpus_per_run + + self.assertInvalid(3, maxThreads + 1) + if not self.threeCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in range(1, maxThreads + 1): + self.assertValid( + 3, num_of_threads, self.threeCore_assignment[:num_of_threads] + ) + + def test_fourCoresPerRun(self): + # test all possible numOfThread values for runs with four cores + maxThreads = self.cpus * (self.cores // 4) + if not self.use_ht and self.ht: + maxThreads = self.cpus * (self.cores // 8) + if maxThreads == 0: + # Test for runs that are split over cpus + cpus_per_run = int(math.ceil(4 / (self.cores // 2))) + maxThreads = self.cpus // cpus_per_run + + self.assertInvalid(4, maxThreads + 1) + if not self.fourCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in range(1, maxThreads + 1): + self.assertValid( + 4, num_of_threads, self.fourCore_assignment[:num_of_threads] + ) + + def test_eightCoresPerRun(self): + # test all possible numOfThread values for runs with eight cores + maxThreads = self.cpus * (self.cores // 8) + if not self.use_ht and self.ht: + maxThreads = (self.cpus * self.cores) // 16 + if maxThreads == 0: + # Test for runs that are split over cpus + cpus_per_run = int(math.ceil(8 / (self.cores // 2))) + maxThreads = self.cpus // cpus_per_run + if not maxThreads: + self.skipTest( + "Testing for runs that need to be split across CPUs is not implemented" + ) + self.assertInvalid(8, maxThreads + 1) + if not self.eightCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in range(1, maxThreads + 1): + self.assertValid( + 8, num_of_threads, self.eightCore_assignment[:num_of_threads] + ) + + +class TestCpuCoresPerRun_singleCPU(TestCpuCoresPerRun): + cpus = 1 + cores = 8 + ht = False + + oneCore_assignment = [[x] for x in range(8)] + twoCore_assignment = [[0, 1], [2, 3], [4, 5], [6, 7]] + threeCore_assignment = [[0, 1, 2], [3, 4, 5]] + fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7]] + eightCore_assignment = [list(range(8))] + + def test_singleCPU_invalid(self): + self.assertInvalid(2, 5) + self.assertInvalid(5, 2) + self.assertInvalid(3, 3) + + +class TestCpuCoresPerRun_singleCPU_HT(TestCpuCoresPerRun_singleCPU): + ht = True + + 
twoCore_assignment = [[0, 4], [1, 5], [2, 6], [3, 7]] + threeCore_assignment = [[0, 1, 4], [2, 3, 6]] + fourCore_assignment = [[0, 1, 4, 5], [2, 3, 6, 7]] + + def test_halfPhysicalCore(self): + # Cannot run if we have only half of one physical core + self.assertRaises( + SystemExit, + _get_cpu_cores_per_run0, + 1, + 1, + True, + [0], + {0: [0, 1]}, + {0: [0, 1]}, + ) + + +class TestCpuCoresPerRun_dualCPU_HT(TestCpuCoresPerRun): + cpus = 2 + cores = 16 + ht = True + + oneCore_assignment = [ + [x] + for x in [ + 0, + 8, + 1, + 9, + 2, + 10, + 3, + 11, + 4, + 12, + 5, + 13, + 6, + 14, + 7, + 15, + 16, + 24, + 17, + 25, + 18, + 26, + 19, + 27, + 20, + 28, + 21, + 29, + 22, + 30, + 23, + 31, + ] + ] + + twoCore_assignment = [ + [0, 16], + [8, 24], + [1, 17], + [9, 25], + [2, 18], + [10, 26], + [3, 19], + [11, 27], + [4, 20], + [12, 28], + [5, 21], + [13, 29], + [6, 22], + [14, 30], + [7, 23], + [15, 31], + ] + + # Note: the core assignment here is non-uniform, the last two threads are spread over three physical cores + # Currently, the assignment algorithm cannot do better for odd coreLimits, + # but this affects only cases where physical cores are split between runs, which is not recommended anyway. + threeCore_assignment = [ + [0, 1, 16], + [8, 9, 24], + [2, 3, 18], + [10, 11, 26], + [4, 5, 20], + [12, 13, 28], + [6, 7, 22], + [14, 15, 30], + [17, 19, 21], + [25, 27, 29], + ] + + fourCore_assignment = [ + [0, 1, 16, 17], + [8, 9, 24, 25], + [2, 3, 18, 19], + [10, 11, 26, 27], + [4, 5, 20, 21], + [12, 13, 28, 29], + [6, 7, 22, 23], + [14, 15, 30, 31], + ] + + eightCore_assignment = [ + [0, 1, 2, 3, 16, 17, 18, 19], + [8, 9, 10, 11, 24, 25, 26, 27], + [4, 5, 6, 7, 20, 21, 22, 23], + [12, 13, 14, 15, 28, 29, 30, 31], + ] + + def test_dualCPU_HT(self): + self.assertValid( + 16, 2, [lrange(0, 8) + lrange(16, 24), lrange(8, 16) + lrange(24, 32)] + ) + + def test_dualCPU_HT_invalid(self): + self.assertInvalid(2, 17) + self.assertInvalid(17, 2) + self.assertInvalid(4, 9) + self.assertInvalid(9, 4) + self.assertInvalid(8, 5) + self.assertInvalid(5, 8) + + +class TestCpuCoresPerRun_threeCPU(TestCpuCoresPerRun): + cpus = 3 + cores = 5 + ht = False + + oneCore_assignment = [ + [x] for x in [0, 5, 10, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14] + ] + twoCore_assignment = [[0, 1], [5, 6], [10, 11], [2, 3], [7, 8], [12, 13]] + threeCore_assignment = [[0, 1, 2], [5, 6, 7], [10, 11, 12]] + fourCore_assignment = [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]] + + def test_threeCPU_invalid(self): + self.assertInvalid(6, 2) + + +class TestCpuCoresPerRun_threeCPU_HT(TestCpuCoresPerRun): + cpus = 3 + cores = 10 + ht = True + + oneCore_assignment = [ + [x] + for x in [ + 0, + 5, + 10, + 1, + 6, + 11, + 2, + 7, + 12, + 3, + 8, + 13, + 4, + 9, + 14, + 15, + 20, + 25, + 16, + 21, + 26, + 17, + 22, + 27, + 18, + 23, + 28, + 19, + 24, + 29, + ] + ] + twoCore_assignment = [ + [0, 15], + [5, 20], + [10, 25], + [1, 16], + [6, 21], + [11, 26], + [2, 17], + [7, 22], + [12, 27], + [3, 18], + [8, 23], + [13, 28], + [4, 19], + [9, 24], + [14, 29], + ] + threeCore_assignment = [ + [0, 1, 15], + [5, 6, 20], + [10, 11, 25], + [2, 3, 17], + [7, 8, 22], + [12, 13, 27], + [4, 16, 19], + [9, 21, 24], + [14, 26, 29], + ] + fourCore_assignment = [ + [0, 1, 15, 16], + [5, 6, 20, 21], + [10, 11, 25, 26], + [2, 3, 17, 18], + [7, 8, 22, 23], + [12, 13, 27, 28], + ] + eightCore_assignment = [ + [0, 1, 2, 3, 15, 16, 17, 18], + [5, 6, 7, 8, 20, 21, 22, 23], + [10, 11, 12, 13, 25, 26, 27, 28], + ] + + def test_threeCPU_HT_invalid(self): + 
self.assertInvalid(11, 2) + + def test_threeCPU_HT_noncontiguousId(self): + """3 CPUs with one core (plus HT) and non-contiguous core and package numbers. + This may happen on systems with administrative core restrictions, + because the ordering of core and package numbers is not always consistent.""" + result = _get_cpu_cores_per_run0( + 2, + 3, + True, + [0, 1, 2, 3, 6, 7], + {0: [0, 1], 2: [2, 3], 3: [6, 7]}, + {0: [0, 1], 1: [0, 1], 2: [2, 3], 3: [2, 3], 6: [6, 7], 7: [6, 7]}, + ) + self.assertEqual( + [[0, 1], [2, 3], [6, 7]], + result, + "Incorrect result for 2 cores and 3 threads.", + ) + + +class TestCpuCoresPerRun_quadCPU_HT(TestCpuCoresPerRun): + cpus = 4 + cores = 16 + ht = True + + def test_quadCPU_HT_noncontiguousId(self): + """4 CPUs with 8 cores (plus HT) and non-contiguous core and package numbers. + This may happen on systems with administrative core restrictions, + because the ordering of core and package numbers is not always consistent. + Furthermore, sibling cores have numbers next to each other (occurs on AMD Opteron machines with shared L1/L2 caches) + and are not split as far as possible from each other (as it occurs on hyper-threading machines). + """ + result = _get_cpu_cores_per_run0( + 1, + 8, + True, + [0, 1, 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 57], + { + 0: [0, 1, 8, 9], + 1: [32, 33, 40, 41], + 2: [48, 49, 56, 57], + 3: [16, 17, 24, 25], + }, + { + 0: [0, 1], + 1: [0, 1], + 48: [48, 49], + 33: [32, 33], + 32: [32, 33], + 40: [40, 41], + 9: [8, 9], + 16: [16, 17], + 17: [16, 17], + 56: [56, 57], + 57: [56, 57], + 8: [8, 9], + 41: [40, 41], + 24: [24, 25], + 25: [24, 25], + 49: [48, 49], + }, + ) + self.assertEqual( + [[0], [32], [48], [16], [8], [40], [56], [24]], + result, + "Incorrect result for 1 core and 8 threads.", + ) + + def test_quadCPU_HT(self): + self.assertValid( + 16, + 4, + [ + lrange(0, 8) + lrange(32, 40), + lrange(8, 16) + lrange(40, 48), + lrange(16, 24) + lrange(48, 56), + lrange(24, 32) + lrange(56, 64), + ], + ) + + # Just test that no exception occurs + self.assertValid(1, 64) + self.assertValid(64, 1) + self.assertValid(2, 32) + self.assertValid(32, 2) + self.assertValid(3, 20) + self.assertValid(16, 3) + self.assertValid(4, 16) + self.assertValid(16, 4) + self.assertValid(5, 12) + self.assertValid(8, 8) + + def test_quadCPU_HT_invalid(self): + self.assertInvalid(2, 33) + self.assertInvalid(33, 2) + self.assertInvalid(3, 21) + self.assertInvalid(17, 3) + self.assertInvalid(4, 17) + self.assertInvalid(17, 4) + self.assertInvalid(5, 13) + self.assertInvalid(9, 5) + self.assertInvalid(6, 9) + self.assertInvalid(9, 6) + self.assertInvalid(7, 9) + self.assertInvalid(9, 7) + self.assertInvalid(8, 9) + self.assertInvalid(9, 8) + + self.assertInvalid(9, 5) + self.assertInvalid(6, 9) + self.assertInvalid(10, 5) + self.assertInvalid(6, 10) + self.assertInvalid(11, 5) + self.assertInvalid(6, 11) + self.assertInvalid(12, 5) + self.assertInvalid(6, 12) + self.assertInvalid(13, 5) + self.assertInvalid(5, 13) + self.assertInvalid(14, 5) + self.assertInvalid(5, 14) + self.assertInvalid(15, 5) + self.assertInvalid(5, 15) + self.assertInvalid(16, 5) + self.assertInvalid(5, 16) + + +class TestCpuCoresPerRun_singleCPU_no_ht(TestCpuCoresPerRun): + cpus = 1 + cores = 8 + ht = True + use_ht = False + + oneCore_assignment = [[x] for x in range(0, 4)] + twoCore_assignment = [[0, 1], [2, 3]] + threeCore_assignment = [[0, 1, 2]] + fourCore_assignment = [[0, 1, 2, 3]] + + def test_singleCPU_no_ht_invalid(self): + self.assertInvalid(1, 5) + 
self.assertInvalid(2, 3) + self.assertInvalid(3, 2) + self.assertInvalid(4, 2) + self.assertInvalid(8, 1) + + +class TestCpuCoresPerRun_dualCPU_no_ht(TestCpuCoresPerRun): + cpus = 2 + cores = 8 + ht = True + use_ht = False + + oneCore_assignment = [[0], [4], [1], [5], [2], [6], [3], [7]] + twoCore_assignment = [[0, 1], [4, 5], [2, 3], [6, 7]] + threeCore_assignment = [[0, 1, 2], [4, 5, 6]] + fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7]] + eightCore_assignment = [[0, 1, 2, 3, 4, 5, 6, 7]] + + def test_dualCPU_no_ht_invalid(self): + self.assertInvalid(1, 9) + self.assertInvalid(1, 10) + self.assertInvalid(2, 5) + self.assertInvalid(2, 6) + self.assertInvalid(3, 3) + self.assertInvalid(3, 4) + self.assertInvalid(4, 3) + self.assertInvalid(4, 4) + self.assertInvalid(8, 2) + self.assertInvalid(8, 3) + + def test_dualCPU_noncontiguousID(self): + results = _get_cpu_cores_per_run0( + 2, + 3, + False, + [0, 4, 9, 15, 21, 19, 31, 12, 10, 11, 8, 23, 27, 14, 1, 20], + {0: [0, 4, 9, 12, 15, 19, 21, 31], 2: [10, 11, 8, 23, 27, 14, 1, 20]}, + { + 0: [0, 4], + 4: [0, 4], + 9: [9, 12], + 12: [9, 12], + 15: [15, 19], + 19: [15, 19], + 21: [21, 31], + 31: [21, 31], + 10: [10, 11], + 11: [10, 11], + 8: [8, 23], + 23: [8, 23], + 27: [27, 14], + 14: [27, 14], + 1: [1, 20], + 20: [1, 20], + }, + ) + self.assertEqual( + results, + [[0, 9], [8, 10], [15, 21]], + "Incorrect result for 2 cores and 3 threads.", + ) + + +class TestCpuCoresPerRun_threeCPU_no_ht(TestCpuCoresPerRun): + cpus = 3 + cores = 6 + ht = True + use_ht = False + + oneCore_assignment = [[x] for x in [0, 3, 6, 1, 4, 7, 2, 5, 8]] + twoCore_assignment = [[0, 1], [3, 4], [6, 7]] + threeCore_assignment = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + fourCore_assignment = [[0, 1, 2, 3]] + eightCore_assignment = [[0, 1, 2, 3, 4, 5, 6, 7]] + + def test_threeCPU_no_ht_invalid(self): + self.assertInvalid(1, 10) + self.assertInvalid(2, 4) + self.assertInvalid(3, 4) + self.assertInvalid(4, 2) + self.assertInvalid(8, 2) + + +class TestCpuCoresPerRun_quadCPU_no_ht(TestCpuCoresPerRun): + cpus = 4 + cores = 8 + ht = True + use_ht = False + + oneCore_assignment = [ + [x] for x in [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15] + ] + twoCore_assignment = [ + [0, 1], + [4, 5], + [8, 9], + [12, 13], + [2, 3], + [6, 7], + [10, 11], + [14, 15], + ] + threeCore_assignment = [[0, 1, 2], [4, 5, 6], [8, 9, 10], [12, 13, 14]] + fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]] + eightCore_assignment = [[0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15]] + + def test_quadCPU_no_ht_invalid(self): + self.assertInvalid(1, 17) + self.assertInvalid(2, 9) + self.assertInvalid(3, 5) + self.assertInvalid(4, 5) + self.assertInvalid(8, 3) + + def test_quadCPU_no_ht_valid(self): + self.assertValid(5, 2, [[0, 1, 2, 3, 4], [8, 9, 10, 11, 12]]) + self.assertInvalid(5, 3) + self.assertValid(6, 2, [[0, 1, 2, 3, 4, 5], [8, 9, 10, 11, 12, 13]]) + self.assertInvalid(6, 3) + + +# prevent execution of base class as its own test +del TestCpuCoresPerRun From 715f77774b9e1a495cf8933f47b57f34ba8ba818 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Tue, 14 May 2024 22:19:41 +0000 Subject: [PATCH 11/52] Transitioned test_core_assignment to pytest --- benchexec/pytest_core_assignment.py | 93 +++++++++++++---------------- 1 file changed, 43 insertions(+), 50 deletions(-) diff --git a/benchexec/pytest_core_assignment.py b/benchexec/pytest_core_assignment.py index 4e6d14adb..9617f5f46 100644 --- a/benchexec/pytest_core_assignment.py +++ 
b/benchexec/pytest_core_assignment.py @@ -8,7 +8,7 @@ import itertools import logging import sys -import unittest +import pytest import math from benchexec.resources import _get_cpu_cores_per_run0 @@ -20,32 +20,28 @@ def lrange(start, end): return list(range(start, end)) -class TestCpuCoresPerRun(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - logging.disable(logging.CRITICAL) +@pytest.fixture(scope="class") +def disable_non_critical_logging(): + logging.disable(logging.CRITICAL) + + +@pytest.mark.usefixtures("disable_non_critical_logging") +class TestCpuCoresPerRun: def assertValid(self, coreLimit, num_of_threads, expectedResult=None): result = _get_cpu_cores_per_run0( coreLimit, num_of_threads, self.use_ht, *self.machine() ) if expectedResult: - self.assertEqual( - expectedResult, - result, - f"Incorrect result for {coreLimit} cores and {num_of_threads} threads.", - ) + assert ( + expectedResult == result + ), f"Incorrect result for {coreLimit} cores and {num_of_threads} threads." def assertInvalid(self, coreLimit, num_of_threads): - self.assertRaises( - SystemExit, - _get_cpu_cores_per_run0, - coreLimit, - num_of_threads, - self.use_ht, - *self.machine(), - ) + with pytest.raises(SystemExit): + _get_cpu_cores_per_run0( + coreLimit, num_of_threads, self.use_ht, *self.machine() + ) def machine(self): """Create the necessary parameters of _get_cpu_cores_per_run0 for a specific machine.""" @@ -109,7 +105,7 @@ def test_oneCorePerRun(self): maxThreads = (self.cpus * self.cores) // 2 self.assertInvalid(1, maxThreads + 1) if not self.oneCore_assignment: - self.skipTest("Need result specified") + pytest.skip("Need result specified") for num_of_threads in range(1, maxThreads + 1): self.assertValid( 1, num_of_threads, self.oneCore_assignment[:num_of_threads] @@ -126,7 +122,7 @@ def test_twoCoresPerRun(self): maxThreads = self.cpus // cpus_per_run self.assertInvalid(2, maxThreads + 1) if not self.twoCore_assignment: - self.skipTest("Need result specified") + pytest.skip("Need result specified") for num_of_threads in range(1, maxThreads + 1): self.assertValid( 2, num_of_threads, self.twoCore_assignment[:num_of_threads] @@ -144,7 +140,7 @@ def test_threeCoresPerRun(self): self.assertInvalid(3, maxThreads + 1) if not self.threeCore_assignment: - self.skipTest("Need result specified") + pytest.skip("Need result specified") for num_of_threads in range(1, maxThreads + 1): self.assertValid( 3, num_of_threads, self.threeCore_assignment[:num_of_threads] @@ -162,7 +158,7 @@ def test_fourCoresPerRun(self): self.assertInvalid(4, maxThreads + 1) if not self.fourCore_assignment: - self.skipTest("Need result specified") + pytest.skip("Need result specified") for num_of_threads in range(1, maxThreads + 1): self.assertValid( 4, num_of_threads, self.fourCore_assignment[:num_of_threads] @@ -178,12 +174,12 @@ def test_eightCoresPerRun(self): cpus_per_run = int(math.ceil(8 / (self.cores // 2))) maxThreads = self.cpus // cpus_per_run if not maxThreads: - self.skipTest( + pytest.skip( "Testing for runs that need to be split across CPUs is not implemented" ) self.assertInvalid(8, maxThreads + 1) if not self.eightCore_assignment: - self.skipTest("Need result specified") + pytest.skip("Need result specified") for num_of_threads in range(1, maxThreads + 1): self.assertValid( 8, num_of_threads, self.eightCore_assignment[:num_of_threads] @@ -216,16 +212,8 @@ class TestCpuCoresPerRun_singleCPU_HT(TestCpuCoresPerRun_singleCPU): def test_halfPhysicalCore(self): # Cannot run if we have only 
half of one physical core - self.assertRaises( - SystemExit, - _get_cpu_cores_per_run0, - 1, - 1, - True, - [0], - {0: [0, 1]}, - {0: [0, 1]}, - ) + with pytest.raises(SystemExit): + _get_cpu_cores_per_run0(1, 1, True, [0], {0: [0, 1]}, {0: [0, 1]}) class TestCpuCoresPerRun_dualCPU_HT(TestCpuCoresPerRun): @@ -451,11 +439,11 @@ def test_threeCPU_HT_noncontiguousId(self): {0: [0, 1], 2: [2, 3], 3: [6, 7]}, {0: [0, 1], 1: [0, 1], 2: [2, 3], 3: [2, 3], 6: [6, 7], 7: [6, 7]}, ) - self.assertEqual( - [[0, 1], [2, 3], [6, 7]], - result, - "Incorrect result for 2 cores and 3 threads.", - ) + assert [ + [0, 1], + [2, 3], + [6, 7], + ] == result, "Incorrect result for 2 cores and 3 threads." class TestCpuCoresPerRun_quadCPU_HT(TestCpuCoresPerRun): @@ -500,11 +488,16 @@ def test_quadCPU_HT_noncontiguousId(self): 49: [48, 49], }, ) - self.assertEqual( - [[0], [32], [48], [16], [8], [40], [56], [24]], - result, - "Incorrect result for 1 core and 8 threads.", - ) + assert [ + [0], + [32], + [48], + [16], + [8], + [40], + [56], + [24], + ] == result, "Incorrect result for 1 core and 8 threads." def test_quadCPU_HT(self): self.assertValid( @@ -633,11 +626,11 @@ def test_dualCPU_noncontiguousID(self): 20: [1, 20], }, ) - self.assertEqual( - results, - [[0, 9], [8, 10], [15, 21]], - "Incorrect result for 2 cores and 3 threads.", - ) + assert results == [ + [0, 9], + [8, 10], + [15, 21], + ], "Incorrect result for 2 cores and 3 threads." class TestCpuCoresPerRun_threeCPU_no_ht(TestCpuCoresPerRun): From 68396cce193185f9d68c9e16d3fdefdc8db4d8ca Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Wed, 15 May 2024 07:32:22 +0000 Subject: [PATCH 12/52] Duplicated test_cgroups.py --- benchexec/pytest_cgroups.py | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 benchexec/pytest_cgroups.py diff --git a/benchexec/pytest_cgroups.py b/benchexec/pytest_cgroups.py new file mode 100644 index 000000000..1fbf927eb --- /dev/null +++ b/benchexec/pytest_cgroups.py @@ -0,0 +1,68 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import subprocess +import sys +import unittest + +from benchexec import check_cgroups + +sys.dont_write_bytecode = True # prevent creation of .pyc files + + +class TestCheckCgroups(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.longMessage = True + cls.maxDiff = None + logging.disable(logging.CRITICAL) + + def execute_run_extern(self, *args, **kwargs): + try: + return subprocess.check_output( + args=["python3", "-m", "benchexec.check_cgroups"] + list(args), + stderr=subprocess.STDOUT, + universal_newlines=True, + **kwargs, + ) + except subprocess.CalledProcessError as e: + if e.returncode != 1: # 1 is expected if cgroups are not available + print(e.output) + raise e + + def test_extern_command(self): + self.execute_run_extern() + + def test_simple(self): + try: + check_cgroups.main(["--no-thread"]) + except SystemExit as e: + # expected if cgroups are not available + self.skipTest(e) + + def test_threaded(self): + try: + check_cgroups.main([]) + except SystemExit as e: + # expected if cgroups are not available + self.skipTest(e) + + def test_thread_result_is_returned(self): + """ + Test that an error raised by check_cgroup_availability is correctly + re-raised in the main thread by replacing this function temporarily. 
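+        The original function is restored in the finally block afterwards,
+        so other tests are not affected.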
+ """ + tmp = check_cgroups.check_cgroup_availability + try: + check_cgroups.check_cgroup_availability = lambda wait: exit(1) + + with self.assertRaises(SystemExit): + check_cgroups.main([]) + + finally: + check_cgroups.check_cgroup_availability = tmp From 22a79987fd79279463544a81800252b5262dc6cf Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 16 May 2024 17:03:49 +0000 Subject: [PATCH 13/52] Transitioned test_cgroups to pytest --- benchexec/pytest_cgroups.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/benchexec/pytest_cgroups.py b/benchexec/pytest_cgroups.py index 1fbf927eb..b75b6f806 100644 --- a/benchexec/pytest_cgroups.py +++ b/benchexec/pytest_cgroups.py @@ -8,19 +8,20 @@ import logging import subprocess import sys -import unittest +import pytest from benchexec import check_cgroups sys.dont_write_bytecode = True # prevent creation of .pyc files -class TestCheckCgroups(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - logging.disable(logging.CRITICAL) +@pytest.fixture(scope="class") +def disable_non_critical_logging(): + logging.disable(logging.CRITICAL) + + +@pytest.mark.usefixtures("disable_non_critical_logging") +class TestCheckCgroups: def execute_run_extern(self, *args, **kwargs): try: @@ -43,14 +44,14 @@ def test_simple(self): check_cgroups.main(["--no-thread"]) except SystemExit as e: # expected if cgroups are not available - self.skipTest(e) + pytest.skip(str(e)) def test_threaded(self): try: check_cgroups.main([]) except SystemExit as e: # expected if cgroups are not available - self.skipTest(e) + pytest.skip(str(e)) def test_thread_result_is_returned(self): """ @@ -61,7 +62,7 @@ def test_thread_result_is_returned(self): try: check_cgroups.check_cgroup_availability = lambda wait: exit(1) - with self.assertRaises(SystemExit): + with pytest.raises(SystemExit): check_cgroups.main([]) finally: From de9f2a1ebef5300b72084bcfc7aad176ae81ee9e Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 17 May 2024 11:40:58 +0000 Subject: [PATCH 14/52] Add pytest testsuite to Gitlab CI Saves the coverage of both testsuites, to allow us to compare them (and make sure the nose-based testsuite hasn't had a regression for some reason) --- .gitlab-ci.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bfc92edd8..1ed2cfaa7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -61,6 +61,34 @@ stages: paths: - coverage.xml +.unit-tests-pytest: &unit-tests-pytest + stage: test + before_script: + # Create user, we do not want to test as root + - adduser --disabled-login --gecos "" $PRIMARY_USER + # Activate coverage for subprocesses + - printf 'import coverage\ncoverage.process_startup()\n' > "/usr/local/lib/python${PYTHON_VERSION}/site-packages/sitecustomize.py" + # Give $PRIMARY_USER permission to create cgroups + - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER + - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER + # Install BenchExec with `dev` dependencies + - sudo -u $PRIMARY_USER pip install --user ".[dev]" + # Start lxcfs + - lxcfs /var/lib/lxcfs & + script: + - sudo -u $PRIMARY_USER + COVERAGE_PROCESS_START=.coveragerc + coverage run -m pytest + after_script: + - sudo -u $PRIMARY_USER coverage combine + - sudo -u $PRIMARY_USER coverage report + - sudo -u $PRIMARY_USER coverage xml -o coverage-pytest.xml + tags: + - privileged + artifacts: + paths: + - coverage-pytest.xml + unit-tests:python-3.7: <<: 
*unit-tests variables: From e5f2f3ccbff75fbf0205cd77fcbf628779d42474 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 17 May 2024 13:38:19 +0000 Subject: [PATCH 15/52] ... and actually execute it with different Python versions --- .gitlab-ci.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1ed2cfaa7..d7c8a969e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -114,6 +114,31 @@ unit-tests:python-3.11: variables: PYTHON_VERSION: '3.11' +unit-tests-pytest:python-3.7: + <<: *unit-tests-pytest + variables: + PYTHON_VERSION: '3.7' + +unit-tests-pytest:python-3.8: + <<: *unit-tests-pytest + variables: + PYTHON_VERSION: '3.8' + +unit-tests-pytest:python-3.9: + <<: *unit-tests-pytest + variables: + PYTHON_VERSION: '3.9' + +unit-tests-pytest:python-3.10: + <<: *unit-tests-pytest + variables: + PYTHON_VERSION: '3.10' + +unit-tests-pytest:python-3.11: + <<: *unit-tests-pytest + variables: + PYTHON_VERSION: '3.11' + # Static checks check-format: stage: test From 890fa5d47203c605f19b12324d103083ee6eb5d9 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Tue, 21 May 2024 09:19:01 +0000 Subject: [PATCH 16/52] Duplicated test_pqos.py --- benchexec/pytest_pqos.py | 368 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 benchexec/pytest_pqos.py diff --git a/benchexec/pytest_pqos.py b/benchexec/pytest_pqos.py new file mode 100644 index 000000000..eb00cbcaf --- /dev/null +++ b/benchexec/pytest_pqos.py @@ -0,0 +1,368 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +""" + Unit tests for pqos module +""" +import json +import copy +import logging +import unittest +from subprocess import CalledProcessError +from unittest.mock import patch, MagicMock +from benchexec.pqos import Pqos + + +mock_pqos_wrapper_output = { + "load_pqos": { + "function_output": {}, + "returncode": 0, + "function": "pqos_init", + "error": False, + "message": "MSR interface intialised", + }, + "check_capability": { + "function_output": {"mem_size": 32, "cdp_on": 0, "num_classes": 4}, + "returncode": 0, + "function": "get_capability_info", + "error": False, + "message": "Retrieved l3ca capability", + }, + "allocate_resource": { + "function_output": {"cache_per_run": 4, "cores": {0: 0, 1: 0}}, + "returncode": 0, + "function": "allocate_resource", + "error": False, + "message": "Allocated l3ca", + }, + "monitor_events": { + "function_output": { + "monitoring_data": [ + { + "cores": [0, 1, 2], + "ipc": 0.987, + "llc_misses": 10240, + "llc": {"avg": 25028, "max": 30000}, + "mbm_local": {"avg": 25028, "max": 30000}, + } + ] + }, + "returncode": 0, + "function": "monitor_events", + "error": False, + "message": "Event monitoring successfull", + }, + "reset_monitoring": { + "returncode": 0, + "function": "reset_monitoring", + "error": False, + "message": "Reset monitoring successfull", + }, + "reset_resources": { + "returncode": 0, + "function": "reset_resources", + "error": False, + "message": "Resource reset successfull", + }, +} + +mock_pqos_wrapper_error = { + "function": "mock_function", + "message": "error in pqos_wrapper function", + "returncode": 1, + "error": True, + "function_output": {}, +} + + +def mock_check_output(args_list, **kwargs): + """ + mock for subprocess.check_output function, this function returns a dummy + pqos_wrapper CLI output. 
+ """ + return json.dumps(mock_pqos_wrapper_output) + + +def mock_check_output_error(args_list, **kwargs): + """ + mock for subprocess.check_output, returns a dummy error output of pqos_wrapper + """ + raise CalledProcessError(1, "cmd", json.dumps(mock_pqos_wrapper_error)) + + +def mock_check_output_capability_error(args_list, **kwargs): + """ + mock for subprocess.check_output, returns a success pqos_wrapper output + if get_capability function is called otherwise returns a dummy error output + """ + if "-c" in args_list: + return mock_check_output(args_list, **kwargs) + mock_check_output_error(args_list, **kwargs) # noqa: R503 always raises + + +class MockPopen: + """ + A Mock class for subprocess.Popen + """ + + def __init__(self, args_list, universal_newlines=None, **kwargs): + assert universal_newlines # required for this mock + self.args_list = args_list + self.returncode = 0 + + def send_signal(self, signal): + """ + mock Popen.send_signal function + """ + return 0 + + def kill(self): + """ + mock Popen.kill function + """ + return 0 + + def communicate(self): + """ + mock Popen.communicate function + """ + if self.returncode == 0: + return (mock_check_output(self.args_list), None) + return (None, json.dumps(mock_pqos_wrapper_error)) + + +def mock_popen(args_list, **kwargs): + """ + A mock function to create a MockPopen object with given arguments + """ + return MockPopen(args_list, **kwargs) + + +class TestPqos(unittest.TestCase): + """ + Unit tests for pqos module + """ + + @classmethod + def setUpClass(cls): + logging.disable(logging.CRITICAL) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + def test_pqos_init(self, mock_find_executable): + """ + Test for initialisation of pqos module + """ + pqos = Pqos() + self.assertIsInstance(pqos, Pqos) + self.assertIsNotNone(pqos.executable_path, True) + + @patch("benchexec.pqos.find_executable2", return_value=None) + def test_pqos_init_error(self, mock_find_executable): + """ + Test for initialisation of pqos module when pqos_wrapper CLI is not present + in the system. + """ + pqos = Pqos() + self.assertIsInstance(pqos, Pqos) + self.assertIsNone(pqos.executable_path, False) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + @patch("benchexec.pqos.check_output", side_effect=mock_check_output) + @patch("benchexec.pqos.Popen", side_effect=mock_popen) + def test_pqos_execute_command( + self, mock_find_executable, mock_check_output, mock_popen + ): + """ + Test for Pqos.execute_command function + """ + pqos = Pqos() + ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") + self.assertEqual(ret, True) + ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") + self.assertEqual(ret, True) + ret = pqos.execute_command( + "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" + ) + self.assertEqual(ret, True) + ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") + self.assertEqual(ret, True) + ret = pqos.execute_command( + "mon", "monitor_events", False, "-m", "[[0,1],[2,3]]" + ) + self.assertEqual(ret, True) + + @patch("benchexec.pqos.find_executable2", return_value=None) + def test_pqos_execute_command_cli_non_existent(self, mock_find_executable): + """ + Test for Pqos.execute_command function when pqos_wrapper CLI is not present. 
+ """ + pqos = Pqos() + ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") + self.assertEqual(ret, False) + ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") + self.assertEqual(ret, False) + ret = pqos.execute_command( + "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" + ) + self.assertEqual(ret, False) + ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") + self.assertEqual(ret, False) + ret = pqos.execute_command( + "mon", "monitor_events", False, "-m", "[[0,1],[2,3]]" + ) + self.assertEqual(ret, False) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + @patch("benchexec.pqos.check_output", side_effect=mock_check_output_error) + def test_pqos_execute_command_cli_error( + self, mock_find_executable, mock_check_output + ): + """ + Test for Pqos.execute_command function when pqos_wrapper throws an error + """ + pqos = Pqos() + ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") + self.assertEqual(ret, False) + ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") + self.assertEqual(ret, False) + ret = pqos.execute_command( + "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" + ) + self.assertEqual(ret, False) + ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") + self.assertEqual(ret, False) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + @patch("benchexec.pqos.check_output", side_effect=mock_check_output) + def test_pqos_allocate_l3ca(self, mock_find_executable, mock_check_output): + """ + Test for pqos.allocate_l3ca + """ + pqos = Pqos() + pqos.allocate_l3ca([[0, 1], [2, 3]]) + self.assertEqual(pqos.reset_required, True) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + @patch( + "benchexec.pqos.check_output", side_effect=mock_check_output_capability_error + ) + def test_pqos_allocate_l3ca_error(self, mock_find_executable, mock_check_output): + """ + Test for pqos.allocate_l3ca when pqos_wrapper throws an error + """ + pqos = Pqos() + pqos.reset_resources = MagicMock(return_value=0) + pqos.allocate_l3ca([[0, 1], [2, 3]]) + self.assertEqual(pqos.reset_required, False) + pqos.reset_resources.assert_called_once_with() + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + @patch("benchexec.pqos.check_output", side_effect=mock_check_output) + @patch("benchexec.pqos.Popen", side_effect=mock_popen) + def test_pqos_stop_monitoring( + self, mock_find_executable, mock_check_output, mock_popen + ): + """ + Test for pqos.stop_monitoring + """ + flatten_mon_data = { + "ipc": 0.987, + "llc_misses": 10240, + "llc_avg": 25028, + "llc_max": 30000, + "mbm_local_avg": 25028, + "mbm_local_max": 30000, + } + pqos = Pqos() + pqos.start_monitoring([[0, 1, 2]]) + ret = pqos.stop_monitoring() + self.assertDictEqual(ret, flatten_mon_data) + self.assertEqual(pqos.mon_process, None) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + @patch("benchexec.pqos.Popen", side_effect=mock_popen) + def test_pqos_stop_monitoring_not_started(self, mock_find_executable, mock_popen): + """ + Test for pqos.stop_monitoring, when monitoring is not started before + """ + pqos = Pqos() + ret = pqos.stop_monitoring() + self.assertDictEqual(ret, {}) + self.assertEqual(pqos.mon_process, None) + + @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") + 
@patch("benchexec.pqos.check_output", side_effect=mock_check_output) + @patch("benchexec.pqos.Popen", side_effect=mock_popen) + def test_pqos_stop_monitoring_error( + self, mock_find_executable, mock_check_output, mock_popen + ): + """ + Test for pqos.stop_monitoring, when pqos_wrapper throws an error + """ + pqos = Pqos() + pqos.start_monitoring([[0, 1, 2]]) + pqos.mon_process.returncode = 1 + ret = pqos.stop_monitoring() + self.assertDictEqual(ret, {}) + self.assertEqual(pqos.mon_process, None) + + def test_pqos_flatten_mon_data(self): + """ + Test for Pqos.flatten_mon_data when single monitoring data is received + """ + flatten_mon_data = { + "ipc": 0.987, + "llc_misses": 10240, + "llc_avg": 25028, + "llc_max": 30000, + "mbm_local_avg": 25028, + "mbm_local_max": 30000, + } + mon_data = copy.deepcopy( + mock_pqos_wrapper_output["monitor_events"]["function_output"][ + "monitoring_data" + ] + ) + ret = Pqos.flatten_mon_data(mon_data) + self.assertDictEqual(ret, flatten_mon_data) + + def test_pqos_flatten_mon_data_multiple(self): + """ + Test for Pqos.flatten_mon_data when multiple monitoring data are received + """ + flatten_mon_data_multiple = { + "ipc_cpus0,1,2": 0.987, + "llc_misses_cpus0,1,2": 10240, + "llc_avg_cpus0,1,2": 25028, + "llc_max_cpus0,1,2": 30000, + "mbm_local_avg_cpus0,1,2": 25028, + "mbm_local_max_cpus0,1,2": 30000, + "ipc_cpus3,4,5": 0.987, + "llc_misses_cpus3,4,5": 10240, + "llc_avg_cpus3,4,5": 25028, + "llc_max_cpus3,4,5": 30000, + "mbm_local_avg_cpus3,4,5": 25028, + "mbm_local_max_cpus3,4,5": 30000, + } + mon_data = copy.deepcopy( + mock_pqos_wrapper_output["monitor_events"]["function_output"][ + "monitoring_data" + ] + ) + first_core_set = copy.deepcopy(mon_data[0]) + second_core_set = copy.deepcopy(mon_data[0]) + second_core_set["cores"] = [3, 4, 5] + mon_data_multiple = [first_core_set, second_core_set] + ret = Pqos.flatten_mon_data(mon_data_multiple) + self.assertDictEqual(ret, flatten_mon_data_multiple) + + def test_pqos_convert_core_list(self): + """ + Test for pqos.convert_core_list function + """ + ret = Pqos.convert_core_list([[0, 1], [2, 3]]) + self.assertEqual(ret, "[[0,1],[2,3]]") From 34abf3b315462b061e70b4d2ce25f8e580835b9e Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Tue, 21 May 2024 13:20:19 +0000 Subject: [PATCH 17/52] Transitioned test_pqos to pytest --- benchexec/pytest_pqos.py | 165 +++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 77 deletions(-) diff --git a/benchexec/pytest_pqos.py b/benchexec/pytest_pqos.py index eb00cbcaf..63e8e4a9e 100644 --- a/benchexec/pytest_pqos.py +++ b/benchexec/pytest_pqos.py @@ -11,9 +11,8 @@ import json import copy import logging -import unittest +import pytest from subprocess import CalledProcessError -from unittest.mock import patch, MagicMock from benchexec.pqos import Pqos @@ -142,133 +141,143 @@ def mock_popen(args_list, **kwargs): return MockPopen(args_list, **kwargs) -class TestPqos(unittest.TestCase): +@pytest.fixture(scope="class") +def disable_non_critical_logging(): + logging.disable(logging.CRITICAL) + + +@pytest.mark.usefixtures("disable_non_critical_logging") +class TestPqos: """ Unit tests for pqos module """ - @classmethod - def setUpClass(cls): - logging.disable(logging.CRITICAL) - - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - def test_pqos_init(self, mock_find_executable): + def test_pqos_init(self, mocker): """ Test for initialisation of pqos module """ + mocker.patch( + "benchexec.pqos.find_executable2", 
return_value="/path/to/pqos_wrapper/lib" + ) pqos = Pqos() - self.assertIsInstance(pqos, Pqos) - self.assertIsNotNone(pqos.executable_path, True) + assert isinstance(pqos, Pqos) + assert pqos.executable_path is not None - @patch("benchexec.pqos.find_executable2", return_value=None) - def test_pqos_init_error(self, mock_find_executable): + def test_pqos_init_error(self, mocker): """ Test for initialisation of pqos module when pqos_wrapper CLI is not present in the system. """ + mocker.patch("benchexec.pqos.find_executable2", return_value=None) pqos = Pqos() - self.assertIsInstance(pqos, Pqos) - self.assertIsNone(pqos.executable_path, False) + assert isinstance(pqos, Pqos) + assert pqos.executable_path is None - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch("benchexec.pqos.check_output", side_effect=mock_check_output) - @patch("benchexec.pqos.Popen", side_effect=mock_popen) - def test_pqos_execute_command( - self, mock_find_executable, mock_check_output, mock_popen - ): + def test_pqos_execute_command(self, mocker): """ Test for Pqos.execute_command function """ + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) + mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) pqos = Pqos() ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") - self.assertEqual(ret, True) + assert ret == True ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") - self.assertEqual(ret, True) + assert ret == True ret = pqos.execute_command( "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" ) - self.assertEqual(ret, True) + assert ret == True ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") - self.assertEqual(ret, True) + assert ret == True ret = pqos.execute_command( "mon", "monitor_events", False, "-m", "[[0,1],[2,3]]" ) - self.assertEqual(ret, True) + assert ret == True - @patch("benchexec.pqos.find_executable2", return_value=None) - def test_pqos_execute_command_cli_non_existent(self, mock_find_executable): + def test_pqos_execute_command_cli_non_existent(self, mocker): """ Test for Pqos.execute_command function when pqos_wrapper CLI is not present. 
""" + mocker.patch("benchexec.pqos.find_executable2", return_value=None) pqos = Pqos() ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command( "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" ) - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command( "mon", "monitor_events", False, "-m", "[[0,1],[2,3]]" ) - self.assertEqual(ret, False) + assert ret == False - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch("benchexec.pqos.check_output", side_effect=mock_check_output_error) - def test_pqos_execute_command_cli_error( - self, mock_find_executable, mock_check_output - ): + def test_pqos_execute_command_cli_error(self, mocker): """ Test for Pqos.execute_command function when pqos_wrapper throws an error """ + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output_error) pqos = Pqos() ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command( "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" ) - self.assertEqual(ret, False) + assert ret == False ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") - self.assertEqual(ret, False) + assert ret == False - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch("benchexec.pqos.check_output", side_effect=mock_check_output) - def test_pqos_allocate_l3ca(self, mock_find_executable, mock_check_output): + def test_pqos_allocate_l3ca(self, mocker): """ Test for pqos.allocate_l3ca """ + + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) pqos = Pqos() pqos.allocate_l3ca([[0, 1], [2, 3]]) - self.assertEqual(pqos.reset_required, True) + assert pqos.reset_required == True - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch( - "benchexec.pqos.check_output", side_effect=mock_check_output_capability_error - ) - def test_pqos_allocate_l3ca_error(self, mock_find_executable, mock_check_output): + def test_pqos_allocate_l3ca_error(self, mocker): """ Test for pqos.allocate_l3ca when pqos_wrapper throws an error """ + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch( + "benchexec.pqos.check_output", + side_effect=mock_check_output_capability_error, + ) pqos = Pqos() - pqos.reset_resources = MagicMock(return_value=0) + pqos.reset_resources = mocker.MagicMock(return_value=0) pqos.allocate_l3ca([[0, 1], [2, 3]]) - self.assertEqual(pqos.reset_required, False) + assert pqos.reset_required == False pqos.reset_resources.assert_called_once_with() - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch("benchexec.pqos.check_output", side_effect=mock_check_output) - @patch("benchexec.pqos.Popen", 
side_effect=mock_popen) - def test_pqos_stop_monitoring( - self, mock_find_executable, mock_check_output, mock_popen - ): + def test_pqos_stop_monitoring(self, mocker): """ Test for pqos.stop_monitoring """ + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) + mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) flatten_mon_data = { "ipc": 0.987, "llc_misses": 10240, @@ -280,35 +289,37 @@ def test_pqos_stop_monitoring( pqos = Pqos() pqos.start_monitoring([[0, 1, 2]]) ret = pqos.stop_monitoring() - self.assertDictEqual(ret, flatten_mon_data) - self.assertEqual(pqos.mon_process, None) + assert ret == flatten_mon_data + assert pqos.mon_process == None - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch("benchexec.pqos.Popen", side_effect=mock_popen) - def test_pqos_stop_monitoring_not_started(self, mock_find_executable, mock_popen): + def test_pqos_stop_monitoring_not_started(self, mocker): """ Test for pqos.stop_monitoring, when monitoring is not started before """ + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) pqos = Pqos() ret = pqos.stop_monitoring() - self.assertDictEqual(ret, {}) - self.assertEqual(pqos.mon_process, None) + assert ret == {} + assert pqos.mon_process == None - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") - @patch("benchexec.pqos.check_output", side_effect=mock_check_output) - @patch("benchexec.pqos.Popen", side_effect=mock_popen) - def test_pqos_stop_monitoring_error( - self, mock_find_executable, mock_check_output, mock_popen - ): + def test_pqos_stop_monitoring_error(self, mocker): """ Test for pqos.stop_monitoring, when pqos_wrapper throws an error """ + mocker.patch( + "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" + ) + mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) + mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) pqos = Pqos() pqos.start_monitoring([[0, 1, 2]]) pqos.mon_process.returncode = 1 ret = pqos.stop_monitoring() - self.assertDictEqual(ret, {}) - self.assertEqual(pqos.mon_process, None) + assert ret == {} + assert pqos.mon_process == None def test_pqos_flatten_mon_data(self): """ @@ -328,7 +339,7 @@ def test_pqos_flatten_mon_data(self): ] ) ret = Pqos.flatten_mon_data(mon_data) - self.assertDictEqual(ret, flatten_mon_data) + assert ret == flatten_mon_data def test_pqos_flatten_mon_data_multiple(self): """ @@ -358,11 +369,11 @@ def test_pqos_flatten_mon_data_multiple(self): second_core_set["cores"] = [3, 4, 5] mon_data_multiple = [first_core_set, second_core_set] ret = Pqos.flatten_mon_data(mon_data_multiple) - self.assertDictEqual(ret, flatten_mon_data_multiple) + assert ret == flatten_mon_data_multiple def test_pqos_convert_core_list(self): """ Test for pqos.convert_core_list function """ ret = Pqos.convert_core_list([[0, 1], [2, 3]]) - self.assertEqual(ret, "[[0,1],[2,3]]") + assert ret == "[[0,1],[2,3]]" From d6eec079df29e9cd6916d3a09f0706491ea4f871 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Thu, 23 May 2024 06:12:25 +0000 Subject: [PATCH 18/52] Duplicated test_runexecutor.py --- benchexec/pytest_runexecutor.py | 1223 +++++++++++++++++++++++++++++++ 1 file changed, 1223 insertions(+) create mode 100644 benchexec/pytest_runexecutor.py diff --git 
a/benchexec/pytest_runexecutor.py b/benchexec/pytest_runexecutor.py new file mode 100644 index 000000000..340164887 --- /dev/null +++ b/benchexec/pytest_runexecutor.py @@ -0,0 +1,1223 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import contextlib +import logging +import os +import re +import subprocess +import sys +import tempfile +import threading +import time +import unittest +import shutil + +from benchexec import container +from benchexec import containerexecutor +from benchexec import filehierarchylimit +from benchexec.runexecutor import RunExecutor +from benchexec.cgroups import Cgroups +from benchexec import runexecutor +from benchexec import util + +sys.dont_write_bytecode = True # prevent creation of .pyc files + +here = os.path.dirname(__file__) +base_dir = os.path.join(here, "..") +bin_dir = os.path.join(base_dir, "bin") +runexec = os.path.join(bin_dir, "runexec") + +trivial_run_grace_time = 0.2 + + +class TestRunExecutor(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.longMessage = True + cls.maxDiff = None + logging.disable(logging.NOTSET) # need to make sure to get all messages + if not hasattr(cls, "assertRegex"): + cls.assertRegex = cls.assertRegexpMatches + + cls.cgroups = Cgroups.initialize() + + cls.echo = shutil.which("echo") or "/bin/echo" + cls.sleep = shutil.which("sleep") or "/bin/sleep" + cls.cat = shutil.which("cat") or "/bin/cat" + + def setUp(self, *args, **kwargs): + with self.skip_if_logs( + "Cannot reliably kill sub-processes without freezer cgroup" + ): + self.runexecutor = RunExecutor(*args, use_namespaces=False, **kwargs) + + @contextlib.contextmanager + def skip_if_logs(self, error_msg): + """A context manager that automatically marks the test as skipped if SystemExit + is thrown and the given error message had been logged with level ERROR.""" + # Note: assertLogs checks that there is at least one log message of given level. + # This is not what we want, so we just rely on one debug message being present. 
+ try: + with self.assertLogs(level=logging.DEBUG) as log: + yield + except SystemExit as e: + if any( + record.levelno == logging.ERROR and record.msg.startswith(error_msg) + for record in log.records + ): + self.skipTest(e) + raise e + + def execute_run(self, *args, expect_terminationreason=None, **kwargs): + (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) + try: + result = self.runexecutor.execute_run(list(args), output_filename, **kwargs) + output = os.read(output_fd, 4096).decode() + finally: + os.close(output_fd) + os.remove(output_filename) + + self.check_result_keys(result, "terminationreason") + if isinstance(expect_terminationreason, list): + self.assertIn( + result.get("terminationreason"), + expect_terminationreason, + "Unexpected terminationreason, output is \n" + output, + ) + else: + self.assertEqual( + result.get("terminationreason"), + expect_terminationreason, + "Unexpected terminationreason, output is \n" + output, + ) + return (result, output.splitlines()) + + def get_runexec_cmdline(self, *args, **kwargs): + return [ + "python3", + runexec, + "--no-container", + "--output", + kwargs["output_filename"], + ] + list(args) + + def execute_run_extern(self, *args, expect_terminationreason=None, **kwargs): + (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) + try: + runexec_output = subprocess.check_output( + args=self.get_runexec_cmdline(*args, output_filename=output_filename), + stderr=subprocess.DEVNULL, + universal_newlines=True, + **kwargs, + ) + output = os.read(output_fd, 4096).decode() + except subprocess.CalledProcessError as e: + print(e.output) + raise e + finally: + os.close(output_fd) + os.remove(output_filename) + + result = { + key.strip(): value.strip() + for (key, _, value) in ( + line.partition("=") for line in runexec_output.splitlines() + ) + } + self.check_result_keys(result, "terminationreason", "returnvalue") + if isinstance(expect_terminationreason, list): + self.assertIn( + result.get("terminationreason"), + expect_terminationreason, + "Unexpected terminationreason, output is \n" + output, + ) + else: + self.assertEqual( + result.get("terminationreason"), + expect_terminationreason, + "Unexpected terminationreason, output is \n" + output, + ) + return (result, output.splitlines()) + + def check_command_in_output(self, output, cmd): + self.assertEqual(output[0], cmd, "run output misses executed command") + + def check_result_keys(self, result, *additional_keys): + expected_keys = { + "cputime", + "walltime", + "memory", + "exitcode", + "cpuenergy", + "blkio-read", + "blkio-write", + "starttime", + "pressure-cpu-some", + "pressure-io-some", + "pressure-memory-some", + } + expected_keys.update(additional_keys) + for key in result.keys(): + if key.startswith("cputime-cpu"): + self.assertRegex( + key, + "^cputime-cpu[0-9]+$", + f"unexpected result entry '{key}={result[key]}'", + ) + elif key.startswith("cpuenergy-"): + self.assertRegex( + key, + "^cpuenergy-pkg[0-9]+-(package|core|uncore|dram|psys)$", + f"unexpected result entry '{key}={result[key]}'", + ) + else: + self.assertIn( + key, + expected_keys, + f"unexpected result entry '{key}={result[key]}'", + ) + + def check_exitcode(self, result, exitcode, msg=None): + self.assertEqual(result["exitcode"].raw, exitcode, msg) + + def check_exitcode_extern(self, result, exitcode, msg=None): + exitcode = util.ProcessExitCode.from_raw(exitcode) + if exitcode.value is not None: + self.assertEqual(int(result["returnvalue"]), exitcode.value, msg) + else: + 
self.assertEqual(int(result["exitsignal"]), exitcode.signal, msg) + + def test_command_output(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + (_, output) = self.execute_run(self.echo, "TEST_TOKEN") + self.check_command_in_output(output, f"{self.echo} TEST_TOKEN") + self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") + for line in output[1:-1]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_command_error_output(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + + def execute_Run_intern(*args, **kwargs): + (error_fd, error_filename) = tempfile.mkstemp(".log", "error_", text=True) + try: + (_, output_lines) = self.execute_run( + *args, error_filename=error_filename, **kwargs + ) + error_lines = os.read(error_fd, 4096).decode().splitlines() + return (output_lines, error_lines) + finally: + os.close(error_fd) + os.remove(error_filename) + + (output_lines, error_lines) = execute_Run_intern( + "/bin/sh", "-c", f"{self.echo} ERROR_TOKEN >&2" + ) + self.assertEqual( + error_lines[-1], "ERROR_TOKEN", "run error output misses command output" + ) + for line in output_lines[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + for line in error_lines[1:-1]: + self.assertRegex(line, "^-*$", "unexpected text in run error output") + + (output_lines, error_lines) = execute_Run_intern(self.echo, "OUT_TOKEN") + self.check_command_in_output(output_lines, f"{self.echo} OUT_TOKEN") + self.check_command_in_output(error_lines, f"{self.echo} OUT_TOKEN") + self.assertEqual( + output_lines[-1], "OUT_TOKEN", "run output misses command output" + ) + for line in output_lines[1:-1]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + for line in error_lines[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run error output") + + def test_command_result(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + (result, _) = self.execute_run(self.echo, "TEST_TOKEN") + self.check_exitcode(result, 0, "exit code of echo is not zero") + self.assertAlmostEqual( + result["walltime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg="walltime of echo not as expected", + ) + if "cputime" in result: # not present without cpuacct cgroup + self.assertAlmostEqual( + result["cputime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg="cputime of echo not as expected", + ) + self.check_result_keys(result) + + def test_wrong_command(self): + (result, _) = self.execute_run( + "/does/not/exist", expect_terminationreason="failed" + ) + + def test_wrong_command_extern(self): + (result, _) = self.execute_run( + "/does/not/exist", expect_terminationreason="failed" + ) + + def test_cputime_hardlimit(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + with self.skip_if_logs("Time limit cannot be specified without cpuacct cgroup"): + (result, output) = self.execute_run( + "/bin/sh", + "-c", + "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", + hardtimelimit=1, + expect_terminationreason="cputime", + ) + self.check_exitcode(result, 9, "exit code of killed process is not 9") + self.assertAlmostEqual( + result["walltime"], + 1.4, + delta=0.5, + msg="walltime is not approximately the time after which the process should have been killed", + ) + self.assertAlmostEqual( + result["cputime"], + 1.4, + delta=0.5, + msg="cputime is not 
approximately the time after which the process should have been killed", + ) + + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_cputime_softlimit(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + with self.skip_if_logs( + "Soft time limit cannot be specified without cpuacct cgroup" + ): + (result, output) = self.execute_run( + "/bin/sh", + "-c", + "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", + softtimelimit=1, + expect_terminationreason="cputime-soft", + ) + self.check_exitcode(result, 15, "exit code of killed process is not 15") + self.assertAlmostEqual( + result["walltime"], + 4, + delta=3, + msg="walltime is not approximately the time after which the process should have been killed", + ) + self.assertAlmostEqual( + result["cputime"], + 4, + delta=3, + msg="cputime is not approximately the time after which the process should have been killed", + ) + + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_walltime_limit(self): + if not os.path.exists(self.sleep): + self.skipTest("missing sleep") + (result, output) = self.execute_run( + self.sleep, "10", walltimelimit=1, expect_terminationreason="walltime" + ) + + self.check_exitcode(result, 9, "exit code of killed process is not 9") + self.assertAlmostEqual( + result["walltime"], + 4, + delta=3, + msg="walltime is not approximately the time after which the process should have been killed", + ) + if "cputime" in result: # not present without cpuacct cgroup + self.assertAlmostEqual( + result["cputime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg="cputime of sleep is not approximately zero", + ) + + self.check_command_in_output(output, f"{self.sleep} 10") + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_cputime_walltime_limit(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + with self.skip_if_logs("Time limit cannot be specified without cpuacct cgroup"): + (result, output) = self.execute_run( + "/bin/sh", + "-c", + "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", + hardtimelimit=1, + walltimelimit=5, + expect_terminationreason="cputime", + ) + + self.check_exitcode(result, 9, "exit code of killed process is not 9") + self.assertAlmostEqual( + result["walltime"], + 1.4, + delta=0.5, + msg="walltime is not approximately the time after which the process should have been killed", + ) + self.assertAlmostEqual( + result["cputime"], + 1.4, + delta=0.5, + msg="cputime is not approximately the time after which the process should have been killed", + ) + + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_all_timelimits(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + with self.skip_if_logs("Time limit cannot be specified without cpuacct cgroup"): + (result, output) = self.execute_run( + "/bin/sh", + "-c", + "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", + softtimelimit=1, + hardtimelimit=2, + walltimelimit=5, + expect_terminationreason="cputime-soft", + ) + + self.check_exitcode(result, 15, "exit code of killed process is not 15") + self.assertAlmostEqual( + result["walltime"], + 1.4, + delta=0.5, + msg="walltime is not approximately the time after which the process should have been killed", + ) + self.assertAlmostEqual( + result["cputime"], + 1.4, + delta=0.5, + msg="cputime 
is not approximately the time after which the process should have been killed", + ) + + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_input_is_redirected_from_devnull(self): + if not os.path.exists(self.cat): + self.skipTest("missing cat") + (result, output) = self.execute_run(self.cat, walltimelimit=1) + + self.check_exitcode(result, 0, "exit code of process is not 0") + self.assertAlmostEqual( + result["walltime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg='walltime of "cat < /dev/null" is not approximately zero', + ) + if "cputime" in result: # not present without cpuacct cgroup + self.assertAlmostEqual( + result["cputime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg='cputime of "cat < /dev/null" is not approximately zero', + ) + self.check_result_keys(result) + + self.check_command_in_output(output, self.cat) + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_input_is_redirected_from_file(self): + if not os.path.exists(self.cat): + self.skipTest("missing cat") + with tempfile.TemporaryFile() as tmp: + tmp.write(b"TEST_TOKEN") + tmp.flush() + tmp.seek(0) + (result, output) = self.execute_run(self.cat, stdin=tmp, walltimelimit=1) + + self.check_exitcode(result, 0, "exit code of process is not 0") + self.assertAlmostEqual( + result["walltime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg='walltime of "cat < /dev/null" is not approximately zero', + ) + if "cputime" in result: # not present without cpuacct cgroup + self.assertAlmostEqual( + result["cputime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg='cputime of "cat < /dev/null" is not approximately zero', + ) + self.check_result_keys(result) + + self.check_command_in_output(output, self.cat) + self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") + for line in output[1:-1]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_input_is_redirected_from_stdin(self): + if not os.path.exists(self.cat): + self.skipTest("missing cat") + + (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) + cmd = self.get_runexec_cmdline( + "--input", + "-", + "--walltime", + "1", + self.cat, + output_filename=output_filename, + ) + try: + process = subprocess.Popen( + args=cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + universal_newlines=True, + ) + try: + runexec_output, unused_err = process.communicate("TEST_TOKEN") + except BaseException: + # catch everything, we re-raise + process.kill() + process.wait() + raise + retcode = process.poll() + if retcode: + print(runexec_output) + raise subprocess.CalledProcessError(retcode, cmd, output=runexec_output) + + output = os.read(output_fd, 4096).decode().splitlines() + finally: + os.close(output_fd) + os.remove(output_filename) + + result = { + key.strip(): value.strip() + for (key, _, value) in ( + line.partition("=") for line in runexec_output.splitlines() + ) + } + self.check_exitcode_extern(result, 0, "exit code of process is not 0") + self.assertAlmostEqual( + float(result["walltime"].rstrip("s")), + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg='walltime of "cat < /dev/null" is not approximately zero', + ) + if "cputime" in result: # not present without cpuacct cgroup + self.assertAlmostEqual( + float(result["cputime"].rstrip("s")), + trivial_run_grace_time, + 
delta=trivial_run_grace_time, + msg='cputime of "cat < /dev/null" is not approximately zero', + ) + self.check_result_keys(result, "returnvalue") + + self.check_command_in_output(output, self.cat) + self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") + for line in output[1:-1]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_append_environment_variable(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + (_, output) = self.execute_run("/bin/sh", "-c", "echo $PATH") + path = output[-1] + (_, output) = self.execute_run( + "/bin/sh", + "-c", + "echo $PATH", + environments={"additionalEnv": {"PATH": ":TEST_TOKEN"}}, + ) + self.assertEqual(output[-1], path + ":TEST_TOKEN") + + def test_new_environment_variable(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + (_, output) = self.execute_run( + "/bin/sh", "-c", "echo $PATH", environments={"newEnv": {"PATH": "/usr/bin"}} + ) + self.assertEqual(output[-1], "/usr/bin") + + def test_stop_run(self): + if not os.path.exists(self.sleep): + self.skipTest("missing sleep") + thread = _StopRunThread(1, self.runexecutor) + thread.start() + (result, output) = self.execute_run( + self.sleep, "10", expect_terminationreason="killed" + ) + thread.join() + + self.check_exitcode(result, 9, "exit code of killed process is not 9") + self.assertAlmostEqual( + result["walltime"], + 1, + delta=0.5, + msg="walltime is not approximately the time after which the process should have been killed", + ) + if "cputime" in result: # not present without cpuacct cgroup + self.assertAlmostEqual( + result["cputime"], + trivial_run_grace_time, + delta=trivial_run_grace_time, + msg="cputime of sleep is not approximately zero", + ) + + self.check_command_in_output(output, f"{self.sleep} 10") + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_reduce_file_size_empty_file(self): + with tempfile.NamedTemporaryFile() as tmp: + runexecutor._reduce_file_size_if_necessary(tmp.name, 0) + self.assertEqual(os.path.getsize(tmp.name), 0) + + def test_reduce_file_size_empty_file2(self): + with tempfile.NamedTemporaryFile() as tmp: + runexecutor._reduce_file_size_if_necessary(tmp.name, 500) + self.assertEqual(os.path.getsize(tmp.name), 0) + + def test_reduce_file_size_long_line_not_truncated(self): + with tempfile.NamedTemporaryFile(mode="wt") as tmp: + content = "Long line " * 500 + tmp.write(content) + tmp.flush() + runexecutor._reduce_file_size_if_necessary(tmp.name, 500) + with open(tmp.name, "rt") as tmp2: + self.assertMultiLineEqual(tmp2.read(), content) + + REDUCE_WARNING_MSG = ( + "WARNING: YOUR LOGFILE WAS TOO LONG, SOME LINES IN THE MIDDLE WERE REMOVED." 
+ ) + REDUCE_OVERHEAD = 100 + + def test_reduce_file_size(self): + with tempfile.NamedTemporaryFile(mode="wt") as tmp: + line = "Some text\n" + tmp.write(line * 500) + tmp.flush() + limit = 500 + runexecutor._reduce_file_size_if_necessary(tmp.name, limit) + self.assertLessEqual( + os.path.getsize(tmp.name), limit + self.REDUCE_OVERHEAD + ) + with open(tmp.name, "rt") as tmp2: + new_content = tmp2.read() + self.assertIn(self.REDUCE_WARNING_MSG, new_content) + self.assertTrue(new_content.startswith(line)) + self.assertTrue(new_content.endswith(line)) + + def test_reduce_file_size_limit_zero(self): + with tempfile.NamedTemporaryFile(mode="wt") as tmp: + line = "Some text\n" + tmp.write(line * 500) + tmp.flush() + runexecutor._reduce_file_size_if_necessary(tmp.name, 0) + self.assertLessEqual(os.path.getsize(tmp.name), self.REDUCE_OVERHEAD) + with open(tmp.name, "rt") as tmp2: + new_content = tmp2.read() + self.assertIn(self.REDUCE_WARNING_MSG, new_content) + self.assertTrue(new_content.startswith(line)) + + def test_append_crash_dump_info(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + (result, output) = self.execute_run( + "/bin/sh", + "-c", + 'echo "# An error report file with more information is saved as:";' + 'echo "# $(pwd)/hs_err_pid_1234.txt";' + "echo TEST_TOKEN > hs_err_pid_1234.txt;" + "exit 2", + ) + self.assertEqual( + output[-1], "TEST_TOKEN", "log file misses content from crash dump file" + ) + + def test_integration(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + (result, output) = self.execute_run_extern(self.echo, "TEST_TOKEN") + self.check_exitcode_extern(result, 0, "exit code of echo is not zero") + self.check_result_keys(result, "returnvalue") + + self.check_command_in_output(output, f"{self.echo} TEST_TOKEN") + self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") + for line in output[1:-1]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_home_and_tmp_is_separate(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + (result, output) = self.execute_run("/bin/sh", "-c", "echo $HOME $TMPDIR") + self.check_exitcode(result, 0, "exit code of /bin/sh is not zero") + self.assertRegex( + output[-1], + "/BenchExec_run_[^/]*/home .*/BenchExec_run_[^/]*/tmp", + "HOME or TMPDIR variable does not contain expected temporary directory", + ) + + def test_temp_dirs_are_removed(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + (result, output) = self.execute_run("/bin/sh", "-c", "echo $HOME $TMPDIR") + self.check_exitcode(result, 0, "exit code of /bin/sh is not zero") + home_dir = output[-1].split(" ")[0] + temp_dir = output[-1].split(" ")[1] + self.assertFalse( + os.path.exists(home_dir), + f"temporary home directory {home_dir} was not cleaned up", + ) + self.assertFalse( + os.path.exists(temp_dir), + f"temporary temp directory {temp_dir} was not cleaned up", + ) + + def test_home_is_writable(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + (result, output) = self.execute_run("/bin/sh", "-c", "touch $HOME/TEST_FILE") + self.check_exitcode( + result, + 0, + f"Failed to write to $HOME/TEST_FILE, output was\n{output}", + ) + + def test_no_cleanup_temp(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + self.setUp(cleanup_temp_dir=False) # create RunExecutor with desired parameter + (result, output) = self.execute_run( + "/bin/sh", "-c", 'echo "$TMPDIR"; echo "" > 
"$TMPDIR/test"' + ) + self.check_exitcode(result, 0, "exit code of /bin/sh is not zero") + temp_dir = output[-1] + test_file = os.path.join(temp_dir, "test") + subprocess.run(["test", "-f", test_file], check=True) + self.assertEqual( + "tmp", os.path.basename(temp_dir), "unexpected name of temp dir" + ) + self.assertNotEqual( + "/tmp", temp_dir, "temp dir should not be the global temp dir" + ) + subprocess.run(["rm", "-r", os.path.dirname(temp_dir)], check=True) + + def test_require_cgroup_invalid(self): + with self.assertLogs(level=logging.ERROR) as log: + with self.assertRaises(SystemExit): + RunExecutor(additional_cgroup_subsystems=["invalid"]) + + self.assertIn( + 'Cgroup subsystem "invalid" was required but is not available', + "\n".join(log.output), + ) + + def test_require_cgroup_cpu(self): + try: + self.setUp(additional_cgroup_subsystems=["cpu"]) + except SystemExit as e: + self.skipTest(e) + if not os.path.exists(self.cat): + self.skipTest("missing cat") + if self.cgroups.version != 1: + self.skipTest("not relevant in unified hierarchy") + (result, output) = self.execute_run(self.cat, "/proc/self/cgroup") + self.check_exitcode(result, 0, "exit code of cat is not zero") + for line in output: + if re.match(r"^[0-9]*:([^:]*,)?cpu(,[^:]*)?:/(.*/)?benchmark_.*$", line): + return # Success + self.fail("Not in expected cgroup for subsystem cpu:\n" + "\n".join(output)) + + def test_set_cgroup_cpu_shares(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + try: + if self.cgroups.version == 1: + self.setUp(additional_cgroup_subsystems=["cpu"]) + else: + self.setUp(additional_cgroup_subsystems=["memory"]) + except SystemExit as e: + self.skipTest(e) + if self.cgroups.version == 1: + cgValues = {("cpu", "shares"): 42} + else: + cgValues = {("memory", "high"): 420000000} + (result, _) = self.execute_run(self.echo, cgroupValues=cgValues) + self.check_exitcode(result, 0, "exit code of echo is not zero") + # Just assert that execution was successful, + # testing that the value was actually set is much more difficult. 
+ + def test_nested_runexec(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + self.setUp( + dir_modes={ + # Do not mark /home hidden, would fail with python from virtualenv + "/": containerexecutor.DIR_READ_ONLY, + "/tmp": containerexecutor.DIR_FULL_ACCESS, # for inner_output_file + "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, + } + ) + inner_args = ["--", self.echo, "TEST_TOKEN"] + + with tempfile.NamedTemporaryFile( + mode="r", prefix="inner_output_", suffix=".log" + ) as inner_output_file: + inner_cmdline = self.get_runexec_cmdline( + *inner_args, output_filename=inner_output_file.name + ) + outer_result, outer_output = self.execute_run(*inner_cmdline) + inner_output = inner_output_file.read().strip().splitlines() + + logging.info("Outer output:\n%s", "\n".join(outer_output)) + logging.info("Inner output:\n%s", "\n".join(inner_output)) + self.check_result_keys(outer_result, "returnvalue") + self.check_exitcode(outer_result, 0, "exit code of inner runexec is not zero") + self.check_command_in_output(inner_output, f"{self.echo} TEST_TOKEN") + self.assertEqual( + inner_output[-1], "TEST_TOKEN", "run output misses command output" + ) + + def test_starttime(self): + if not os.path.exists(self.echo): + self.skipTest("missing echo") + before = util.read_local_time() + (result, _) = self.execute_run(self.echo) + after = util.read_local_time() + self.check_result_keys(result) + run_starttime = result["starttime"] + self.assertIsNotNone(run_starttime.tzinfo, "start time is not a local time") + self.assertLessEqual(before, run_starttime) + self.assertLessEqual(run_starttime, after) + + def test_frozen_process(self): + # https://github.com/sosy-lab/benchexec/issues/840 + if not os.path.exists(self.sleep): + self.skipTest("missing sleep") + if self.cgroups.version == 1 and not os.path.exists("/sys/fs/cgroup/freezer"): + self.skipTest("missing freezer cgroup") + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_READ_ONLY, + "/home": containerexecutor.DIR_HIDDEN, + "/tmp": containerexecutor.DIR_HIDDEN, + "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, + } + ) + script_v1 = """#!/bin/sh +# create process, move it to sub-cgroup, and freeze it +set -eu + +cgroup="/sys/fs/cgroup/freezer/$(grep freezer /proc/self/cgroup | cut -f 3 -d :)" +mkdir "$cgroup/tmp" +mkdir "$cgroup/tmp/tmp" + +sleep 10 & +child_pid=$! + +echo $child_pid > "$cgroup/tmp/tasks" +echo FROZEN > "$cgroup/tmp/freezer.state" +# remove permissions in order to test our handling of this case +chmod 000 "$cgroup/tmp/freezer.state" +chmod 000 "$cgroup/tmp/tasks" +chmod 000 "$cgroup/tmp" +chmod 000 "$cgroup/freezer.state" +chmod 000 "$cgroup/tasks" +echo FROZEN +wait $child_pid +""" + script_v2 = """#!/bin/sh +# create process, move it to sub-cgroup, and freeze it +set -eu + +cgroup="/sys/fs/cgroup/$(cut -f 3 -d : /proc/self/cgroup)" +mkdir "$cgroup/tmp" +mkdir "$cgroup/tmp/tmp" + +sleep 10 & +child_pid=$! 
+ +echo $child_pid > "$cgroup/tmp/cgroup.procs" +echo 1 > "$cgroup/tmp/cgroup.freeze" +# remove permissions in order to test our handling of this case +chmod 000 "$cgroup/tmp/cgroup.freeze" +chmod 000 "$cgroup/tmp/cgroup.procs" +chmod 000 "$cgroup/tmp" +chmod 000 "$cgroup/cgroup.freeze" +chmod 000 "$cgroup/cgroup.kill" +echo FROZEN +wait $child_pid +""" + (result, output) = self.execute_run( + "/bin/sh", + "-c", + script_v1 if self.cgroups.version == 1 else script_v2, + walltimelimit=1, + expect_terminationreason="walltime", + ) + self.check_exitcode(result, 9, "exit code of killed process is not 9") + self.assertAlmostEqual( + result["walltime"], + 2, + delta=0.5, + msg="walltime is not approximately the time after which the process should have been killed", + ) + self.assertEqual( + output[-1], + "FROZEN", + "run output misses command output and was not executed properly", + ) + + +class TestRunExecutorWithContainer(TestRunExecutor): + def setUp(self, *args, **kwargs): + try: + container.execute_in_namespace(lambda: 0) + except OSError as e: + self.skipTest(f"Namespaces not supported: {os.strerror(e.errno)}") + + dir_modes = kwargs.pop( + "dir_modes", + { + "/": containerexecutor.DIR_READ_ONLY, + "/home": containerexecutor.DIR_HIDDEN, + "/tmp": containerexecutor.DIR_HIDDEN, + }, + ) + + self.runexecutor = RunExecutor( + *args, use_namespaces=True, dir_modes=dir_modes, **kwargs + ) + + def get_runexec_cmdline(self, *args, **kwargs): + return [ + "python3", + runexec, + "--container", + "--read-only-dir", + "/", + "--hidden-dir", + "/home", + "--hidden-dir", + "/tmp", + "--dir", + "/tmp", + "--output", + kwargs["output_filename"], + ] + list(args) + + def execute_run(self, *args, **kwargs): + return super(TestRunExecutorWithContainer, self).execute_run( + workingDir="/tmp", *args, **kwargs + ) + + def test_home_and_tmp_is_separate(self): + self.skipTest("not relevant in container") + + def test_temp_dirs_are_removed(self): + self.skipTest("not relevant in container") + + def test_no_cleanup_temp(self): + self.skipTest("not relevant in container") + + def check_result_files( + self, shell_cmd, result_files_patterns, expected_result_files + ): + output_dir = tempfile.mkdtemp("", "output_") + try: + result, output = self.execute_run( + "/bin/sh", + "-c", + shell_cmd, + output_dir=output_dir, + result_files_patterns=result_files_patterns, + ) + output_str = "\n".join(output) + self.assertEqual( + result["exitcode"].value, + 0, + f"exit code of {' '.join(shell_cmd)} is not zero,\n" + f"result was {result!r},\noutput was\n{output_str}", + ) + result_files = [] + for root, _unused_dirs, files in os.walk(output_dir): + for file in files: + result_files.append( + os.path.relpath(os.path.join(root, file), output_dir) + ) + expected_result_files.sort() + result_files.sort() + self.assertListEqual( + result_files, + expected_result_files, + f"\nList of retrieved result files differs from expected list,\n" + f"result was {result!r},\noutput was\n{output_str}", + ) + finally: + shutil.rmtree(output_dir, ignore_errors=True) + + def test_result_file_simple(self): + self.check_result_files("echo TEST_TOKEN > TEST_FILE", ["."], ["TEST_FILE"]) + + def test_result_file_recursive(self): + self.check_result_files( + "mkdir TEST_DIR; echo TEST_TOKEN > TEST_DIR/TEST_FILE", + ["."], + ["TEST_DIR/TEST_FILE"], + ) + + def test_result_file_multiple(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE; echo TEST_TOKEN > TEST_FILE2", + ["."], + ["TEST_FILE", "TEST_FILE2"], + ) + + def 
test_result_file_symlink(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE; ln -s TEST_FILE TEST_LINK", + ["."], + ["TEST_FILE"], + ) + + def test_result_file_no_match(self): + self.check_result_files("echo TEST_TOKEN > TEST_FILE", ["NO_MATCH"], []) + + def test_result_file_no_pattern(self): + self.check_result_files("echo TEST_TOKEN > TEST_FILE", [], []) + + def test_result_file_empty_pattern(self): + self.assertRaises( + ValueError, + lambda: self.check_result_files("echo TEST_TOKEN > TEST_FILE", [""], []), + ) + + def test_result_file_partial_match(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE; mkdir TEST_DIR; echo TEST_TOKEN > TEST_DIR/TEST_FILE", + ["TEST_DIR"], + ["TEST_DIR/TEST_FILE"], + ) + + def test_result_file_multiple_patterns(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE; " + "echo TEST_TOKEN > TEST_FILE2; " + "mkdir TEST_DIR; " + "echo TEST_TOKEN > TEST_DIR/TEST_FILE; ", + ["TEST_FILE", "TEST_DIR/TEST_FILE"], + ["TEST_FILE", "TEST_DIR/TEST_FILE"], + ) + + def test_result_file_wildcard(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE; " + "echo TEST_TOKEN > TEST_FILE2; " + "echo TEST_TOKEN > TEST_NOFILE; ", + ["TEST_FILE*"], + ["TEST_FILE", "TEST_FILE2"], + ) + + def test_result_file_absolute_pattern(self): + self.check_result_files("echo TEST_TOKEN > TEST_FILE", ["/"], ["tmp/TEST_FILE"]) + + def test_result_file_absolute_and_pattern(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE; mkdir TEST_DIR; echo TEST_TOKEN > TEST_DIR/TEST_FILE", + ["TEST_FILE", "/tmp/TEST_DIR"], + ["tmp/TEST_FILE", "tmp/TEST_DIR/TEST_FILE"], + ) + + def test_result_file_relative_traversal(self): + self.check_result_files( + "echo TEST_TOKEN > TEST_FILE", ["foo/../TEST_FILE"], ["TEST_FILE"] + ) + + def test_result_file_illegal_relative_traversal(self): + self.assertRaises( + ValueError, + lambda: self.check_result_files( + "echo TEST_TOKEN > TEST_FILE", ["foo/../../bar"], [] + ), + ) + + def test_result_file_recursive_pattern(self): + self.check_result_files( + "mkdir -p TEST_DIR/TEST_DIR; " + "echo TEST_TOKEN > TEST_FILE.txt; " + "echo TEST_TOKEN > TEST_DIR/TEST_FILE.txt; " + "echo TEST_TOKEN > TEST_DIR/TEST_DIR/TEST_FILE.txt; ", + ["**/*.txt"], + [ + "TEST_FILE.txt", + "TEST_DIR/TEST_FILE.txt", + "TEST_DIR/TEST_DIR/TEST_FILE.txt", + ], + ) + + def test_result_file_log_limit(self): + file_count = containerexecutor._MAX_RESULT_FILE_LOG_COUNT + 10 + with self.assertLogs(level=logging.DEBUG) as log: + # Check that all output files are transferred ... + self.check_result_files( + f"for i in $(seq 1 {file_count}); do touch $i; done", + ["*"], + list(map(str, range(1, file_count + 1))), + ) + # ... but not all output files are logged ... + self.assertEqual( + len([msg for msg in log.output if "Transferring output file" in msg]), + containerexecutor._MAX_RESULT_FILE_LOG_COUNT, + ) + # ... and the final count is correct. 
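            # (Descriptive note: _MAX_RESULT_FILE_LOG_COUNT is expected to cap only the
            # per-file "Transferring output file" debug messages; the summary message
            # checked below still has to report the total number of matched files.)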
+ count_msg = next(msg for msg in log.output if " output files matched" in msg) + self.assertIn(f"{file_count} output files matched", count_msg) + + def test_file_count_limit(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + self.setUp(container_tmpfs=False) # create RunExecutor with desired parameter + filehierarchylimit._CHECK_INTERVAL_SECONDS = 0.1 + (result, output) = self.execute_run( + "/bin/sh", + "-c", + "for i in $(seq 1 10000); do touch $i; done", + files_count_limit=100, + result_files_patterns=None, + expect_terminationreason="files-count", + ) + + self.check_exitcode(result, 9, "exit code of killed process is not 15") + + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_file_size_limit(self): + if not os.path.exists("/bin/sh"): + self.skipTest("missing /bin/sh") + self.setUp(container_tmpfs=False) # create RunExecutor with desired parameter + filehierarchylimit._CHECK_INTERVAL_SECONDS = 0.1 + (result, output) = self.execute_run( + "/bin/sh", + "-c", + "for i in $(seq 1 100000); do echo $i >> TEST_FILE; done", + files_size_limit=100, + result_files_patterns=None, + expect_terminationreason="files-size", + ) + + self.check_exitcode(result, 9, "exit code of killed process is not 15") + + for line in output[1:]: + self.assertRegex(line, "^-*$", "unexpected text in run output") + + def test_path_with_space(self): + temp_dir = tempfile.mkdtemp(prefix="BenchExec test") + try: + # create RunExecutor with desired parameter + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_READ_ONLY, + "/home": containerexecutor.DIR_HIDDEN, + "/tmp": containerexecutor.DIR_HIDDEN, + temp_dir: containerexecutor.DIR_FULL_ACCESS, + } + ) + temp_file = os.path.join(temp_dir, "TEST_FILE") + result, output = self.execute_run( + "/bin/sh", "-c", f"echo TEST_TOKEN > '{temp_file}'" + ) + self.check_result_keys(result) + self.check_exitcode(result, 0, "exit code of process is not 0") + self.assertTrue( + os.path.exists(temp_file), + f"File '{temp_file}' not created, output was:\n" + "\n".join(output), + ) + with open(temp_file, "r") as f: + self.assertEqual(f.read().strip(), "TEST_TOKEN") + finally: + shutil.rmtree(temp_dir) + + def test_uptime_with_lxcfs(self): + if not os.path.exists("/var/lib/lxcfs/proc"): + self.skipTest("missing lxcfs") + result, output = self.execute_run(self.cat, "/proc/uptime") + self.check_result_keys(result) + self.check_exitcode(result, 0, "exit code for reading uptime is not zero") + uptime = float(output[-1].split(" ")[0]) + self.assertLessEqual( + uptime, 10, f"Uptime {uptime}s unexpectedly high in container" + ) + + def test_uptime_without_lxcfs(self): + if not os.path.exists("/var/lib/lxcfs/proc"): + self.skipTest("missing lxcfs") + # create RunExecutor with desired parameter + self.setUp(container_system_config=False) + result, output = self.execute_run(self.cat, "/proc/uptime") + self.check_result_keys(result) + self.check_exitcode(result, 0, "exit code for reading uptime is not zero") + uptime = float(output[-1].split(" ")[0]) + # If uptime was less than 10s, LXCFS probably was in use + self.assertGreaterEqual( + uptime, 10, f"Uptime {uptime}s unexpectedly low in container" + ) + + +class _StopRunThread(threading.Thread): + def __init__(self, delay, runexecutor): + super(_StopRunThread, self).__init__() + self.daemon = True + self.delay = delay + self.runexecutor = runexecutor + + def run(self): + time.sleep(self.delay) + self.runexecutor.stop() + + +class 
TestRunExecutorUnits(unittest.TestCase): + """unit tests for parts of RunExecutor""" + + def test_get_debug_output_with_error_report_and_invalid_utf8(self): + invalid_utf8 = b"\xFF" + with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as report_file: + with tempfile.NamedTemporaryFile(mode="w+b") as output: + output_content = f"""Dummy output +# An error report file with more information is saved as: +# {report_file.name} +More output +""".encode() # noqa: E800 false alarm + report_content = b"Report output\nMore lines" + output_content += invalid_utf8 + report_content += invalid_utf8 + + output.write(output_content) + output.flush() + output.seek(0) + report_file.write(report_content) + report_file.flush() + + runexecutor._get_debug_output_after_crash(output.name, "") + + self.assertFalse(os.path.exists(report_file.name)) + self.assertEqual(output.read(), output_content + report_content) From e3f291bde4c3edd94be2c6951fe08fe162f43cb6 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Tue, 4 Jun 2024 09:01:21 +0000 Subject: [PATCH 19/52] Reverting all changes on branch --- .gitlab-ci.yml | 53 - benchexec/pytest_analyze_run_result.py | 197 ---- benchexec/pytest_benchmark_definition.py | 170 --- benchexec/pytest_cgroups.py | 69 -- benchexec/pytest_core_assignment.py | 694 ------------ benchexec/pytest_pqos.py | 379 ------- benchexec/pytest_result.py | 613 ----------- benchexec/pytest_runexecutor.py | 1223 ---------------------- pyproject.toml | 4 - setup.cfg | 1 - 10 files changed, 3403 deletions(-) delete mode 100644 benchexec/pytest_analyze_run_result.py delete mode 100644 benchexec/pytest_benchmark_definition.py delete mode 100644 benchexec/pytest_cgroups.py delete mode 100644 benchexec/pytest_core_assignment.py delete mode 100644 benchexec/pytest_pqos.py delete mode 100644 benchexec/pytest_result.py delete mode 100644 benchexec/pytest_runexecutor.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d7c8a969e..bfc92edd8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -61,34 +61,6 @@ stages: paths: - coverage.xml -.unit-tests-pytest: &unit-tests-pytest - stage: test - before_script: - # Create user, we do not want to test as root - - adduser --disabled-login --gecos "" $PRIMARY_USER - # Activate coverage for subprocesses - - printf 'import coverage\ncoverage.process_startup()\n' > "/usr/local/lib/python${PYTHON_VERSION}/site-packages/sitecustomize.py" - # Give $PRIMARY_USER permission to create cgroups - - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER - - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER - # Install BenchExec with `dev` dependencies - - sudo -u $PRIMARY_USER pip install --user ".[dev]" - # Start lxcfs - - lxcfs /var/lib/lxcfs & - script: - - sudo -u $PRIMARY_USER - COVERAGE_PROCESS_START=.coveragerc - coverage run -m pytest - after_script: - - sudo -u $PRIMARY_USER coverage combine - - sudo -u $PRIMARY_USER coverage report - - sudo -u $PRIMARY_USER coverage xml -o coverage-pytest.xml - tags: - - privileged - artifacts: - paths: - - coverage-pytest.xml - unit-tests:python-3.7: <<: *unit-tests variables: @@ -114,31 +86,6 @@ unit-tests:python-3.11: variables: PYTHON_VERSION: '3.11' -unit-tests-pytest:python-3.7: - <<: *unit-tests-pytest - variables: - PYTHON_VERSION: '3.7' - -unit-tests-pytest:python-3.8: - <<: *unit-tests-pytest - variables: - PYTHON_VERSION: '3.8' - -unit-tests-pytest:python-3.9: - <<: *unit-tests-pytest - variables: - PYTHON_VERSION: '3.9' - -unit-tests-pytest:python-3.10: - <<: *unit-tests-pytest - variables: - PYTHON_VERSION: '3.10' 
- -unit-tests-pytest:python-3.11: - <<: *unit-tests-pytest - variables: - PYTHON_VERSION: '3.11' - # Static checks check-format: stage: test diff --git a/benchexec/pytest_analyze_run_result.py b/benchexec/pytest_analyze_run_result.py deleted file mode 100644 index 16c6a18aa..000000000 --- a/benchexec/pytest_analyze_run_result.py +++ /dev/null @@ -1,197 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -import logging -import sys -import pytest -import types - -from benchexec.util import ProcessExitCode -from benchexec.model import Run -from benchexec.result import ( - RESULT_FALSE_REACH, - RESULT_ERROR, - RESULT_UNKNOWN, - RESULT_TRUE_PROP, -) -from benchexec.tools.template import BaseTool - -sys.dont_write_bytecode = True # prevent creation of .pyc files - -normal_result = ProcessExitCode(raw=0, value=0, signal=None) - - -@pytest.fixture(scope="class") -def disable_non_critical_logging(): - logging.disable(logging.CRITICAL) - - -@pytest.mark.usefixtures("disable_non_critical_logging") -class TestResult: - def create_run(self, info_result=RESULT_UNKNOWN): - runSet = types.SimpleNamespace() - runSet.log_folder = "." - runSet.result_files_folder = "." - runSet.options = [] - runSet.real_name = None - runSet.propertytag = None - runSet.benchmark = lambda: None - runSet.benchmark.base_dir = "." - runSet.benchmark.benchmark_file = "Test.xml" - runSet.benchmark.columns = [] - runSet.benchmark.name = "Test" - runSet.benchmark.instance = "Test" - runSet.benchmark.rlimits = {} - runSet.benchmark.tool = BaseTool() - - def determine_result(run): - return info_result - - runSet.benchmark.tool.determine_result = determine_result - - run = Run( - identifier="test.c", - sourcefiles=["test.c"], - task_options=None, - fileOptions=[], - runSet=runSet, - ) - run._cmdline = ["dummy.bin", "test.c"] - return run - - def test_simple(self): - run = self.create_run(info_result=RESULT_UNKNOWN) - assert RESULT_UNKNOWN == run._analyze_result(normal_result, "", None) - - run = self.create_run(info_result=RESULT_TRUE_PROP) - assert RESULT_TRUE_PROP == run._analyze_result(normal_result, "", None) - - run = self.create_run(info_result=RESULT_FALSE_REACH) - assert RESULT_FALSE_REACH == run._analyze_result(normal_result, "", None) - - def test_timeout(self): - run = self.create_run(info_result=RESULT_UNKNOWN) - assert "TIMEOUT" == run._analyze_result(normal_result, "", "cputime") - assert "TIMEOUT" == run._analyze_result(normal_result, "", "cputime-soft") - assert "TIMEOUT" == run._analyze_result(normal_result, "", "walltime") - - run = self.create_run(info_result=RESULT_TRUE_PROP) - assert f"TIMEOUT ({RESULT_TRUE_PROP})" == run._analyze_result( - normal_result, "", "cputime" - ) - - run = self.create_run(info_result=RESULT_FALSE_REACH) - assert f"TIMEOUT ({RESULT_FALSE_REACH})" == run._analyze_result( - normal_result, "", "cputime" - ) - - run = self.create_run(info_result="SOME OTHER RESULT") - assert "TIMEOUT (SOME OTHER RESULT)" == run._analyze_result( - normal_result, "", "cputime" - ) - - run = self.create_run(info_result=RESULT_ERROR) - assert "TIMEOUT" == run._analyze_result(normal_result, "", "cputime") - - run = self.create_run(info_result=RESULT_ERROR) - run._is_timeout = lambda: True - assert "TIMEOUT" == run._analyze_result(normal_result, "", None) - - def test_out_of_memory(self): - run = self.create_run(info_result=RESULT_UNKNOWN) - 
assert "OUT OF MEMORY" == run._analyze_result(normal_result, "", "memory") - - run = self.create_run(info_result=RESULT_TRUE_PROP) - assert f"OUT OF MEMORY ({RESULT_TRUE_PROP})" == run._analyze_result( - normal_result, "", "memory" - ) - - run = self.create_run(info_result=RESULT_FALSE_REACH) - assert f"OUT OF MEMORY ({RESULT_FALSE_REACH})" == run._analyze_result( - normal_result, "", "memory" - ) - - run = self.create_run(info_result="SOME OTHER RESULT") - assert "OUT OF MEMORY (SOME OTHER RESULT)" == run._analyze_result( - normal_result, "", "memory" - ) - - run = self.create_run(info_result=RESULT_ERROR) - assert "OUT OF MEMORY" == run._analyze_result(normal_result, "", "memory") - - def test_timeout_and_out_of_memory(self): - run = self.create_run(info_result=RESULT_UNKNOWN) - run._is_timeout = lambda: True - assert "TIMEOUT" == run._analyze_result(normal_result, "", "memory") - - run = self.create_run(info_result=RESULT_TRUE_PROP) - run._is_timeout = lambda: True - assert f"TIMEOUT ({RESULT_TRUE_PROP})" == run._analyze_result( - normal_result, "", "memory" - ) - - run = self.create_run(info_result=RESULT_FALSE_REACH) - run._is_timeout = lambda: True - assert f"TIMEOUT ({RESULT_FALSE_REACH})" == run._analyze_result( - normal_result, "", "memory" - ) - - run = self.create_run(info_result="SOME OTHER RESULT") - run._is_timeout = lambda: True - assert "TIMEOUT (SOME OTHER RESULT)" == run._analyze_result( - normal_result, "", "memory" - ) - - run = self.create_run(info_result=RESULT_ERROR) - run._is_timeout = lambda: True - assert "TIMEOUT" == run._analyze_result(normal_result, "", "memory") - - def test_returnsignal(self): - def signal(sig): - """Encode a signal as it would be returned by os.wait""" - return ProcessExitCode(raw=sig, value=None, signal=sig) - - run = self.create_run(info_result=RESULT_ERROR) - assert "TIMEOUT" == run._analyze_result(signal(9), "", "cputime") - - run = self.create_run(info_result=RESULT_ERROR) - assert "OUT OF MEMORY" == run._analyze_result(signal(9), "", "memory") - - run = self.create_run(info_result=RESULT_TRUE_PROP) - assert RESULT_TRUE_PROP == run._analyze_result(signal(9), "", None) - - run = self.create_run(info_result=RESULT_FALSE_REACH) - assert RESULT_FALSE_REACH == run._analyze_result(signal(9), "", None) - - run = self.create_run(info_result="SOME OTHER RESULT") - assert "SOME OTHER RESULT" == run._analyze_result(signal(9), "", None) - - run = self.create_run(info_result=RESULT_UNKNOWN) - assert "KILLED BY SIGNAL 9" == run._analyze_result(signal(9), "", None) - - def test_exitcode(self): - def returnvalue(value): - """Encode an exit of aprogram as it would be returned by os.wait""" - return ProcessExitCode(raw=value << 8, value=value, signal=None) - - run = self.create_run(info_result=RESULT_UNKNOWN) - assert "TIMEOUT" == run._analyze_result(returnvalue(1), "", "cputime") - - run = self.create_run(info_result=RESULT_UNKNOWN) - assert "OUT OF MEMORY" == run._analyze_result(returnvalue(1), "", "memory") - - run = self.create_run(info_result=RESULT_TRUE_PROP) - assert RESULT_TRUE_PROP == run._analyze_result(returnvalue(1), "", None) - - run = self.create_run(info_result=RESULT_FALSE_REACH) - assert RESULT_FALSE_REACH == run._analyze_result(returnvalue(1), "", None) - - run = self.create_run(info_result="SOME OTHER RESULT") - assert "SOME OTHER RESULT" == run._analyze_result(returnvalue(1), "", None) - - run = self.create_run(info_result=RESULT_UNKNOWN) - assert RESULT_UNKNOWN == run._analyze_result(returnvalue(1), "", None) diff --git 
a/benchexec/pytest_benchmark_definition.py b/benchexec/pytest_benchmark_definition.py deleted file mode 100644 index ae45b9ac6..000000000 --- a/benchexec/pytest_benchmark_definition.py +++ /dev/null @@ -1,170 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -import collections -import os -import tempfile -import pytest -import yaml - -from benchexec.model import Benchmark -import benchexec.result -import benchexec.util as util - -here = os.path.dirname(__file__) -base_dir = os.path.join(here, "..") -test_dir = os.path.join(base_dir, "test", "tasks") - -DummyConfig = collections.namedtuple( - "DummyConfig", - [ - "name", - "output_path", - "container", - "timelimit", - "walltimelimit", - "memorylimit", - "corelimit", - "num_of_threads", - "selected_run_definitions", - "selected_sourcefile_sets", - "description_file", - ], -)(None, "test", False, None, None, None, None, None, None, None, None) - -ALL_TEST_TASKS = { - "false_other_sub_task.yml": "other_subproperty", - "false_sub_task.yml": "sub", - "false_sub2_task.yml": "sub2", - "false_task.yml": "expected_verdict: false", - "true_task.yml": "expected_verdict: true", - "unknown_task.yml": "", -} - - -def mock_expand_filename_pattern(pattern, base_dir): - if pattern == "*.yml": - return list(ALL_TEST_TASKS.keys()) + ["other_task.yml"] - return [pattern] - - -def mock_load_task_def_file(f): - content = util.read_file(os.path.join(test_dir, f)) - return yaml.safe_load(content) - - -def mock_property_create(property_file): - assert property_file == "test.prp" - return benchexec.result.Property("test.prp", False, "test") - - -@pytest.fixture() -def apply_mocks(mocker): - mocker.patch( - "benchexec.model.load_task_definition_file", new=mock_load_task_def_file - ) - mocker.patch("benchexec.result.Property.create", new=mock_property_create) - mocker.patch( - "benchexec.util.expand_filename_pattern", new=mock_expand_filename_pattern - ) - mocker.patch("os.path.samefile", new=lambda a, b: a == b) - - -class TestBenchmarkDefinition: - """ - Unit tests for reading benchmark definitions, - testing mostly the classes from benchexec.model. - """ - - @pytest.mark.usefixtures("apply_mocks") - def parse_benchmark_definition(self, content): - with tempfile.NamedTemporaryFile( - prefix="BenchExec_test_benchmark_definition_", suffix=".xml", mode="w+" - ) as temp: - temp.write(content) - temp.flush() - - # Because we mocked everything that accesses the file system, - # we can parse the benchmark definition although task files do not exist. - return Benchmark(temp.name, DummyConfig, util.read_local_time()) - - @pytest.mark.usefixtures("apply_mocks") - def check_task_filter(self, filter_attr, expected): - # The following three benchmark definitions are equivalent, we check each. 
- benchmark_definitions = [ - """ - - test.prp - *.yml - - - """, - """ - - - test.prp - *.yml - - - - """, - """ - - - *.yml - - - test.prp - - - """, - ] - - for bench_def in benchmark_definitions: - benchmark = self.parse_benchmark_definition(bench_def.format(filter_attr)) - run_ids = [run.identifier for run in benchmark.run_sets[0].runs] - assert run_ids == sorted(expected) - - @pytest.mark.usefixtures("apply_mocks") - def test_expected_verdict_no_filter(self): - self.check_task_filter("", ALL_TEST_TASKS.keys()) - - @pytest.mark.usefixtures("apply_mocks") - def test_expected_verdict_true_filter(self): - self.check_task_filter('expectedverdict="true"', ["true_task.yml"]) - - @pytest.mark.usefixtures("apply_mocks") - def test_expected_verdict_false_filter(self): - false_tasks = [f for f in ALL_TEST_TASKS.keys() if f.startswith("false")] - self.check_task_filter('expectedverdict="false"', false_tasks) - - @pytest.mark.usefixtures("apply_mocks") - def test_expected_verdict_false_subproperty_filter(self): - self.check_task_filter('expectedverdict="false(sub)"', ["false_sub_task.yml"]) - - @pytest.mark.usefixtures("apply_mocks") - def test_expected_verdict_unknown_filter(self): - self.check_task_filter('expectedverdict="unknown"', ["unknown_task.yml"]) - - @pytest.mark.usefixtures("apply_mocks") - def test_expected_verdict_false_subproperties_filter(self): - benchmark_definition = """ - - - test.prp - *.yml - - - test.prp - *.yml - - - - """ - benchmark = self.parse_benchmark_definition(benchmark_definition) - run_ids = [run.identifier for run in benchmark.run_sets[0].runs] - assert run_ids == ["false_sub_task.yml", "false_sub2_task.yml"] diff --git a/benchexec/pytest_cgroups.py b/benchexec/pytest_cgroups.py deleted file mode 100644 index b75b6f806..000000000 --- a/benchexec/pytest_cgroups.py +++ /dev/null @@ -1,69 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -import logging -import subprocess -import sys -import pytest - -from benchexec import check_cgroups - -sys.dont_write_bytecode = True # prevent creation of .pyc files - - -@pytest.fixture(scope="class") -def disable_non_critical_logging(): - logging.disable(logging.CRITICAL) - - -@pytest.mark.usefixtures("disable_non_critical_logging") -class TestCheckCgroups: - - def execute_run_extern(self, *args, **kwargs): - try: - return subprocess.check_output( - args=["python3", "-m", "benchexec.check_cgroups"] + list(args), - stderr=subprocess.STDOUT, - universal_newlines=True, - **kwargs, - ) - except subprocess.CalledProcessError as e: - if e.returncode != 1: # 1 is expected if cgroups are not available - print(e.output) - raise e - - def test_extern_command(self): - self.execute_run_extern() - - def test_simple(self): - try: - check_cgroups.main(["--no-thread"]) - except SystemExit as e: - # expected if cgroups are not available - pytest.skip(str(e)) - - def test_threaded(self): - try: - check_cgroups.main([]) - except SystemExit as e: - # expected if cgroups are not available - pytest.skip(str(e)) - - def test_thread_result_is_returned(self): - """ - Test that an error raised by check_cgroup_availability is correctly - re-raised in the main thread by replacing this function temporarily. 
- """ - tmp = check_cgroups.check_cgroup_availability - try: - check_cgroups.check_cgroup_availability = lambda wait: exit(1) - - with pytest.raises(SystemExit): - check_cgroups.main([]) - - finally: - check_cgroups.check_cgroup_availability = tmp diff --git a/benchexec/pytest_core_assignment.py b/benchexec/pytest_core_assignment.py deleted file mode 100644 index 9617f5f46..000000000 --- a/benchexec/pytest_core_assignment.py +++ /dev/null @@ -1,694 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -import itertools -import logging -import sys -import pytest -import math - -from benchexec.resources import _get_cpu_cores_per_run0 - -sys.dont_write_bytecode = True # prevent creation of .pyc files - - -def lrange(start, end): - return list(range(start, end)) - - -@pytest.fixture(scope="class") -def disable_non_critical_logging(): - logging.disable(logging.CRITICAL) - - -@pytest.mark.usefixtures("disable_non_critical_logging") -class TestCpuCoresPerRun: - - def assertValid(self, coreLimit, num_of_threads, expectedResult=None): - result = _get_cpu_cores_per_run0( - coreLimit, num_of_threads, self.use_ht, *self.machine() - ) - if expectedResult: - assert ( - expectedResult == result - ), f"Incorrect result for {coreLimit} cores and {num_of_threads} threads." - - def assertInvalid(self, coreLimit, num_of_threads): - with pytest.raises(SystemExit): - _get_cpu_cores_per_run0( - coreLimit, num_of_threads, self.use_ht, *self.machine() - ) - - def machine(self): - """Create the necessary parameters of _get_cpu_cores_per_run0 for a specific machine.""" - core_count = self.cpus * self.cores - allCpus = range(core_count) - cores_of_package = {} - ht_spread = core_count // 2 - for package in range(self.cpus): - start = package * self.cores // (2 if self.ht else 1) - end = (package + 1) * self.cores // (2 if self.ht else 1) - cores_of_package[package] = lrange(start, end) - if self.ht: - cores_of_package[package].extend( - range(start + ht_spread, end + ht_spread) - ) - siblings_of_core = {} - for core in allCpus: - siblings_of_core[core] = [core] - if self.ht: - for core in allCpus: - siblings_of_core[core].append((core + ht_spread) % core_count) - siblings_of_core[core].sort() - return allCpus, cores_of_package, siblings_of_core - - def test_singleThread(self): - # test all possible coreLimits for a single thread - core_count = self.cpus * self.cores - if self.ht: - # Creates list alternating between real core and hyper-threading core - singleThread_assignment = list( - itertools.chain( - *zip(range(core_count // 2), range(core_count // 2, core_count)) - ) - ) - else: - singleThread_assignment = lrange(0, core_count) - if not self.use_ht and self.ht: - core_count = (self.cpus * self.cores) // 2 - singleThread_assignment = lrange(0, core_count) - - for coreLimit in range(1, core_count + 1): - self.assertValid( - coreLimit, 1, [sorted(singleThread_assignment[:coreLimit])] - ) - self.assertInvalid(core_count + 1, 1) - - # expected order in which cores are used for runs with coreLimit==1/2/3/4/8, used by the following tests - # these fields should be filled in by subclasses to activate the corresponding tests - # (same format as the expected return value by _get_cpu_cores_per_run) - oneCore_assignment = None - twoCore_assignment = None - threeCore_assignment = None - fourCore_assignment = None - eightCore_assignment = None - use_ht = 
True - - def test_oneCorePerRun(self): - # test all possible numOfThread values for runs with one core - maxThreads = self.cpus * self.cores - if not self.use_ht and self.ht: - maxThreads = (self.cpus * self.cores) // 2 - self.assertInvalid(1, maxThreads + 1) - if not self.oneCore_assignment: - pytest.skip("Need result specified") - for num_of_threads in range(1, maxThreads + 1): - self.assertValid( - 1, num_of_threads, self.oneCore_assignment[:num_of_threads] - ) - - def test_twoCoresPerRun(self): - # test all possible numOfThread values for runs with two cores - maxThreads = self.cpus * (self.cores // 2) - if not self.use_ht and self.ht: - maxThreads = self.cpus * (self.cores // 4) - if maxThreads == 0: - # Test for runs that are split over cpus - cpus_per_run = int(math.ceil(2 / (self.cores // 2))) - maxThreads = self.cpus // cpus_per_run - self.assertInvalid(2, maxThreads + 1) - if not self.twoCore_assignment: - pytest.skip("Need result specified") - for num_of_threads in range(1, maxThreads + 1): - self.assertValid( - 2, num_of_threads, self.twoCore_assignment[:num_of_threads] - ) - - def test_threeCoresPerRun(self): - # test all possible numOfThread values for runs with three cores - maxThreads = self.cpus * (self.cores // 3) - if not self.use_ht and self.ht: - maxThreads = self.cpus * (self.cores // 6) - if maxThreads == 0: - # Test for runs that are split over cpus - cpus_per_run = int(math.ceil(3 / (self.cores // 2))) - maxThreads = self.cpus // cpus_per_run - - self.assertInvalid(3, maxThreads + 1) - if not self.threeCore_assignment: - pytest.skip("Need result specified") - for num_of_threads in range(1, maxThreads + 1): - self.assertValid( - 3, num_of_threads, self.threeCore_assignment[:num_of_threads] - ) - - def test_fourCoresPerRun(self): - # test all possible numOfThread values for runs with four cores - maxThreads = self.cpus * (self.cores // 4) - if not self.use_ht and self.ht: - maxThreads = self.cpus * (self.cores // 8) - if maxThreads == 0: - # Test for runs that are split over cpus - cpus_per_run = int(math.ceil(4 / (self.cores // 2))) - maxThreads = self.cpus // cpus_per_run - - self.assertInvalid(4, maxThreads + 1) - if not self.fourCore_assignment: - pytest.skip("Need result specified") - for num_of_threads in range(1, maxThreads + 1): - self.assertValid( - 4, num_of_threads, self.fourCore_assignment[:num_of_threads] - ) - - def test_eightCoresPerRun(self): - # test all possible numOfThread values for runs with eight cores - maxThreads = self.cpus * (self.cores // 8) - if not self.use_ht and self.ht: - maxThreads = (self.cpus * self.cores) // 16 - if maxThreads == 0: - # Test for runs that are split over cpus - cpus_per_run = int(math.ceil(8 / (self.cores // 2))) - maxThreads = self.cpus // cpus_per_run - if not maxThreads: - pytest.skip( - "Testing for runs that need to be split across CPUs is not implemented" - ) - self.assertInvalid(8, maxThreads + 1) - if not self.eightCore_assignment: - pytest.skip("Need result specified") - for num_of_threads in range(1, maxThreads + 1): - self.assertValid( - 8, num_of_threads, self.eightCore_assignment[:num_of_threads] - ) - - -class TestCpuCoresPerRun_singleCPU(TestCpuCoresPerRun): - cpus = 1 - cores = 8 - ht = False - - oneCore_assignment = [[x] for x in range(8)] - twoCore_assignment = [[0, 1], [2, 3], [4, 5], [6, 7]] - threeCore_assignment = [[0, 1, 2], [3, 4, 5]] - fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7]] - eightCore_assignment = [list(range(8))] - - def test_singleCPU_invalid(self): - self.assertInvalid(2, 5) 
- self.assertInvalid(5, 2) - self.assertInvalid(3, 3) - - -class TestCpuCoresPerRun_singleCPU_HT(TestCpuCoresPerRun_singleCPU): - ht = True - - twoCore_assignment = [[0, 4], [1, 5], [2, 6], [3, 7]] - threeCore_assignment = [[0, 1, 4], [2, 3, 6]] - fourCore_assignment = [[0, 1, 4, 5], [2, 3, 6, 7]] - - def test_halfPhysicalCore(self): - # Cannot run if we have only half of one physical core - with pytest.raises(SystemExit): - _get_cpu_cores_per_run0(1, 1, True, [0], {0: [0, 1]}, {0: [0, 1]}) - - -class TestCpuCoresPerRun_dualCPU_HT(TestCpuCoresPerRun): - cpus = 2 - cores = 16 - ht = True - - oneCore_assignment = [ - [x] - for x in [ - 0, - 8, - 1, - 9, - 2, - 10, - 3, - 11, - 4, - 12, - 5, - 13, - 6, - 14, - 7, - 15, - 16, - 24, - 17, - 25, - 18, - 26, - 19, - 27, - 20, - 28, - 21, - 29, - 22, - 30, - 23, - 31, - ] - ] - - twoCore_assignment = [ - [0, 16], - [8, 24], - [1, 17], - [9, 25], - [2, 18], - [10, 26], - [3, 19], - [11, 27], - [4, 20], - [12, 28], - [5, 21], - [13, 29], - [6, 22], - [14, 30], - [7, 23], - [15, 31], - ] - - # Note: the core assignment here is non-uniform, the last two threads are spread over three physical cores - # Currently, the assignment algorithm cannot do better for odd coreLimits, - # but this affects only cases where physical cores are split between runs, which is not recommended anyway. - threeCore_assignment = [ - [0, 1, 16], - [8, 9, 24], - [2, 3, 18], - [10, 11, 26], - [4, 5, 20], - [12, 13, 28], - [6, 7, 22], - [14, 15, 30], - [17, 19, 21], - [25, 27, 29], - ] - - fourCore_assignment = [ - [0, 1, 16, 17], - [8, 9, 24, 25], - [2, 3, 18, 19], - [10, 11, 26, 27], - [4, 5, 20, 21], - [12, 13, 28, 29], - [6, 7, 22, 23], - [14, 15, 30, 31], - ] - - eightCore_assignment = [ - [0, 1, 2, 3, 16, 17, 18, 19], - [8, 9, 10, 11, 24, 25, 26, 27], - [4, 5, 6, 7, 20, 21, 22, 23], - [12, 13, 14, 15, 28, 29, 30, 31], - ] - - def test_dualCPU_HT(self): - self.assertValid( - 16, 2, [lrange(0, 8) + lrange(16, 24), lrange(8, 16) + lrange(24, 32)] - ) - - def test_dualCPU_HT_invalid(self): - self.assertInvalid(2, 17) - self.assertInvalid(17, 2) - self.assertInvalid(4, 9) - self.assertInvalid(9, 4) - self.assertInvalid(8, 5) - self.assertInvalid(5, 8) - - -class TestCpuCoresPerRun_threeCPU(TestCpuCoresPerRun): - cpus = 3 - cores = 5 - ht = False - - oneCore_assignment = [ - [x] for x in [0, 5, 10, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14] - ] - twoCore_assignment = [[0, 1], [5, 6], [10, 11], [2, 3], [7, 8], [12, 13]] - threeCore_assignment = [[0, 1, 2], [5, 6, 7], [10, 11, 12]] - fourCore_assignment = [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]] - - def test_threeCPU_invalid(self): - self.assertInvalid(6, 2) - - -class TestCpuCoresPerRun_threeCPU_HT(TestCpuCoresPerRun): - cpus = 3 - cores = 10 - ht = True - - oneCore_assignment = [ - [x] - for x in [ - 0, - 5, - 10, - 1, - 6, - 11, - 2, - 7, - 12, - 3, - 8, - 13, - 4, - 9, - 14, - 15, - 20, - 25, - 16, - 21, - 26, - 17, - 22, - 27, - 18, - 23, - 28, - 19, - 24, - 29, - ] - ] - twoCore_assignment = [ - [0, 15], - [5, 20], - [10, 25], - [1, 16], - [6, 21], - [11, 26], - [2, 17], - [7, 22], - [12, 27], - [3, 18], - [8, 23], - [13, 28], - [4, 19], - [9, 24], - [14, 29], - ] - threeCore_assignment = [ - [0, 1, 15], - [5, 6, 20], - [10, 11, 25], - [2, 3, 17], - [7, 8, 22], - [12, 13, 27], - [4, 16, 19], - [9, 21, 24], - [14, 26, 29], - ] - fourCore_assignment = [ - [0, 1, 15, 16], - [5, 6, 20, 21], - [10, 11, 25, 26], - [2, 3, 17, 18], - [7, 8, 22, 23], - [12, 13, 27, 28], - ] - eightCore_assignment = [ - [0, 1, 2, 3, 15, 16, 17, 
18], - [5, 6, 7, 8, 20, 21, 22, 23], - [10, 11, 12, 13, 25, 26, 27, 28], - ] - - def test_threeCPU_HT_invalid(self): - self.assertInvalid(11, 2) - - def test_threeCPU_HT_noncontiguousId(self): - """3 CPUs with one core (plus HT) and non-contiguous core and package numbers. - This may happen on systems with administrative core restrictions, - because the ordering of core and package numbers is not always consistent.""" - result = _get_cpu_cores_per_run0( - 2, - 3, - True, - [0, 1, 2, 3, 6, 7], - {0: [0, 1], 2: [2, 3], 3: [6, 7]}, - {0: [0, 1], 1: [0, 1], 2: [2, 3], 3: [2, 3], 6: [6, 7], 7: [6, 7]}, - ) - assert [ - [0, 1], - [2, 3], - [6, 7], - ] == result, "Incorrect result for 2 cores and 3 threads." - - -class TestCpuCoresPerRun_quadCPU_HT(TestCpuCoresPerRun): - cpus = 4 - cores = 16 - ht = True - - def test_quadCPU_HT_noncontiguousId(self): - """4 CPUs with 8 cores (plus HT) and non-contiguous core and package numbers. - This may happen on systems with administrative core restrictions, - because the ordering of core and package numbers is not always consistent. - Furthermore, sibling cores have numbers next to each other (occurs on AMD Opteron machines with shared L1/L2 caches) - and are not split as far as possible from each other (as it occurs on hyper-threading machines). - """ - result = _get_cpu_cores_per_run0( - 1, - 8, - True, - [0, 1, 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 57], - { - 0: [0, 1, 8, 9], - 1: [32, 33, 40, 41], - 2: [48, 49, 56, 57], - 3: [16, 17, 24, 25], - }, - { - 0: [0, 1], - 1: [0, 1], - 48: [48, 49], - 33: [32, 33], - 32: [32, 33], - 40: [40, 41], - 9: [8, 9], - 16: [16, 17], - 17: [16, 17], - 56: [56, 57], - 57: [56, 57], - 8: [8, 9], - 41: [40, 41], - 24: [24, 25], - 25: [24, 25], - 49: [48, 49], - }, - ) - assert [ - [0], - [32], - [48], - [16], - [8], - [40], - [56], - [24], - ] == result, "Incorrect result for 1 core and 8 threads." 
- - def test_quadCPU_HT(self): - self.assertValid( - 16, - 4, - [ - lrange(0, 8) + lrange(32, 40), - lrange(8, 16) + lrange(40, 48), - lrange(16, 24) + lrange(48, 56), - lrange(24, 32) + lrange(56, 64), - ], - ) - - # Just test that no exception occurs - self.assertValid(1, 64) - self.assertValid(64, 1) - self.assertValid(2, 32) - self.assertValid(32, 2) - self.assertValid(3, 20) - self.assertValid(16, 3) - self.assertValid(4, 16) - self.assertValid(16, 4) - self.assertValid(5, 12) - self.assertValid(8, 8) - - def test_quadCPU_HT_invalid(self): - self.assertInvalid(2, 33) - self.assertInvalid(33, 2) - self.assertInvalid(3, 21) - self.assertInvalid(17, 3) - self.assertInvalid(4, 17) - self.assertInvalid(17, 4) - self.assertInvalid(5, 13) - self.assertInvalid(9, 5) - self.assertInvalid(6, 9) - self.assertInvalid(9, 6) - self.assertInvalid(7, 9) - self.assertInvalid(9, 7) - self.assertInvalid(8, 9) - self.assertInvalid(9, 8) - - self.assertInvalid(9, 5) - self.assertInvalid(6, 9) - self.assertInvalid(10, 5) - self.assertInvalid(6, 10) - self.assertInvalid(11, 5) - self.assertInvalid(6, 11) - self.assertInvalid(12, 5) - self.assertInvalid(6, 12) - self.assertInvalid(13, 5) - self.assertInvalid(5, 13) - self.assertInvalid(14, 5) - self.assertInvalid(5, 14) - self.assertInvalid(15, 5) - self.assertInvalid(5, 15) - self.assertInvalid(16, 5) - self.assertInvalid(5, 16) - - -class TestCpuCoresPerRun_singleCPU_no_ht(TestCpuCoresPerRun): - cpus = 1 - cores = 8 - ht = True - use_ht = False - - oneCore_assignment = [[x] for x in range(0, 4)] - twoCore_assignment = [[0, 1], [2, 3]] - threeCore_assignment = [[0, 1, 2]] - fourCore_assignment = [[0, 1, 2, 3]] - - def test_singleCPU_no_ht_invalid(self): - self.assertInvalid(1, 5) - self.assertInvalid(2, 3) - self.assertInvalid(3, 2) - self.assertInvalid(4, 2) - self.assertInvalid(8, 1) - - -class TestCpuCoresPerRun_dualCPU_no_ht(TestCpuCoresPerRun): - cpus = 2 - cores = 8 - ht = True - use_ht = False - - oneCore_assignment = [[0], [4], [1], [5], [2], [6], [3], [7]] - twoCore_assignment = [[0, 1], [4, 5], [2, 3], [6, 7]] - threeCore_assignment = [[0, 1, 2], [4, 5, 6]] - fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7]] - eightCore_assignment = [[0, 1, 2, 3, 4, 5, 6, 7]] - - def test_dualCPU_no_ht_invalid(self): - self.assertInvalid(1, 9) - self.assertInvalid(1, 10) - self.assertInvalid(2, 5) - self.assertInvalid(2, 6) - self.assertInvalid(3, 3) - self.assertInvalid(3, 4) - self.assertInvalid(4, 3) - self.assertInvalid(4, 4) - self.assertInvalid(8, 2) - self.assertInvalid(8, 3) - - def test_dualCPU_noncontiguousID(self): - results = _get_cpu_cores_per_run0( - 2, - 3, - False, - [0, 4, 9, 15, 21, 19, 31, 12, 10, 11, 8, 23, 27, 14, 1, 20], - {0: [0, 4, 9, 12, 15, 19, 21, 31], 2: [10, 11, 8, 23, 27, 14, 1, 20]}, - { - 0: [0, 4], - 4: [0, 4], - 9: [9, 12], - 12: [9, 12], - 15: [15, 19], - 19: [15, 19], - 21: [21, 31], - 31: [21, 31], - 10: [10, 11], - 11: [10, 11], - 8: [8, 23], - 23: [8, 23], - 27: [27, 14], - 14: [27, 14], - 1: [1, 20], - 20: [1, 20], - }, - ) - assert results == [ - [0, 9], - [8, 10], - [15, 21], - ], "Incorrect result for 2 cores and 3 threads." 
- - -class TestCpuCoresPerRun_threeCPU_no_ht(TestCpuCoresPerRun): - cpus = 3 - cores = 6 - ht = True - use_ht = False - - oneCore_assignment = [[x] for x in [0, 3, 6, 1, 4, 7, 2, 5, 8]] - twoCore_assignment = [[0, 1], [3, 4], [6, 7]] - threeCore_assignment = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] - fourCore_assignment = [[0, 1, 2, 3]] - eightCore_assignment = [[0, 1, 2, 3, 4, 5, 6, 7]] - - def test_threeCPU_no_ht_invalid(self): - self.assertInvalid(1, 10) - self.assertInvalid(2, 4) - self.assertInvalid(3, 4) - self.assertInvalid(4, 2) - self.assertInvalid(8, 2) - - -class TestCpuCoresPerRun_quadCPU_no_ht(TestCpuCoresPerRun): - cpus = 4 - cores = 8 - ht = True - use_ht = False - - oneCore_assignment = [ - [x] for x in [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15] - ] - twoCore_assignment = [ - [0, 1], - [4, 5], - [8, 9], - [12, 13], - [2, 3], - [6, 7], - [10, 11], - [14, 15], - ] - threeCore_assignment = [[0, 1, 2], [4, 5, 6], [8, 9, 10], [12, 13, 14]] - fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]] - eightCore_assignment = [[0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15]] - - def test_quadCPU_no_ht_invalid(self): - self.assertInvalid(1, 17) - self.assertInvalid(2, 9) - self.assertInvalid(3, 5) - self.assertInvalid(4, 5) - self.assertInvalid(8, 3) - - def test_quadCPU_no_ht_valid(self): - self.assertValid(5, 2, [[0, 1, 2, 3, 4], [8, 9, 10, 11, 12]]) - self.assertInvalid(5, 3) - self.assertValid(6, 2, [[0, 1, 2, 3, 4, 5], [8, 9, 10, 11, 12, 13]]) - self.assertInvalid(6, 3) - - -# prevent execution of base class as its own test -del TestCpuCoresPerRun diff --git a/benchexec/pytest_pqos.py b/benchexec/pytest_pqos.py deleted file mode 100644 index 63e8e4a9e..000000000 --- a/benchexec/pytest_pqos.py +++ /dev/null @@ -1,379 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -""" - Unit tests for pqos module -""" -import json -import copy -import logging -import pytest -from subprocess import CalledProcessError -from benchexec.pqos import Pqos - - -mock_pqos_wrapper_output = { - "load_pqos": { - "function_output": {}, - "returncode": 0, - "function": "pqos_init", - "error": False, - "message": "MSR interface intialised", - }, - "check_capability": { - "function_output": {"mem_size": 32, "cdp_on": 0, "num_classes": 4}, - "returncode": 0, - "function": "get_capability_info", - "error": False, - "message": "Retrieved l3ca capability", - }, - "allocate_resource": { - "function_output": {"cache_per_run": 4, "cores": {0: 0, 1: 0}}, - "returncode": 0, - "function": "allocate_resource", - "error": False, - "message": "Allocated l3ca", - }, - "monitor_events": { - "function_output": { - "monitoring_data": [ - { - "cores": [0, 1, 2], - "ipc": 0.987, - "llc_misses": 10240, - "llc": {"avg": 25028, "max": 30000}, - "mbm_local": {"avg": 25028, "max": 30000}, - } - ] - }, - "returncode": 0, - "function": "monitor_events", - "error": False, - "message": "Event monitoring successfull", - }, - "reset_monitoring": { - "returncode": 0, - "function": "reset_monitoring", - "error": False, - "message": "Reset monitoring successfull", - }, - "reset_resources": { - "returncode": 0, - "function": "reset_resources", - "error": False, - "message": "Resource reset successfull", - }, -} - -mock_pqos_wrapper_error = { - "function": "mock_function", - "message": "error in pqos_wrapper function", - 
"returncode": 1, - "error": True, - "function_output": {}, -} - - -def mock_check_output(args_list, **kwargs): - """ - mock for subprocess.check_output function, this function returns a dummy - pqos_wrapper CLI output. - """ - return json.dumps(mock_pqos_wrapper_output) - - -def mock_check_output_error(args_list, **kwargs): - """ - mock for subprocess.check_output, returns a dummy error output of pqos_wrapper - """ - raise CalledProcessError(1, "cmd", json.dumps(mock_pqos_wrapper_error)) - - -def mock_check_output_capability_error(args_list, **kwargs): - """ - mock for subprocess.check_output, returns a success pqos_wrapper output - if get_capability function is called otherwise returns a dummy error output - """ - if "-c" in args_list: - return mock_check_output(args_list, **kwargs) - mock_check_output_error(args_list, **kwargs) # noqa: R503 always raises - - -class MockPopen: - """ - A Mock class for subprocess.Popen - """ - - def __init__(self, args_list, universal_newlines=None, **kwargs): - assert universal_newlines # required for this mock - self.args_list = args_list - self.returncode = 0 - - def send_signal(self, signal): - """ - mock Popen.send_signal function - """ - return 0 - - def kill(self): - """ - mock Popen.kill function - """ - return 0 - - def communicate(self): - """ - mock Popen.communicate function - """ - if self.returncode == 0: - return (mock_check_output(self.args_list), None) - return (None, json.dumps(mock_pqos_wrapper_error)) - - -def mock_popen(args_list, **kwargs): - """ - A mock function to create a MockPopen object with given arguments - """ - return MockPopen(args_list, **kwargs) - - -@pytest.fixture(scope="class") -def disable_non_critical_logging(): - logging.disable(logging.CRITICAL) - - -@pytest.mark.usefixtures("disable_non_critical_logging") -class TestPqos: - """ - Unit tests for pqos module - """ - - def test_pqos_init(self, mocker): - """ - Test for initialisation of pqos module - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - pqos = Pqos() - assert isinstance(pqos, Pqos) - assert pqos.executable_path is not None - - def test_pqos_init_error(self, mocker): - """ - Test for initialisation of pqos module when pqos_wrapper CLI is not present - in the system. - """ - mocker.patch("benchexec.pqos.find_executable2", return_value=None) - pqos = Pqos() - assert isinstance(pqos, Pqos) - assert pqos.executable_path is None - - def test_pqos_execute_command(self, mocker): - """ - Test for Pqos.execute_command function - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) - mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) - pqos = Pqos() - ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") - assert ret == True - ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") - assert ret == True - ret = pqos.execute_command( - "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" - ) - assert ret == True - ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") - assert ret == True - ret = pqos.execute_command( - "mon", "monitor_events", False, "-m", "[[0,1],[2,3]]" - ) - assert ret == True - - def test_pqos_execute_command_cli_non_existent(self, mocker): - """ - Test for Pqos.execute_command function when pqos_wrapper CLI is not present. 
- """ - mocker.patch("benchexec.pqos.find_executable2", return_value=None) - pqos = Pqos() - ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") - assert ret == False - ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") - assert ret == False - ret = pqos.execute_command( - "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" - ) - assert ret == False - ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") - assert ret == False - ret = pqos.execute_command( - "mon", "monitor_events", False, "-m", "[[0,1],[2,3]]" - ) - assert ret == False - - def test_pqos_execute_command_cli_error(self, mocker): - """ - Test for Pqos.execute_command function when pqos_wrapper throws an error - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output_error) - pqos = Pqos() - ret = pqos.execute_command("mon", "reset_monitoring", True, "-rm") - assert ret == False - ret = pqos.execute_command("l3ca", "check_capability", False, "-c", "l3ca") - assert ret == False - ret = pqos.execute_command( - "l3ca", "allocate_resource", False, "-a", "l3ca", "[[0,1],[2,3]]" - ) - assert ret == False - ret = pqos.execute_command("l3ca", "reset_resources", True, "-r") - assert ret == False - - def test_pqos_allocate_l3ca(self, mocker): - """ - Test for pqos.allocate_l3ca - """ - - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) - pqos = Pqos() - pqos.allocate_l3ca([[0, 1], [2, 3]]) - assert pqos.reset_required == True - - def test_pqos_allocate_l3ca_error(self, mocker): - """ - Test for pqos.allocate_l3ca when pqos_wrapper throws an error - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - mocker.patch( - "benchexec.pqos.check_output", - side_effect=mock_check_output_capability_error, - ) - pqos = Pqos() - pqos.reset_resources = mocker.MagicMock(return_value=0) - pqos.allocate_l3ca([[0, 1], [2, 3]]) - assert pqos.reset_required == False - pqos.reset_resources.assert_called_once_with() - - def test_pqos_stop_monitoring(self, mocker): - """ - Test for pqos.stop_monitoring - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) - mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) - flatten_mon_data = { - "ipc": 0.987, - "llc_misses": 10240, - "llc_avg": 25028, - "llc_max": 30000, - "mbm_local_avg": 25028, - "mbm_local_max": 30000, - } - pqos = Pqos() - pqos.start_monitoring([[0, 1, 2]]) - ret = pqos.stop_monitoring() - assert ret == flatten_mon_data - assert pqos.mon_process == None - - def test_pqos_stop_monitoring_not_started(self, mocker): - """ - Test for pqos.stop_monitoring, when monitoring is not started before - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) - pqos = Pqos() - ret = pqos.stop_monitoring() - assert ret == {} - assert pqos.mon_process == None - - def test_pqos_stop_monitoring_error(self, mocker): - """ - Test for pqos.stop_monitoring, when pqos_wrapper throws an error - """ - mocker.patch( - "benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib" - ) - 
mocker.patch("benchexec.pqos.check_output", side_effect=mock_check_output) - mocker.patch("benchexec.pqos.Popen", side_effect=mock_popen) - pqos = Pqos() - pqos.start_monitoring([[0, 1, 2]]) - pqos.mon_process.returncode = 1 - ret = pqos.stop_monitoring() - assert ret == {} - assert pqos.mon_process == None - - def test_pqos_flatten_mon_data(self): - """ - Test for Pqos.flatten_mon_data when single monitoring data is received - """ - flatten_mon_data = { - "ipc": 0.987, - "llc_misses": 10240, - "llc_avg": 25028, - "llc_max": 30000, - "mbm_local_avg": 25028, - "mbm_local_max": 30000, - } - mon_data = copy.deepcopy( - mock_pqos_wrapper_output["monitor_events"]["function_output"][ - "monitoring_data" - ] - ) - ret = Pqos.flatten_mon_data(mon_data) - assert ret == flatten_mon_data - - def test_pqos_flatten_mon_data_multiple(self): - """ - Test for Pqos.flatten_mon_data when multiple monitoring data are received - """ - flatten_mon_data_multiple = { - "ipc_cpus0,1,2": 0.987, - "llc_misses_cpus0,1,2": 10240, - "llc_avg_cpus0,1,2": 25028, - "llc_max_cpus0,1,2": 30000, - "mbm_local_avg_cpus0,1,2": 25028, - "mbm_local_max_cpus0,1,2": 30000, - "ipc_cpus3,4,5": 0.987, - "llc_misses_cpus3,4,5": 10240, - "llc_avg_cpus3,4,5": 25028, - "llc_max_cpus3,4,5": 30000, - "mbm_local_avg_cpus3,4,5": 25028, - "mbm_local_max_cpus3,4,5": 30000, - } - mon_data = copy.deepcopy( - mock_pqos_wrapper_output["monitor_events"]["function_output"][ - "monitoring_data" - ] - ) - first_core_set = copy.deepcopy(mon_data[0]) - second_core_set = copy.deepcopy(mon_data[0]) - second_core_set["cores"] = [3, 4, 5] - mon_data_multiple = [first_core_set, second_core_set] - ret = Pqos.flatten_mon_data(mon_data_multiple) - assert ret == flatten_mon_data_multiple - - def test_pqos_convert_core_list(self): - """ - Test for pqos.convert_core_list function - """ - ret = Pqos.convert_core_list([[0, 1], [2, 3]]) - assert ret == "[[0,1],[2,3]]" diff --git a/benchexec/pytest_result.py b/benchexec/pytest_result.py deleted file mode 100644 index cc55d5909..000000000 --- a/benchexec/pytest_result.py +++ /dev/null @@ -1,613 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2024 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -import logging -import sys -import tempfile -import pytest - -from benchexec.result import * # noqa: F403 @UnusedWildImport everything is tested -from benchexec.result import ( - _SCORE_CORRECT_FALSE, - _SCORE_CORRECT_TRUE, - _SCORE_WRONG_TRUE, - _SCORE_WRONG_FALSE, -) - -sys.dont_write_bytecode = True # prevent creation of .pyc files - - -class TestExpectedResult: - def test_via_string(self): - def test(result, subproperty): - expected_result = ExpectedResult(result, subproperty) - assert ExpectedResult.from_str(str(expected_result)) == expected_result - - test(None, None) - test(True, None) - test(False, None) - test(True, "foo") - test(False, "foo") - - def test_via_instance(self): - def test(s): - assert str(ExpectedResult.from_str(s)) == s - - test("") - test("true") - test("false") - test("true(foo)") - test("false(foo)") - - def test_invalid_string(self): - def test(s): - with pytest.raises(ValueError) as exc_info: - ExpectedResult.from_str(s) - assert str(exc_info.value) == f"Not a valid expected verdict: {s}" - - test("foo") - test("unknown") - test("true()") - - -@pytest.fixture(scope="class") -def disable_non_critical_logging(): - logging.disable(logging.CRITICAL) - - 
-@pytest.mark.usefixtures("disable_non_critical_logging") -class TestResult: - def expected_result(self, result, subcategory=None): - return {"dummy.prp": ExpectedResult(result, subcategory)} - - prop_call = Property("dummy.prp", True, "unreach-call") - prop_deadlock = Property("dummy.prp", True, "no-deadlock") - prop_memcleanup = Property("dummy.prp", True, "valid-memcleanup") - prop_memsafety = Property("dummy.prp", True, "valid-memsafety") - prop_overflow = Property("dummy.prp", True, "no-overflow") - prop_termination = Property("dummy.prp", True, "termination") - prop_sat = Property("dummy.prp", False, "satisfiable") - - def _test_Property_from_file(self, content, is_svcomp): - with tempfile.NamedTemporaryFile( - mode="wt", prefix="BenchExec_test_result", suffix=".prp" - ) as temp_file: - temp_file.write(content) - temp_file.flush() - filename = temp_file.name - - assert Property( - filename=filename, - is_svcomp=is_svcomp, - name=os.path.splitext(os.path.basename(filename))[0], - ) == Property.create( - filename - ), f"different result for property file with content\n{ content }" - - def test_Property_from_non_standard_file(self): - self._test_Property_from_file("", False) - self._test_Property_from_file(" ", False) - self._test_Property_from_file(" CHECK( init(main()), LTL(G p) )", False) - self._test_Property_from_file("test property", False) - self._test_Property_from_file("CHECK( init(main()), LTL(G p) )\ntest", False) - - def test_Property_from_sv_comp_file(self): - self._test_Property_from_file("CHECK( init(main()), LTL(G p) )", True) - self._test_Property_from_file( - "CHECK( init(main()), LTL(G p) )\n\nCHECK( init(main()), LTL(F end) )", True - ) - self._test_Property_from_file( - "CHECK( init(main()), LTL(G valid-free) )\nCHECK( init(main()), LTL(G valid-deref) )", - True, - ) - self._test_Property_from_file( - "CHECK( init(main()), LTL(G valid-free) and LTL(G valid-deref) )", True - ) - - def test_Property_max_score_not_available(self): - assert 0 == self.prop_call.max_score(ExpectedResult(None, None)) - assert None is self.prop_call.max_score(None) - - def test_Property_max_score_smt(self): - assert None is self.prop_sat.max_score(ExpectedResult(True, None)) - assert None is self.prop_sat.max_score(ExpectedResult(False, None)) - - def test_Property_max_score_svcomp(self): - assert _SCORE_CORRECT_TRUE == self.prop_call.max_score( - ExpectedResult(True, None) - ) - assert _SCORE_CORRECT_FALSE == self.prop_call.max_score( - ExpectedResult(False, None) - ) - - assert _SCORE_CORRECT_TRUE == self.prop_memsafety.max_score( - ExpectedResult(True, None) - ) - assert _SCORE_CORRECT_FALSE == self.prop_memsafety.max_score( - ExpectedResult(False, None) - ) - assert _SCORE_CORRECT_FALSE == self.prop_memsafety.max_score( - ExpectedResult(False, "valid-free") - ) - - def test_Property_compute_score_not_available(self): - assert 0 == self.prop_call.compute_score(CATEGORY_MISSING, RESULT_TRUE_PROP) - assert 0 == self.prop_call.compute_score(CATEGORY_ERROR, RESULT_TRUE_PROP) - assert 0 == self.prop_call.compute_score(CATEGORY_UNKNOWN, RESULT_TRUE_PROP) - - def test_Property_compute_score_smt(self): - assert None is self.prop_sat.compute_score(CATEGORY_CORRECT, RESULT_TRUE_PROP) - assert None is self.prop_sat.compute_score(CATEGORY_WRONG, RESULT_TRUE_PROP) - - def test_Property_compute_score_svcomp(self): - assert _SCORE_CORRECT_TRUE == self.prop_call.compute_score( - CATEGORY_CORRECT, RESULT_TRUE_PROP - ) - assert _SCORE_CORRECT_FALSE == self.prop_call.compute_score( - CATEGORY_CORRECT, 
RESULT_FALSE_REACH - ) - assert _SCORE_CORRECT_TRUE == self.prop_memsafety.compute_score( - CATEGORY_CORRECT, RESULT_TRUE_PROP - ) - assert _SCORE_CORRECT_FALSE == self.prop_memsafety.compute_score( - CATEGORY_CORRECT, RESULT_FALSE_MEMTRACK - ) - assert _SCORE_CORRECT_TRUE == self.prop_termination.compute_score( - CATEGORY_CORRECT, RESULT_TRUE_PROP - ) - assert _SCORE_CORRECT_FALSE == self.prop_termination.compute_score( - CATEGORY_CORRECT, RESULT_FALSE_TERMINATION - ) - assert _SCORE_CORRECT_TRUE == self.prop_overflow.compute_score( - CATEGORY_CORRECT, RESULT_TRUE_PROP - ) - assert _SCORE_CORRECT_FALSE == self.prop_overflow.compute_score( - CATEGORY_CORRECT, RESULT_FALSE_OVERFLOW - ) - assert _SCORE_CORRECT_TRUE == self.prop_deadlock.compute_score( - CATEGORY_CORRECT, RESULT_TRUE_PROP - ) - assert _SCORE_CORRECT_FALSE == self.prop_deadlock.compute_score( - CATEGORY_CORRECT, RESULT_FALSE_DEADLOCK - ) - - assert _SCORE_WRONG_FALSE == self.prop_call.compute_score( - CATEGORY_WRONG, RESULT_FALSE_REACH - ) - assert _SCORE_WRONG_TRUE == self.prop_call.compute_score( - CATEGORY_WRONG, RESULT_TRUE_PROP - ) - assert _SCORE_WRONG_FALSE == self.prop_memsafety.compute_score( - CATEGORY_WRONG, RESULT_FALSE_MEMTRACK - ) - assert _SCORE_WRONG_TRUE == self.prop_memsafety.compute_score( - CATEGORY_WRONG, RESULT_TRUE_PROP - ) - assert _SCORE_WRONG_FALSE == self.prop_memsafety.compute_score( - CATEGORY_WRONG, RESULT_FALSE_DEREF - ) - assert _SCORE_WRONG_FALSE == self.prop_termination.compute_score( - CATEGORY_WRONG, RESULT_FALSE_TERMINATION - ) - assert _SCORE_WRONG_TRUE == self.prop_termination.compute_score( - CATEGORY_WRONG, RESULT_TRUE_PROP - ) - assert _SCORE_WRONG_FALSE == self.prop_overflow.compute_score( - CATEGORY_WRONG, RESULT_FALSE_OVERFLOW - ) - assert _SCORE_WRONG_TRUE == self.prop_overflow.compute_score( - CATEGORY_WRONG, RESULT_TRUE_PROP - ) - assert _SCORE_WRONG_FALSE == self.prop_deadlock.compute_score( - CATEGORY_WRONG, RESULT_FALSE_OVERFLOW - ) - assert _SCORE_WRONG_TRUE == self.prop_deadlock.compute_score( - CATEGORY_WRONG, RESULT_TRUE_PROP - ) - - def test_result_classification(self): - assert RESULT_CLASS_TRUE == get_result_classification(RESULT_TRUE_PROP) - - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_REACH) - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_DEREF) - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_FREE) - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_MEMTRACK) - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_TERMINATION) - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_OVERFLOW) - assert RESULT_CLASS_FALSE == get_result_classification(RESULT_FALSE_PROP) - assert RESULT_CLASS_FALSE == get_result_classification( - RESULT_FALSE_PROP + "(test)" - ) - - assert RESULT_CLASS_OTHER == get_result_classification(RESULT_DONE) - assert RESULT_CLASS_OTHER == get_result_classification(RESULT_UNKNOWN) - assert RESULT_CLASS_OTHER == get_result_classification("KILLED") - assert RESULT_CLASS_OTHER == get_result_classification("TIMEOUT") - assert RESULT_CLASS_OTHER == get_result_classification("") - - def test_result_category_true(self): - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_call] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_call] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), 
RESULT_TRUE_PROP, [self.prop_memsafety] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_TRUE_PROP, - [self.prop_memsafety], - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_memcleanup] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_memcleanup] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_overflow] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_overflow] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [self.prop_deadlock] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [self.prop_deadlock] - ) - - test_prop = Property("dummy.prp", True, "test prop") - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [test_prop] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [test_prop] - ) - - test_prop = Property("dummy.prp", True, "test prop") - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [test_prop] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(False, "a"), RESULT_TRUE_PROP, [test_prop] - ) - - def test_result_category_false(self): - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_REACH, [self.prop_call] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_REACH, [self.prop_call] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_DEREF, [self.prop_memsafety] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_FREE, [self.prop_memsafety] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_MEMTRACK, [self.prop_memsafety] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False, "valid-deref"), - RESULT_FALSE_DEREF, - [self.prop_memsafety], - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False, "valid-free"), - RESULT_FALSE_FREE, - [self.prop_memsafety], - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_FALSE_MEMTRACK, - [self.prop_memsafety], - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "valid-deref"), - RESULT_FALSE_FREE, - [self.prop_memsafety], - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "valid-free"), - RESULT_FALSE_MEMTRACK, - [self.prop_memsafety], - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_FALSE_DEREF, - [self.prop_memsafety], - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), - RESULT_FALSE_TERMINATION, - [self.prop_termination], - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), - RESULT_FALSE_TERMINATION, - 
[self.prop_termination], - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_overflow] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_OVERFLOW, [self.prop_overflow] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_DEADLOCK, [self.prop_deadlock] - ) - - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_call] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_call] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_termination] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_termination] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_overflow] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_overflow] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_deadlock] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [self.prop_deadlock] - ) - - test_prop = Property("dummy.prp", True, "test prop") - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [test_prop] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_PROP, [test_prop] - ) - # arbitrary subproperties allowed if property does not specify one - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False), RESULT_FALSE_PROP + "(a)", [test_prop] - ) - - test_prop = Property("dummy.prp", True, "test prop") - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [test_prop] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP + "(a)", [test_prop] - ) - assert CATEGORY_CORRECT == get_result_category( - self.expected_result(False, "a"), RESULT_FALSE_PROP + "(a)", [test_prop] - ) - - def test_result_category_different_false_result(self): - expected_result_false = self.expected_result(False) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_call] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_call] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_call] - ) - - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_termination] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_termination] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_termination] - ) - - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_sat] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_sat] - ) - assert CATEGORY_CORRECT == get_result_category( - 
expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_sat] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_OVERFLOW, [self.prop_sat] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_sat] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_PROP, [self.prop_sat] - ) - - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_overflow] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_overflow] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_overflow] - ) - - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_REACH, [self.prop_deadlock] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_DEREF, [self.prop_deadlock] - ) - assert CATEGORY_CORRECT == get_result_category( - expected_result_false, RESULT_FALSE_TERMINATION, [self.prop_deadlock] - ) - - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_OVERFLOW, [self.prop_call] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_REACH, [self.prop_termination] - ) - assert CATEGORY_WRONG == get_result_category( - self.expected_result(True), RESULT_FALSE_PROP, [self.prop_memsafety] - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "valid-deref"), - RESULT_FALSE_PROP, - [self.prop_memsafety], - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "valid-free"), - RESULT_FALSE_PROP, - [self.prop_memsafety], - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "valid-memtrack"), - RESULT_FALSE_PROP, - [self.prop_memsafety], - ) - - test_prop = Property("dummy.prp", True, "test prop") - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(False, "a"), RESULT_FALSE_PROP, [test_prop] - ) - - def test_result_category_no_property(self): - assert CATEGORY_MISSING == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(False, "valid-memtrack.c"), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(True), RESULT_TRUE_PROP, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(False), RESULT_TRUE_PROP, [] - ) - - def test_result_category_no_expected_result(self): - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), RESULT_TRUE_PROP, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), RESULT_FALSE_PROP, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), RESULT_TRUE_PROP, [self.prop_memsafety] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), 
RESULT_FALSE_FREE, [self.prop_memsafety] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), RESULT_TRUE_PROP, [self.prop_termination] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), RESULT_FALSE_PROP, [self.prop_termination] - ) - - assert CATEGORY_MISSING == get_result_category( - {}, RESULT_TRUE_PROP, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - {}, RESULT_FALSE_PROP, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - {}, RESULT_TRUE_PROP, [self.prop_memsafety] - ) - assert CATEGORY_MISSING == get_result_category( - {}, RESULT_FALSE_FREE, [self.prop_memsafety] - ) - assert CATEGORY_MISSING == get_result_category( - {}, RESULT_TRUE_PROP, [self.prop_termination] - ) - assert CATEGORY_MISSING == get_result_category( - {}, RESULT_FALSE_PROP, [self.prop_termination] - ) - - def test_result_category_different_property(self): - def other_expected_result(result, subcategory=None): - return {"different-file.prp": ExpectedResult(result, subcategory)} - - assert CATEGORY_MISSING == get_result_category( - other_expected_result(True), RESULT_TRUE_PROP, [self.prop_termination] - ) - assert CATEGORY_MISSING == get_result_category( - other_expected_result(False), RESULT_TRUE_PROP, [self.prop_termination] - ) - assert CATEGORY_MISSING == get_result_category( - other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - other_expected_result(False, "valid-memtrack"), - RESULT_TRUE_PROP, - [self.prop_call], - ) - assert CATEGORY_MISSING == get_result_category( - other_expected_result(True), RESULT_TRUE_PROP, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - other_expected_result(False), RESULT_TRUE_PROP, [self.prop_call] - ) - - def test_result_category_other(self): - assert CATEGORY_MISSING == get_result_category( - self.expected_result(True), RESULT_DONE, [self.prop_call] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(True), RESULT_DONE, [] - ) - assert CATEGORY_MISSING == get_result_category( - self.expected_result(None), RESULT_DONE, [self.prop_call] - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(True), RESULT_UNKNOWN, [self.prop_call] - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(True), RESULT_UNKNOWN, [] - ) - assert CATEGORY_UNKNOWN == get_result_category( - self.expected_result(None), RESULT_UNKNOWN, [self.prop_call] - ) - assert CATEGORY_ERROR == get_result_category( - self.expected_result(True), "KILLED", [self.prop_call] - ) - assert CATEGORY_ERROR == get_result_category( - self.expected_result(True), "TIMEOUT", [self.prop_call] - ) - assert CATEGORY_ERROR == get_result_category( - self.expected_result(True), "", [self.prop_call] - ) diff --git a/benchexec/pytest_runexecutor.py b/benchexec/pytest_runexecutor.py deleted file mode 100644 index 340164887..000000000 --- a/benchexec/pytest_runexecutor.py +++ /dev/null @@ -1,1223 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -import contextlib -import logging -import os -import re -import subprocess -import sys -import tempfile -import threading -import time -import unittest -import shutil - -from benchexec import container -from benchexec import containerexecutor -from benchexec import 
filehierarchylimit -from benchexec.runexecutor import RunExecutor -from benchexec.cgroups import Cgroups -from benchexec import runexecutor -from benchexec import util - -sys.dont_write_bytecode = True # prevent creation of .pyc files - -here = os.path.dirname(__file__) -base_dir = os.path.join(here, "..") -bin_dir = os.path.join(base_dir, "bin") -runexec = os.path.join(bin_dir, "runexec") - -trivial_run_grace_time = 0.2 - - -class TestRunExecutor(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - logging.disable(logging.NOTSET) # need to make sure to get all messages - if not hasattr(cls, "assertRegex"): - cls.assertRegex = cls.assertRegexpMatches - - cls.cgroups = Cgroups.initialize() - - cls.echo = shutil.which("echo") or "/bin/echo" - cls.sleep = shutil.which("sleep") or "/bin/sleep" - cls.cat = shutil.which("cat") or "/bin/cat" - - def setUp(self, *args, **kwargs): - with self.skip_if_logs( - "Cannot reliably kill sub-processes without freezer cgroup" - ): - self.runexecutor = RunExecutor(*args, use_namespaces=False, **kwargs) - - @contextlib.contextmanager - def skip_if_logs(self, error_msg): - """A context manager that automatically marks the test as skipped if SystemExit - is thrown and the given error message had been logged with level ERROR.""" - # Note: assertLogs checks that there is at least one log message of given level. - # This is not what we want, so we just rely on one debug message being present. - try: - with self.assertLogs(level=logging.DEBUG) as log: - yield - except SystemExit as e: - if any( - record.levelno == logging.ERROR and record.msg.startswith(error_msg) - for record in log.records - ): - self.skipTest(e) - raise e - - def execute_run(self, *args, expect_terminationreason=None, **kwargs): - (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) - try: - result = self.runexecutor.execute_run(list(args), output_filename, **kwargs) - output = os.read(output_fd, 4096).decode() - finally: - os.close(output_fd) - os.remove(output_filename) - - self.check_result_keys(result, "terminationreason") - if isinstance(expect_terminationreason, list): - self.assertIn( - result.get("terminationreason"), - expect_terminationreason, - "Unexpected terminationreason, output is \n" + output, - ) - else: - self.assertEqual( - result.get("terminationreason"), - expect_terminationreason, - "Unexpected terminationreason, output is \n" + output, - ) - return (result, output.splitlines()) - - def get_runexec_cmdline(self, *args, **kwargs): - return [ - "python3", - runexec, - "--no-container", - "--output", - kwargs["output_filename"], - ] + list(args) - - def execute_run_extern(self, *args, expect_terminationreason=None, **kwargs): - (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) - try: - runexec_output = subprocess.check_output( - args=self.get_runexec_cmdline(*args, output_filename=output_filename), - stderr=subprocess.DEVNULL, - universal_newlines=True, - **kwargs, - ) - output = os.read(output_fd, 4096).decode() - except subprocess.CalledProcessError as e: - print(e.output) - raise e - finally: - os.close(output_fd) - os.remove(output_filename) - - result = { - key.strip(): value.strip() - for (key, _, value) in ( - line.partition("=") for line in runexec_output.splitlines() - ) - } - self.check_result_keys(result, "terminationreason", "returnvalue") - if isinstance(expect_terminationreason, list): - self.assertIn( - result.get("terminationreason"), - 
expect_terminationreason, - "Unexpected terminationreason, output is \n" + output, - ) - else: - self.assertEqual( - result.get("terminationreason"), - expect_terminationreason, - "Unexpected terminationreason, output is \n" + output, - ) - return (result, output.splitlines()) - - def check_command_in_output(self, output, cmd): - self.assertEqual(output[0], cmd, "run output misses executed command") - - def check_result_keys(self, result, *additional_keys): - expected_keys = { - "cputime", - "walltime", - "memory", - "exitcode", - "cpuenergy", - "blkio-read", - "blkio-write", - "starttime", - "pressure-cpu-some", - "pressure-io-some", - "pressure-memory-some", - } - expected_keys.update(additional_keys) - for key in result.keys(): - if key.startswith("cputime-cpu"): - self.assertRegex( - key, - "^cputime-cpu[0-9]+$", - f"unexpected result entry '{key}={result[key]}'", - ) - elif key.startswith("cpuenergy-"): - self.assertRegex( - key, - "^cpuenergy-pkg[0-9]+-(package|core|uncore|dram|psys)$", - f"unexpected result entry '{key}={result[key]}'", - ) - else: - self.assertIn( - key, - expected_keys, - f"unexpected result entry '{key}={result[key]}'", - ) - - def check_exitcode(self, result, exitcode, msg=None): - self.assertEqual(result["exitcode"].raw, exitcode, msg) - - def check_exitcode_extern(self, result, exitcode, msg=None): - exitcode = util.ProcessExitCode.from_raw(exitcode) - if exitcode.value is not None: - self.assertEqual(int(result["returnvalue"]), exitcode.value, msg) - else: - self.assertEqual(int(result["exitsignal"]), exitcode.signal, msg) - - def test_command_output(self): - if not os.path.exists(self.echo): - self.skipTest("missing echo") - (_, output) = self.execute_run(self.echo, "TEST_TOKEN") - self.check_command_in_output(output, f"{self.echo} TEST_TOKEN") - self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") - for line in output[1:-1]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_command_error_output(self): - if not os.path.exists(self.echo): - self.skipTest("missing echo") - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - - def execute_Run_intern(*args, **kwargs): - (error_fd, error_filename) = tempfile.mkstemp(".log", "error_", text=True) - try: - (_, output_lines) = self.execute_run( - *args, error_filename=error_filename, **kwargs - ) - error_lines = os.read(error_fd, 4096).decode().splitlines() - return (output_lines, error_lines) - finally: - os.close(error_fd) - os.remove(error_filename) - - (output_lines, error_lines) = execute_Run_intern( - "/bin/sh", "-c", f"{self.echo} ERROR_TOKEN >&2" - ) - self.assertEqual( - error_lines[-1], "ERROR_TOKEN", "run error output misses command output" - ) - for line in output_lines[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - for line in error_lines[1:-1]: - self.assertRegex(line, "^-*$", "unexpected text in run error output") - - (output_lines, error_lines) = execute_Run_intern(self.echo, "OUT_TOKEN") - self.check_command_in_output(output_lines, f"{self.echo} OUT_TOKEN") - self.check_command_in_output(error_lines, f"{self.echo} OUT_TOKEN") - self.assertEqual( - output_lines[-1], "OUT_TOKEN", "run output misses command output" - ) - for line in output_lines[1:-1]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - for line in error_lines[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run error output") - - def test_command_result(self): - if not os.path.exists(self.echo): - 
self.skipTest("missing echo") - (result, _) = self.execute_run(self.echo, "TEST_TOKEN") - self.check_exitcode(result, 0, "exit code of echo is not zero") - self.assertAlmostEqual( - result["walltime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg="walltime of echo not as expected", - ) - if "cputime" in result: # not present without cpuacct cgroup - self.assertAlmostEqual( - result["cputime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg="cputime of echo not as expected", - ) - self.check_result_keys(result) - - def test_wrong_command(self): - (result, _) = self.execute_run( - "/does/not/exist", expect_terminationreason="failed" - ) - - def test_wrong_command_extern(self): - (result, _) = self.execute_run( - "/does/not/exist", expect_terminationreason="failed" - ) - - def test_cputime_hardlimit(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - with self.skip_if_logs("Time limit cannot be specified without cpuacct cgroup"): - (result, output) = self.execute_run( - "/bin/sh", - "-c", - "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", - hardtimelimit=1, - expect_terminationreason="cputime", - ) - self.check_exitcode(result, 9, "exit code of killed process is not 9") - self.assertAlmostEqual( - result["walltime"], - 1.4, - delta=0.5, - msg="walltime is not approximately the time after which the process should have been killed", - ) - self.assertAlmostEqual( - result["cputime"], - 1.4, - delta=0.5, - msg="cputime is not approximately the time after which the process should have been killed", - ) - - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_cputime_softlimit(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - with self.skip_if_logs( - "Soft time limit cannot be specified without cpuacct cgroup" - ): - (result, output) = self.execute_run( - "/bin/sh", - "-c", - "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", - softtimelimit=1, - expect_terminationreason="cputime-soft", - ) - self.check_exitcode(result, 15, "exit code of killed process is not 15") - self.assertAlmostEqual( - result["walltime"], - 4, - delta=3, - msg="walltime is not approximately the time after which the process should have been killed", - ) - self.assertAlmostEqual( - result["cputime"], - 4, - delta=3, - msg="cputime is not approximately the time after which the process should have been killed", - ) - - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_walltime_limit(self): - if not os.path.exists(self.sleep): - self.skipTest("missing sleep") - (result, output) = self.execute_run( - self.sleep, "10", walltimelimit=1, expect_terminationreason="walltime" - ) - - self.check_exitcode(result, 9, "exit code of killed process is not 9") - self.assertAlmostEqual( - result["walltime"], - 4, - delta=3, - msg="walltime is not approximately the time after which the process should have been killed", - ) - if "cputime" in result: # not present without cpuacct cgroup - self.assertAlmostEqual( - result["cputime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg="cputime of sleep is not approximately zero", - ) - - self.check_command_in_output(output, f"{self.sleep} 10") - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_cputime_walltime_limit(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - with 
self.skip_if_logs("Time limit cannot be specified without cpuacct cgroup"): - (result, output) = self.execute_run( - "/bin/sh", - "-c", - "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", - hardtimelimit=1, - walltimelimit=5, - expect_terminationreason="cputime", - ) - - self.check_exitcode(result, 9, "exit code of killed process is not 9") - self.assertAlmostEqual( - result["walltime"], - 1.4, - delta=0.5, - msg="walltime is not approximately the time after which the process should have been killed", - ) - self.assertAlmostEqual( - result["cputime"], - 1.4, - delta=0.5, - msg="cputime is not approximately the time after which the process should have been killed", - ) - - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_all_timelimits(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - with self.skip_if_logs("Time limit cannot be specified without cpuacct cgroup"): - (result, output) = self.execute_run( - "/bin/sh", - "-c", - "i=0; while [ $i -lt 10000000 ]; do i=$(($i+1)); done; echo $i", - softtimelimit=1, - hardtimelimit=2, - walltimelimit=5, - expect_terminationreason="cputime-soft", - ) - - self.check_exitcode(result, 15, "exit code of killed process is not 15") - self.assertAlmostEqual( - result["walltime"], - 1.4, - delta=0.5, - msg="walltime is not approximately the time after which the process should have been killed", - ) - self.assertAlmostEqual( - result["cputime"], - 1.4, - delta=0.5, - msg="cputime is not approximately the time after which the process should have been killed", - ) - - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_input_is_redirected_from_devnull(self): - if not os.path.exists(self.cat): - self.skipTest("missing cat") - (result, output) = self.execute_run(self.cat, walltimelimit=1) - - self.check_exitcode(result, 0, "exit code of process is not 0") - self.assertAlmostEqual( - result["walltime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg='walltime of "cat < /dev/null" is not approximately zero', - ) - if "cputime" in result: # not present without cpuacct cgroup - self.assertAlmostEqual( - result["cputime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg='cputime of "cat < /dev/null" is not approximately zero', - ) - self.check_result_keys(result) - - self.check_command_in_output(output, self.cat) - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_input_is_redirected_from_file(self): - if not os.path.exists(self.cat): - self.skipTest("missing cat") - with tempfile.TemporaryFile() as tmp: - tmp.write(b"TEST_TOKEN") - tmp.flush() - tmp.seek(0) - (result, output) = self.execute_run(self.cat, stdin=tmp, walltimelimit=1) - - self.check_exitcode(result, 0, "exit code of process is not 0") - self.assertAlmostEqual( - result["walltime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg='walltime of "cat < /dev/null" is not approximately zero', - ) - if "cputime" in result: # not present without cpuacct cgroup - self.assertAlmostEqual( - result["cputime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg='cputime of "cat < /dev/null" is not approximately zero', - ) - self.check_result_keys(result) - - self.check_command_in_output(output, self.cat) - self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") - for line in output[1:-1]: - self.assertRegex(line, "^-*$", "unexpected 
text in run output") - - def test_input_is_redirected_from_stdin(self): - if not os.path.exists(self.cat): - self.skipTest("missing cat") - - (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) - cmd = self.get_runexec_cmdline( - "--input", - "-", - "--walltime", - "1", - self.cat, - output_filename=output_filename, - ) - try: - process = subprocess.Popen( - args=cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - universal_newlines=True, - ) - try: - runexec_output, unused_err = process.communicate("TEST_TOKEN") - except BaseException: - # catch everything, we re-raise - process.kill() - process.wait() - raise - retcode = process.poll() - if retcode: - print(runexec_output) - raise subprocess.CalledProcessError(retcode, cmd, output=runexec_output) - - output = os.read(output_fd, 4096).decode().splitlines() - finally: - os.close(output_fd) - os.remove(output_filename) - - result = { - key.strip(): value.strip() - for (key, _, value) in ( - line.partition("=") for line in runexec_output.splitlines() - ) - } - self.check_exitcode_extern(result, 0, "exit code of process is not 0") - self.assertAlmostEqual( - float(result["walltime"].rstrip("s")), - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg='walltime of "cat < /dev/null" is not approximately zero', - ) - if "cputime" in result: # not present without cpuacct cgroup - self.assertAlmostEqual( - float(result["cputime"].rstrip("s")), - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg='cputime of "cat < /dev/null" is not approximately zero', - ) - self.check_result_keys(result, "returnvalue") - - self.check_command_in_output(output, self.cat) - self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") - for line in output[1:-1]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_append_environment_variable(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - (_, output) = self.execute_run("/bin/sh", "-c", "echo $PATH") - path = output[-1] - (_, output) = self.execute_run( - "/bin/sh", - "-c", - "echo $PATH", - environments={"additionalEnv": {"PATH": ":TEST_TOKEN"}}, - ) - self.assertEqual(output[-1], path + ":TEST_TOKEN") - - def test_new_environment_variable(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - (_, output) = self.execute_run( - "/bin/sh", "-c", "echo $PATH", environments={"newEnv": {"PATH": "/usr/bin"}} - ) - self.assertEqual(output[-1], "/usr/bin") - - def test_stop_run(self): - if not os.path.exists(self.sleep): - self.skipTest("missing sleep") - thread = _StopRunThread(1, self.runexecutor) - thread.start() - (result, output) = self.execute_run( - self.sleep, "10", expect_terminationreason="killed" - ) - thread.join() - - self.check_exitcode(result, 9, "exit code of killed process is not 9") - self.assertAlmostEqual( - result["walltime"], - 1, - delta=0.5, - msg="walltime is not approximately the time after which the process should have been killed", - ) - if "cputime" in result: # not present without cpuacct cgroup - self.assertAlmostEqual( - result["cputime"], - trivial_run_grace_time, - delta=trivial_run_grace_time, - msg="cputime of sleep is not approximately zero", - ) - - self.check_command_in_output(output, f"{self.sleep} 10") - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_reduce_file_size_empty_file(self): - with tempfile.NamedTemporaryFile() as tmp: - 
runexecutor._reduce_file_size_if_necessary(tmp.name, 0) - self.assertEqual(os.path.getsize(tmp.name), 0) - - def test_reduce_file_size_empty_file2(self): - with tempfile.NamedTemporaryFile() as tmp: - runexecutor._reduce_file_size_if_necessary(tmp.name, 500) - self.assertEqual(os.path.getsize(tmp.name), 0) - - def test_reduce_file_size_long_line_not_truncated(self): - with tempfile.NamedTemporaryFile(mode="wt") as tmp: - content = "Long line " * 500 - tmp.write(content) - tmp.flush() - runexecutor._reduce_file_size_if_necessary(tmp.name, 500) - with open(tmp.name, "rt") as tmp2: - self.assertMultiLineEqual(tmp2.read(), content) - - REDUCE_WARNING_MSG = ( - "WARNING: YOUR LOGFILE WAS TOO LONG, SOME LINES IN THE MIDDLE WERE REMOVED." - ) - REDUCE_OVERHEAD = 100 - - def test_reduce_file_size(self): - with tempfile.NamedTemporaryFile(mode="wt") as tmp: - line = "Some text\n" - tmp.write(line * 500) - tmp.flush() - limit = 500 - runexecutor._reduce_file_size_if_necessary(tmp.name, limit) - self.assertLessEqual( - os.path.getsize(tmp.name), limit + self.REDUCE_OVERHEAD - ) - with open(tmp.name, "rt") as tmp2: - new_content = tmp2.read() - self.assertIn(self.REDUCE_WARNING_MSG, new_content) - self.assertTrue(new_content.startswith(line)) - self.assertTrue(new_content.endswith(line)) - - def test_reduce_file_size_limit_zero(self): - with tempfile.NamedTemporaryFile(mode="wt") as tmp: - line = "Some text\n" - tmp.write(line * 500) - tmp.flush() - runexecutor._reduce_file_size_if_necessary(tmp.name, 0) - self.assertLessEqual(os.path.getsize(tmp.name), self.REDUCE_OVERHEAD) - with open(tmp.name, "rt") as tmp2: - new_content = tmp2.read() - self.assertIn(self.REDUCE_WARNING_MSG, new_content) - self.assertTrue(new_content.startswith(line)) - - def test_append_crash_dump_info(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - (result, output) = self.execute_run( - "/bin/sh", - "-c", - 'echo "# An error report file with more information is saved as:";' - 'echo "# $(pwd)/hs_err_pid_1234.txt";' - "echo TEST_TOKEN > hs_err_pid_1234.txt;" - "exit 2", - ) - self.assertEqual( - output[-1], "TEST_TOKEN", "log file misses content from crash dump file" - ) - - def test_integration(self): - if not os.path.exists(self.echo): - self.skipTest("missing echo") - (result, output) = self.execute_run_extern(self.echo, "TEST_TOKEN") - self.check_exitcode_extern(result, 0, "exit code of echo is not zero") - self.check_result_keys(result, "returnvalue") - - self.check_command_in_output(output, f"{self.echo} TEST_TOKEN") - self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") - for line in output[1:-1]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_home_and_tmp_is_separate(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - (result, output) = self.execute_run("/bin/sh", "-c", "echo $HOME $TMPDIR") - self.check_exitcode(result, 0, "exit code of /bin/sh is not zero") - self.assertRegex( - output[-1], - "/BenchExec_run_[^/]*/home .*/BenchExec_run_[^/]*/tmp", - "HOME or TMPDIR variable does not contain expected temporary directory", - ) - - def test_temp_dirs_are_removed(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - (result, output) = self.execute_run("/bin/sh", "-c", "echo $HOME $TMPDIR") - self.check_exitcode(result, 0, "exit code of /bin/sh is not zero") - home_dir = output[-1].split(" ")[0] - temp_dir = output[-1].split(" ")[1] - self.assertFalse( - os.path.exists(home_dir), - 
f"temporary home directory {home_dir} was not cleaned up", - ) - self.assertFalse( - os.path.exists(temp_dir), - f"temporary temp directory {temp_dir} was not cleaned up", - ) - - def test_home_is_writable(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - (result, output) = self.execute_run("/bin/sh", "-c", "touch $HOME/TEST_FILE") - self.check_exitcode( - result, - 0, - f"Failed to write to $HOME/TEST_FILE, output was\n{output}", - ) - - def test_no_cleanup_temp(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - self.setUp(cleanup_temp_dir=False) # create RunExecutor with desired parameter - (result, output) = self.execute_run( - "/bin/sh", "-c", 'echo "$TMPDIR"; echo "" > "$TMPDIR/test"' - ) - self.check_exitcode(result, 0, "exit code of /bin/sh is not zero") - temp_dir = output[-1] - test_file = os.path.join(temp_dir, "test") - subprocess.run(["test", "-f", test_file], check=True) - self.assertEqual( - "tmp", os.path.basename(temp_dir), "unexpected name of temp dir" - ) - self.assertNotEqual( - "/tmp", temp_dir, "temp dir should not be the global temp dir" - ) - subprocess.run(["rm", "-r", os.path.dirname(temp_dir)], check=True) - - def test_require_cgroup_invalid(self): - with self.assertLogs(level=logging.ERROR) as log: - with self.assertRaises(SystemExit): - RunExecutor(additional_cgroup_subsystems=["invalid"]) - - self.assertIn( - 'Cgroup subsystem "invalid" was required but is not available', - "\n".join(log.output), - ) - - def test_require_cgroup_cpu(self): - try: - self.setUp(additional_cgroup_subsystems=["cpu"]) - except SystemExit as e: - self.skipTest(e) - if not os.path.exists(self.cat): - self.skipTest("missing cat") - if self.cgroups.version != 1: - self.skipTest("not relevant in unified hierarchy") - (result, output) = self.execute_run(self.cat, "/proc/self/cgroup") - self.check_exitcode(result, 0, "exit code of cat is not zero") - for line in output: - if re.match(r"^[0-9]*:([^:]*,)?cpu(,[^:]*)?:/(.*/)?benchmark_.*$", line): - return # Success - self.fail("Not in expected cgroup for subsystem cpu:\n" + "\n".join(output)) - - def test_set_cgroup_cpu_shares(self): - if not os.path.exists(self.echo): - self.skipTest("missing echo") - try: - if self.cgroups.version == 1: - self.setUp(additional_cgroup_subsystems=["cpu"]) - else: - self.setUp(additional_cgroup_subsystems=["memory"]) - except SystemExit as e: - self.skipTest(e) - if self.cgroups.version == 1: - cgValues = {("cpu", "shares"): 42} - else: - cgValues = {("memory", "high"): 420000000} - (result, _) = self.execute_run(self.echo, cgroupValues=cgValues) - self.check_exitcode(result, 0, "exit code of echo is not zero") - # Just assert that execution was successful, - # testing that the value was actually set is much more difficult. 
- - def test_nested_runexec(self): - if not os.path.exists(self.echo): - self.skipTest("missing echo") - self.setUp( - dir_modes={ - # Do not mark /home hidden, would fail with python from virtualenv - "/": containerexecutor.DIR_READ_ONLY, - "/tmp": containerexecutor.DIR_FULL_ACCESS, # for inner_output_file - "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, - } - ) - inner_args = ["--", self.echo, "TEST_TOKEN"] - - with tempfile.NamedTemporaryFile( - mode="r", prefix="inner_output_", suffix=".log" - ) as inner_output_file: - inner_cmdline = self.get_runexec_cmdline( - *inner_args, output_filename=inner_output_file.name - ) - outer_result, outer_output = self.execute_run(*inner_cmdline) - inner_output = inner_output_file.read().strip().splitlines() - - logging.info("Outer output:\n%s", "\n".join(outer_output)) - logging.info("Inner output:\n%s", "\n".join(inner_output)) - self.check_result_keys(outer_result, "returnvalue") - self.check_exitcode(outer_result, 0, "exit code of inner runexec is not zero") - self.check_command_in_output(inner_output, f"{self.echo} TEST_TOKEN") - self.assertEqual( - inner_output[-1], "TEST_TOKEN", "run output misses command output" - ) - - def test_starttime(self): - if not os.path.exists(self.echo): - self.skipTest("missing echo") - before = util.read_local_time() - (result, _) = self.execute_run(self.echo) - after = util.read_local_time() - self.check_result_keys(result) - run_starttime = result["starttime"] - self.assertIsNotNone(run_starttime.tzinfo, "start time is not a local time") - self.assertLessEqual(before, run_starttime) - self.assertLessEqual(run_starttime, after) - - def test_frozen_process(self): - # https://github.com/sosy-lab/benchexec/issues/840 - if not os.path.exists(self.sleep): - self.skipTest("missing sleep") - if self.cgroups.version == 1 and not os.path.exists("/sys/fs/cgroup/freezer"): - self.skipTest("missing freezer cgroup") - self.setUp( - dir_modes={ - "/": containerexecutor.DIR_READ_ONLY, - "/home": containerexecutor.DIR_HIDDEN, - "/tmp": containerexecutor.DIR_HIDDEN, - "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, - } - ) - script_v1 = """#!/bin/sh -# create process, move it to sub-cgroup, and freeze it -set -eu - -cgroup="/sys/fs/cgroup/freezer/$(grep freezer /proc/self/cgroup | cut -f 3 -d :)" -mkdir "$cgroup/tmp" -mkdir "$cgroup/tmp/tmp" - -sleep 10 & -child_pid=$! - -echo $child_pid > "$cgroup/tmp/tasks" -echo FROZEN > "$cgroup/tmp/freezer.state" -# remove permissions in order to test our handling of this case -chmod 000 "$cgroup/tmp/freezer.state" -chmod 000 "$cgroup/tmp/tasks" -chmod 000 "$cgroup/tmp" -chmod 000 "$cgroup/freezer.state" -chmod 000 "$cgroup/tasks" -echo FROZEN -wait $child_pid -""" - script_v2 = """#!/bin/sh -# create process, move it to sub-cgroup, and freeze it -set -eu - -cgroup="/sys/fs/cgroup/$(cut -f 3 -d : /proc/self/cgroup)" -mkdir "$cgroup/tmp" -mkdir "$cgroup/tmp/tmp" - -sleep 10 & -child_pid=$! 
- -echo $child_pid > "$cgroup/tmp/cgroup.procs" -echo 1 > "$cgroup/tmp/cgroup.freeze" -# remove permissions in order to test our handling of this case -chmod 000 "$cgroup/tmp/cgroup.freeze" -chmod 000 "$cgroup/tmp/cgroup.procs" -chmod 000 "$cgroup/tmp" -chmod 000 "$cgroup/cgroup.freeze" -chmod 000 "$cgroup/cgroup.kill" -echo FROZEN -wait $child_pid -""" - (result, output) = self.execute_run( - "/bin/sh", - "-c", - script_v1 if self.cgroups.version == 1 else script_v2, - walltimelimit=1, - expect_terminationreason="walltime", - ) - self.check_exitcode(result, 9, "exit code of killed process is not 9") - self.assertAlmostEqual( - result["walltime"], - 2, - delta=0.5, - msg="walltime is not approximately the time after which the process should have been killed", - ) - self.assertEqual( - output[-1], - "FROZEN", - "run output misses command output and was not executed properly", - ) - - -class TestRunExecutorWithContainer(TestRunExecutor): - def setUp(self, *args, **kwargs): - try: - container.execute_in_namespace(lambda: 0) - except OSError as e: - self.skipTest(f"Namespaces not supported: {os.strerror(e.errno)}") - - dir_modes = kwargs.pop( - "dir_modes", - { - "/": containerexecutor.DIR_READ_ONLY, - "/home": containerexecutor.DIR_HIDDEN, - "/tmp": containerexecutor.DIR_HIDDEN, - }, - ) - - self.runexecutor = RunExecutor( - *args, use_namespaces=True, dir_modes=dir_modes, **kwargs - ) - - def get_runexec_cmdline(self, *args, **kwargs): - return [ - "python3", - runexec, - "--container", - "--read-only-dir", - "/", - "--hidden-dir", - "/home", - "--hidden-dir", - "/tmp", - "--dir", - "/tmp", - "--output", - kwargs["output_filename"], - ] + list(args) - - def execute_run(self, *args, **kwargs): - return super(TestRunExecutorWithContainer, self).execute_run( - workingDir="/tmp", *args, **kwargs - ) - - def test_home_and_tmp_is_separate(self): - self.skipTest("not relevant in container") - - def test_temp_dirs_are_removed(self): - self.skipTest("not relevant in container") - - def test_no_cleanup_temp(self): - self.skipTest("not relevant in container") - - def check_result_files( - self, shell_cmd, result_files_patterns, expected_result_files - ): - output_dir = tempfile.mkdtemp("", "output_") - try: - result, output = self.execute_run( - "/bin/sh", - "-c", - shell_cmd, - output_dir=output_dir, - result_files_patterns=result_files_patterns, - ) - output_str = "\n".join(output) - self.assertEqual( - result["exitcode"].value, - 0, - f"exit code of {' '.join(shell_cmd)} is not zero,\n" - f"result was {result!r},\noutput was\n{output_str}", - ) - result_files = [] - for root, _unused_dirs, files in os.walk(output_dir): - for file in files: - result_files.append( - os.path.relpath(os.path.join(root, file), output_dir) - ) - expected_result_files.sort() - result_files.sort() - self.assertListEqual( - result_files, - expected_result_files, - f"\nList of retrieved result files differs from expected list,\n" - f"result was {result!r},\noutput was\n{output_str}", - ) - finally: - shutil.rmtree(output_dir, ignore_errors=True) - - def test_result_file_simple(self): - self.check_result_files("echo TEST_TOKEN > TEST_FILE", ["."], ["TEST_FILE"]) - - def test_result_file_recursive(self): - self.check_result_files( - "mkdir TEST_DIR; echo TEST_TOKEN > TEST_DIR/TEST_FILE", - ["."], - ["TEST_DIR/TEST_FILE"], - ) - - def test_result_file_multiple(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE; echo TEST_TOKEN > TEST_FILE2", - ["."], - ["TEST_FILE", "TEST_FILE2"], - ) - - def 
test_result_file_symlink(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE; ln -s TEST_FILE TEST_LINK", - ["."], - ["TEST_FILE"], - ) - - def test_result_file_no_match(self): - self.check_result_files("echo TEST_TOKEN > TEST_FILE", ["NO_MATCH"], []) - - def test_result_file_no_pattern(self): - self.check_result_files("echo TEST_TOKEN > TEST_FILE", [], []) - - def test_result_file_empty_pattern(self): - self.assertRaises( - ValueError, - lambda: self.check_result_files("echo TEST_TOKEN > TEST_FILE", [""], []), - ) - - def test_result_file_partial_match(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE; mkdir TEST_DIR; echo TEST_TOKEN > TEST_DIR/TEST_FILE", - ["TEST_DIR"], - ["TEST_DIR/TEST_FILE"], - ) - - def test_result_file_multiple_patterns(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE; " - "echo TEST_TOKEN > TEST_FILE2; " - "mkdir TEST_DIR; " - "echo TEST_TOKEN > TEST_DIR/TEST_FILE; ", - ["TEST_FILE", "TEST_DIR/TEST_FILE"], - ["TEST_FILE", "TEST_DIR/TEST_FILE"], - ) - - def test_result_file_wildcard(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE; " - "echo TEST_TOKEN > TEST_FILE2; " - "echo TEST_TOKEN > TEST_NOFILE; ", - ["TEST_FILE*"], - ["TEST_FILE", "TEST_FILE2"], - ) - - def test_result_file_absolute_pattern(self): - self.check_result_files("echo TEST_TOKEN > TEST_FILE", ["/"], ["tmp/TEST_FILE"]) - - def test_result_file_absolute_and_pattern(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE; mkdir TEST_DIR; echo TEST_TOKEN > TEST_DIR/TEST_FILE", - ["TEST_FILE", "/tmp/TEST_DIR"], - ["tmp/TEST_FILE", "tmp/TEST_DIR/TEST_FILE"], - ) - - def test_result_file_relative_traversal(self): - self.check_result_files( - "echo TEST_TOKEN > TEST_FILE", ["foo/../TEST_FILE"], ["TEST_FILE"] - ) - - def test_result_file_illegal_relative_traversal(self): - self.assertRaises( - ValueError, - lambda: self.check_result_files( - "echo TEST_TOKEN > TEST_FILE", ["foo/../../bar"], [] - ), - ) - - def test_result_file_recursive_pattern(self): - self.check_result_files( - "mkdir -p TEST_DIR/TEST_DIR; " - "echo TEST_TOKEN > TEST_FILE.txt; " - "echo TEST_TOKEN > TEST_DIR/TEST_FILE.txt; " - "echo TEST_TOKEN > TEST_DIR/TEST_DIR/TEST_FILE.txt; ", - ["**/*.txt"], - [ - "TEST_FILE.txt", - "TEST_DIR/TEST_FILE.txt", - "TEST_DIR/TEST_DIR/TEST_FILE.txt", - ], - ) - - def test_result_file_log_limit(self): - file_count = containerexecutor._MAX_RESULT_FILE_LOG_COUNT + 10 - with self.assertLogs(level=logging.DEBUG) as log: - # Check that all output files are transferred ... - self.check_result_files( - f"for i in $(seq 1 {file_count}); do touch $i; done", - ["*"], - list(map(str, range(1, file_count + 1))), - ) - # ... but not all output files are logged ... - self.assertEqual( - len([msg for msg in log.output if "Transferring output file" in msg]), - containerexecutor._MAX_RESULT_FILE_LOG_COUNT, - ) - # ... and the final count is correct. 
- count_msg = next(msg for msg in log.output if " output files matched" in msg) - self.assertIn(f"{file_count} output files matched", count_msg) - - def test_file_count_limit(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - self.setUp(container_tmpfs=False) # create RunExecutor with desired parameter - filehierarchylimit._CHECK_INTERVAL_SECONDS = 0.1 - (result, output) = self.execute_run( - "/bin/sh", - "-c", - "for i in $(seq 1 10000); do touch $i; done", - files_count_limit=100, - result_files_patterns=None, - expect_terminationreason="files-count", - ) - - self.check_exitcode(result, 9, "exit code of killed process is not 15") - - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_file_size_limit(self): - if not os.path.exists("/bin/sh"): - self.skipTest("missing /bin/sh") - self.setUp(container_tmpfs=False) # create RunExecutor with desired parameter - filehierarchylimit._CHECK_INTERVAL_SECONDS = 0.1 - (result, output) = self.execute_run( - "/bin/sh", - "-c", - "for i in $(seq 1 100000); do echo $i >> TEST_FILE; done", - files_size_limit=100, - result_files_patterns=None, - expect_terminationreason="files-size", - ) - - self.check_exitcode(result, 9, "exit code of killed process is not 15") - - for line in output[1:]: - self.assertRegex(line, "^-*$", "unexpected text in run output") - - def test_path_with_space(self): - temp_dir = tempfile.mkdtemp(prefix="BenchExec test") - try: - # create RunExecutor with desired parameter - self.setUp( - dir_modes={ - "/": containerexecutor.DIR_READ_ONLY, - "/home": containerexecutor.DIR_HIDDEN, - "/tmp": containerexecutor.DIR_HIDDEN, - temp_dir: containerexecutor.DIR_FULL_ACCESS, - } - ) - temp_file = os.path.join(temp_dir, "TEST_FILE") - result, output = self.execute_run( - "/bin/sh", "-c", f"echo TEST_TOKEN > '{temp_file}'" - ) - self.check_result_keys(result) - self.check_exitcode(result, 0, "exit code of process is not 0") - self.assertTrue( - os.path.exists(temp_file), - f"File '{temp_file}' not created, output was:\n" + "\n".join(output), - ) - with open(temp_file, "r") as f: - self.assertEqual(f.read().strip(), "TEST_TOKEN") - finally: - shutil.rmtree(temp_dir) - - def test_uptime_with_lxcfs(self): - if not os.path.exists("/var/lib/lxcfs/proc"): - self.skipTest("missing lxcfs") - result, output = self.execute_run(self.cat, "/proc/uptime") - self.check_result_keys(result) - self.check_exitcode(result, 0, "exit code for reading uptime is not zero") - uptime = float(output[-1].split(" ")[0]) - self.assertLessEqual( - uptime, 10, f"Uptime {uptime}s unexpectedly high in container" - ) - - def test_uptime_without_lxcfs(self): - if not os.path.exists("/var/lib/lxcfs/proc"): - self.skipTest("missing lxcfs") - # create RunExecutor with desired parameter - self.setUp(container_system_config=False) - result, output = self.execute_run(self.cat, "/proc/uptime") - self.check_result_keys(result) - self.check_exitcode(result, 0, "exit code for reading uptime is not zero") - uptime = float(output[-1].split(" ")[0]) - # If uptime was less than 10s, LXCFS probably was in use - self.assertGreaterEqual( - uptime, 10, f"Uptime {uptime}s unexpectedly low in container" - ) - - -class _StopRunThread(threading.Thread): - def __init__(self, delay, runexecutor): - super(_StopRunThread, self).__init__() - self.daemon = True - self.delay = delay - self.runexecutor = runexecutor - - def run(self): - time.sleep(self.delay) - self.runexecutor.stop() - - -class 
TestRunExecutorUnits(unittest.TestCase): - """unit tests for parts of RunExecutor""" - - def test_get_debug_output_with_error_report_and_invalid_utf8(self): - invalid_utf8 = b"\xFF" - with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as report_file: - with tempfile.NamedTemporaryFile(mode="w+b") as output: - output_content = f"""Dummy output -# An error report file with more information is saved as: -# {report_file.name} -More output -""".encode() # noqa: E800 false alarm - report_content = b"Report output\nMore lines" - output_content += invalid_utf8 - report_content += invalid_utf8 - - output.write(output_content) - output.flush() - output.seek(0) - report_file.write(report_content) - report_file.flush() - - runexecutor._get_debug_output_after_crash(output.name, "") - - self.assertFalse(os.path.exists(report_file.name)) - self.assertEqual(output.read(), output_content + report_content) diff --git a/pyproject.toml b/pyproject.toml index 22c6b0189..87d2cbaf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,6 @@ exclude = [ # TODO 'benchexec/tools', '**/test_*.py', - '**/pytest_*.py', '**/test_*/**.py', ] @@ -70,6 +69,3 @@ ignore = [ # wildcard imports significantly shorten test code, 'F405', ] -'benchexec/pytest*.py' = [ - 'F405', -] diff --git a/setup.cfg b/setup.cfg index 7daa27de4..219c3fd86 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,7 +62,6 @@ zip_safe = True dev = nose >= 1.0 pytest - pytest-mock lxml systemd = pystemd >= 0.7.0 From 5952499295b1cde7009e6478ab9484d7594e4612 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 9 Jun 2024 23:41:19 +0000 Subject: [PATCH 20/52] Add correct globs to pytest --- pytest.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index ef01c3910..68de4a7dd 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,4 +6,5 @@ # SPDX-License-Identifier: Apache-2.0 [pytest] -python_files = pytest_*.py \ No newline at end of file +python_files = test_*.py test_integration/__init__.py +norecursedirs = contrib .docker build \ No newline at end of file From 57f8852aacb4d251da180f384ce75d285c0c4260 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 9 Jun 2024 23:42:43 +0000 Subject: [PATCH 21/52] Move nose to separate testsuite --- setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 219c3fd86..42ce24aaa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -79,7 +79,8 @@ benchexec.tablegenerator = react-table/build/*.min.css [nosetests] +exclude='^test' # Necessary for nose since Python 3.8 to find the tests on Windows traverse-namespace=1 # Necessary to find tests in non-package -include=contrib +include=contrib \ No newline at end of file From 2a200637c427966cfc3192877a37fa19c0710046 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 10 Jun 2024 04:13:59 +0000 Subject: [PATCH 22/52] Ported test_result, test_runexecutor and test_util to pytest --- benchexec/test_result.py | 6 ------ benchexec/test_runexecutor.py | 7 ------- benchexec/test_util.py | 16 ---------------- 3 files changed, 29 deletions(-) diff --git a/benchexec/test_result.py b/benchexec/test_result.py index 157247b62..6de61ef79 100644 --- a/benchexec/test_result.py +++ b/benchexec/test_result.py @@ -18,8 +18,6 @@ _SCORE_WRONG_FALSE, ) -sys.dont_write_bytecode = True # prevent creation of .pyc files - class TestExpectedResult(unittest.TestCase): def test_via_string(self): @@ -56,10 +54,6 @@ def test(s): class TestResult(unittest.TestCase): - @classmethod - def setUpClass(cls): - 
cls.longMessage = True - logging.disable(logging.CRITICAL) def expected_result(self, result, subcategory=None): return {"dummy.prp": ExpectedResult(result, subcategory)} diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 340164887..c1111a17e 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -10,7 +10,6 @@ import os import re import subprocess -import sys import tempfile import threading import time @@ -25,8 +24,6 @@ from benchexec import runexecutor from benchexec import util -sys.dont_write_bytecode = True # prevent creation of .pyc files - here = os.path.dirname(__file__) base_dir = os.path.join(here, "..") bin_dir = os.path.join(base_dir, "bin") @@ -34,13 +31,9 @@ trivial_run_grace_time = 0.2 - class TestRunExecutor(unittest.TestCase): @classmethod def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - logging.disable(logging.NOTSET) # need to make sure to get all messages if not hasattr(cls, "assertRegex"): cls.assertRegex = cls.assertRegexpMatches diff --git a/benchexec/test_util.py b/benchexec/test_util.py index 523d7161a..4aba17c42 100644 --- a/benchexec/test_util.py +++ b/benchexec/test_util.py @@ -6,7 +6,6 @@ # SPDX-License-Identifier: Apache-2.0 from decimal import Decimal -import sys import unittest from benchexec.util import ProcessExitCode import tempfile @@ -15,14 +14,8 @@ from benchexec import util -sys.dont_write_bytecode = True # prevent creation of .pyc files - class TestParse(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None def assertEqualNumberAndUnit(self, value, number, unit): self.assertEqual(util.split_number_and_unit(value), (number, unit)) @@ -103,10 +96,6 @@ def test_print_decimal_float(self): class TestProcessExitCode(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None def ProcessExitCode_with_value(self, value): return ProcessExitCode(raw=value << 8, value=value, signal=None) @@ -137,11 +126,6 @@ def test_signal(self): class TestRmtree(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - def setUp(self): self.base_dir = tempfile.mkdtemp(prefix="BenchExec_test_util_rmtree") From e2c614df3d293f1f43f3169aae9d5773933021ba Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 16 Jun 2024 03:02:02 +0000 Subject: [PATCH 23/52] Ported test_analyze_run_result, test_benchmark_definition, test_core_assignment, test_pqos to pytest --- benchexec/test_analyze_run_result.py | 7 ------- benchexec/test_benchmark_definition.py | 4 ---- benchexec/test_core_assignment.py | 6 ------ benchexec/test_pqos.py | 5 ----- 4 files changed, 22 deletions(-) diff --git a/benchexec/test_analyze_run_result.py b/benchexec/test_analyze_run_result.py index 4c9461659..ff1796839 100644 --- a/benchexec/test_analyze_run_result.py +++ b/benchexec/test_analyze_run_result.py @@ -20,17 +20,10 @@ ) from benchexec.tools.template import BaseTool -sys.dont_write_bytecode = True # prevent creation of .pyc files - normal_result = ProcessExitCode(raw=0, value=0, signal=None) class TestResult(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - logging.disable(logging.CRITICAL) - def create_run(self, info_result=RESULT_UNKNOWN): runSet = types.SimpleNamespace() runSet.log_folder = "." 
diff --git a/benchexec/test_benchmark_definition.py b/benchexec/test_benchmark_definition.py index 221de868e..0ccf07fb6 100644 --- a/benchexec/test_benchmark_definition.py +++ b/benchexec/test_benchmark_definition.py @@ -69,10 +69,6 @@ class TestBenchmarkDefinition(unittest.TestCase): testing mostly the classes from benchexec.model. """ - @classmethod - def setUpClass(cls): - cls.longMessage = True - @patch("benchexec.model.load_task_definition_file", new=mock_load_task_def_file) @patch("benchexec.result.Property.create", new=mock_property_create) @patch("benchexec.util.expand_filename_pattern", new=mock_expand_filename_pattern) diff --git a/benchexec/test_core_assignment.py b/benchexec/test_core_assignment.py index 4e6d14adb..fd2e4aa66 100644 --- a/benchexec/test_core_assignment.py +++ b/benchexec/test_core_assignment.py @@ -13,18 +13,12 @@ from benchexec.resources import _get_cpu_cores_per_run0 -sys.dont_write_bytecode = True # prevent creation of .pyc files - def lrange(start, end): return list(range(start, end)) class TestCpuCoresPerRun(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - logging.disable(logging.CRITICAL) def assertValid(self, coreLimit, num_of_threads, expectedResult=None): result = _get_cpu_cores_per_run0( diff --git a/benchexec/test_pqos.py b/benchexec/test_pqos.py index eb00cbcaf..0964e9e35 100644 --- a/benchexec/test_pqos.py +++ b/benchexec/test_pqos.py @@ -16,7 +16,6 @@ from unittest.mock import patch, MagicMock from benchexec.pqos import Pqos - mock_pqos_wrapper_output = { "load_pqos": { "function_output": {}, @@ -147,10 +146,6 @@ class TestPqos(unittest.TestCase): Unit tests for pqos module """ - @classmethod - def setUpClass(cls): - logging.disable(logging.CRITICAL) - @patch("benchexec.pqos.find_executable2", return_value="/path/to/pqos_wrapper/lib") def test_pqos_init(self, mock_find_executable): """ From f417ae57c4befd83520e75a2d9328c57850b588e Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 16 Jun 2024 19:47:27 +0000 Subject: [PATCH 24/52] Ported test_cgroups, working for 3.9 --- benchexec/test_cgroups.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/benchexec/test_cgroups.py b/benchexec/test_cgroups.py index 1fbf927eb..fc7b6948c 100644 --- a/benchexec/test_cgroups.py +++ b/benchexec/test_cgroups.py @@ -12,16 +12,8 @@ from benchexec import check_cgroups -sys.dont_write_bytecode = True # prevent creation of .pyc files - class TestCheckCgroups(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - logging.disable(logging.CRITICAL) - def execute_run_extern(self, *args, **kwargs): try: return subprocess.check_output( From 5ed6c2e1a379de7ec966db37f88ea3b9af3fdb4b Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 16 Jun 2024 19:51:00 +0000 Subject: [PATCH 25/52] reformatted test --- benchexec/test_runexecutor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index c1111a17e..75cf1886a 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -31,6 +31,7 @@ trivial_run_grace_time = 0.2 + class TestRunExecutor(unittest.TestCase): @classmethod def setUpClass(cls): From 51ae6a344eaf469144f052c399b938b706d6cd37 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 16 Jun 2024 23:38:54 +0000 Subject: [PATCH 26/52] reverted test_cgroups --- benchexec/test_cgroups.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/benchexec/test_cgroups.py b/benchexec/test_cgroups.py 
index fc7b6948c..1fbf927eb 100644 --- a/benchexec/test_cgroups.py +++ b/benchexec/test_cgroups.py @@ -12,8 +12,16 @@ from benchexec import check_cgroups +sys.dont_write_bytecode = True # prevent creation of .pyc files + class TestCheckCgroups(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.longMessage = True + cls.maxDiff = None + logging.disable(logging.CRITICAL) + def execute_run_extern(self, *args, **kwargs): try: return subprocess.check_output( From 3ce248083baf407358e4d7d1b4b3b755de34fa13 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 24 Jun 2024 07:02:16 +0000 Subject: [PATCH 27/52] ported test_integration --- benchexec/test_integration/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/benchexec/test_integration/__init__.py b/benchexec/test_integration/__init__.py index f38847580..44afaed78 100644 --- a/benchexec/test_integration/__init__.py +++ b/benchexec/test_integration/__init__.py @@ -17,8 +17,6 @@ from xml.etree import ElementTree -sys.dont_write_bytecode = True # prevent creation of .pyc files - here = os.path.dirname(__file__) base_dir = os.path.join(here, "..", "..") bin_dir = os.path.join(base_dir, "bin") @@ -45,10 +43,6 @@ class BenchExecIntegrationTests(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None def _build_tmp_dir(self): """ From 72647bfd72f544e5bb4e754227f1ca7df33d62bf Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 24 Jun 2024 13:03:28 +0000 Subject: [PATCH 28/52] ported tablegenerator integration test --- benchexec/tablegenerator/test_columns.py | 16 ++++++++++++++++ .../tablegenerator/test_integration/__init__.py | 7 ------- benchexec/tablegenerator/test_statvalue.py | 7 ------- benchexec/tablegenerator/test_util.py | 6 ------ 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/benchexec/tablegenerator/test_columns.py b/benchexec/tablegenerator/test_columns.py index b1422a4e7..0e114adcd 100644 --- a/benchexec/tablegenerator/test_columns.py +++ b/benchexec/tablegenerator/test_columns.py @@ -7,6 +7,7 @@ from decimal import Decimal import unittest +import pytest from benchexec.tablegenerator.columns import ( Column, @@ -182,6 +183,21 @@ def test_format_value_small_value(self): formatted_value_aligned = small_value_column.format_value("2", "html_cell") self.assertEqual(formatted_value_aligned, ".0000000002  ") + def test_invalid_rounding_mode(self): + import decimal + + decimal.getcontext().rounding = decimal.ROUND_HALF_EVEN + with pytest.raises(AssertionError): + Column( + "CpuTime", + None, + 3, + None, + ColumnMeasureType(12), + unit="dummy", + scale_factor=1e-10, + ) + def test_format_value_align_int(self): formatted_value_int_aligned = self.measure_column.format_value( "20", "html_cell" diff --git a/benchexec/tablegenerator/test_integration/__init__.py b/benchexec/tablegenerator/test_integration/__init__.py index 2695b9ca4..239611e8a 100644 --- a/benchexec/tablegenerator/test_integration/__init__.py +++ b/benchexec/tablegenerator/test_integration/__init__.py @@ -17,8 +17,6 @@ import benchexec.util import benchexec.tablegenerator.util -sys.dont_write_bytecode = True # prevent creation of .pyc files - here = os.path.relpath(os.path.dirname(__file__)) base_dir = os.path.join(here, "..", "..", "..") bin_dir = os.path.join(base_dir, "bin") @@ -38,11 +36,6 @@ class TableGeneratorIntegrationTests(unittest.TestCase): # Tests compare the generated CSV files and ignore the HTML files # because we assume the HTML files change more often on purpose. 
- @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - def setUp(self): # We use a temporary directory inside the source tree to avoid mismatching # path names inside HTML tables. diff --git a/benchexec/tablegenerator/test_statvalue.py b/benchexec/tablegenerator/test_statvalue.py index 4b8344f44..9592f7eb9 100644 --- a/benchexec/tablegenerator/test_statvalue.py +++ b/benchexec/tablegenerator/test_statvalue.py @@ -11,15 +11,8 @@ from benchexec.tablegenerator.statistics import StatValue -sys.dont_write_bytecode = True # prevent creation of .pyc files - class TestStatValue(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - def test_empty(self): self.assertIsNone(StatValue.from_list([])) diff --git a/benchexec/tablegenerator/test_util.py b/benchexec/tablegenerator/test_util.py index f7709c954..dbd5fb65e 100644 --- a/benchexec/tablegenerator/test_util.py +++ b/benchexec/tablegenerator/test_util.py @@ -11,14 +11,8 @@ from benchexec.tablegenerator import util -sys.dont_write_bytecode = True # prevent creation of .pyc files - class TestUnit(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - def assertEqualNumberAndUnit(self, value, number, unit): self.assertEqual(util.split_number_and_unit(value), (number, unit)) From c78717f544ac8384f88adfd4e34869648c55948d Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 24 Jun 2024 18:52:34 +0000 Subject: [PATCH 29/52] changed code under test: added additional assertions, copying context from default context The current implementation fails under pytest, as the context (as given by getcontext()) is already derived from the DefaultContext *before* we change the rounding to our preferred value. Thus, the assertion correctly fails, as the first process still has the default ROUND_HALF_EVEN value, even though all descendant processes do possess the correct value of ROUND_HALF_UP. As a solution, we can copy the DefaultContext as the context of the initial thread. As the assertion afterwards becomes very obviously true, additional assertions are added to check if the individual processes have the correct rounding. --- benchexec/tablegenerator/columns.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/benchexec/tablegenerator/columns.py b/benchexec/tablegenerator/columns.py index 3197b4975..ce9d1967d 100644 --- a/benchexec/tablegenerator/columns.py +++ b/benchexec/tablegenerator/columns.py @@ -18,12 +18,15 @@ __all__ = ["Column", "ColumnType", "ColumnMeasureType"] -# This sets the rounding mode for all Decimal operations in the process. -# It is actually used only as default context for new contexts, but because we set this -# at import time and before any threads are started, it should work according to its -# documentation. We double check with the context of the current thread. +# This sets the rounding mode for all Decimal operations in the process and explicitly for the current context +# As asserting the rounding mode for the current context is almost a tautology, we also assert in the methods +# which are called in their own process, i.e. we additionally assert that each process has the correct rounding +# individually.
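For illustration only (an editorial sketch, not part of this patch or of BenchExec): the pitfall that the commit message of PATCH 29 describes can be reproduced with just the standard library. The helper name show_rounding is invented for this example.

    import decimal
    import threading

    # The first use of getcontext() in a thread creates that thread's context
    # as a copy of DefaultContext, so this call locks in ROUND_HALF_EVEN.
    print(decimal.getcontext().rounding)  # ROUND_HALF_EVEN

    # Changing DefaultContext afterwards does not touch the context the main
    # thread already owns (roughly what happens when something else uses
    # decimal before benchexec.tablegenerator.columns is imported under pytest).
    decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP
    print(decimal.getcontext().rounding)  # still ROUND_HALF_EVEN

    # Threads started from now on copy the updated DefaultContext and therefore
    # round half up - the asymmetry the assertions in this patch are meant to catch.
    def show_rounding():
        print(decimal.getcontext().rounding)  # ROUND_HALF_UP

    t = threading.Thread(target=show_rounding)
    t.start()
    t.join()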
decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP -assert decimal.getcontext().rounding == decimal.ROUND_HALF_UP +decimal.setcontext(decimal.DefaultContext) +assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP +), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" DEFAULT_TIME_PRECISION = 3 DEFAULT_TOOLTIP_PRECISION = 2 @@ -132,6 +135,10 @@ def __init__( relevant_for_diff=None, display_title=None, ): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + # If scaling on the variables is performed, a display unit must be defined, explicitly if scale_factor is not None and scale_factor != 1 and unit is None: raise util.TableDefinitionError( @@ -315,6 +322,10 @@ def _format_number_align(formattedValue, max_number_of_dec_digits): def _get_significant_digits(value): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + if not Decimal(value).is_finite(): return 0 @@ -360,6 +371,10 @@ def _format_number( with the specified number of significant digits, optionally aligned at the decimal point. """ + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + assert format_target in POSSIBLE_FORMAT_TARGETS, "Invalid format " + format_target if number == 0: @@ -580,6 +595,10 @@ def _get_decimal_digits(decimal_number_match, number_of_significant_digits): @return: the number of decimal digits of the given decimal number match's representation, after expanding the number to the required amount of significant digits """ + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + # check that only decimal notation is used assert "e" not in decimal_number_match.group() From f6bec7db5d76f35540a14d5b77acf9bb9eb535bc Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 30 Jun 2024 14:21:02 +0000 Subject: [PATCH 30/52] ported test_cgroups --- benchexec/test_cgroups.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/benchexec/test_cgroups.py b/benchexec/test_cgroups.py index 1fbf927eb..9b2171629 100644 --- a/benchexec/test_cgroups.py +++ b/benchexec/test_cgroups.py @@ -5,23 +5,13 @@ # # SPDX-License-Identifier: Apache-2.0 -import logging import subprocess -import sys import unittest from benchexec import check_cgroups -sys.dont_write_bytecode = True # prevent creation of .pyc files - class TestCheckCgroups(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.longMessage = True - cls.maxDiff = None - logging.disable(logging.CRITICAL) - def execute_run_extern(self, *args, **kwargs): try: return subprocess.check_output( From b9e00867c69b7088eadf599ca02fed88d2bbfc2b Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 30 Jun 2024 19:02:26 +0000 Subject: [PATCH 31/52] Removed nose dependencies --- .appveyor.yml | 2 +- .gitlab-ci.yml | 2 +- .travis.yml | 2 +- setup.cfg | 12 ++---------- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 203854d2a..ff6124958 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -20,4 +20,4 @@ install: - python -m pip install --user ".[dev]" test_script: - - python -m nose --tests benchexec.tablegenerator + - python -m pytest diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml 
index bfc92edd8..647051dd0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -50,7 +50,7 @@ stages: script: - sudo -u $PRIMARY_USER COVERAGE_PROCESS_START=.coveragerc - coverage run -m nose + coverage run -m pytest after_script: - sudo -u $PRIMARY_USER coverage combine - sudo -u $PRIMARY_USER coverage report diff --git a/.travis.yml b/.travis.yml index 2ae961704..c5b1479cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,7 @@ before_script: # and this is difficult to fix, but occurs only in high-load environments. - sed -i benchexec/test_integration/__init__.py -e '/test_simple_parallel/ i \ @unittest.skip("Fails nondeterministically on Travis, probably issue 656")' script: - - python -m nose + - python -m pytest # Revert local modification before checking source format - git checkout . - if which black; then black . --check --diff; fi diff --git a/setup.cfg b/setup.cfg index 42ce24aaa..2650e0c5d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,12 +55,11 @@ license_files = packages = benchexec, benchexec.tablegenerator, benchexec.tools install_requires = PyYAML >= 3.12 -test_suite = nose.collector +test_suite = pytest zip_safe = True [options.extras_require] dev = - nose >= 1.0 pytest lxml systemd = @@ -76,11 +75,4 @@ console_scripts = [options.package_data] benchexec.tablegenerator = react-table/build/*.min.js - react-table/build/*.min.css - -[nosetests] -exclude='^test' -# Necessary for nose since Python 3.8 to find the tests on Windows -traverse-namespace=1 -# Necessary to find tests in non-package -include=contrib \ No newline at end of file + react-table/build/*.min.css \ No newline at end of file From 2307f4aaf18cd0a5bcce34a45d91b084b7d2239a Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 13 Jul 2024 17:53:12 +0000 Subject: [PATCH 32/52] removed unnecessary imports --- benchexec/tablegenerator/test_statvalue.py | 1 - benchexec/tablegenerator/test_util.py | 1 - benchexec/test_analyze_run_result.py | 2 -- benchexec/test_core_assignment.py | 2 -- benchexec/test_pqos.py | 1 - benchexec/test_result.py | 2 -- 6 files changed, 9 deletions(-) diff --git a/benchexec/tablegenerator/test_statvalue.py b/benchexec/tablegenerator/test_statvalue.py index 9592f7eb9..3557e7b89 100644 --- a/benchexec/tablegenerator/test_statvalue.py +++ b/benchexec/tablegenerator/test_statvalue.py @@ -6,7 +6,6 @@ # SPDX-License-Identifier: Apache-2.0 from decimal import Decimal -import sys import unittest from benchexec.tablegenerator.statistics import StatValue diff --git a/benchexec/tablegenerator/test_util.py b/benchexec/tablegenerator/test_util.py index dbd5fb65e..bd469e831 100644 --- a/benchexec/tablegenerator/test_util.py +++ b/benchexec/tablegenerator/test_util.py @@ -6,7 +6,6 @@ # SPDX-License-Identifier: Apache-2.0 from decimal import Decimal -import sys import unittest from benchexec.tablegenerator import util diff --git a/benchexec/test_analyze_run_result.py b/benchexec/test_analyze_run_result.py index ff1796839..1edd7584a 100644 --- a/benchexec/test_analyze_run_result.py +++ b/benchexec/test_analyze_run_result.py @@ -5,8 +5,6 @@ # # SPDX-License-Identifier: Apache-2.0 -import logging -import sys import unittest import types diff --git a/benchexec/test_core_assignment.py b/benchexec/test_core_assignment.py index fd2e4aa66..64e8ecfb2 100644 --- a/benchexec/test_core_assignment.py +++ b/benchexec/test_core_assignment.py @@ -6,8 +6,6 @@ # SPDX-License-Identifier: Apache-2.0 import itertools -import logging -import sys import unittest import math diff --git a/benchexec/test_pqos.py 
b/benchexec/test_pqos.py index 0964e9e35..6ffaf0936 100644 --- a/benchexec/test_pqos.py +++ b/benchexec/test_pqos.py @@ -10,7 +10,6 @@ """ import json import copy -import logging import unittest from subprocess import CalledProcessError from unittest.mock import patch, MagicMock diff --git a/benchexec/test_result.py b/benchexec/test_result.py index 6de61ef79..07c4956d3 100644 --- a/benchexec/test_result.py +++ b/benchexec/test_result.py @@ -5,8 +5,6 @@ # # SPDX-License-Identifier: Apache-2.0 -import logging -import sys import tempfile import unittest From 373b379376d7518cdf1b348f188d134c249988e2 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 13 Jul 2024 18:05:01 +0000 Subject: [PATCH 33/52] ... all redundant imports --- benchexec/test_integration/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchexec/test_integration/__init__.py b/benchexec/test_integration/__init__.py index 44afaed78..aace858d9 100644 --- a/benchexec/test_integration/__init__.py +++ b/benchexec/test_integration/__init__.py @@ -10,7 +10,6 @@ import os import shutil import subprocess -import sys import tempfile import unittest import zipfile From bcd7c966a42bce8ebada61f5e271320a895520a4 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 13 Jul 2024 18:06:47 +0000 Subject: [PATCH 34/52] only test the table generator on appveyor --- .appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index ff6124958..4f8f51977 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -20,4 +20,4 @@ install: - python -m pip install --user ".[dev]" test_script: - - python -m pytest + - python -m pytest benchexec/tablegenerator/ From 89a86b1ce318faf3d645ea6a45adf856beec1ca9 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 21 Jul 2024 04:02:17 +0000 Subject: [PATCH 35/52] ported tools/test.py --- benchexec/tools/test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/benchexec/tools/test.py b/benchexec/tools/test.py index 8581a4562..9c6d2e154 100644 --- a/benchexec/tools/test.py +++ b/benchexec/tools/test.py @@ -8,12 +8,9 @@ import logging import os import unittest -import sys import benchexec.model -sys.dont_write_bytecode = True # prevent creation of .pyc files - here = os.path.dirname(__file__) From 54ec0d36ff68be4669adfa9b3480ce167d1997a4 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 21 Jul 2024 08:31:21 +0000 Subject: [PATCH 36/52] removed deprecated option for setup.py tests --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 0ecbe98ff..70c3da291 100644 --- a/setup.cfg +++ b/setup.cfg @@ -61,7 +61,6 @@ license_files = packages = benchexec, benchexec.tablegenerator, benchexec.tools install_requires = PyYAML >= 3.12 -test_suite = pytest zip_safe = True [options.extras_require] From 67eb18617c3eefee0948f13d4b045346a34043dc Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 21 Jul 2024 08:32:12 +0000 Subject: [PATCH 37/52] fixed formatting --- setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 70c3da291..dbdc90ece 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,4 +80,5 @@ console_scripts = [options.package_data] benchexec.tablegenerator = react-table/build/*.min.js - react-table/build/*.min.css \ No newline at end of file + react-table/build/*.min.css + \ No newline at end of file From 674f7e0a34087be7d8fa5c8527da1c84020466c4 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 27 Jul 2024 02:03:54 +0000 Subject: [PATCH 38/52] removed tautological 
assertion, clarified comment --- benchexec/tablegenerator/columns.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/benchexec/tablegenerator/columns.py b/benchexec/tablegenerator/columns.py index ce9d1967d..ef56eabc6 100644 --- a/benchexec/tablegenerator/columns.py +++ b/benchexec/tablegenerator/columns.py @@ -18,15 +18,14 @@ __all__ = ["Column", "ColumnType", "ColumnMeasureType"] -# This sets the rounding mode for all Decimal operations in the process and explicitly for the current context -# As asserting the rounding mode for the current context is almost a tautology, we also assert in the methods -# which are called in their own process, i.e. we additionally assert that each process has the correct rounding -# individually. +# Important: If the context is set before we can change the default, we are "locked in" the wrong +# (default) rounding +# Thus, it's important to make sure on *all* entry points that the correct rounding / context +# is used. +# See https://github.com/sosy-lab/benchexec/issues/991 decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP decimal.setcontext(decimal.DefaultContext) -assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP -), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" +# These two lines should be removed after issue 991 has been resolved DEFAULT_TIME_PRECISION = 3 DEFAULT_TOOLTIP_PRECISION = 2 From 203014fd9387a5b3c345bc9c14433132c5d7b89f Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 27 Jul 2024 04:21:04 +0000 Subject: [PATCH 39/52] fixed wrong globs for pytest --- pytest.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytest.ini b/pytest.ini index 68de4a7dd..a77834495 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,5 +6,5 @@ # SPDX-License-Identifier: Apache-2.0 [pytest] -python_files = test_*.py test_integration/__init__.py -norecursedirs = contrib .docker build \ No newline at end of file +python_files = test_*.py test_integration/__init__.py test.py +norecursedirs = docker_files \ No newline at end of file From f33c3bd59cc1d4869278f5cb3f296556077b9bae Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 27 Jul 2024 21:04:16 +0000 Subject: [PATCH 40/52] fixed issue 991, using local context instead of modifying the default context --- benchexec/tablegenerator/columns.py | 430 +++++++++++++++------------- 1 file changed, 223 insertions(+), 207 deletions(-) diff --git a/benchexec/tablegenerator/columns.py b/benchexec/tablegenerator/columns.py index ef56eabc6..20ca80ee0 100644 --- a/benchexec/tablegenerator/columns.py +++ b/benchexec/tablegenerator/columns.py @@ -18,14 +18,10 @@ __all__ = ["Column", "ColumnType", "ColumnMeasureType"] -# Important: If the context is set before we can change the default, we are "locked in" the wrong -# (default) rounding -# Thus, it's important to make sure on *all* entry points that the correct rounding / context -# is used. -# See https://github.com/sosy-lab/benchexec/issues/991 -decimal.DefaultContext.rounding = decimal.ROUND_HALF_UP -decimal.setcontext(decimal.DefaultContext) -# These two lines should be removed after issue 991 has been resolved +# Important: It's important to make sure on *all* entry points / methods which perform arithmetics that the correct +# rounding / context is used. 
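For illustration only (an editorial sketch, not part of this patch or of BenchExec): the local-context pattern that the following hunks apply throughout columns.py looks roughly like this; format_half_up is an invented name, and only the standard decimal module is assumed.

    import decimal

    def format_half_up(value, places):
        # decimal.localcontext() installs a copy of the given context for the
        # duration of the with-block and restores the previous context on exit,
        # so the rounding mode no longer depends on global or thread state.
        with decimal.localcontext(decimal.Context(rounding=decimal.ROUND_HALF_UP)):
            exponent = decimal.Decimal(1).scaleb(-places)
            return decimal.Decimal(value).quantize(exponent)

    print(format_half_up("2.5", 0))    # 3
    print(format_half_up("2.675", 2))  # 2.68

Because localcontext() restores the previous context when the block exits, callers and other threads are unaffected, which is why this approach is preferred here over mutating DefaultContext.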
+local_context = decimal.getcontext() +local_context.rounding = decimal.ROUND_HALF_UP DEFAULT_TIME_PRECISION = 3 DEFAULT_TOOLTIP_PRECISION = 2 @@ -134,36 +130,37 @@ def __init__( relevant_for_diff=None, display_title=None, ): - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" - - # If scaling on the variables is performed, a display unit must be defined, explicitly - if scale_factor is not None and scale_factor != 1 and unit is None: - raise util.TableDefinitionError( - f"Scale factor is defined, but display unit is not (in column {title})" - ) + with decimal.localcontext(local_context): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + + # If scaling on the variables is performed, a display unit must be defined, explicitly + if scale_factor is not None and scale_factor != 1 and unit is None: + raise util.TableDefinitionError( + f"Scale factor is defined, but display unit is not (in column {title})" + ) - self.title = title - self.pattern = pattern - self.number_of_significant_digits = ( - int(num_of_digits) if num_of_digits else None - ) - self.type = col_type - self.unit = unit - self.source_unit = source_unit - self.scale_factor = Decimal(scale_factor) if scale_factor else scale_factor - self.href = href - if relevant_for_diff is None: - self.relevant_for_diff = False - else: - self.relevant_for_diff = ( - True if relevant_for_diff.lower() == "true" else False + self.title = title + self.pattern = pattern + self.number_of_significant_digits = ( + int(num_of_digits) if num_of_digits else None ) - self.display_title = display_title + self.type = col_type + self.unit = unit + self.source_unit = source_unit + self.scale_factor = Decimal(scale_factor) if scale_factor else scale_factor + self.href = href + if relevant_for_diff is None: + self.relevant_for_diff = False + else: + self.relevant_for_diff = ( + True if relevant_for_diff.lower() == "true" else False + ) + self.display_title = display_title - # expected maximum width (in characters) - self.max_width = None + # expected maximum width (in characters) + self.max_width = None def is_numeric(self): return ( @@ -196,68 +193,76 @@ def format_value(self, value, format_target): @param format_target the target the value should be formatted for @return: a formatted String representation of the given value. """ - # Only format counts and measures - if self.type.type != ColumnType.count and self.type.type != ColumnType.measure: - return value - - if format_target not in POSSIBLE_FORMAT_TARGETS: - raise ValueError("Unknown format target") - - if value is None or value == "": - return "" - - if isinstance(value, str): - # If the number ends with "s" or another unit, remove it. - # Units should not occur in table cells, but in the table head. 
- number_str = util.remove_unit(value.strip()) - number = Decimal(number_str) - elif isinstance(value, Decimal): - number = value - number_str = print_decimal(number) - else: - raise TypeError(f"Unexpected number type {type(value)}") + with decimal.localcontext(local_context): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" - if number.is_nan(): - return "NaN" - elif number == inf: - return "Inf" - elif number == -inf: - return "-Inf" + # Only format counts and measures + if ( + self.type.type != ColumnType.count + and self.type.type != ColumnType.measure + ): + return value + + if format_target not in POSSIBLE_FORMAT_TARGETS: + raise ValueError("Unknown format target") + + if value is None or value == "": + return "" + + if isinstance(value, str): + # If the number ends with "s" or another unit, remove it. + # Units should not occur in table cells, but in the table head. + number_str = util.remove_unit(value.strip()) + number = Decimal(number_str) + elif isinstance(value, Decimal): + number = value + number_str = print_decimal(number) + else: + raise TypeError(f"Unexpected number type {type(value)}") - # Apply the scale factor to the value - if self.scale_factor is not None: - number *= self.scale_factor - assert number.is_finite() + if number.is_nan(): + return "NaN" + elif number == inf: + return "Inf" + elif number == -inf: + return "-Inf" - if ( - self.number_of_significant_digits is None - and self.type.type != ColumnType.measure - and format_target == "tooltip_stochastic" - ): - # Column of type count (integral values) without specified sig. digits. - # However, we need to round values like stdev, so we just round somehow. - return print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION)) + # Apply the scale factor to the value + if self.scale_factor is not None: + number *= self.scale_factor + assert number.is_finite() - number_of_significant_digits = self.get_number_of_significant_digits( - format_target - ) - max_dec_digits = ( - self.type.max_decimal_digits - if isinstance(self.type, ColumnMeasureType) - else 0 - ) + if ( + self.number_of_significant_digits is None + and self.type.type != ColumnType.measure + and format_target == "tooltip_stochastic" + ): + # Column of type count (integral values) without specified sig. digits. + # However, we need to round values like stdev, so we just round somehow. 
+ return print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION)) - if number_of_significant_digits is not None: - current_significant_digits = _get_significant_digits(number_str) - return _format_number( - number, - current_significant_digits, - number_of_significant_digits, - max_dec_digits, - format_target, + number_of_significant_digits = self.get_number_of_significant_digits( + format_target ) - else: - return print_decimal(number) + max_dec_digits = ( + self.type.max_decimal_digits + if isinstance(self.type, ColumnMeasureType) + else 0 + ) + + if number_of_significant_digits is not None: + current_significant_digits = _get_significant_digits(number_str) + return _format_number( + number, + current_significant_digits, + number_of_significant_digits, + max_dec_digits, + format_target, + ) + else: + return print_decimal(number) def set_column_type_from(self, column_values): """ @@ -321,40 +326,43 @@ def _format_number_align(formattedValue, max_number_of_dec_digits): def _get_significant_digits(value): - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" - - if not Decimal(value).is_finite(): - return 0 - - # Regular expression returns multiple groups: - # - # Group GROUP_SIGN: Optional sign of value - # Group GROUP_INT_PART: Digits in front of decimal point - # Group GROUP_DEC_PART: Optional decimal point and digits after it - # Group GROUP_SIG_DEC_DIGITS: Digits after decimal point, starting at the first value not 0 - # Group GROUP_EXP: Optional exponent part (e.g. 'e-5') - # Group GROUP_EXP_SIGN: Optional sign of exponent part - # Group GROUP_EXP_VALUE: Value of exponent part (e.g. '5' for 'e-5') - # Use these groups to compute the number of zeros that have to be added to the current number's - # decimal positions. - match = REGEX_MEASURE.match(value) - assert match, "unexpected output format for number formatting" - - if int(match.group(GROUP_INT_PART)) == 0 and Decimal(value) != 0: - sig_digits = len(match.group(GROUP_SIG_DEC_PART)) + with decimal.localcontext(local_context): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + + if not Decimal(value).is_finite(): + return 0 + + # Regular expression returns multiple groups: + # + # Group GROUP_SIGN: Optional sign of value + # Group GROUP_INT_PART: Digits in front of decimal point + # Group GROUP_DEC_PART: Optional decimal point and digits after it + # Group GROUP_SIG_DEC_DIGITS: Digits after decimal point, starting at the first value not 0 + # Group GROUP_EXP: Optional exponent part (e.g. 'e-5') + # Group GROUP_EXP_SIGN: Optional sign of exponent part + # Group GROUP_EXP_VALUE: Value of exponent part (e.g. '5' for 'e-5') + # Use these groups to compute the number of zeros that have to be added to the current number's + # decimal positions. 
+ match = REGEX_MEASURE.match(value) + assert match, "unexpected output format for number formatting" + + if int(match.group(GROUP_INT_PART)) == 0 and Decimal(value) != 0: + sig_digits = len(match.group(GROUP_SIG_DEC_PART)) - else: - if Decimal(value) != 0: - sig_digits = len(match.group(GROUP_INT_PART)) else: - # If the value consists of only zeros, do not count the 0 in front of the decimal - sig_digits = 0 - if match.group(GROUP_DEC_PART): - sig_digits += len(match.group(GROUP_DEC_PART)) - 1 # -1 for decimal point + if Decimal(value) != 0: + sig_digits = len(match.group(GROUP_INT_PART)) + else: + # If the value consists of only zeros, do not count the 0 in front of the decimal + sig_digits = 0 + if match.group(GROUP_DEC_PART): + sig_digits += ( + len(match.group(GROUP_DEC_PART)) - 1 + ) # -1 for decimal point - return sig_digits + return sig_digits def _format_number( @@ -370,59 +378,66 @@ def _format_number( with the specified number of significant digits, optionally aligned at the decimal point. """ - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + with decimal.localcontext(local_context): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" - assert format_target in POSSIBLE_FORMAT_TARGETS, "Invalid format " + format_target + assert format_target in POSSIBLE_FORMAT_TARGETS, ( + "Invalid format " + format_target + ) - if number == 0: - intended_digits = min(number_of_significant_digits, initial_value_sig_digits) - # Add as many trailing zeros as desired - rounded_value = Decimal(0).scaleb(-intended_digits) + if number == 0: + intended_digits = min( + number_of_significant_digits, initial_value_sig_digits + ) + # Add as many trailing zeros as desired + rounded_value = Decimal(0).scaleb(-intended_digits) - else: - # Round to the given amount of significant digits - intended_digits = min(initial_value_sig_digits, number_of_significant_digits) - - assert number.adjusted() == int(floor(abs(number).log10())) - rounding_point = -number.adjusted() + (intended_digits - 1) - # Contrary to its documentation, round() seems to be affected by the rounding - # mode of decimal's context (which is good for us) when rounding Decimals. - # We add an assertion to double check (calling round() is easier to understand). - rounded_value = round(number, rounding_point) - assert rounded_value == number.quantize(Decimal(1).scaleb(-rounding_point)) - - formatted_value = print_decimal(rounded_value) - - # Get the number of resulting significant digits. - current_sig_digits = _get_significant_digits(formatted_value) - - if current_sig_digits > intended_digits: - if "." in formatted_value: - # Happens when rounding 9.99 to 10 with 2 significant digits, - # the formatted_value will be 10.0 and we need to cut one trailing zero. - assert current_sig_digits == intended_digits + 1 - assert formatted_value.endswith("0") - formatted_value = formatted_value[:-1].rstrip(".") else: - # happens for cases like 12300 with 3 significant digits - assert formatted_value == str(round(rounded_value)) - else: - assert current_sig_digits == intended_digits - - # Cut the 0 in front of the decimal point for values < 1. 
- # Example: 0.002 => .002 - if _is_to_cut(formatted_value, format_target): - assert formatted_value.startswith("0.") - formatted_value = formatted_value[1:] - - # Alignment - if format_target == "html_cell": - formatted_value = _format_number_align( - formatted_value, max_digits_after_decimal - ) - return formatted_value + # Round to the given amount of significant digits + intended_digits = min( + initial_value_sig_digits, number_of_significant_digits + ) + + assert number.adjusted() == int(floor(abs(number).log10())) + rounding_point = -number.adjusted() + (intended_digits - 1) + # Contrary to its documentation, round() seems to be affected by the rounding + # mode of decimal's context (which is good for us) when rounding Decimals. + # We add an assertion to double check (calling round() is easier to understand). + rounded_value = round(number, rounding_point) + assert rounded_value == number.quantize(Decimal(1).scaleb(-rounding_point)) + + formatted_value = print_decimal(rounded_value) + + # Get the number of resulting significant digits. + current_sig_digits = _get_significant_digits(formatted_value) + + if current_sig_digits > intended_digits: + if "." in formatted_value: + # Happens when rounding 9.99 to 10 with 2 significant digits, + # the formatted_value will be 10.0 and we need to cut one trailing zero. + assert current_sig_digits == intended_digits + 1 + assert formatted_value.endswith("0") + formatted_value = formatted_value[:-1].rstrip(".") + else: + # happens for cases like 12300 with 3 significant digits + assert formatted_value == str(round(rounded_value)) + else: + assert current_sig_digits == intended_digits + + # Cut the 0 in front of the decimal point for values < 1. + # Example: 0.002 => .002 + if _is_to_cut(formatted_value, format_target): + assert formatted_value.startswith("0.") + formatted_value = formatted_value[1:] + + # Alignment + if format_target == "html_cell": + formatted_value = _format_number_align( + formatted_value, max_digits_after_decimal + ) + return formatted_value def _is_to_cut(value, format_target): @@ -594,48 +609,49 @@ def _get_decimal_digits(decimal_number_match, number_of_significant_digits): @return: the number of decimal digits of the given decimal number match's representation, after expanding the number to the required amount of significant digits """ - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + with decimal.localcontext(local_context): + assert ( + decimal.getcontext().rounding == decimal.ROUND_HALF_UP + ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" - # check that only decimal notation is used - assert "e" not in decimal_number_match.group() + # check that only decimal notation is used + assert "e" not in decimal_number_match.group() - try: - num_of_digits = int(number_of_significant_digits) - except TypeError: - num_of_digits = DEFAULT_NUMBER_OF_SIGNIFICANT_DIGITS + try: + num_of_digits = int(number_of_significant_digits) + except TypeError: + num_of_digits = DEFAULT_NUMBER_OF_SIGNIFICANT_DIGITS - if not decimal_number_match.group(GROUP_DEC_PART): - return 0 + if not decimal_number_match.group(GROUP_DEC_PART): + return 0 - # If 1 > value > 0, only look at the decimal digits. - # In the second condition, we have to remove the first character from the decimal part group because the - # first character always is '.' 
- if ( - int(decimal_number_match.group(GROUP_INT_PART)) == 0 - and int(decimal_number_match.group(GROUP_DEC_PART)[1:]) != 0 - ): - max_num_of_digits = len(decimal_number_match.group(GROUP_SIG_DEC_PART)) - num_of_digits = min(num_of_digits, max_num_of_digits) - # number of needed decimal digits = number of zeroes after decimal point + significant digits - curr_dec_digits = len(decimal_number_match.group(GROUP_ZEROES)) + int( - num_of_digits - ) + # If 1 > value > 0, only look at the decimal digits. + # In the second condition, we have to remove the first character from the decimal part group because the + # first character always is '.' + if ( + int(decimal_number_match.group(GROUP_INT_PART)) == 0 + and int(decimal_number_match.group(GROUP_DEC_PART)[1:]) != 0 + ): + max_num_of_digits = len(decimal_number_match.group(GROUP_SIG_DEC_PART)) + num_of_digits = min(num_of_digits, max_num_of_digits) + # number of needed decimal digits = number of zeroes after decimal point + significant digits + curr_dec_digits = len(decimal_number_match.group(GROUP_ZEROES)) + int( + num_of_digits + ) - else: - max_num_of_digits = ( - len(decimal_number_match.group(GROUP_INT_PART)) - + len(decimal_number_match.group(GROUP_DEC_PART)) - - 1 # for decimal point, which is guaranteed to exist at this point - ) - num_of_digits = min(num_of_digits, max_num_of_digits) - # number of needed decimal digits = significant digits - number of digits in front of decimal point - curr_dec_digits = int(num_of_digits) - len( - decimal_number_match.group(GROUP_INT_PART) - ) + else: + max_num_of_digits = ( + len(decimal_number_match.group(GROUP_INT_PART)) + + len(decimal_number_match.group(GROUP_DEC_PART)) + - 1 # for decimal point, which is guaranteed to exist at this point + ) + num_of_digits = min(num_of_digits, max_num_of_digits) + # number of needed decimal digits = significant digits - number of digits in front of decimal point + curr_dec_digits = int(num_of_digits) - len( + decimal_number_match.group(GROUP_INT_PART) + ) - return curr_dec_digits + return curr_dec_digits def _get_int_digits(decimal_number_match): From e99ecc992df7dd45e523d4b83f45730d2f7d0857 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sun, 28 Jul 2024 12:53:21 +0000 Subject: [PATCH 41/52] moved config for pytest to pyproject.toml --- pyproject.toml | 4 ++++ pytest.ini | 10 ---------- 2 files changed, 4 insertions(+), 10 deletions(-) delete mode 100644 pytest.ini diff --git a/pyproject.toml b/pyproject.toml index 5e4b78ba9..6d094f93c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,3 +70,7 @@ ignore = [ # wildcard imports significantly shorten test code, 'F405', ] + +[tool.pytest.ini_options] +python_files = ["test_*.py", "test_integration/__init__.py", "test.py"] +norecursedirs = ["docker_files"] # otherwise we execute files which are intended to be executed inside a docker container diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index a77834495..000000000 --- a/pytest.ini +++ /dev/null @@ -1,10 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2024 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -[pytest] -python_files = test_*.py test_integration/__init__.py test.py -norecursedirs = docker_files \ No newline at end of file From 49f9f929c607b49539f84787c2491c7028a9d002 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 2 Aug 2024 22:31:02 +0000 Subject: [PATCH 42/52] added build directory, made docker path more 
precise --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6d094f93c..6be7e209c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,4 +73,4 @@ ignore = [ [tool.pytest.ini_options] python_files = ["test_*.py", "test_integration/__init__.py", "test.py"] -norecursedirs = ["docker_files"] # otherwise we execute files which are intended to be executed inside a docker container +norecursedirs = ["contrib/p4/docker_files", "build"] From cb3ce3972bca1b37d95e17f30c0a40aebef19cf6 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Fri, 2 Aug 2024 23:59:41 +0000 Subject: [PATCH 43/52] modified additional test now requires no assertion to be violated when the rounding mode is modified for the thread, which should always be the case as long as local context is used (which sets the rounding mode independently of the rounding mode of the actual, global decimal context of the thread --- benchexec/tablegenerator/test_columns.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/benchexec/tablegenerator/test_columns.py b/benchexec/tablegenerator/test_columns.py index 0e114adcd..ab533febf 100644 --- a/benchexec/tablegenerator/test_columns.py +++ b/benchexec/tablegenerator/test_columns.py @@ -186,17 +186,11 @@ def test_format_value_small_value(self): def test_invalid_rounding_mode(self): import decimal - decimal.getcontext().rounding = decimal.ROUND_HALF_EVEN - with pytest.raises(AssertionError): - Column( - "CpuTime", - None, - 3, - None, - ColumnMeasureType(12), - unit="dummy", - scale_factor=1e-10, - ) + decimal.getcontext().rounding = decimal.ROUND_HALF_DOWN + formatted_value_no_align_zeros_cut = self.measure_column.format_value( + "5.7715", *self.default_optionals + ) + self.assertEqual(formatted_value_no_align_zeros_cut, "5.772") def test_format_value_align_int(self): formatted_value_int_aligned = self.measure_column.format_value( From 7b9490710241beb562aa435e94e8ba5d1f10e037 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Sat, 3 Aug 2024 01:04:21 +0000 Subject: [PATCH 44/52] fixed comment, use new context instead of setting the thread context, extend localcontext to additonal methods --- benchexec/tablegenerator/columns.py | 290 ++++++++++++++-------------- 1 file changed, 142 insertions(+), 148 deletions(-) diff --git a/benchexec/tablegenerator/columns.py b/benchexec/tablegenerator/columns.py index 20ca80ee0..042600b09 100644 --- a/benchexec/tablegenerator/columns.py +++ b/benchexec/tablegenerator/columns.py @@ -18,10 +18,9 @@ __all__ = ["Column", "ColumnType", "ColumnMeasureType"] -# Important: It's important to make sure on *all* entry points / methods which perform arithmetics that the correct -# rounding / context is used. -local_context = decimal.getcontext() -local_context.rounding = decimal.ROUND_HALF_UP +# It's important to make sure on *all* entry points / methods which perform arithmetics that the correct +# rounding / context is used by using a local context. 
+DECIMAL_CONTEXT = decimal.Context(rounding=decimal.ROUND_HALF_UP) DEFAULT_TIME_PRECISION = 3 DEFAULT_TOOLTIP_PRECISION = 2 @@ -130,10 +129,7 @@ def __init__( relevant_for_diff=None, display_title=None, ): - with decimal.localcontext(local_context): - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + with decimal.localcontext(DECIMAL_CONTEXT): # If scaling on the variables is performed, a display unit must be defined, explicitly if scale_factor is not None and scale_factor != 1 and unit is None: @@ -193,10 +189,7 @@ def format_value(self, value, format_target): @param format_target the target the value should be formatted for @return: a formatted String representation of the given value. """ - with decimal.localcontext(local_context): - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + with decimal.localcontext(DECIMAL_CONTEXT): # Only format counts and measures if ( @@ -313,23 +306,21 @@ def __str__(self): def _format_number_align(formattedValue, max_number_of_dec_digits): - alignment = max_number_of_dec_digits + with decimal.localcontext(DECIMAL_CONTEXT): + alignment = max_number_of_dec_digits - if formattedValue.find(".") >= 0: - # Subtract spaces for digits after the decimal point. - alignment -= len(formattedValue) - formattedValue.find(".") - 1 - elif max_number_of_dec_digits > 0: - # Add punctuation space. - formattedValue += " " + if formattedValue.find(".") >= 0: + # Subtract spaces for digits after the decimal point. + alignment -= len(formattedValue) - formattedValue.find(".") - 1 + elif max_number_of_dec_digits > 0: + # Add punctuation space. + formattedValue += " " - return formattedValue + (" " * alignment) + return formattedValue + (" " * alignment) def _get_significant_digits(value): - with decimal.localcontext(local_context): - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + with decimal.localcontext(DECIMAL_CONTEXT): if not Decimal(value).is_finite(): return 0 @@ -378,10 +369,7 @@ def _format_number( with the specified number of significant digits, optionally aligned at the decimal point. """ - with decimal.localcontext(local_context): - assert ( - decimal.getcontext().rounding == decimal.ROUND_HALF_UP - ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP" + with decimal.localcontext(DECIMAL_CONTEXT): assert format_target in POSSIBLE_FORMAT_TARGETS, ( "Invalid format " + format_target @@ -441,8 +429,9 @@ def _format_number( def _is_to_cut(value, format_target): - correct_target = format_target == "html_cell" - return correct_target and "." in value and 1 > Decimal(value) >= 0 + with decimal.localcontext(DECIMAL_CONTEXT): + correct_target = format_target == "html_cell" + return correct_target and "." 
in value and 1 > Decimal(value) >= 0 def _get_column_type_heur( @@ -451,132 +440,139 @@ def _get_column_type_heur( ColumnType, Tuple[Union[ColumnType, ColumnMeasureType], str, str, Union[int, Decimal], int], ]: - if column.title == "status": - return ColumnType.status - - column_type = column.type or None - if column_type and column_type.type == ColumnType.measure: - column_type = ColumnMeasureType(0) - column_unit = column.unit # May be None - column_source_unit = column.source_unit # May be None - column_scale_factor = column.scale_factor # May be None - - column_max_int_digits = 0 - column_max_dec_digits = 0 - column_has_numbers = False - column_has_decimal_numbers = False - - if column_unit: - explicit_unit_defined = True - else: - explicit_unit_defined = False + with decimal.localcontext(DECIMAL_CONTEXT): + if column.title == "status": + return ColumnType.status + + column_type = column.type or None + if column_type and column_type.type == ColumnType.measure: + column_type = ColumnMeasureType(0) + column_unit = column.unit # May be None + column_source_unit = column.source_unit # May be None + column_scale_factor = column.scale_factor # May be None + + column_max_int_digits = 0 + column_max_dec_digits = 0 + column_has_numbers = False + column_has_decimal_numbers = False + + if column_unit: + explicit_unit_defined = True + else: + explicit_unit_defined = False - if column_scale_factor is None: - explicit_scale_defined = False - else: - explicit_scale_defined = True + if column_scale_factor is None: + explicit_scale_defined = False + else: + explicit_scale_defined = True - for value in column_values: - if value is None or value == "": - continue + for value in column_values: + if value is None or value == "": + continue - value_match = REGEX_MEASURE.match(str(value)) + value_match = REGEX_MEASURE.match(str(value)) - # As soon as one row's value is no number, the column type is 'text' - if value_match is None: - return ColumnType.text - else: - column_has_numbers = True - curr_column_unit = value_match.group(GROUP_UNIT) - - # If the units in two different rows of the same column differ, - # 1. Raise an error if an explicit unit is defined by the displayUnit attribute - # and the unit in the column cell differs from the defined sourceUnit, or - # 2. Handle the column as 'text' type, if no displayUnit was defined for the column's values. - # In that case, a unit different from the definition of sourceUnit does not lead to an error. - if curr_column_unit: - if column_source_unit is None and not explicit_scale_defined: - column_source_unit = curr_column_unit - elif column_source_unit != curr_column_unit: - raise util.TableDefinitionError( - f"Attribute sourceUnit different from real source unit: " - f"{column_source_unit} and {curr_column_unit} (in column {column.title})" - ) - if column_unit and curr_column_unit != column_unit: - if explicit_unit_defined: - _check_unit_consistency( - curr_column_unit, column_source_unit, column + # As soon as one row's value is no number, the column type is 'text' + if value_match is None: + return ColumnType.text + else: + column_has_numbers = True + curr_column_unit = value_match.group(GROUP_UNIT) + + # If the units in two different rows of the same column differ, + # 1. Raise an error if an explicit unit is defined by the displayUnit attribute + # and the unit in the column cell differs from the defined sourceUnit, or + # 2. Handle the column as 'text' type, if no displayUnit was defined for the column's values. 
+ # In that case, a unit different from the definition of sourceUnit does not lead to an error. + if curr_column_unit: + if column_source_unit is None and not explicit_scale_defined: + column_source_unit = curr_column_unit + elif column_source_unit != curr_column_unit: + raise util.TableDefinitionError( + f"Attribute sourceUnit different from real source unit: " + f"{column_source_unit} and {curr_column_unit} (in column {column.title})" ) + if column_unit and curr_column_unit != column_unit: + if explicit_unit_defined: + _check_unit_consistency( + curr_column_unit, column_source_unit, column + ) + else: + return ColumnType.text else: - return ColumnType.text - else: - column_unit = curr_column_unit + column_unit = curr_column_unit + + if column_scale_factor is None: + column_scale_factor = _get_scale_factor( + column_unit, column_source_unit, column + ) - if column_scale_factor is None: - column_scale_factor = _get_scale_factor( - column_unit, column_source_unit, column + # Compute the number of decimal digits of the current value, considering the number of significant + # digits for this column. + # Use the column's scale factor for computing the decimal digits of the current value. + # Otherwise, they might be different from output. + scaled_value = ( + Decimal(util.remove_unit(str(value))) * column_scale_factor ) - # Compute the number of decimal digits of the current value, considering the number of significant - # digits for this column. - # Use the column's scale factor for computing the decimal digits of the current value. - # Otherwise, they might be different from output. - scaled_value = Decimal(util.remove_unit(str(value))) * column_scale_factor - - # Due to the scaling operation above, floats in the exponent notation may be created. Since this creates - # special cases, immediately convert the value back to decimal notation. - if value_match.group(GROUP_DEC_PART): - # -1 since GROUP_DEC_PART includes the decimal point - dec_digits_before_scale = len(value_match.group(GROUP_DEC_PART)) - 1 - else: - dec_digits_before_scale = 0 - max_number_of_dec_digits_after_scale = max( - 0, dec_digits_before_scale - ceil(log10(column_scale_factor)) - ) + # Due to the scaling operation above, floats in the exponent notation may be created. Since this creates + # special cases, immediately convert the value back to decimal notation. 
+ if value_match.group(GROUP_DEC_PART): + # -1 since GROUP_DEC_PART includes the decimal point + dec_digits_before_scale = len(value_match.group(GROUP_DEC_PART)) - 1 + else: + dec_digits_before_scale = 0 + max_number_of_dec_digits_after_scale = max( + 0, dec_digits_before_scale - ceil(log10(column_scale_factor)) + ) - scaled_value = f"{scaled_value:.{max_number_of_dec_digits_after_scale}f}" - scaled_value_match = REGEX_MEASURE.match(scaled_value) - assert scaled_value_match, "unexpected output format for number formatting" + scaled_value = ( + f"{scaled_value:.{max_number_of_dec_digits_after_scale}f}" + ) + scaled_value_match = REGEX_MEASURE.match(scaled_value) + assert ( + scaled_value_match + ), "unexpected output format for number formatting" - curr_dec_digits = _get_decimal_digits( - scaled_value_match, column.number_of_significant_digits - ) - column_max_dec_digits = max(column_max_dec_digits, curr_dec_digits) + curr_dec_digits = _get_decimal_digits( + scaled_value_match, column.number_of_significant_digits + ) + column_max_dec_digits = max(column_max_dec_digits, curr_dec_digits) - curr_int_digits = _get_int_digits(scaled_value_match) - column_max_int_digits = max(column_max_int_digits, curr_int_digits) + curr_int_digits = _get_int_digits(scaled_value_match) + column_max_int_digits = max(column_max_int_digits, curr_int_digits) - if ( - scaled_value_match.group(GROUP_DEC_PART) is not None - or value_match.group(GROUP_DEC_PART) is not None - or scaled_value_match.group(GROUP_SPECIAL_FLOATS_PART) is not None - ): - column_has_decimal_numbers = True + if ( + scaled_value_match.group(GROUP_DEC_PART) is not None + or value_match.group(GROUP_DEC_PART) is not None + or scaled_value_match.group(GROUP_SPECIAL_FLOATS_PART) is not None + ): + column_has_decimal_numbers = True - if not column_has_numbers: - # only empty values - return ColumnType.text + if not column_has_numbers: + # only empty values + return ColumnType.text - if ( - column_has_decimal_numbers - or column_max_dec_digits - or int(column_scale_factor) != column_scale_factor # non-int scaling factor - ): - column_type = ColumnMeasureType(column_max_dec_digits) - else: - column_type = ColumnType.count + if ( + column_has_decimal_numbers + or column_max_dec_digits + or int(column_scale_factor) != column_scale_factor # non-int scaling factor + ): + column_type = ColumnMeasureType(column_max_dec_digits) + else: + column_type = ColumnType.count - column_width = column_max_int_digits - if column_max_dec_digits: - column_width += column_max_dec_digits + 1 + column_width = column_max_int_digits + if column_max_dec_digits: + column_width += column_max_dec_digits + 1 - return ( - column_type, - column_unit, - column_source_unit, - column_scale_factor, - column_width, - ) + return ( + column_type, + column_unit, + column_source_unit, + column_scale_factor, + column_width, + ) # This function assumes that scale_factor is not defined. 
@@ -609,10 +605,7 @@ def _get_decimal_digits(decimal_number_match, number_of_significant_digits):
     @return: the number of decimal digits of the given decimal number match's representation, after expanding
         the number to the required amount of significant digits
     """
-    with decimal.localcontext(local_context):
-        assert (
-            decimal.getcontext().rounding == decimal.ROUND_HALF_UP
-        ), f"rounding of context is {decimal.getcontext().rounding}, expected ROUND_HALF_UP"
+    with decimal.localcontext(DECIMAL_CONTEXT):
 
         # check that only decimal notation is used
         assert "e" not in decimal_number_match.group()
@@ -659,11 +652,12 @@ def _get_int_digits(decimal_number_match):
     Returns the amount of integer digits of the given regex match.
     @param number_of_significant_digits: the number of significant digits required
     """
-    int_part = decimal_number_match.group(GROUP_INT_PART) or ""
-    if int_part == "0":
-        # we skip leading zeros of numbers < 1
-        int_part = ""
-    return len(int_part)
+    with decimal.localcontext(DECIMAL_CONTEXT):
+        int_part = decimal_number_match.group(GROUP_INT_PART) or ""
+        if int_part == "0":
+            # we skip leading zeros of numbers < 1
+            int_part = ""
+        return len(int_part)
 
 
 def _check_unit_consistency(actual_unit, wanted_unit, column):

From 64a4e51197d7d42de82221add801d935b40c4e46 Mon Sep 17 00:00:00 2001
From: Florian Eder
Date: Mon, 5 Aug 2024 03:05:44 +0000
Subject: [PATCH 45/52] added local context to method which also depends on
 correct rounding mode, formerly set for the whole thread

---
 benchexec/tablegenerator/statistics.py | 102 +++++++++++++------------
 1 file changed, 53 insertions(+), 49 deletions(-)

diff --git a/benchexec/tablegenerator/statistics.py b/benchexec/tablegenerator/statistics.py
index d22fef02a..4a0528a29 100644
--- a/benchexec/tablegenerator/statistics.py
+++ b/benchexec/tablegenerator/statistics.py
@@ -14,6 +14,9 @@
 from benchexec.tablegenerator import util
 from benchexec.tablegenerator.columns import ColumnType
 
+# It's important to make sure on *all* entry points / methods which perform arithmetics that the correct
+# rounding / context is used.
+DECIMAL_CONTEXT = decimal.Context(rounding=decimal.ROUND_HALF_UP)
 
 nan = Decimal("nan")
 inf = Decimal("inf")
@@ -65,55 +68,56 @@ def __str__(self):
 
     @classmethod
     def from_list(cls, values):
-        if any(v is not None and v.is_nan() for v in values):
-            return StatValue(nan, nan, nan, nan, nan, nan)
-
-        values = sorted(v for v in values if v is not None)
-        if not values:
-            return None
-
-        values_len = len(values)
-        min_value = values[0]
-        max_value = values[-1]
-
-        if min_value == -inf and max_value == +inf:
-            values_sum = nan
-            mean = nan
-            stdev = nan
-        elif max_value == inf:
-            values_sum = inf
-            mean = inf
-            stdev = inf
-        elif min_value == -inf:
-            values_sum = -inf
-            mean = -inf
-            stdev = inf
-        else:
-            values_sum = sum(values)
-            mean = values_sum / values_len
-
-            # The scaling is just to avoid having too few decimal digits when printing,
-            # the value is still just 0.
- stdev = Decimal(0).scaleb(-decimal.getcontext().prec) - for v in values: - diff = v - mean - stdev += diff * diff - stdev = (stdev / values_len).sqrt() - - half, len_is_odd = divmod(values_len, 2) - if len_is_odd: - median = values[half] - else: - median = (values[half - 1] + values[half]) / Decimal(2) - - return StatValue( - values_sum, - min=min_value, - max=max_value, - avg=mean, - median=median, - stdev=stdev, - ) + with decimal.localcontext(DECIMAL_CONTEXT): + if any(v is not None and v.is_nan() for v in values): + return StatValue(nan, nan, nan, nan, nan, nan) + + values = sorted(v for v in values if v is not None) + if not values: + return None + + values_len = len(values) + min_value = values[0] + max_value = values[-1] + + if min_value == -inf and max_value == +inf: + values_sum = nan + mean = nan + stdev = nan + elif max_value == inf: + values_sum = inf + mean = inf + stdev = inf + elif min_value == -inf: + values_sum = -inf + mean = -inf + stdev = inf + else: + values_sum = sum(values) + mean = values_sum / values_len + + # The scaling is just to avoid having too few decimal digits when printing, + # the value is still just 0. + stdev = Decimal(0).scaleb(-decimal.getcontext().prec) + for v in values: + diff = v - mean + stdev += diff * diff + stdev = (stdev / values_len).sqrt() + + half, len_is_odd = divmod(values_len, 2) + if len_is_odd: + median = values[half] + else: + median = (values[half - 1] + values[half]) / Decimal(2) + + return StatValue( + values_sum, + min=min_value, + max=max_value, + avg=mean, + median=median, + stdev=stdev, + ) def get_stats_of_run_set(runResults, correct_only): From a77a2e86789dd80cea4f5482720b87bc28fcab51 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 5 Aug 2024 20:03:14 +0000 Subject: [PATCH 46/52] add trailing space --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index dbdc90ece..be3291295 100644 --- a/setup.cfg +++ b/setup.cfg @@ -81,4 +81,4 @@ console_scripts = benchexec.tablegenerator = react-table/build/*.min.js react-table/build/*.min.css - \ No newline at end of file + From 36c605bc41635e05102e93f0c37c4cd48fff10b0 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 5 Aug 2024 20:12:58 +0000 Subject: [PATCH 47/52] purge nose from all files (except release.sh) --- test/Dockerfile.python-3.10 | 8 +------- test/Dockerfile.python-3.11 | 8 +------- test/Dockerfile.python-3.8 | 1 - test/Dockerfile.python-3.9 | 1 - 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/test/Dockerfile.python-3.10 b/test/Dockerfile.python-3.10 index 2f42b9225..74ecd6cf3 100644 --- a/test/Dockerfile.python-3.10 +++ b/test/Dockerfile.python-3.10 @@ -29,10 +29,4 @@ RUN pip install \ "coverage[toml] >= 5.0" \ lxml \ pystemd \ - pyyaml \ - 'setuptools < 58' - -# Avoid the wheel on PyPi for nose, because it does not work on Python 3.10. -# An installation from source does work, though, if setuptools<58 exists. -# Cf. https://github.com/nose-devs/nose/issues/1099 -RUN pip install nose --no-binary :all: + pyyaml diff --git a/test/Dockerfile.python-3.11 b/test/Dockerfile.python-3.11 index 628e05b18..06233c1da 100644 --- a/test/Dockerfile.python-3.11 +++ b/test/Dockerfile.python-3.11 @@ -29,10 +29,4 @@ RUN pip install \ "coverage[toml] >= 5.0" \ lxml \ pystemd \ - pyyaml \ - 'setuptools < 58' - -# Avoid the wheel on PyPi for nose, because it does not work on Python 3.11. -# An installation from source does work, though, if setuptools<58 exists. -# Cf. 
https://github.com/nose-devs/nose/issues/1099 -RUN pip install nose --no-binary :all: + pyyaml diff --git a/test/Dockerfile.python-3.8 b/test/Dockerfile.python-3.8 index 379e7f39a..b720018b2 100644 --- a/test/Dockerfile.python-3.8 +++ b/test/Dockerfile.python-3.8 @@ -28,6 +28,5 @@ RUN pip install \ coloredlogs \ "coverage[toml] >= 5.0" \ lxml \ - nose \ pystemd \ pyyaml diff --git a/test/Dockerfile.python-3.9 b/test/Dockerfile.python-3.9 index c2e7af0a6..57f93c492 100644 --- a/test/Dockerfile.python-3.9 +++ b/test/Dockerfile.python-3.9 @@ -28,6 +28,5 @@ RUN pip install \ coloredlogs \ "coverage[toml] >= 5.0" \ lxml \ - nose \ pystemd \ pyyaml From 1842b29968cd300686253f79af22e47288b49985 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 5 Aug 2024 20:20:07 +0000 Subject: [PATCH 48/52] removed redundant import --- benchexec/tablegenerator/test_columns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchexec/tablegenerator/test_columns.py b/benchexec/tablegenerator/test_columns.py index ab533febf..e436858cd 100644 --- a/benchexec/tablegenerator/test_columns.py +++ b/benchexec/tablegenerator/test_columns.py @@ -7,7 +7,6 @@ from decimal import Decimal import unittest -import pytest from benchexec.tablegenerator.columns import ( Column, From 4eb5da3d45684fe2b503f817085888b3e308f2d3 Mon Sep 17 00:00:00 2001 From: Florian Eder Date: Mon, 5 Aug 2024 20:41:43 +0000 Subject: [PATCH 49/52] removed nose from release.sh, replaced it w/ pytest --- release.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/release.sh b/release.sh index 5c887e886..a19cb7b09 100755 --- a/release.sh +++ b/release.sh @@ -80,13 +80,9 @@ python3 -m venv "$TEMP3" . "$TEMP3/bin/activate" git clone "file://$DIR" "$TEMP3/benchexec" pushd "$TEMP3/benchexec" -# Avoid the wheel on PyPi for nose, it does not work on Python 3.10. -# Local building from source works, but only with setuptools<58. -pip install "setuptools < 58" -pip install nose --no-binary :all: pip install build pip install -e ".[dev]" -python -m nose +python -m pytest python -m build popd deactivate From 4b11194a4af8cb91cd06dca7fd139e7b33ee6ccf Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 6 Aug 2024 07:32:09 +0200 Subject: [PATCH 50/52] Update build dependency of Debian package --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 798ccc835..73ddc6d2e 100644 --- a/debian/control +++ b/debian/control @@ -9,7 +9,7 @@ Build-Depends: debhelper-compat (= 12), python3-setuptools, python3-lxml, python3-yaml (>= 3.12), - python3-nose + python3-pytest Standards-Version: 3.9.6.1 X-Python3-Version: >= 3.8 Homepage: https://github.com/sosy-lab/benchexec From d4ee4a6ffccac229099642665f632bd95c7916d0 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 6 Aug 2024 07:40:32 +0200 Subject: [PATCH 51/52] Add some documentation on how to run our tests Since "setup.py test" is no longer possible, we need to tell people. --- doc/DEVELOPMENT.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/DEVELOPMENT.md b/doc/DEVELOPMENT.md index 776bd450d..35d27ee0f 100644 --- a/doc/DEVELOPMENT.md +++ b/doc/DEVELOPMENT.md @@ -51,10 +51,23 @@ Please format all code using `black .`. Apart from what is formatted automatically, we try to follow the official Python style guide [PEP8](https://www.python.org/dev/peps/pep-0008/). -We also check our code using the static-analysis tool [flake8](http://flake8.pycqa.org). 
+
+## Tests and CI
+
+To run the test suite of BenchExec, use the following command:
+
+    python3 -m pytest
+
+We also check our code using the static-analysis tools
+[flake8](http://flake8.pycqa.org) and [ruff](https://github.com/astral-sh/ruff/).
 If you find a rule that should not be enforced in your opinion,
 please raise an issue.
 
+As main CI we use GitLab, which runs all tests and checks,
+but only on branches from our repository (not on PRs from forks).
+GitHub Actions and AppVeyor also run a subset of checks
+(mostly for the JavaScript part of BenchExec) on all PRs.
+
 ## Releasing a new Version

From 959f89d2a7856235a8f99d76ee7ed3676a601c8f Mon Sep 17 00:00:00 2001
From: Philipp Wendler
Date: Tue, 6 Aug 2024 07:44:55 +0200
Subject: [PATCH 52/52] Add pytest to our container images

This speeds up CI jobs by not having to install it on every job.
---
 test/Dockerfile.python-3.10 | 1 +
 test/Dockerfile.python-3.11 | 1 +
 test/Dockerfile.python-3.8  | 1 +
 test/Dockerfile.python-3.9  | 1 +
 4 files changed, 4 insertions(+)

diff --git a/test/Dockerfile.python-3.10 b/test/Dockerfile.python-3.10
index 74ecd6cf3..825aa9e5e 100644
--- a/test/Dockerfile.python-3.10
+++ b/test/Dockerfile.python-3.10
@@ -29,4 +29,5 @@ RUN pip install \
     "coverage[toml] >= 5.0" \
     lxml \
     pystemd \
+    pytest \
     pyyaml
diff --git a/test/Dockerfile.python-3.11 b/test/Dockerfile.python-3.11
index 06233c1da..825b67d09 100644
--- a/test/Dockerfile.python-3.11
+++ b/test/Dockerfile.python-3.11
@@ -29,4 +29,5 @@ RUN pip install \
     "coverage[toml] >= 5.0" \
     lxml \
     pystemd \
+    pytest \
     pyyaml
diff --git a/test/Dockerfile.python-3.8 b/test/Dockerfile.python-3.8
index b720018b2..276b79774 100644
--- a/test/Dockerfile.python-3.8
+++ b/test/Dockerfile.python-3.8
@@ -29,4 +29,5 @@ RUN pip install \
     "coverage[toml] >= 5.0" \
     lxml \
     pystemd \
+    pytest \
     pyyaml
diff --git a/test/Dockerfile.python-3.9 b/test/Dockerfile.python-3.9
index 57f93c492..ad464d9ff 100644
--- a/test/Dockerfile.python-3.9
+++ b/test/Dockerfile.python-3.9
@@ -29,4 +29,5 @@ RUN pip install \
     "coverage[toml] >= 5.0" \
     lxml \
     pystemd \
+    pytest \
     pyyaml
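
Note on the decimal-context patches above: the sketch below is a minimal standalone illustration of the pattern that DECIMAL_CONTEXT and decimal.localcontext enable, namely activating an explicit rounding context at each entry point that performs arithmetic instead of relying on a rounding mode set once for the whole thread. It is not code from BenchExec; the function mean_of and the sample values are made up for illustration, only DECIMAL_CONTEXT mirrors the definition introduced in the patches.

    # Illustrative sketch (assumed, not BenchExec code): each entry point that does
    # Decimal arithmetic wraps its work in an explicit local context, so the result
    # no longer depends on what the caller set in the thread-global context.
    import decimal
    from decimal import Decimal

    DECIMAL_CONTEXT = decimal.Context(rounding=decimal.ROUND_HALF_UP)


    def mean_of(raw_values):
        """Return the mean of the given numbers as Decimal, rounded half-up."""
        with decimal.localcontext(DECIMAL_CONTEXT):
            values = [Decimal(v) for v in raw_values]
            # Everything inside this block uses ROUND_HALF_UP, even if the caller
            # changed the rounding mode of the thread-global context.
            return sum(values) / len(values)


    if __name__ == "__main__":
        decimal.getcontext().rounding = decimal.ROUND_DOWN  # global setting we must not inherit
        # 2/3: the last significant digit differs between ROUND_DOWN and ROUND_HALF_UP,
        # so the output shows that the local context wins.
        print(mean_of(["0.5", "0.5", "1"]))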