From ac9ab3142a02ef8841988947c30db44c1457b134 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Tue, 8 Oct 2024 13:42:04 -0400
Subject: [PATCH 01/22] DAOS-16100 test: Fix stopping daos_test during timeout

Properly stop the daos_test process if the test encounters a timeout
while running.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: test_daos_management

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/daos_test/suite.yaml      |  2 +-
 src/tests/ftest/util/cmocka_utils.py      |  3 ++-
 src/tests/ftest/util/job_manager_utils.py | 12 ++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml
index 3f8572f2bcf..8ea3980b9c2 100644
--- a/src/tests/ftest/daos_test/suite.yaml
+++ b/src/tests/ftest/daos_test/suite.yaml
@@ -8,7 +8,7 @@ hosts:
 timeout: 600
 timeouts:
   test_daos_degraded_mode: 450
-  test_daos_management: 110
+  test_daos_management: 30
   test_daos_pool: 180
   test_daos_container: 700
   test_daos_epoch: 125
diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index 69ffe767e35..4e26ae3228a 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -1,5 +1,5 @@
 """
-  (C) Copyright 2022-2023 Intel Corporation.
+  (C) Copyright 2022-2024 Intel Corporation.
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -117,6 +117,7 @@ def run_cmocka_test(self, test, command):
             test.fail(error_message)
 
         finally:
+            run_remote(test.log, self.hosts, "ps -ejH")
             self._collect_cmocka_results(test)
             if not self._check_cmocka_files():
                 if error_message is None:
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 2b5f2cd6c26..a3e7d671e9d 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -143,6 +143,18 @@ def job(self, value):
                 and self._job.check_results_list):
             self.check_results_list.extend(self._job.check_results_list)
 
+    @property
+    def command_regex(self):
+        """Get the regular expression to use to search for the command.
+
+        Typical use would include combining with pgrep to verify a subprocess is running.
+
+        Returns:
+            str: regular expression to use to search for the command
+        """
+        # pylint: disable=protected-access
+        return "'({})'".format("|".join(self._exe_names + self.job._exe_names))
+
     def __str__(self):
         """Return the command with all of its defined parameters as a string.
 

From 71a414242434013c0e51377c08ea5d30c2fe692b Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 23 Oct 2024 12:19:44 -0400
Subject: [PATCH 02/22] Force a timeout for debug.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: test_daos_management

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index 4e26ae3228a..0c3cb9e9c22 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -108,6 +108,7 @@ def run_cmocka_test(self, test, command):
         error_message = None
         error_exception = None
         try:
+            command.timeout = 10
             command.run()
 
         except CommandFailure as error:

From cb91989410f478908f3dd4f039127746f13b7b41 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Thu, 24 Oct 2024 08:43:11 -0400
Subject: [PATCH 03/22] Force test timeout

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: test_daos_management

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index 0c3cb9e9c22..a4183b7b95f 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -4,6 +4,7 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import os
+import time
 
 from agent_utils import include_local_host
 from command_utils import ExecutableCommand
@@ -108,7 +109,7 @@ def run_cmocka_test(self, test, command):
         error_message = None
         error_exception = None
         try:
-            command.timeout = 10
+            time.sleep(30)      # Debug
             command.run()
 
         except CommandFailure as error:

From 6c05d568164f98cf965f6fc00982af8180eb463b Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Fri, 25 Oct 2024 17:10:05 -0400
Subject: [PATCH 04/22] Force timeout

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: test_daos_management

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index a4183b7b95f..4efa3c2177f 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -109,7 +109,7 @@ def run_cmocka_test(self, test, command):
         error_message = None
         error_exception = None
         try:
-            time.sleep(30)      # Debug
+            time.sleep(50)      # Debug
             command.run()
 
         except CommandFailure as error:

From bd8b1dd965791daac97458844d6d6091f5a668b2 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Mon, 28 Oct 2024 18:43:30 -0400
Subject: [PATCH 05/22] Updates.

When stopping cmocka commands, use only the executable name (excluding
its path) to find a pkill match.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/daos_test/dfs.py       | 13 ++---
 src/tests/ftest/daos_test/dfuse.py     | 14 +++--
 src/tests/ftest/daos_test/suite.yaml   |  2 +-
 src/tests/ftest/harness/basic.py       | 52 +++++++++++++-----
 src/tests/ftest/util/cmocka_utils.py   | 73 ++++++++++++++++----------
 src/tests/ftest/util/daos_core_base.py | 19 +++++--
 6 files changed, 111 insertions(+), 62 deletions(-)

diff --git a/src/tests/ftest/daos_test/dfs.py b/src/tests/ftest/daos_test/dfs.py
index 6b43757a8d7..721012d8026 100644
--- a/src/tests/ftest/daos_test/dfs.py
+++ b/src/tests/ftest/daos_test/dfs.py
@@ -1,11 +1,9 @@
 """
-  (C) Copyright 2020-2023 Intel Corporation.
+  (C) Copyright 2020-2024 Intel Corporation.
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 
-import os
-
 from daos_core_base import DaosCoreBase
 
 
@@ -34,8 +32,7 @@ def test_daos_dfs_unit(self):
         :avocado: tags=daos_test,dfs_test,dfs
         :avocado: tags=DaosCoreTestDfs,test_daos_dfs_unit
         """
-        self.daos_test = os.path.join(self.bin, 'dfs_test')
-        self.run_subtest()
+        self.run_subtest('dfs_test')
 
     def test_daos_dfs_parallel(self):
         """Jira ID: DAOS-5409.
@@ -51,8 +48,7 @@ def test_daos_dfs_parallel(self):
         :avocado: tags=daos_test,dfs_test,dfs
         :avocado: tags=DaosCoreTestDfs,test_daos_dfs_parallel
         """
-        self.daos_test = os.path.join(self.bin, 'dfs_test')
-        self.run_subtest()
+        self.run_subtest('dfs_test')
 
     def test_daos_dfs_sys(self):
         """Jira ID: DAOS-7759.
@@ -68,5 +64,4 @@ def test_daos_dfs_sys(self):
         :avocado: tags=daos_test,dfs_test,dfs
         :avocado: tags=DaosCoreTestDfs,test_daos_dfs_sys
         """
-        self.daos_test = os.path.join(self.bin, 'dfs_test')
-        self.run_subtest()
+        self.run_subtest('dfs_test')
diff --git a/src/tests/ftest/daos_test/dfuse.py b/src/tests/ftest/daos_test/dfuse.py
index 322ff407fe3..20d0e160a3c 100644
--- a/src/tests/ftest/daos_test/dfuse.py
+++ b/src/tests/ftest/daos_test/dfuse.py
@@ -8,7 +8,7 @@
 from collections import OrderedDict
 
 from apricot import TestWithServers
-from cmocka_utils import CmockaUtils
+from cmocka_utils import CmockaUtils, get_cmocka_command
 from dfuse_utils import get_dfuse, start_dfuse
 from file_utils import create_directory
 from general_utils import get_log_file
@@ -31,8 +31,6 @@ def run_test(self, il_lib=None):
         if il_lib is None:
             self.fail('il_lib is not defined.')
 
-        daos_test = os.path.join(self.bin, 'dfuse_test')
-
         # Create a pool, container and start dfuse.
         pool = self.get_pool(connect=False)
         container = self.get_container(pool)
@@ -105,8 +103,7 @@ def run_test(self, il_lib=None):
                 daos_test_env['D_IL_MAX_EQ'] = '2'
                 daos_test_env['D_IL_NO_BYPASS'] = '1'
 
-        command = [
-            daos_test,
+        parameters = [
             '--test-dir',
             mount_dir,
             '--io',
@@ -117,7 +114,7 @@ def run_test(self, il_lib=None):
             '--cache'
         ]
         if use_dfuse:
-            command.append('--lowfd')
+            parameters.append('--lowfd')
         else:
             # make D_IL_MOUNT_POINT different from mount_dir so it tests a non-DAOS filesystem
             dummy_dir = '/tmp/dummy'
@@ -126,9 +123,10 @@ def run_test(self, il_lib=None):
                 self.fail(f"Error creating {dummy_dir} on {result.failed_hosts}")
             daos_test_env['D_IL_MOUNT_POINT'] = dummy_dir
         if cache_mode != 'writeback':
-            command.append('--metadata')
+            parameters.append('--metadata')
 
-        job = get_job_manager(self, "Clush", cmocka_utils.get_cmocka_command(" ".join(command)))
+        job = get_job_manager(
+            self, "Clush", get_cmocka_command(self.bin, 'dfuse_test', " ".join(parameters)))
         job.assign_hosts(cmocka_utils.hosts)
         job.assign_environment(daos_test_env)
 
diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml
index 8ea3980b9c2..3f8572f2bcf 100644
--- a/src/tests/ftest/daos_test/suite.yaml
+++ b/src/tests/ftest/daos_test/suite.yaml
@@ -8,7 +8,7 @@ hosts:
 timeout: 600
 timeouts:
   test_daos_degraded_mode: 450
-  test_daos_management: 30
+  test_daos_management: 110
   test_daos_pool: 180
   test_daos_container: 700
   test_daos_epoch: 125
diff --git a/src/tests/ftest/harness/basic.py b/src/tests/ftest/harness/basic.py
index 49759f4be09..cdc583119f2 100644
--- a/src/tests/ftest/harness/basic.py
+++ b/src/tests/ftest/harness/basic.py
@@ -6,7 +6,7 @@
 import os
 
 from apricot import TestWithoutServers
-from cmocka_utils import CmockaUtils
+from cmocka_utils import CmockaUtils, get_cmocka_command
 from command_utils import SubProcessCommand
 from exception_utils import CommandFailure
 from job_manager_utils import Mpirun, Orterun
@@ -141,30 +141,58 @@ def test_no_cmocka_xml(self):
         self.log.info("  This should generate a cmocka xml file with a 'Missing file' error")
         name = "no_cmocka_xml_file_test"
         cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
-        command = cmocka_utils.get_cmocka_command("hostname")
+        command = get_cmocka_command("", "hostname")
         cmocka_utils.run_cmocka_test(self, command)
+        self._verify_no_cmocka_xml(name)
+        self.log.info("Test passed")
+
+    def test_no_cmocka_xml_timeout(self):
+        """Test to verify CmockaUtils handles timed out process correctly.
+
+        If working correctly this test should fail due to a test timeout and a missing cmocka file.
+
+        :avocado: tags=all
+        :avocado: tags=vm
+        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=HarnessBasicTest,test_no_cmocka_xml_timeout
+        """
+        self.log.info("=" * 80)
+        self.log.info("Running the 'sleep 30' command via CmockaUtils")
+        self.log.info("  This should generate a test timeout failure")
+        self.log.info("  This should generate a cmocka xml file with a 'Missing file' error")
+        name = "no_cmocka_xml_file_timeout_test"
+        cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
+        command = get_cmocka_command("", "sleep", "60")
+        try:
+            cmocka_utils.run_cmocka_test(self, command)
+        finally:
+            self._verify_no_cmocka_xml(name)
+        self.fail("Test did not timeout")
 
+    def _verify_no_cmocka_xml(self, name):
+        """Verify a cmocka xml file was generated with the expected error.
+
+        Args:
+            name (str): name of the cmocka test
+        """
         # Verify a generated cmocka xml file exists
-        expected = os.path.join(self.outputdir, "{}_cmocka_results.xml".format(name))
+        expected = os.path.join(self.outputdir, f"{name}_cmocka_results.xml")
         self.log.info("Verifying the existence of the generated cmocka file: %s", expected)
         if not os.path.isfile(expected):
-            self.fail("No {} file found".format(expected))
+            self.fail(f"No {expected} file found")
 
         # Verify the generated cmocka xml file contains the expected error
         self.log.info("Verifying contents of the generated cmocka file: %s", expected)
         with open(expected, "r", encoding="utf-8") as file_handle:
             actual_contents = file_handle.readlines()
-        error_message = "Missing cmocka results for hostname in {}".format(self.outputdir)
+        error_message = f"Missing cmocka results for hostname in {self.outputdir}"
         expected_lines = [
-            "<testsuite errors=\"1\" failures=\"0\" name=\"{}\" skipped=\"0\" tests=\"1\"".format(
-                name),
-            "<testcase classname=\"{}\" name=\"{}\"".format(name, self.name),
-            "<error message=\"{}\" type=\"Missing file\">".format(error_message)
+            f"<testsuite errors=\"1\" failures=\"0\" name=\"{name}\" skipped=\"0\" tests=\"1\"",
+            f"<testcase classname=\"{name}\" name=\"{self.name}\"",
+            f"<error message=\"{error_message}\" type=\"Missing file\">"
         ]
         for index, actual_line in enumerate(actual_contents[1:4]):
             self.log.debug("  expecting: %s", expected_lines[index])
             self.log.debug("  in actual: %s", actual_line[:-1].strip())
             if expected_lines[index] not in actual_line:
-                self.fail("Badly formed {} file".format(expected))
-
-        self.log.info("Test passed")
+                self.fail(f"Badly formed {expected} file")
diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index 4efa3c2177f..8c3a14e99c3 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -4,16 +4,55 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import os
-import time
 
 from agent_utils import include_local_host
 from command_utils import ExecutableCommand
-from command_utils_base import EnvironmentVariables
+from command_utils_base import BasicParameter, EnvironmentVariables
 from exception_utils import CommandFailure
 from results_utils import Job, Results, TestName, TestResult, create_xml
 from run_utils import get_clush_command, run_local, run_remote
 
 
+def get_cmocka_command(path, executable, parameters=None):
+    """Get an ExecutableCommand representing the provided command string.
+
+    Adds detection of any bad keywords in the command output that, if found, will result in a
+    command failure.
+
+    Args:
+        path (str): the command path to use to create the CmockaCommand
+        executable (str): the command name to use to create the CmockaCommand
+        parameters (str): parameters to use to create the CmockaCommand
+
+    Returns:
+        ExecutableCommand: the object setup to run the command
+
+    """
+    keywords = ["Process received signal", "stack smashing detected", "End of error message",
+                "buffer overflow detected"]
+    command = CmockaCommand(path, executable, keywords)
+    command.parameters.value = parameters
+    return command
+
+
+class CmockaCommand(ExecutableCommand):
+    """Defines a object representing a daos command."""
+
+    def __init__(self, path, executable, keywords):
+        """Create a CmockaCommand object.
+
+        Args:
+            path (str): the command path. This is excluded from the search when the process is
+                killed.
+            executable (str): the command executable. Also the string used to search for the process
+                when it is killed.
+            keywords (list): list of words used to mark the command as failed if any are found in
+                the command output. Defaults to None.
+        """
+        super().__init__(None, executable, path, check_results=keywords)
+        self.parameters = BasicParameter(None)
+
+
 class CmockaUtils():
     """Utilities for running test that generate cmocka xml results."""
 
@@ -77,25 +116,7 @@ def get_cmocka_env(self):
             "CMOCKA_MESSAGE_OUTPUT": "xml",
         })
 
-    @staticmethod
-    def get_cmocka_command(command):
-        """Get an ExecutableCommand representing the provided command string.
-
-        Adds detection of any bad keywords in the command output that, if found, will result in a
-        command failure.
-
-        Args:
-            command (str): the command string to use to create the ExecutableCommand
-
-        Returns:
-            ExecutableCommand: the object setup to run the command
-
-        """
-        keywords = ["Process received signal", "stack smashing detected", "End of error message",
-                    "buffer overflow detected"]
-        return ExecutableCommand(namespace=None, command=command, check_results=keywords)
-
-    def run_cmocka_test(self, test, command):
+    def run_cmocka_test(self, test, command='daos_test'):
         """Run the cmocka test command.
 
         After the command completes, copy any remote cmocka results that may exist back to this host
@@ -109,22 +130,20 @@ def run_cmocka_test(self, test, command):
         error_message = None
         error_exception = None
         try:
-            time.sleep(50)      # Debug
             command.run()
 
         except CommandFailure as error:
-            error_message = "Error detected running {}".format(job_command)
+            error_message = f"Error detected running {job_command}"
             error_exception = error
             test.log.exception(error_message)
             test.fail(error_message)
 
         finally:
-            run_remote(test.log, self.hosts, "ps -ejH")
+            run_remote(test.log, self.hosts, "ps -ejH")     # TODO: remove debug
             self._collect_cmocka_results(test)
             if not self._check_cmocka_files():
                 if error_message is None:
-                    error_message = "Missing cmocka results for {} in {}".format(
-                        job_command, self.cmocka_dir)
+                    error_message = f"Missing cmocka results for {job_command} in {self.cmocka_dir}"
                 self._generate_cmocka_files(test, error_message, error_exception)
 
     def _collect_cmocka_results(self, test):
@@ -190,7 +209,7 @@ def _generate_cmocka_files(self, test, error_message, error_exception):
         test_result.traceback = error_exception
         test_result.time_elapsed = 0
 
-        cmocka_xml = os.path.join(self.outputdir, "{}_cmocka_results.xml".format(self.test_name))
+        cmocka_xml = os.path.join(self.outputdir, f"{self.test_name}_cmocka_results.xml")
         job = Job(self.test_name, xml_output=cmocka_xml)
         result = Results(test.logfile)
         result.tests.append(test_result)
diff --git a/src/tests/ftest/util/daos_core_base.py b/src/tests/ftest/util/daos_core_base.py
index f5eee477822..a1a6f6e2554 100644
--- a/src/tests/ftest/util/daos_core_base.py
+++ b/src/tests/ftest/util/daos_core_base.py
@@ -8,7 +8,7 @@
 import shutil
 
 from apricot import TestWithServers
-from cmocka_utils import CmockaUtils
+from cmocka_utils import CmockaUtils, get_cmocka_command
 from general_utils import get_log_file
 from job_manager_utils import get_job_manager
 from test_utils_pool import POOL_TIMEOUT_INCREMENT
@@ -51,8 +51,17 @@ def get_test_param(self, name, default=None):
         path = "/".join(["/run/daos_tests", name, "*"])
         return self.params.get(self.get_test_name(), path, default)
 
-    def run_subtest(self):
-        """Run daos_test with a subtest argument."""
+    def run_subtest(self, executable='daos_test', path=None):
+        """Run the executable with a subtest argument.
+
+        Args:
+            executable (str, optional): name of the executable. Defaults to 'daos_test'.
+            path (str, optional): path for the executable. Defaults to self.bin.
+        """
+        if path is None:
+            # path=None yields the default self.bin path; path="" yields no path
+            path = self.bin
+
         subtest = self.get_test_param("daos_test")
         num_clients = self.get_test_param("num_clients")
         if num_clients is None:
@@ -81,8 +90,8 @@ def run_subtest(self):
         daos_test_env["COVFILE"] = "/tmp/test.cov"
         daos_test_env["POOL_SCM_SIZE"] = str(scm_size)
         daos_test_env["POOL_NVME_SIZE"] = str(nvme_size)
-        daos_test_cmd = cmocka_utils.get_cmocka_command(
-            " ".join([self.daos_test, "-n", dmg_config_file, "".join(["-", subtest]), str(args)]))
+        daos_test_cmd = get_cmocka_command(
+            path, executable, f"-n {dmg_config_file} -{subtest} {str(args)}")
         job = get_job_manager(self, "Orterun", daos_test_cmd, mpi_type="openmpi")
         job.assign_hosts(cmocka_utils.hosts, self.workdir, None)
         job.assign_processes(num_clients)

From 456f2561984e3b9800c84f27dceb8c3e71ccb3c6 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 14:34:00 -0400
Subject: [PATCH 06/22] Updates.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/basic.py             | 15 +++++++++------
 src/tests/ftest/util/apricot/apricot/test.py |  2 --
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/tests/ftest/harness/basic.py b/src/tests/ftest/harness/basic.py
index cdc583119f2..3c8a45f1549 100644
--- a/src/tests/ftest/harness/basic.py
+++ b/src/tests/ftest/harness/basic.py
@@ -9,7 +9,7 @@
 from cmocka_utils import CmockaUtils, get_cmocka_command
 from command_utils import SubProcessCommand
 from exception_utils import CommandFailure
-from job_manager_utils import Mpirun, Orterun
+from job_manager_utils import JobManager, Mpirun, Orterun
 
 
 class HarnessBasicTest(TestWithoutServers):
@@ -143,7 +143,7 @@ def test_no_cmocka_xml(self):
         cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
         command = get_cmocka_command("", "hostname")
         cmocka_utils.run_cmocka_test(self, command)
-        self._verify_no_cmocka_xml(name)
+        self._verify_no_cmocka_xml(name, str(command))
         self.log.info("Test passed")
 
     def test_no_cmocka_xml_timeout(self):
@@ -163,17 +163,20 @@ def test_no_cmocka_xml_timeout(self):
         name = "no_cmocka_xml_file_timeout_test"
         cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
         command = get_cmocka_command("", "sleep", "60")
+        job = JobManager("/run/job_manager/time/*", "time", command)
+        job.register_cleanup_method = self.register_cleanup
         try:
-            cmocka_utils.run_cmocka_test(self, command)
+            cmocka_utils.run_cmocka_test(self, job)
         finally:
-            self._verify_no_cmocka_xml(name)
+            self._verify_no_cmocka_xml(name, str(job))
         self.fail("Test did not timeout")
 
-    def _verify_no_cmocka_xml(self, name):
+    def _verify_no_cmocka_xml(self, name, command):
         """Verify a cmocka xml file was generated with the expected error.
 
         Args:
             name (str): name of the cmocka test
+            command (str): command for the cmocka test
         """
         # Verify a generated cmocka xml file exists
         expected = os.path.join(self.outputdir, f"{name}_cmocka_results.xml")
@@ -185,7 +188,7 @@ def _verify_no_cmocka_xml(self, name):
         self.log.info("Verifying contents of the generated cmocka file: %s", expected)
         with open(expected, "r", encoding="utf-8") as file_handle:
             actual_contents = file_handle.readlines()
-        error_message = f"Missing cmocka results for hostname in {self.outputdir}"
+        error_message = f"Missing cmocka results for {command} in {self.outputdir}"
         expected_lines = [
             f"<testsuite errors=\"1\" failures=\"0\" name=\"{name}\" skipped=\"0\" tests=\"1\"",
             f"<testcase classname=\"{name}\" name=\"{self.name}\"",
diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py
index 4d58b3a7c25..f401b263ddd 100644
--- a/src/tests/ftest/util/apricot/apricot/test.py
+++ b/src/tests/ftest/util/apricot/apricot/test.py
@@ -501,7 +501,6 @@ def __init__(self, *args, **kwargs):
 
         self.client_mca = None
         self.bin = None
-        self.daos_test = None
         self.cart_prefix = None
         self.cart_bin = None
         self.tmp = None
@@ -519,7 +518,6 @@ def setUp(self):
         """Set up run before each test."""
         super().setUp()
         self.bin = os.path.join(self.prefix, 'bin')
-        self.daos_test = os.path.join(self.prefix, 'bin', 'daos_test')
 
         # set the shared directory for daos tests
         self.tmp = self.test_env.shared_dir

From c366c64f24fc9083a67a7f8ac00f5aafd398dca4 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 14:42:09 -0400
Subject: [PATCH 07/22] Cleanup.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index 8c3a14e99c3..b0561eabeaa 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -36,7 +36,7 @@ def get_cmocka_command(path, executable, parameters=None):
 
 
 class CmockaCommand(ExecutableCommand):
-    """Defines a object representing a daos command."""
+    """Defines a object representing a cmocka test command."""
 
     def __init__(self, path, executable, keywords):
         """Create a CmockaCommand object.
@@ -116,7 +116,7 @@ def get_cmocka_env(self):
             "CMOCKA_MESSAGE_OUTPUT": "xml",
         })
 
-    def run_cmocka_test(self, test, command='daos_test'):
+    def run_cmocka_test(self, test, command):
         """Run the cmocka test command.
 
         After the command completes, copy any remote cmocka results that may exist back to this host

From e496214879b6dfd3190b8b85946768c1888318f0 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 15:46:12 -0400
Subject: [PATCH 08/22] Ensure registered tearDown steps are run in
 TestWithoutServers

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/apricot/apricot/test.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py
index f401b263ddd..614a0d99fbd 100644
--- a/src/tests/ftest/util/apricot/apricot/test.py
+++ b/src/tests/ftest/util/apricot/apricot/test.py
@@ -476,6 +476,9 @@ def tearDown(self):
         self.report_timeout()
         super().tearDown()
 
+        # Execute any tear down steps in the reverse order of which they were registered.
+        self._teardown_errors.extend(self._cleanup())
+
         # Clean up any temporary files
         self._teardown_errors.extend(self.remove_temp_test_dir())
 
@@ -892,6 +895,7 @@ def start_agents(self, agent_groups=None, force=False):
         self.setup_agents(agent_groups)
         if self.agent_managers:
             self.start_agent_managers(force)
+            self.register_cleanup(self.stop_agents)
 
     def start_servers(self, server_groups=None, force=False):
         """Start the daos_server processes.
@@ -915,6 +919,7 @@ def start_servers(self, server_groups=None, force=False):
         self.setup_servers(server_groups)
         if self.server_managers:
             force_agent_start = self.start_server_managers(force)
+            self.register_cleanup(self.stop_servers)
         return force_agent_start
 
     def restart_servers(self):
@@ -1402,16 +1407,6 @@ def tearDown(self):
         # Tear down any test-specific items
         self._teardown_errors = self.pre_tear_down()
 
-        # Destroy any job managers, containers, pools, and dfuse instances next
-        # Eventually this call will encompass all teardown steps
-        self._teardown_errors.extend(self._cleanup())
-
-        # Stop the agents
-        self._teardown_errors.extend(self.stop_agents())
-
-        # Stop the servers
-        self._teardown_errors.extend(self.stop_servers())
-
         super().tearDown()
 
     def pre_tear_down(self):

From 9cada0e3464408c58cadb19357a3f5918e989199 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 15:49:25 -0400
Subject: [PATCH 09/22] Add missing modified file.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/harness/basic.py b/src/tests/ftest/harness/basic.py
index 3c8a45f1549..3e04e0f6007 100644
--- a/src/tests/ftest/harness/basic.py
+++ b/src/tests/ftest/harness/basic.py
@@ -168,7 +168,7 @@ def test_no_cmocka_xml_timeout(self):
         try:
             cmocka_utils.run_cmocka_test(self, job)
         finally:
-            self._verify_no_cmocka_xml(name, str(job))
+            self._verify_no_cmocka_xml(name, str(command))
         self.fail("Test did not timeout")
 
     def _verify_no_cmocka_xml(self, name, command):

From 0c3493f7d951e42bfb9b23473303b3eed986ccba Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 17:57:19 -0400
Subject: [PATCH 10/22] Define hosts for cleanup.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/basic.py          |  2 +
 src/tests/ftest/util/job_manager_utils.py | 98 +++++++++++------------
 2 files changed, 51 insertions(+), 49 deletions(-)

diff --git a/src/tests/ftest/harness/basic.py b/src/tests/ftest/harness/basic.py
index 3e04e0f6007..3443d6a7364 100644
--- a/src/tests/ftest/harness/basic.py
+++ b/src/tests/ftest/harness/basic.py
@@ -9,6 +9,7 @@
 from cmocka_utils import CmockaUtils, get_cmocka_command
 from command_utils import SubProcessCommand
 from exception_utils import CommandFailure
+from host_utils import get_local_host
 from job_manager_utils import JobManager, Mpirun, Orterun
 
 
@@ -164,6 +165,7 @@ def test_no_cmocka_xml_timeout(self):
         cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
         command = get_cmocka_command("", "sleep", "60")
         job = JobManager("/run/job_manager/time/*", "time", command)
+        job.assign_hosts(get_local_host())
         job.register_cleanup_method = self.register_cleanup
         try:
             cmocka_utils.run_cmocka_test(self, job)
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index a3e7d671e9d..3b1bdfc2689 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -177,7 +177,7 @@ def check_subprocess_status(self, sub_process):
         """
         return self.job.check_subprocess_status(sub_process)
 
-    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
+    def assign_hosts(self, *args, **kwargs):
         """Assign the hosts to use with the command.
 
         Set the appropriate command line parameter with the specified value.
@@ -191,6 +191,36 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
+        self._set_hosts(*args, **kwargs)
+
+    def _set_hosts(self, hosts):
+        """Assign hosts.
+
+        Args:
+            hosts (NodeSet): hosts to specify on the command line
+        """
+        self._hosts = hosts.copy()
+
+    def _setup_hostfile(self, path=None, slots=None, hostfile=True):
+        """Setup the hostfile to use with the command.
+
+        Args:
+            path (str, optional): path to use when specifying the hosts through
+                a hostfile. Defaults to None.
+            slots (int, optional): number of slots per host to specify in the
+                optional hostfile. Defaults to None.
+            hostfile (bool, optional): whether or not to also update any host related command
+                parameters to keep them in sync with the hosts. Defaults to True.
+
+        Returns:
+            str: the full path of the written hostfile; None if one is not written
+        """
+        if not hostfile:
+            return None
+        kwargs = {"hosts": self._hosts, "slots": slots}
+        if path is not None:
+            kwargs["path"] = path
+        return write_host_file(**kwargs)
 
     def assign_processes(self, processes):
         """Assign the number of processes.
@@ -367,7 +397,7 @@ def __init__(self, job, subprocess=False, mpi_type="openmpi"):
         self.bind_to = FormattedParameter("--bind-to {}", None)
         self.mpi_type = mpi_type
 
-    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
+    def assign_hosts(self, *args, **kwargs):
         """Assign the hosts to use with the command (--hostfile).
 
         Args:
@@ -378,13 +408,8 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        self._hosts = hosts.copy()
-        if not hostfile:
-            return
-        kwargs = {"hosts": self._hosts, "slots": slots}
-        if path is not None:
-            kwargs["path"] = path
-        self.hostfile.value = write_host_file(**kwargs)
+        super().assign_hosts(*args, **kwargs)
+        self.hostfile.value = self._setup_hostfile(*args, **kwargs)
 
     def assign_processes(self, processes):
         """Assign the number of processes (-np).
@@ -486,7 +511,7 @@ def __init__(self, job, subprocess=False, mpi_type="openmpi"):
         self.args = BasicParameter(None, None)
         self.mpi_type = mpi_type
 
-    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
+    def assign_hosts(self, *args, **kwargs):
         """Assign the hosts to use with the command (-f).
 
         Args:
@@ -497,13 +522,8 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        self._hosts = hosts.copy()
-        if not hostfile:
-            return
-        kwargs = {"hosts": self._hosts, "slots": slots}
-        if path is not None:
-            kwargs["path"] = path
-        self.hostfile.value = write_host_file(**kwargs)
+        super().assign_hosts(*args, **kwargs)
+        self.hostfile.value = self._setup_hostfile(*args, **kwargs)
 
     def assign_processes(self, processes=None, ppn=None):
         """Assign the number of processes (-np) and processes per node (-ppn).
@@ -590,7 +610,7 @@ def __init__(self, job, path="", subprocess=False):
         self.partition = FormattedParameter("--partition={}", None)
         self.output = FormattedParameter("--output={}", None)
 
-    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
+    def assign_hosts(self, *args, **kwargs):
         """Assign the hosts to use with the command (-f).
 
         Args:
@@ -601,13 +621,17 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        self._hosts = hosts.copy()
-        if not hostfile:
-            return
-        kwargs = {"hosts": self._hosts, "slots": None}
-        if path is not None:
-            kwargs["path"] = path
-        self.nodefile.value = write_host_file(**kwargs)
+        super().assign_hosts(*args, **kwargs)
+        self.nodefile.value = self._setup_hostfile(*args, **kwargs)
+        self._set_ntasks_per_node(*args, **kwargs)
+
+    def _set_ntasks_per_node(self, slots=None):
+        """Assign the ntasks_per_node value.
+
+        Args:
+            slots (int, optional): number of slots per host to specify in the
+                hostfile. Defaults to None.
+        """
         self.ntasks_per_node.value = slots
 
     def assign_processes(self, processes):
@@ -768,19 +792,6 @@ def check_subprocess_status(self, sub_process):
             self.job.pattern, self.timestamps["start"], None,
             self.job.pattern_count, self.job.pattern_timeout.value)
 
-    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
-        """Assign the hosts to use with the command.
-
-        Set the appropriate command line parameter with the specified value.
-
-        Args:
-            hosts (NodeSet): hosts to specify on the command line
-            path (str, optional): not used. Defaults to None.
-            slots (int, optional): not used. Defaults to None.
-            hostfile (bool, optional): not used. Defaults to True.
-        """
-        self._hosts = hosts.copy()
-
     def assign_environment(self, env_vars, append=False):
         """Assign or add environment variables to the command.
 
@@ -1225,17 +1236,6 @@ def __str__(self):
         commands = [super().__str__(), "-w {}".format(self.hosts), str(self.job)]
         return " ".join(commands)
 
-    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
-        """Assign the hosts to use with the command (--hostfile).
-
-        Args:
-            hosts (NodeSet): hosts to specify in the hostfile
-            path (str, optional): not used. Defaults to None.
-            slots (int, optional): not used. Defaults to None.
-            hostfile (bool, optional): not used. Defaults to True.
-        """
-        self._hosts = hosts.copy()
-
     def assign_environment(self, env_vars, append=False):
         """Assign or add environment variables to the command.
 

From 34fc71b2dec728ce50b7a28d0131956176c5293c Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 19:04:13 -0400
Subject: [PATCH 11/22] Trim ps debug output; kill using the manager command regex.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py      |  2 +-
 src/tests/ftest/util/job_manager_utils.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index b0561eabeaa..63a92e64e04 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -139,7 +139,7 @@ def run_cmocka_test(self, test, command):
             test.fail(error_message)
 
         finally:
-            run_remote(test.log, self.hosts, "ps -ejH")     # TODO: remove debug
+            run_remote(test.log, self.hosts, "ps -jH")
             self._collect_cmocka_results(test)
             if not self._check_cmocka_files():
                 if error_message is None:
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 3b1bdfc2689..8f15fdf4398 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -153,7 +153,7 @@ def command_regex(self):
             str: regular expression to use to search for the command
         """
         # pylint: disable=protected-access
-        return "'({})'".format("|".join(self._exe_names + self.job._exe_names))
+        return f"'({'|'.join(self._exe_names + self.job._exe_names)})'"
 
     def __str__(self):
         """Return the command with all of its defined parameters as a string.
@@ -338,19 +338,19 @@ def kill(self):
         """Forcibly terminate any job processes running on hosts."""
         if not self.job:
             return
-        regex = self.job.command_regex
-        detected, running = stop_processes(self.log, self._hosts, regex)
+        detected, running = stop_processes(self.log, self._hosts, self.command_regex)
         if not detected:
             self.log.info(
-                "No remote %s processes killed on %s (none found), done.", regex, self._hosts)
+                "No remote %s processes killed on %s (none found), done.",
+                self.command_regex, self._hosts)
         elif running:
             self.log.info(
                 "***Unable to kill remote %s process on %s! Please investigate/report.***",
-                regex, running)
+                self.command_regex, running)
         else:
             self.log.info(
                 "***At least one remote %s process needed to be killed on %s! Please investigate/"
-                "report.***", regex, detected)
+                "report.***", self.command_regex, detected)
 
 
 class Orterun(JobManager):

From 4b58ad74d91c195e7f15e2671cff2b58df8ab7d9 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 30 Oct 2024 19:21:53 -0400
Subject: [PATCH 12/22] Fix assign hosts.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/job_manager_utils.py | 37 ++++++-----------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 8f15fdf4398..d702432ffca 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -177,7 +177,7 @@ def check_subprocess_status(self, sub_process):
         """
         return self.job.check_subprocess_status(sub_process)
 
-    def assign_hosts(self, *args, **kwargs):
+    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
         """Assign the hosts to use with the command.
 
         Set the appropriate command line parameter with the specified value.
@@ -191,14 +191,6 @@ def assign_hosts(self, *args, **kwargs):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        self._set_hosts(*args, **kwargs)
-
-    def _set_hosts(self, hosts):
-        """Assign hosts.
-
-        Args:
-            hosts (NodeSet): hosts to specify on the command line
-        """
         self._hosts = hosts.copy()
 
     def _setup_hostfile(self, path=None, slots=None, hostfile=True):
@@ -397,7 +389,7 @@ def __init__(self, job, subprocess=False, mpi_type="openmpi"):
         self.bind_to = FormattedParameter("--bind-to {}", None)
         self.mpi_type = mpi_type
 
-    def assign_hosts(self, *args, **kwargs):
+    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
         """Assign the hosts to use with the command (--hostfile).
 
         Args:
@@ -408,8 +400,8 @@ def assign_hosts(self, *args, **kwargs):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        super().assign_hosts(*args, **kwargs)
-        self.hostfile.value = self._setup_hostfile(*args, **kwargs)
+        super().assign_hosts(hosts, path, slots, hostfile)
+        self.hostfile.value = self._setup_hostfile(path, slots, hostfile)
 
     def assign_processes(self, processes):
         """Assign the number of processes (-np).
@@ -511,7 +503,7 @@ def __init__(self, job, subprocess=False, mpi_type="openmpi"):
         self.args = BasicParameter(None, None)
         self.mpi_type = mpi_type
 
-    def assign_hosts(self, *args, **kwargs):
+    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
         """Assign the hosts to use with the command (-f).
 
         Args:
@@ -522,8 +514,8 @@ def assign_hosts(self, *args, **kwargs):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        super().assign_hosts(*args, **kwargs)
-        self.hostfile.value = self._setup_hostfile(*args, **kwargs)
+        super().assign_hosts(hosts, path, slots, hostfile)
+        self.hostfile.value = self._setup_hostfile(path, slots, hostfile)
 
     def assign_processes(self, processes=None, ppn=None):
         """Assign the number of processes (-np) and processes per node (-ppn).
@@ -610,7 +602,7 @@ def __init__(self, job, path="", subprocess=False):
         self.partition = FormattedParameter("--partition={}", None)
         self.output = FormattedParameter("--output={}", None)
 
-    def assign_hosts(self, *args, **kwargs):
+    def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
         """Assign the hosts to use with the command (-f).
 
         Args:
@@ -621,17 +613,8 @@ def assign_hosts(self, *args, **kwargs):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
-        super().assign_hosts(*args, **kwargs)
-        self.nodefile.value = self._setup_hostfile(*args, **kwargs)
-        self._set_ntasks_per_node(*args, **kwargs)
-
-    def _set_ntasks_per_node(self, slots=None):
-        """Assign the ntasks_per_node value.
-
-        Args:
-            slots (int, optional): number of slots per host to specify in the
-                hostfile. Defaults to None.
-        """
+        super().assign_hosts(hosts, path, slots, hostfile)
+        self.nodefile.value = self._setup_hostfile(path, slots, hostfile)
         self.ntasks_per_node.value = slots
 
     def assign_processes(self, processes):

From 28c779af7ee02612cb8f57295e28014786e9c62d Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Thu, 31 Oct 2024 10:05:58 -0400
Subject: [PATCH 13/22] Kill manager after job.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py      |  4 ++-
 src/tests/ftest/util/job_manager_utils.py | 33 +++++++++++------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index 63a92e64e04..cb47e33ef1d 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -139,7 +139,9 @@ def run_cmocka_test(self, test, command):
             test.fail(error_message)
 
         finally:
-            run_remote(test.log, self.hosts, "ps -jH")
+            if test.status is not None and test.status != 'PASS' and test.status != 'SKIP':
+                test.log.debug("Currently running processes for non-passing test:")
+                run_remote(test.log, self.hosts, "ps -jH")
             self._collect_cmocka_results(test)
             if not self._check_cmocka_files():
                 if error_message is None:
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index d702432ffca..272cba915b9 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -143,18 +143,6 @@ def job(self, value):
                 and self._job.check_results_list):
             self.check_results_list.extend(self._job.check_results_list)
 
-    @property
-    def command_regex(self):
-        """Get the regular expression to use to search for the command.
-
-        Typical use would include combining with pgrep to verify a subprocess is running.
-
-        Returns:
-            str: regular expression to use to search for the command
-        """
-        # pylint: disable=protected-access
-        return f"'({'|'.join(self._exe_names + self.job._exe_names)})'"
-
     def __str__(self):
         """Return the command with all of its defined parameters as a string.
 
@@ -330,19 +318,30 @@ def kill(self):
         """Forcibly terminate any job processes running on hosts."""
         if not self.job:
             return
-        detected, running = stop_processes(self.log, self._hosts, self.command_regex)
+        # Kill the job command
+        self._kill_process(self.job.command_regex)
+        time.sleep(5)
+        # Kill the manager command
+        self._kill_process(self.command_regex)
+
+    def _kill_process(self, pattern):
+        """Forcibly terminate the specified process.
+
+        Args:
+            pattern (str): regular expression used to find process names to stop
+        """
+        detected, running = stop_processes(self.log, self._hosts, pattern)
         if not detected:
             self.log.info(
-                "No remote %s processes killed on %s (none found), done.",
-                self.command_regex, self._hosts)
+                "No remote %s processes killed on %s (none found), done.", pattern, self._hosts)
         elif running:
             self.log.info(
                 "***Unable to kill remote %s process on %s! Please investigate/report.***",
-                self.command_regex, running)
+                pattern, running)
         else:
             self.log.info(
                 "***At least one remote %s process needed to be killed on %s! Please investigate/"
-                "report.***", self.command_regex, detected)
+                "report.***", pattern, detected)
 
 
 class Orterun(JobManager):

From 04594cbc17e7ff3a444c1834c47ca16bdb72bbb0 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Thu, 31 Oct 2024 16:59:40 -0400
Subject: [PATCH 14/22] Don't stop the systemctl command.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/basic.py          |  9 ++++-----
 src/tests/ftest/harness/basic.yaml        |  8 ++++++++
 src/tests/ftest/util/job_manager_utils.py | 17 +++++++++++++----
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/tests/ftest/harness/basic.py b/src/tests/ftest/harness/basic.py
index 3443d6a7364..41443de59ea 100644
--- a/src/tests/ftest/harness/basic.py
+++ b/src/tests/ftest/harness/basic.py
@@ -10,7 +10,7 @@
 from command_utils import SubProcessCommand
 from exception_utils import CommandFailure
 from host_utils import get_local_host
-from job_manager_utils import JobManager, Mpirun, Orterun
+from job_manager_utils import Mpirun, Orterun, get_job_manager
 
 
 class HarnessBasicTest(TestWithoutServers):
@@ -164,11 +164,10 @@ def test_no_cmocka_xml_timeout(self):
         name = "no_cmocka_xml_file_timeout_test"
         cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
         command = get_cmocka_command("", "sleep", "60")
-        job = JobManager("/run/job_manager/time/*", "time", command)
-        job.assign_hosts(get_local_host())
-        job.register_cleanup_method = self.register_cleanup
+        manager = get_job_manager(self, job=command)
+        manager.assign_hosts(get_local_host())
         try:
-            cmocka_utils.run_cmocka_test(self, job)
+            cmocka_utils.run_cmocka_test(self, manager)
         finally:
             self._verify_no_cmocka_xml(name, str(command))
         self.fail("Test did not timeout")
diff --git a/src/tests/ftest/harness/basic.yaml b/src/tests/ftest/harness/basic.yaml
index be8aad8fd10..050cb1b9961 100644
--- a/src/tests/ftest/harness/basic.yaml
+++ b/src/tests/ftest/harness/basic.yaml
@@ -1 +1,9 @@
 timeout: 10
+
+job_manager: !mux
+  manager_1:
+    class_name: Clush
+  manager_2:
+    class_name: Orterun
+  manager_3:
+    class_name: Mpirun
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 272cba915b9..42f087ab630 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -179,6 +179,7 @@ def assign_hosts(self, hosts, path=None, slots=None, hostfile=True):
             hostfile (bool, optional): whether or not to also update any host related command
                 parameters to keep them in sync with the hosts. Defaults to True.
         """
+        # pylint: disable=unused-argument
         self._hosts = hosts.copy()
 
     def _setup_hostfile(self, path=None, slots=None, hostfile=True):
@@ -318,11 +319,7 @@ def kill(self):
         """Forcibly terminate any job processes running on hosts."""
         if not self.job:
             return
-        # Kill the job command
         self._kill_process(self.job.command_regex)
-        time.sleep(5)
-        # Kill the manager command
-        self._kill_process(self.command_regex)
 
     def _kill_process(self, pattern):
         """Forcibly terminate the specified process.
@@ -458,6 +455,12 @@ def run(self, raise_exception=None):
 
         return super().run(raise_exception)
 
+    def kill(self):
+        """Forcibly terminate any job processes running on hosts."""
+        super().kill()
+        time.sleep(1)
+        self._kill_process(self.command_regex)
+
 
 class Mpirun(JobManager):
     """A class for the mpirun job manager command."""
@@ -572,6 +575,12 @@ def run(self, raise_exception=None):
 
         return super().run(raise_exception)
 
+    def kill(self):
+        """Forcibly terminate any job processes running on hosts."""
+        super().kill()
+        time.sleep(1)
+        self._kill_process(self.command_regex)
+
 
 class Srun(JobManager):
     """A class for the srun job manager command."""

From cd45c235abd4da8f42402bf14076cc89a8f4ed51 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Fri, 1 Nov 2024 17:19:34 -0400
Subject: [PATCH 15/22] Fix basic tests by moving cmocka tests to harness/cmocka.py.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/basic.py    |  84 ++---------------
 src/tests/ftest/harness/basic.yaml  |   8 --
 src/tests/ftest/harness/cmocka.py   | 135 ++++++++++++++++++++++++++++
 src/tests/ftest/harness/cmocka.yaml |   1 +
 4 files changed, 141 insertions(+), 87 deletions(-)
 create mode 100644 src/tests/ftest/harness/cmocka.py
 create mode 100644 src/tests/ftest/harness/cmocka.yaml

diff --git a/src/tests/ftest/harness/basic.py b/src/tests/ftest/harness/basic.py
index 41443de59ea..0ff102e2074 100644
--- a/src/tests/ftest/harness/basic.py
+++ b/src/tests/ftest/harness/basic.py
@@ -6,11 +6,9 @@
 import os
 
 from apricot import TestWithoutServers
-from cmocka_utils import CmockaUtils, get_cmocka_command
 from command_utils import SubProcessCommand
 from exception_utils import CommandFailure
-from host_utils import get_local_host
-from job_manager_utils import Mpirun, Orterun, get_job_manager
+from job_manager_utils import Mpirun, Orterun
 
 
 class HarnessBasicTest(TestWithoutServers):
@@ -80,12 +78,14 @@ def test_load_mpi(self):
         try:
             Orterun(None)
         except CommandFailure as error:
-            self.fail("Orterun initialization failed: {}".format(error))
+            self.log.error("Orterun initialization failed: %s", error)
+            self.fail("Orterun initialization failed")
 
         try:
             Mpirun(None, mpi_type="mpich")
         except CommandFailure as error:
-            self.fail("Mpirun initialization failed: {}".format(error))
+            self.log.error("Mpirun initialization failed: %s", error)
+            self.fail("Mpirun initialization failed")
 
     def test_load_mpi_hw(self):
         """Simple test of apricot test code to load the openmpi module.
@@ -126,77 +126,3 @@ def test_sub_process_command(self):
         if failed:
             self.fail("The '{}' command failed".format(command))
         self.log.info("Test passed")
-
-    def test_no_cmocka_xml(self):
-        """Test to verify CmockaUtils detects lack of cmocka file generation.
-
-        If working correctly this test should fail due to a missing cmocka file.
-
-        :avocado: tags=all
-        :avocado: tags=vm
-        :avocado: tags=harness,harness_cmocka,failure_expected
-        :avocado: tags=HarnessBasicTest,test_no_cmocka_xml
-        """
-        self.log.info("=" * 80)
-        self.log.info("Running the 'hostname' command via CmockaUtils")
-        self.log.info("  This should generate a cmocka xml file with a 'Missing file' error")
-        name = "no_cmocka_xml_file_test"
-        cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
-        command = get_cmocka_command("", "hostname")
-        cmocka_utils.run_cmocka_test(self, command)
-        self._verify_no_cmocka_xml(name, str(command))
-        self.log.info("Test passed")
-
-    def test_no_cmocka_xml_timeout(self):
-        """Test to verify CmockaUtils handles timed out process correctly.
-
-        If working correctly this test should fail due to a test timeout and a missing cmocka file.
-
-        :avocado: tags=all
-        :avocado: tags=vm
-        :avocado: tags=harness,harness_cmocka,failure_expected
-        :avocado: tags=HarnessBasicTest,test_no_cmocka_xml_timeout
-        """
-        self.log.info("=" * 80)
-        self.log.info("Running the 'sleep 30' command via CmockaUtils")
-        self.log.info("  This should generate a test timeout failure")
-        self.log.info("  This should generate a cmocka xml file with a 'Missing file' error")
-        name = "no_cmocka_xml_file_timeout_test"
-        cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
-        command = get_cmocka_command("", "sleep", "60")
-        manager = get_job_manager(self, job=command)
-        manager.assign_hosts(get_local_host())
-        try:
-            cmocka_utils.run_cmocka_test(self, manager)
-        finally:
-            self._verify_no_cmocka_xml(name, str(command))
-        self.fail("Test did not timeout")
-
-    def _verify_no_cmocka_xml(self, name, command):
-        """Verify a cmocka xml file was generated with the expected error.
-
-        Args:
-            name (str): name of the cmocka test
-            command (str): command for the cmocka test
-        """
-        # Verify a generated cmocka xml file exists
-        expected = os.path.join(self.outputdir, f"{name}_cmocka_results.xml")
-        self.log.info("Verifying the existence of the generated cmocka file: %s", expected)
-        if not os.path.isfile(expected):
-            self.fail(f"No {expected} file found")
-
-        # Verify the generated cmocka xml file contains the expected error
-        self.log.info("Verifying contents of the generated cmocka file: %s", expected)
-        with open(expected, "r", encoding="utf-8") as file_handle:
-            actual_contents = file_handle.readlines()
-        error_message = f"Missing cmocka results for {command} in {self.outputdir}"
-        expected_lines = [
-            f"<testsuite errors=\"1\" failures=\"0\" name=\"{name}\" skipped=\"0\" tests=\"1\"",
-            f"<testcase classname=\"{name}\" name=\"{self.name}\"",
-            f"<error message=\"{error_message}\" type=\"Missing file\">"
-        ]
-        for index, actual_line in enumerate(actual_contents[1:4]):
-            self.log.debug("  expecting: %s", expected_lines[index])
-            self.log.debug("  in actual: %s", actual_line[:-1].strip())
-            if expected_lines[index] not in actual_line:
-                self.fail(f"Badly formed {expected} file")
diff --git a/src/tests/ftest/harness/basic.yaml b/src/tests/ftest/harness/basic.yaml
index 050cb1b9961..be8aad8fd10 100644
--- a/src/tests/ftest/harness/basic.yaml
+++ b/src/tests/ftest/harness/basic.yaml
@@ -1,9 +1 @@
 timeout: 10
-
-job_manager: !mux
-  manager_1:
-    class_name: Clush
-  manager_2:
-    class_name: Orterun
-  manager_3:
-    class_name: Mpirun
diff --git a/src/tests/ftest/harness/cmocka.py b/src/tests/ftest/harness/cmocka.py
new file mode 100644
index 00000000000..50f51a9abaa
--- /dev/null
+++ b/src/tests/ftest/harness/cmocka.py
@@ -0,0 +1,135 @@
+"""
+  (C) Copyright 2022-2024 Intel Corporation.
+
+  SPDX-License-Identifier: BSD-2-Clause-Patent
+"""
+import os
+
+from apricot import TestWithoutServers
+from cmocka_utils import CmockaUtils, get_cmocka_command
+from host_utils import get_local_host
+from job_manager_utils import get_job_manager
+
+
+class HarnessCmockaTest(TestWithoutServers):
+    """Cmocka harness test cases.
+
+    :avocado: recursive
+    """
+
+    def test_no_cmocka_xml(self):
+        """Test to verify CmockaUtils detects lack of cmocka file generation.
+
+        If working correctly this test should fail due to a missing cmocka file.
+
+        :avocado: tags=all
+        :avocado: tags=vm
+        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=HarnessCmockaTest,test_no_cmocka_xml
+        """
+        self._run_cmocka_test(get_cmocka_command("", "hostname"), False, True)
+        self.log.info("Test passed")
+
+    def test_clush_manager_timeout(self):
+        """Test to verify CmockaUtils handles timed out process correctly.
+
+        If working correctly this test should fail due to a test timeout and a missing cmocka file.
+
+        :avocado: tags=all
+        :avocado: tags=vm
+        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=HarnessCmockaTest,test_clush_manager_timeout
+        """
+        self._run_cmocka_test(self._get_manager_command("Clush", "sleep", "60"), True, True)
+        self.fail("Test did not timeout")
+
+    def test_orterun_manager_timeout(self):
+        """Test to verify CmockaUtils handles timed out process correctly.
+
+        If working correctly this test should fail due to a test timeout and a missing cmocka file.
+
+        :avocado: tags=all
+        :avocado: tags=vm
+        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=HarnessCmockaTest,test_orterun_manager_timeout
+        """
+        self._run_cmocka_test(self._get_manager_command("Orterun", "sleep", "60"), True, True)
+        self.fail("Test did not timeout")
+
+    def test_mpirun_manager_timeout(self):
+        """Test to verify CmockaUtils handles timed out process correctly.
+
+        If working correctly this test should fail due to a test timeout and a missing cmocka file.
+
+        :avocado: tags=all
+        :avocado: tags=vm
+        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=HarnessCmockaTest,test_mpirun_manager_timeout
+        """
+        self._run_cmocka_test(self._get_manager_command("Mpirun", "sleep", "60"), True, True)
+        self.fail("Test did not timeout")
+
+    def _run_cmocka_test(self, command, timeout, missing):
+        """Run the cmocka test case.
+
+        Args:
+            command (ExecutableCommand): the command to run
+            timeout (bool): is the test expected to timeout
+            missing (bool): is the test expected to be missing a cmocka result
+        """
+        self.log.info("Running the '%s' command via CmockaUtils", str(command))
+        if timeout:
+            self.log.info("  This should generate a test timeout failure")
+        if missing:
+            self.log.info("  This should generate a cmocka xml file with a 'Missing file' error")
+
+        cmocka_utils = CmockaUtils(None, self.test_id, self.outputdir, self.test_dir, self.log)
+        try:
+            cmocka_utils.run_cmocka_test(self, command)
+        finally:
+            self._verify_no_cmocka_xml(self.test_id, str(command))
+
+    def _get_manager_command(self, class_name, executable, parameters):
+        """Get a JobManager command object.
+
+        Args:
+            class_name (str): JobManager class name
+            executable (str): executable to be managed
+            parameters (str): parameters for the executable to be managed
+
+        Returns:
+            JobManager: the requested JobManager class
+        """
+        command = get_cmocka_command("", executable, parameters)
+        manager = get_job_manager(self, class_name, command)
+        manager.assign_hosts(get_local_host())
+        return manager
+
+    def _verify_no_cmocka_xml(self, name, command):
+        """Verify a cmocka xml file was generated with the expected error.
+
+        Args:
+            name (str): name of the cmocka test
+            command (str): command for the cmocka test
+        """
+        # Verify a generated cmocka xml file exists
+        expected = os.path.join(self.outputdir, f"{name}_cmocka_results.xml")
+        self.log.info("Verifying the existence of the generated cmocka file: %s", expected)
+        if not os.path.isfile(expected):
+            self.fail(f"No {expected} file found")
+
+        # Verify the generated cmocka xml file contains the expected error
+        self.log.info("Verifying contents of the generated cmocka file: %s", expected)
+        with open(expected, "r", encoding="utf-8") as file_handle:
+            actual_contents = file_handle.readlines()
+        error_message = f"Missing cmocka results for {command} in {self.outputdir}"
+        expected_lines = [
+            f"<testsuite errors=\"1\" failures=\"0\" name=\"{name}\" skipped=\"0\" tests=\"1\"",
+            f"<testcase classname=\"{name}\" name=\"{self.name}\"",
+            f"<error message=\"{error_message}\" type=\"Missing file\">"
+        ]
+        for index, actual_line in enumerate(actual_contents[1:4]):
+            self.log.debug("  expecting: %s", expected_lines[index])
+            self.log.debug("  in actual: %s", actual_line[:-1].strip())
+            if expected_lines[index] not in actual_line:
+                self.fail(f"Badly formed {expected} file")
diff --git a/src/tests/ftest/harness/cmocka.yaml b/src/tests/ftest/harness/cmocka.yaml
new file mode 100644
index 00000000000..be8aad8fd10
--- /dev/null
+++ b/src/tests/ftest/harness/cmocka.yaml
@@ -0,0 +1 @@
+timeout: 10

From 12cd18af9e1f9b13e5ee24c3e31f17a6980983ae Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Fri, 1 Nov 2024 21:42:58 -0400
Subject: [PATCH 16/22] Don't kill orterun/mpirun commands.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: DaosCoreTestDfs DaosCoreTestDfuse harness_cmocka test_daos_management MultiEnginesPerSocketTest FaultDomain
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/cmocka.py         |  9 ++++++---
 src/tests/ftest/util/job_manager_utils.py | 17 +++++------------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/src/tests/ftest/harness/cmocka.py b/src/tests/ftest/harness/cmocka.py
index 50f51a9abaa..53e43ff91dd 100644
--- a/src/tests/ftest/harness/cmocka.py
+++ b/src/tests/ftest/harness/cmocka.py
@@ -87,7 +87,7 @@ def _run_cmocka_test(self, command, timeout, missing):
         try:
             cmocka_utils.run_cmocka_test(self, command)
         finally:
-            self._verify_no_cmocka_xml(self.test_id, str(command))
+            self._verify_no_cmocka_xml(self.test_id, command)
 
     def _get_manager_command(self, class_name, executable, parameters):
         """Get a JobManager command object.
@@ -110,7 +110,7 @@ def _verify_no_cmocka_xml(self, name, command):
 
         Args:
             name (str): name of the cmocka test
-            command (str): command for the cmocka test
+            command (ExecutableCommand): command for the cmocka test
         """
         # Verify a generated cmocka xml file exists
         expected = os.path.join(self.outputdir, f"{name}_cmocka_results.xml")
@@ -122,7 +122,10 @@ def _verify_no_cmocka_xml(self, name, command):
         self.log.info("Verifying contents of the generated cmocka file: %s", expected)
         with open(expected, "r", encoding="utf-8") as file_handle:
             actual_contents = file_handle.readlines()
-        error_message = f"Missing cmocka results for {command} in {self.outputdir}"
+        if hasattr(command, "job"):
+            error_message = f"Missing cmocka results for {str(command.job)} in {self.outputdir}"
+        else:
+            error_message = f"Missing cmocka results for {str(command)} in {self.outputdir}"
         expected_lines = [
             f"<testsuite errors=\"1\" failures=\"0\" name=\"{name}\" skipped=\"0\" tests=\"1\"",
             f"<testcase classname=\"{name}\" name=\"{self.name}\"",
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 42f087ab630..3da91fa8ca2 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -455,12 +455,6 @@ def run(self, raise_exception=None):
 
         return super().run(raise_exception)
 
-    def kill(self):
-        """Forcibly terminate any job processes running on hosts."""
-        super().kill()
-        time.sleep(1)
-        self._kill_process(self.command_regex)
-
 
 class Mpirun(JobManager):
     """A class for the mpirun job manager command."""
@@ -575,12 +569,6 @@ def run(self, raise_exception=None):
 
         return super().run(raise_exception)
 
-    def kill(self):
-        """Forcibly terminate any job processes running on hosts."""
-        super().kill()
-        time.sleep(1)
-        self._kill_process(self.command_regex)
-
 
 class Srun(JobManager):
     """A class for the srun job manager command."""
@@ -1256,6 +1244,11 @@ def run(self, raise_exception=None):
         if raise_exception is None:
             raise_exception = self.exit_status_exception
 
+        if callable(self.register_cleanup_method):
+            # Stop any running processes started by this job manager when the test completes
+            # pylint: disable=not-callable
+            self.register_cleanup_method(stop_job_manager, job_manager=self)
+
         command = " ".join([self.env.to_export_str(), str(self.job)]).strip()
         self.result = run_remote(self.log, self._hosts, command, self.verbose, self.timeout)
 

From f76b169056e1b3b468249ffb741453c6e185cd71 Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Sat, 2 Nov 2024 01:24:17 -0400
Subject: [PATCH 17/22] Cleanup.

Updated description:

Fix stopping timed out processes run by a JobManager class by only
searching for and killing the command executable being run by clush,
orterun, mpirun, etc. Add a new harness/cmocka.py test to verify the
stopping of the processes with a test timeout.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: pr daos_test dfuse_test test_load_mpi HarnessCmockaTest
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/daos_test/dfs.py       |  8 +++++---
 src/tests/ftest/daos_test/dfuse.py     |  4 ++--
 src/tests/ftest/harness/cmocka.py      | 12 ++++++------
 src/tests/ftest/util/cmocka_utils.py   |  7 +++----
 src/tests/ftest/util/daos_core_base.py | 13 +++++--------
 5 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/src/tests/ftest/daos_test/dfs.py b/src/tests/ftest/daos_test/dfs.py
index 721012d8026..90b5c0799d6 100644
--- a/src/tests/ftest/daos_test/dfs.py
+++ b/src/tests/ftest/daos_test/dfs.py
@@ -4,6 +4,8 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 
+import os
+
 from daos_core_base import DaosCoreBase
 
 
@@ -32,7 +34,7 @@ def test_daos_dfs_unit(self):
         :avocado: tags=daos_test,dfs_test,dfs
         :avocado: tags=DaosCoreTestDfs,test_daos_dfs_unit
         """
-        self.run_subtest('dfs_test')
+        self.run_subtest(os.path.join(self.bin, "dfs_test"))
 
     def test_daos_dfs_parallel(self):
         """Jira ID: DAOS-5409.
@@ -48,7 +50,7 @@ def test_daos_dfs_parallel(self):
         :avocado: tags=daos_test,dfs_test,dfs
         :avocado: tags=DaosCoreTestDfs,test_daos_dfs_parallel
         """
-        self.run_subtest('dfs_test')
+        self.run_subtest(os.path.join(self.bin, "dfs_test"))
 
     def test_daos_dfs_sys(self):
         """Jira ID: DAOS-7759.
@@ -64,4 +66,4 @@ def test_daos_dfs_sys(self):
         :avocado: tags=daos_test,dfs_test,dfs
         :avocado: tags=DaosCoreTestDfs,test_daos_dfs_sys
         """
-        self.run_subtest('dfs_test')
+        self.run_subtest(os.path.join(self.bin, "dfs_test"))
diff --git a/src/tests/ftest/daos_test/dfuse.py b/src/tests/ftest/daos_test/dfuse.py
index 20d0e160a3c..86b3e383142 100644
--- a/src/tests/ftest/daos_test/dfuse.py
+++ b/src/tests/ftest/daos_test/dfuse.py
@@ -103,6 +103,7 @@ def run_test(self, il_lib=None):
                 daos_test_env['D_IL_MAX_EQ'] = '2'
                 daos_test_env['D_IL_NO_BYPASS'] = '1'
 
+        command = os.path.join(self.bin, 'dfuse_test')
         parameters = [
             '--test-dir',
             mount_dir,
@@ -125,8 +126,7 @@ def run_test(self, il_lib=None):
         if cache_mode != 'writeback':
             parameters.append('--metadata')
 
-        job = get_job_manager(
-            self, "Clush", get_cmocka_command(self.bin, 'dfuse_test', " ".join(parameters)))
+        job = get_job_manager(self, "Clush", get_cmocka_command(command, ' '.join(parameters)))
         job.assign_hosts(cmocka_utils.hosts)
         job.assign_environment(daos_test_env)
 
diff --git a/src/tests/ftest/harness/cmocka.py b/src/tests/ftest/harness/cmocka.py
index 53e43ff91dd..adc482132f2 100644
--- a/src/tests/ftest/harness/cmocka.py
+++ b/src/tests/ftest/harness/cmocka.py
@@ -24,10 +24,10 @@ def test_no_cmocka_xml(self):
 
         :avocado: tags=all
         :avocado: tags=vm
-        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=harness,failure_expected
         :avocado: tags=HarnessCmockaTest,test_no_cmocka_xml
         """
-        self._run_cmocka_test(get_cmocka_command("", "hostname"), False, True)
+        self._run_cmocka_test(get_cmocka_command("hostname"), False, True)
         self.log.info("Test passed")
 
     def test_clush_manager_timeout(self):
@@ -37,7 +37,7 @@ def test_clush_manager_timeout(self):
 
         :avocado: tags=all
         :avocado: tags=vm
-        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=harness,failure_expected
         :avocado: tags=HarnessCmockaTest,test_clush_manager_timeout
         """
         self._run_cmocka_test(self._get_manager_command("Clush", "sleep", "60"), True, True)
@@ -50,7 +50,7 @@ def test_orterun_manager_timeout(self):
 
         :avocado: tags=all
         :avocado: tags=vm
-        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=harness,failure_expected
         :avocado: tags=HarnessCmockaTest,test_orterun_manager_timeout
         """
         self._run_cmocka_test(self._get_manager_command("Orterun", "sleep", "60"), True, True)
@@ -63,7 +63,7 @@ def test_mpirun_manager_timeout(self):
 
         :avocado: tags=all
         :avocado: tags=vm
-        :avocado: tags=harness,harness_cmocka,failure_expected
+        :avocado: tags=harness,failure_expected
         :avocado: tags=HarnessCmockaTest,test_mpirun_manager_timeout
         """
         self._run_cmocka_test(self._get_manager_command("Mpirun", "sleep", "60"), True, True)
@@ -100,7 +100,7 @@ def _get_manager_command(self, class_name, executable, parameters):
         Returns:
             JobManager: the requested JobManager class
         """
-        command = get_cmocka_command("", executable, parameters)
+        command = get_cmocka_command(executable, parameters)
         manager = get_job_manager(self, class_name, command)
         manager.assign_hosts(get_local_host())
         return manager
diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index cb47e33ef1d..c21cb366213 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -13,23 +13,22 @@
 from run_utils import get_clush_command, run_local, run_remote
 
 
-def get_cmocka_command(path, executable, parameters=None):
+def get_cmocka_command(command, parameters=None):
     """Get an ExecutableCommand representing the provided command string.
 
     Adds detection of any bad keywords in the command output that, if found, will result in a
     command failure.
 
     Args:
-        path (str): the command path to use to create the CmockaCommand
-        executable (str): the command name to use to create the CmockaCommand
+        command (str): the command path to use to create the CmockaCommand
         parameters (str): parameters to use to create the CmockaCommand
 
     Returns:
         ExecutableCommand: the object setup to run the command
-
     """
     keywords = ["Process received signal", "stack smashing detected", "End of error message",
                 "buffer overflow detected"]
+    path, executable = os.path.split(command)
     command = CmockaCommand(path, executable, keywords)
     command.parameters.value = parameters
     return command
diff --git a/src/tests/ftest/util/daos_core_base.py b/src/tests/ftest/util/daos_core_base.py
index a1a6f6e2554..1b0ad42fecc 100644
--- a/src/tests/ftest/util/daos_core_base.py
+++ b/src/tests/ftest/util/daos_core_base.py
@@ -51,16 +51,14 @@ def get_test_param(self, name, default=None):
         path = "/".join(["/run/daos_tests", name, "*"])
         return self.params.get(self.get_test_name(), path, default)
 
-    def run_subtest(self, executable='daos_test', path=None):
+    def run_subtest(self, command=None):
         """Run the executable with a subtest argument.
 
         Args:
-            executable (str, optional): name of the executable. Defaults to 'daos_test'.
-            path (str, optional): path for the executable. Defaults to self.bin.
+            command (str, optional): command to run. Defaults to None which will yield daos_test.
         """
-        if path is None:
-            # path=None yields the default self.bin path; path="" yields no path
-            path = self.bin
+        if command is None:
+            command = os.path.join(self.bin, "daos_test")
 
         subtest = self.get_test_param("daos_test")
         num_clients = self.get_test_param("num_clients")
@@ -90,8 +88,7 @@ def run_subtest(self, executable='daos_test', path=None):
         daos_test_env["COVFILE"] = "/tmp/test.cov"
         daos_test_env["POOL_SCM_SIZE"] = str(scm_size)
         daos_test_env["POOL_NVME_SIZE"] = str(nvme_size)
-        daos_test_cmd = get_cmocka_command(
-            path, executable, f"-n {dmg_config_file} -{subtest} {str(args)}")
+        daos_test_cmd = get_cmocka_command(command, f"-n {dmg_config_file} -{subtest} {str(args)}")
         job = get_job_manager(self, "Orterun", daos_test_cmd, mpi_type="openmpi")
         job.assign_hosts(cmocka_utils.hosts, self.workdir, None)
         job.assign_processes(num_clients)

From 6d17d3dfec65753cdadbfb89c7a25c60a5a974fb Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Wed, 6 Nov 2024 18:13:17 -0500
Subject: [PATCH 18/22] Add check for stopping pid 1.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: pr daos_test dfuse_test test_load_mpi HarnessCmockaTest
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/cmocka_utils.py      |  1 +
 src/tests/ftest/util/command_utils.py     | 10 +++++++---
 src/tests/ftest/util/job_manager_utils.py | 17 +++++------------
 src/tests/ftest/util/run_utils.py         |  8 ++++++++
 4 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/tests/ftest/util/cmocka_utils.py b/src/tests/ftest/util/cmocka_utils.py
index c21cb366213..6a9f0441938 100644
--- a/src/tests/ftest/util/cmocka_utils.py
+++ b/src/tests/ftest/util/cmocka_utils.py
@@ -49,6 +49,7 @@ def __init__(self, path, executable, keywords):
                 the command output. Defaults to None.
         """
         super().__init__(None, executable, path, check_results=keywords)
+        self.full_command_regex = True
         self.parameters = BasicParameter(None)
 
 
diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py
index 56ceb4c7fe3..2c9c335b1c9 100644
--- a/src/tests/ftest/util/command_utils.py
+++ b/src/tests/ftest/util/command_utils.py
@@ -72,6 +72,9 @@ def __init__(self, namespace, command, path="", subprocess=False, check_results=
         # used to check on the progress or terminate the command.
         self._exe_names = [self.command]
 
+        # If set use the full command string when returning the 'command_regex' property
+        self.full_command_regex = False
+
         # Define an attribute to store the CmdResult from the last run() call.
         # A CmdResult object has the following properties:
         #   command         - command string
@@ -132,10 +135,11 @@ def command_regex(self):
         Typical use would include combining with pgrep to verify a subprocess is running.
 
         Returns:
-            str: regular expression to use to search for the command
-
+            str: regular expression to use to search for the command, typically with pgrep or pkill
         """
-        return "'({})'".format("|".join(self._exe_names))
+        if self.full_command_regex:
+            return f"--full '{str(self)}'"
+        return f"'({'|'.join(self._exe_names)})'"
 
     @property
     def with_bind(self):
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 3da91fa8ca2..8ca10a8c47e 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -319,26 +319,19 @@ def kill(self):
         """Forcibly terminate any job processes running on hosts."""
         if not self.job:
             return
-        self._kill_process(self.job.command_regex)
-
-    def _kill_process(self, pattern):
-        """Forcibly terminate the specified process.
-
-        Args:
-            pattern (str): regular expression used to find process names to stop
-        """
-        detected, running = stop_processes(self.log, self._hosts, pattern)
+        regex = self.job.command_regex
+        detected, running = stop_processes(self.log, self._hosts, regex)
         if not detected:
             self.log.info(
-                "No remote %s processes killed on %s (none found), done.", pattern, self._hosts)
+                "No remote %s processes killed on %s (none found), done.", regex, self._hosts)
         elif running:
             self.log.info(
                 "***Unable to kill remote %s process on %s! Please investigate/report.***",
-                pattern, running)
+                regex, running)
         else:
             self.log.info(
                 "***At least one remote %s process needed to be killed on %s! Please investigate/"
-                "report.***", pattern, detected)
+                "report.***", regex, detected)
 
 
 class Orterun(JobManager):
diff --git a/src/tests/ftest/util/run_utils.py b/src/tests/ftest/util/run_utils.py
index f4893558fb0..6e946ad3ddc 100644
--- a/src/tests/ftest/util/run_utils.py
+++ b/src/tests/ftest/util/run_utils.py
@@ -4,6 +4,7 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import os
+import re
 import subprocess  # nosec
 import time
 from getpass import getuser
@@ -543,6 +544,9 @@ def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None,
             processes. Defaults to False which will attempt to kill w/o a signal, then with the ABRT
             signal, and finally with the KILL signal.
 
+    Raises:
+        ValueError: if the pattern ends up matching process 1.
+
     Returns:
         tuple: (NodeSet, NodeSet) where the first NodeSet indicates on which hosts processes
             matching the pattern were initially detected and the second NodeSet indicates on which
@@ -564,6 +568,10 @@ def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None,
         log.debug("No processes found on %s that match %s", result.failed_hosts, pattern_match)
         return processes_detected, processes_running
 
+    # Catch any attempt to kill process 1.
+    if "1" in re.findall(r"^(\d+)\s+", result.joined_stdout, re.MULTILINE):
+        raise ValueError(f"Attempting to kill process 1 as a match for {pattern}!")
+
     # Indicate on which hosts processes matching the pattern were found running in the return status
     processes_detected.add(result.passed_hosts)
 

From a6ff5d274c87c361d99605b5a55e9a4c253f5d7a Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Fri, 8 Nov 2024 16:02:06 -0500
Subject: [PATCH 19/22] Only match exact full commands if requested in
 stop_processes.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: pr daos_test dfuse_test test_load_mpi HarnessCmockaTest
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/util/command_utils.py     |  7 +++----
 src/tests/ftest/util/job_manager_utils.py |  5 ++++-
 src/tests/ftest/util/run_utils.py         | 17 ++++++++++++-----
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py
index 2c9c335b1c9..dbed7ac3c44 100644
--- a/src/tests/ftest/util/command_utils.py
+++ b/src/tests/ftest/util/command_utils.py
@@ -135,11 +135,10 @@ def command_regex(self):
         Typical use would include combining with pgrep to verify a subprocess is running.
 
         Returns:
-            str: regular expression to use to search for the command, typically with pgrep or pkill
+            str: regular expression to use to search for the command
+
         """
-        if self.full_command_regex:
-            return f"--full '{str(self)}'"
-        return f"'({'|'.join(self._exe_names)})'"
+        return "'({})'".format("|".join(self._exe_names))
 
     @property
     def with_bind(self):
diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py
index 8ca10a8c47e..932a9f7a306 100644
--- a/src/tests/ftest/util/job_manager_utils.py
+++ b/src/tests/ftest/util/job_manager_utils.py
@@ -320,7 +320,10 @@ def kill(self):
         if not self.job:
             return
         regex = self.job.command_regex
-        detected, running = stop_processes(self.log, self._hosts, regex)
+        if self.job.full_command_regex:
+            regex = f"'{str(self.job)}'"
+        detected, running = stop_processes(
+            self.log, self._hosts, regex, full_command=self.job.full_command_regex)
         if not detected:
             self.log.info(
                 "No remote %s processes killed on %s (none found), done.", regex, self._hosts)
diff --git a/src/tests/ftest/util/run_utils.py b/src/tests/ftest/util/run_utils.py
index 6e946ad3ddc..8e96e2228f0 100644
--- a/src/tests/ftest/util/run_utils.py
+++ b/src/tests/ftest/util/run_utils.py
@@ -530,7 +530,8 @@ def find_command(source, pattern, depth, other=None):
     return " ".join(command)
 
 
-def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None, force=False):
+def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None, force=False,
+                   full_command=False):
     """Stop the processes on each hosts that match the pattern.
 
     Args:
@@ -543,6 +544,8 @@ def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None,
         force (bool, optional): if set use the KILL signal to immediately stop any running
             processes. Defaults to False which will attempt to kill w/o a signal, then with the ABRT
             signal, and finally with the KILL signal.
+        full_command (bool, optional): if set match the pattern using the full command with
+            pgrep/pkill. Defaults to False.
 
     Raises:
         ValueError: if the pattern ends up matching process 1.
@@ -555,15 +558,17 @@ def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None,
     """
     processes_detected = NodeSet()
     processes_running = NodeSet()
-    command = f"/usr/bin/pgrep --list-full {pattern}"
+    search_command = f"/usr/bin/pgrep --list-full {pattern}"
     pattern_match = str(pattern)
     if exclude:
-        command = f"/usr/bin/ps xa | grep -E {pattern} | grep -vE {exclude}"
+        search_command = f"/usr/bin/ps xa | grep -E {pattern} | grep -vE {exclude}"
         pattern_match += " and doesn't match " + str(exclude)
+    elif full_command:
+        search_command = f"/usr/bin/pgrep --list-full --full -x {pattern}"
 
     # Search for any active processes
     log.debug("Searching for any processes on %s that match %s", hosts, pattern_match)
-    result = run_remote(log, hosts, command, verbose, timeout)
+    result = run_remote(log, hosts, search_command, verbose, timeout)
     if not result.passed_hosts:
         log.debug("No processes found on %s that match %s", result.failed_hosts, pattern_match)
         return processes_detected, processes_running
@@ -588,9 +593,11 @@ def stop_processes(log, hosts, pattern, verbose=True, timeout=60, exclude=None,
             "Killing%s any processes on %s that match %s and then waiting %s seconds",
             step[0], result.passed_hosts, pattern_match, step[1])
         kill_command = f"sudo /usr/bin/pkill{step[0]} {pattern}"
+        if full_command:
+            kill_command = f"sudo /usr/bin/pkill{step[0]} --full -x {pattern}"
         run_remote(log, result.passed_hosts, kill_command, verbose, timeout)
         time.sleep(step[1])
-        result = run_remote(log, result.passed_hosts, command, verbose, timeout)
+        result = run_remote(log, result.passed_hosts, search_command, verbose, timeout)
         if not result.passed_hosts:
             # Indicate all running processes matching the pattern were stopped in the return status
             log.debug(

From 2eaace2f1e33a7a616396223e794a8f30909730d Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Thu, 21 Nov 2024 14:55:35 -0500
Subject: [PATCH 20/22] Remove register cleanup calls for agent/server stop.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: pr daos_test dfuse_test test_load_mpi HarnessCmockaTest
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/harness/cmocka.py            | 14 ++++++++++++--
 src/tests/ftest/util/apricot/apricot/test.py | 15 ++++++++++-----
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/tests/ftest/harness/cmocka.py b/src/tests/ftest/harness/cmocka.py
index adc482132f2..b4b3d8f0620 100644
--- a/src/tests/ftest/harness/cmocka.py
+++ b/src/tests/ftest/harness/cmocka.py
@@ -5,18 +5,28 @@
 """
 import os
 
-from apricot import TestWithoutServers
+from apricot import TestWithServers
 from cmocka_utils import CmockaUtils, get_cmocka_command
 from host_utils import get_local_host
 from job_manager_utils import get_job_manager
 
 
-class HarnessCmockaTest(TestWithoutServers):
+class HarnessCmockaTest(TestWithServers):
     """Cmocka harness test cases.
 
+    Inherit TestWithServers so that tearDown() will call self._cleanup().
+
     :avocado: recursive
     """
 
+    def __init__(self, *args, **kwargs):
+        """Initialize a TestWithServers object."""
+        super().__init__(*args, **kwargs)
+
+        # Disable starting agents and servers
+        self.setup_start_agents = False
+        self.setup_start_servers = False
+
     def test_no_cmocka_xml(self):
         """Test to verify CmockaUtils detects lack of cmocka file generation.
 
diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py
index 614a0d99fbd..f401b263ddd 100644
--- a/src/tests/ftest/util/apricot/apricot/test.py
+++ b/src/tests/ftest/util/apricot/apricot/test.py
@@ -476,9 +476,6 @@ def tearDown(self):
         self.report_timeout()
         super().tearDown()
 
-        # Execute any tear down steps in the reverse order of which they were registered.
-        self._teardown_errors.extend(self._cleanup())
-
         # Clean up any temporary files
         self._teardown_errors.extend(self.remove_temp_test_dir())
 
@@ -895,7 +892,6 @@ def start_agents(self, agent_groups=None, force=False):
         self.setup_agents(agent_groups)
         if self.agent_managers:
             self.start_agent_managers(force)
-            self.register_cleanup(self.stop_agents)
 
     def start_servers(self, server_groups=None, force=False):
         """Start the daos_server processes.
@@ -919,7 +915,6 @@ def start_servers(self, server_groups=None, force=False):
         self.setup_servers(server_groups)
         if self.server_managers:
             force_agent_start = self.start_server_managers(force)
-            self.register_cleanup(self.stop_servers)
         return force_agent_start
 
     def restart_servers(self):
@@ -1407,6 +1402,16 @@ def tearDown(self):
         # Tear down any test-specific items
         self._teardown_errors = self.pre_tear_down()
 
+        # Destroy any job managers, containers, pools, and dfuse instances next
+        # Eventually this call will encompass all teardown steps
+        self._teardown_errors.extend(self._cleanup())
+
+        # Stop the agents
+        self._teardown_errors.extend(self.stop_agents())
+
+        # Stop the servers
+        self._teardown_errors.extend(self.stop_servers())
+
         super().tearDown()
 
     def pre_tear_down(self):

From 056b931b4883a638fec3b990526148b3aecd0fae Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Fri, 22 Nov 2024 09:41:00 -0500
Subject: [PATCH 21/22] Restore register cleanup for server/agent stop.

Remove stopping agents when stopping servers as DAOS-6873 is resolved.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: pr daos_test dfuse_test test_load_mpi HarnessCmockaTest ConfigGenerateRun
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 .../ftest/control/config_generate_run.py      | 17 +++++---------
 src/tests/ftest/harness/cmocka.py             | 14 ++----------
 src/tests/ftest/util/apricot/apricot/test.py  | 22 +++++--------------
 3 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/src/tests/ftest/control/config_generate_run.py b/src/tests/ftest/control/config_generate_run.py
index efaa992df31..0062cb3b769 100644
--- a/src/tests/ftest/control/config_generate_run.py
+++ b/src/tests/ftest/control/config_generate_run.py
@@ -52,6 +52,7 @@ def test_config_generate_run(self):
             control_metadata = os.path.join(self.test_env.log_dir, 'control_metadata')
 
         # Call dmg config generate. AP is always the first server host.
+        self.log_step("Generating server configuration")
         server_host = self.hostlist_servers[0]
         result = self.get_dmg_command().config_generate(
             access_points=server_host, num_engines=num_engines, scm_only=scm_only,
@@ -66,25 +67,19 @@ def test_config_generate_run(self):
         # Stop and restart daos_server. self.start_server_managers() has the
         # server start-up check built into it, so if there's something wrong,
         # it'll throw an error.
-        self.log.info("Stopping servers")
+        self.log_step("Stopping servers")
         self.stop_servers()
 
         # Create a new server config from generated_yaml and update SCM-related
         # data in engine_params so that the cleanup before the server start
         # works.
-        self.log.info("Copy config to %s and update engine_params", self.test_env.server_config)
+        self.log_step(f"Copy config to {self.test_env.server_config} and update engine_params")
         self.server_managers[0].update_config_file_from_file(generated_yaml)
 
         # Start server with the generated config.
-        self.log.info("Restarting server with the generated config")
+        self.log_step("Restarting server with the generated config")
         try:
-            agent_force = self.start_server_managers(force=True)
+            self.start_server_managers(force=True)
         except ServerFailed as error:
             self.fail(f"Restarting server failed! {error}")
-
-        # We don't need agent for this test. However, when we stop the server,
-        # agent is also stopped. Then the harness checks that the agent is
-        # running during the teardown. If agent isn't running at that point, it
-        # would cause an error, so start it here.
-        self.log.info("Restarting agents")
-        self.start_agent_managers(force=agent_force)
+        self.log.info("Test passed")
diff --git a/src/tests/ftest/harness/cmocka.py b/src/tests/ftest/harness/cmocka.py
index b4b3d8f0620..adc482132f2 100644
--- a/src/tests/ftest/harness/cmocka.py
+++ b/src/tests/ftest/harness/cmocka.py
@@ -5,28 +5,18 @@
 """
 import os
 
-from apricot import TestWithServers
+from apricot import TestWithoutServers
 from cmocka_utils import CmockaUtils, get_cmocka_command
 from host_utils import get_local_host
 from job_manager_utils import get_job_manager
 
 
-class HarnessCmockaTest(TestWithServers):
+class HarnessCmockaTest(TestWithoutServers):
     """Cmocka harness test cases.
 
-    Inherit TestWithServers so that tearDown() will call self._cleanup().
-
     :avocado: recursive
     """
 
-    def __init__(self, *args, **kwargs):
-        """Initialize a TestWithServers object."""
-        super().__init__(*args, **kwargs)
-
-        # Disable starting agents and servers
-        self.setup_start_agents = False
-        self.setup_start_servers = False
-
     def test_no_cmocka_xml(self):
         """Test to verify CmockaUtils detects lack of cmocka file generation.
 
diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py
index f401b263ddd..bb4dba4f1e0 100644
--- a/src/tests/ftest/util/apricot/apricot/test.py
+++ b/src/tests/ftest/util/apricot/apricot/test.py
@@ -476,6 +476,9 @@ def tearDown(self):
         self.report_timeout()
         super().tearDown()
 
+        # Execute any tear down steps in the reverse order of which they were registered.
+        self._teardown_errors.extend(self._cleanup())
+
         # Clean up any temporary files
         self._teardown_errors.extend(self.remove_temp_test_dir())
 
@@ -892,6 +895,7 @@ def start_agents(self, agent_groups=None, force=False):
         self.setup_agents(agent_groups)
         if self.agent_managers:
             self.start_agent_managers(force)
+            self.register_cleanup(self.stop_agents)
 
     def start_servers(self, server_groups=None, force=False):
         """Start the daos_server processes.
@@ -915,6 +919,7 @@ def start_servers(self, server_groups=None, force=False):
         self.setup_servers(server_groups)
         if self.server_managers:
             force_agent_start = self.start_server_managers(force)
+            self.register_cleanup(self.stop_servers)
         return force_agent_start
 
     def restart_servers(self):
@@ -1394,7 +1399,7 @@ def tearDown(self):
         # class (see DAOS-1452/DAOS-9941 and Avocado issue #5217 with
         # associated PR-5224)
         if self.status is not None and self.status != 'PASS' and self.status != 'SKIP':
-            self.__dump_engines_stacks("Test status is {}".format(self.status))
+            self.__dump_engines_stacks(f"Test status is {self.status}")
 
         # Report whether or not the timeout has expired
         self.report_timeout()
@@ -1402,16 +1407,6 @@ def tearDown(self):
         # Tear down any test-specific items
         self._teardown_errors = self.pre_tear_down()
 
-        # Destroy any job managers, containers, pools, and dfuse instances next
-        # Eventually this call will encompass all teardown steps
-        self._teardown_errors.extend(self._cleanup())
-
-        # Stop the agents
-        self._teardown_errors.extend(self.stop_agents())
-
-        # Stop the servers
-        self._teardown_errors.extend(self.stop_servers())
-
         super().tearDown()
 
     def pre_tear_down(self):
@@ -1611,11 +1606,6 @@ def stop_servers(self):
                 "Stopping %s group(s) of servers", len(self.server_managers))
             errors.extend(self._stop_managers(self.server_managers, "servers"))
 
-            # Stopping agents whenever servers are stopped for DAOS-6873
-            self.log.info(
-                "Workaround for DAOS-6873: Stopping %s group(s) of agents",
-                len(self.agent_managers))
-            errors.extend(self._stop_managers(self.agent_managers, "agents"))
         return errors
 
     def _stop_managers(self, managers, name):

From 34711e410f18c92b636b7217913078a1f3e404ce Mon Sep 17 00:00:00 2001
From: Phil Henderson <phillip.henderson@intel.com>
Date: Mon, 25 Nov 2024 18:18:49 -0500
Subject: [PATCH 22/22] Fix merge.

Skip-unit-tests: true
Skip-fault-injection-test: true
Test-tag: pr daos_test dfuse_test test_load_mpi HarnessCmockaTest
Allow-unstable-test: true

Required-githooks: true

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
---
 src/tests/ftest/control/config_generate_run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tests/ftest/control/config_generate_run.py b/src/tests/ftest/control/config_generate_run.py
index 0062cb3b769..f73ae0f5c7e 100644
--- a/src/tests/ftest/control/config_generate_run.py
+++ b/src/tests/ftest/control/config_generate_run.py
@@ -82,4 +82,5 @@ def test_config_generate_run(self):
             self.start_server_managers(force=True)
         except ServerFailed as error:
             self.fail(f"Restarting server failed! {error}")
+
         self.log.info("Test passed")