Skip to content

Commit

Permalink
DAOS-16100 test: Fix stopping daos_test during timeout (#15275) (#15603)
Browse files Browse the repository at this point in the history
Fix stopping timed out processes run by a JobManager class by only
searching for and killing the command executable being run by clush,
orterun, mpirun, etc. Add a new harness/cmocka.py test to verify the
stopping of the processes with a test timeout.

Signed-off-by: Phil Henderson <[email protected]>
  • Loading branch information
phender authored Dec 12, 2024
1 parent d238ceb commit 7f1eab0
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 143 deletions.
11 changes: 4 additions & 7 deletions src/tests/ftest/daos_test/dfs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2020-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -34,8 +34,7 @@ def test_daos_dfs_unit(self):
:avocado: tags=daos_test,dfs_test,dfs
:avocado: tags=DaosCoreTestDfs,test_daos_dfs_unit
"""
self.daos_test = os.path.join(self.bin, 'dfs_test')
self.run_subtest()
self.run_subtest(os.path.join(self.bin, "dfs_test"))

def test_daos_dfs_parallel(self):
"""Jira ID: DAOS-5409.
Expand All @@ -51,8 +50,7 @@ def test_daos_dfs_parallel(self):
:avocado: tags=daos_test,dfs_test,dfs
:avocado: tags=DaosCoreTestDfs,test_daos_dfs_parallel
"""
self.daos_test = os.path.join(self.bin, 'dfs_test')
self.run_subtest()
self.run_subtest(os.path.join(self.bin, "dfs_test"))

def test_daos_dfs_sys(self):
"""Jira ID: DAOS-7759.
Expand All @@ -68,5 +66,4 @@ def test_daos_dfs_sys(self):
:avocado: tags=daos_test,dfs_test,dfs
:avocado: tags=DaosCoreTestDfs,test_daos_dfs_sys
"""
self.daos_test = os.path.join(self.bin, 'dfs_test')
self.run_subtest()
self.run_subtest(os.path.join(self.bin, "dfs_test"))
14 changes: 6 additions & 8 deletions src/tests/ftest/daos_test/dfuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections import OrderedDict

from apricot import TestWithServers
from cmocka_utils import CmockaUtils
from cmocka_utils import CmockaUtils, get_cmocka_command
from dfuse_utils import get_dfuse, start_dfuse
from file_utils import create_directory
from general_utils import get_log_file
Expand All @@ -31,8 +31,6 @@ def run_test(self, il_lib=None):
if il_lib is None:
self.fail('il_lib is not defined.')

daos_test = os.path.join(self.bin, 'dfuse_test')

# Create a pool, container and start dfuse.
pool = self.get_pool(connect=False)
container = self.get_container(pool)
Expand Down Expand Up @@ -105,8 +103,8 @@ def run_test(self, il_lib=None):
daos_test_env['D_IL_MAX_EQ'] = '2'
daos_test_env['D_IL_ENFORCE_EXEC_ENV'] = '1'

command = [
daos_test,
command = os.path.join(self.bin, 'dfuse_test')
parameters = [
'--test-dir',
mount_dir,
'--io',
Expand All @@ -117,7 +115,7 @@ def run_test(self, il_lib=None):
'--cache'
]
if use_dfuse:
command.append('--lowfd')
parameters.append('--lowfd')
else:
# make D_IL_MOUNT_POINT different from mount_dir so it tests a non-DAOS filesystem
dummy_dir = '/tmp/dummy'
Expand All @@ -126,9 +124,9 @@ def run_test(self, il_lib=None):
self.fail(f"Error creating {dummy_dir} on {result.failed_hosts}")
daos_test_env['D_IL_MOUNT_POINT'] = dummy_dir
if cache_mode != 'writeback':
command.append('--metadata')
parameters.append('--metadata')

job = get_job_manager(self, "Clush", cmocka_utils.get_cmocka_command(" ".join(command)))
job = get_job_manager(self, "Clush", get_cmocka_command(command, ' '.join(parameters)))
job.assign_hosts(cmocka_utils.hosts)
job.assign_environment(daos_test_env)

Expand Down
50 changes: 4 additions & 46 deletions src/tests/ftest/harness/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os

from apricot import TestWithoutServers
from cmocka_utils import CmockaUtils
from command_utils import SubProcessCommand
from exception_utils import CommandFailure
from job_manager_utils import Mpirun, Orterun
Expand Down Expand Up @@ -79,12 +78,14 @@ def test_load_mpi(self):
try:
Orterun(None)
except CommandFailure as error:
self.fail("Orterun initialization failed: {}".format(error))
self.log.error("Orterun initialization failed: %s", error)
self.fail("Orterun initialization failed")

try:
Mpirun(None, mpi_type="mpich")
except CommandFailure as error:
self.fail("Mpirun initialization failed: {}".format(error))
self.log.error("Mpirun initialization failed: %s", error)
self.fail("Mpirun initialization failed")

def test_load_mpi_hw(self):
"""Simple test of apricot test code to load the openmpi module.
Expand Down Expand Up @@ -125,46 +126,3 @@ def test_sub_process_command(self):
if failed:
self.fail("The '{}' command failed".format(command))
self.log.info("Test passed")

def test_no_cmocka_xml(self):
    """Test to verify CmockaUtils detects lack of cmocka file generation.

    Runs the 'hostname' command (which produces no cmocka results) through
    CmockaUtils and then checks that a cmocka xml file describing a
    'Missing file' error was generated in the test output directory.

    If working correctly this test should fail due to a missing cmocka file.

    :avocado: tags=all
    :avocado: tags=vm
    :avocado: tags=harness,harness_cmocka,failure_expected
    :avocado: tags=HarnessBasicTest,test_no_cmocka_xml
    """
    self.log.info("=" * 80)
    self.log.info("Running the 'hostname' command via CmockaUtils")
    self.log.info(" This should generate a cmocka xml file with a 'Missing file' error")
    name = "no_cmocka_xml_file_test"
    cmocka_utils = CmockaUtils(None, name, self.outputdir, self.test_dir, self.log)
    command = cmocka_utils.get_cmocka_command("hostname")
    cmocka_utils.run_cmocka_test(self, command)

    # Verify a generated cmocka xml file exists
    expected = os.path.join(self.outputdir, "{}_cmocka_results.xml".format(name))
    self.log.info("Verifying the existence of the generated cmocka file: %s", expected)
    if not os.path.isfile(expected):
        self.fail("No {} file found".format(expected))

    # Verify the generated cmocka xml file contains the expected error
    self.log.info("Verifying contents of the generated cmocka file: %s", expected)
    with open(expected, "r", encoding="utf-8") as file_handle:
        actual_contents = file_handle.readlines()
    error_message = "Missing cmocka results for hostname in {}".format(self.outputdir)
    expected_lines = [
        "<testsuite errors=\"1\" failures=\"0\" name=\"{}\" skipped=\"0\" tests=\"1\"".format(
            name),
        "<testcase classname=\"{}\" name=\"{}\"".format(name, self.name),
        "<error message=\"{}\" type=\"Missing file\">".format(error_message)
    ]

    # The first line of the file is skipped (presumably the xml declaration -
    # TODO confirm against the CmockaUtils output); the next three lines must
    # match the expected entries in order.
    actual_lines = actual_contents[1:4]
    if len(actual_lines) < len(expected_lines):
        # A truncated or empty file must not pass: without this check the loop
        # below would simply compare fewer lines and succeed.
        self.fail("Badly formed {} file".format(expected))
    for expected_line, actual_line in zip(expected_lines, actual_lines):
        self.log.debug(" expecting: %s", expected_line)
        self.log.debug(" in actual: %s", actual_line[:-1].strip())
        if expected_line not in actual_line:
            self.fail("Badly formed {} file".format(expected))

    self.log.info("Test passed")
138 changes: 138 additions & 0 deletions src/tests/ftest/harness/cmocka.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
(C) Copyright 2022-2024 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from apricot import TestWithoutServers
from cmocka_utils import CmockaUtils, get_cmocka_command
from host_utils import get_local_host
from job_manager_utils import get_job_manager


class HarnessCmockaTest(TestWithoutServers):
    """Cmocka harness test cases.

    :avocado: recursive
    """

    def test_no_cmocka_xml(self):
        """Test to verify CmockaUtils detects lack of cmocka file generation.

        If working correctly this test should fail due to a missing cmocka file.

        :avocado: tags=all
        :avocado: tags=vm
        :avocado: tags=harness,failure_expected
        :avocado: tags=HarnessCmockaTest,test_no_cmocka_xml
        """
        self._run_cmocka_test(get_cmocka_command("hostname"), False, True)
        self.log.info("Test passed")

    def test_clush_manager_timeout(self):
        """Test to verify CmockaUtils handles timed out process correctly.

        If working correctly this test should fail due to a test timeout and a missing cmocka file.

        :avocado: tags=all
        :avocado: tags=vm
        :avocado: tags=harness,failure_expected
        :avocado: tags=HarnessCmockaTest,test_clush_manager_timeout
        """
        self._run_cmocka_test(self._get_manager_command("Clush", "sleep", "60"), True, True)
        # Only reached if the command returned instead of the test timing out
        self.fail("Test did not timeout")

    def test_orterun_manager_timeout(self):
        """Test to verify CmockaUtils handles timed out process correctly.

        If working correctly this test should fail due to a test timeout and a missing cmocka file.

        :avocado: tags=all
        :avocado: tags=vm
        :avocado: tags=harness,failure_expected
        :avocado: tags=HarnessCmockaTest,test_orterun_manager_timeout
        """
        self._run_cmocka_test(self._get_manager_command("Orterun", "sleep", "60"), True, True)
        # Only reached if the command returned instead of the test timing out
        self.fail("Test did not timeout")

    def test_mpirun_manager_timeout(self):
        """Test to verify CmockaUtils handles timed out process correctly.

        If working correctly this test should fail due to a test timeout and a missing cmocka file.

        :avocado: tags=all
        :avocado: tags=vm
        :avocado: tags=harness,failure_expected
        :avocado: tags=HarnessCmockaTest,test_mpirun_manager_timeout
        """
        self._run_cmocka_test(self._get_manager_command("Mpirun", "sleep", "60"), True, True)
        # Only reached if the command returned instead of the test timing out
        self.fail("Test did not timeout")

    def _run_cmocka_test(self, command, timeout, missing):
        """Run the cmocka test case.

        The generated cmocka xml file is always verified after the run, even if
        running the command raises, so the 'timeout' and 'missing' arguments only
        control which expectations are logged.

        Args:
            command (ExecutableCommand): the command to run
            timeout (bool): is the test expected to timeout
            missing (bool): is the test expected to be missing a cmocka result
        """
        self.log.info("Running the '%s' command via CmockaUtils", str(command))
        if timeout:
            self.log.info(" This should generate a test timeout failure")
        if missing:
            self.log.info(" This should generate a cmocka xml file with a 'Missing file' error")

        cmocka_utils = CmockaUtils(None, self.test_id, self.outputdir, self.test_dir, self.log)
        try:
            cmocka_utils.run_cmocka_test(self, command)
        finally:
            # Run the verification even when run_cmocka_test() raises
            self._verify_no_cmocka_xml(self.test_id, command)

    def _get_manager_command(self, class_name, executable, parameters):
        """Get a JobManager command object.

        Args:
            class_name (str): JobManager class name
            executable (str): executable to be managed
            parameters (str): parameters for the executable to be managed

        Returns:
            JobManager: the requested JobManager class
        """
        command = get_cmocka_command(executable, parameters)
        manager = get_job_manager(self, class_name, command)
        manager.assign_hosts(get_local_host())
        return manager

    def _verify_no_cmocka_xml(self, name, command):
        """Verify a cmocka xml file was generated with the expected error.

        Fails the test if the file does not exist, if it has fewer lines than the
        expected entries require, or if any expected entry is not found in its
        corresponding line.

        Args:
            name (str): name of the cmocka test
            command (ExecutableCommand): command for the cmocka test
        """
        # Verify a generated cmocka xml file exists
        expected = os.path.join(self.outputdir, f"{name}_cmocka_results.xml")
        self.log.info("Verifying the existence of the generated cmocka file: %s", expected)
        if not os.path.isfile(expected):
            self.fail(f"No {expected} file found")

        # Verify the generated cmocka xml file contains the expected error
        self.log.info("Verifying contents of the generated cmocka file: %s", expected)
        with open(expected, "r", encoding="utf-8") as file_handle:
            actual_contents = file_handle.readlines()

        # When the command has a 'job' attribute (the JobManager objects built by
        # _get_manager_command are passed here), the error message names that job
        # rather than the wrapping command.
        reported = getattr(command, "job", command)
        error_message = f"Missing cmocka results for {reported!s} in {self.outputdir}"
        expected_lines = [
            f"<testsuite errors=\"1\" failures=\"0\" name=\"{name}\" skipped=\"0\" tests=\"1\"",
            f"<testcase classname=\"{name}\" name=\"{self.name}\"",
            f"<error message=\"{error_message}\" type=\"Missing file\">"
        ]

        # The first line of the file is skipped (presumably the xml declaration -
        # TODO confirm against the CmockaUtils output); the next three lines must
        # match the expected entries in order.
        actual_lines = actual_contents[1:4]
        if len(actual_lines) < len(expected_lines):
            # A truncated or empty file must not pass: without this check the loop
            # below would simply compare fewer lines and succeed.
            self.fail(f"Badly formed {expected} file")
        for expected_line, actual_line in zip(expected_lines, actual_lines):
            self.log.debug(" expecting: %s", expected_line)
            self.log.debug(" in actual: %s", actual_line[:-1].strip())
            if expected_line not in actual_line:
                self.fail(f"Badly formed {expected} file")
1 change: 1 addition & 0 deletions src/tests/ftest/harness/cmocka.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
timeout: 10
2 changes: 0 additions & 2 deletions src/tests/ftest/util/apricot/apricot/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,6 @@ def __init__(self, *args, **kwargs):

self.client_mca = None
self.bin = None
self.daos_test = None
self.cart_prefix = None
self.cart_bin = None
self.tmp = None
Expand All @@ -522,7 +521,6 @@ def setUp(self):
"""Set up run before each test."""
super().setUp()
self.bin = os.path.join(self.prefix, 'bin')
self.daos_test = os.path.join(self.prefix, 'bin', 'daos_test')

# set the shared directory for daos tests
self.tmp = self.test_env.shared_dir
Expand Down
Loading

0 comments on commit 7f1eab0

Please sign in to comment.