diff --git a/src/tests/ftest/aggregation/dfuse_space_check.py b/src/tests/ftest/aggregation/dfuse_space_check.py index 1d119f807a78..4bae72ef6c5c 100644 --- a/src/tests/ftest/aggregation/dfuse_space_check.py +++ b/src/tests/ftest/aggregation/dfuse_space_check.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2022 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -9,6 +9,7 @@ from dfuse_utils import get_dfuse, start_dfuse from ior_test_base import IorTestBase +from run_utils import run_remote class DfuseSpaceCheck(IorTestBase): @@ -72,8 +73,11 @@ def write_multiple_files(self, dfuse): while self.get_nvme_free_space(False) >= self.block_size: file_path = os.path.join(dfuse.mount_dir.value, "file{}.txt".format(file_count)) write_dd_cmd = "dd if=/dev/zero of={} bs={} count=1".format(file_path, self.block_size) - if 0 in self.execute_cmd(write_dd_cmd, fail_on_err=True, display_output=False): - file_count += 1 + result = run_remote( + self.log, self.hostlist_clients, write_dd_cmd, verbose=False, timeout=300) + if not result.passed: + self.fail(f"Error running: {write_dd_cmd}") + file_count += 1 return file_count @@ -118,14 +122,16 @@ def test_dfusespacecheck(self): # Create a file as large as we can large_file = os.path.join(dfuse.mount_dir.value, 'largefile.txt') - self.execute_cmd('touch {}'.format(large_file)) + if not run_remote(self.log, self.hostlist_clients, f'touch {large_file}').passed: + self.fail(f"Error creating {large_file}") dd_count = (self.initial_space // self.block_size) + 1 write_dd_cmd = "dd if=/dev/zero of={} bs={} count={}".format( large_file, self.block_size, dd_count) - self.execute_cmd(write_dd_cmd, False) + run_remote(self.log, self.hostlist_clients, write_dd_cmd) # Remove the file - self.execute_cmd('rm -rf {}'.format(large_file)) + if not run_remote(self.log, self.hostlist_clients, f'rm -rf {large_file}').passed: + self.fail(f"Error removing {large_file}") # Wait for aggregation to complete self.wait_for_aggregation() @@ -142,7 +148,10 @@ def test_dfusespacecheck(self): self.pool.set_property("reclaim", "time") # remove all the small files created above. - self.execute_cmd("rm -rf {}".format(os.path.join(dfuse.mount_dir.value, '*'))) + result = run_remote( + self.log, self.hostlist_clients, f"rm -rf {os.path.join(dfuse.mount_dir.value, '*')}") + if not result.passed: + self.fail("Error removing files in mount dir") # Wait for aggregation to complete after file removal self.wait_for_aggregation() diff --git a/src/tests/ftest/datamover/posix_meta_entry.py b/src/tests/ftest/datamover/posix_meta_entry.py index a37818ccbcca..2e19a74d665e 100644 --- a/src/tests/ftest/datamover/posix_meta_entry.py +++ b/src/tests/ftest/datamover/posix_meta_entry.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -7,7 +7,7 @@ from data_mover_test_base import DataMoverTestBase from dfuse_utils import get_dfuse, start_dfuse -from exception_utils import CommandFailure +from run_utils import run_remote class DmvrPosixMetaEntry(DataMoverTestBase): @@ -143,7 +143,9 @@ def create_data(self, path): "popd" ] - self.execute_cmd_list(cmd_list) + cmd = " &&\n".join(cmd_list) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail("Failed to create data in path") def compare_data(self, path1, path2, cmp_filetype=True, cmp_perms=True, cmp_owner=True, cmp_times=False, @@ -190,11 +192,9 @@ def compare_data(self, path1, path2, cmp_filetype=True, field_printf, entry2) diff_cmd = "diff <({} 2>&1) <({} 2>&1)".format( stat_cmd1, stat_cmd2) - result = self.execute_cmd(diff_cmd, fail_on_err=False) - if 0 not in result or len(result) > 1: - hosts = [str(nodes) for code, nodes in list(result.items()) if code != 0] - raise CommandFailure( - "Command to check files failed '{}' on {}".format(diff_cmd, hosts)) + result = run_remote(self.log, self.hostlist_clients, diff_cmd, timeout=300) + if not result.passed or not result.homogeneous: + self.fail(f"Unexpected diff between {entry1} and {entry2}") if cmp_xattr: # Use getfattr to get the xattrs @@ -202,13 +202,6 @@ def compare_data(self, path1, path2, cmp_filetype=True, xattr_cmd2 = "getfattr -d -h '{}'".format(entry2) diff_cmd = "diff -I '^#' <({} 2>&1) <({} 2>&1)".format( xattr_cmd1, xattr_cmd2) - self.execute_cmd(diff_cmd) - - def execute_cmd_list(self, cmd_list): - """Execute a list of commands, separated by &&. - - Args: - cmd_list (list): A list of commands to execute. - """ - cmd = " &&\n".join(cmd_list) - self.execute_cmd(cmd) + result = run_remote(self.log, self.hostlist_clients, diff_cmd, timeout=300) + if not result.passed or not result.homogeneous: + self.fail(f"Unexpected diff between {entry1} and {entry2}") diff --git a/src/tests/ftest/datamover/posix_symlinks.py b/src/tests/ftest/datamover/posix_symlinks.py index f1dc87e46d7a..47fd9ca25b74 100644 --- a/src/tests/ftest/datamover/posix_symlinks.py +++ b/src/tests/ftest/datamover/posix_symlinks.py @@ -1,5 +1,5 @@ ''' - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -7,6 +7,7 @@ from data_mover_test_base import DataMoverTestBase from dfuse_utils import get_dfuse, start_dfuse +from run_utils import run_remote class DmvrPosixSymlinks(DataMoverTestBase): @@ -119,8 +120,9 @@ def run_dm_posix_symlinks_fun(self, pool, cont, link_fun, link_desc): if do_deref: # Use POSIX cp to create a baseline for dereferencing deref_baseline_path = join(posix_test_path, "baseline_" + link_desc) - self.execute_cmd("cp -r --dereference '{}' '{}'".format( - src_posix_path, deref_baseline_path)) + cp_cmd = f"cp -r --dereference '{src_posix_path}' '{deref_baseline_path}'" + if not run_remote(self.log, self.hostlist_clients, cp_cmd, timeout=300).passed: + self.fail("Failed to create dereference baseline") diff_src = deref_baseline_path else: # Just compare against the original @@ -195,7 +197,9 @@ def create_links_forward(self, path): "popd" ] - self.execute_cmd_list(cmd_list) + cmd = " &&\n".join(cmd_list) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Failed to create forward symlinks in {path}") def create_links_backward(self, path): """ @@ -225,7 +229,9 @@ def create_links_backward(self, path): "popd" ] - self.execute_cmd_list(cmd_list) + cmd = " &&\n".join(cmd_list) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Failed to create backward symlinks in {path}") def create_links_mixed(self, path): """ @@ -256,12 +262,6 @@ def create_links_mixed(self, path): "popd" ] - self.execute_cmd_list(cmd_list) - - def execute_cmd_list(self, cmd_list): - """Execute a list of commands, separated by &&. - Args: - cmd_list (list): A list of commands to execute. - """ cmd = " &&\n".join(cmd_list) - self.execute_cmd(cmd) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Failed to create mixed symlinks in {path}") diff --git a/src/tests/ftest/deployment/critical_integration.py b/src/tests/ftest/deployment/critical_integration.py index c8b9b296f3c6..d1cf28ab5557 100644 --- a/src/tests/ftest/deployment/critical_integration.py +++ b/src/tests/ftest/deployment/critical_integration.py @@ -11,7 +11,7 @@ from ClusterShell.NodeSet import NodeSet from exception_utils import CommandFailure from general_utils import DaosTestError, get_journalctl, journalctl_time, run_command -from ior_test_base import IorTestBase +from run_utils import run_remote # pylint: disable-next=fixme # TODO Provision all daos nodes using provisioning tool provided by HPCM @@ -67,7 +67,8 @@ def test_passwdlessssh_versioncheck(self): daos_server_version_list.append(out['response']['version']) if check_remote_root_access: run_command(remote_root_access) - IorTestBase._execute_command(self, command_for_inter_node, hosts=[host]) + if not run_remote(self.log, NodeSet(host), command_for_inter_node).passed: + self.fail(f"Inter-node clush failed on {host}") except (DaosTestError, CommandFailure, KeyError) as error: self.log.error("Error: %s", error) failed_nodes.add(host) diff --git a/src/tests/ftest/dfuse/sparse_file.py b/src/tests/ftest/dfuse/sparse_file.py index ef31c3816e64..484b787c38c6 100644 --- a/src/tests/ftest/dfuse/sparse_file.py +++ b/src/tests/ftest/dfuse/sparse_file.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -11,6 +11,7 @@ from dfuse_utils import get_dfuse, start_dfuse from general_utils import get_remote_file_size from ior_test_base import IorTestBase +from run_utils import run_remote class SparseFile(IorTestBase): @@ -60,7 +61,8 @@ def test_sparsefile(self): # create large file and perform write to it so that if goes out of # space. sparse_file = os.path.join(dfuse.mount_dir.value, 'sparsefile.txt') - self.execute_cmd("touch {}".format(sparse_file)) + if not run_remote(self.log, self.hostlist_clients, f"touch {sparse_file}").passed: + self.fail(f"Failed to create {sparse_file}") self.log.info("File size (in bytes) before truncate: %s", get_remote_file_size(self.hostlist_clients[0], sparse_file)) @@ -84,7 +86,8 @@ def test_sparsefile(self): # write to the first byte of the file with char 'A' dd_first_byte = "echo 'A' | dd conv=notrunc of={} bs=1 count=1".format(sparse_file) - self.execute_cmd(dd_first_byte) + if not run_remote(self.log, self.hostlist_clients, dd_first_byte).passed: + self.fail(f"Failed to create first byte in {sparse_file}") fsize_write_1stbyte = get_remote_file_size(self.hostlist_clients[0], sparse_file) self.log.info("File size (in bytes) after writing first byte: %s", fsize_write_1stbyte) # verify file did not got overwritten after dd write. @@ -93,7 +96,8 @@ def test_sparsefile(self): # write to the 1024th byte position of the file dd_1024_byte = "echo 'A' | dd conv=notrunc of={} obs=1 seek=1023 bs=1 count=1".format( sparse_file) - self.execute_cmd(dd_1024_byte) + if not run_remote(self.log, self.hostlist_clients, dd_1024_byte).passed: + self.fail(f"Failed to create 1024th byte in {sparse_file}") fsize_write_1024thwrite = get_remote_file_size(self.hostlist_clients[0], sparse_file) self.log.info("File size (in bytes) after writing 1024th byte: %s", fsize_write_1024thwrite) # verify file did not got overwritten after dd write. @@ -110,13 +114,13 @@ def test_sparsefile(self): # check the middle 1022 bytes if they are filled with zeros middle_1022_bytes = "cmp --ignore-initial=1 --bytes=1022 {} {}".format( sparse_file, "/dev/zero") - self.execute_cmd(middle_1022_bytes) + if not run_remote(self.log, self.hostlist_clients, middle_1022_bytes).passed: + self.fail(f"Unexpected bytes in {sparse_file}") # read last 512 bytes which should be zeros till end of file. ignore_bytes = self.space_before - 512 read_till_eof = "cmp --ignore-initial={} {} {}".format( ignore_bytes, sparse_file, "/dev/zero") - # self.execute_cmd(read_till_eof, False) # fail the test if the above command is successful. - if 0 in self.execute_cmd(read_till_eof, False): + if run_remote(self.log, self.hostlist_clients, read_till_eof).passed: self.fail("read_till_eof command was supposed to fail. But it completed successfully.") diff --git a/src/tests/ftest/util/data_mover_test_base.py b/src/tests/ftest/util/data_mover_test_base.py index 4c0a278e50fc..db272febc941 100644 --- a/src/tests/ftest/util/data_mover_test_base.py +++ b/src/tests/ftest/util/data_mover_test_base.py @@ -802,7 +802,8 @@ def run_diff(self, src, dst, deref=False): cmd = "diff -r {} '{}' '{}'".format( deref_str, src, dst) - self.execute_cmd(cmd) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Unexpected diff between {src} and {dst}") # pylint: disable=too-many-arguments def run_datamover(self, test_desc=None, diff --git a/src/tests/ftest/util/ior_test_base.py b/src/tests/ftest/util/ior_test_base.py index 625a283593ef..987924d42c23 100644 --- a/src/tests/ftest/util/ior_test_base.py +++ b/src/tests/ftest/util/ior_test_base.py @@ -6,10 +6,9 @@ import os from apricot import TestWithServers -from ClusterShell.NodeSet import NodeSet from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure -from general_utils import get_random_string, pcmd +from general_utils import get_random_string from host_utils import get_local_host from ior_utils import IorCommand from job_manager_utils import get_job_manager @@ -371,59 +370,3 @@ def verify_pool_size(self, original_pool_info, processes): self.fail( "Pool Free Size did not match: actual={}, expected={}".format( actual_pool_size, expected_pool_size)) - - def execute_cmd(self, command, fail_on_err=True, display_output=True): - """Execute cmd using general_utils.pcmd. - - Args: - command (str): the command to execute on the client hosts - fail_on_err (bool, optional): whether or not to fail the test if - command returns a non zero return code. Defaults to True. - display_output (bool, optional): whether or not to display output. - Defaults to True. - - Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. - - """ - try: - # Execute the bash command on each client host - result = self._execute_command(command, fail_on_err, display_output) - - except CommandFailure as error: - # Report an error if any command fails - self.log.error("Failed to execute command: %s", str(error)) - self.fail("Failed to execute command") - - return result - - def _execute_command(self, command, fail_on_err=True, display_output=True, hosts=None): - """Execute the command on all client hosts. - - Optionally verify if the command returns a non zero return code. - - Args: - command (str): the command to execute on the client hosts - fail_on_err (bool, optional): whether or not to fail the test if - command returns a non zero return code. Defaults to True. - display_output (bool, optional): whether or not to display output. - Defaults to True. - - Raises: - CommandFailure: if 'fail_on_err' is set and the command fails on at - least one of the client hosts - - Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. - - """ - if hosts is None: - hosts = self.hostlist_clients - result = pcmd(hosts, command, verbose=display_output, timeout=300) - if (0 not in result or len(result) > 1) and fail_on_err: - hosts = [str(nodes) for code, nodes in list(result.items()) if code != 0] - raise CommandFailure("Error running '{}' on the following hosts: {}".format( - command, NodeSet(",".join(hosts)))) - return result