From c270e79576dbc0de13a60431d293b347ab997e4a Mon Sep 17 00:00:00 2001
From: Michael MacDonald
Date: Thu, 12 Dec 2024 00:21:39 +0000
Subject: [PATCH] add new ftest skeleton

Features: telemetry
Required-githooks: true

Signed-off-by: Michael MacDonald
---
 src/client/dfs/io.c                           |  2 +-
 .../ftest/telemetry/dfs_client_telemetry.py   | 53 +++++++++++++++++++
 .../ftest/telemetry/dfs_client_telemetry.yaml | 46 +++++++++++++++++
 src/tests/ftest/util/telemetry_utils.py       | 30 +++++++++++-
 4 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 src/tests/ftest/telemetry/dfs_client_telemetry.py
 create mode 100644 src/tests/ftest/telemetry/dfs_client_telemetry.yaml

diff --git a/src/client/dfs/io.c b/src/client/dfs/io.c
index 17c2ee24bb13..db8f79ab105a 100644
--- a/src/client/dfs/io.c
+++ b/src/client/dfs/io.c
@@ -308,7 +308,7 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
 		daos_event_errno_rc(ev);
 
 	buf_size = 0;
-	if (sgl)
+	if (dfs->metrics != NULL && sgl != NULL)
 		for (i = 0; i < sgl->sg_nr; i++)
 			buf_size += sgl->sg_iovs[i].iov_len;
 
diff --git a/src/tests/ftest/telemetry/dfs_client_telemetry.py b/src/tests/ftest/telemetry/dfs_client_telemetry.py
new file mode 100644
index 000000000000..8b0cc65be56d
--- /dev/null
+++ b/src/tests/ftest/telemetry/dfs_client_telemetry.py
@@ -0,0 +1,53 @@
+"""
+  (C) Copyright 2024 Intel Corporation.
+
+  SPDX-License-Identifier: BSD-2-Clause-Patent
+"""
+from ior_utils import read_data, write_data
+from telemetry_test_base import TestWithClientTelemetry
+from telemetry_utils import ClientTelemetryUtils
+
+
+class DFSClientTelemetry(TestWithClientTelemetry):
+    """Tests to verify DFS telemetry.
+
+    :avocado: recursive
+    """
+
+    def test_dfs_metrics(self):
+        """JIRA ID: DAOS-16837.
+
+        Verify that the DFS metrics are incrementing as expected.
+        After performing some I/O, the DFS-level metrics should look reasonable.
+
+        Test steps:
+        1) Create a pool and container
+        2) Perform some I/O with IOR
+        3) Verify that the DFS metrics are sane
+
+        :avocado: tags=all,daily_regression
+        :avocado: tags=vm
+        :avocado: tags=telemetry
+        :avocado: tags=DFSClientTelemetry,test_dfs_metrics
+        """
+        # create pool and container
+        pool = self.get_pool(connect=True)
+        container = self.get_container(pool=pool)
+
+        self.log_step('Writing data to the pool (ior)')
+        ior = write_data(self, container)
+        self.log_step('Reading data from the pool (ior)')
+        read_data(self, ior, container)
+
+        # The DFS metric name lists are class attributes of ClientTelemetryUtils,
+        # not module-level names, so they must be accessed through the class.
+        metric_names = ClientTelemetryUtils.CLIENT_DFS_OPS_METRICS + \
+            ClientTelemetryUtils.CLIENT_DFS_IO_METRICS
+
+        self.log_step('Reading dfs telemetry')
+        after_metrics = self.telemetry.collect_client_data(metric_names)
+        # Skeleton: values are only logged for now; no assertions are made yet.
+        for metric in metric_names:
+            self.log.info('%s: %s', metric, after_metrics[metric])
+
+        self.log_step('Test passed')
diff --git a/src/tests/ftest/telemetry/dfs_client_telemetry.yaml b/src/tests/ftest/telemetry/dfs_client_telemetry.yaml
new file mode 100644
index 000000000000..d585dc81fda4
--- /dev/null
+++ b/src/tests/ftest/telemetry/dfs_client_telemetry.yaml
@@ -0,0 +1,46 @@
+hosts:
+  test_servers: 1
+  test_clients: 1
+
+timeout: 180
+
+server_config:
+  name: daos_server
+  engines_per_host: 1
+  engines:
+    0:
+      targets: 4
+      nr_xs_helpers: 0
+      storage:
+        0:
+          class: ram
+          scm_mount: /mnt/daos
+          system_ram_reserved: 1
+
+agent_config:
+  telemetry_port: 9191
+  telemetry_retain: 30s
+  telemetry_enabled: true
+
+pool:
+  scm_size: 2G
+
+container:
+  type: POSIX
+  control_method: daos
+  dfs_oclass: SX
+
+ior: &ior_base
+  ppn: 4
+  api: DFS
+  transfer_size: 512K
+  block_size: 1M
+  dfs_oclass: SX
+
+ior_write:
+  <<: *ior_base
+  flags: "-k -v -w -W -G 1"
+
+ior_read:
+  <<: *ior_base
+  flags: "-v -r -R -G 1"
diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py
index fb6b37d9ef31..1d11ba8401cd 100644
--- a/src/tests/ftest/util/telemetry_utils.py
+++ b/src/tests/ftest/util/telemetry_utils.py
@@ -967,6 +967,32 @@ class ClientTelemetryUtils(TelemetryUtils):
         _gen_stats_metrics("client_io_ops_tgt_update_active")
     CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS = \
         _gen_stats_metrics("client_io_ops_update_active")
+    CLIENT_DFS_OPS_METRICS = [
+        "client_dfs_ops_CHMOD",
+        "client_dfs_ops_CHOWN",
+        "client_dfs_ops_CREATE",
+        "client_dfs_ops_GETSIZE",
+        "client_dfs_ops_GETXATTR",
+        "client_dfs_ops_LSXATTR",
+        "client_dfs_ops_MKDIR",
+        "client_dfs_ops_OPEN",
+        "client_dfs_ops_OPENDIR",
+        "client_dfs_ops_READ",
+        "client_dfs_ops_READDIR",
+        "client_dfs_ops_READLINK",
+        "client_dfs_ops_RENAME",
+        "client_dfs_ops_RMXATTR",
+        "client_dfs_ops_SETATTR",
+        "client_dfs_ops_SETXATTR",
+        "client_dfs_ops_STAT",
+        "client_dfs_ops_SYMLINK",
+        "client_dfs_ops_SYNC",
+        "client_dfs_ops_TRUNCATE",
+        "client_dfs_ops_UNLINK",
+        "client_dfs_ops_WRITE"]
+    CLIENT_DFS_IO_METRICS = [
+        "client_dfs_read_bytes",
+        "client_dfs_write_bytes"]
     CLIENT_IO_METRICS = CLIENT_IO_LATENCY_FETCH_METRICS +\
         CLIENT_IO_LATENCY_UPDATE_METRICS +\
         CLIENT_IO_OPS_AKEY_ENUM_ACTIVE_METRICS +\
@@ -1009,7 +1035,9 @@ class ClientTelemetryUtils(TelemetryUtils):
         CLIENT_IO_OPS_TGT_PUNCH_ACTIVE_METRICS +\
         CLIENT_IO_OPS_TGT_PUNCH_LATENCY_METRICS +\
         CLIENT_IO_OPS_TGT_UPDATE_ACTIVE_METRICS +\
-        CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS
+        CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS +\
+        CLIENT_DFS_OPS_METRICS +\
+        CLIENT_DFS_IO_METRICS
 
     def __init__(self, dmg, servers, clients):
         """Create a ClientTelemetryUtils object.