From ec445d75a7194a76be2845730dc5e721092069a7 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Fri, 30 Aug 2024 10:37:31 +0800 Subject: [PATCH 01/21] DAOS-16385 dtx: fix DRAM leak when handling DTX collective RPC (#15010) The DRAM leak happens on non-leader engines when collectively punching an object. Signed-off-by: Fan Yong --- src/dtx/dtx_coll.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dtx/dtx_coll.c b/src/dtx/dtx_coll.c index 3ae69648c02..9623dce4917 100644 --- a/src/dtx/dtx_coll.c +++ b/src/dtx/dtx_coll.c @@ -92,6 +92,7 @@ dtx_coll_prep_ult(void *arg) DP_RC(dcpa->dcpa_result)); out: + D_FREE(mbs); if (cont != NULL) ds_cont_child_put(cont); From 1b5943cc5323d822c0f2713e1e468b0b7dd2f3f4 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 30 Aug 2024 15:32:15 -0400 Subject: [PATCH 02/21] DAOS-16484 test: Exclude local host in default interface selection (#15049) When the local host is included in the default interface selection, a difference in ib0 speeds can cause the logic to select eth0 and, in turn, the tcp provider. Signed-off-by: Phil Henderson --- src/tests/ftest/util/environment_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tests/ftest/util/environment_utils.py b/src/tests/ftest/util/environment_utils.py index e36d750500e..8223063a85e 100644 --- a/src/tests/ftest/util/environment_utils.py +++ b/src/tests/ftest/util/environment_utils.py @@ -9,7 +9,6 @@ from ClusterShell.NodeSet import NodeSet # pylint: disable=import-error,no-name-in-module -from util.host_utils import get_local_host from util.network_utils import (PROVIDER_ALIAS, SUPPORTED_PROVIDERS, NetworkException, get_common_provider, get_fastest_interface) from util.run_utils import run_remote @@ -327,7 +326,7 @@ def _default_interface(self, logger, hosts): # Find all the /sys/class/net interfaces on the launch node (excluding lo) logger.debug("Detecting network devices - D_INTERFACE not set") try: - interface = get_fastest_interface(logger, hosts | get_local_host()) + interface = get_fastest_interface(logger, hosts) except NetworkException as error: raise TestEnvironmentException("Error obtaining a default interface!") from error return interface From 9662e9899347eeab8dab3c5a7bbbe03095c027c3 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 30 Aug 2024 15:20:13 -0500 Subject: [PATCH 03/21] DAOS-15800 client: create cart context on specific interface (#14804) CaRT has added the ability to select the network interface on context creation. The daos_agent has also added a NUMA-to-fabric-interface map that can be queried at init time. Update the DAOS client to query that map from the agent at daos_init() time and, on EQ creation, to select the best interface for the network context based on the NUMA node of the calling thread. Signed-off-by: Mohamad Chaarawi --- src/cart/README.env | 2 +- src/cart/crt_internal_types.h | 2 +- src/client/api/SConscript | 2 +- src/client/api/event.c | 39 ++++++- src/engine/SConscript | 2 +- src/include/daos/mgmt.h | 6 ++ src/mgmt/cli_mgmt.c | 136 +++++++++++++++++++++++-- src/rdb/tests/SConscript | 2 +- src/tests/SConscript | 2 +- src/tests/ftest/dfuse/pil4dfs_fio.py | 8 +- src/tests/ftest/dfuse/pil4dfs_fio.yaml | 2 + 11 files changed, 181 insertions(+), 22 deletions(-) diff --git a/src/cart/README.env b/src/cart/README.env index 00f270d7a41..0b081ca9623 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -155,7 +155,7 @@ This file lists the environment variables used in CaRT. .
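(Usage sketch for patch 03: the following hypothetical application code is not part of the patch. It assumes only the public daos_init()/daos_eq_create()/daos_eq_destroy()/daos_fini() client API plus the D_DYNAMIC_CTX environment variable that the cli_mgmt.c hunk below reads via d_getenv_bool().)

#include <stdlib.h>
#include <daos.h>

int
main(void)
{
	daos_handle_t	eqh;
	int		rc;

	/* Opt in before daos_init() so the numa-to-interface map is
	 * fetched from the agent and cached during initialization. */
	setenv("D_DYNAMIC_CTX", "1", 1);

	rc = daos_init();
	if (rc != 0)
		return -1;

	/* With D_DYNAMIC_CTX=1, the EQ's CART context is created on the
	 * interface matching the calling thread's NUMA node; when the
	 * variable is unset, the pre-patch behavior (global default
	 * interface) is kept. */
	rc = daos_eq_create(&eqh);
	if (rc == 0)
		rc = daos_eq_destroy(eqh, 0);

	daos_fini();
	return rc == 0 ? 0 : -1;
}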
CRT_CTX_NUM If set, specifies the limit of number of allowed CaRT contexts to be created. - Valid range is [1, 64], with default being 64 if unset. + Valid range is [1, 128], with default being 128 if unset. . D_FI_CONFIG Specifies the fault injection configuration file. If this variable is not set diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 847463cba78..857c1a4522d 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -14,7 +14,7 @@ #define CRT_CONTEXT_NULL (NULL) #ifndef CRT_SRV_CONTEXT_NUM -#define CRT_SRV_CONTEXT_NUM (64) /* Maximum number of contexts */ +#define CRT_SRV_CONTEXT_NUM (128) /* Maximum number of contexts */ #endif diff --git a/src/client/api/SConscript b/src/client/api/SConscript index 8c71ceb4273..9ba2f2185c1 100644 --- a/src/client/api/SConscript +++ b/src/client/api/SConscript @@ -17,7 +17,7 @@ def scons(): if prereqs.client_requested(): libdaos = env.d_library('daos', libdaos_tgts, SHLIBVERSION=API_VERSION, - LIBS=['daos_common']) + LIBS=['daos_common', 'numa']) if hasattr(env, 'InstallVersionedLib'): env.InstallVersionedLib('$PREFIX/lib64/', libdaos, SHLIBVERSION=API_VERSION) else: diff --git a/src/client/api/event.c b/src/client/api/event.c index 2bbd63653e7..ae7241e830e 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -83,11 +83,24 @@ daos_eq_lib_init(crt_init_options_t *crt_info) D_GOTO(unlock, rc); } - /* use a global shared context for all eq for now */ - rc = crt_context_create(&daos_eq_ctx); + if (d_dynamic_ctx_g) { + char iface[DAOS_SYS_INFO_STRING_MAX]; + + rc = dc_mgmt_get_iface(&iface[0]); + if (rc && rc != -DER_NONEXIST) { + D_ERROR("failed to get iface: " DF_RC "\n", DP_RC(rc)); + D_GOTO(crt, rc); + } + /** if no interface returned, use the default */ + if (rc == -DER_NONEXIST) + rc = crt_context_create(&daos_eq_ctx); + else + rc = crt_context_create_on_iface(iface, &daos_eq_ctx); + } else { + rc = crt_context_create(&daos_eq_ctx); + } if (rc != 0) { - D_ERROR("failed to create client context: "DF_RC"\n", - DP_RC(rc)); + D_ERROR("failed to create client context: " DF_RC "\n", DP_RC(rc)); D_GOTO(crt, rc); } @@ -656,7 +669,23 @@ daos_eq_create(daos_handle_t *eqh) eqx = daos_eq2eqx(eq); - rc = crt_context_create(&eqx->eqx_ctx); + if (d_dynamic_ctx_g) { + char iface[DAOS_SYS_INFO_STRING_MAX]; + + rc = dc_mgmt_get_iface(&iface[0]); + if (rc && rc != -DER_NONEXIST) { + D_ERROR("failed to get iface: " DF_RC "\n", DP_RC(rc)); + return rc; + } + + /** if no interface returned, use the default */ + if (rc == -DER_NONEXIST) + rc = crt_context_create(&eqx->eqx_ctx); + else + rc = crt_context_create_on_iface(iface, &eqx->eqx_ctx); + } else { + rc = crt_context_create(&eqx->eqx_ctx); + } if (rc) { D_WARN("Failed to create CART context; using the global one, "DF_RC"\n", DP_RC(rc)); eqx->eqx_ctx = daos_eq_ctx; diff --git a/src/engine/SConscript b/src/engine/SConscript index 15d3385485d..c837e83022f 100644 --- a/src/engine/SConscript +++ b/src/engine/SConscript @@ -14,7 +14,7 @@ def scons(): denv.Append(CPPDEFINES=['-DDAOS_PMEM_BUILD']) libraries = ['daos_common_pmem', 'gurt', 'cart', 'vos_srv'] libraries += ['bio', 'dl', 'uuid', 'pthread', 'abt'] - libraries += ['hwloc', 'pmemobj', 'protobuf-c', 'isal'] + libraries += ['hwloc', 'pmemobj', 'protobuf-c', 'isal', 'numa'] denv.require('argobots', 'protobufc', 'pmdk', 'isal') diff --git a/src/include/daos/mgmt.h b/src/include/daos/mgmt.h index 8113a65ad98..0e4b623bd40 100644 --- a/src/include/daos/mgmt.h +++ b/src/include/daos/mgmt.h @@ -17,6 
+17,8 @@ #include #include "svc.pb-c.h" +extern bool d_dynamic_ctx_g; + int dc_mgmt_init(void); void dc_mgmt_fini(void); @@ -41,6 +43,8 @@ struct dc_mgmt_sys_info { d_rank_list_t *ms_ranks; char system_name[DAOS_SYS_INFO_STRING_MAX + 1]; uint32_t provider_idx; /* Provider index (if more than one available) */ + daos_size_t numa_entries_nr; + daos_size_t *numa_iface_idx_rr; }; /** Client system handle */ @@ -78,5 +82,7 @@ int dc_get_attach_info(const char *name, bool all_ranks, struct dc_mgmt_sys_info void dc_put_attach_info(struct dc_mgmt_sys_info *info, Mgmt__GetAttachInfoResp *resp); int dc_mgmt_cache_attach_info(const char *name); void dc_mgmt_drop_attach_info(void); +int + dc_mgmt_get_iface(char *iface); int dc_mgmt_tm_register(const char *sys, const char *jobid, key_t shm_key, uid_t *owner_uid); #endif diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index 6bb1c7c9513..31e0ac1f7d9 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -12,17 +12,17 @@ #define D_LOGFAC DD_FAC(mgmt) -#include - #include #include #include #include +#include #include #include #include "svc.pb-c.h" #include "rpc.h" #include +#include #include #include @@ -31,6 +31,7 @@ char agent_sys_name[DAOS_SYS_NAME_MAX + 1] = DAOS_DEFAULT_SYS_NAME; static struct dc_mgmt_sys_info info_g; static Mgmt__GetAttachInfoResp *resp_g; +bool d_dynamic_ctx_g; int dc_mgmt_proto_version; int @@ -241,6 +242,7 @@ put_attach_info(struct dc_mgmt_sys_info *info, Mgmt__GetAttachInfoResp *resp) if (resp != NULL) free_get_attach_info_resp(resp); d_rank_list_free(info->ms_ranks); + D_FREE(info->numa_iface_idx_rr); } void @@ -413,9 +415,23 @@ dc_get_attach_info(const char *name, bool all_ranks, struct dc_mgmt_sys_info *in int dc_mgmt_cache_attach_info(const char *name) { + int rc; + if (name != NULL && strcmp(name, agent_sys_name) != 0) return -DER_INVAL; - return get_attach_info(name, true, &info_g, &resp_g); + rc = get_attach_info(name, true, &info_g, &resp_g); + if (rc) + return rc; + + info_g.numa_entries_nr = resp_g->n_numa_fabric_interfaces; + D_ALLOC_ARRAY(info_g.numa_iface_idx_rr, info_g.numa_entries_nr); + if (info_g.numa_iface_idx_rr == NULL) + D_GOTO(err_rank_list, rc = -DER_NOMEM); + return 0; + +err_rank_list: + put_attach_info(&info_g, resp_g); + return rc; } static void @@ -625,14 +641,56 @@ dc_mgmt_net_cfg(const char *name, crt_init_options_t *crt_info) D_STRNDUP(crt_info->cio_provider, info->provider, DAOS_SYS_INFO_STRING_MAX); if (NULL == crt_info->cio_provider) D_GOTO(cleanup, rc = -DER_NOMEM); - D_STRNDUP(crt_info->cio_interface, info->interface, DAOS_SYS_INFO_STRING_MAX); - if (NULL == crt_info->cio_interface) - D_GOTO(cleanup, rc = -DER_NOMEM); - D_STRNDUP(crt_info->cio_domain, info->domain, DAOS_SYS_INFO_STRING_MAX); - if (NULL == crt_info->cio_domain) - D_GOTO(cleanup, rc = -DER_NOMEM); - D_INFO("Network interface: %s, Domain: %s\n", info->interface, info->domain); + d_getenv_bool("D_DYNAMIC_CTX", &d_dynamic_ctx_g); + if (d_dynamic_ctx_g) { + int i; + daos_size_t size = 0; + + for (i = 0; i < resp->n_numa_fabric_interfaces; i++) + size += resp_g->numa_fabric_interfaces[i]->n_ifaces * + (DAOS_SYS_INFO_STRING_MAX + 1); + + D_ALLOC(crt_info->cio_interface, size); + if (crt_info->cio_interface == NULL) + D_GOTO(cleanup, rc = -DER_NOMEM); + D_ALLOC(crt_info->cio_domain, size); + if (crt_info->cio_domain == NULL) + D_GOTO(cleanup, rc = -DER_NOMEM); + + for (i = 0; i < resp->n_numa_fabric_interfaces; i++) { + Mgmt__FabricInterfaces *numa_ifaces = resp_g->numa_fabric_interfaces[i]; + int j; + + for (j = 0; j < 
numa_ifaces->n_ifaces; j++) { + if (i != 0 || j != 0) { + strcat(crt_info->cio_interface, ","); + strcat(crt_info->cio_domain, ","); + } + strncat(crt_info->cio_interface, numa_ifaces->ifaces[j]->interface, + DAOS_SYS_INFO_STRING_MAX); + strncat(crt_info->cio_domain, numa_ifaces->ifaces[j]->domain, + DAOS_SYS_INFO_STRING_MAX); + } + /* + * If we have multiple interfaces per numa node, we want to randomize the + * first interface selected in case we have multiple processes running + * there. So initialize this numa node's entry in the index array to -1 + * to indicate that the first selection has not been made yet. + */ + if (numa_ifaces->n_ifaces > 1) + info_g.numa_iface_idx_rr[i] = -1; + } + } else { + D_STRNDUP(crt_info->cio_interface, info->interface, DAOS_SYS_INFO_STRING_MAX); + if (NULL == crt_info->cio_interface) + D_GOTO(cleanup, rc = -DER_NOMEM); + D_STRNDUP(crt_info->cio_domain, info->domain, DAOS_SYS_INFO_STRING_MAX); + if (NULL == crt_info->cio_domain) + D_GOTO(cleanup, rc = -DER_NOMEM); + } + D_INFO("Network interface: %s, Domain: %s, Provider: %s\n", crt_info->cio_interface, + crt_info->cio_domain, crt_info->cio_provider); D_DEBUG(DB_MGMT, "CaRT initialization with:\n" "\tD_PROVIDER: %s, CRT_TIMEOUT: %d, CRT_SECONDARY_PROVIDER: %s\n", @@ -667,6 +725,64 @@ int dc_mgmt_net_cfg_check(const char *name) return 0; } +int +dc_mgmt_get_iface(char *iface) +{ + int cpu; + int numa; + int i; + + cpu = sched_getcpu(); + if (cpu < 0) { + D_ERROR("sched_getcpu() failed: %d (%s)\n", errno, strerror(errno)); + return d_errno2der(errno); + } + + numa = numa_node_of_cpu(cpu); + if (numa < 0) { + D_ERROR("numa_node_of_cpu() failed: %d (%s)\n", errno, strerror(errno)); + return d_errno2der(errno); + } + + if (resp_g->n_numa_fabric_interfaces <= 0) { + D_ERROR("No fabric interfaces initialized.\n"); + return -DER_INVAL; + } + + for (i = 0; i < resp_g->n_numa_fabric_interfaces; i++) { + Mgmt__FabricInterfaces *numa_ifaces = resp_g->numa_fabric_interfaces[i]; + int idx; + + if (numa_ifaces->numa_node != numa) + continue; + + /* + * Randomize the first interface used to avoid multiple processes starting on the + * first interface (if there is more than 1).
+ */ + if (info_g.numa_iface_idx_rr[i] == -1) { + d_srand(getpid()); + info_g.numa_iface_idx_rr[i] = d_rand() % numa_ifaces->n_ifaces; + } + idx = info_g.numa_iface_idx_rr[i] % numa_ifaces->n_ifaces; + D_ASSERT(numa_ifaces->ifaces[idx]->numa_node == numa); + info_g.numa_iface_idx_rr[i]++; + + if (copy_str(iface, numa_ifaces->ifaces[idx]->interface) != 0) { + D_ERROR("Interface string too long.\n"); + return -DER_INVAL; + } + D_DEBUG(DB_MGMT, "Numa: %d, Interface Selected: IDX: %d, Name = %s\n", numa, idx, + iface); + break; + } + if (i == resp_g->n_numa_fabric_interfaces) { + D_DEBUG(DB_MGMT, "No iface on numa %d\n", numa); + return -DER_NONEXIST; + } + return 0; +} + static int send_monitor_request(struct dc_pool *pool, int request_type) { struct drpc *ctx; diff --git a/src/rdb/tests/SConscript b/src/rdb/tests/SConscript index cd46e41355f..b1919aac2ff 100644 --- a/src/rdb/tests/SConscript +++ b/src/rdb/tests/SConscript @@ -15,7 +15,7 @@ def scons(): # rdbt client rdbt = tenv.d_program('rdbt', ['rdbt.c', 'rpc.c'] + libdaos_tgts, LIBS=['daos_common_pmem', 'cart', 'gurt', 'uuid', 'isal', 'protobuf-c', - 'pthread']) + 'pthread', 'numa']) tenv.Install('$PREFIX/bin', rdbt) diff --git a/src/tests/SConscript b/src/tests/SConscript index 0c620376c21..d2927397981 100644 --- a/src/tests/SConscript +++ b/src/tests/SConscript @@ -35,7 +35,7 @@ def build_tests(env): daos_perf = denv.d_program('daos_perf', ['daos_perf.c', perf_common], LIBS=libs_client) denv.Install('$PREFIX/bin/', daos_perf) - libs_server += ['vos', 'bio', 'abt'] + libs_server += ['vos', 'bio', 'abt', 'numa'] vos_engine = denv.StaticObject(['vos_engine.c']) if denv["STACK_MMAP"] == 1: diff --git a/src/tests/ftest/dfuse/pil4dfs_fio.py b/src/tests/ftest/dfuse/pil4dfs_fio.py index 9b32ef39937..a149f5610d8 100644 --- a/src/tests/ftest/dfuse/pil4dfs_fio.py +++ b/src/tests/ftest/dfuse/pil4dfs_fio.py @@ -12,7 +12,7 @@ from cpu_utils import CpuInfo from dfuse_utils import get_dfuse, start_dfuse from fio_utils import FioCommand -from general_utils import bytes_to_human, percent_change +from general_utils import bytes_to_human, get_log_file, percent_change class Pil4dfsFio(TestWithServers): @@ -115,6 +115,9 @@ def _run_fio_pil4dfs(self, ioengine): "global", "cpus_allowed", self.fio_cpus_allowed, f"fio --name=global --cpus_allowed={self.fio_cpus_allowed}") fio_cmd.env['LD_PRELOAD'] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') + fio_cmd.env['D_DYNAMIC_CTX'] = 1 + fio_cmd.env["D_LOG_FILE"] = get_log_file(self.client_log) + fio_cmd.env["D_LOG_MASK"] = 'INFO' fio_cmd.hosts = self.hostlist_clients bws = {} @@ -154,6 +157,9 @@ def _run_fio_dfs(self): fio_cmd.update( "job", "pool", container.pool.uuid, f"fio --name=job --pool={container.pool.uuid}") fio_cmd.update("job", "cont", container.uuid, f"fio --name=job --cont={container.uuid}") + fio_cmd.env['D_DYNAMIC_CTX'] = 1 + fio_cmd.env["D_LOG_FILE"] = get_log_file(self.client_log) + fio_cmd.env["D_LOG_MASK"] = 'INFO' fio_cmd.hosts = self.hostlist_clients bws = {} diff --git a/src/tests/ftest/dfuse/pil4dfs_fio.yaml b/src/tests/ftest/dfuse/pil4dfs_fio.yaml index 5d7ef22e4ec..e5b62446fbf 100644 --- a/src/tests/ftest/dfuse/pil4dfs_fio.yaml +++ b/src/tests/ftest/dfuse/pil4dfs_fio.yaml @@ -13,12 +13,14 @@ server_config: fabric_iface: ib0 fabric_iface_port: 31317 log_file: daos_server0.log + log_mask: INFO storage: auto 1: pinned_numa_node: 1 fabric_iface: ib1 fabric_iface_port: 31417 log_file: daos_server1.log + log_mask: INFO storage: auto pool: From d2f062a2c7bb8ef87c9ab32d56536608d694006a Mon Sep 
17 00:00:00 2001 From: Jeff Olivier Date: Fri, 30 Aug 2024 15:55:27 -0600 Subject: [PATCH 04/21] DAOS-16445 client: Add function to cycle OIDs non-sequentially (#14999) We've noticed that with sequential OID ordering, object placement is poor. We get 40% fill for 8GiB files with 25 ranks and 16 targets per rank with EC_2P1G8. With this patch, we get a much better distribution. This patch adds the following: 1. A function for cycling oid.hi by incrementing with a large prime. 2. For DFS, randomization of the starting value. 3. A change to DFS to cycle OIDs using the new function. Signed-off-by: Jeff Olivier --- src/client/dfs/dfs_internal.h | 15 +++++++++------ src/client/dfs/mnt.c | 18 +++++++++--------- src/include/daos_obj.h | 16 +++++++++++++++- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 99f2fb8cde2..7425fc2f00d 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -99,9 +99,6 @@ /** Max recursion depth for symlinks */ #define DFS_MAX_RECURSION 40 -/** MAX value for the HI OID */ -#define MAX_OID_HI ((1UL << 32) - 1) - typedef uint64_t dfs_magic_t; typedef uint16_t dfs_sb_ver_t; typedef uint16_t dfs_layout_ver_t; @@ -164,6 +161,8 @@ struct dfs { daos_handle_t coh; /** refcount on cont handle that through the DFS API */ uint32_t coh_refcount; + /** The last oid.hi in the sequence */ + uint32_t last_hi; /** Transaction handle epoch. DAOS_EPOCH_MAX for DAOS_TX_NONE */ daos_epoch_t th_epoch; /** Transaction handle */ @@ -343,7 +342,7 @@ oid_gen(dfs_t *dfs, daos_oclass_id_t oclass, bool file, daos_obj_id_t *oid) D_MUTEX_LOCK(&dfs->lock); /** If we ran out of local OIDs, alloc one from the container */ - if (dfs->oid.hi >= MAX_OID_HI) { + if (dfs->oid.hi == dfs->last_hi) { /** Allocate an OID for the namespace */ rc = daos_cont_alloc_oids(dfs->coh, 1, &dfs->oid.lo, NULL); if (rc) { @@ -351,12 +350,16 @@ oid_gen(dfs_t *dfs, daos_oclass_id_t oclass, bool file, daos_obj_id_t *oid) D_MUTEX_UNLOCK(&dfs->lock); return daos_der2errno(rc); } - dfs->oid.hi = 0; + /** Start such that dfs->last_hi will be the final value */ + dfs->oid.hi = dfs->last_hi; } /** set oid and lo, bump the current hi value */ oid->lo = dfs->oid.lo; - oid->hi = dfs->oid.hi++; + daos_obj_oid_cycle(&dfs->oid); + if (unlikely(dfs->oid.lo == RESERVED_LO && dfs->oid.hi <= 1)) + daos_obj_oid_cycle(&dfs->oid); /* Avoid reserved oids */ + oid->hi = dfs->oid.hi; D_MUTEX_UNLOCK(&dfs->lock); /** if a regular file, use UINT64 typed dkeys for the array object */ diff --git a/src/client/dfs/mnt.c b/src/client/dfs/mnt.c index a73fafe34df..d270be1e414 100644 --- a/src/client/dfs/mnt.c +++ b/src/client/dfs/mnt.c @@ -685,20 +685,20 @@ dfs_mount(daos_handle_t poh, daos_handle_t coh, int flags, dfs_t **_dfs) /** if RW, allocate an OID for the namespace */ if (amode == O_RDWR) { + dfs->last_hi = (unsigned int)d_rand(); + /** Avoid potential conflict with SB or ROOT */ + if (dfs->last_hi <= 1) + dfs->last_hi = 2; + rc = daos_cont_alloc_oids(coh, 1, &dfs->oid.lo, NULL); if (rc) { D_ERROR("daos_cont_alloc_oids() Failed, " DF_RC "\n", DP_RC(rc)); D_GOTO(err_root, rc = daos_der2errno(rc)); } - /* - * if this is the first time we allocate on this container, - * account 0 for SB, 1 for root obj.
- */ - if (dfs->oid.lo == RESERVED_LO) - dfs->oid.hi = ROOT_HI + 1; - else - dfs->oid.hi = 0; + dfs->oid.hi = dfs->last_hi; + /** Increment so that dfs->last_hi is the last value */ + daos_obj_oid_cycle(&dfs->oid); } dfs->mounted = DFS_MOUNT; @@ -1023,7 +1023,7 @@ dfs_global2local(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, /** allocate a new oid on the next file or dir creation */ dfs->oid.lo = 0; - dfs->oid.hi = MAX_OID_HI; + dfs->oid.hi = dfs->last_hi; rc = D_MUTEX_INIT(&dfs->lock, NULL); if (rc != 0) { diff --git a/src/include/daos_obj.h b/src/include/daos_obj.h index 316d1b5547b..52b15ade40b 100644 --- a/src/include/daos_obj.h +++ b/src/include/daos_obj.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2015-2023 Intel Corporation. + * (C) Copyright 2015-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -564,6 +564,20 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid, enum daos_otype_t type, daos_oclass_id_t cid, daos_oclass_hints_t hints, uint32_t args); +/** + * This function, if called 2^32 times, will set oid->hi to every unique 32-bit + * value. The caller is responsible for setting the initial value, tracking the + * final value, and avoiding any values that are otherwise reserved. + * + * \param[in, out] oid oid to cycle + */ +static inline void +daos_obj_oid_cycle(daos_obj_id_t *oid) +{ + /** Uses a large prime number to guarantee hitting every unique value */ + oid->hi = (oid->hi + 999999937) & UINT_MAX; +} + /** * Open a DAOS object. * From 407199fd0069e1a513efef667a3cc1953ba9d54b Mon Sep 17 00:00:00 2001 From: Li Wei Date: Mon, 2 Sep 2024 10:52:00 +0900 Subject: [PATCH 05/21] DAOS-16251 dtx: Fix dtx_req_send use-after-free (#15035) In dtx_req_send, since crt_req_send releases the req reference, din may have been freed when dereferenced for the DL_CDEBUG call. Signed-off-by: Li Wei --- src/dtx/dtx_rpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dtx/dtx_rpc.c b/src/dtx/dtx_rpc.c index 1c0e73c9640..e654047a621 100644 --- a/src/dtx/dtx_rpc.c +++ b/src/dtx/dtx_rpc.c @@ -241,7 +241,7 @@ static int dtx_req_send(struct dtx_req_rec *drr, daos_epoch_t epoch) { struct dtx_req_args *dra = drr->drr_parent; - crt_rpc_t *req; + crt_rpc_t *req = NULL; crt_endpoint_t tgt_ep; crt_opcode_t opc; struct dtx_in *din = NULL; @@ -282,12 +282,12 @@ dtx_req_send(struct dtx_req_rec *drr, daos_epoch_t epoch) } rc = crt_req_send(req, dtx_req_cb, drr); + /* CAUTION: req and din may have been freed. */ } DL_CDEBUG(rc != 0, DLOG_ERR, DB_TRACE, rc, "DTX req for opc %x to %d/%d (req %p future %p) sent epoch "DF_X64, - dra->dra_opc, drr->drr_rank, drr->drr_tag, req, dra->dra_future, - din != NULL ? din->di_epoch : 0); + dra->dra_opc, drr->drr_rank, drr->drr_tag, req, dra->dra_future, epoch); if (rc != 0 && drr->drr_comp == 0) { drr->drr_comp = 1; From e6be2a6a33fdc8c18f20c017251284b609756435 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Mon, 2 Sep 2024 09:15:58 -0400 Subject: [PATCH 06/21] DAOS-16304 tools: Add daos health net-test command (#14980) Wrap self_test to provide a simplified network test to detect obvious client/server connectivity and performance problems.
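(A note on the OID cycling introduced in patch 04 above: the "large prime" comment holds because 999999937 is odd and therefore coprime with 2^32, so x -> x + 999999937 (mod 2^32) is a permutation consisting of a single full-length cycle. That is also why dfs_mount() can safely start from a random last_hi. The property can be checked standalone on a scaled-down power-of-two modulus; the following C program is illustrative only and not part of any patch.)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MOD (1u << 16)

int
main(void)
{
	/* 999999937 is odd, hence coprime with 2^16 as well as 2^32, so
	 * stepping by it must visit every residue exactly once before
	 * repeating. Uses 2^16 so the bitmap stays small. */
	static bool	seen[MOD];
	uint32_t	step = 999999937u % MOD;
	uint32_t	hi = 0;
	uint32_t	i;

	for (i = 0; i < MOD; i++) {
		if (seen[hi]) {
			printf("repeat after %u steps\n", i);
			return 1;
		}
		seen[hi] = true;
		hi = (hi + step) % MOD;
	}
	printf("all %u values visited exactly once\n", MOD);
	return 0;
}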
Signed-off-by: Michael MacDonald --- src/control/SConscript | 10 +- src/control/cmd/daos/health.go | 71 +++- src/control/cmd/daos/health_test.go | 64 +++ src/control/cmd/daos/main.go | 3 +- src/control/cmd/daos/pretty/selftest.go | 262 +++++++++++++ src/control/cmd/daos/pretty/selftest_test.go | 371 ++++++++++++++++++ src/control/cmd/daos/stubbed.go | 15 + src/control/cmd/daos/system.go | 2 +- src/control/cmd/daos/util.go | 26 +- src/control/cmd/dmg/pool.go | 43 +- src/control/cmd/dmg/pretty/telemetry.go | 13 +- src/control/cmd/dmg/pretty/telemetry_test.go | 109 ++--- src/control/common/test/utils.go | 9 + src/control/lib/control/telemetry.go | 323 ++------------- src/control/lib/control/telemetry_test.go | 184 ++++----- src/control/lib/daos/api/api.go | 17 +- src/control/lib/daos/api/errors.go | 13 + src/control/lib/daos/api/libdaos.go | 38 ++ src/control/lib/daos/api/libdaos_selftest.go | 31 ++ .../lib/daos/api/libdaos_selftest_stubs.go | 170 ++++++++ src/control/lib/daos/api/libdaos_stubs.go | 116 ++++++ src/control/lib/daos/api/provider.go | 4 +- src/control/lib/daos/api/selftest.go | 227 +++++++++++ src/control/lib/daos/api/selftest_test.go | 206 ++++++++++ src/control/lib/daos/api/system.go | 14 +- src/control/lib/daos/libgurt.go | 22 ++ src/control/lib/daos/libgurt_stubs.go | 21 + src/control/lib/daos/logging.go | 29 +- src/control/lib/daos/selftest.go | 354 +++++++++++++++++ src/control/lib/daos/selftest_test.go | 295 ++++++++++++++ src/control/lib/daos/status.go | 4 +- src/control/lib/daos/telemetry.go | 302 ++++++++++++++ src/control/lib/daos/telemetry_test.go | 246 ++++++++++++ src/control/lib/ui/num_flags.go | 75 ++++ src/control/lib/ui/num_flags_test.go | 139 +++++++ src/control/run_go_tests.sh | 5 +- src/mgmt/cli_mgmt.c | 1 + src/utils/self_test/self_test.c | 4 +- src/utils/self_test/self_test_lib.c | 10 + src/utils/self_test/self_test_lib.h | 2 + 40 files changed, 3294 insertions(+), 556 deletions(-) create mode 100644 src/control/cmd/daos/health_test.go create mode 100644 src/control/cmd/daos/pretty/selftest.go create mode 100644 src/control/cmd/daos/pretty/selftest_test.go create mode 100644 src/control/cmd/daos/stubbed.go create mode 100644 src/control/lib/daos/api/errors.go create mode 100644 src/control/lib/daos/api/libdaos.go create mode 100644 src/control/lib/daos/api/libdaos_selftest.go create mode 100644 src/control/lib/daos/api/libdaos_selftest_stubs.go create mode 100644 src/control/lib/daos/api/libdaos_stubs.go create mode 100644 src/control/lib/daos/api/selftest.go create mode 100644 src/control/lib/daos/api/selftest_test.go create mode 100644 src/control/lib/daos/libgurt.go create mode 100644 src/control/lib/daos/libgurt_stubs.go create mode 100644 src/control/lib/daos/selftest.go create mode 100644 src/control/lib/daos/selftest_test.go create mode 100644 src/control/lib/daos/telemetry.go create mode 100644 src/control/lib/daos/telemetry_test.go create mode 100644 src/control/lib/ui/num_flags.go create mode 100644 src/control/lib/ui/num_flags_test.go diff --git a/src/control/SConscript b/src/control/SConscript index e8865f368bb..06410fee53a 100644 --- a/src/control/SConscript +++ b/src/control/SConscript @@ -106,7 +106,8 @@ def install_go_bin(env, name, libs=None, install_man=False): build_path = join('$BUILD_DIR/src/control', f'{name}.8') menv = env.Clone() # This runs code from the build area so needs LD_LIBRARY_PATH set. 
- menv.d_enable_ld_path(["cart", "gurt", "client/api", "common", "client/dfs", "utils"]) + menv.d_enable_ld_path(["cart", "gurt", "client/api", "common", "client/dfs", "utils", + "utils/self_test"]) menv.Command(build_path, target, f'{gen_bin} manpage -o {build_path}') menv.Install('$PREFIX/share/man/man8', build_path) @@ -151,9 +152,12 @@ def scons(): "-L$BUILD_DIR/src/cart " "-L$BUILD_DIR/src/common " "-L$BUILD_DIR/src/client/dfs " - "-L$BUILD_DIR/src/utils $_RPATH") + "-L$BUILD_DIR/src/utils " + "-L$BUILD_DIR/src/utils/self_test " + "$_RPATH") dbenv.AppendENVPath("CGO_LDFLAGS", dblibs, sep=" ") - install_go_bin(dbenv, 'daos', libs=['daos_cmd_hdlrs', 'dfs', 'duns', 'daos'], + install_go_bin(dbenv, 'daos', libs=['daos_cmd_hdlrs', 'dfs', 'duns', 'daos', + 'daos_self_test'], install_man=True) if not prereqs.server_requested(): diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index cbc29c1e3ba..70e54213084 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -14,12 +14,16 @@ import ( "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/cmd/daos/pretty" + "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/lib/ui" "github.com/daos-stack/daos/src/control/logging" ) type healthCmds struct { - Check healthCheckCmd `command:"check" description:"Perform DAOS system health checks"` + Check healthCheckCmd `command:"check" description:"Perform DAOS system health checks"` + NetTest netTestCmd `command:"net-test" description:"Perform non-destructive DAOS networking tests"` } type healthCheckCmd struct { @@ -68,7 +72,7 @@ func (cmd *healthCheckCmd) Execute([]string) error { return err } - sysInfo, err := cmd.apiProvider.GetSystemInfo() + sysInfo, err := cmd.apiProvider.GetSystemInfo(cmd.MustLogCtx()) if err != nil { cmd.Errorf("failed to query system information: %v", err) } @@ -166,3 +170,66 @@ func (cmd *healthCheckCmd) Execute([]string) error { return nil } + +type netTestCmd struct { + cmdutil.JSONOutputCmd + cmdutil.LogCmd + sysCmd + Ranks ui.RankSetFlag `short:"r" long:"ranks" description:"Use the specified ranks as test endpoints (default: all)"` + Tags ui.RankSetFlag `short:"t" long:"tags" description:"Use the specified tags on ranks" default:"0"` + XferSize ui.ByteSizeFlag `short:"s" long:"size" description:"Per-RPC transfer size (send/reply)"` + MaxInflight uint `short:"m" long:"max-inflight" description:"Maximum number of inflight RPCs"` + RepCount uint `short:"c" long:"rep-count" description:"Number of times to repeat the RPCs, per endpoint"` + TpsBytes bool `short:"y" long:"bytes" description:"Show throughput values in bytes per second"` + Verbose bool `short:"v" long:"verbose" description:"Display more detailed DAOS network testing information"` +} + +func (cmd *netTestCmd) Execute(_ []string) error { + cfg := &daos.SelfTestConfig{ + GroupName: cmd.SysName, + EndpointRanks: cmd.Ranks.Ranks(), + EndpointTags: ranklist.RanksToUint32(cmd.Tags.Ranks()), + MaxInflightRPCs: cmd.MaxInflight, + Repetitions: cmd.RepCount, + } + if cmd.XferSize.IsSet() { + // If set, use that size, otherwise use the zero value. 
+ cfg.SendSizes = []uint64{cmd.XferSize.Bytes} + cfg.ReplySizes = cfg.SendSizes + } + if err := cfg.SetDefaults(); err != nil { + return err + } + + if !cmd.JSONOutputEnabled() { + var cfgBuf strings.Builder + if err := pretty.PrintSelfTestConfig(&cfgBuf, cfg, cmd.Verbose); err != nil { + return err + } + cmd.Info(cfgBuf.String()) + cmd.Info("Starting non-destructive network test (duration depends on performance)...\n\n") + } + + res, err := RunSelfTest(cmd.MustLogCtx(), cfg) + if err != nil { + return err + } + + if cmd.JSONOutputEnabled() { + return cmd.OutputJSON(struct { + Cfg *daos.SelfTestConfig `json:"configuration"` + Res []*daos.SelfTestResult `json:"results"` + }{ + Cfg: cfg, + Res: res, + }, nil) + } + + var resBuf strings.Builder + if err := pretty.PrintSelfTestResults(&resBuf, res, cmd.Verbose, cmd.TpsBytes); err != nil { + return err + } + cmd.Info(resBuf.String()) + + return nil +} diff --git a/src/control/cmd/daos/health_test.go b/src/control/cmd/daos/health_test.go new file mode 100644 index 00000000000..95565b36bb2 --- /dev/null +++ b/src/control/cmd/daos/health_test.go @@ -0,0 +1,64 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "context" + "testing" + + "github.com/dustin/go-humanize" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/daos-stack/daos/src/control/common/cmdutil" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/lib/ui" + "github.com/daos-stack/daos/src/control/logging" +) + +func RunSelfTest(ctx context.Context, cfg *daos.SelfTestConfig) ([]*daos.SelfTestResult, error) { + return []*daos.SelfTestResult{}, nil +} + +func TestDaos_netTestCmdExecute(t *testing.T) { + // Quickie smoke test for the UI -- will flesh out later. + var opts cliOptions + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + args := []string{ + "health", "net-test", + "--ranks", "0-3", + "--tags", "4-9", + "--size", "20 MiB", + "--rep-count", "2222", + "--bytes", "--verbose", + } + expArgs := netTestCmd{} + expArgs.Ranks.Replace(ranklist.MustCreateRankSet("0-3")) + expArgs.Tags.Replace(ranklist.MustCreateRankSet("4-9")) + expArgs.XferSize.Bytes = 20 * humanize.MiByte + expArgs.RepCount = 2222 + expArgs.Verbose = true + expArgs.TpsBytes = true + + if err := parseOpts(args, &opts, log); err != nil { + t.Fatal(err) + } + cmpOpts := cmp.Options{ + cmpopts.IgnoreUnexported(netTestCmd{}), + cmp.Comparer(func(a, b ranklist.RankSet) bool { + return a.String() == b.String() + }), + cmp.Comparer(func(a, b ui.ByteSizeFlag) bool { + return a.String() == b.String() + }), + cmpopts.IgnoreTypes(cmdutil.LogCmd{}, cmdutil.JSONOutputCmd{}), + } + test.CmpAny(t, "health net-test args", expArgs, opts.Health.NetTest, cmpOpts...) 
+} diff --git a/src/control/cmd/daos/main.go b/src/control/cmd/daos/main.go index 5d6886cb2e5..95e13585340 100644 --- a/src/control/cmd/daos/main.go +++ b/src/control/cmd/daos/main.go @@ -20,6 +20,7 @@ import ( "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/fault" "github.com/daos-stack/daos/src/control/lib/atm" + "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/logging" ) @@ -182,7 +183,7 @@ or query/manage an object inside a container.` // Initialize the daos debug system first so that // any allocations made as part of argument parsing // are logged when running under NLT. - debugFini, err := initDaosDebug() + debugFini, err := daos.InitLogging(daos.UnsetLogMask) if err != nil { exitWithError(log, err) } diff --git a/src/control/cmd/daos/pretty/selftest.go b/src/control/cmd/daos/pretty/selftest.go new file mode 100644 index 00000000000..9a18dc0cb44 --- /dev/null +++ b/src/control/cmd/daos/pretty/selftest.go @@ -0,0 +1,262 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package pretty + +import ( + "fmt" + "io" + "sort" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/hostlist" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/lib/txtfmt" + "github.com/daos-stack/daos/src/control/lib/ui" +) + +type timeUnit uint64 + +const ( + ns timeUnit = 0 + us timeUnit = 1000 + ms timeUnit = 1000 * 1000 + s timeUnit = 1000 * 1000 * 1000 +) + +func (u timeUnit) String() string { + switch u { + case ns: + return "ns" + case us: + return "μs" + case ms: + return "ms" + case s: + return "s" + default: + return "unknown" + } +} + +func printLatencyVal(val float64, u timeUnit) string { + return fmt.Sprintf("%.02f%s", val/float64(u), u) +} + +// PrintSelfTestResult generates a human-readable representation of the supplied +// daos.SelfTestResult struct and writes it to the supplied io.Writer. 
+func PrintSelfTestResult(out io.Writer, result *daos.SelfTestResult, verbose, showBytes bool) error { + if result == nil { + return errors.Errorf("nil %T", result) + } + + rpcThroughput := float64(result.MasterLatency.Succeeded()) / result.Duration.Seconds() + + epRanks := ranklist.NewRankSet() + epTgts := hostlist.NewNumericSet() + for _, ep := range result.TargetEndpoints { + epRanks.Add(ep.Rank) + epTgts.Add(uint(ep.Tag)) + } + srvEpTitle := "Server Endpoint" + if epRanks.Count() > 1 { + srvEpTitle += "s" + } + summary := []txtfmt.TableRow{ + {srvEpTitle: epRanks.RangedString() + ":" + epTgts.RangedString()}, + {"RPC Throughput": fmt.Sprintf("%.02f RPC/s", rpcThroughput)}, + } + if result.SendSize > 0 || result.ReplySize > 0 { + suffix := "B/s" + bw := rpcThroughput * (float64(result.SendSize) + float64(result.ReplySize)) + if !showBytes { + bw *= 8 + suffix = "bps" + } + summary = append(summary, txtfmt.TableRow{ + "RPC Bandwidth": ui.FmtHumanSize(bw, suffix, false), + }) + } + _, masterBuckets := result.MasterLatency.Percentiles() + summary = append(summary, txtfmt.TableRow{ + "Average Latency": printLatencyVal(float64(result.MasterLatency.Average()), ms), + }) + if l, found := masterBuckets[95]; found { + summary = append(summary, txtfmt.TableRow{ + "95% Latency": printLatencyVal(l.UpperBound, ms), + }) + } + if l, found := masterBuckets[99]; found { + summary = append(summary, txtfmt.TableRow{ + "99% Latency": printLatencyVal(l.UpperBound, ms), + }) + } + if verbose { + summary = append(summary, []txtfmt.TableRow{ + {"Client Endpoint": result.MasterEndpoint.String()}, + {"Duration": result.Duration.String()}, + {"Repetitions": fmt.Sprintf("%d", result.Repetitions)}, + {"Send Size": ui.FmtHumanSize(float64(result.SendSize), "B", true)}, + {"Reply Size": ui.FmtHumanSize(float64(result.ReplySize), "B", true)}, + }...) + } + if result.MasterLatency.FailCount > 0 { + failPct := (float64(result.MasterLatency.FailCount) / float64(result.Repetitions)) * 100 + summary = append(summary, txtfmt.TableRow{ + "Failed RPCs": fmt.Sprintf("%d (%.01f%%)", result.MasterLatency.FailCount, failPct), + }) + } + ef := txtfmt.NewEntityFormatter("Client/Server Network Test Summary", 2) + fmt.Fprintln(out, ef.Format(summary)) + + if !verbose { + return nil + } + + fmt.Fprintln(out, "Per-Target Latency Results") + iw := txtfmt.NewIndentWriter(out) + + var hasFailed bool + dispUnit := ms // TODO: Calculate based on average value? 
+ pctTitles := make(map[uint64]string) + var table []txtfmt.TableRow + for _, ep := range result.TargetEndpoints { + el, found := result.TargetLatencies[ep] + if !found { + continue + } + + if el.FailCount > 0 { + hasFailed = true + } + pcts, buckets := el.Percentiles() + + row := txtfmt.TableRow{ + "Target": ep.String(), + "Min": printLatencyVal(float64(el.Min), dispUnit), + "Max": printLatencyVal(float64(el.Max), dispUnit), + "Failed": fmt.Sprintf("%.01f%%", float64(el.FailCount)/float64(el.TotalRPCs)*100), + } + if verbose { + row["Average"] = printLatencyVal(float64(el.Average()), dispUnit) + row["StdDev"] = printLatencyVal(el.StdDev(), dispUnit) + } + + for _, pct := range pcts { + pctTitles[pct] = fmt.Sprintf("%d%%", pct) + row[pctTitles[pct]] = printLatencyVal(buckets[pct].UpperBound, dispUnit) + } + + table = append(table, row) + } + + var pctKeys []uint64 + for key := range pctTitles { + pctKeys = append(pctKeys, key) + } + sort.Slice(pctKeys, func(a, b int) bool { + return pctKeys[a] < pctKeys[b] + }) + titles := []string{"Target", "Min"} + for _, key := range pctKeys { + titles = append(titles, pctTitles[key]) + } + titles = append(titles, "Max") + if verbose { + titles = append(titles, "Average") + titles = append(titles, "StdDev") + } + if hasFailed { + titles = append(titles, "Failed") + } + tf := txtfmt.NewTableFormatter(titles...) + tf.InitWriter(iw) + tf.Format(table) + + return nil +} + +// PrintSelfTestResults generates a human-readable representation of the supplied +// slice of daos.SelfTestResult structs and writes it to the supplied io.Writer. +func PrintSelfTestResults(out io.Writer, results []*daos.SelfTestResult, verbose, showBytes bool) error { + if len(results) == 0 { + fmt.Fprintln(out, "No test results.") + } + if len(results) > 1 { + fmt.Fprintf(out, "Showing %d self test results:\n", len(results)) + out = txtfmt.NewIndentWriter(out) + } + for _, res := range results { + if err := PrintSelfTestResult(out, res, verbose, showBytes); err != nil { + return err + } + } + + return nil +} + +// PrintSelfTestConfig generates a human-readable representation of the self_test configuration. 
+func PrintSelfTestConfig(out io.Writer, cfg *daos.SelfTestConfig, verbose bool) error { + if cfg == nil { + return errors.Errorf("nil %T", cfg) + } + + srvRow := func(r []ranklist.Rank) txtfmt.TableRow { + srvTitle := "Server" + if len(r) == 1 { + return txtfmt.TableRow{srvTitle: fmt.Sprintf("%d", r[0])} + } + srvTitle += "s" + if len(r) == 0 { + return txtfmt.TableRow{srvTitle: "All"} + } + return txtfmt.TableRow{srvTitle: ranklist.RankSetFromRanks(r).RangedString()} + } + rpcSizeRow := func(dir string, sizes []uint64) txtfmt.TableRow { + title := fmt.Sprintf("%s RPC Size", dir) + if len(sizes) == 0 { + return txtfmt.TableRow{title: "None"} + } else if len(sizes) == 1 { + return txtfmt.TableRow{title: ui.FmtHumanSize(float64(sizes[0]), "B", true)} + } + sizeStrs := make([]string, len(sizes)) + for i, size := range sizes { + sizeStrs[i] = ui.FmtHumanSize(float64(size), "B", true) + } + return txtfmt.TableRow{title + "s": fmt.Sprintf("%v", sizeStrs)} + } + cfgRows := []txtfmt.TableRow{ + srvRow(cfg.EndpointRanks), + rpcSizeRow("Send", cfg.SendSizes), + rpcSizeRow("Reply", cfg.ReplySizes), + {"RPCs Per Server": fmt.Sprintf("%d", cfg.Repetitions)}, + } + if verbose { + tagRow := func(t []uint32) txtfmt.TableRow { + tagTitle := "Tag" + if len(t) == 1 { + return txtfmt.TableRow{tagTitle: fmt.Sprintf("%d", t[0])} + } + tagTitle += "s" + if len(t) == 0 { + return txtfmt.TableRow{tagTitle: "ERROR (0 tags)"} // Can't(?) happen... + } + return txtfmt.TableRow{tagTitle: ranklist.RankSetFromRanks(ranklist.RanksFromUint32(t)).RangedString()} + } + cfgRows = append(cfgRows, []txtfmt.TableRow{ + {"System Name": cfg.GroupName}, + tagRow(cfg.EndpointTags), + {"Max In-Flight RPCs": fmt.Sprintf("%d", cfg.MaxInflightRPCs)}, + }...) + } + + ef := txtfmt.NewEntityFormatter("Client/Server Network Test Parameters", 2) + fmt.Fprintln(out, ef.Format(cfgRows)) + + return nil +} diff --git a/src/control/cmd/daos/pretty/selftest_test.go b/src/control/cmd/daos/pretty/selftest_test.go new file mode 100644 index 00000000000..8ae57f4347a --- /dev/null +++ b/src/control/cmd/daos/pretty/selftest_test.go @@ -0,0 +1,371 @@ +// +// (C) Copyright 2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package pretty_test + +import ( + "errors" + "strings" + "testing" + "time" + + "github.com/daos-stack/daos/src/control/cmd/daos/pretty" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" +) + +func TestPretty_PrintSelfTestConfig(t *testing.T) { + genCfg := func(xfrm func(cfg *daos.SelfTestConfig)) *daos.SelfTestConfig { + cfg := &daos.SelfTestConfig{} + cfg.SetDefaults() + if xfrm != nil { + xfrm(cfg) + } + return cfg + } + for name, tc := range map[string]struct { + cfg *daos.SelfTestConfig + verbose bool + expStr string + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "defaults": { + cfg: genCfg(nil), + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Servers : All + Send RPC Size : 1.00 MiB + Reply RPC Size : 1.00 MiB + RPCs Per Server: 10000 + +`, + }, + "single server": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.EndpointRanks = []ranklist.Rank{1} + }), + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Server : 1 + Send RPC Size : 1.00 MiB + Reply RPC Size : 1.00 MiB + RPCs Per Server: 10000 + +`, + }, + "custom": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.EndpointRanks = []ranklist.Rank{0, 1, 2} + cfg.SendSizes = []uint64{1024, 1024 * 1024} + cfg.ReplySizes = []uint64{2048 * 1024, 2048 * 1024 * 1024} + }), + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Servers : [0-2] + Send RPC Sizes : [1.00 KiB 1.00 MiB] + Reply RPC Sizes: [2.00 MiB 2.00 GiB] + RPCs Per Server: 10000 + +`, + }, + "defaults - verbose": { + cfg: genCfg(nil), + verbose: true, + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Servers : All + Send RPC Size : 1.00 MiB + Reply RPC Size : 1.00 MiB + RPCs Per Server : 10000 + System Name : daos_server + Tag : 0 + Max In-Flight RPCs: 16 + +`, + }, + "custom - verbose": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.EndpointRanks = []ranklist.Rank{0, 1, 2} + cfg.EndpointTags = []uint32{0, 1, 2} + cfg.SendSizes = []uint64{1024, 1024 * 1024} + cfg.ReplySizes = []uint64{2048 * 1024, 2048 * 1024 * 1024} + }), + verbose: true, + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Servers : [0-2] + Send RPC Sizes : [1.00 KiB 1.00 MiB] + Reply RPC Sizes : [2.00 MiB 2.00 GiB] + RPCs Per Server : 10000 + System Name : daos_server + Tags : [0-2] + Max In-Flight RPCs: 16 + +`, + }, + "no sizes?": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.SendSizes = []uint64{} + cfg.ReplySizes = []uint64{} + }), + verbose: true, + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Servers : All + Send RPC Size : None + Reply RPC Size : None + RPCs Per Server : 10000 + System Name : daos_server + Tag : 0 + Max In-Flight RPCs: 16 + +`, + }, + "no targets?": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.EndpointTags = []uint32{} + }), + verbose: true, + expStr: ` +Client/Server Network Test Parameters +------------------------------------- + Servers : All + Send RPC Size : 1.00 MiB + Reply RPC Size : 1.00 MiB + RPCs Per Server : 10000 + System Name : daos_server + Tags : ERROR (0 tags) + Max In-Flight RPCs: 16 + +`, + }, + } { + t.Run(name, func(t *testing.T) { + var bld strings.Builder + 
gotErr := pretty.PrintSelfTestConfig(&bld, tc.cfg, tc.verbose) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "Config Output", strings.TrimLeft(tc.expStr, "\n"), bld.String()) + }) + } +} + +func genResult(xfrm func(result *daos.SelfTestResult)) *daos.SelfTestResult { + cfg := &daos.SelfTestConfig{} + cfg.SetDefaults() + result := &daos.SelfTestResult{ + MasterEndpoint: daos.SelfTestEndpoint{Rank: 3, Tag: 0}, + TargetEndpoints: []daos.SelfTestEndpoint{ + {Rank: 0, Tag: 0}, + {Rank: 1, Tag: 0}, + {Rank: 2, Tag: 0}, + }, + Repetitions: cfg.Repetitions * 3, + SendSize: cfg.SendSizes[0], + ReplySize: cfg.ReplySizes[0], + BufferAlignment: cfg.BufferAlignment, + Duration: 8500 * time.Millisecond, + MasterLatency: &daos.EndpointLatency{}, + } + for i := int64(1); i <= int64(result.Repetitions); i++ { + result.MasterLatency.AddValue(i * 1000) + result.AddTargetLatency(ranklist.Rank(i%3), 0, i*1000) + } + if xfrm != nil { + xfrm(result) + } + return result +} + +func TestPrettyPrintSelfTestResult(t *testing.T) { + for name, tc := range map[string]struct { + result *daos.SelfTestResult + verbose bool + showBytes bool + expStr string + expErr error + }{ + "nil": { + expErr: errors.New("nil"), + }, + "non-verbose, bps": { + result: genResult(nil), + expStr: ` +Client/Server Network Test Summary +---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 59.21 Gbps + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + +`, + }, + "non-verbose, bytes": { + result: genResult(nil), + showBytes: true, + expStr: ` +Client/Server Network Test Summary +---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 7.40 GB/s + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + +`, + }, + "verbose, bps": { + result: genResult(nil), + verbose: true, + expStr: ` +Client/Server Network Test Summary +---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 59.21 Gbps + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + Client Endpoint : 3:0 + Duration : 8.5s + Repetitions : 30000 + Send Size : 1.00 MiB + Reply Size : 1.00 MiB + +Per-Target Latency Results + Target Min 50% 75% 90% 95% 99% Max Average StdDev + ------ --- --- --- --- --- --- --- ------- ------ + 0:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms + 1:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms + 2:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms +`, + }, + "verbose with failures, bytes": { + result: genResult(func(res *daos.SelfTestResult) { + for i := int64(1); i <= int64(res.Repetitions/4); i++ { + res.MasterLatency.AddValue(-1) + res.AddTargetLatency(ranklist.Rank(i%3), 0, -1) + } + }), + verbose: true, + showBytes: true, + expStr: ` +Client/Server Network Test Summary +---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 7.40 GB/s + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + Client Endpoint : 3:0 + Duration : 8.5s + Repetitions : 30000 + Send Size : 1.00 MiB + Reply Size : 1.00 MiB + Failed RPCs : 7500 (25.0%) + +Per-Target Latency Results + Target Min 50% 75% 90% 95% 99% Max Average StdDev Failed + ------ --- --- --- --- --- --- --- ------- ------ ------ + 0:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 
30.00ms 15.00ms 8.66ms 20.0% + 1:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms 20.0% + 2:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms 20.0% +`, + }, + } { + t.Run(name, func(t *testing.T) { + var bld strings.Builder + gotErr := pretty.PrintSelfTestResult(&bld, tc.result, tc.verbose, tc.showBytes) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "Self Test Result", strings.TrimLeft(tc.expStr, "\n"), bld.String()) + }) + } +} + +func TestPretty_PrintSelfTestResults(t *testing.T) { + for name, tc := range map[string]struct { + results []*daos.SelfTestResult + verbose bool + expStr string + expErr error + }{ + "zero results": { + expStr: ` +No test results. +`, + }, + "one result": { + results: []*daos.SelfTestResult{genResult(nil)}, + expStr: ` +Client/Server Network Test Summary +---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 59.21 Gbps + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + +`, + }, + "two results": { + results: []*daos.SelfTestResult{genResult(nil), genResult(nil)}, + expStr: ` +Showing 2 self test results: + Client/Server Network Test Summary + ---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 59.21 Gbps + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + + Client/Server Network Test Summary + ---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 3529.41 RPC/s + RPC Bandwidth : 59.21 Gbps + Average Latency : 15.00ms + 95% Latency : 28.50ms + 99% Latency : 29.70ms + +`, + }, + } { + t.Run(name, func(t *testing.T) { + var bld strings.Builder + gotErr := pretty.PrintSelfTestResults(&bld, tc.results, tc.verbose, false) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "Config Output", strings.TrimLeft(tc.expStr, "\n"), bld.String()) + }) + } +} diff --git a/src/control/cmd/daos/stubbed.go b/src/control/cmd/daos/stubbed.go new file mode 100644 index 00000000000..4a08ad77255 --- /dev/null +++ b/src/control/cmd/daos/stubbed.go @@ -0,0 +1,15 @@ +// +// (C) Copyright 2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build !test_stubs +// +build !test_stubs + +package main + +import "github.com/daos-stack/daos/src/control/lib/daos/api" + +var ( + RunSelfTest = api.RunSelfTest +) diff --git a/src/control/cmd/daos/system.go b/src/control/cmd/daos/system.go index bdb189494f8..b0deb41f3a8 100644 --- a/src/control/cmd/daos/system.go +++ b/src/control/cmd/daos/system.go @@ -19,7 +19,7 @@ type systemQueryCmd struct { } func (cmd *systemQueryCmd) Execute(_ []string) error { - sysInfo, err := cmd.apiProvider.GetSystemInfo() + sysInfo, err := cmd.apiProvider.GetSystemInfo(cmd.MustLogCtx()) if err != nil { return errors.Wrap(err, "failed to query DAOS system") } diff --git a/src/control/cmd/daos/util.go b/src/control/cmd/daos/util.go index 1800a130e2a..d5b128bf9a4 100644 --- a/src/control/cmd/daos/util.go +++ b/src/control/cmd/daos/util.go @@ -275,20 +275,24 @@ type daosCaller interface { initDAOS() (func(), error) } +type sysCmd struct { + SysName string +} + +func (sc *sysCmd) setSysName(sysName string) { + sc.SysName = sysName +} + type daosCmd struct { cmdutil.NoArgsCmd cmdutil.JSONOutputCmd cmdutil.LogCmd + sysCmd apiProvider *api.Provider - SysName string -} - -func (dc *daosCmd) setSysName(sysName string) { - dc.SysName = sysName } func (dc *daosCmd) initDAOS() (func(), error) { - provider, err := api.NewProvider(dc.Logger) + provider, err := api.NewProvider(dc.Logger, false) if err != nil { return func() {}, err } @@ -297,16 +301,6 @@ func (dc *daosCmd) initDAOS() (func(), error) { return provider.Cleanup, nil } -func initDaosDebug() (func(), error) { - if rc := C.daos_debug_init(nil); rc != 0 { - return nil, errors.Wrap(daosError(rc), "daos_debug_init() failed") - } - - return func() { - C.daos_debug_fini() - }, nil -} - func resolveDunsPath(path string, ap *C.struct_cmd_args_s) error { if path == "" { return errors.New("empty path") diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index cbd1209a41b..d7d267fbbe3 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -120,33 +120,8 @@ func (trf *tierRatioFlag) UnmarshalFlag(fv string) error { return nil } -type sizeFlag struct { - bytes uint64 -} - -func (sf sizeFlag) IsSet() bool { - return sf.bytes > 0 -} - -func (sf sizeFlag) String() string { - return humanize.Bytes(sf.bytes) -} - -func (sf *sizeFlag) UnmarshalFlag(fv string) (err error) { - if fv == "" { - return errors.New("no size specified") - } - - sf.bytes, err = humanize.ParseBytes(fv) - if err != nil { - return errors.Errorf("invalid size %q", fv) - } - - return nil -} - type poolSizeFlag struct { - sizeFlag + ui.ByteSizeFlag availRatio uint64 } @@ -155,7 +130,7 @@ func (psf poolSizeFlag) IsRatio() bool { } func (psf poolSizeFlag) IsSet() bool { - return psf.sizeFlag.IsSet() || psf.IsRatio() + return psf.ByteSizeFlag.IsSet() || psf.IsRatio() } func (psf poolSizeFlag) String() string { @@ -163,7 +138,7 @@ func (psf poolSizeFlag) String() string { return fmt.Sprintf("%d%%", psf.availRatio) } - return psf.sizeFlag.String() + return psf.ByteSizeFlag.String() } func (psf *poolSizeFlag) UnmarshalFlag(fv string) error { @@ -182,7 +157,7 @@ func (psf *poolSizeFlag) UnmarshalFlag(fv string) error { return nil } - return psf.sizeFlag.UnmarshalFlag(fv) + return psf.ByteSizeFlag.UnmarshalFlag(fv) } // PoolCreateCmd is the struct representing the command to create a DAOS pool. 
@@ -199,8 +174,8 @@ type PoolCreateCmd struct { TierRatio tierRatioFlag `short:"t" long:"tier-ratio" description:"Percentage of storage tiers for pool storage (auto; default: 6,94)"` NumRanks uint32 `short:"k" long:"nranks" description:"Number of ranks to use (auto)"` NumSvcReps uint32 `short:"v" long:"nsvc" description:"Number of pool service replicas"` - ScmSize sizeFlag `short:"s" long:"scm-size" description:"Per-engine SCM allocation for DAOS pool (manual)"` - NVMeSize sizeFlag `short:"n" long:"nvme-size" description:"Per-engine NVMe allocation for DAOS pool (manual)"` + ScmSize ui.ByteSizeFlag `short:"s" long:"scm-size" description:"Per-engine SCM allocation for DAOS pool (manual)"` + NVMeSize ui.ByteSizeFlag `short:"n" long:"nvme-size" description:"Per-engine NVMe allocation for DAOS pool (manual)"` RankList ui.RankSetFlag `short:"r" long:"ranks" description:"Storage engine unique identifiers (ranks) for DAOS pool"` Args struct { @@ -259,7 +234,7 @@ func (cmd *PoolCreateCmd) storageAutoTotal(req *control.PoolCreateReq) error { req.NumRanks = cmd.NumRanks req.TierRatio = cmd.TierRatio.Ratios() - req.TotalBytes = cmd.Size.bytes + req.TotalBytes = cmd.Size.Bytes scmPercentage := ratio2Percentage(cmd.Logger, req.TierRatio[0], req.TierRatio[1]) msg := fmt.Sprintf("Creating DAOS pool with automatic storage allocation: "+ @@ -280,8 +255,8 @@ func (cmd *PoolCreateCmd) storageManual(req *control.PoolCreateReq) error { return errIncompatFlags("tier-ratio", "scm-size") } - scmBytes := cmd.ScmSize.bytes - nvmeBytes := cmd.NVMeSize.bytes + scmBytes := cmd.ScmSize.Bytes + nvmeBytes := cmd.NVMeSize.Bytes req.TierBytes = []uint64{scmBytes, nvmeBytes} msg := fmt.Sprintf("Creating DAOS pool with manual per-engine storage allocation:"+ diff --git a/src/control/cmd/dmg/pretty/telemetry.go b/src/control/cmd/dmg/pretty/telemetry.go index fb234f048ae..70091c3f8ee 100644 --- a/src/control/cmd/dmg/pretty/telemetry.go +++ b/src/control/cmd/dmg/pretty/telemetry.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -13,6 +13,7 @@ import ( "strings" "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/txtfmt" ) @@ -76,7 +77,7 @@ func PrintMetricsQueryResp(out io.Writer, resp *control.MetricsQueryResp) error return nil } -func printMetrics(out io.Writer, metrics []control.Metric, metricType control.MetricType) { +func printMetrics(out io.Writer, metrics []daos.Metric, metricType daos.MetricType) { if len(metrics) == 0 { fmt.Fprintf(out, "No metrics found\n") return @@ -92,7 +93,7 @@ func printMetrics(out io.Writer, metrics []control.Metric, metricType control.Me for _, m := range metrics { switch realM := m.(type) { - case *control.SimpleMetric: + case *daos.SimpleMetric: labels := metricLabelsToStr(realM.Labels) name := metricType.String() table = append(table, txtfmt.TableRow{ @@ -100,7 +101,7 @@ func printMetrics(out io.Writer, metrics []control.Metric, metricType control.Me labelTitle: labels, valTitle: fmt.Sprintf("%g", realM.Value), }) - case *control.SummaryMetric: + case *daos.SummaryMetric: labels := metricLabelsToStr(realM.Labels) table = append(table, txtfmt.TableRow{ nameTitle: "Sample Count", @@ -119,7 +120,7 @@ func printMetrics(out io.Writer, metrics []control.Metric, metricType control.Me valTitle: fmt.Sprintf("%g", realM.Quantiles[quant]), }) } - case *control.HistogramMetric: + case *daos.HistogramMetric: labels := metricLabelsToStr(realM.Labels) table = append(table, txtfmt.TableRow{ nameTitle: "Sample Count", @@ -150,7 +151,7 @@ func printMetrics(out io.Writer, metrics []control.Metric, metricType control.Me tablePrint.Format(table) } -func metricLabelsToStr(labels control.LabelMap) string { +func metricLabelsToStr(labels daos.MetricLabelMap) string { if len(labels) == 0 { return "N/A" } diff --git a/src/control/cmd/dmg/pretty/telemetry_test.go b/src/control/cmd/dmg/pretty/telemetry_test.go index 66881a091cd..09e82c6ce90 100644 --- a/src/control/cmd/dmg/pretty/telemetry_test.go +++ b/src/control/cmd/dmg/pretty/telemetry_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -13,6 +13,7 @@ import ( "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/control" + "github.com/daos-stack/daos/src/control/lib/daos" ) func TestPretty_PrintMetricsListResp(t *testing.T) { @@ -30,16 +31,16 @@ func TestPretty_PrintMetricsListResp(t *testing.T) { }, "empty list": { resp: &control.MetricsListResp{ - AvailableMetricSets: []*control.MetricSet{}, + AvailableMetricSets: []*daos.MetricSet{}, }, }, "one item": { resp: &control.MetricsListResp{ - AvailableMetricSets: []*control.MetricSet{ + AvailableMetricSets: []*daos.MetricSet{ { Name: "test_metric_1", Description: "Test Metric", - Type: control.MetricTypeGeneric, + Type: daos.MetricTypeGeneric, }, }, }, @@ -51,21 +52,21 @@ test_metric_1 Generic Test Metric }, "multi item": { resp: &control.MetricsListResp{ - AvailableMetricSets: []*control.MetricSet{ + AvailableMetricSets: []*daos.MetricSet{ { Name: "test_metric_1", Description: "Test metric", - Type: control.MetricTypeGauge, + Type: daos.MetricTypeGauge, }, { Name: "test_metric_2", Description: "Another test metric", - Type: control.MetricTypeSummary, + Type: daos.MetricTypeSummary, }, { Name: "funny_hats", Description: "Hilarious headwear", - Type: control.MetricTypeCounter, + Type: daos.MetricTypeCounter, }, }, }, @@ -79,7 +80,7 @@ funny_hats Counter Hilarious headwear }, "write failure": { resp: &control.MetricsListResp{ - AvailableMetricSets: []*control.MetricSet{ + AvailableMetricSets: []*daos.MetricSet{ { Name: "test_metric_1", Description: "Test Metric", @@ -117,12 +118,12 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "empty list": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{}, + MetricSets: []*daos.MetricSet{}, }, }, "set without values": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "test_metric_1", Description: "Test Metric", @@ -138,26 +139,26 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "untyped": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_metric", Description: "A test metric", - Type: control.MetricTypeGeneric, - Metrics: []control.Metric{ - &control.SimpleMetric{ + Type: daos.MetricTypeGeneric, + Metrics: []daos.Metric{ + &daos.SimpleMetric{ Labels: map[string]string{ "foo": "bar", }, Value: 2.25, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{ "ring": "one", "bearer": "frodo", }, Value: 5, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{}, Value: 125, }, @@ -178,26 +179,26 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "counter type": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_counter", Description: "A test metric", - Type: control.MetricTypeCounter, - Metrics: []control.Metric{ - &control.SimpleMetric{ + Type: daos.MetricTypeCounter, + Metrics: []daos.Metric{ + &daos.SimpleMetric{ Labels: map[string]string{ "foo": "bar", }, Value: 2.25, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{ "ring": "one", "bearer": "frodo", }, Value: 5, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{}, Value: 125, }, @@ -218,26 +219,26 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "gauge type": { resp: &control.MetricsQueryResp{ - MetricSets: 
[]*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_gauge", Description: "A test metric", - Type: control.MetricTypeGauge, - Metrics: []control.Metric{ - &control.SimpleMetric{ + Type: daos.MetricTypeGauge, + Metrics: []daos.Metric{ + &daos.SimpleMetric{ Labels: map[string]string{ "foo": "bar", }, Value: 2.25, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{ "ring": "one", "bearer": "frodo", }, Value: 5, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{}, Value: 125, }, @@ -258,14 +259,14 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "summary type": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_summary", Description: "A test metric", - Type: control.MetricTypeSummary, - Metrics: []control.Metric{ - &control.SummaryMetric{ - Labels: control.LabelMap{ + Type: daos.MetricTypeSummary, + Metrics: []daos.Metric{ + &daos.SummaryMetric{ + Labels: daos.MetricLabelMap{ "foo": "bar", }, SampleCount: 55, @@ -275,8 +276,8 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { 0.5: 33.333, }, }, - &control.SummaryMetric{ - Labels: control.LabelMap{}, + &daos.SummaryMetric{ + Labels: daos.MetricLabelMap{}, SampleCount: 102, SampleSum: 19.84, Quantiles: map[float64]float64{ @@ -304,19 +305,19 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "histogram type": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_histogram", Description: "A test metric", - Type: control.MetricTypeHistogram, - Metrics: []control.Metric{ - &control.HistogramMetric{ - Labels: control.LabelMap{ + Type: daos.MetricTypeHistogram, + Metrics: []daos.Metric{ + &daos.HistogramMetric{ + Labels: daos.MetricLabelMap{ "foo": "bar", }, SampleCount: 55, SampleSum: 6094.27, - Buckets: []*control.MetricBucket{ + Buckets: []*daos.MetricBucket{ { UpperBound: 500, CumulativeCount: 2, @@ -327,8 +328,8 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, }, }, - &control.HistogramMetric{ - Labels: control.LabelMap{}, + &daos.HistogramMetric{ + Labels: daos.MetricLabelMap{}, SampleCount: 22, SampleSum: 102, }, @@ -354,26 +355,26 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "multiple sets": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_counter", Description: "A test metric", - Type: control.MetricTypeCounter, - Metrics: []control.Metric{ - &control.SimpleMetric{ + Type: daos.MetricTypeCounter, + Metrics: []daos.Metric{ + &daos.SimpleMetric{ Labels: map[string]string{ "foo": "bar", }, Value: 2.25, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{ "ring": "one", "bearer": "frodo", }, Value: 5, }, - &control.SimpleMetric{ + &daos.SimpleMetric{ Labels: map[string]string{}, Value: 125, }, @@ -382,9 +383,9 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { { Name: "my_summary", Description: "Another test metric", - Type: control.MetricTypeSummary, - Metrics: []control.Metric{ - &control.SummaryMetric{ + Type: daos.MetricTypeSummary, + Metrics: []daos.Metric{ + &daos.SummaryMetric{ SampleCount: 55, SampleSum: 6094.27, Quantiles: map[float64]float64{ @@ -418,7 +419,7 @@ func TestPretty_PrintMetricsQueryResp(t *testing.T) { }, "write failure": { resp: &control.MetricsQueryResp{ - MetricSets: []*control.MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "test_metric_1", Description: "Test Metric", diff --git 
a/src/control/common/test/utils.go b/src/control/common/test/utils.go index f7cc72ef72a..2c7610a82d4 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -140,6 +140,15 @@ func CmpErr(t *testing.T, want, got error) { } } +// CmpAny compares two values and fails the test if they are not equal. +func CmpAny(t *testing.T, desc string, want, got any, cmpOpts ...cmp.Option) { + t.Helper() + + if diff := cmp.Diff(want, got, cmpOpts...); diff != "" { + t.Fatalf("unexpected %s (-want, +got):\n%s\n", desc, diff) + } +} + // SplitFile separates file content into contiguous sections separated by // a blank line. func SplitFile(path string) (sections [][]string, err error) { diff --git a/src/control/lib/control/telemetry.go b/src/control/lib/control/telemetry.go index 62854438df4..919e54ff284 100644 --- a/src/control/lib/control/telemetry.go +++ b/src/control/lib/control/telemetry.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,13 +8,12 @@ package control import ( "context" - "encoding/json" "fmt" "net/url" "sort" - "strconv" "strings" + "github.com/daos-stack/daos/src/control/lib/daos" "github.com/pkg/errors" pclient "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" @@ -58,299 +57,21 @@ func scrapeMetrics(ctx context.Context, req httpGetter) (pbMetricMap, error) { return pbMetricMap(result), nil } -// MetricType defines the different types of metrics. -type MetricType uint32 - -const ( - MetricTypeUnknown MetricType = iota - MetricTypeGeneric - MetricTypeCounter - MetricTypeGauge - MetricTypeSummary - MetricTypeHistogram - - metricTypeUnknownStr = "Unknown" - metricTypeGenericStr = "Generic" - metricTypeCounterStr = "Counter" - metricTypeGaugeStr = "Gauge" - metricTypeSummaryStr = "Summary" - metricTypeHistogramStr = "Histogram" -) - -func (t MetricType) String() string { - switch t { - case MetricTypeGeneric: - return metricTypeGenericStr - case MetricTypeCounter: - return metricTypeCounterStr - case MetricTypeGauge: - return metricTypeGaugeStr - case MetricTypeSummary: - return metricTypeSummaryStr - case MetricTypeHistogram: - return metricTypeHistogramStr - } - - return metricTypeUnknownStr -} - -func metricTypeFromPrometheus(pType pclient.MetricType) MetricType { +func metricTypeFromPrometheus(pType pclient.MetricType) daos.MetricType { switch pType { case pclient.MetricType_COUNTER: - return MetricTypeCounter + return daos.MetricTypeCounter case pclient.MetricType_GAUGE: - return MetricTypeGauge + return daos.MetricTypeGauge case pclient.MetricType_SUMMARY: - return MetricTypeSummary + return daos.MetricTypeSummary case pclient.MetricType_HISTOGRAM: - return MetricTypeHistogram + return daos.MetricTypeHistogram case pclient.MetricType_UNTYPED: - return MetricTypeGeneric - } - - return MetricTypeUnknown -} - -func metricTypeFromString(typeStr string) MetricType { - // normalize the strings for comparison - switch strings.ToLower(typeStr) { - case strings.ToLower(metricTypeCounterStr): - return MetricTypeCounter - case strings.ToLower(metricTypeGaugeStr): - return MetricTypeGauge - case strings.ToLower(metricTypeSummaryStr): - return MetricTypeSummary - case strings.ToLower(metricTypeHistogramStr): - return MetricTypeHistogram - case strings.ToLower(metricTypeGenericStr): - return MetricTypeGeneric - } - return MetricTypeUnknown -} - -type ( - // Metric is an interface implemented 
by all metric types. - Metric interface { - IsMetric() - } - - // LabelMap is the set of key-value label pairs. - LabelMap map[string]string - - // SimpleMetric is a specific metric with a value. - SimpleMetric struct { - Labels LabelMap `json:"labels"` - Value float64 `json:"value"` - } - - // QuantileMap is the set of quantile measurements. - QuantileMap map[float64]float64 - - // SummaryMetric represents a group of observations. - SummaryMetric struct { - Labels LabelMap `json:"labels"` - SampleCount uint64 `json:"sample_count"` - SampleSum float64 `json:"sample_sum"` - Quantiles QuantileMap `json:"quantiles"` - } - - // MetricBucket represents a bucket for observations to be sorted into. - MetricBucket struct { - CumulativeCount uint64 `json:"cumulative_count"` - UpperBound float64 `json:"upper_bound"` - } - - // HistogramMetric represents a group of observations sorted into - // buckets. - HistogramMetric struct { - Labels LabelMap `json:"labels"` - SampleCount uint64 `json:"sample_count"` - SampleSum float64 `json:"sample_sum"` - Buckets []*MetricBucket `json:"buckets"` - } - - // MetricSet is a group of related metrics. - MetricSet struct { - Name string `json:"name"` - Description string `json:"description"` - Type MetricType `json:"type"` - Metrics []Metric `json:"metrics"` - } -) - -// IsMetric identifies SimpleMetric as a Metric. -func (*SimpleMetric) IsMetric() {} - -// IsMetric identifies SummaryMetric as a Metric. -func (*SummaryMetric) IsMetric() {} - -// UnmarshalJSON unmarshals a SummaryMetric from JSON. -func (m *SummaryMetric) UnmarshalJSON(data []byte) error { - if m == nil { - return errors.New("nil SummaryMetric") - } - - if m.Quantiles == nil { - m.Quantiles = make(QuantileMap) - } - - type Alias SummaryMetric - aux := (*Alias)(m) - if err := json.Unmarshal(data, &aux); err != nil { - return err - } - - return nil -} - -// IsMetric identifies HistogramMetric as a Metric. -func (*HistogramMetric) IsMetric() {} - -// Keys gets the sorted list of label keys. -func (m LabelMap) Keys() []string { - result := make([]string, 0, len(m)) - for label := range m { - result = append(result, label) - } - sort.Strings(result) - return result -} - -// Keys gets the sorted list of quantile keys. -func (m QuantileMap) Keys() []float64 { - result := make([]float64, 0, len(m)) - for q := range m { - result = append(result, q) - } - sort.Float64s(result) - return result -} - -// MarshalJSON marshals the QuantileMap into JSON. -func (m QuantileMap) MarshalJSON() ([]byte, error) { - strMap := make(map[string]string) - - fmtFloat := func(f float64) string { - return strconv.FormatFloat(f, 'g', -1, 64) - } - - for key, val := range m { - strMap[fmtFloat(key)] = fmtFloat(val) - } - - return json.Marshal(&strMap) -} - -// UnmarshalJSON unmarshals the QuantileMap from JSON. -func (m QuantileMap) UnmarshalJSON(data []byte) error { - if m == nil { - return errors.New("QuantileMap is nil") - } - - fromJSON := make(map[string]string) - - if err := json.Unmarshal(data, &fromJSON); err != nil { - return nil - } - - for key, val := range fromJSON { - floatKey, err := strconv.ParseFloat(key, 64) - if err != nil { - return errors.Wrapf(err, "QuantileMap key %q", key) - } - - floatVal, err := strconv.ParseFloat(val, 64) - if err != nil { - return errors.Wrapf(err, "QuantileMap value %q for key %q", val, key) - } - - m[floatKey] = floatVal + return daos.MetricTypeGeneric } - return nil -} - -// MarshalJSON marshals the MetricSet to JSON. 
-func (ms *MetricSet) MarshalJSON() ([]byte, error) { - type toJSON MetricSet - return json.Marshal(&struct { - Type string `json:"type"` - *toJSON - }{ - Type: strings.ToLower(ms.Type.String()), - toJSON: (*toJSON)(ms), - }) -} - -// jsonMetric serves as a universal metric representation for unmarshaling from -// JSON. It covers all possible fields of Metric types. -type jsonMetric struct { - Labels LabelMap `json:"labels"` - Value float64 `json:"value"` - SampleCount uint64 `json:"sample_count"` - SampleSum float64 `json:"sample_sum"` - Quantiles QuantileMap `json:"quantiles"` - Buckets []*MetricBucket `json:"buckets"` -} -// UnmarshalJSON unmarshals a Metric into the jsonMetric type. -func (jm *jsonMetric) UnmarshalJSON(data []byte) error { - if jm == nil { - return errors.New("nil jsonMetric") - } - - if jm.Quantiles == nil { - jm.Quantiles = make(QuantileMap) - } - - type Alias jsonMetric - aux := (*Alias)(jm) - if err := json.Unmarshal(data, &aux); err != nil { - return err - } - - return nil -} - -// UnmarshalJSON unmarshals the MetricSet from JSON. -func (ms *MetricSet) UnmarshalJSON(data []byte) error { - if ms == nil { - return errors.New("nil MetricSet") - } - - type fromJSON MetricSet - from := &struct { - Type string `json:"type"` - Metrics []*jsonMetric `json:"metrics"` - *fromJSON - }{ - fromJSON: (*fromJSON)(ms), - } - if err := json.Unmarshal(data, from); err != nil { - return err - } - - ms.Type = metricTypeFromString(from.Type) - for _, m := range from.Metrics { - switch ms.Type { - case MetricTypeSummary: - ms.Metrics = append(ms.Metrics, &SummaryMetric{ - Labels: m.Labels, - SampleCount: m.SampleCount, - SampleSum: m.SampleSum, - Quantiles: m.Quantiles, - }) - case MetricTypeHistogram: - ms.Metrics = append(ms.Metrics, &HistogramMetric{ - Labels: m.Labels, - SampleCount: m.SampleCount, - SampleSum: m.SampleSum, - Buckets: m.Buckets, - }) - default: - ms.Metrics = append(ms.Metrics, newSimpleMetric(m.Labels, m.Value)) - } - } - return nil + return daos.MetricTypeUnknown } type ( @@ -363,7 +84,7 @@ type ( // MetricsListResp contains the list of available metrics. MetricsListResp struct { - AvailableMetricSets []*MetricSet `json:"available_metric_sets"` + AvailableMetricSets []*daos.MetricSet `json:"available_metric_sets"` } ) @@ -390,10 +111,10 @@ func MetricsList(ctx context.Context, req *MetricsListReq) (*MetricsListResp, er resp := new(MetricsListResp) - list := make([]*MetricSet, 0, len(scraped)) + list := make([]*daos.MetricSet, 0, len(scraped)) for _, name := range scraped.Keys() { mf := scraped[name] - newMetric := &MetricSet{ + newMetric := &daos.MetricSet{ Name: name, Description: mf.GetHelp(), Type: metricTypeFromPrometheus(mf.GetType()), @@ -416,7 +137,7 @@ type ( // MetricsQueryResp contains the list of telemetry values per host. 
MetricsQueryResp struct { - MetricSets []*MetricSet `json:"metric_sets"` + MetricSets []*daos.MetricSet `json:"metric_sets"` } ) @@ -453,14 +174,14 @@ func MetricsQuery(ctx context.Context, req *MetricsQueryReq) (*MetricsQueryResp, func newMetricsQueryResp(scraped pbMetricMap, metricNames []string) (*MetricsQueryResp, error) { resp := new(MetricsQueryResp) - list := make([]*MetricSet, 0, len(metricNames)) + list := make([]*daos.MetricSet, 0, len(metricNames)) for _, name := range metricNames { mf, found := scraped[name] if !found { return nil, errors.Errorf("metric %q not found on host", name) } - newSet := &MetricSet{ + newSet := &daos.MetricSet{ Name: name, Description: mf.GetHelp(), Type: metricTypeFromPrometheus(mf.GetType()), @@ -482,7 +203,7 @@ func newMetricsQueryResp(scraped pbMetricMap, metricNames []string) (*MetricsQue return resp, nil } -func getMetricFromPrometheus(pMetric *pclient.Metric, metricType pclient.MetricType) (Metric, error) { +func getMetricFromPrometheus(pMetric *pclient.Metric, metricType pclient.MetricType) (daos.Metric, error) { labels := metricsLabelsToMap(pMetric) switch metricType { case pclient.MetricType_COUNTER: @@ -491,11 +212,11 @@ func getMetricFromPrometheus(pMetric *pclient.Metric, metricType pclient.MetricT return newSimpleMetric(labels, pMetric.GetGauge().GetValue()), nil case pclient.MetricType_SUMMARY: summary := pMetric.GetSummary() - newMetric := &SummaryMetric{ + newMetric := &daos.SummaryMetric{ Labels: labels, SampleSum: summary.GetSampleSum(), SampleCount: summary.GetSampleCount(), - Quantiles: QuantileMap{}, + Quantiles: daos.QuantileMap{}, } for _, q := range summary.Quantile { newMetric.Quantiles[q.GetQuantile()] = q.GetValue() @@ -503,14 +224,14 @@ func getMetricFromPrometheus(pMetric *pclient.Metric, metricType pclient.MetricT return newMetric, nil case pclient.MetricType_HISTOGRAM: histogram := pMetric.GetHistogram() - newMetric := &HistogramMetric{ + newMetric := &daos.HistogramMetric{ Labels: labels, SampleSum: histogram.GetSampleSum(), SampleCount: histogram.GetSampleCount(), } for _, b := range histogram.Bucket { newMetric.Buckets = append(newMetric.Buckets, - &MetricBucket{ + &daos.MetricBucket{ UpperBound: b.GetUpperBound(), CumulativeCount: b.GetCumulativeCount(), }) @@ -523,8 +244,8 @@ func getMetricFromPrometheus(pMetric *pclient.Metric, metricType pclient.MetricT return nil, errors.New("unknown metric type") } -func newSimpleMetric(labels map[string]string, value float64) *SimpleMetric { - return &SimpleMetric{ +func newSimpleMetric(labels map[string]string, value float64) *daos.SimpleMetric { + return &daos.SimpleMetric{ Labels: labels, Value: value, } diff --git a/src/control/lib/control/telemetry_test.go b/src/control/lib/control/telemetry_test.go index f5d8cb701ec..5887283852d 100644 --- a/src/control/lib/control/telemetry_test.go +++ b/src/control/lib/control/telemetry_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2022 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -23,6 +23,7 @@ import ( "google.golang.org/protobuf/proto" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" ) func newTestMetricFamily(name string, help string, mType pclient.MetricType) *pclient.MetricFamily { @@ -253,7 +254,7 @@ func TestControl_MetricsList(t *testing.T) { return []byte{}, nil }, expResp: &MetricsListResp{ - AvailableMetricSets: []*MetricSet{}, + AvailableMetricSets: []*daos.MetricSet{}, }, }, "success": { @@ -263,16 +264,16 @@ func TestControl_MetricsList(t *testing.T) { }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expResp: &MetricsListResp{ - AvailableMetricSets: []*MetricSet{ + AvailableMetricSets: []*daos.MetricSet{ { Name: "counter", Description: "this is the counter help", - Type: MetricTypeCounter, + Type: daos.MetricTypeCounter, }, { Name: "gauge", Description: "this is the gauge help", - Type: MetricTypeGauge, + Type: daos.MetricTypeGauge, }, }, }, @@ -299,7 +300,7 @@ func TestControl_MetricsList(t *testing.T) { } func TestControl_getMetricFromPrometheus(t *testing.T) { - testLabels := LabelMap{ + testLabels := daos.MetricLabelMap{ "foo": "bar", "baz": "snafu", } @@ -341,7 +342,7 @@ func TestControl_getMetricFromPrometheus(t *testing.T) { for name, tc := range map[string]struct { input *pclient.Metric inputType pclient.MetricType - expResult Metric + expResult daos.Metric expErr error }{ "counter": { @@ -362,11 +363,11 @@ func TestControl_getMetricFromPrometheus(t *testing.T) { "summary": { input: testSummary, inputType: pclient.MetricType_SUMMARY, - expResult: &SummaryMetric{ + expResult: &daos.SummaryMetric{ Labels: testLabels, SampleSum: testSummary.Summary.GetSampleSum(), SampleCount: testSummary.Summary.GetSampleCount(), - Quantiles: QuantileMap{ + Quantiles: daos.QuantileMap{ 0: 1, 1: 2, 2: 3, @@ -377,11 +378,11 @@ func TestControl_getMetricFromPrometheus(t *testing.T) { "histogram": { input: testHistogram, inputType: pclient.MetricType_HISTOGRAM, - expResult: &HistogramMetric{ + expResult: &daos.HistogramMetric{ Labels: testLabels, SampleSum: testHistogram.Histogram.GetSampleSum(), SampleCount: testHistogram.Histogram.GetSampleCount(), - Buckets: []*MetricBucket{ + Buckets: []*daos.MetricBucket{ { UpperBound: 100, CumulativeCount: 1, @@ -465,7 +466,7 @@ func TestControl_MetricsQuery(t *testing.T) { return []byte{}, nil }, expResp: &MetricsQueryResp{ - MetricSets: []*MetricSet{}, + MetricSets: []*daos.MetricSet{}, }, }, "all metrics": { @@ -475,39 +476,39 @@ func TestControl_MetricsQuery(t *testing.T) { }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expResp: &MetricsQueryResp{ - MetricSets: []*MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_counter", Description: "this is the counter help", - Type: MetricTypeCounter, - Metrics: []Metric{ + Type: daos.MetricTypeCounter, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{}, 0), }, }, { Name: "my_gauge", Description: "this is the gauge help", - Type: MetricTypeGauge, - Metrics: []Metric{ + Type: daos.MetricTypeGauge, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{}, 0), }, }, { Name: "my_generic", Description: "this is the generic help", - Type: MetricTypeGeneric, - Metrics: []Metric{ + Type: daos.MetricTypeGeneric, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{}, 0), }, }, { Name: "my_histogram", Description: "this is the histogram help", - Type: MetricTypeHistogram, - Metrics: []Metric{ - &HistogramMetric{ - Labels: 
LabelMap{}, - Buckets: []*MetricBucket{ + Type: daos.MetricTypeHistogram, + Metrics: []daos.Metric{ + &daos.HistogramMetric{ + Labels: daos.MetricLabelMap{}, + Buckets: []*daos.MetricBucket{ // Prometheus library parsing // includes inf bucket at minimum {UpperBound: math.Inf(0)}, @@ -518,11 +519,11 @@ func TestControl_MetricsQuery(t *testing.T) { { Name: "my_summary", Description: "this is the summary help", - Type: MetricTypeSummary, - Metrics: []Metric{ - &SummaryMetric{ - Labels: LabelMap{}, - Quantiles: QuantileMap{0: 0}, + Type: daos.MetricTypeSummary, + Metrics: []daos.Metric{ + &daos.SummaryMetric{ + Labels: daos.MetricLabelMap{}, + Quantiles: daos.QuantileMap{0: 0}, }, }, }, @@ -537,20 +538,20 @@ func TestControl_MetricsQuery(t *testing.T) { }, scrapeFn: mockScrapeFnSuccess(t, testMetricFam...), expResp: &MetricsQueryResp{ - MetricSets: []*MetricSet{ + MetricSets: []*daos.MetricSet{ { Name: "my_generic", Description: "this is the generic help", - Type: MetricTypeGeneric, - Metrics: []Metric{ + Type: daos.MetricTypeGeneric, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{}, 0), }, }, { Name: "my_counter", Description: "this is the counter help", - Type: MetricTypeCounter, - Metrics: []Metric{ + Type: daos.MetricTypeCounter, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{}, 0), }, }, @@ -589,29 +590,29 @@ func TestControl_Metric_JSON(t *testing.T) { } for name, tc := range map[string]struct { - metric Metric + metric daos.Metric }{ "nil": {}, "simple": { metric: newSimpleMetric(testLabelMap, 123), }, "summary": { - metric: &SummaryMetric{ + metric: &daos.SummaryMetric{ Labels: testLabelMap, SampleSum: 5678.9, SampleCount: 42, - Quantiles: QuantileMap{ + Quantiles: daos.QuantileMap{ 0.25: 50, 0.5: 42, }, }, }, "histogram": { - metric: &HistogramMetric{ + metric: &daos.HistogramMetric{ Labels: testLabelMap, SampleSum: 9876, SampleCount: 120, - Buckets: []*MetricBucket{ + Buckets: []*daos.MetricBucket{ { CumulativeCount: 55, UpperBound: 500, @@ -626,16 +627,16 @@ func TestControl_Metric_JSON(t *testing.T) { t.Fatalf("expected to marshal, got %q", err) } - var unmarshaled Metric + var unmarshaled daos.Metric switch tc.metric.(type) { - case *SimpleMetric: - unmarshaled = new(SimpleMetric) - case *SummaryMetric: - unmarshaled = new(SummaryMetric) - case *HistogramMetric: - unmarshaled = new(HistogramMetric) + case *daos.SimpleMetric: + unmarshaled = new(daos.SimpleMetric) + case *daos.SummaryMetric: + unmarshaled = new(daos.SummaryMetric) + case *daos.HistogramMetric: + unmarshaled = new(daos.HistogramMetric) default: - unmarshaled = new(SimpleMetric) + unmarshaled = new(daos.SimpleMetric) } err = json.Unmarshal(marshaled, unmarshaled) @@ -645,7 +646,7 @@ func TestControl_Metric_JSON(t *testing.T) { expResult := tc.metric if tc.metric == nil { - expResult = &SimpleMetric{} + expResult = &daos.SimpleMetric{} } if diff := cmp.Diff(expResult, unmarshaled); diff != "" { @@ -655,62 +656,17 @@ func TestControl_Metric_JSON(t *testing.T) { } } -func TestControl_metricTypeFromString(t *testing.T) { - for name, tc := range map[string]struct { - input string - expType MetricType - }{ - "empty": { - expType: MetricTypeUnknown, - }, - "counter": { - input: "counter", - expType: MetricTypeCounter, - }, - "gauge": { - input: "gauge", - expType: MetricTypeGauge, - }, - "summary": { - input: "summary", - expType: MetricTypeSummary, - }, - "histogram": { - input: "histogram", - expType: MetricTypeHistogram, - }, - "generic": { - input: "generic", - expType: MetricTypeGeneric, - }, 
- "invalid": { - input: "some garbage text", - expType: MetricTypeUnknown, - }, - "weird capitalization": { - input: "CoUnTeR", - expType: MetricTypeCounter, - }, - } { - t.Run(name, func(t *testing.T) { - gotType := metricTypeFromString(tc.input) - - test.AssertEqual(t, tc.expType, gotType, "") - }) - } -} - func TestControl_MetricSet_JSON(t *testing.T) { for name, tc := range map[string]struct { - set *MetricSet + set *daos.MetricSet }{ "nil": {}, "generic type": { - set: &MetricSet{ + set: &daos.MetricSet{ Name: "timespan", Description: "It's been a while", - Type: MetricTypeGeneric, - Metrics: []Metric{ + Type: daos.MetricTypeGeneric, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{ "units": "nanoseconds", }, float64(time.Second)), @@ -718,11 +674,11 @@ func TestControl_MetricSet_JSON(t *testing.T) { }, }, "counter type": { - set: &MetricSet{ + set: &daos.MetricSet{ Name: "one_ring", Description: "Precious...", - Type: MetricTypeCounter, - Metrics: []Metric{ + Type: daos.MetricTypeCounter, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{ "owner": "frodo", }, 1), @@ -730,11 +686,11 @@ func TestControl_MetricSet_JSON(t *testing.T) { }, }, "gauge type": { - set: &MetricSet{ + set: &daos.MetricSet{ Name: "funny_hats", Description: "Hilarious headgear in inventory", - Type: MetricTypeGauge, - Metrics: []Metric{ + Type: daos.MetricTypeGauge, + Metrics: []daos.Metric{ newSimpleMetric(map[string]string{ "type": "tophat", }, 1), @@ -748,12 +704,12 @@ func TestControl_MetricSet_JSON(t *testing.T) { }, }, "summary type": { - set: &MetricSet{ + set: &daos.MetricSet{ Name: "alpha", Description: "The first letter! Everybody's favorite!", - Type: MetricTypeSummary, - Metrics: []Metric{ - &SummaryMetric{ + Type: daos.MetricTypeSummary, + Metrics: []daos.Metric{ + &daos.SummaryMetric{ Labels: map[string]string{"beta": "b"}, SampleCount: 3, SampleSum: 42, @@ -763,16 +719,16 @@ func TestControl_MetricSet_JSON(t *testing.T) { }, }, "histogram type": { - set: &MetricSet{ + set: &daos.MetricSet{ Name: "my_histogram", Description: "This is a histogram", - Type: MetricTypeHistogram, - Metrics: []Metric{ - &HistogramMetric{ + Type: daos.MetricTypeHistogram, + Metrics: []daos.Metric{ + &daos.HistogramMetric{ Labels: map[string]string{"owner": "me"}, SampleCount: 1024, SampleSum: 12344, - Buckets: []*MetricBucket{ + Buckets: []*daos.MetricBucket{ { CumulativeCount: 789, UpperBound: 500, @@ -793,7 +749,7 @@ func TestControl_MetricSet_JSON(t *testing.T) { t.Fatalf("expected to marshal, got %q", err) } - unmarshaled := new(MetricSet) + unmarshaled := new(daos.MetricSet) err = json.Unmarshal(marshaled, unmarshaled) if err != nil { t.Fatalf("expected to unmarshal, got %q", err) @@ -801,7 +757,7 @@ func TestControl_MetricSet_JSON(t *testing.T) { expResult := tc.set if tc.set == nil { - expResult = &MetricSet{} + expResult = &daos.MetricSet{} } if diff := cmp.Diff(expResult, unmarshaled); diff != "" { diff --git a/src/control/lib/daos/api/api.go b/src/control/lib/daos/api/api.go index dbd6d2725a8..51ef20e669a 100644 --- a/src/control/lib/daos/api/api.go +++ b/src/control/lib/daos/api/api.go @@ -38,7 +38,7 @@ func (api *api) isInitialized() bool { // Init performs DAOS API initialization steps and returns a closure // to be called before application exit. 
-func (api *api) Init() (func(), error) {
+func (api *api) Init(initLogging bool) (func(), error) {
 	api.Lock()
 	defer api.Unlock()
@@ -47,12 +47,21 @@ func (api *api) Init() (func(), error) {
 		return stubFini, daos.Already
 	}
 
-	if err := daosError(C.daos_init()); err != nil {
+	logFini := stubFini
+	if initLogging {
+		fini, err := daos.InitLogging(daos.DefaultErrorMask)
+		if err != nil {
+			return stubFini, err
+		}
+		logFini = fini
+	}
+
+	if err := daosError(daos_init()); err != nil {
 		return stubFini, err
 	}
 
 	api.initialized = true
-	return api.Fini, nil
+	return func() { api.Fini(); logFini() }, nil
 }
 
 // Fini releases resources obtained during DAOS API initialization.
@@ -64,6 +73,6 @@ func (api *api) Fini() {
 		return
 	}
 
-	C.daos_fini()
+	daos_fini()
 	api.initialized = false
 }
diff --git a/src/control/lib/daos/api/errors.go b/src/control/lib/daos/api/errors.go
new file mode 100644
index 00000000000..6d1b4b665e3
--- /dev/null
+++ b/src/control/lib/daos/api/errors.go
@@ -0,0 +1,13 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+
+package api
+
+import "github.com/pkg/errors"
+
+var (
+	ErrNoSystemRanks = errors.New("no ranks in system")
+)
diff --git a/src/control/lib/daos/api/libdaos.go b/src/control/lib/daos/api/libdaos.go
new file mode 100644
index 00000000000..d7c6bfed82d
--- /dev/null
+++ b/src/control/lib/daos/api/libdaos.go
@@ -0,0 +1,38 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build !test_stubs
+// +build !test_stubs
+
+package api
+
+/*
+#include <daos.h>
+#include <daos_mgmt.h>
+#include <daos/agent.h>
+
+#cgo LDFLAGS: -lcart -lgurt -ldaos -ldaos_common
+*/
+import "C"
+
+func daos_init() C.int {
+	return C.daos_init()
+}
+
+func daos_fini() {
+	C.daos_fini()
+}
+
+func dc_agent_fini() {
+	C.dc_agent_fini()
+}
+
+func daos_mgmt_get_sys_info(sys *C.char, sys_info **C.struct_daos_sys_info) C.int {
+	return C.daos_mgmt_get_sys_info(sys, sys_info)
+}
+
+func daos_mgmt_put_sys_info(sys_info *C.struct_daos_sys_info) {
+	C.daos_mgmt_put_sys_info(sys_info)
+}
diff --git a/src/control/lib/daos/api/libdaos_selftest.go b/src/control/lib/daos/api/libdaos_selftest.go
new file mode 100644
index 00000000000..a466ba54b74
--- /dev/null
+++ b/src/control/lib/daos/api/libdaos_selftest.go
@@ -0,0 +1,31 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build !test_stubs +// +build !test_stubs + +package api + +/* +#cgo CFLAGS: -I${SRCDIR}/../../../../cart/utils -I${SRCDIR}/../../../../utils/self_test +#cgo LDFLAGS: -lgurt -lcart -ldaos_self_test + +#include "self_test_lib.h" +*/ +import "C" + +func run_self_test(sizes *C.struct_st_size_params, numSizes C.int, repCount C.int, maxInflight C.int, groupName *C.char, + optMsEndpoints *C.struct_st_endpoint, numOptMsEndpoints C.uint32_t, + tgtEndpoints *C.struct_st_endpoint, numTgtEndpoints C.uint32_t, + msEndpoints **C.struct_st_master_endpt, numMsEndpoints *C.uint32_t, + sizeLatencies ****C.struct_st_latency, bufAlignment C.int16_t) C.int { + return C.run_self_test(sizes, numSizes, repCount, maxInflight, groupName, + optMsEndpoints, numOptMsEndpoints, tgtEndpoints, numTgtEndpoints, + msEndpoints, numMsEndpoints, sizeLatencies, bufAlignment, nil, true, true) +} + +func self_test_fini(agent_used C.bool) { + C.self_test_fini(agent_used) +} diff --git a/src/control/lib/daos/api/libdaos_selftest_stubs.go b/src/control/lib/daos/api/libdaos_selftest_stubs.go new file mode 100644 index 00000000000..9c9e1eb4603 --- /dev/null +++ b/src/control/lib/daos/api/libdaos_selftest_stubs.go @@ -0,0 +1,170 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build test_stubs +// +build test_stubs + +package api + +import ( + "unsafe" + + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/hostlist" + "github.com/daos-stack/daos/src/control/lib/ranklist" +) + +/* +#cgo CFLAGS: -I${SRCDIR}/../../../../cart/utils -I${SRCDIR}/../../../../utils/self_test +#cgo LDFLAGS: -lgurt -lcart -ldaos_self_test + +#include +#include + +#include "self_test_lib.h" + +struct st_latency *** +alloc_latency_arrays(size_t num_sizes, size_t num_endpoints, size_t num_latencies) +{ + struct st_latency ***latencies = NULL; + int i, j; + + D_ALLOC_ARRAY(latencies, num_sizes); + if (latencies == NULL) + return NULL; + + for (i = 0; i < num_sizes; i++) { + D_ALLOC_ARRAY(latencies[i], num_endpoints); + if (latencies[i] == NULL) + return NULL; + + for (j = 0; j < num_endpoints; j++) { + D_ALLOC_ARRAY(latencies[i][j], num_latencies); + if (latencies[i][j] == NULL) + return NULL; + } + } + + return latencies; +} +*/ +import "C" + +type run_self_test_EndpointLatency struct { + val C.int64_t + rank C.uint32_t + tag C.uint32_t + cci_rc C.int +} + +var ( + run_self_test_RunConfig *daos.SelfTestConfig + run_self_test_RC C.int = 0 + run_self_test_MsEndpoints []daos.SelfTestEndpoint + run_self_test_EndpointLatencies []run_self_test_EndpointLatency +) + +func run_self_test(sizes *C.struct_st_size_params, numSizes C.int, repCount C.int, maxInflight C.int, groupName *C.char, + optMsEndpoints *C.struct_st_endpoint, numOptMsEndpoints C.uint32_t, + tgtEndpoints *C.struct_st_endpoint, numTgtEndpoints C.uint32_t, + msEndpoints **C.struct_st_master_endpt, numMsEndpoints *C.uint32_t, + sizeLatencies ****C.struct_st_latency, bufAlignment C.int16_t) C.int { + + cfg := &daos.SelfTestConfig{ + GroupName: C.GoString(groupName), + Repetitions: uint(repCount), + BufferAlignment: int16(bufAlignment), + MaxInflightRPCs: uint(maxInflight), + } + + if numSizes > 0 { + cfg.SendSizes = make([]uint64, int(numSizes)) + cfg.ReplySizes = make([]uint64, int(numSizes)) + testSizesSlice := unsafe.Slice(sizes, int(numSizes)) + for i := 0; i < int(numSizes); i++ { + cfg.SendSizes[i] = 
uint64(testSizesSlice[i].send_size) + cfg.ReplySizes[i] = uint64(testSizesSlice[i].reply_size) + } + } + + if numOptMsEndpoints > 0 { + cfg.MasterEndpoints = make([]daos.SelfTestEndpoint, int(numOptMsEndpoints)) + msEndpointsSlice := unsafe.Slice(optMsEndpoints, int(numOptMsEndpoints)) + for i := 0; i < int(numOptMsEndpoints); i++ { + cfg.MasterEndpoints[i].Rank = ranklist.Rank(msEndpointsSlice[i].rank) + cfg.MasterEndpoints[i].Tag = uint32(msEndpointsSlice[i].tag) + } + run_self_test_MsEndpoints = cfg.MasterEndpoints + } + + if numTgtEndpoints > 0 { + rankSet := ranklist.NewRankSet() + tagSet := hostlist.NewNumericSet() + tgtEndpointsSlice := unsafe.Slice(tgtEndpoints, int(numTgtEndpoints)) + for i := 0; i < int(numTgtEndpoints); i++ { + rankSet.Add(ranklist.Rank(tgtEndpointsSlice[i].rank)) + tagSet.Add(uint(tgtEndpointsSlice[i].tag)) + } + cfg.EndpointRanks = rankSet.Ranks() + for _, tag := range tagSet.Slice() { + cfg.EndpointTags = append(cfg.EndpointTags, uint32(tag)) + } + + // If the configuration doesn't specify master endpoints, + // create one similarly to how the library does it. + if len(run_self_test_MsEndpoints) == 0 { + run_self_test_MsEndpoints = []daos.SelfTestEndpoint{ + { + Rank: cfg.EndpointRanks[len(cfg.EndpointRanks)-1] + 1, + Tag: cfg.EndpointTags[len(cfg.EndpointTags)-1], + }, + } + } + } + + run_self_test_RunConfig = cfg + if run_self_test_RC != 0 { + return run_self_test_RC + } + + // Construct the C array of master endpoints for the out parameter. + // Must be freed by the caller. + *numMsEndpoints = C.uint32_t(len(run_self_test_MsEndpoints)) + ptr, err := C.calloc(C.size_t(len(run_self_test_MsEndpoints)), C.sizeof_struct_st_master_endpt) + if err != nil { + panic("calloc() failed for master endpoints") + } + *msEndpoints = (*C.struct_st_master_endpt)(ptr) + msEpSlice := unsafe.Slice(*msEndpoints, int(*numMsEndpoints)) + for i := 0; i < int(*numMsEndpoints); i++ { + msEpSlice[i].endpt.ep_rank = C.uint32_t(run_self_test_MsEndpoints[i].Rank) + msEpSlice[i].endpt.ep_tag = C.uint32_t(run_self_test_MsEndpoints[i].Tag) + } + + // Construct the multi-dimensional C array of test latencies for the out parameter. + // Must be freed by the caller. + *sizeLatencies = C.alloc_latency_arrays(C.size_t(numSizes), C.size_t(*numMsEndpoints), C.size_t(len(run_self_test_EndpointLatencies))) + if *sizeLatencies == nil { + panic("calloc() failed for latency arrays") + } + + sizesSlice := unsafe.Slice(*sizeLatencies, int(numSizes)) + for i := 0; i < int(numSizes); i++ { + msSessSlice := unsafe.Slice(sizesSlice[i], int(*numMsEndpoints)) + for j := 0; j < int(*numMsEndpoints); j++ { + epLatSlice := unsafe.Slice(msSessSlice[j], len(run_self_test_EndpointLatencies)) + + for k := 0; k < len(run_self_test_EndpointLatencies); k++ { + epLatSlice[k].val = run_self_test_EndpointLatencies[k].val + epLatSlice[k].rank = run_self_test_EndpointLatencies[k].rank + epLatSlice[k].tag = run_self_test_EndpointLatencies[k].tag + } + } + } + + return run_self_test_RC +} + +func self_test_fini(agent_used C.bool) {} diff --git a/src/control/lib/daos/api/libdaos_stubs.go b/src/control/lib/daos/api/libdaos_stubs.go new file mode 100644 index 00000000000..341b90bdd34 --- /dev/null +++ b/src/control/lib/daos/api/libdaos_stubs.go @@ -0,0 +1,116 @@ +// +// (C) Copyright 2024 Intel Corporation. 
+// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build test_stubs +// +build test_stubs + +package api + +import ( + "unsafe" + + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/lib/daos" +) + +/* +#include +#include +*/ +import "C" + +var ( + daos_init_RC C.int = 0 +) + +func daos_init() C.int { + return daos_init_RC +} + +func daos_fini() {} + +func dc_agent_fini() {} + +var ( + defaultSystemInfo *daos.SystemInfo = &daos.SystemInfo{ + Name: build.DefaultSystemName, + Provider: "ofi+tcp", + AgentPath: "/does/not/exist", + RankURIs: []*daos.RankURI{ + {Rank: 0, URI: "/does/not/exist"}, + {Rank: 1, URI: "/does/not/exist"}, + {Rank: 2, URI: "/does/not/exist"}, + }, + AccessPointRankURIs: []*daos.RankURI{ + {Rank: 0, URI: "/does/not/exist"}, + {Rank: 1, URI: "/does/not/exist"}, + {Rank: 2, URI: "/does/not/exist"}, + }, + } + daos_mgmt_get_sys_info_SystemInfo *daos.SystemInfo = defaultSystemInfo + daos_mgmt_get_sys_info_RC C.int = 0 +) + +func daos_mgmt_get_sys_info(group *C.char, sys_info_out **C.struct_daos_sys_info) C.int { + if daos_mgmt_get_sys_info_RC != 0 { + return daos_mgmt_get_sys_info_RC + } + + si := &C.struct_daos_sys_info{} + for i, c := range daos_mgmt_get_sys_info_SystemInfo.Name { + si.dsi_system_name[i] = C.char(c) + } + if group != nil && C.GoString(group) != daos_mgmt_get_sys_info_SystemInfo.Name { + panic("invalid group") + } + for i, c := range daos_mgmt_get_sys_info_SystemInfo.Provider { + si.dsi_fabric_provider[i] = C.char(c) + } + for i, c := range daos_mgmt_get_sys_info_SystemInfo.AgentPath { + si.dsi_agent_path[i] = C.char(c) + } + + si.dsi_nr_ranks = C.uint32_t(len(daos_mgmt_get_sys_info_SystemInfo.RankURIs)) + si.dsi_ranks = (*C.struct_daos_rank_uri)(C.calloc(C.size_t(si.dsi_nr_ranks), C.sizeof_struct_daos_rank_uri)) + if si.dsi_ranks == nil { + panic("calloc() failed for system ranks") + } + rankSlice := unsafe.Slice(si.dsi_ranks, int(si.dsi_nr_ranks)) + for i, rankURI := range daos_mgmt_get_sys_info_SystemInfo.RankURIs { + rankSlice[i].dru_rank = C.uint32_t(rankURI.Rank) + rankSlice[i].dru_uri = C.CString(rankURI.URI) + } + + si.dsi_nr_ms_ranks = C.uint32_t(len(daos_mgmt_get_sys_info_SystemInfo.AccessPointRankURIs)) + si.dsi_ms_ranks = (*C.uint32_t)(C.calloc(C.size_t(si.dsi_nr_ms_ranks), C.sizeof_uint32_t)) + if si.dsi_ms_ranks == nil { + panic("calloc() failed for ms ranks") + } + msRankSlice := unsafe.Slice(si.dsi_ms_ranks, int(si.dsi_nr_ms_ranks)) + for i, rankURI := range daos_mgmt_get_sys_info_SystemInfo.AccessPointRankURIs { + msRankSlice[i] = C.uint32_t(rankURI.Rank) + } + + *sys_info_out = si + return 0 +} + +func daos_mgmt_put_sys_info(sys_info *C.struct_daos_sys_info) { + if sys_info == nil { + return + } + + if sys_info.dsi_ranks != nil { + rankSlice := unsafe.Slice(sys_info.dsi_ranks, int(sys_info.dsi_nr_ranks)) + for _, rankURI := range rankSlice { + C.free(unsafe.Pointer(rankURI.dru_uri)) + } + C.free(unsafe.Pointer(sys_info.dsi_ranks)) + } + + if sys_info.dsi_ms_ranks != nil { + C.free(unsafe.Pointer(sys_info.dsi_ms_ranks)) + } +} diff --git a/src/control/lib/daos/api/provider.go b/src/control/lib/daos/api/provider.go index 5b7c74be35c..443cfbd1441 100644 --- a/src/control/lib/daos/api/provider.go +++ b/src/control/lib/daos/api/provider.go @@ -28,9 +28,9 @@ type ( ) // NewProvider returns an initialized DAOS API provider. 
-func NewProvider(log debugTraceLogger) (*Provider, error) { +func NewProvider(log debugTraceLogger, initLogging bool) (*Provider, error) { api := &api{} - cleanup, err := api.Init() + cleanup, err := api.Init(initLogging) if err != nil { return nil, errors.Wrap(err, "failed to initialize DAOS API") } diff --git a/src/control/lib/daos/api/selftest.go b/src/control/lib/daos/api/selftest.go new file mode 100644 index 00000000000..49e62aa6638 --- /dev/null +++ b/src/control/lib/daos/api/selftest.go @@ -0,0 +1,227 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "context" + "time" + "unsafe" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/logging" +) + +/* +#cgo CFLAGS: -I${SRCDIR}/../../../../cart/utils -I${SRCDIR}/../../../../utils/self_test + +#include "self_test_lib.h" + +void +set_size_params(struct st_size_params *params, int send_size, int reply_size) +{ + params->send_size = send_size; + if (send_size == 0) + params->send_type = CRT_SELF_TEST_MSG_TYPE_EMPTY; + else if (send_size < CRT_SELF_TEST_AUTO_BULK_THRESH) + params->send_type = CRT_SELF_TEST_MSG_TYPE_IOV; + else + params->send_type = CRT_SELF_TEST_MSG_TYPE_BULK_GET; + + params->reply_size = reply_size; + if (reply_size == 0) + params->reply_type = CRT_SELF_TEST_MSG_TYPE_EMPTY; + else if (reply_size < CRT_SELF_TEST_AUTO_BULK_THRESH) + params->reply_type = CRT_SELF_TEST_MSG_TYPE_IOV; + else + params->reply_type = CRT_SELF_TEST_MSG_TYPE_BULK_PUT; +} +*/ +import "C" + +type tgtEndpointSlice []daos.SelfTestEndpoint + +// toC returns a pointer to a C array of target endpoints. +// NB: Caller must free the array when finished. +func (tes tgtEndpointSlice) toC() (*C.struct_st_endpoint, error) { + if len(tes) == 0 { + return nil, errors.New("empty tgt endpoint slice") + } + + ptr, err := C.calloc(C.size_t(len(tes)), C.sizeof_struct_st_endpoint) + if err != nil { + return nil, err + } + cEndpoints := (*C.struct_st_endpoint)(ptr) + endPoints := unsafe.Slice(cEndpoints, len(tes)) + for i, ep := range tes { + endPoints[i].rank = C.uint32_t(ep.Rank) + endPoints[i].tag = C.uint32_t(ep.Tag) + } + + return cEndpoints, nil +} + +// getAllSystemRanks returns the set of system ranks available to use +// for a self_test run. If no ranks are available, a sentinel error +// is returned. +func getAllSystemRanks(ctx context.Context) ([]ranklist.Rank, error) { + log := logging.FromContext(ctx) + p, err := NewProvider(log, false) + if err != nil { + return nil, err + } + defer p.Cleanup() + + si, err := p.GetSystemInfo(ctx) + if err != nil { + return nil, err + } + + if len(si.RankURIs) == 0 { + return nil, ErrNoSystemRanks + } + + var systemRanks []ranklist.Rank + for _, rankURI := range si.RankURIs { + systemRanks = append(systemRanks, ranklist.Rank(rankURI.Rank)) + } + + return systemRanks, nil +} + +// RunSelfTest uses the provided configuration to run the logic +// behind the self_test tool. Per-size structured test results +// are returned as a slice. 
+func RunSelfTest(ctx context.Context, cfg *daos.SelfTestConfig) ([]*daos.SelfTestResult, error) { + if err := cfg.Validate(); err != nil { + return nil, errors.Wrap(err, "invalid self_test configuration") + } + + ptr, err := C.calloc(C.size_t(len(cfg.SendSizes)), C.sizeof_struct_st_size_params) + if err != nil { + return nil, err + } + cSizes := (*C.struct_st_size_params)(ptr) + defer C.free(unsafe.Pointer(cSizes)) + testSizes := unsafe.Slice(cSizes, len(cfg.SendSizes)) + for i := 0; i < len(testSizes); i++ { + C.set_size_params(&testSizes[i], C.int(cfg.SendSizes[i]), C.int(cfg.ReplySizes[i])) + } + + if len(cfg.EndpointRanks) == 0 { + cfg.EndpointRanks, err = getAllSystemRanks(ctx) + if err != nil { + return nil, err + } + } + + tgtEndpoints := make(tgtEndpointSlice, 0, len(cfg.EndpointRanks)*len(cfg.EndpointTags)) + for _, r := range cfg.EndpointRanks { + for _, t := range cfg.EndpointTags { + tgtEndpoints = append(tgtEndpoints, daos.SelfTestEndpoint{Rank: r, Tag: t}) + } + } + cTgtEndpoints, err := tgtEndpoints.toC() + defer C.free(unsafe.Pointer(cTgtEndpoints)) + + repCount := C.int(int(cfg.Repetitions) * len(tgtEndpoints)) + maxInflight := C.int(cfg.MaxInflightRPCs) + var cOptMasterEndpoints *C.struct_st_endpoint + var numOptMsEndpoints C.uint + var cMasterEndpoints *C.struct_st_master_endpt + var numMsEndpoints C.uint32_t + var cSizeLatencies ***C.struct_st_latency + var bufAlignment = C.int16_t(cfg.BufferAlignment) + + cGroupName := C.CString(cfg.GroupName) + defer C.free(unsafe.Pointer(cGroupName)) + + if len(cfg.MasterEndpoints) > 0 { + numOptMsEndpoints = C.uint(len(cfg.MasterEndpoints)) + ptr, err := C.calloc(C.size_t(numOptMsEndpoints), C.sizeof_struct_st_endpoint) + if err != nil { + return nil, err + } + cOptMasterEndpoints = (*C.struct_st_endpoint)(ptr) + defer C.free(unsafe.Pointer(cOptMasterEndpoints)) + + masterEndpoints := unsafe.Slice(cOptMasterEndpoints, int(numOptMsEndpoints)) + for i, ep := range cfg.MasterEndpoints { + masterEndpoints[i].rank = C.uint(ep.Rank) + masterEndpoints[i].tag = C.uint(ep.Tag) + } + } + + defer func() { + if cMasterEndpoints != nil { + C.free(unsafe.Pointer(cMasterEndpoints)) + } + if cSizeLatencies != nil { + C.free_size_latencies(cSizeLatencies, C.uint32_t(len(testSizes)), numMsEndpoints) + } + self_test_fini(true) + }() + + rc := run_self_test(cSizes, C.int(len(testSizes)), repCount, maxInflight, cGroupName, + cOptMasterEndpoints, numOptMsEndpoints, + cTgtEndpoints, C.uint32_t(len(tgtEndpoints)), + &cMasterEndpoints, &numMsEndpoints, + &cSizeLatencies, bufAlignment) + if err := daos.ErrorFromRC(int(rc)); err != nil { + return nil, errors.Wrap(err, "self_test failed") + } + + if numMsEndpoints == 0 || cMasterEndpoints == nil { + return nil, errors.New("no master endpoints defined") + } + if cSizeLatencies == nil { + return nil, errors.New("no test latencies recorded") + } + + masterEndpoints := unsafe.Slice(cMasterEndpoints, int(numMsEndpoints)) + var results []*daos.SelfTestResult + perSizeList := unsafe.Slice(cSizeLatencies, len(testSizes)) + for i := 0; i < len(testSizes); i++ { + params := testSizes[i] + msSessions := unsafe.Slice(perSizeList[i], int(numMsEndpoints)) + for j := 0; j < int(numMsEndpoints); j++ { + msEp := masterEndpoints[j] + res := &daos.SelfTestResult{ + MasterEndpoint: daos.SelfTestEndpoint{ + Rank: ranklist.Rank(msEp.endpt.ep_rank), + Tag: uint32(msEp.endpt.ep_tag), + }, + TargetEndpoints: tgtEndpoints, + Repetitions: uint(repCount), + SendSize: uint64(params.send_size), + ReplySize: uint64(params.reply_size), + 
BufferAlignment: int16(bufAlignment), + Duration: time.Duration(msEp.reply.test_duration_ns), + MasterLatency: new(daos.EndpointLatency), + TargetLatencies: make(map[daos.SelfTestEndpoint]*daos.EndpointLatency), + } + repLatencies := unsafe.Slice(msSessions[j], int(repCount)) + + for _, latency := range repLatencies { + if latency.cci_rc < 0 { + res.MasterLatency.AddValue(-1) + res.AddTargetLatency(ranklist.Rank(latency.rank), uint32(latency.tag), -1) + continue + } + res.MasterLatency.AddValue(int64(latency.val)) + res.AddTargetLatency(ranklist.Rank(latency.rank), uint32(latency.tag), int64(latency.val)) + } + + results = append(results, res) + } + } + + return results, nil +} diff --git a/src/control/lib/daos/api/selftest_test.go b/src/control/lib/daos/api/selftest_test.go new file mode 100644 index 00000000000..7413c938f56 --- /dev/null +++ b/src/control/lib/daos/api/selftest_test.go @@ -0,0 +1,206 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/logging" +) + +func TestAPI_RunSelfTest(t *testing.T) { + genCfg := func(xfrm func(cfg *daos.SelfTestConfig)) *daos.SelfTestConfig { + cfg := &daos.SelfTestConfig{} + cfg.SetDefaults() + if xfrm != nil { + xfrm(cfg) + } + return cfg + } + genEndPoints := func(tags []uint32, ranks ...ranklist.Rank) (eps []daos.SelfTestEndpoint) { + if len(tags) == 0 { + tags = []uint32{0} + } + if len(ranks) == 0 { + ranks = []ranklist.Rank{0} + } + for i := 0; i < len(ranks); i++ { + for j := 0; j < len(tags); j++ { + eps = append(eps, daos.SelfTestEndpoint{Rank: ranks[i], Tag: tags[j]}) + } + } + return + } + genEpLatencies := func(totalReps uint, eps ...daos.SelfTestEndpoint) (lats []run_self_test_EndpointLatency) { + if totalReps == 0 { + totalReps = genCfg(nil).Repetitions + } + if len(eps) == 0 { + eps = genEndPoints(nil, 0, 1, 2) + } + lats = make([]run_self_test_EndpointLatency, totalReps) + latCount := 0 + for i := 0; i < int(totalReps); i++ { + lats[i] = run_self_test_EndpointLatency{ + val: _Ctype_int64_t(i + 1), + rank: _Ctype___uint32_t(eps[i%len(eps)].Rank), + tag: _Ctype___uint32_t(eps[i%len(eps)].Tag), + cci_rc: 0, + } + latCount++ + } + return + } + genExpResults := func(cfg *daos.SelfTestConfig) (results []*daos.SelfTestResult) { + for i := range cfg.SendSizes { + var tgtEndpoints []daos.SelfTestEndpoint + if len(cfg.EndpointRanks) > 0 { + tgtEndpoints = genEndPoints(cfg.EndpointTags, cfg.EndpointRanks...) + } + + masterEndPoints := cfg.MasterEndpoints + if len(masterEndPoints) == 0 { + masterEndPoints = []daos.SelfTestEndpoint{ + { + Rank: cfg.EndpointRanks[len(cfg.EndpointRanks)-1] + 1, + Tag: cfg.EndpointTags[len(cfg.EndpointTags)-1], + }, + } + } + + for _, mep := range masterEndPoints { + tgtEps := genEndPoints(cfg.EndpointTags, cfg.EndpointRanks...) 
+ res := &daos.SelfTestResult{ + MasterEndpoint: mep, + TargetEndpoints: tgtEps, + Repetitions: cfg.Repetitions * uint(len(tgtEps)), + SendSize: cfg.SendSizes[i], + ReplySize: cfg.ReplySizes[i], + BufferAlignment: cfg.BufferAlignment, + MasterLatency: &daos.EndpointLatency{}, + TargetLatencies: make(map[daos.SelfTestEndpoint]*daos.EndpointLatency), + } + for _, ep := range tgtEndpoints { + res.TargetLatencies[ep] = &daos.EndpointLatency{} + } + + results = append(results, res) + } + } + return + } + + for name, tc := range map[string]struct { + cfg *daos.SelfTestConfig + self_test_RC int + testSysInfo *daos.SystemInfo + get_sys_info_RC int + expMsEps []daos.SelfTestEndpoint + expRunCfg *daos.SelfTestConfig + expRunResults []*daos.SelfTestResult + expErr error + }{ + "empty config": { + cfg: &daos.SelfTestConfig{}, + expErr: errors.New("invalid self_test configuration"), + }, + "library alloc fails": { + cfg: genCfg(nil), + self_test_RC: int(daos.NoMemory), + expErr: daos.NoMemory, + }, + "GetSystemInfo fails": { + cfg: genCfg(nil), + get_sys_info_RC: int(daos.AgentCommFailed), + expErr: daos.AgentCommFailed, + }, + "custom config -- 1 rank": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.EndpointRanks = []ranklist.Rank{1} + cfg.EndpointTags = []uint32{1} + cfg.Repetitions = 10 + cfg.SendSizes = []uint64{1024} + cfg.ReplySizes = []uint64{1024} + }), + }, + "custom config -- defined master endpoints": { + cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.EndpointRanks = []ranklist.Rank{0, 1, 2} + cfg.EndpointTags = []uint32{1} + cfg.MasterEndpoints = []daos.SelfTestEndpoint{ + {Rank: 0, Tag: 1}, + {Rank: 1, Tag: 1}, + } + }), + }, + "default config -- all ranks": { + cfg: genCfg(nil), + }, + } { + t.Run(name, func(t *testing.T) { + if tc.testSysInfo == nil { + tc.testSysInfo = defaultSystemInfo + } + daos_mgmt_get_sys_info_SystemInfo = tc.testSysInfo + daos_mgmt_get_sys_info_RC = _Ctype_int(tc.get_sys_info_RC) + defer func() { + daos_mgmt_get_sys_info_SystemInfo = defaultSystemInfo + daos_mgmt_get_sys_info_RC = 0 + }() + + run_self_test_RunConfig = nil + run_self_test_MsEndpoints = nil + run_self_test_EndpointLatencies = nil + run_self_test_RC = _Ctype_int(tc.self_test_RC) + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + var sysRanks []ranklist.Rank + if len(tc.cfg.EndpointRanks) == 0 { + sysRanks = make([]ranklist.Rank, len(tc.testSysInfo.RankURIs)) + for i, rankURI := range tc.testSysInfo.RankURIs { + sysRanks[i] = ranklist.Rank(rankURI.Rank) + } + } else { + sysRanks = tc.cfg.EndpointRanks + } + if tc.expRunCfg == nil { + tc.expRunCfg = tc.cfg.Copy() + tc.expRunCfg.EndpointRanks = sysRanks + tc.expRunCfg.Repetitions = tc.cfg.Repetitions * uint(len(sysRanks)) + } + if tc.expRunResults == nil { + expCfg := tc.cfg.Copy() + expCfg.EndpointRanks = sysRanks + tc.expRunResults = genExpResults(expCfg) + } + tgtEps := genEndPoints(tc.cfg.EndpointTags, sysRanks...) + run_self_test_EndpointLatencies = genEpLatencies(tc.cfg.Repetitions*uint(len(tgtEps)), tgtEps...) + + ctx := test.MustLogContext(t, log) + res, err := RunSelfTest(ctx, tc.cfg) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + test.CmpAny(t, "SelfTestConfig", tc.expRunCfg, run_self_test_RunConfig) + cmpOpts := cmp.Options{ + // Don't need to test all of this again. Just verify that + // we get the expected number of latency results here. + cmpopts.IgnoreTypes(daos.EndpointLatency{}), + } + test.CmpAny(t, "SelfTestResults", tc.expRunResults, res, cmpOpts...) 
+		})
+	}
+}
diff --git a/src/control/lib/daos/api/system.go b/src/control/lib/daos/api/system.go
index da874f69ddf..73001363a25 100644
--- a/src/control/lib/daos/api/system.go
+++ b/src/control/lib/daos/api/system.go
@@ -7,6 +7,8 @@
 package api
 
 import (
+	"context"
+	"sort"
 	"unsafe"
 
 	"github.com/pkg/errors"
@@ -22,13 +24,13 @@ import (
 import "C"
 
 // GetSystemInfo queries for the connected system information.
-func (p *Provider) GetSystemInfo() (*daos.SystemInfo, error) {
+func (p *Provider) GetSystemInfo(ctx context.Context) (*daos.SystemInfo, error) {
 	var cSysInfo *C.struct_daos_sys_info
-	rc := C.daos_mgmt_get_sys_info(nil, &cSysInfo)
+	rc := daos_mgmt_get_sys_info(nil, &cSysInfo)
 	if err := daos.ErrorFromRC(int(rc)); err != nil {
 		return nil, errors.Wrap(err, "querying DAOS system information")
 	}
-	defer C.daos_mgmt_put_sys_info(cSysInfo)
+	defer daos_mgmt_put_sys_info(cSysInfo)
 
 	sysInfo := &daos.SystemInfo{
 		Name: C.GoString(&cSysInfo.dsi_system_name[0]),
@@ -46,10 +48,16 @@ func (p *Provider) GetSystemInfo() (*daos.SystemInfo, error) {
 		sysInfo.RankURIs = append(sysInfo.RankURIs, rankURI)
 		rankURIs[rankURI.Rank] = rankURI
 	}
+	sort.Slice(sysInfo.RankURIs, func(i, j int) bool {
+		return sysInfo.RankURIs[i].Rank < sysInfo.RankURIs[j].Rank
+	})
 
 	for _, cMSRank := range unsafe.Slice(cSysInfo.dsi_ms_ranks, int(cSysInfo.dsi_nr_ms_ranks)) {
 		sysInfo.AccessPointRankURIs = append(sysInfo.AccessPointRankURIs, rankURIs[uint32(cMSRank)])
 	}
+	sort.Slice(sysInfo.AccessPointRankURIs, func(i, j int) bool {
+		return sysInfo.AccessPointRankURIs[i].Rank < sysInfo.AccessPointRankURIs[j].Rank
+	})
 
 	return sysInfo, nil
 }
diff --git a/src/control/lib/daos/libgurt.go b/src/control/lib/daos/libgurt.go
new file mode 100644
index 00000000000..3e1485b968f
--- /dev/null
+++ b/src/control/lib/daos/libgurt.go
@@ -0,0 +1,22 @@
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build !test_stubs
+// +build !test_stubs
+
+package daos
+
+/*
+#cgo LDFLAGS: -lgurt
+
+#include <daos/debug.h>
+*/
+import "C"
+
+func daos_debug_init(log_file *C.char) C.int {
+	return C.daos_debug_init(log_file)
+}
+
+func daos_debug_fini() {
+	C.daos_debug_fini()
+}
diff --git a/src/control/lib/daos/libgurt_stubs.go b/src/control/lib/daos/libgurt_stubs.go
new file mode 100644
index 00000000000..af8b3dc896a
--- /dev/null
+++ b/src/control/lib/daos/libgurt_stubs.go
@@ -0,0 +1,21 @@
+//
+// (C) Copyright 2024 Intel Corporation.
+//
+// SPDX-License-Identifier: BSD-2-Clause-Patent
+//
+//go:build test_stubs
+// +build test_stubs
+
+package daos
+
+import "C"
+
+var (
+	daos_debug_init_RC C.int = 0
+)
+
+func daos_debug_init(log_file *C.char) C.int {
+	return daos_debug_init_RC
+}
+
+func daos_debug_fini() {}
diff --git a/src/control/lib/daos/logging.go b/src/control/lib/daos/logging.go
index 9891adba0be..8c9f26a3e3f 100644
--- a/src/control/lib/daos/logging.go
+++ b/src/control/lib/daos/logging.go
@@ -13,14 +13,9 @@ import (
 	"github.com/pkg/errors"
 )
 
-/*
-#cgo LDFLAGS: -lgurt
-
-#include <daos/debug.h>
-*/
-import "C"
-
 const (
+	// UnsetLogMask defines an explicitly-unset log mask.
+	UnsetLogMask = "UNSET"
 	// DefaultDebugMask defines the basic debug mask.
 	DefaultDebugMask = "DEBUG,MEM=ERR,OBJECT=ERR,PLACEMENT=ERR"
 	// DefaultInfoMask defines the basic info mask.
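The libgurt.go/libgurt_stubs.go pair above is the build-tag seam used throughout this series: the real cgo wrapper is compiled only without the test_stubs tag, while the stub variant exposes a package-level return code that tests can set, so unit tests never have to link the C library. A minimal self-contained sketch of the same pattern follows; the names in it are illustrative only and not part of this patch (the logging.go diff continues after the sketch).

//go:build test_stubs
// +build test_stubs

package daos

import "C"

// fetch_value_RC is set by tests to simulate a C library return code.
// The real wrapper, guarded by "//go:build !test_stubs", would make the
// actual cgo call here instead of returning the canned value.
var fetch_value_RC C.int = 0

func fetch_value() C.int {
	return fetch_value_RC
}

Building with "go build -tags test_stubs" swaps the stub in for the cgo wrapper; the rest of the package compiles unchanged against either file.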
@@ -35,13 +30,27 @@ func InitLogging(masks ...string) (func(), error) { if mask == "" { mask = DefaultInfoMask } - os.Setenv("D_LOG_MASK", mask) + if mask != UnsetLogMask { + if err := SetLogMask(mask); err != nil { + return func() {}, errors.Wrap(err, "failed to set DAOS logging mask") + } + } - if rc := C.daos_debug_init(nil); rc != 0 { + if rc := daos_debug_init(nil); rc != 0 { return func() {}, errors.Wrap(Status(rc), "daos_debug_init() failed") } return func() { - C.daos_debug_fini() + daos_debug_fini() }, nil } + +// SetLogMask sets the DAOS logging mask. +func SetLogMask(mask string) error { + return os.Setenv("D_LOG_MASK", mask) +} + +// GetLogMask returns the DAOS logging mask, if set. +func GetLogMask() string { + return os.Getenv("D_LOG_MASK") +} diff --git a/src/control/lib/daos/selftest.go b/src/control/lib/daos/selftest.go new file mode 100644 index 00000000000..703685b5dbe --- /dev/null +++ b/src/control/lib/daos/selftest.go @@ -0,0 +1,354 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos + +import ( + "encoding/json" + "fmt" + "math" + "sort" + "time" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/lib/ranklist" +) + +/* +#cgo CFLAGS: -I${SRCDIR}/../../../cart/utils -I${SRCDIR}/../../../utils/self_test + +#include "self_test_lib.h" +*/ +import "C" + +type ( + // EndpointLatency represents the results of running latency tests against + // a single rank:target endpoint. + EndpointLatency struct { + rawValues []uint64 + sorted bool + TotalRPCs uint64 `json:"total_rpcs"` + Min uint64 `json:"min"` + Max uint64 `json:"max"` + Sum uint64 `json:"-"` + SumSquares float64 `json:"-"` + FailCount uint64 `json:"fail_count"` + } + + // SelfTestEndpoint represents a rank:target test endpoint. + SelfTestEndpoint struct { + Rank ranklist.Rank + Tag uint32 + } + + // SelfTestConfig defines the parameters for a set of self_test runs. + SelfTestConfig struct { + GroupName string `json:"group_name"` + MasterEndpoints []SelfTestEndpoint `json:"master_endpoints,omitempty"` + EndpointRanks []ranklist.Rank `json:"endpoint_ranks"` + EndpointTags []uint32 `json:"endpoint_tags"` + Repetitions uint `json:"repetitions"` + SendSizes []uint64 `json:"send_sizes"` + ReplySizes []uint64 `json:"reply_sizes"` + BufferAlignment int16 `json:"buffer_alignment"` + MaxInflightRPCs uint `json:"max_inflight_rpcs"` + } + + // SelfTestResult represents the results of a single self_test run. + SelfTestResult struct { + MasterEndpoint SelfTestEndpoint `json:"-"` + TargetEndpoints []SelfTestEndpoint `json:"-"` + Repetitions uint `json:"repetitions"` + SendSize uint64 `json:"send_size"` + ReplySize uint64 `json:"reply_size"` + BufferAlignment int16 `json:"buffer_alignment"` + Duration time.Duration `json:"duration"` + MasterLatency *EndpointLatency `json:"master_latency"` + TargetLatencies map[SelfTestEndpoint]*EndpointLatency `json:"-"` + } +) + +var defaultLatencyPercentiles []uint64 = []uint64{50, 75, 90, 95, 99} + +const ( + defaultSendSize = 1 << 20 // 1MiB + defaultReplySize = defaultSendSize + defaultRepCount = 10000 + defaultMaxInflight = 16 + defaultBufAlignment = C.CRT_ST_BUF_ALIGN_DEFAULT +) + +// SetDefaults replaces unset parameters with default values. 
+func (cfg *SelfTestConfig) SetDefaults() error { + if cfg == nil { + return errors.New("nil config") + } + + if cfg.GroupName == "" { + cfg.GroupName = build.DefaultSystemName + } + if len(cfg.EndpointTags) == 0 { + cfg.EndpointTags = []uint32{0} + } + if len(cfg.SendSizes) == 0 { + cfg.SendSizes = []uint64{defaultSendSize} + } + if len(cfg.ReplySizes) == 0 { + cfg.ReplySizes = []uint64{defaultReplySize} + } + if cfg.Repetitions == 0 { + cfg.Repetitions = defaultRepCount + } + if cfg.MaxInflightRPCs == 0 { + cfg.MaxInflightRPCs = defaultMaxInflight + } + if cfg.BufferAlignment == 0 { + cfg.BufferAlignment = defaultBufAlignment + } + + return cfg.Validate() +} + +// Validate checks the configuration for validity. +func (cfg *SelfTestConfig) Validate() error { + if cfg == nil { + return errors.New("nil config") + } + + if cfg.GroupName == "" { + return errors.New("group name is required") + } + if len(cfg.EndpointTags) == 0 { + return errors.New("endpoint tag list is required") + } + if len(cfg.SendSizes) == 0 { + return errors.New("send size list is required") + } + if len(cfg.ReplySizes) == 0 { + return errors.New("reply size list is required") + } + if cfg.Repetitions == 0 { + return errors.New("repetitions is required") + } + if cfg.MaxInflightRPCs == 0 { + return errors.New("max inflight RPCs is required") + } + if cfg.MaxInflightRPCs == 0 { + return errors.New("max inflight RPCs is required") + } + if cfg.BufferAlignment == 0 { + return errors.New("buffer alignment is required") + } + if len(cfg.SendSizes) != len(cfg.ReplySizes) { + return errors.New("send/reply size list mismatch") + } + + return nil +} + +// Copy returns a copy of the configuration. +func (cfg *SelfTestConfig) Copy() *SelfTestConfig { + if cfg == nil { + return nil + } + + cp := &SelfTestConfig{} + *cp = *cfg + copy(cp.MasterEndpoints, cfg.MasterEndpoints) + copy(cp.EndpointRanks, cfg.EndpointRanks) + copy(cp.EndpointTags, cfg.EndpointTags) + copy(cp.SendSizes, cfg.SendSizes) + copy(cp.ReplySizes, cfg.ReplySizes) + + return cp +} + +// Succeeded returns the number of RPCs that succeeded. +func (epl *EndpointLatency) Succeeded() uint64 { + return epl.TotalRPCs - epl.FailCount +} + +// AddValue adds a sampled latency value (or -1 to increment the failure count). +func (epl *EndpointLatency) AddValue(value int64) { + epl.TotalRPCs++ + if value < 0 { + epl.FailCount++ + return + } + + // TODO: Figure out if there's a more clever way to do this... Seems + // like with histograms we need to pre-bucket the values. + epl.rawValues = append(epl.rawValues, uint64(value)) + epl.sorted = false + if epl.TotalRPCs == 1 || value < int64(epl.Min) { + epl.Min = uint64(value) + } + if value > int64(epl.Max) { + epl.Max = uint64(value) + } + + epl.SumSquares += float64(value) * float64(value) + epl.Sum += uint64(value) +} + +func (epl *EndpointLatency) sortValues() { + if epl.sorted { + return + } + + sort.Slice(epl.rawValues, func(a, b int) bool { + return epl.rawValues[a] < epl.rawValues[b] + }) + epl.sorted = true +} + +// Percentiles returns a sorted slice of bucket keys and a map of buckets +// holding percentile values. 
+func (epl *EndpointLatency) Percentiles(percentiles ...uint64) ([]uint64, map[uint64]*MetricBucket) { + epl.sortValues() + + if len(percentiles) == 0 { + percentiles = defaultLatencyPercentiles + } + sort.Slice(percentiles, func(a, b int) bool { + return percentiles[a] < percentiles[b] + }) + buckets := make(map[uint64]*MetricBucket) + + for _, p := range percentiles { + valIdx := epl.Succeeded() * p / 100 + if uint64(len(epl.rawValues)) <= valIdx { + continue + } + buckets[p] = &MetricBucket{ + Label: fmt.Sprintf("%d", p), + CumulativeCount: valIdx, + UpperBound: float64(epl.rawValues[valIdx]), + } + } + + return percentiles, buckets +} + +// PercentileBuckets returns a sorted slice of buckets holding percentile values. +func (epl *EndpointLatency) PercentileBuckets(percentiles ...uint64) []*MetricBucket { + keys, bucketMap := epl.Percentiles(percentiles...) + buckets := make([]*MetricBucket, 0, len(bucketMap)) + for _, key := range keys { + buckets = append(buckets, bucketMap[key]) + } + + return buckets +} + +// Average returns the average latency value of successful RPCs. +func (epl *EndpointLatency) Average() float64 { + if epl.Succeeded() == 0 { + return 0 + } + return float64(epl.Sum) / float64(epl.Succeeded()) +} + +// StdDev returns the standard deviation of the latency values of successful RPCs. +func (epl *EndpointLatency) StdDev() float64 { + if epl.Succeeded() < 2 { + return 0 + } + avg := epl.Average() + return math.Sqrt((epl.SumSquares - (float64(epl.Succeeded()) * avg * avg)) / float64(epl.Succeeded()-1)) +} + +func roundFloat(val float64, places int) float64 { + return math.Round(val*math.Pow10(places)) / math.Pow10(places) +} + +func (epl *EndpointLatency) MarshalJSON() ([]byte, error) { + type toJSON EndpointLatency + return json.Marshal(struct { + Average float64 `json:"avg"` + StdDev float64 `json:"std_dev"` + Percentiles []*MetricBucket `json:"percentiles"` + *toJSON + }{ + Average: roundFloat(epl.Average(), 4), + StdDev: roundFloat(epl.StdDev(), 4), + Percentiles: epl.PercentileBuckets(), + toJSON: (*toJSON)(epl), + }) +} + +func (ste SelfTestEndpoint) String() string { + return fmt.Sprintf("%d:%d", ste.Rank, ste.Tag) +} + +func (str *SelfTestResult) MarshalJSON() ([]byte, error) { + epLatencies := make(map[string]*EndpointLatency) + for ep, lr := range str.TargetLatencies { + epLatencies[ep.String()] = lr + } + + type toJSON SelfTestResult + return json.Marshal(struct { + MasterEndpoint string `json:"master_endpoint"` + TargetEndpoints []string `json:"target_endpoints"` + EndpointLatencies map[string]*EndpointLatency `json:"target_latencies,omitempty"` + *toJSON + }{ + MasterEndpoint: str.MasterEndpoint.String(), + TargetEndpoints: func() []string { + eps := make([]string, len(str.TargetEndpoints)) + for i, ep := range str.TargetEndpoints { + eps[i] = ep.String() + } + return eps + }(), + EndpointLatencies: epLatencies, + toJSON: (*toJSON)(str), + }) +} + +// AddTargetLatency adds a latency value for a target endpoint. 
+func (str *SelfTestResult) AddTargetLatency(rank ranklist.Rank, tag uint32, value int64) { + var found bool + for _, ep := range str.TargetEndpoints { + if ep.Rank == rank && ep.Tag == tag { + found = true + break + } + } + if !found { + return + } + + if str.TargetLatencies == nil { + str.TargetLatencies = make(map[SelfTestEndpoint]*EndpointLatency) + } + + ep := SelfTestEndpoint{ + Rank: rank, + Tag: tag, + } + epl, found := str.TargetLatencies[ep] + if !found { + epl = &EndpointLatency{ + rawValues: make([]uint64, 0, str.Repetitions/uint(len(str.TargetEndpoints))), + } + str.TargetLatencies[ep] = epl + } + + epl.AddValue(value) +} + +// TargetRanks returns a slice of target ranks in the same order +// as the configured target endpoints. +func (str *SelfTestResult) TargetRanks() (ranks []ranklist.Rank) { + for _, ep := range str.TargetEndpoints { + ranks = append(ranks, ep.Rank) + } + return +} diff --git a/src/control/lib/daos/selftest_test.go b/src/control/lib/daos/selftest_test.go new file mode 100644 index 00000000000..8029a176349 --- /dev/null +++ b/src/control/lib/daos/selftest_test.go @@ -0,0 +1,295 @@ +// +// (C) Copyright 2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos_test + +import ( + "encoding/json" + "testing" + "time" + + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" +) + +func TestDaos_SelfTestConfig(t *testing.T) { + for name, tc := range map[string]struct { + cfg *daos.SelfTestConfig + expErr error + }{ + "nil config fails validation": { + expErr: errors.New("nil"), + }, + "imbalanced send/reply lists": { + cfg: func() *daos.SelfTestConfig { + cfg := new(daos.SelfTestConfig) + cfg.SendSizes = []uint64{0, 1} + cfg.ReplySizes = []uint64{1} + cfg.SetDefaults() + return cfg + }(), + expErr: errors.New("mismatch"), + }, + "defaults should pass": { + cfg: func() *daos.SelfTestConfig { + cfg := new(daos.SelfTestConfig) + cfg.SetDefaults() + return cfg + }(), + }, + } { + t.Run(name, func(t *testing.T) { + gotErr := tc.cfg.Validate() + test.CmpErr(t, tc.expErr, gotErr) + }) + } +} + +func TestDaos_EndpointLatency(t *testing.T) { + epl := new(daos.EndpointLatency) + for i := int64(1); i <= 100; i++ { + epl.AddValue(i) + } + epl.AddValue(-1) + + test.CmpAny(t, "TotalRPCs", uint64(101), epl.TotalRPCs) + test.CmpAny(t, "Succeeded()", uint64(100), epl.Succeeded()) + test.CmpAny(t, "FailCount", uint64(1), epl.FailCount) + test.CmpAny(t, "Min", uint64(1), epl.Min) + test.CmpAny(t, "Max", uint64(100), epl.Max) + test.CmpAny(t, "Sum", uint64(5050), epl.Sum) + test.CmpAny(t, "SumSquares", float64(338350), epl.SumSquares) + test.CmpAny(t, "Average()", float64(50.5), epl.Average()) + test.CmpAny(t, "StdDev()", float64(29.0115), epl.StdDev(), cmpopts.EquateApprox(0, 0.0001)) + + keys, buckets := epl.Percentiles() + sorted := make([]*daos.MetricBucket, len(keys)) + for i, key := range keys { + sorted[i] = buckets[key] + + switch key { + case 50: + test.CmpAny(t, "50th", float64(51), buckets[key].UpperBound) + case 75: + test.CmpAny(t, "75th", float64(76), buckets[key].UpperBound) + case 90: + test.CmpAny(t, "90th", float64(91), buckets[key].UpperBound) + case 95: + test.CmpAny(t, "95th", float64(96), buckets[key].UpperBound) + case 99: + test.CmpAny(t, "99th", float64(100), buckets[key].UpperBound) + } + } + test.CmpAny(t, 
"PercentileBuckets()", sorted, epl.PercentileBuckets()) +} + +func TestDaos_SelfTestResult(t *testing.T) { + str := new(daos.SelfTestResult) + + testRank := ranklist.Rank(1) + testTarget := uint32(0) + testEndpoint := daos.SelfTestEndpoint{Rank: testRank, Tag: testTarget} + str.AddTargetLatency(testRank, testTarget, 1) + if _, found := str.TargetLatencies[testEndpoint]; found { + t.Fatal("expected no latency for unknown endpoint") + } + + str.TargetEndpoints = append(str.TargetEndpoints, testEndpoint) + str.AddTargetLatency(testRank, testTarget, 1) + if _, found := str.TargetLatencies[testEndpoint]; !found { + t.Fatal("expected latency for known endpoint") + } + + test.CmpAny(t, "TargetRanks()", []ranklist.Rank{testRank}, str.TargetRanks()) +} + +func TestDaos_SelfTestResult_MarshalJSON(t *testing.T) { + str := &daos.SelfTestResult{ + MasterEndpoint: daos.SelfTestEndpoint{Rank: 3, Tag: 0}, + TargetEndpoints: []daos.SelfTestEndpoint{ + {Rank: 0, Tag: 0}, + {Rank: 1, Tag: 0}, + {Rank: 2, Tag: 0}, + }, + Repetitions: 3000, + SendSize: 1024, + ReplySize: 1024, + BufferAlignment: -1, + Duration: 2 * time.Second, + MasterLatency: &daos.EndpointLatency{}, + } + + for i := int64(1); i <= int64(str.Repetitions); i++ { + str.MasterLatency.AddValue(i) + str.AddTargetLatency(ranklist.Rank(i%3), 0, i) + } + + gotBytes, err := json.MarshalIndent(str, "", " ") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + expStr := `{ + "master_endpoint": "3:0", + "target_endpoints": [ + "0:0", + "1:0", + "2:0" + ], + "target_latencies": { + "0:0": { + "avg": 1501.5, + "std_dev": 866.4583, + "percentiles": [ + { + "label": "50", + "cumulative_count": 500, + "upper_bound": 1503 + }, + { + "label": "75", + "cumulative_count": 750, + "upper_bound": 2253 + }, + { + "label": "90", + "cumulative_count": 900, + "upper_bound": 2703 + }, + { + "label": "95", + "cumulative_count": 950, + "upper_bound": 2853 + }, + { + "label": "99", + "cumulative_count": 990, + "upper_bound": 2973 + } + ], + "total_rpcs": 1000, + "min": 3, + "max": 3000, + "fail_count": 0 + }, + "1:0": { + "avg": 1499.5, + "std_dev": 866.4583, + "percentiles": [ + { + "label": "50", + "cumulative_count": 500, + "upper_bound": 1501 + }, + { + "label": "75", + "cumulative_count": 750, + "upper_bound": 2251 + }, + { + "label": "90", + "cumulative_count": 900, + "upper_bound": 2701 + }, + { + "label": "95", + "cumulative_count": 950, + "upper_bound": 2851 + }, + { + "label": "99", + "cumulative_count": 990, + "upper_bound": 2971 + } + ], + "total_rpcs": 1000, + "min": 1, + "max": 2998, + "fail_count": 0 + }, + "2:0": { + "avg": 1500.5, + "std_dev": 866.4583, + "percentiles": [ + { + "label": "50", + "cumulative_count": 500, + "upper_bound": 1502 + }, + { + "label": "75", + "cumulative_count": 750, + "upper_bound": 2252 + }, + { + "label": "90", + "cumulative_count": 900, + "upper_bound": 2702 + }, + { + "label": "95", + "cumulative_count": 950, + "upper_bound": 2852 + }, + { + "label": "99", + "cumulative_count": 990, + "upper_bound": 2972 + } + ], + "total_rpcs": 1000, + "min": 2, + "max": 2999, + "fail_count": 0 + } + }, + "repetitions": 3000, + "send_size": 1024, + "reply_size": 1024, + "buffer_alignment": -1, + "duration": 2000000000, + "master_latency": { + "avg": 1500.5, + "std_dev": 866.1697, + "percentiles": [ + { + "label": "50", + "cumulative_count": 1500, + "upper_bound": 1501 + }, + { + "label": "75", + "cumulative_count": 2250, + "upper_bound": 2251 + }, + { + "label": "90", + "cumulative_count": 2700, + "upper_bound": 2701 + }, + { + 
"label": "95", + "cumulative_count": 2850, + "upper_bound": 2851 + }, + { + "label": "99", + "cumulative_count": 2970, + "upper_bound": 2971 + } + ], + "total_rpcs": 3000, + "min": 1, + "max": 3000, + "fail_count": 0 + } +}` + test.CmpAny(t, "JSON output", expStr, string(gotBytes)) +} diff --git a/src/control/lib/daos/status.go b/src/control/lib/daos/status.go index 577c0c01d2b..54099f31a2f 100644 --- a/src/control/lib/daos/status.go +++ b/src/control/lib/daos/status.go @@ -172,5 +172,7 @@ const ( // BadCert indicates that an invalid certificate was detected. BadCert Status = -C.DER_BAD_CERT // RedundancyFactorExceeded indicates that the maximum number of failed components was exceeded. - RedundancyFactorExceeded = -C.DER_RF + RedundancyFactorExceeded Status = -C.DER_RF + // AgentCommFailed indicates that client/agent communication failed. + AgentCommFailed Status = -C.DER_AGENT_COMM ) diff --git a/src/control/lib/daos/telemetry.go b/src/control/lib/daos/telemetry.go new file mode 100644 index 00000000000..e56377ddb1e --- /dev/null +++ b/src/control/lib/daos/telemetry.go @@ -0,0 +1,302 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos + +import ( + "encoding/json" + "sort" + "strconv" + "strings" + + "github.com/pkg/errors" +) + +// MetricType defines the different types of metrics. +type MetricType uint32 + +const ( + MetricTypeUnknown MetricType = iota + MetricTypeGeneric + MetricTypeCounter + MetricTypeGauge + MetricTypeSummary + MetricTypeHistogram + + metricTypeUnknownStr = "Unknown" + metricTypeGenericStr = "Generic" + metricTypeCounterStr = "Counter" + metricTypeGaugeStr = "Gauge" + metricTypeSummaryStr = "Summary" + metricTypeHistogramStr = "Histogram" +) + +func (t MetricType) String() string { + switch t { + case MetricTypeGeneric: + return metricTypeGenericStr + case MetricTypeCounter: + return metricTypeCounterStr + case MetricTypeGauge: + return metricTypeGaugeStr + case MetricTypeSummary: + return metricTypeSummaryStr + case MetricTypeHistogram: + return metricTypeHistogramStr + } + + return metricTypeUnknownStr +} + +func metricTypeFromString(typeStr string) MetricType { + // normalize the strings for comparison + switch strings.ToLower(typeStr) { + case strings.ToLower(metricTypeCounterStr): + return MetricTypeCounter + case strings.ToLower(metricTypeGaugeStr): + return MetricTypeGauge + case strings.ToLower(metricTypeSummaryStr): + return MetricTypeSummary + case strings.ToLower(metricTypeHistogramStr): + return MetricTypeHistogram + case strings.ToLower(metricTypeGenericStr): + return MetricTypeGeneric + } + return MetricTypeUnknown +} + +type ( + // Metric is an interface implemented by all metric types. + Metric interface { + IsMetric() + } + + // MetricLabelMap is the set of key-value label pairs. + MetricLabelMap map[string]string + + // SimpleMetric is a specific metric with a value. + SimpleMetric struct { + Labels MetricLabelMap `json:"labels"` + Value float64 `json:"value"` + } + + // QuantileMap is the set of quantile measurements. + QuantileMap map[float64]float64 + + // SummaryMetric represents a group of observations. + SummaryMetric struct { + Labels MetricLabelMap `json:"labels"` + SampleCount uint64 `json:"sample_count"` + SampleSum float64 `json:"sample_sum"` + Quantiles QuantileMap `json:"quantiles"` + } + + // MetricBucket represents a bucket for observations to be sorted into. 
+ MetricBucket struct { + Label string `json:"label"` + CumulativeCount uint64 `json:"cumulative_count"` + UpperBound float64 `json:"upper_bound"` + } + + // HistogramMetric represents a group of observations sorted into + // buckets. + HistogramMetric struct { + Labels MetricLabelMap `json:"labels"` + SampleCount uint64 `json:"sample_count"` + SampleSum float64 `json:"sample_sum"` + Buckets []*MetricBucket `json:"buckets"` + } + + // MetricSet is a group of related metrics. + MetricSet struct { + Name string `json:"name"` + Description string `json:"description"` + Type MetricType `json:"type"` + Metrics []Metric `json:"metrics"` + } +) + +// IsMetric identifies SimpleMetric as a Metric. +func (*SimpleMetric) IsMetric() {} + +// IsMetric identifies SummaryMetric as a Metric. +func (*SummaryMetric) IsMetric() {} + +// UnmarshalJSON unmarshals a SummaryMetric from JSON. +func (m *SummaryMetric) UnmarshalJSON(data []byte) error { + if m == nil { + return errors.New("nil SummaryMetric") + } + + if m.Quantiles == nil { + m.Quantiles = make(QuantileMap) + } + + type Alias SummaryMetric + aux := (*Alias)(m) + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + return nil +} + +// IsMetric identifies HistogramMetric as a Metric. +func (*HistogramMetric) IsMetric() {} + +// Keys gets the sorted list of label keys. +func (m MetricLabelMap) Keys() []string { + result := make([]string, 0, len(m)) + for label := range m { + result = append(result, label) + } + sort.Strings(result) + return result +} + +// Keys gets the sorted list of quantile keys. +func (m QuantileMap) Keys() []float64 { + result := make([]float64, 0, len(m)) + for q := range m { + result = append(result, q) + } + sort.Float64s(result) + return result +} + +// MarshalJSON marshals the QuantileMap into JSON. +func (m QuantileMap) MarshalJSON() ([]byte, error) { + strMap := make(map[string]string) + + fmtFloat := func(f float64) string { + return strconv.FormatFloat(f, 'g', -1, 64) + } + + for key, val := range m { + strMap[fmtFloat(key)] = fmtFloat(val) + } + + return json.Marshal(&strMap) +} + +// UnmarshalJSON unmarshals the QuantileMap from JSON. +func (m QuantileMap) UnmarshalJSON(data []byte) error { + if m == nil { + return errors.New("QuantileMap is nil") + } + + fromJSON := make(map[string]string) + + if err := json.Unmarshal(data, &fromJSON); err != nil { + return nil + } + + for key, val := range fromJSON { + floatKey, err := strconv.ParseFloat(key, 64) + if err != nil { + return errors.Wrapf(err, "QuantileMap key %q", key) + } + + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + return errors.Wrapf(err, "QuantileMap value %q for key %q", val, key) + } + + m[floatKey] = floatVal + } + return nil +} + +// MarshalJSON marshals the MetricSet to JSON. +func (ms *MetricSet) MarshalJSON() ([]byte, error) { + type toJSON MetricSet + return json.Marshal(&struct { + Type string `json:"type"` + *toJSON + }{ + Type: strings.ToLower(ms.Type.String()), + toJSON: (*toJSON)(ms), + }) +} + +// jsonMetric serves as a universal metric representation for unmarshaling from +// JSON. It covers all possible fields of Metric types. +type jsonMetric struct { + Labels MetricLabelMap `json:"labels"` + Value float64 `json:"value"` + SampleCount uint64 `json:"sample_count"` + SampleSum float64 `json:"sample_sum"` + Quantiles QuantileMap `json:"quantiles"` + Buckets []*MetricBucket `json:"buckets"` +} + +// UnmarshalJSON unmarshals a Metric into the jsonMetric type. 
+func (jm *jsonMetric) UnmarshalJSON(data []byte) error { + if jm == nil { + return errors.New("nil jsonMetric") + } + + if jm.Quantiles == nil { + jm.Quantiles = make(QuantileMap) + } + + type Alias jsonMetric + aux := (*Alias)(jm) + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + return nil +} + +// UnmarshalJSON unmarshals the MetricSet from JSON. +func (ms *MetricSet) UnmarshalJSON(data []byte) error { + if ms == nil { + return errors.New("nil MetricSet") + } + + type fromJSON MetricSet + from := &struct { + Type string `json:"type"` + Metrics []*jsonMetric `json:"metrics"` + *fromJSON + }{ + fromJSON: (*fromJSON)(ms), + } + if err := json.Unmarshal(data, from); err != nil { + return err + } + + ms.Type = metricTypeFromString(from.Type) + for _, m := range from.Metrics { + switch ms.Type { + case MetricTypeSummary: + ms.Metrics = append(ms.Metrics, &SummaryMetric{ + Labels: m.Labels, + SampleCount: m.SampleCount, + SampleSum: m.SampleSum, + Quantiles: m.Quantiles, + }) + case MetricTypeHistogram: + ms.Metrics = append(ms.Metrics, &HistogramMetric{ + Labels: m.Labels, + SampleCount: m.SampleCount, + SampleSum: m.SampleSum, + Buckets: m.Buckets, + }) + default: + ms.Metrics = append(ms.Metrics, newSimpleMetric(m.Labels, m.Value)) + } + } + return nil +} + +func newSimpleMetric(labels map[string]string, value float64) *SimpleMetric { + return &SimpleMetric{ + Labels: labels, + Value: value, + } +} diff --git a/src/control/lib/daos/telemetry_test.go b/src/control/lib/daos/telemetry_test.go new file mode 100644 index 00000000000..9a1ee77fcf1 --- /dev/null +++ b/src/control/lib/daos/telemetry_test.go @@ -0,0 +1,246 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos + +import ( + "encoding/json" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + + "github.com/daos-stack/daos/src/control/common/test" +) + +func TestDaos_Metric_JSON(t *testing.T) { + testLabelMap := map[string]string{ + "label1": "val1", + "label2": "val2", + } + + for name, tc := range map[string]struct { + metric Metric + }{ + "nil": {}, + "simple": { + metric: newSimpleMetric(testLabelMap, 123), + }, + "summary": { + metric: &SummaryMetric{ + Labels: testLabelMap, + SampleSum: 5678.9, + SampleCount: 42, + Quantiles: QuantileMap{ + 0.25: 50, + 0.5: 42, + }, + }, + }, + "histogram": { + metric: &HistogramMetric{ + Labels: testLabelMap, + SampleSum: 9876, + SampleCount: 120, + Buckets: []*MetricBucket{ + { + CumulativeCount: 55, + UpperBound: 500, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + marshaled, err := json.Marshal(tc.metric) + if err != nil { + t.Fatalf("expected to marshal, got %q", err) + } + + var unmarshaled Metric + switch tc.metric.(type) { + case *SimpleMetric: + unmarshaled = new(SimpleMetric) + case *SummaryMetric: + unmarshaled = new(SummaryMetric) + case *HistogramMetric: + unmarshaled = new(HistogramMetric) + default: + unmarshaled = new(SimpleMetric) + } + + err = json.Unmarshal(marshaled, unmarshaled) + if err != nil { + t.Fatalf("expected to unmarshal, got %q", err) + } + + expResult := tc.metric + if tc.metric == nil { + expResult = &SimpleMetric{} + } + + if diff := cmp.Diff(expResult, unmarshaled); diff != "" { + t.Fatalf("unmarshaled different from original (-want, +got):\n%s\n", diff) + } + }) + } +} + +func TestDaos_metricTypeFromString(t *testing.T) { + for name, tc := range map[string]struct { + input string + expType MetricType + }{ + "empty": { + expType: 
MetricTypeUnknown, + }, + "counter": { + input: "counter", + expType: MetricTypeCounter, + }, + "gauge": { + input: "gauge", + expType: MetricTypeGauge, + }, + "summary": { + input: "summary", + expType: MetricTypeSummary, + }, + "histogram": { + input: "histogram", + expType: MetricTypeHistogram, + }, + "generic": { + input: "generic", + expType: MetricTypeGeneric, + }, + "invalid": { + input: "some garbage text", + expType: MetricTypeUnknown, + }, + "weird capitalization": { + input: "CoUnTeR", + expType: MetricTypeCounter, + }, + } { + t.Run(name, func(t *testing.T) { + gotType := metricTypeFromString(tc.input) + + test.AssertEqual(t, tc.expType, gotType, "") + }) + } +} + +func TestDaos_MetricSet_JSON(t *testing.T) { + for name, tc := range map[string]struct { + set *MetricSet + }{ + "nil": {}, + "generic type": { + set: &MetricSet{ + Name: "timespan", + Description: "It's been a while", + Type: MetricTypeGeneric, + Metrics: []Metric{ + newSimpleMetric(map[string]string{ + "units": "nanoseconds", + }, float64(time.Second)), + }, + }, + }, + "counter type": { + set: &MetricSet{ + Name: "one_ring", + Description: "Precious...", + Type: MetricTypeCounter, + Metrics: []Metric{ + newSimpleMetric(map[string]string{ + "owner": "frodo", + }, 1), + }, + }, + }, + "gauge type": { + set: &MetricSet{ + Name: "funny_hats", + Description: "Hilarious headgear in inventory", + Type: MetricTypeGauge, + Metrics: []Metric{ + newSimpleMetric(map[string]string{ + "type": "tophat", + }, 1), + newSimpleMetric(map[string]string{ + "type": "cowboy", + }, 6), + newSimpleMetric(map[string]string{ + "type": "jester", + }, 0), + }, + }, + }, + "summary type": { + set: &MetricSet{ + Name: "alpha", + Description: "The first letter! Everybody's favorite!", + Type: MetricTypeSummary, + Metrics: []Metric{ + &SummaryMetric{ + Labels: map[string]string{"beta": "b"}, + SampleCount: 3, + SampleSum: 42, + Quantiles: map[float64]float64{0.5: 2.2}, + }, + }, + }, + }, + "histogram type": { + set: &MetricSet{ + Name: "my_histogram", + Description: "This is a histogram", + Type: MetricTypeHistogram, + Metrics: []Metric{ + &HistogramMetric{ + Labels: map[string]string{"owner": "me"}, + SampleCount: 1024, + SampleSum: 12344, + Buckets: []*MetricBucket{ + { + CumulativeCount: 789, + UpperBound: 500, + }, + { + CumulativeCount: 456, + UpperBound: 1000, + }, + }, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + marshaled, err := json.Marshal(tc.set) + if err != nil { + t.Fatalf("expected to marshal, got %q", err) + } + + unmarshaled := new(MetricSet) + err = json.Unmarshal(marshaled, unmarshaled) + if err != nil { + t.Fatalf("expected to unmarshal, got %q", err) + } + + expResult := tc.set + if tc.set == nil { + expResult = &MetricSet{} + } + + if diff := cmp.Diff(expResult, unmarshaled); diff != "" { + t.Fatalf("unmarshaled different from original (-want, +got):\n%s\n", diff) + } + }) + } +} diff --git a/src/control/lib/ui/num_flags.go b/src/control/lib/ui/num_flags.go new file mode 100644 index 00000000000..22d124c43c8 --- /dev/null +++ b/src/control/lib/ui/num_flags.go @@ -0,0 +1,75 @@ +// +// (C) Copyright 2022-2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package ui + +import ( + "fmt" + "math" + + "github.com/dustin/go-humanize" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/atm" +) + +// FmtHumanSize formats the supplied size in a human-readable format. 
+func FmtHumanSize(size float64, suffix string, binary bool) string { + if size == 0 { + return "0 " + suffix + } + val := size + + base := float64(1000) + if binary { + base = 1024 + if suffix != "" { + suffix = "i" + suffix + } + } + + for _, unit := range []string{"", " K", " M", " G", " T", " P", " E", " Z", " Y"} { + if math.Abs(val) < base { + if unit == "" && suffix != "" { + unit = " " + } + return fmt.Sprintf("%.02f%s%s", val, unit, suffix) + } + val /= base + } + + // Fallback to scientific notation for unexpectedly huge numbers. + return fmt.Sprintf("%E %s", size, suffix) +} + +// ByteSizeFlag is a go-flags compatible flag type for converting +// string input into a byte size. +type ByteSizeFlag struct { + set atm.Bool + Bytes uint64 +} + +func (sf ByteSizeFlag) IsSet() bool { + return sf.set.IsTrue() +} + +func (sf ByteSizeFlag) String() string { + return humanize.Bytes(sf.Bytes) +} + +func (sf *ByteSizeFlag) UnmarshalFlag(fv string) (err error) { + if fv == "" { + return errors.New("no size specified") + } + + sf.Bytes, err = humanize.ParseBytes(fv) + if err != nil { + return errors.Errorf("invalid size %q", fv) + } + sf.set.SetTrue() + + return nil +} diff --git a/src/control/lib/ui/num_flags_test.go b/src/control/lib/ui/num_flags_test.go new file mode 100644 index 00000000000..6d46f4c1025 --- /dev/null +++ b/src/control/lib/ui/num_flags_test.go @@ -0,0 +1,139 @@ +// +// (C) Copyright 2021-2024 Intel Corporation. +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package ui_test + +import ( + "testing" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/ui" +) + +func TestUI_FmtHumanSize(t *testing.T) { + for name, tc := range map[string]struct { + input float64 + suffix string + binary bool + expSize string + }{ + "0": { + input: 0, + suffix: "B", + expSize: "0 B", + }, + "-0": { + input: -0, + suffix: "B", + expSize: "0 B", + }, + "-1": { + input: -1, + suffix: "B", + expSize: "-1.00 B", + }, + "1 no suffix": { + input: 1, + expSize: "1.00", + }, + "1 binary no suffix": { + input: 1, + binary: true, + expSize: "1.00", + }, + "1000 no suffix": { + input: 1000, + expSize: "1.00 K", + }, + "1000 binary no suffix": { + input: 1000, + binary: true, + expSize: "1000.00", + }, + "1024 binary no suffix": { + input: 1024, + binary: true, + expSize: "1.00 K", + }, + "4.5PB": { + input: 1 << 52, + suffix: "B", + expSize: "4.50 PB", + }, + "4PiB binary": { + input: 1 << 52, + suffix: "B", + binary: true, + expSize: "4.00 PiB", + }, + "trouble": { + input: 1 << 90, + suffix: "tribbles", + expSize: "1.237940E+27 tribbles", + }, + } { + t.Run(name, func(t *testing.T) { + gotSize := ui.FmtHumanSize(tc.input, tc.suffix, tc.binary) + test.AssertEqual(t, tc.expSize, gotSize, "unexpected size") + }) + } +} + +func TestUI_ByteSizeFlag(t *testing.T) { + for name, tc := range map[string]struct { + input string + expSize uint64 + expStr string + expErr error + }{ + "empty": { + expErr: errors.New("no size specified"), + }, + "invalid size": { + input: "horse", + expErr: errors.New("invalid size"), + }, + "negative size invalid": { + input: "-438 TB", + expErr: errors.New("invalid size"), + }, + "0": { + input: "0", + expSize: 0, + expStr: "0 B", + }, + "weird but valid": { + input: "0 EiB", + expSize: 0, + expStr: "0 B", + }, + "valid MB": { + input: "10MB", + expSize: 10 * 1000 * 1000, + expStr: "10 MB", + }, + "valid raw number": { + input: "1058577", + expSize: 1058577, + expStr: "1.1 
MB", + }, + } { + t.Run(name, func(t *testing.T) { + f := ui.ByteSizeFlag{} + gotErr := f.UnmarshalFlag(tc.input) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + test.AssertFalse(t, f.IsSet(), "shouldn't be set on error") + return + } + test.AssertTrue(t, f.IsSet(), "should be set on success") + test.AssertEqual(t, tc.expSize, f.Bytes, "unexpected size") + test.AssertEqual(t, tc.expStr, f.String(), "unexpected string") + }) + } +} diff --git a/src/control/run_go_tests.sh b/src/control/run_go_tests.sh index ec8204dd308..c976b1595f8 100755 --- a/src/control/run_go_tests.sh +++ b/src/control/run_go_tests.sh @@ -100,15 +100,18 @@ function setup_environment() LD_LIBRARY_PATH=${SL_PREFIX+${SL_PREFIX}/lib} LD_LIBRARY_PATH+="${SL_PREFIX+:${SL_PREFIX}/lib64}" LD_LIBRARY_PATH+="${SL_PREFIX+:${SL_PREFIX}/lib64/daos_srv}" + LD_LIBRARY_PATH+="${SL_MERCURY_PREFIX+:${SL_MERCURY_PREFIX}/lib}" LD_LIBRARY_PATH+="${SL_SPDK_PREFIX+:${SL_SPDK_PREFIX}/lib}" LD_LIBRARY_PATH+="${SL_OFI_PREFIX+:${SL_OFI_PREFIX}/lib}" CGO_LDFLAGS=${SL_PREFIX+-L${SL_PREFIX}/lib} CGO_LDFLAGS+="${SL_PREFIX+ -L${SL_PREFIX}/lib64}" CGO_LDFLAGS+="${SL_PREFIX+ -L${SL_PREFIX}/lib64/daos_srv}" CGO_LDFLAGS+="${SL_BUILD_DIR+ -L${SL_BUILD_DIR}/src/control/lib/spdk}" + CGO_LDFLAGS+="${SL_MERCURY_PREFIX+ -L${SL_MERCURY_PREFIX}/lib}" CGO_LDFLAGS+="${SL_SPDK_PREFIX+ -L${SL_SPDK_PREFIX}/lib}" CGO_LDFLAGS+="${SL_OFI_PREFIX+ -L${SL_OFI_PREFIX}/lib}" CGO_CFLAGS=${SL_PREFIX+-I${SL_PREFIX}/include} + CGO_CFLAGS+="${SL_MERCURY_PREFIX+ -I${SL_MERCURY_PREFIX}/include}" CGO_CFLAGS+="${SL_SPDK_PREFIX+ -I${SL_SPDK_PREFIX}/include}" CGO_CFLAGS+="${SL_OFI_PREFIX+ -I${SL_OFI_PREFIX}/include}" CGO_CFLAGS+="${SL_ARGOBOTS_PREFIX+ -I${SL_ARGOBOTS_PREFIX}/include}" @@ -167,7 +170,7 @@ $output function get_test_runner() { - test_args="-mod vendor -race -cover -v ./... -tags firmware,fault_injection" + test_args="-mod vendor -race -cover -v ./... 
-tags firmware,fault_injection,test_stubs"
 test_runner="go test"

 if which gotestsum >/dev/null; then
diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c
index 31e0ac1f7d9..1efb8468d29 100644
--- a/src/mgmt/cli_mgmt.c
+++ b/src/mgmt/cli_mgmt.c
@@ -530,6 +530,7 @@ dc_mgmt_put_sys_info(struct daos_sys_info *info)
 if (info == NULL)
 return;
 free_rank_uris(info->dsi_ranks, info->dsi_nr_ranks);
+ D_FREE(info->dsi_ms_ranks);
 D_FREE(info);
 }
diff --git a/src/utils/self_test/self_test.c b/src/utils/self_test/self_test.c
index 3cdedbee8c5..fb8118fc88e 100644
--- a/src/utils/self_test/self_test.c
+++ b/src/utils/self_test/self_test.c
@@ -1240,9 +1240,7 @@ int main(int argc, char *argv[])
 D_FREE(tgt_endpts);
 D_FREE(all_params);
- if (use_agent)
- dc_mgmt_fini();
-
+ self_test_fini(use_agent);
 d_log_fini();
 return ret;
diff --git a/src/utils/self_test/self_test_lib.c b/src/utils/self_test/self_test_lib.c
index beb8e54cef2..c8c94feddf5 100644
--- a/src/utils/self_test/self_test_lib.c
+++ b/src/utils/self_test/self_test_lib.c
@@ -187,6 +187,16 @@ self_test_init(char *dest_name, crt_context_t *crt_ctx, crt_group_t **srv_grp, p
 return 0;
 }
+void
+self_test_fini(bool agent_used)
+{
+ if (!agent_used)
+ return;
+
+ dc_mgmt_fini();
+ dc_agent_fini();
+}
+
 int
 st_compare_endpts(const void *a_in, const void *b_in)
 {
diff --git a/src/utils/self_test/self_test_lib.h b/src/utils/self_test/self_test_lib.h
index 38dcc57f5c1..52843986716 100644
--- a/src/utils/self_test/self_test_lib.h
+++ b/src/utils/self_test/self_test_lib.h
@@ -57,5 +57,7 @@ int st_compare_latencies_by_ranks(const void *a_in, const void *b_in);
 void
 free_size_latencies(struct st_latency ***latencies, uint32_t num_msg_sizes, uint32_t num_ms_endpts);
+void
+self_test_fini(bool agent_used);
 #endif /* __SELF_TEST_LIB_H__ */
\ No newline at end of file

From 46e638339b9b1ae9e0c7dc2cc13a15fad6124fbc Mon Sep 17 00:00:00 2001
From: Mohamad Chaarawi
Date: Mon, 2 Sep 2024 12:32:59 -0500
Subject: [PATCH 07/21] DAOS-16272 dfs: fix get_info returning incorrect oclass (#15048)

If a user creates a container without --file-oclass, the get_info call was
returning the default oclass of a directory on daos fs get-attr. Fix that to
properly use the enum types for the default scenario.
Signed-off-by: Mohamad Chaarawi
---
 src/client/dfs/obj.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/client/dfs/obj.c b/src/client/dfs/obj.c
index a9dd6d6f51f..309439bc807 100644
--- a/src/client/dfs/obj.c
+++ b/src/client/dfs/obj.c
@@ -86,15 +86,14 @@ dfs_obj_get_info(dfs_t *dfs, dfs_obj_t *obj, dfs_obj_info_t *info)
 if (dfs->attr.da_dir_oclass_id)
 info->doi_dir_oclass_id = dfs->attr.da_dir_oclass_id;
 else
- rc = daos_obj_get_oclass(dfs->coh, 0, 0, 0,
+ rc = daos_obj_get_oclass(dfs->coh, DAOS_OT_MULTI_HASHED, 0, 0,
 &info->doi_dir_oclass_id);
 if (dfs->attr.da_file_oclass_id)
 info->doi_file_oclass_id = dfs->attr.da_file_oclass_id;
 else
- rc = daos_obj_get_oclass(dfs->coh, 0, 0, 0,
+ rc = daos_obj_get_oclass(dfs->coh, DAOS_OT_ARRAY_BYTE, 0, 0,
 &info->doi_file_oclass_id);
-
 if (rc) {
 D_ERROR("daos_obj_get_oclass() failed " DF_RC "\n", DP_RC(rc));
 return daos_der2errno(rc);

From 435e332cdcf961adcade822ac09b395dfceb1fe2 Mon Sep 17 00:00:00 2001
From: Liang Zhen
Date: Thu, 5 Sep 2024 00:53:39 +0800
Subject: [PATCH 08/21] DAOS-15863 container: fix a race for container cache (#15038)

* DAOS-15863 container: fix a race for container cache

While destroying a container, cont_child_destroy_one() releases its own
refcount before waiting. If another ULT then releases its refcount, which is
the last one, it wakes up the waiting ULT and frees the ds_cont_child
straightaway, because no one else holds a refcount. When the waiting ULT is
woken up, it will try to modify the already-freed ds_cont_child. This patch
changes the LRU eviction logic and fixes this race.

Signed-off-by: Liang Zhen
Signed-off-by: Jeff Olivier
Co-authored-by: Jeff Olivier
---
 src/common/lru.c | 54 ++++++++++++++++----------------
 src/container/srv_target.c | 4 ++-
 src/include/daos/lru.h | 38 +++++++--------------------
 3 files changed, 35 insertions(+), 61 deletions(-)

diff --git a/src/common/lru.c b/src/common/lru.c
index bb270500ab7..de86d367e0e 100644
--- a/src/common/lru.c
+++ b/src/common/lru.c
@@ -36,7 +36,10 @@ lru_hop_rec_decref(struct d_hash_table *htable, d_list_t *link)
 D_ASSERT(llink->ll_ref > 0);
 llink->ll_ref--;
- if (llink->ll_ref == 1 && llink->ll_ops->lop_wakeup)
+
+ /* eviction waiter is the last one holds refcount */
+ if (llink->ll_wait_evict &&
+ llink->ll_ops->lop_wakeup && daos_lru_is_last_user(llink))
 llink->ll_ops->lop_wakeup(llink);
 /* Delete from hash only if no more references */
@@ -215,15 +218,6 @@ daos_lru_ref_hold(struct daos_lru_cache *lcache, void *key,
 if (link != NULL) {
 llink = link2llink(link);
 D_ASSERT(llink->ll_evicted == 0);
- if (llink->ll_evicting) {
- /**
- * Avoid calling `lru_hop_rec_decref()` at this point
- * to prevent `wakeup()` from being invoked twice.
- */ - D_ASSERT(llink->ll_ref > 1); - llink->ll_ref--; - D_GOTO(out, rc = -DER_SHUTDOWN); - } /* remove busy item from LRU */ if (!d_list_empty(&llink->ll_qlink)) d_list_del_init(&llink->ll_qlink); @@ -257,24 +251,17 @@ daos_lru_ref_hold(struct daos_lru_cache *lcache, void *key, return rc; } -static void -lru_ref_release_internal(struct daos_lru_cache *lcache, struct daos_llink *llink, bool wait) +void +daos_lru_ref_release(struct daos_lru_cache *lcache, struct daos_llink *llink) { D_ASSERT(lcache != NULL && llink != NULL && llink->ll_ref > 1); D_ASSERT(d_list_empty(&llink->ll_qlink)); lru_hop_rec_decref(&lcache->dlc_htable, &llink->ll_link); - if (wait && llink->ll_ref > 1) { - D_ASSERT(llink->ll_evicting == 0); - llink->ll_evicting = 1; - lcache->dlc_ops->lop_wait(llink); - llink->ll_evicting = 0; - llink->ll_evicted = 1; - } - if (llink->ll_ref == 1) { /* the last refcount */ - if (lcache->dlc_csize == 0) + /* zero-sized cache always evicts unused item */ + if (lcache->dlc_csize == 0 && !llink->ll_evicted) llink->ll_evicted = 1; if (llink->ll_evicted) { @@ -297,15 +284,20 @@ lru_ref_release_internal(struct daos_lru_cache *lcache, struct daos_llink *llink } void -daos_lru_ref_release(struct daos_lru_cache *lcache, struct daos_llink *llink) -{ - lru_ref_release_internal(lcache, llink, false); -} - -void -daos_lru_ref_wait_evict(struct daos_lru_cache *lcache, struct daos_llink *llink) +daos_lru_ref_evict_wait(struct daos_lru_cache *lcache, struct daos_llink *llink) { - D_ASSERT(lcache->dlc_ops->lop_wait); - - lru_ref_release_internal(lcache, llink, true); + if (!llink->ll_evicted) + daos_lru_ref_evict(lcache, llink); + + if (lcache->dlc_ops->lop_wait && !daos_lru_is_last_user(llink)) { + /* Wait until I'm the last one. + * XXX: the implementation can only support one waiter for now, if there + * is a secondary ULT calls this function on the same item, it will hit + * the assertion. + */ + D_ASSERT(!llink->ll_wait_evict); + llink->ll_wait_evict = 1; + lcache->dlc_ops->lop_wait(llink); + llink->ll_wait_evict = 0; + } } diff --git a/src/container/srv_target.c b/src/container/srv_target.c index b5abcc2d759..f3ef47c8447 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -1261,7 +1261,9 @@ cont_child_destroy_one(void *vin) D_GOTO(out_pool, rc = -DER_BUSY); } /* else: resync should have completed, try again */ - daos_lru_ref_wait_evict(tls->dt_cont_cache, &cont->sc_list); + /* nobody should see it again after eviction */ + daos_lru_ref_evict_wait(tls->dt_cont_cache, &cont->sc_list); + daos_lru_ref_release(tls->dt_cont_cache, &cont->sc_list); } D_DEBUG(DB_MD, DF_CONT": destroying vos container\n", diff --git a/src/include/daos/lru.h b/src/include/daos/lru.h index 03b1eb90e4c..40bee5c492b 100644 --- a/src/include/daos/lru.h +++ b/src/include/daos/lru.h @@ -37,8 +37,8 @@ struct daos_llink { d_list_t ll_link; /**< LRU hash link */ d_list_t ll_qlink; /**< Temp link for traverse */ uint32_t ll_ref; /**< refcount for this ref */ - uint32_t ll_evicted:1, /**< has been evicted */ - ll_evicting:1; /**< been evicting */ + uint32_t ll_evicted:1; /**< has been evicted */ + uint32_t ll_wait_evict:1; /**< wait for completion of eviction */ struct daos_llink_ops *ll_ops; /**< ops to maintain refs */ }; @@ -121,26 +121,7 @@ void daos_lru_ref_release(struct daos_lru_cache *lcache, struct daos_llink *llink); /** - * Evicts the LRU link from the DAOS LRU cache after waiting - * for all references to be released. 
- *
- * \param[in] lcache DAOS LRU cache
- * \param[in] llink DAOS LRU link to be evicted
- *
- */
-void
-daos_lru_ref_wait_evict(struct daos_lru_cache *lcache, struct daos_llink *llink);
-
-/**
- * Flush old items from LRU.
- *
- * \param[in] lcache DAOS LRU cache
- */
-void
-daos_lru_ref_flush(struct daos_lru_cache *lcache);
-
-/**
- * Evict the item from LRU after releasing the last refcount on it.
+ * Evict the item from LRU before releasing the refcount on it.
 *
 * \param[in] lcache DAOS LRU cache
 * \param[in] llink DAOS LRU item to be evicted
@@ -153,15 +134,14 @@ daos_lru_ref_evict(struct daos_lru_cache *lcache, struct daos_llink *llink)
 }
 /**
- * Check if a LRU element has been evicted or not
+ * Evict the item from LRU before releasing the refcount on it, wait until
+ * the caller is the last one holds refcount.
 *
- * \param[in] llink DAOS LRU item to check
+ * \param[in] lcache DAOS LRU cache
+ * \param[in] llink DAOS LRU item to be evicted
 */
-static inline bool
-daos_lru_ref_evicted(struct daos_llink *llink)
-{
- return llink->ll_evicted;
-}
+void
+daos_lru_ref_evict_wait(struct daos_lru_cache *lcache, struct daos_llink *llink);
 /**
 * Increase a usage reference to the LRU element

From 48487d12d543709449083cf12e49e73fe8f7e98d Mon Sep 17 00:00:00 2001
From: Phil Henderson
Date: Wed, 4 Sep 2024 17:29:58 -0400
Subject: [PATCH 09/21] DAOS-16471 test: Reduce targets for ioctl_pool_handles.py (#15063)

The dfuse/ioctl_pool_handles.py test is overloading the VM so reduce the
number of engine targets.

Signed-off-by: Phil Henderson
---
 src/tests/ftest/dfuse/ioctl_pool_handles.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/dfuse/ioctl_pool_handles.yaml b/src/tests/ftest/dfuse/ioctl_pool_handles.yaml
index 2900f67c328..35752453850 100644
--- a/src/tests/ftest/dfuse/ioctl_pool_handles.yaml
+++ b/src/tests/ftest/dfuse/ioctl_pool_handles.yaml
@@ -16,7 +16,7 @@ server_config:
 0:
 class: ram
 scm_mount: /mnt/daos
- targets: 16
+ targets: 8
 system_ram_reserved: 1
 pool:

From 5cf46545fbf96545e7532012f9491759d19f0fd6 Mon Sep 17 00:00:00 2001
From: Nasf-Fan
Date: Fri, 6 Sep 2024 00:22:14 +0800
Subject: [PATCH 10/21] DAOS-16483 vos: handle empty DTX when vos_tx_end (#15053)

It is possible that the DTX modified nothing when stopping the current
backend transaction. In such a case, we may not generate a persistent DTX
entry, so we need to bypass that case before checking the on-disk DTX entry
status.

The patch also does some cleanup and removes redundant metrics for
committed DTX entries.

Enhance vos_dtx_deregister_record() to handle the GC case.

Signed-off-by: Fan Yong
---
 src/dtx/dtx_common.c | 2 +-
 src/tests/ftest/util/telemetry_utils.py | 1 -
 src/vos/vos_common.c | 31 ++++-----
 src/vos/vos_dtx.c | 86 ++++++++++++++++++++-----
 src/vos/vos_tls.h | 1 -
 5 files changed, 83 insertions(+), 38 deletions(-)

diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c
index 353bd880009..ff4f2dfe4ef 100644
--- a/src/dtx/dtx_common.c
+++ b/src/dtx/dtx_common.c
@@ -1341,7 +1341,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul
 * it persistently. Otherwise, the subsequent DTX resync may not find it as
 * to regard it as failed transaction and abort it.
 */
- if (result == 0 && !dth->dth_active && !dth->dth_prepared &&
+ if (result == 0 && !dth->dth_active && !dth->dth_prepared && !dth->dth_solo &&
 (dth->dth_dist || dth->dth_modification_cnt > 0)) {
 result = vos_dtx_attach(dth, true, dth->dth_ent != NULL ?
true : false); if (unlikely(result < 0)) { diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index b782173e3f4..d59c8d39e81 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -423,7 +423,6 @@ class TelemetryUtils(): ENGINE_NVME_CRIT_WARN_METRICS +\ ENGINE_NVME_INTEL_VENDOR_METRICS ENGINE_MEM_USAGE_METRICS = [ - "engine_mem_vos_dtx_cmt_ent_48", "engine_mem_vos_vos_obj_360", "engine_mem_vos_vos_lru_size", "engine_mem_dtx_dtx_leader_handle_360"] diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index e9389deea2f..dbb8d28fd04 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -403,16 +403,24 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, } } else if (dae != NULL) { if (dth->dth_solo) { - if (err == 0 && cont->vc_solo_dtx_epoch < dth->dth_epoch) + if (err == 0 && dae->dae_committing && + cont->vc_solo_dtx_epoch < dth->dth_epoch) cont->vc_solo_dtx_epoch = dth->dth_epoch; vos_dtx_post_handle(cont, &dae, &dce, 1, false, err != 0); } else { D_ASSERT(dce == NULL); - if (err == 0) { - dae->dae_prepared = 1; + if (err == 0 && dth->dth_active) { + D_ASSERTF(!UMOFF_IS_NULL(dae->dae_df_off), + "Non-prepared DTX " DF_DTI "\n", + DP_DTI(&dth->dth_xid)); + dae_df = umem_off2ptr(umm, dae->dae_df_off); - D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + D_ASSERTF(!(dae_df->dae_flags & DTE_INVALID), + "Invalid status for DTX " DF_DTI "\n", + DP_DTI(&dth->dth_xid)); + + dae->dae_prepared = 1; } } } @@ -561,13 +569,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id) } } - rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, - "Number of committed entries kept around for reply" - " reconstruction", "entries", - "io/dtx/committed/tgt_%u", tgt_id); - if (rc) - D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", - DP_RC(rc)); if (tgt_id >= 0) { rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, "Number of committed entries kept around for reply" @@ -577,14 +578,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); - rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE, - "Number of committed entries", "entry", - "mem/vos/dtx_cmt_ent_%u/tgt_%u", - sizeof(struct vos_dtx_cmt_ent), tgt_id); - if (rc) - D_WARN("Failed to create committed cnt: "DF_RC"\n", - DP_RC(rc)); - rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, "Number of cached vos object", "entry", "mem/vos/vos_obj_%u/tgt_%u", diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 697b943ecf0..8edf32117ef 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -768,7 +768,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p, struct vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal) { - struct vos_tls *tls = vos_tls_get(false); struct vos_dtx_act_ent *dae = NULL; struct vos_dtx_cmt_ent *dce = NULL; d_iov_t kiov; @@ -833,7 +832,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t if (dce == NULL) D_GOTO(out, rc = -DER_NOMEM); - d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); DCE_CMT_TIME(dce) = cmt_time; if (dae != NULL) { DCE_XID(dce) = DAE_XID(dae); @@ -1534,10 +1532,14 @@ int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record) { + struct dtx_handle *dth = vos_dth_get(false); struct vos_container *cont; struct vos_dtx_act_ent *dae; + struct 
vos_dtx_act_ent_df *dae_df; + umem_off_t *rec_df; bool found; int count; + int rc; int i; if (!vos_dtx_is_normal_entry(entry)) @@ -1566,10 +1568,54 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, * by another prepared (but non-committed) DTX, then do not allow current transaction * to modify it. Because if current transaction is aborted or failed for some reason, * there is no efficient way to recover such former non-committed DTX. + * + * If dth is NULL, then it is for GC. Under such case, deregister the record anyway. */ - if (dae->dae_dbd != NULL) - return dtx_inprogress(dae, vos_dth_get(cont->vc_pool->vp_sysdb), false, false, 8); + if (dae->dae_dbd != NULL) { + if (dth != NULL) + return dtx_inprogress(dae, dth, false, false, 8); + + dae_df = umem_off2ptr(umm, dae->dae_df_off); + D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + if (dae_df->dae_rec_cnt > DTX_INLINE_REC_CNT) + count = DTX_INLINE_REC_CNT; + else + count = dae_df->dae_rec_cnt; + + rec_df = dae_df->dae_rec_inline; + for (i = 0; i < count; i++) { + if (record == umem_off2offset(rec_df[i])) { + rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i])); + if (rc != 0) + return rc; + + rec_df[i] = UMOFF_NULL; + goto cache; + } + } + + rec_df = umem_off2ptr(umm, dae_df->dae_rec_off); + if (rec_df == NULL) + /* If non-exist on disk, then must be non-exist in cache. */ + return 0; + + for (i = 0; i < dae_df->dae_rec_cnt - DTX_INLINE_REC_CNT; i++) { + if (record == umem_off2offset(rec_df[i])) { + rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i])); + if (rc != 0) + return rc; + + rec_df[i] = UMOFF_NULL; + goto cache; + } + } + + /* If non-exist on disk, then must be non-exist in cache. */ + return 0; + } + +cache: if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) count = DTX_INLINE_REC_CNT; else @@ -2115,14 +2161,18 @@ vos_dtx_post_handle(struct vos_container *cont, if (!abort && dces != NULL) { struct vos_tls *tls = vos_tls_get(false); + int j = 0; D_ASSERT(cont->vc_pool->vp_sysdb == false); for (i = 0; i < count; i++) { - if (dces[i] != NULL) { - cont->vc_dtx_committed_count++; - cont->vc_pool->vp_dtx_committed_count++; - d_tm_inc_gauge(tls->vtl_committed, 1); - } + if (dces[i] != NULL) + j++; + } + + if (j > 0) { + cont->vc_dtx_committed_count += j; + cont->vc_pool->vp_dtx_committed_count += j; + d_tm_inc_gauge(tls->vtl_committed, j); } } @@ -2438,6 +2488,7 @@ vos_dtx_aggregate(daos_handle_t coh) uint64_t epoch; umem_off_t dbd_off; umem_off_t next = UMOFF_NULL; + int count = 0; int rc; int i; @@ -2480,13 +2531,10 @@ vos_dtx_aggregate(daos_handle_t coh) UMOFF_P(dbd_off), DP_RC(rc)); goto out; } - - cont->vc_dtx_committed_count--; - cont->vc_pool->vp_dtx_committed_count--; - d_tm_dec_gauge(tls->vtl_committed, 1); - d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); } + count = dbd->dbd_count; + if (epoch != cont_df->cd_newest_aggregated) { rc = umem_tx_add_ptr(umm, &cont_df->cd_newest_aggregated, sizeof(cont_df->cd_newest_aggregated)); @@ -2544,8 +2592,14 @@ vos_dtx_aggregate(daos_handle_t coh) out: rc = umem_tx_end(umm, rc); - if (rc == 0 && cont->vc_cmt_dtx_reindex_pos == dbd_off) - cont->vc_cmt_dtx_reindex_pos = next; + if (rc == 0) { + if (cont->vc_cmt_dtx_reindex_pos == dbd_off) + cont->vc_cmt_dtx_reindex_pos = next; + + cont->vc_dtx_committed_count -= count; + cont->vc_pool->vp_dtx_committed_count -= count; + d_tm_dec_gauge(tls->vtl_committed, count); + } DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Release DTX committed blob %p (" UMOFF_PF ") for cont " DF_UUID, dbd, diff --git a/src/vos/vos_tls.h 
b/src/vos/vos_tls.h index 981cce10be5..2fc328457d0 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -64,7 +64,6 @@ struct vos_tls { }; struct d_tm_node_t *vtl_committed; struct d_tm_node_t *vtl_obj_cnt; - struct d_tm_node_t *vtl_dtx_cmt_ent_cnt; struct d_tm_node_t *vtl_lru_alloc_size; }; From d778a9515ce8907b3eb503b83d5b2c3c431b8c8f Mon Sep 17 00:00:00 2001 From: Joseph Moore <26410038+jgmoore-or@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:07:27 -0600 Subject: [PATCH 11/21] DAOS-16271 mercury: Add patch to avoid seg fault in key resolve. (#15067) Signed-off-by: Joseph Moore --- utils/build.config | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/build.config b/utils/build.config index 174df687036..c38d49a267a 100644 --- a/utils/build.config +++ b/utils/build.config @@ -29,3 +29,4 @@ ucx=https://github.com/openucx/ucx.git spdk=https://github.com/spdk/spdk/commit/b0aba3fcd5aceceea530a702922153bc75664978.diff,https://github.com/spdk/spdk/commit/445a4c808badbad3942696ecf16fa60e8129a747.diff ofi=https://github.com/ofiwg/libfabric/commit/d827c6484cc5bf67dfbe395890e258860c3f0979.diff fuse=https://github.com/libfuse/libfuse/commit/c9905341ea34ff9acbc11b3c53ba8bcea35eeed8.diff +mercury=https://raw.githubusercontent.com/daos-stack/mercury/481297621bafbbcac4cc6f8feab3f1b6f8b14b59/na_ucx_keyres_epchk.patch From e77265fd5f0c16d9377ca9e9801698988e4b5eb0 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 5 Sep 2024 13:31:59 -0400 Subject: [PATCH 12/21] DAOS-16484 test: Support mixed speeds when selecting a default interface (#15050) Allow selecting a default interface that is running at a different speed on different hosts. Primarily this is to support selecting the ib0 interface by default when the launch node has a slower ib0 interface than the cluster hosts. 
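As a quick sketch of the new selection rule, with hypothetical host names and
speeds (the real logic is the get_fastest_interface() hunk below): an
interface now stays eligible as long as every host reports some speed, and
the slowest detected speed is recorded:

    from ClusterShell.NodeSet import NodeSet

    hosts = NodeSet("launch,wolf-[1-2]")            # hypothetical hosts
    interface_speeds = {}
    detected_speeds = {40: NodeSet("launch"),       # slower launch-node ib0
                       100: NodeSet("wolf-[1-2]")}  # faster cluster ib0
    speed_list = []
    speed_hosts = NodeSet()
    for speed, node_set in detected_speeds.items():
        speed_list.append(speed)        # keep every detected speed
        speed_hosts.add(node_set)       # track which hosts reported one
    if speed_list and speed_hosts == hosts:
        interface_speeds["ib0"] = min(speed_list)   # conservative choice: 40

Previously a speed was only recorded when a single speed covered all hosts,
so a mixed-speed ib0 could be skipped in favor of a slower interface.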
Signed-off-by: Phil Henderson --- src/tests/ftest/util/environment_utils.py | 3 ++- src/tests/ftest/util/network_utils.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/util/environment_utils.py b/src/tests/ftest/util/environment_utils.py index 8223063a85e..e36d750500e 100644 --- a/src/tests/ftest/util/environment_utils.py +++ b/src/tests/ftest/util/environment_utils.py @@ -9,6 +9,7 @@ from ClusterShell.NodeSet import NodeSet # pylint: disable=import-error,no-name-in-module +from util.host_utils import get_local_host from util.network_utils import (PROVIDER_ALIAS, SUPPORTED_PROVIDERS, NetworkException, get_common_provider, get_fastest_interface) from util.run_utils import run_remote @@ -326,7 +327,7 @@ def _default_interface(self, logger, hosts): # Find all the /sys/class/net interfaces on the launch node (excluding lo) logger.debug("Detecting network devices - D_INTERFACE not set") try: - interface = get_fastest_interface(logger, hosts) + interface = get_fastest_interface(logger, hosts | get_local_host()) except NetworkException as error: raise TestEnvironmentException("Error obtaining a default interface!") from error return interface diff --git a/src/tests/ftest/util/network_utils.py b/src/tests/ftest/util/network_utils.py index 52ba2420964..e3802364d8f 100644 --- a/src/tests/ftest/util/network_utils.py +++ b/src/tests/ftest/util/network_utils.py @@ -405,11 +405,12 @@ def get_fastest_interface(logger, hosts, verbose=True): for interface in common_interfaces: detected_speeds = get_interface_speeds(logger, hosts, interface, verbose) speed_list = [] + speed_hosts = NodeSet() for speed, node_set in detected_speeds.items(): - if node_set == hosts: - # Only include detected homogeneous interface speeds - speed_list.append(speed) - if speed_list: + speed_list.append(speed) + speed_hosts.add(node_set) + if speed_list and speed_hosts == hosts: + # Only include interface speeds if a speed is detected on all the hosts interface_speeds[interface] = min(speed_list) logger.info("Active network interface speeds on %s:", hosts) From 91de313a493d629ba50306294dde51a319743e3a Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Fri, 6 Sep 2024 06:59:02 +0900 Subject: [PATCH 13/21] =?UTF-8?q?DAOS-16446=20test:=20HDF5-VOL=20test=20-?= =?UTF-8?q?=20Set=20object=20class=20and=20container=20prope=E2=80=A6=20(#?= =?UTF-8?q?15004)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In HDF5, DFS, MPIIO, or POSIX, object class and container properties are defined during container creation. If it’s DFS, the object class is also set via the IOR parameter. However, in HDF5-VOL, object class and container properties are defined with the following mpirun environment variables: HDF5_DAOS_OBJ_CLASS (Object class) HDF5_DAOS_FILE_PROP (Container properties) The infrastructure to set these variables is already there in run_ior_with_pool(). In file_count_test_base.py, pass in the env vars to run_ior_with_pool(env=env) as a dictionary. The object class is the oclass variable. Container properties can be obtained from the container -> properties field in the test yaml. This fix is discussed in PR #14964.
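A minimal sketch of how such an environment can be assembled (the helper name is hypothetical; the semicolon replacement and quoting mirror the file_count_test_base.py change below):

    def hdf5_vol_ior_env(base_env, oclass, cont_props):
        """Build the extra mpirun env vars for an HDF5-VOL IOR run.

        cont_props is the comma-separated property string from the test
        yaml, e.g. "cksum:crc16,cksum_size:16384,srv_cksum:on".
        """
        env = base_env.copy()
        env["HDF5_DAOS_OBJ_CLASS"] = oclass
        # HDF5-VOL expects ';' separators; quote the value so the shell
        # running mpirun does not treat ';' as a command terminator
        env["HDF5_DAOS_FILE_PROP"] = '"' + cont_props.replace(",", ";") + '"'
        return env
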
Signed-off-by: Makito Kano --- src/tests/ftest/util/file_count_test_base.py | 31 ++++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/tests/ftest/util/file_count_test_base.py b/src/tests/ftest/util/file_count_test_base.py index be21183c97a..36410e92260 100644 --- a/src/tests/ftest/util/file_count_test_base.py +++ b/src/tests/ftest/util/file_count_test_base.py @@ -17,15 +17,15 @@ class FileCountTestBase(IorTestBase, MdtestBase): :avocado: recursive """ - def add_containers(self, file_oclass=None, dir_oclass=None): - """Create a list of containers that the various jobs use for storage. + def get_file_write_container(self, file_oclass=None, dir_oclass=None): + """Create a container, set oclass, dir_oclass, and add rd_fac property based on oclass. Args: - file_oclass (str, optional): file object class of container. - Defaults to None. - dir_oclass (str, optional): dir object class of container. - Defaults to None. + file_oclass (str, optional): file object class of container. Defaults to None. + dir_oclass (str, optional): dir object class of container. Defaults to None. + Returns: + TestContainer: Created container with oclass, dir_oclass, and rd_fac set. """ # Create a container and add it to the overall list of containers @@ -92,7 +92,7 @@ def run_file_count(self): rd_fac = extract_redundancy_factor(oclass) dir_oclass = self.get_diroclass(rd_fac) self.mdtest_cmd.dfs_dir_oclass.update(dir_oclass) - self.container = self.add_containers(oclass, dir_oclass) + self.container = self.get_file_write_container(oclass, dir_oclass) try: self.processes = mdtest_np self.ppn = mdtest_ppn @@ -111,14 +111,27 @@ def run_file_count(self): # run ior self.log.info("=======>>>Starting IOR with %s and %s", api, oclass) self.ior_cmd.dfs_oclass.update(oclass) - self.container = self.add_containers(oclass) + self.container = self.get_file_write_container(oclass) self.update_ior_cmd_with_pool(False) try: self.processes = ior_np self.ppn = ior_ppn if api == 'HDF5-VOL': + # Format the container properties so that they work with the HDF5-VOL env var. + # Each entry:value pair needs to be separated by a semicolon. Since we're + # using this in the mpirun command, a semicolon would indicate the end of the + # command, so quote the whole thing. + cont_props = self.container.properties.value + cont_props_hdf5_vol = '"' + cont_props.replace(",", ";") + '"' + self.log.info("cont_props_hdf5_vol = %s", cont_props_hdf5_vol) + env = self.ior_cmd.env.copy() + env.update({ + "HDF5_DAOS_OBJ_CLASS": oclass, + "HDF5_DAOS_FILE_PROP": cont_props_hdf5_vol }) self.ior_cmd.api.update('HDF5') - self.run_ior_with_pool(create_pool=False, plugin_path=hdf5_plugin_path) + self.run_ior_with_pool( create_pool=False, plugin_path=hdf5_plugin_path, env=env) elif self.ior_cmd.api.value == 'POSIX': self.run_ior_with_pool(create_pool=False, intercept=intercept) else: From c57eced6a2b134449195840408124821a7658254 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 5 Sep 2024 16:26:43 -0700 Subject: [PATCH 14/21] DAOS-16447 test: set D_IL_REPORT per test (#15012) Set D_IL_REPORT per test instead of setting default values in utilities. This allows running without it set.
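Roughly, the utilities now behave as in this sketch (a hypothetical helper; only the opt-in guard is the point, names are illustrative):

    def interception_env(env, intercept=None, il_report=None):
        """Apply interception-library settings without forcing a default.

        D_IL_REPORT is only set when a test asks for it, either via
        env_vars in its yaml or an explicit il_report argument; nothing
        is injected otherwise.
        """
        if intercept:
            env["LD_PRELOAD"] = intercept
        if il_report is not None and "D_IL_REPORT" not in env:
            env["D_IL_REPORT"] = str(il_report)
        return env
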
Signed-off-by: Dalton Bohning --- src/tests/ftest/io/large_file_count.yaml | 4 ++++ src/tests/ftest/io/small_file_count.yaml | 4 ++++ src/tests/ftest/ior/small.yaml | 2 ++ src/tests/ftest/util/file_count_test_base.py | 2 +- src/tests/ftest/util/ior_test_base.py | 2 -- src/tests/ftest/util/ior_utils.py | 6 +++--- src/tests/ftest/util/soak_utils.py | 8 -------- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/tests/ftest/io/large_file_count.yaml b/src/tests/ftest/io/large_file_count.yaml index 619143a83e8..6ff375cf3a9 100644 --- a/src/tests/ftest/io/large_file_count.yaml +++ b/src/tests/ftest/io/large_file_count.yaml @@ -44,6 +44,8 @@ ior: repetitions: 1 transfer_size: '1M' block_size: '7G' + env_vars: + - D_IL_REPORT=1 dfuse: disable_caching: true @@ -59,3 +61,5 @@ mdtest: write_bytes: 4096 read_bytes: 4096 depth: 0 + env_vars: + - D_IL_REPORT=1 diff --git a/src/tests/ftest/io/small_file_count.yaml b/src/tests/ftest/io/small_file_count.yaml index b9bf23cd126..79e02c3d787 100644 --- a/src/tests/ftest/io/small_file_count.yaml +++ b/src/tests/ftest/io/small_file_count.yaml @@ -45,6 +45,8 @@ ior: repetitions: 1 transfer_size: '1M' block_size: '2G' + env_vars: + - D_IL_REPORT=1 dfuse: disable_caching: true @@ -60,3 +62,5 @@ mdtest: write_bytes: 4096 read_bytes: 4096 depth: 0 + env_vars: + - D_IL_REPORT=1 diff --git a/src/tests/ftest/ior/small.yaml b/src/tests/ftest/ior/small.yaml index b0b21620a93..b638a396580 100644 --- a/src/tests/ftest/ior/small.yaml +++ b/src/tests/ftest/ior/small.yaml @@ -38,6 +38,8 @@ container: properties: cksum:crc16,cksum_size:16384,srv_cksum:on control_method: daos ior: + env_vars: + - D_IL_REPORT=1 ior_timeout: 75 client_processes: ppn: 32 diff --git a/src/tests/ftest/util/file_count_test_base.py b/src/tests/ftest/util/file_count_test_base.py index 36410e92260..f95e22bd4bd 100644 --- a/src/tests/ftest/util/file_count_test_base.py +++ b/src/tests/ftest/util/file_count_test_base.py @@ -97,7 +97,7 @@ def run_file_count(self): self.processes = mdtest_np self.ppn = mdtest_ppn if self.mdtest_cmd.api.value == 'POSIX': - self.mdtest_cmd.env.update(LD_PRELOAD=intercept, D_IL_REPORT='1') + self.mdtest_cmd.env.update(LD_PRELOAD=intercept) self.execute_mdtest() else: self.execute_mdtest() diff --git a/src/tests/ftest/util/ior_test_base.py b/src/tests/ftest/util/ior_test_base.py index 625a283593e..8f056c34002 100644 --- a/src/tests/ftest/util/ior_test_base.py +++ b/src/tests/ftest/util/ior_test_base.py @@ -225,8 +225,6 @@ def run_ior(self, manager, processes, intercept=None, display_space=True, env = self.ior_cmd.get_default_env(str(manager), self.client_log) if intercept: env['LD_PRELOAD'] = intercept - if 'D_IL_REPORT' not in env: - env['D_IL_REPORT'] = '1' if plugin_path: env["HDF5_VOL_CONNECTOR"] = "daos" env["HDF5_PLUGIN_PATH"] = str(plugin_path) diff --git a/src/tests/ftest/util/ior_utils.py b/src/tests/ftest/util/ior_utils.py index 7851e4587d7..ffde4454fcb 100644 --- a/src/tests/ftest/util/ior_utils.py +++ b/src/tests/ftest/util/ior_utils.py @@ -588,7 +588,7 @@ def get_unique_log(self, container): return '.'.join(['_'.join(parts), 'log']) def run(self, pool, container, processes, ppn=None, intercept=None, plugin_path=None, - dfuse=None, display_space=True, fail_on_warning=False, unique_log=True, il_report=1): + dfuse=None, display_space=True, fail_on_warning=False, unique_log=True, il_report=None): # pylint: disable=too-many-arguments """Run ior. 
@@ -609,7 +609,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, plugin_path= unique_log (bool, optional): whether or not to update the log file with a new unique log file name. Defaults to True. il_report (int, optional): D_IL_REPORT value to use when 'intercept' is specified and a - value does not already exist in the environment. Defaults to 1. + value does not already exist in the environment. Defaults to None. Raises: CommandFailure: if there is an error running the ior command @@ -627,7 +627,7 @@ def run(self, pool, container, processes, ppn=None, intercept=None, plugin_path= self.env["LD_PRELOAD"] = intercept if "D_LOG_MASK" not in self.env: self.env["D_LOG_MASK"] = "INFO" - if "D_IL_REPORT" not in self.env: + if "D_IL_REPORT" not in self.env and il_report is not None: self.env["D_IL_REPORT"] = str(il_report) if plugin_path: diff --git a/src/tests/ftest/util/soak_utils.py b/src/tests/ftest/util/soak_utils.py index d190a06e759..c527e67fea8 100644 --- a/src/tests/ftest/util/soak_utils.py +++ b/src/tests/ftest/util/soak_utils.py @@ -1004,10 +1004,8 @@ def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob, oclass_list=None, mpirun_cmd.get_params(self) if api == "POSIX-LIBPIL4DFS": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') - env["D_IL_REPORT"] = "1" if api == "POSIX-LIBIOIL": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libioil.so') - env["D_IL_REPORT"] = "1" # add envs if api is HDF5-VOL if api == "HDF5-VOL": vol = True @@ -1173,10 +1171,8 @@ def create_mdtest_cmdline(self, job_spec, pool, ppn, nodesperjob): if self.enable_il and api == "POSIX-LIBPIL4DFS": env["LD_PRELOAD"] = os.path.join( self.prefix, 'lib64', 'libpil4dfs.so') - env["D_IL_REPORT"] = "1" if self.enable_il and api == "POSIX-LIBIOIL": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libioil.so') - env["D_IL_REPORT"] = "1" mpirun_cmd = Mpirun(mdtest_cmd, mpi_type=self.mpi_module) mpirun_cmd.get_params(self) mpirun_cmd.assign_processes(nodesperjob * ppn) @@ -1304,10 +1300,8 @@ def create_fio_cmdline(self, job_spec, pool): cmds.append(f"cd {dfuse.mount_dir.value};") if self.enable_il and api == "POSIX-LIBPIL4DFS": cmds.append(f"export LD_PRELOAD={os.path.join(self.prefix, 'lib64', 'libpil4dfs.so')}") - cmds.append("export D_IL_REPORT=1") if self.enable_il and api == "POSIX-LIBIOIL": cmds.append(f"export LD_PRELOAD={os.path.join(self.prefix, 'lib64', 'libioil.so')}") - cmds.append("export D_IL_REPORT=1") cmds.append(str(fio_cmd)) cmds.append("status=$?") cmds.append("cd -") @@ -1379,10 +1373,8 @@ def create_app_cmdline(self, job_spec, pool, ppn, nodesperjob): env["DAOS_UNS_PREFIX"] = format_path(pool, self.container[-1]) if self.enable_il and api == "POSIX-LIBPIL4DFS": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') - env["D_IL_REPORT"] = "1" if self.enable_il and api == "POSIX-LIBIOIL": env["LD_PRELOAD"] = os.path.join(self.prefix, 'lib64', 'libioil.so') - env["D_IL_REPORT"] = "1" mpirun_cmd.assign_environment(env, True) mpirun_cmd.assign_processes(nodesperjob * ppn) mpirun_cmd.ppn.update(ppn) From 369e4f19a692991d1d57ed85cb6ca1c94ef3ce15 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 5 Sep 2024 16:30:28 -0700 Subject: [PATCH 15/21] DAOS-16450 test: auto run dfs tests when dfs is modified (#15017) Automatically include dfs tests when dfs files are modified in PRs. 
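Conceptually, tag resolution works like the sketch below (assumed behavior of the tooling that consumes tags.yaml; the function and data shapes are illustrative):

    import re

    def resolve_tags(mapping, changed_files):
        """Collect ftest tags for a set of modified paths.

        mapping is an ordered list of (pattern, tags, stop_on_match)
        tuples mirroring tags.yaml entries from top to bottom.
        """
        tags = set()
        for path in changed_files:
            for pattern, entry_tags, stop_on_match in mapping:
                if re.match(pattern, path):
                    tags.update(entry_tags)
                    if stop_on_match:
                        break
        return tags

    # With the new entry, "src/client/dfs/dfs.c" resolves to {"dfs", "pr"},
    # while an unrelated path still falls through to {"pr"}.
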
Signed-off-by: Dalton Bohning --- src/tests/ftest/tags.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tests/ftest/tags.yaml b/src/tests/ftest/tags.yaml index 0887db1f4ba..8812336fe3f 100644 --- a/src/tests/ftest/tags.yaml +++ b/src/tests/ftest/tags.yaml @@ -16,15 +16,18 @@ src/tests/ftest*: stop_on_match: True tags: pr -# Everything else runs pr +# Individual code areas +src/client/dfs/: dfs + +# Everything runs pr unless a previous entry was matched with stop_on_match .*: pr # Examples for reference only. Each code area needs to be carefully evaluated. -# Any files touch under "src/cart/"" should run "cart" tests +# Any files touch under "src/cart/" should run "cart" tests # src/cart/: cart -# Run "control" for files under "src/control/"" +# Run "control" for files under "src/control/" # Run "control" and "security" for files under "src/control/security" # src/control/: control # src/control/security/: security From a05d25dd421793a8ae33ae30e0e11c0b1a48dda9 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 5 Sep 2024 16:31:57 -0700 Subject: [PATCH 16/21] DAOS-16510 cq: update pylint to 3.2.7 (#15072) update pylint to 3.2.7 Signed-off-by: Dalton Bohning --- utils/cq/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index fe41786611f..c599bed7248 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -4,7 +4,7 @@ pyenchant ## https://github.com/pycqa/flake8/issues/1389 https://github.com/PyCQA/flake8/pull/1720 flake8<6.0.0 isort==5.13.2 -pylint==3.2.6 +pylint==3.2.7 yamllint==1.35.1 codespell==2.2.6 # Used by ci/jira_query.py which pip installs it standalone. From 733fda6f18ade7e444d846bc1721eed915866cd7 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 5 Sep 2024 16:43:06 -0700 Subject: [PATCH 17/21] DAOS-16509 test: replace IorTestBase.execute_cmd with run_remote (#15070) replace usage of IorTestBase.execute_cmd with run_remote Signed-off-by: Dalton Bohning --- .../ftest/aggregation/dfuse_space_check.py | 23 +++++--- src/tests/ftest/datamover/posix_meta_entry.py | 29 ++++----- src/tests/ftest/datamover/posix_symlinks.py | 26 ++++---- .../ftest/deployment/critical_integration.py | 5 +- src/tests/ftest/dfuse/sparse_file.py | 18 +++--- src/tests/ftest/util/data_mover_test_base.py | 3 +- src/tests/ftest/util/ior_test_base.py | 59 +------------------ 7 files changed, 57 insertions(+), 106 deletions(-) diff --git a/src/tests/ftest/aggregation/dfuse_space_check.py b/src/tests/ftest/aggregation/dfuse_space_check.py index 1d119f807a7..4bae72ef6c5 100644 --- a/src/tests/ftest/aggregation/dfuse_space_check.py +++ b/src/tests/ftest/aggregation/dfuse_space_check.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2022 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -9,6 +9,7 @@ from dfuse_utils import get_dfuse, start_dfuse from ior_test_base import IorTestBase +from run_utils import run_remote class DfuseSpaceCheck(IorTestBase): @@ -72,8 +73,11 @@ def write_multiple_files(self, dfuse): while self.get_nvme_free_space(False) >= self.block_size: file_path = os.path.join(dfuse.mount_dir.value, "file{}.txt".format(file_count)) write_dd_cmd = "dd if=/dev/zero of={} bs={} count=1".format(file_path, self.block_size) - if 0 in self.execute_cmd(write_dd_cmd, fail_on_err=True, display_output=False): - file_count += 1 + result = run_remote( + self.log, self.hostlist_clients, write_dd_cmd, verbose=False, timeout=300) + if not result.passed: + self.fail(f"Error running: {write_dd_cmd}") + file_count += 1 return file_count @@ -118,14 +122,16 @@ def test_dfusespacecheck(self): # Create a file as large as we can large_file = os.path.join(dfuse.mount_dir.value, 'largefile.txt') - self.execute_cmd('touch {}'.format(large_file)) + if not run_remote(self.log, self.hostlist_clients, f'touch {large_file}').passed: + self.fail(f"Error creating {large_file}") dd_count = (self.initial_space // self.block_size) + 1 write_dd_cmd = "dd if=/dev/zero of={} bs={} count={}".format( large_file, self.block_size, dd_count) - self.execute_cmd(write_dd_cmd, False) + run_remote(self.log, self.hostlist_clients, write_dd_cmd) # Remove the file - self.execute_cmd('rm -rf {}'.format(large_file)) + if not run_remote(self.log, self.hostlist_clients, f'rm -rf {large_file}').passed: + self.fail(f"Error removing {large_file}") # Wait for aggregation to complete self.wait_for_aggregation() @@ -142,7 +148,10 @@ def test_dfusespacecheck(self): self.pool.set_property("reclaim", "time") # remove all the small files created above. - self.execute_cmd("rm -rf {}".format(os.path.join(dfuse.mount_dir.value, '*'))) + result = run_remote( + self.log, self.hostlist_clients, f"rm -rf {os.path.join(dfuse.mount_dir.value, '*')}") + if not result.passed: + self.fail("Error removing files in mount dir") # Wait for aggregation to complete after file removal self.wait_for_aggregation() diff --git a/src/tests/ftest/datamover/posix_meta_entry.py b/src/tests/ftest/datamover/posix_meta_entry.py index bb608c27853..e5cb9ccf072 100644 --- a/src/tests/ftest/datamover/posix_meta_entry.py +++ b/src/tests/ftest/datamover/posix_meta_entry.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -7,7 +7,7 @@ from data_mover_test_base import DataMoverTestBase from dfuse_utils import get_dfuse, start_dfuse -from exception_utils import CommandFailure +from run_utils import run_remote class DmvrPosixMetaEntry(DataMoverTestBase): @@ -143,7 +143,9 @@ def create_data(self, path): "popd" ] - self.execute_cmd_list(cmd_list) + cmd = " &&\n".join(cmd_list) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail("Failed to create data in path") def compare_data(self, path1, path2, cmp_filetype=True, cmp_perms=True, cmp_owner=True, cmp_times=False, @@ -190,11 +192,9 @@ def compare_data(self, path1, path2, cmp_filetype=True, field_printf, entry2) diff_cmd = "diff <({} 2>&1) <({} 2>&1)".format( stat_cmd1, stat_cmd2) - result = self.execute_cmd(diff_cmd, fail_on_err=False) - if 0 not in result or len(result) > 1: - hosts = [str(nodes) for code, nodes in list(result.items()) if code != 0] - raise CommandFailure( - "Command to check files failed '{}' on {}".format(diff_cmd, hosts)) + result = run_remote(self.log, self.hostlist_clients, diff_cmd, timeout=300) + if not result.passed or not result.homogeneous: + self.fail(f"Unexpected diff between {entry1} and {entry2}") if cmp_xattr: # Use getfattr to get the xattrs @@ -202,13 +202,6 @@ def compare_data(self, path1, path2, cmp_filetype=True, xattr_cmd2 = "getfattr -d -h '{}'".format(entry2) diff_cmd = "diff -I '^#' <({} 2>&1) <({} 2>&1)".format( xattr_cmd1, xattr_cmd2) - self.execute_cmd(diff_cmd) - - def execute_cmd_list(self, cmd_list): - """Execute a list of commands, separated by &&. - - Args: - cmd_list (list): A list of commands to execute. - """ - cmd = " &&\n".join(cmd_list) - self.execute_cmd(cmd) + result = run_remote(self.log, self.hostlist_clients, diff_cmd, timeout=300) + if not result.passed or not result.homogeneous: + self.fail(f"Unexpected diff between {entry1} and {entry2}") diff --git a/src/tests/ftest/datamover/posix_symlinks.py b/src/tests/ftest/datamover/posix_symlinks.py index 68d60e4c973..2c4e4f28aff 100644 --- a/src/tests/ftest/datamover/posix_symlinks.py +++ b/src/tests/ftest/datamover/posix_symlinks.py @@ -1,5 +1,5 @@ ''' - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -7,6 +7,7 @@ from data_mover_test_base import DataMoverTestBase from dfuse_utils import get_dfuse, start_dfuse +from run_utils import run_remote class DmvrPosixSymlinks(DataMoverTestBase): @@ -119,8 +120,9 @@ def run_dm_posix_symlinks_fun(self, pool, cont, link_fun, link_desc): if do_deref: # Use POSIX cp to create a baseline for dereferencing deref_baseline_path = join(posix_test_path, "baseline_" + link_desc) - self.execute_cmd("cp -r --dereference '{}' '{}'".format( - src_posix_path, deref_baseline_path)) + cp_cmd = f"cp -r --dereference '{src_posix_path}' '{deref_baseline_path}'" + if not run_remote(self.log, self.hostlist_clients, cp_cmd, timeout=300).passed: + self.fail("Failed to create dereference baseline") diff_src = deref_baseline_path else: # Just compare against the original @@ -195,7 +197,9 @@ def create_links_forward(self, path): "popd" ] - self.execute_cmd_list(cmd_list) + cmd = " &&\n".join(cmd_list) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Failed to create forward symlinks in {path}") def create_links_backward(self, path): """ @@ -225,7 +229,9 @@ def create_links_backward(self, path): "popd" ] - self.execute_cmd_list(cmd_list) + cmd = " &&\n".join(cmd_list) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Failed to create backward symlinks in {path}") def create_links_mixed(self, path): """ @@ -256,12 +262,6 @@ def create_links_mixed(self, path): "popd" ] - self.execute_cmd_list(cmd_list) - - def execute_cmd_list(self, cmd_list): - """Execute a list of commands, separated by &&. - Args: - cmd_list (list): A list of commands to execute. - """ cmd = " &&\n".join(cmd_list) - self.execute_cmd(cmd) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Failed to create mixed symlinks in {path}") diff --git a/src/tests/ftest/deployment/critical_integration.py b/src/tests/ftest/deployment/critical_integration.py index c8b9b296f3c..d1cf28ab555 100644 --- a/src/tests/ftest/deployment/critical_integration.py +++ b/src/tests/ftest/deployment/critical_integration.py @@ -11,7 +11,7 @@ from ClusterShell.NodeSet import NodeSet from exception_utils import CommandFailure from general_utils import DaosTestError, get_journalctl, journalctl_time, run_command -from ior_test_base import IorTestBase +from run_utils import run_remote # pylint: disable-next=fixme # TODO Provision all daos nodes using provisioning tool provided by HPCM @@ -67,7 +67,8 @@ def test_passwdlessssh_versioncheck(self): daos_server_version_list.append(out['response']['version']) if check_remote_root_access: run_command(remote_root_access) - IorTestBase._execute_command(self, command_for_inter_node, hosts=[host]) + if not run_remote(self.log, NodeSet(host), command_for_inter_node).passed: + self.fail(f"Inter-node clush failed on {host}") except (DaosTestError, CommandFailure, KeyError) as error: self.log.error("Error: %s", error) failed_nodes.add(host) diff --git a/src/tests/ftest/dfuse/sparse_file.py b/src/tests/ftest/dfuse/sparse_file.py index ef31c3816e6..484b787c38c 100644 --- a/src/tests/ftest/dfuse/sparse_file.py +++ b/src/tests/ftest/dfuse/sparse_file.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. 
SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -11,6 +11,7 @@ from dfuse_utils import get_dfuse, start_dfuse from general_utils import get_remote_file_size from ior_test_base import IorTestBase +from run_utils import run_remote class SparseFile(IorTestBase): @@ -60,7 +61,8 @@ def test_sparsefile(self): # create large file and perform write to it so that if goes out of # space. sparse_file = os.path.join(dfuse.mount_dir.value, 'sparsefile.txt') - self.execute_cmd("touch {}".format(sparse_file)) + if not run_remote(self.log, self.hostlist_clients, f"touch {sparse_file}").passed: + self.fail(f"Failed to create {sparse_file}") self.log.info("File size (in bytes) before truncate: %s", get_remote_file_size(self.hostlist_clients[0], sparse_file)) @@ -84,7 +86,8 @@ def test_sparsefile(self): # write to the first byte of the file with char 'A' dd_first_byte = "echo 'A' | dd conv=notrunc of={} bs=1 count=1".format(sparse_file) - self.execute_cmd(dd_first_byte) + if not run_remote(self.log, self.hostlist_clients, dd_first_byte).passed: + self.fail(f"Failed to create first byte in {sparse_file}") fsize_write_1stbyte = get_remote_file_size(self.hostlist_clients[0], sparse_file) self.log.info("File size (in bytes) after writing first byte: %s", fsize_write_1stbyte) # verify file did not got overwritten after dd write. @@ -93,7 +96,8 @@ def test_sparsefile(self): # write to the 1024th byte position of the file dd_1024_byte = "echo 'A' | dd conv=notrunc of={} obs=1 seek=1023 bs=1 count=1".format( sparse_file) - self.execute_cmd(dd_1024_byte) + if not run_remote(self.log, self.hostlist_clients, dd_1024_byte).passed: + self.fail(f"Failed to create 1024th byte in {sparse_file}") fsize_write_1024thwrite = get_remote_file_size(self.hostlist_clients[0], sparse_file) self.log.info("File size (in bytes) after writing 1024th byte: %s", fsize_write_1024thwrite) # verify file did not got overwritten after dd write. @@ -110,13 +114,13 @@ def test_sparsefile(self): # check the middle 1022 bytes if they are filled with zeros middle_1022_bytes = "cmp --ignore-initial=1 --bytes=1022 {} {}".format( sparse_file, "/dev/zero") - self.execute_cmd(middle_1022_bytes) + if not run_remote(self.log, self.hostlist_clients, middle_1022_bytes).passed: + self.fail(f"Unexpected bytes in {sparse_file}") # read last 512 bytes which should be zeros till end of file. ignore_bytes = self.space_before - 512 read_till_eof = "cmp --ignore-initial={} {} {}".format( ignore_bytes, sparse_file, "/dev/zero") - # self.execute_cmd(read_till_eof, False) # fail the test if the above command is successful. - if 0 in self.execute_cmd(read_till_eof, False): + if run_remote(self.log, self.hostlist_clients, read_till_eof).passed: self.fail("read_till_eof command was supposed to fail. 
But it completed successfully.") diff --git a/src/tests/ftest/util/data_mover_test_base.py b/src/tests/ftest/util/data_mover_test_base.py index 18ab4e33b48..01e9f9e8b3a 100644 --- a/src/tests/ftest/util/data_mover_test_base.py +++ b/src/tests/ftest/util/data_mover_test_base.py @@ -817,7 +817,8 @@ def run_diff(self, src, dst, deref=False): cmd = "diff -r {} '{}' '{}'".format( deref_str, src, dst) - self.execute_cmd(cmd) + if not run_remote(self.log, self.hostlist_clients, cmd, timeout=300).passed: + self.fail(f"Unexpected diff between {src} and {dst}") # pylint: disable=too-many-arguments def run_datamover(self, test_desc=None, diff --git a/src/tests/ftest/util/ior_test_base.py b/src/tests/ftest/util/ior_test_base.py index 8f056c34002..7a7955d78a5 100644 --- a/src/tests/ftest/util/ior_test_base.py +++ b/src/tests/ftest/util/ior_test_base.py @@ -6,10 +6,9 @@ import os from apricot import TestWithServers -from ClusterShell.NodeSet import NodeSet from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure -from general_utils import get_random_string, pcmd +from general_utils import get_random_string from host_utils import get_local_host from ior_utils import IorCommand from job_manager_utils import get_job_manager @@ -369,59 +368,3 @@ def verify_pool_size(self, original_pool_info, processes): self.fail( "Pool Free Size did not match: actual={}, expected={}".format( actual_pool_size, expected_pool_size)) - - def execute_cmd(self, command, fail_on_err=True, display_output=True): - """Execute cmd using general_utils.pcmd. - - Args: - command (str): the command to execute on the client hosts - fail_on_err (bool, optional): whether or not to fail the test if - command returns a non zero return code. Defaults to True. - display_output (bool, optional): whether or not to display output. - Defaults to True. - - Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. - - """ - try: - # Execute the bash command on each client host - result = self._execute_command(command, fail_on_err, display_output) - - except CommandFailure as error: - # Report an error if any command fails - self.log.error("Failed to execute command: %s", str(error)) - self.fail("Failed to execute command") - - return result - - def _execute_command(self, command, fail_on_err=True, display_output=True, hosts=None): - """Execute the command on all client hosts. - - Optionally verify if the command returns a non zero return code. - - Args: - command (str): the command to execute on the client hosts - fail_on_err (bool, optional): whether or not to fail the test if - command returns a non zero return code. Defaults to True. - display_output (bool, optional): whether or not to display output. - Defaults to True. - - Raises: - CommandFailure: if 'fail_on_err' is set and the command fails on at - least one of the client hosts - - Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. 
- - """ - if hosts is None: - hosts = self.hostlist_clients - result = pcmd(hosts, command, verbose=display_output, timeout=300) - if (0 not in result or len(result) > 1) and fail_on_err: - hosts = [str(nodes) for code, nodes in list(result.items()) if code != 0] - raise CommandFailure("Error running '{}' on the following hosts: {}".format( - command, NodeSet(",".join(hosts)))) - return result From 0e52fa5e6827cb0c0cbd90580c8f1755ade2eb66 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Fri, 6 Sep 2024 11:16:27 +0800 Subject: [PATCH 18/21] DAOS-16458 object: fix invalid DRAM access in obj_bulk_transfer (#15026) For EC object update via CPD RPC, when calculating the bitmap to skip some iods for the current EC data shard, we may pass NULL for the "*skips" parameter. That may cause the old logic in obj_get_iods_offs_by_oid() to generate an undefined DRAM region for the "skips" bitmap. Such a bitmap may be overwritten by others, so the subsequent obj_bulk_transfer() may be misguided. The patch also fixes a bug inside obj_bulk_transfer() that forcibly cast any input RPC to UPDATE/FETCH. Signed-off-by: Fan Yong --- src/object/srv_coll.c | 2 +- src/object/srv_internal.h | 2 +- src/object/srv_obj.c | 41 ++++++++++++++++++++++++--------------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/object/srv_coll.c b/src/object/srv_coll.c index 9e421810861..a63a11d574b 100644 --- a/src/object/srv_coll.c +++ b/src/object/srv_coll.c @@ -183,7 +183,7 @@ obj_coll_punch_bulk(crt_rpc_t *rpc, d_iov_t *iov, crt_proc_t *p_proc, sgl.sg_iovs = iov; rc = obj_bulk_transfer(rpc, CRT_BULK_GET, false, &ocpi->ocpi_tgt_bulk, NULL, NULL, - DAOS_HDL_INVAL, &sgls, 1, NULL, NULL); + DAOS_HDL_INVAL, &sgls, 1, 1, NULL, NULL); if (rc != 0) { D_ERROR("Failed to prepare bulk transfer for coll_punch, size %u: "DF_RC"\n", ocpi->ocpi_bulk_tgt_sz, DP_RC(rc)); diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 6f13e3f36dc..a24986247a5 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -280,7 +280,7 @@ typedef int (*ds_iofw_cb_t)(crt_rpc_t *req, void *arg); int obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, crt_bulk_t *remote_bulks, uint64_t *remote_offs, uint8_t *skips, - daos_handle_t ioh, d_sg_list_t **sgls, int sgl_nr, + daos_handle_t ioh, d_sg_list_t **sgls, int sgl_nr, int bulk_nr, struct obj_bulk_args *p_arg, struct ds_cont_hdl *coh); int obj_tgt_punch(struct obj_tgt_punch_args *otpa, uint32_t *shards, uint32_t count); int obj_tgt_query(struct obj_tgt_query_args *otqa, uuid_t po_uuid, uuid_t co_hdl, uuid_t co_uuid, diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 0a246bebdca..febd3d36ead 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -488,22 +488,24 @@ bulk_transfer_sgl(daos_handle_t ioh, crt_rpc_t *rpc, crt_bulk_t remote_bulk, int obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, crt_bulk_t *remote_bulks, uint64_t *remote_offs, uint8_t *skips, daos_handle_t ioh, d_sg_list_t **sgls, - int sgl_nr, struct obj_bulk_args *p_arg, struct ds_cont_hdl *coh) + int sgl_nr, int bulk_nr, struct obj_bulk_args *p_arg, struct ds_cont_hdl *coh) { - struct obj_rw_in *orw = crt_req_get(rpc); struct obj_bulk_args arg = { 0 }; int i, rc, *status, ret; int skip_nr = 0; - int bulk_nr; bool async = true; uint64_t time = daos_get_ntime(); + if (unlikely(sgl_nr > bulk_nr)) { + D_ERROR("Invalid sgl_nr vs bulk_nr: %d/%d\n", sgl_nr, bulk_nr); + return -DER_INVAL; + } + if (remote_bulks == NULL) { D_ERROR("No remote bulks provided\n"); 
return -DER_INVAL; } - bulk_nr = orw->orw_bulks.ca_count; if (p_arg == NULL) { p_arg = &arg; async = false; @@ -514,7 +516,7 @@ obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, crt_bul return dss_abterr2der(rc); p_arg->inited = true; - D_DEBUG(DB_IO, "bulk_op %d sgl_nr %d\n", bulk_op, sgl_nr); + D_DEBUG(DB_IO, "bulk_op %d, sgl_nr %d, bulk_nr %d\n", bulk_op, sgl_nr, bulk_nr); p_arg->bulks_inflight++; @@ -542,9 +544,9 @@ obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, crt_bul while (skips != NULL && isset(skips, i + skip_nr)) skip_nr++; - if (bulk_nr > 0) - D_ASSERTF(i + skip_nr < bulk_nr, "i %d, skip_nr %d, bulk_nr %d\n", - i, skip_nr, bulk_nr); + D_ASSERTF(i + skip_nr < bulk_nr, "i %d, skip_nr %d, sgl_nr %d, bulk_nr %d\n", + i, skip_nr, sgl_nr, bulk_nr); + if (remote_bulks[i + skip_nr] == NULL) continue; @@ -574,6 +576,12 @@ obj_bulk_transfer(crt_rpc_t *rpc, crt_bulk_op_t bulk_op, bool bulk_bind, crt_bul break; } } + + if (skips != NULL) + D_ASSERTF(skip_nr + sgl_nr <= bulk_nr, + "Unmatched skip_nr %d, sgl_nr %d, bulk_nr %d\n", + skip_nr, sgl_nr, bulk_nr); + done: if (--(p_arg->bulks_inflight) == 0) ABT_eventual_set(p_arg->eventual, &rc, sizeof(rc)); @@ -836,7 +844,7 @@ obj_echo_rw(crt_rpc_t *rpc, daos_iod_t *iod, uint64_t *off) /* Only support 1 iod now */ bulk_bind = orw->orw_flags & ORF_BULK_BIND; rc = obj_bulk_transfer(rpc, bulk_op, bulk_bind, orw->orw_bulks.ca_arrays, off, - NULL, DAOS_HDL_INVAL, &p_sgl, 1, NULL, NULL); + NULL, DAOS_HDL_INVAL, &p_sgl, 1, 1, NULL, NULL); out: orwo->orw_ret = rc; orwo->orw_map_version = orw->orw_map_ver; @@ -1636,7 +1644,8 @@ obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, daos_iod_t *io if (rma) { bulk_bind = orw->orw_flags & ORF_BULK_BIND; rc = obj_bulk_transfer(rpc, bulk_op, bulk_bind, orw->orw_bulks.ca_arrays, offs, - skips, ioh, NULL, iods_nr, NULL, ioc->ioc_coh); + skips, ioh, NULL, iods_nr, orw->orw_bulks.ca_count, NULL, + ioc->ioc_coh); if (rc == 0) { bio_iod_flush(biod); @@ -1809,7 +1818,7 @@ obj_get_iods_offs_by_oid(daos_unit_oid_t uoid, struct obj_iod_array *iod_array, } } if (oiod_nr > LOCAL_SKIP_BITS_NUM || *skips == NULL) { - D_ALLOC(*skips, roundup(oiod_nr / NBBY, 4)); + D_ALLOC(*skips, (oiod_nr + NBBY - 1) / NBBY); if (*skips == NULL) D_GOTO(out, rc = -DER_NOMEM); } @@ -2448,7 +2457,7 @@ ds_obj_ec_rep_handler(crt_rpc_t *rpc) goto end; } rc = obj_bulk_transfer(rpc, CRT_BULK_GET, false, &oer->er_bulk, NULL, NULL, - ioh, NULL, 1, NULL, ioc.ioc_coh); + ioh, NULL, 1, 1, NULL, ioc.ioc_coh); if (rc) D_ERROR(DF_UOID " bulk transfer failed: " DF_RC "\n", DP_UOID(oer->er_oid), DP_RC(rc)); @@ -2526,7 +2535,7 @@ ds_obj_ec_agg_handler(crt_rpc_t *rpc) goto end; } rc = obj_bulk_transfer(rpc, CRT_BULK_GET, false, &oea->ea_bulk, - NULL, NULL, ioh, NULL, 1, NULL, ioc.ioc_coh); + NULL, NULL, ioh, NULL, 1, 1, NULL, ioc.ioc_coh); if (rc) D_ERROR(DF_UOID " bulk transfer failed: " DF_RC "\n", DP_UOID(oea->ea_oid), DP_RC(rc)); @@ -3275,7 +3284,7 @@ obj_enum_reply_bulk(crt_rpc_t *rpc) return 0; rc = obj_bulk_transfer(rpc, CRT_BULK_PUT, false, bulks, NULL, NULL, - DAOS_HDL_INVAL, sgls, idx, NULL, NULL); + DAOS_HDL_INVAL, sgls, idx, idx, NULL, NULL); if (oei->oei_kds_bulk) { D_FREE(oeo->oeo_kds.ca_arrays); oeo->oeo_kds.ca_count = 0; @@ -4560,7 +4569,7 @@ ds_cpd_handle_one(crt_rpc_t *rpc, struct daos_cpd_sub_head *dcsh, struct daos_cp rc = obj_bulk_transfer(rpc, CRT_BULK_GET, dcu->dcu_flags & ORF_BULK_BIND, dcu->dcu_bulks, poffs[i], pskips[i], iohs[i], NULL, - piod_nrs[i], &bulks[i], ioc->ioc_coh); 
+ piod_nrs[i], dcsr->dcsr_nr, &bulks[i], ioc->ioc_coh); if (rc != 0) { D_ERROR("Bulk transfer failed for obj " DF_UOID", DTX "DF_DTI": "DF_RC"\n", @@ -5276,7 +5285,7 @@ ds_obj_cpd_body_bulk(crt_rpc_t *rpc, struct obj_io_context *ioc, bool leader, } rc = obj_bulk_transfer(rpc, CRT_BULK_GET, ORF_BULK_BIND, bulks, NULL, NULL, - DAOS_HDL_INVAL, sgls, count, NULL, ioc->ioc_coh); + DAOS_HDL_INVAL, sgls, count, count, NULL, ioc->ioc_coh); if (rc != 0) goto out; From 13532841caf7c2a8ac8ddfef72fc5e3fd35494ae Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 6 Sep 2024 16:52:18 +0800 Subject: [PATCH 19/21] DAOS-16486 object: return proper error on stale pool map (#15064) A client with a stale pool map may try to send an RPC to a DOWN target. If the target was brought DOWN due to a faulty NVMe device, the ds_pool_child could have been stopped by the NVMe faulty reaction. Ensure a proper error code is returned in such a case. Signed-off-by: Niu Yawei --- src/dtx/tests/srv_mock.c | 7 +++++++ src/include/daos_srv/pool.h | 2 ++ src/object/srv_obj.c | 31 ++++++++++++++++++++++++++++++- src/pool/srv_target.c | 15 +++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/dtx/tests/srv_mock.c b/src/dtx/tests/srv_mock.c index 245b3b11513..3d4ac70d773 100644 --- a/src/dtx/tests/srv_mock.c +++ b/src/dtx/tests/srv_mock.c @@ -71,6 +71,13 @@ ds_pool_child_put(struct ds_pool_child *child) assert_true(false); } +struct ds_pool_child * +ds_pool_child_find(const uuid_t uuid) +{ + assert_true(false); + return NULL; +} + struct ds_pool_child * ds_pool_child_lookup(const uuid_t uuid) { diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index 777c5c25e46..8931f3a605e 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -249,6 +249,8 @@ ds_pool_svc_ops_save(struct rdb_tx *tx, void *pool_svc, uuid_t pool_uuid, uuid_t uint64_t cli_time, bool dup_op, int rc_in, struct ds_pool_svc_op_val *op_valp); /* Find ds_pool_child in cache, hold one reference */ +struct ds_pool_child *ds_pool_child_find(const uuid_t uuid); +/* Find ds_pool_child in STARTING or STARTED state, hold one reference */ struct ds_pool_child *ds_pool_child_lookup(const uuid_t uuid); /* Put the reference held by ds_pool_child_lookup() */ void ds_pool_child_put(struct ds_pool_child *child); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index febd3d36ead..a51682b4785 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -2170,8 +2170,37 @@ obj_ioc_begin_lite(uint32_t rpc_map_ver, uuid_t pool_uuid, int rc; rc = obj_ioc_init(pool_uuid, coh_uuid, cont_uuid, rpc, ioc); - if (rc) + if (rc) { + DL_ERROR(rc, "Failed to initialize object I/O context."); + + /* + * A client with a stale pool map may try to send an RPC to a DOWN target. If the + * target was brought DOWN due to a faulty NVMe device, the ds_pool_child could + * have been stopped by the NVMe faulty reaction, so the above obj_ioc_init() + * will fail with -DER_NO_HDL. + * + * Ensure a proper error code is returned in such a case. 
+ */ + poc = ds_pool_child_find(pool_uuid); + if (poc == NULL) { + D_ERROR("Failed to find pool:"DF_UUID"\n", DP_UUID(pool_uuid)); + return rc; + } + + if (rpc_map_ver < poc->spc_pool->sp_map_version) { + D_ERROR("Stale pool map version %u < %u from client.\n", + rpc_map_ver, poc->spc_pool->sp_map_version); + + /* Restart the DTX if using stale pool map */ + if (opc_get(rpc->cr_opc) == DAOS_OBJ_RPC_CPD) + rc = -DER_TX_RESTART; + else + rc = -DER_STALE; + } + + ds_pool_child_put(poc); return rc; + } poc = ioc->ioc_coc->sc_pool; D_ASSERT(poc != NULL); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index cdec648f8d3..a917a34007e 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -88,6 +88,21 @@ pool_child_lookup_noref(const uuid_t uuid) return NULL; } +struct ds_pool_child * +ds_pool_child_find(const uuid_t uuid) +{ + struct ds_pool_child *child; + + child = pool_child_lookup_noref(uuid); + if (child == NULL) { + D_ERROR(DF_UUID": Pool child isn't found.\n", DP_UUID(uuid)); + return child; + } + + child->spc_ref++; + return child; +} + struct ds_pool_child * ds_pool_child_lookup(const uuid_t uuid) { From bb1b7c855f9c39c639ae62272cde6e7cbf04aa13 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 6 Sep 2024 20:24:41 +0800 Subject: [PATCH 20/21] DAOS-16514 vos: fix coverity issue (#15083) Fix coverity 2555843: explicit null dereferenced. Signed-off-by: Niu Yawei --- src/vos/vos_obj.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vos/vos_obj.c b/src/vos/vos_obj.c index b4e772b1e29..77cb041711f 100644 --- a/src/vos/vos_obj.c +++ b/src/vos/vos_obj.c @@ -1701,7 +1701,8 @@ vos_obj_iter_prep(vos_iter_type_t type, vos_iter_param_t *param, return -DER_NOMEM; /* ip_hdl is dkey or akey tree open handle for vos_iterate_key() */ - if (!(param->ip_flags & VOS_IT_KEY_TREE)) { + if (param->ip_flags != VOS_IT_KEY_TREE) { + D_ASSERT(!(param->ip_flags & VOS_IT_KEY_TREE)); cont = vos_hdl2cont(param->ip_hdl); is_sysdb = cont->vc_pool->vp_sysdb; dth = vos_dth_get(is_sysdb); From 6a59b265794a0a3b727e37420498a71f0898369f Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Fri, 6 Sep 2024 23:36:58 +0800 Subject: [PATCH 21/21] DAOS-16467 rebuild: add DAOS_POOL_RF ENV for massive failure case (#15037) * DAOS-16467 rebuild: add DAOS_POOL_RF ENV for massive failure case Allow the user to set DAOS_POOL_RF as pw_rf (pool-wise RF). If a SWIM-detected engine failure is going to break pw_rf, don't change the pool map and don't trigger rebuild. Instead, emit a critical log message asking the administrator to bring back those engines as a top priority (just "system start --ranks=xxx"; those engines do not need to be reintegrated). 
A few functions were renamed to avoid confusion: pool_map_find_nodes() -> pool_map_find_ranks() pool_map_find_node_by_rank() -> pool_map_find_dom_by_rank() pool_map_node_nr() -> pool_map_rank_nr() Signed-off-by: Xuezhao Liu --- docs/admin/env_variables.md | 1 + docs/admin/pool_operations.md | 24 +++++ src/chk/chk_engine.c | 4 +- src/common/pool_map.c | 18 ++-- src/container/cli.c | 2 +- src/container/srv_container.c | 15 ++- src/dtx/dtx_coll.c | 14 +-- src/include/daos/pool_map.h | 8 +- src/include/daos_prop.h | 11 ++- src/object/cli_coll.c | 2 +- src/object/srv_coll.c | 2 +- src/pool/cli.c | 4 +- src/pool/rpc.h | 10 ++ src/pool/srv.c | 16 ++- src/pool/srv_internal.h | 2 + src/pool/srv_pool.c | 102 +++++++++++++++++--- src/pool/srv_pool_map.c | 2 +- src/pool/srv_util.c | 14 +-- src/rebuild/srv.c | 18 ++-- src/tests/ftest/util/server_utils_params.py | 1 + 20 files changed, 196 insertions(+), 74 deletions(-) diff --git a/docs/admin/env_variables.md b/docs/admin/env_variables.md index 2f5c2053683..060c3790d57 100644 --- a/docs/admin/env_variables.md +++ b/docs/admin/env_variables.md @@ -53,6 +53,7 @@ Environment variables in this section only apply to the server side. |DAOS\_DTX\_RPC\_HELPER\_THD|DTX RPC helper threshold. The valid range is [18, unlimited). The default value is 513.| |DAOS\_DTX\_BATCHED\_ULT\_MAX|The max count of DTX batched commit ULTs. The valid range is [0, unlimited). 0 means to commit DTX synchronously. The default value is 32.| |DAOS\_FORWARD\_NEIGHBOR|Set to enable I/O forwarding on neighbor xstream in the absence of helper threads.| +|DAOS\_POOL\_RF|Redundancy factor for the pool. The valid range is [1, 4]. The default value is 2.| ## Server and Client environment variables diff --git a/docs/admin/pool_operations.md b/docs/admin/pool_operations.md index 388a81d8700..36907a2e31f 100644 --- a/docs/admin/pool_operations.md +++ b/docs/admin/pool_operations.md @@ -916,6 +916,30 @@ and possibly repair a pmemobj file. As discussed in the previous section, the rebuild status can be consulted via the pool query and will be expanded with more information. +## Pool Redundancy Factor + +If the DAOS system experiences cascading failures, where the number of failed +fault domains exceeds a pool's redundancy factor, there could be unrecoverable +errors and applications could suffer from data loss. This can happen in cases +of power or network outages and would cause node/engine failures. In most cases +those failures can be recovered and DAOS engines can be restarted and the system +can function again. + +The administrator can set the default pool redundancy factor via the environment +variable "DAOS_POOL_RF" in the server yaml file. If SWIM detects and reports an engine is +dead and the number of failed fault domains exceeds or is going to exceed the pool +redundancy factor, it will not change the pool map immediately. Instead, it will emit a +critical log message: intolerable unavailability: engine rank x In this case, the system administrator should check and try to recover those failed engines and bring them back with: dmg system start --ranks=x one by one. A reintegrate call is not needed. + +For true unrecoverable failures, the administrator can still exclude engines. However, data loss is expected when the number of unrecoverable failures exceeds the pool redundancy factor. + ## Recovering Container Ownership Typically users are expected to manage their containers. 
However, in the event diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 9113ca22531..56e6da3ad9b 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -668,7 +668,7 @@ chk_engine_pool_mbs_one(struct chk_pool_rec *cpr, struct pool_map *map, struct c int rc = 0; bool unknown; - dom = pool_map_find_node_by_rank(map, mbs->cpm_rank); + dom = pool_map_find_dom_by_rank(map, mbs->cpm_rank); if (dom == NULL) { D_ASSERT(mbs->cpm_rank != dss_self_rank()); @@ -777,7 +777,7 @@ chk_engine_find_dangling_pm(struct chk_pool_rec *cpr, struct pool_map *map) int j; bool down; - rank_nr = pool_map_find_nodes(map, PO_COMP_ID_ALL, &doms); + rank_nr = pool_map_find_ranks(map, PO_COMP_ID_ALL, &doms); if (rank_nr <= 0) D_GOTO(out, rc = rank_nr); diff --git a/src/common/pool_map.c b/src/common/pool_map.c index 7d7b38adb6c..1712f398dcb 100644 --- a/src/common/pool_map.c +++ b/src/common/pool_map.c @@ -1573,7 +1573,7 @@ add_domain_tree_to_pool_buf(struct pool_map *map, struct pool_buf *map_buf, if (map) { struct pool_domain *found_dom; - found_dom = pool_map_find_node_by_rank(map, rank); + found_dom = pool_map_find_dom_by_rank(map, rank); if (found_dom) { if (found_dom->do_comp.co_status == PO_COMP_ST_NEW) found_new_dom = true; @@ -2038,7 +2038,7 @@ pool_map_find_domain(struct pool_map *map, pool_comp_type_t type, uint32_t id, } /** - * Find all nodes in the pool map. + * Find all ranks in the pool map. * * \param map [IN] pool map to search. * \param id [IN] id to search. @@ -2048,7 +2048,7 @@ pool_map_find_domain(struct pool_map *map, pool_comp_type_t type, uint32_t id, * 0 if none. */ int -pool_map_find_nodes(struct pool_map *map, uint32_t id, +pool_map_find_ranks(struct pool_map *map, uint32_t id, struct pool_domain **domain_pp) { return pool_map_find_domain(map, PO_COMP_TP_RANK, id, @@ -2102,14 +2102,14 @@ pool_map_find_target(struct pool_map *map, uint32_t id, * \return domain found by rank. 
*/ struct pool_domain * -pool_map_find_node_by_rank(struct pool_map *map, uint32_t rank) +pool_map_find_dom_by_rank(struct pool_map *map, uint32_t rank) { struct pool_domain *doms; struct pool_domain *found = NULL; int doms_cnt; int i; - doms_cnt = pool_map_find_nodes(map, PO_COMP_ID_ALL, &doms); + doms_cnt = pool_map_find_ranks(map, PO_COMP_ID_ALL, &doms); if (doms_cnt <= 0) return NULL; @@ -2150,7 +2150,7 @@ pool_map_find_targets_on_ranks(struct pool_map *map, d_rank_list_t *rank_list, for (i = 0; i < rank_list->rl_nr; i++) { struct pool_domain *dom; - dom = pool_map_find_node_by_rank(map, rank_list->rl_ranks[i]); + dom = pool_map_find_dom_by_rank(map, rank_list->rl_ranks[i]); if (dom == NULL) { pool_target_id_list_free(tgts); return 0; @@ -2191,7 +2191,7 @@ pool_map_find_target_by_rank_idx(struct pool_map *map, uint32_t rank, { struct pool_domain *dom; - dom = pool_map_find_node_by_rank(map, rank); + dom = pool_map_find_dom_by_rank(map, rank); if (dom == NULL) return 0; @@ -2867,7 +2867,7 @@ pool_map_find_by_rank_status(struct pool_map *map, *tgt_ppp = NULL; *tgt_cnt = 0; - dom = pool_map_find_node_by_rank(map, rank); + dom = pool_map_find_dom_by_rank(map, rank); if (dom == NULL) return 0; @@ -2902,7 +2902,7 @@ pool_map_get_ranks(uuid_t pool_uuid, struct pool_map *map, bool get_enabled, d_r struct pool_domain *domains = NULL; d_rank_list_t *ranklist = NULL; - nnodes_tot = pool_map_find_nodes(map, PO_COMP_ID_ALL, &domains); + nnodes_tot = pool_map_find_ranks(map, PO_COMP_ID_ALL, &domains); for (i = 0; i < nnodes_tot; i++) { if (pool_map_node_status_match(&domains[i], ENABLED)) nnodes_enabled++; diff --git a/src/container/cli.c b/src/container/cli.c index 590f689333b..cd43667a2a4 100644 --- a/src/container/cli.c +++ b/src/container/cli.c @@ -3386,7 +3386,7 @@ dc_cont_node_id2ptr(daos_handle_t coh, uint32_t node_id, pool = dc_hdl2pool(dc->dc_pool_hdl); D_ASSERT(pool != NULL); D_RWLOCK_RDLOCK(&pool->dp_map_lock); - n = pool_map_find_nodes(pool->dp_map, node_id, dom); + n = pool_map_find_ranks(pool->dp_map, node_id, dom); D_RWLOCK_UNLOCK(&pool->dp_map_lock); dc_pool_put(pool); dc_cont_put(dc); diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 9071f8f731c..372da43afe4 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1667,7 +1667,7 @@ cont_ec_agg_alloc(struct cont_svc *cont_svc, uuid_t cont_uuid, { struct cont_ec_agg *ec_agg = NULL; struct pool_domain *doms; - int node_nr; + int rank_nr; int rc = 0; int i; @@ -1676,19 +1676,18 @@ cont_ec_agg_alloc(struct cont_svc *cont_svc, uuid_t cont_uuid, return -DER_NOMEM; D_ASSERT(cont_svc->cs_pool->sp_map != NULL); - node_nr = pool_map_find_nodes(cont_svc->cs_pool->sp_map, - PO_COMP_ID_ALL, &doms); - if (node_nr < 0) - D_GOTO(out, rc = node_nr); + rank_nr = pool_map_find_ranks(cont_svc->cs_pool->sp_map, PO_COMP_ID_ALL, &doms); + if (rank_nr < 0) + D_GOTO(out, rc = rank_nr); - D_ALLOC_ARRAY(ec_agg->ea_server_ephs, node_nr); + D_ALLOC_ARRAY(ec_agg->ea_server_ephs, rank_nr); if (ec_agg->ea_server_ephs == NULL) D_GOTO(out, rc = -DER_NOMEM); uuid_copy(ec_agg->ea_cont_uuid, cont_uuid); - ec_agg->ea_servers_num = node_nr; + ec_agg->ea_servers_num = rank_nr; ec_agg->ea_current_eph = 0; - for (i = 0; i < node_nr; i++) { + for (i = 0; i < rank_nr; i++) { ec_agg->ea_server_ephs[i].rank = doms[i].do_comp.co_rank; ec_agg->ea_server_ephs[i].eph = 0; } diff --git a/src/dtx/dtx_coll.c b/src/dtx/dtx_coll.c index 9623dce4917..863307e9a7f 100644 --- a/src/dtx/dtx_coll.c +++ b/src/dtx/dtx_coll.c @@ -112,7 
+112,7 @@ dtx_coll_prep(uuid_t po_uuid, daos_unit_oid_t oid, struct dtx_id *xid, struct dt struct dtx_coll_target *dct; struct dtx_coll_entry *dce = NULL; struct daos_obj_md md = { 0 }; - uint32_t node_nr; + uint32_t rank_nr; d_rank_t my_rank = dss_self_rank(); d_rank_t max_rank = 0; int rc = 0; @@ -192,19 +192,19 @@ dtx_coll_prep(uuid_t po_uuid, daos_unit_oid_t oid, struct dtx_id *xid, struct dt } } - node_nr = pool_map_node_nr(map->pl_poolmap); - if (unlikely(node_nr == 1)) + rank_nr = pool_map_rank_nr(map->pl_poolmap); + if (unlikely(rank_nr == 1)) D_GOTO(out, rc = 0); - dce->dce_ranks = d_rank_list_alloc(node_nr - 1); + dce->dce_ranks = d_rank_list_alloc(rank_nr - 1); if (dce->dce_ranks == NULL) D_GOTO(out, rc = -DER_NOMEM); - D_ALLOC_ARRAY(dce->dce_hints, node_nr); + D_ALLOC_ARRAY(dce->dce_hints, rank_nr); if (dce->dce_hints == NULL) D_GOTO(out, rc = -DER_NOMEM); - for (i = 0; i < node_nr; i++) + for (i = 0; i < rank_nr; i++) dce->dce_hints[i] = (uint8_t)(-1); md.omd_id = oid.id_pub; @@ -220,7 +220,7 @@ dtx_coll_prep(uuid_t po_uuid, daos_unit_oid_t oid, struct dtx_id *xid, struct dt goto out; } - for (i = 0, j = 0; i < layout->ol_nr && j < node_nr - 1; i++) { + for (i = 0, j = 0; i < layout->ol_nr && j < rank_nr - 1; i++) { if (layout->ol_shards[i].po_target == -1 || layout->ol_shards[i].po_shard == -1) continue; diff --git a/src/include/daos/pool_map.h b/src/include/daos/pool_map.h index 0df39f0e510..95695d2b027 100644 --- a/src/include/daos/pool_map.h +++ b/src/include/daos/pool_map.h @@ -281,7 +281,7 @@ int pool_map_find_target(struct pool_map *map, uint32_t id, struct pool_target **target_pp); int pool_map_find_domain(struct pool_map *map, pool_comp_type_t type, uint32_t id, struct pool_domain **domain_pp); -int pool_map_find_nodes(struct pool_map *map, uint32_t id, +int pool_map_find_ranks(struct pool_map *map, uint32_t id, struct pool_domain **domain_pp); int pool_map_find_tgts_by_state(struct pool_map *map, pool_comp_state_t match_states, @@ -311,7 +311,7 @@ bool pool_map_node_status_match(struct pool_domain *dom, unsigned int status); struct pool_domain * -pool_map_find_node_by_rank(struct pool_map *map, uint32_t rank); +pool_map_find_dom_by_rank(struct pool_map *map, uint32_t rank); int pool_map_find_by_rank_status(struct pool_map *map, struct pool_target ***tgt_ppp, @@ -339,9 +339,9 @@ pool_map_target_nr(struct pool_map *map) } static inline unsigned int -pool_map_node_nr(struct pool_map *map) +pool_map_rank_nr(struct pool_map *map) { - return pool_map_find_nodes(map, PO_COMP_ID_ALL, NULL); + return pool_map_find_ranks(map, PO_COMP_ID_ALL, NULL); } /* diff --git a/src/include/daos_prop.h b/src/include/daos_prop.h index c6ca94f84c1..3b7216efd0e 100644 --- a/src/include/daos_prop.h +++ b/src/include/daos_prop.h @@ -464,11 +464,12 @@ enum { /** container redundancy factor */ enum { - DAOS_PROP_CO_REDUN_RF0, - DAOS_PROP_CO_REDUN_RF1, - DAOS_PROP_CO_REDUN_RF2, - DAOS_PROP_CO_REDUN_RF3, - DAOS_PROP_CO_REDUN_RF4, + DAOS_PROP_CO_REDUN_RF0 = 0, + DAOS_PROP_CO_REDUN_RF1 = 1, + DAOS_PROP_CO_REDUN_RF2 = 2, + DAOS_PROP_CO_REDUN_RF3 = 3, + DAOS_PROP_CO_REDUN_RF4 = 4, + DAOS_RF_MAX = 4, }; /** diff --git a/src/object/cli_coll.c b/src/object/cli_coll.c index 12ba634813a..e05abadf3cf 100644 --- a/src/object/cli_coll.c +++ b/src/object/cli_coll.c @@ -139,7 +139,7 @@ obj_coll_oper_args_init(struct coll_oper_args *coa, struct dc_object *obj, bool D_ASSERT(coa->coa_dcts == NULL); D_RWLOCK_RDLOCK(&pool->dp_map_lock); - pool_ranks = pool_map_node_nr(pool->dp_map); + pool_ranks = 
pool_map_rank_nr(pool->dp_map); D_RWLOCK_UNLOCK(&pool->dp_map_lock); D_RWLOCK_RDLOCK(&obj->cob_lock); diff --git a/src/object/srv_coll.c b/src/object/srv_coll.c index a63a11d574b..2a152b47bd6 100644 --- a/src/object/srv_coll.c +++ b/src/object/srv_coll.c @@ -291,7 +291,7 @@ obj_coll_punch_prep(struct obj_coll_punch_in *ocpi, struct daos_coll_target *dct D_GOTO(out, rc = -DER_INVAL); } - size = pool_map_node_nr(map->pl_poolmap); + size = pool_map_rank_nr(map->pl_poolmap); D_ALLOC_ARRAY(dce->dce_hints, size); if (dce->dce_hints == NULL) D_GOTO(out, rc = -DER_NOMEM); diff --git a/src/pool/cli.c b/src/pool/cli.c index 5345017f824..85fa718aa1c 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -503,7 +503,7 @@ update_rsvc_client(struct dc_pool *pool) { struct subtract_rsvc_rank_arg arg; - arg.srra_nodes_len = pool_map_find_nodes(pool->dp_map, PO_COMP_ID_ALL, &arg.srra_nodes); + arg.srra_nodes_len = pool_map_find_ranks(pool->dp_map, PO_COMP_ID_ALL, &arg.srra_nodes); /* There must be at least one rank. */ D_ASSERTF(arg.srra_nodes_len > 0, "%d > 0\n", arg.srra_nodes_len); @@ -2016,7 +2016,7 @@ choose_map_refresh_rank(struct map_refresh_arg *arg) if (arg->mra_n <= 0) return CRT_NO_RANK; - n = pool_map_find_nodes(arg->mra_pool->dp_map, PO_COMP_ID_ALL, &nodes); + n = pool_map_find_ranks(arg->mra_pool->dp_map, PO_COMP_ID_ALL, &nodes); /* There must be at least one rank. */ D_ASSERTF(n > 0, "%d\n", n); diff --git a/src/pool/rpc.h b/src/pool/rpc.h index cf763b896dc..cfddcc48931 100644 --- a/src/pool/rpc.h +++ b/src/pool/rpc.h @@ -147,6 +147,16 @@ CRT_RPC_DECLARE(pool_op, DAOS_ISEQ_POOL_OP, DAOS_OSEQ_POOL_OP) CRT_RPC_DECLARE(pool_create, DAOS_ISEQ_POOL_CREATE, DAOS_OSEQ_POOL_CREATE) /* clang-format on */ + +/* the source of pool map update operation */ +enum map_update_source { + MUS_SWIM = 0, + /* May need to differentiate from administrator/csum scrubber/nvme healthy monitor later. + * Now all non-swim cases fall to DMG category. 
diff --git a/src/pool/srv.c b/src/pool/srv.c
index 2a45f4dec05..7e5548e8508 100644
--- a/src/pool/srv.c
+++ b/src/pool/srv.c
@@ -19,7 +19,12 @@
 #include "rpc.h"
 #include "srv_internal.h"
 #include "srv_layout.h"
-bool ec_agg_disabled;
+
+bool     ec_agg_disabled;
+uint32_t pw_rf; /* pool-wise RF */
+#define PW_RF_DEFAULT (2)
+#define PW_RF_MIN     (1)
+#define PW_RF_MAX     (4)
 
 static int
 init(void)
@@ -47,6 +52,15 @@ init(void)
 	if (unlikely(ec_agg_disabled))
 		D_WARN("EC aggregation is disabled.\n");
 
+	pw_rf = PW_RF_DEFAULT;
+	d_getenv_uint32_t("DAOS_POOL_RF", &pw_rf);
+	if (pw_rf < PW_RF_MIN || pw_rf > PW_RF_MAX) {
+		D_INFO("pw_rf %d is out of range [%d, %d], taking default %d\n",
+		       pw_rf, PW_RF_MIN, PW_RF_MAX, PW_RF_DEFAULT);
+		pw_rf = PW_RF_DEFAULT;
+	}
+	D_INFO("pool-wise RF %d\n", pw_rf);
+
 	ds_pool_rsvc_class_register();
 
 	bio_register_ract_ops(&nvme_reaction_ops);
diff --git a/src/pool/srv_internal.h b/src/pool/srv_internal.h
index c09d2ffcaea..8f864c8c11a 100644
--- a/src/pool/srv_internal.h
+++ b/src/pool/srv_internal.h
@@ -16,6 +16,8 @@
 #include
 #include
 
+extern uint32_t pw_rf;
+
 /**
  * Global pool metrics
  */
diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c
index 54e29767347..667e4bc6ed6 100644
--- a/src/pool/srv_pool.c
+++ b/src/pool/srv_pool.c
@@ -1355,11 +1355,11 @@ handle_event(struct pool_svc *svc, struct pool_svc_event *event)
 	if (event->psv_rank == dss_self_rank() && event->psv_src == CRT_EVS_GRPMOD &&
 	    event->psv_type == CRT_EVT_DEAD) {
-		D_DEBUG(DB_MGMT, "ignore exclusion of self\n");
+		D_DEBUG(DB_MD, "ignore exclusion of self\n");
 		goto out;
 	}
 
-	D_DEBUG(DB_MD, DF_UUID": handling event: "DF_PS_EVENT"\n", DP_UUID(svc->ps_uuid),
+	D_INFO(DF_UUID": handling event: "DF_PS_EVENT"\n", DP_UUID(svc->ps_uuid),
 	       DP_PS_EVENT(event));
 
 	if (event->psv_src == CRT_EVS_SWIM && event->psv_type == CRT_EVT_ALIVE) {
@@ -1381,8 +1381,8 @@ handle_event(struct pool_svc *svc, struct pool_svc_event *event)
 		 * and does not have a copy of the pool map.
 		 */
 		ds_rsvc_request_map_dist(&svc->ps_rsvc);
-		D_DEBUG(DB_MD, DF_UUID": requested map dist for rank %u\n", DP_UUID(svc->ps_uuid),
-			event->psv_rank);
+		D_DEBUG(DB_MD, DF_UUID": requested map dist for rank %u\n",
+			DP_UUID(svc->ps_uuid), event->psv_rank);
 	} else if (event->psv_type == CRT_EVT_DEAD) {
 		rc = pool_svc_exclude_rank(svc, event->psv_rank);
 		if (rc != 0)
@@ -1809,7 +1809,7 @@ pool_svc_check_node_status(struct pool_svc *svc)
 	D_DEBUG(DB_MD, DF_UUID": checking node status\n", DP_UUID(svc->ps_uuid));
 
 	ABT_rwlock_rdlock(svc->ps_pool->sp_lock);
-	doms_cnt = pool_map_find_nodes(svc->ps_pool->sp_map, PO_COMP_ID_ALL,
+	doms_cnt = pool_map_find_ranks(svc->ps_pool->sp_map, PO_COMP_ID_ALL,
 				       &doms);
 	D_ASSERT(doms_cnt >= 0);
 	for (i = 0; i < doms_cnt; i++) {
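The pool-wise RF enforced in the hunks below is a pool-level analogue of the per-container redundancy factor enumerated in daos_prop.h earlier in this series; the two are set independently. For comparison, a container's RF is chosen at create time through the property API. The following is a hedged client-side sketch, assuming an already-connected pool handle; create_rf2_cont() and "mycont" are invented names, and error handling is collapsed:

#include <daos.h>

/* Sketch: create a container that tolerates up to two concurrent failures
 * by setting its redundancy-factor property to RF2. */
static int
create_rf2_cont(daos_handle_t poh)
{
	daos_prop_t *prop;
	int          rc;

	prop = daos_prop_alloc(1);
	if (prop == NULL)
		return -DER_NOMEM;

	prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_FAC;
	prop->dpp_entries[0].dpe_val  = DAOS_PROP_CO_REDUN_RF2;

	rc = daos_cont_create_with_label(poh, "mycont", prop, NULL, NULL);
	daos_prop_free(prop);
	return rc;
}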
pool "DF_UUID": intolerable unavailability: engine rank %u\n", + DP_UUID(svc->ps_uuid), rank); + for (i = 0; i < doms_cnt; i++) { + struct swim_member_state state; + + if (!(doms[i].do_comp.co_status & PO_COMP_ST_UPIN) || + (doms[i].do_comp.co_rank == rank)) + continue; + + rc = crt_rank_state_get(primary_grp, doms[i].do_comp.co_rank, &state); + if (rc != 0 && rc != -DER_NONEXIST) { + D_ERROR("failed to get status of rank %u: %d\n", + doms[i].do_comp.co_rank, rc); + break; + } + + D_DEBUG(DB_MD, "rank/state %d/%d\n", doms[i].do_comp.co_rank, + rc == -DER_NONEXIST ? -1 : state.sms_status); + if (rc == -DER_NONEXIST || state.sms_status == SWIM_MEMBER_DEAD) + D_CRIT("!!! pool "DF_UUID" : intolerable unavailability: engine rank %u\n", + DP_UUID(svc->ps_uuid), doms[i].do_comp.co_rank); + } + + return rc; +} + /* * Perform an update to the pool map of \a svc. * @@ -6532,7 +6575,8 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, struct pool_target_addr_list *tgt_addrs, struct rsvc_hint *hint, bool *p_updated, uint32_t *map_version_p, uint32_t *tgt_map_ver, - struct pool_target_addr_list *inval_tgt_addrs) + struct pool_target_addr_list *inval_tgt_addrs, + enum map_update_source src) { struct rdb_tx tx; struct pool_map *map; @@ -6628,7 +6672,7 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, * If the map modification affects myself, leave it to a new PS leader * if there's another PS replica, or reject it. */ - node = pool_map_find_node_by_rank(map, dss_self_rank()); + node = pool_map_find_dom_by_rank(map, dss_self_rank()); if (node == NULL || !(node->do_comp.co_status & DC_POOL_SVC_MAP_STATES)) { d_rank_list_t *replicas; @@ -6653,6 +6697,33 @@ pool_svc_update_map_internal(struct pool_svc *svc, unsigned int opc, goto out_map; } + /* For SWIM exclude, don't change pool map if the pw_rf is broken or is going to be broken, + * with CRIT log message to ask administrator to bring back the engine. + */ + if (src == MUS_SWIM && opc == MAP_EXCLUDE) { + d_rank_t rank; + int failed_cnt; + + rc = pool_map_update_failed_cnt(map); + if (rc != 0) { + DL_ERROR(rc, DF_UUID": pool_map_update_failed_cnt failed.", + DP_UUID(svc->ps_uuid)); + goto out_map; + } + + D_ASSERT(tgt_addrs->pta_number == 1); + rank = tgt_addrs->pta_addrs->pta_rank; + failed_cnt = pool_map_get_failed_cnt(map, PO_COMP_TP_NODE); + D_INFO(DF_UUID": SWIM exclude rank %d, failed NODE %d\n", + DP_UUID(svc->ps_uuid), rank, failed_cnt); + if (failed_cnt > pw_rf) { + D_CRIT(DF_UUID": exclude rank %d will break pw_rf %d, failed_cnt %d\n", + DP_UUID(svc->ps_uuid), rank, pw_rf, failed_cnt); + rc = pool_map_crit_prompt(svc, map, rank); + goto out_map; + } + } + /* Write the new pool map. 
*/ rc = pool_buf_extract(map, &map_buf); if (rc != 0) @@ -6809,7 +6880,7 @@ pool_update_map_internal(uuid_t pool_uuid, unsigned int opc, bool exclude_rank, rc = pool_svc_update_map_internal(svc, opc, exclude_rank, NULL, 0, NULL, tgts, tgt_addrs, hint, p_updated, map_version_p, tgt_map_ver, - inval_tgt_addrs); + inval_tgt_addrs, MUS_DMG); pool_svc_put_leader(svc); return rc; @@ -6859,8 +6930,8 @@ static int pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, d_rank_list_t *extend_rank_list, uint32_t *extend_domains, uint32_t extend_domains_nr, struct pool_target_addr_list *list, - struct pool_target_addr_list *inval_list_out, - uint32_t *map_version, struct rsvc_hint *hint) + struct pool_target_addr_list *inval_list_out, uint32_t *map_version, + struct rsvc_hint *hint, enum map_update_source src) { struct pool_target_id_list target_list = { 0 }; daos_prop_t prop = { 0 }; @@ -6875,7 +6946,7 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, rc = pool_svc_update_map_internal(svc, opc, exclude_rank, extend_rank_list, extend_domains_nr, extend_domains, &target_list, list, hint, &updated, - map_version, &tgt_map_ver, inval_list_out); + map_version, &tgt_map_ver, inval_list_out, src); if (rc) D_GOTO(out, rc); @@ -6962,10 +7033,9 @@ ds_pool_extend_handler(crt_rpc_t *rpc) goto out; rc = pool_svc_update_map(svc, pool_opc_2map_opc(opc_get(rpc->cr_opc)), - false /* exclude_rank */, - &rank_list, domains, ndomains, + false /* exclude_rank */, &rank_list, domains, ndomains, NULL, NULL, &out->peo_op.po_map_version, - &out->peo_op.po_hint); + &out->peo_op.po_hint, MUS_DMG); pool_svc_put_leader(svc); out: @@ -7067,7 +7137,7 @@ ds_pool_update_handler(crt_rpc_t *rpc, int handler_version) rc = pool_svc_update_map(svc, pool_opc_2map_opc(opc_get(rpc->cr_opc)), false /* exclude_rank */, NULL, NULL, 0, &list, &inval_list_out, &out->pto_op.po_map_version, - &out->pto_op.po_hint); + &out->pto_op.po_hint, MUS_DMG); if (rc != 0) goto out_svc; @@ -7112,7 +7182,7 @@ pool_svc_exclude_rank(struct pool_svc *svc, d_rank_t rank) rc = pool_svc_update_map(svc, pool_opc_2map_opc(POOL_EXCLUDE), true /* exclude_rank */, NULL, NULL, 0, &list, &inval_list_out, &map_version, - NULL /* hint */); + NULL /* hint */, MUS_SWIM); D_DEBUG(DB_MD, "Exclude pool "DF_UUID"/%u rank %u: rc %d\n", DP_UUID(svc->ps_uuid), map_version, rank, rc); diff --git a/src/pool/srv_pool_map.c b/src/pool/srv_pool_map.c index 1cb5632598f..9793df24f01 100644 --- a/src/pool/srv_pool_map.c +++ b/src/pool/srv_pool_map.c @@ -378,7 +378,7 @@ ds_pool_map_tgts_update(struct pool_map *map, struct pool_target_id_list *tgts, return -DER_NONEXIST; } - dom = pool_map_find_node_by_rank(map, target->ta_comp.co_rank); + dom = pool_map_find_dom_by_rank(map, target->ta_comp.co_rank); if (dom == NULL) { D_ERROR("Got request to change nonexistent rank %u" " in map %p\n", diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index e39072568e1..29f012d5844 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -21,19 +21,19 @@ int map_ranks_init(const struct pool_map *map, unsigned int status, d_rank_list_t *ranks) { struct pool_domain *domains = NULL; - int nnodes; + int nranks; int n = 0; int i; d_rank_t *rs; - nnodes = pool_map_find_nodes((struct pool_map *)map, + nranks = pool_map_find_ranks((struct pool_map *)map, PO_COMP_ID_ALL, &domains); - if (nnodes == 0) { + if (nranks == 0) { D_ERROR("no nodes in pool map\n"); return -DER_IO; } - for (i = 0; i < nnodes; i++) { + for (i = 0; i < nranks; i++) { if (status & 
domains[i].do_comp.co_status) n++; } @@ -52,7 +52,7 @@ map_ranks_init(const struct pool_map *map, unsigned int status, d_rank_list_t *r ranks->rl_ranks = rs; n = 0; - for (i = 0; i < nnodes; i++) { + for (i = 0; i < nranks; i++) { if (status & domains[i].do_comp.co_status) { D_ASSERT(n < ranks->rl_nr); ranks->rl_ranks[n] = domains[i].do_comp.co_rank; @@ -85,7 +85,7 @@ ds_pool_map_rank_up(struct pool_map *map, d_rank_t rank) struct pool_domain *node; int rc; - rc = pool_map_find_nodes(map, rank, &node); + rc = pool_map_find_ranks(map, rank, &node); if (rc == 0) return false; D_ASSERTF(rc == 1, "%d\n", rc); @@ -921,7 +921,7 @@ testu_create_pool_map(d_rank_t *ranks, int n_ranks, d_rank_t *down_ranks, int n_ for (i = 0; i < n_down_ranks; i++) { struct pool_domain *d; - d = pool_map_find_node_by_rank(map, down_ranks[i]); + d = pool_map_find_dom_by_rank(map, down_ranks[i]); D_ASSERT(d != NULL); d->do_comp.co_status = PO_COMP_ST_DOWN; } diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index fb0b4261b75..818d116eaea 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -648,7 +648,7 @@ rebuild_leader_status_check(struct ds_pool *pool, uint32_t op, for (i = 0; i < excluded.rl_nr; i++) { struct pool_domain *dom; - dom = pool_map_find_node_by_rank(pool->sp_map, excluded.rl_ranks[i]); + dom = pool_map_find_dom_by_rank(pool->sp_map, excluded.rl_ranks[i]); D_ASSERT(dom != NULL); if (rgt->rgt_opc == RB_OP_REBUILD) { @@ -757,7 +757,7 @@ rebuild_global_pool_tracker_create(struct ds_pool *pool, uint32_t ver, uint32_t uint32_t opc, struct rebuild_global_pool_tracker **p_rgt) { struct rebuild_global_pool_tracker *rgt; - int node_nr; + int rank_nr; struct pool_domain *doms; int i; int rc = 0; @@ -767,11 +767,11 @@ rebuild_global_pool_tracker_create(struct ds_pool *pool, uint32_t ver, uint32_t return -DER_NOMEM; D_INIT_LIST_HEAD(&rgt->rgt_list); - node_nr = pool_map_find_nodes(pool->sp_map, PO_COMP_ID_ALL, &doms); - if (node_nr < 0) - D_GOTO(out, rc = node_nr); + rank_nr = pool_map_find_ranks(pool->sp_map, PO_COMP_ID_ALL, &doms); + if (rank_nr < 0) + D_GOTO(out, rc = rank_nr); - D_ALLOC_ARRAY(rgt->rgt_servers, node_nr); + D_ALLOC_ARRAY(rgt->rgt_servers, rank_nr); if (rgt->rgt_servers == NULL) D_GOTO(out, rc = -DER_NOMEM); @@ -783,9 +783,9 @@ rebuild_global_pool_tracker_create(struct ds_pool *pool, uint32_t ver, uint32_t if (rc != ABT_SUCCESS) D_GOTO(out, rc = dss_abterr2der(rc)); - for (i = 0; i < node_nr; i++) + for (i = 0; i < rank_nr; i++) rgt->rgt_servers[i].rank = doms[i].do_comp.co_rank; - rgt->rgt_servers_number = node_nr; + rgt->rgt_servers_number = rank_nr; uuid_copy(rgt->rgt_pool_uuid, pool->sp_uuid); rgt->rgt_rebuild_ver = ver; @@ -954,7 +954,7 @@ rebuild_scan_broadcast(struct ds_pool *pool, struct rebuild_global_pool_tracker for (i = 0; i < up_ranks.rl_nr; i++) { struct pool_domain *dom; - dom = pool_map_find_node_by_rank(pool->sp_map, up_ranks.rl_ranks[i]); + dom = pool_map_find_dom_by_rank(pool->sp_map, up_ranks.rl_ranks[i]); D_ASSERT(dom != NULL); D_DEBUG(DB_REBUILD, DF_RB " rank %u co_in_ver %u\n", DP_RB_RGT(rgt), up_ranks.rl_ranks[i], dom->do_comp.co_in_ver); diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 248617c1b36..440ffe68f82 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -434,6 +434,7 @@ class EngineYamlParameters(YamlParameters): REQUIRED_ENV_VARS = { "common": [ "D_LOG_FILE_APPEND_PID=1", + "DAOS_POOL_RF=4", "COVFILE=/tmp/test.cov"], "ofi+tcp": [], 
"ofi+tcp;ofi_rxm": [],