Skip to content

Commit

Permalink
DAOS-16837 container: Add client-side DFS metrics (#15544)
Browse files Browse the repository at this point in the history
If metrics are enabled for a POSIX container, create
a new pool/$UUID/container/$UUID/dfs metrics root in
the client telemetry to provide DFS-oriented metrics
(POSIX operation counts, file I/O sizes, etc.).

Also fixes a bug in the agent code for pruning unused
client telemetry segments.

Required-githooks: true

Signed-off-by: Michael MacDonald <[email protected]>
  • Loading branch information
mjmac committed Jan 14, 2025
1 parent 1bf85e1 commit 46ccc2f
Show file tree
Hide file tree
Showing 23 changed files with 628 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/client/dfs/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def scons():
libraries = ['daos_common', 'daos', 'uuid', 'gurt']

dfs_src = ['common.c', 'cont.c', 'dir.c', 'file.c', 'io.c', 'lookup.c', 'mnt.c', 'obj.c',
'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c']
'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c', 'metrics.c']
dfs = denv.d_library('dfs', dfs_src, LIBS=libraries)
denv.Install('$PREFIX/lib64/', dfs)

Expand Down
4 changes: 4 additions & 0 deletions src/client/dfs/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,8 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz
stbuf->st_atim.tv_sec = stbuf->st_mtim.tv_sec;
stbuf->st_atim.tv_nsec = stbuf->st_mtim.tv_nsec;
}

DFS_OP_STAT_INCR(dfs, DOS_STAT);
return 0;
}

Expand Down Expand Up @@ -710,6 +712,7 @@ open_dir(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, struct
D_ASSERT(rc == 0);
dir->d.chunk_size = entry->chunk_size;
dir->d.oclass = entry->oclass;
DFS_OP_STAT_INCR(dfs, DOS_MKDIR);
return 0;
}
}
Expand Down Expand Up @@ -742,6 +745,7 @@ open_dir(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, struct
oid_cp(&dir->oid, entry->oid);
dir->d.chunk_size = entry->chunk_size;
dir->d.oclass = entry->oclass;
DFS_OP_STAT_INCR(dfs, DOS_OPENDIR);
return 0;
}

Expand Down
4 changes: 4 additions & 0 deletions src/client/dfs/dfs_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <daos.h>
#include <daos_fs.h>

#include "metrics.h"

/** D-key name of SB metadata */
#define SB_DKEY "DFS_SB_METADATA"

Expand Down Expand Up @@ -190,6 +192,8 @@ struct dfs {
struct dfs_mnt_hdls *cont_hdl;
/** the root dir stat buf */
struct stat root_stbuf;
/** DFS top-level metrics */
struct dfs_metrics *metrics;
};

struct dfs_entry {
Expand Down
2 changes: 2 additions & 0 deletions src/client/dfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ dfs_mkdir(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, daos_ocl
if (rc != 0)
return daos_der2errno(rc);

DFS_OP_STAT_INCR(dfs, DOS_MKDIR);
return rc;
}

Expand Down Expand Up @@ -220,6 +221,7 @@ dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj
if (oid)
oid_cp(oid, entry.oid);

DFS_OP_STAT_INCR(dfs, DOS_UNLINK);
out:
rc = check_tx(th, rc);
if (rc == ERESTART)
Expand Down
44 changes: 42 additions & 2 deletions src/client/dfs/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,20 @@

#include "dfs_internal.h"

/**
 * Account transferred file bytes in the per-container DFS metrics.
 *
 * Does nothing when \a dfs is NULL or has no metrics attached. A direction whose byte count is
 * zero is not recorded.
 *
 * \param[in] dfs		DFS mount whose metrics are updated.
 * \param[in] read_bytes	Bytes read; added to the dm_read_bytes gauge when non-zero.
 * \param[in] write_bytes	Bytes written; added to the dm_write_bytes gauge when non-zero.
 */
static void
dfs_update_file_metrics(dfs_t *dfs, daos_size_t read_bytes, daos_size_t write_bytes)
{
	struct dfs_metrics *dm;

	if (dfs == NULL)
		return;

	dm = dfs->metrics;
	if (dm == NULL)
		return;

	if (read_bytes > 0) {
		d_tm_inc_gauge(dm->dm_read_bytes, read_bytes);
	}

	if (write_bytes > 0) {
		d_tm_inc_gauge(dm->dm_write_bytes, write_bytes);
	}
}

struct dfs_read_params {
dfs_t *dfs;
daos_size_t *read_size;
daos_array_iod_t arr_iod;
daos_range_t rg;
Expand All @@ -35,6 +48,8 @@ read_cb(tse_task_t *task, void *data)
D_GOTO(out, rc);
}

DFS_OP_STAT_INCR(params->dfs, DOS_READ);
dfs_update_file_metrics(params->dfs, params->arr_iod.arr_nr_read, 0);
*params->read_size = params->arr_iod.arr_nr_read;
out:
D_FREE(params);
Expand All @@ -61,6 +76,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_li
if (params == NULL)
D_GOTO(err_task, rc = -DER_NOMEM);

params->dfs = dfs;
params->read_size = read_size;

/** set array location */
Expand Down Expand Up @@ -90,6 +106,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_li
* completion cb that frees params in this case, so we can just ignore the rc here.
*/
dc_task_schedule(task, true);

return 0;

err_params:
Expand Down Expand Up @@ -125,6 +142,7 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_READ);
return 0;
}

Expand All @@ -146,7 +164,9 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size
return daos_der2errno(rc);
}

DFS_OP_STAT_INCR(dfs, DOS_READ);
*read_size = iod.arr_nr_read;
dfs_update_file_metrics(dfs, iod.arr_nr_read, 0);
return 0;
}

Expand All @@ -173,6 +193,7 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_READ);
return 0;
}

Expand All @@ -189,7 +210,9 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz
return daos_der2errno(rc);
}

DFS_OP_STAT_INCR(dfs, DOS_READ);
*read_size = arr_iod.arr_nr_read;
dfs_update_file_metrics(dfs, arr_iod.arr_nr_read, 0);
return 0;
}

Expand Down Expand Up @@ -223,6 +246,7 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_WRITE);
return 0;
}

Expand All @@ -238,8 +262,12 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve
daos_event_errno_rc(ev);

rc = daos_array_write(obj->oh, DAOS_TX_NONE, &iod, sgl, ev);
if (rc)
if (rc == 0) {
DFS_OP_STAT_INCR(dfs, DOS_WRITE);
dfs_update_file_metrics(dfs, 0, buf_size);
} else {
D_ERROR("daos_array_write() failed, " DF_RC "\n", DP_RC(rc));
}

return daos_der2errno(rc);
}
Expand All @@ -248,6 +276,8 @@ int
dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev)
{
daos_array_iod_t arr_iod;
daos_size_t buf_size;
int i;
int rc;

if (dfs == NULL || !dfs->mounted)
Expand All @@ -266,6 +296,7 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_WRITE);
return 0;
}

Expand All @@ -276,9 +307,18 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
if (ev)
daos_event_errno_rc(ev);

buf_size = 0;
if (dfs->metrics != NULL && sgl != NULL)
for (i = 0; i < sgl->sg_nr; i++)
buf_size += sgl->sg_iovs[i].iov_len;

rc = daos_array_write(obj->oh, DAOS_TX_NONE, &arr_iod, sgl, ev);
if (rc)
if (rc == 0) {
DFS_OP_STAT_INCR(dfs, DOS_WRITE);
dfs_update_file_metrics(dfs, 0, buf_size);
} else {
D_ERROR("daos_array_write() failed (%d)\n", rc);
}

return daos_der2errno(rc);
}
174 changes: 174 additions & 0 deletions src/client/dfs/metrics.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/**
 * (C) Copyright 2024 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-2-Clause-Patent
 */
/* NOTE(review): the CI "Copyright check" flagged the copyright line above as out of date. */
#define D_LOGFAC DD_FAC(dfs)

#include <uuid/uuid.h>
#include <fcntl.h>

#include <daos.h>
#include <daos_fs.h>
#include <daos_fs_sys.h>
#include <daos/common.h>
#include <daos/container.h>
#include <daos/metrics.h>
#include <daos/pool.h>
#include <daos/job.h>
#include <gurt/telemetry_common.h>
#include <gurt/telemetry_producer.h>
#include <gurt/telemetry_consumer.h>

#include "metrics.h"
#include "dfs_internal.h"

/** Name of the DFS metrics directory, created below pool/<uuid>/container/<uuid>/ */
#define DFS_METRICS_ROOT "dfs"

/**
 * Space reserved for the per-operation counters: DOS_LIMIT metric slots.
 * NOTE(review): DOS_LIMIT is defined outside this file; presumably it is the number of
 * DFS op stats -- confirm against metrics.h.
 */
#define STAT_METRICS_SIZE (D_TM_METRIC_SIZE * DOS_LIMIT)
/**
 * Space reserved for the file I/O metrics: two groups of NR_SIZE_BUCKETS slots plus two
 * further slots. This lines up with file_stats_init(), which creates two gauges
 * (read_bytes and write_bytes), each with an NR_SIZE_BUCKETS-bucket histogram.
 */
#define FILE_METRICS_SIZE (((D_TM_METRIC_SIZE * NR_SIZE_BUCKETS) * 2) + D_TM_METRIC_SIZE * 2)
/** Total size requested for the ephemeral DFS metrics directory in dfs_metrics_init() */
#define DFS_METRICS_SIZE (STAT_METRICS_SIZE + FILE_METRICS_SIZE)

/**
 * Format the telemetry path "pool/<pool_uuid>/container/<cont_uuid>/<path>" into \a buf.
 *
 * \a buf must be a char array rather than a pointer, because its capacity is taken with
 * sizeof(buf). snprintf() truncates the result if it does not fit; the return value is not
 * checked here.
 *
 * The expansion is a single expression with no trailing semicolon: the caller writes the
 * terminating ';', which keeps the macro safe inside unbraced if/else bodies.
 */
#define SPRINTF_TM_PATH(buf, pool_uuid, cont_uuid, path)                                           \
	snprintf(buf, sizeof(buf), "pool/" DF_UUIDF "/container/" DF_UUIDF "/%s",                  \
		 DP_UUID(pool_uuid), DP_UUID(cont_uuid), path)

/**
 * Create the call counter for one DFS operation and advance to the next slot.
 *
 * The counter is registered as a D_TM_COUNTER at
 * "pool/<uuid>/container/<uuid>/" DFS_METRICS_ROOT "/ops/<name>" and its node is stored in
 * metrics->dm_op_stats[i]; \a name is stringified verbatim into the path, the description
 * and the error message. Arguments after \a name are ignored.
 *
 * This macro is not self-contained. The expanding function must have these locals in scope:
 * tmp_path (char array), pool_uuid, cont_uuid, metrics, i and rc. On failure it logs the
 * error and executes a bare "return;", so it may only be expanded inside a function
 * returning void. It expands to several statements (not wrapped in do/while), so it must not
 * be used as the body of an unbraced if/else.
 */
#define ADD_STAT_METRIC(name, ...) \
SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, DFS_METRICS_ROOT "/ops/" #name); \
rc = d_tm_add_metric(&metrics->dm_op_stats[i], D_TM_COUNTER, "Count of " #name " calls", \
"calls", tmp_path); \
if (rc != 0) { \
DL_ERROR(rc, "failed to create " #name " counter"); \
return; \
} \
i++;

/**
 * Register the per-operation call counters under <DFS root>/ops/.
 *
 * Does nothing when \a metrics is NULL. The work is done by expanding ADD_STAT_METRIC through
 * D_FOREACH_DFS_OP_STAT (defined outside this file; presumably one expansion per DFS
 * operation -- confirm against metrics.h). Because ADD_STAT_METRIC returns on the first
 * failure, counters after a failed one are not created.
 *
 * \param[in,out] metrics	DFS metrics whose dm_op_stats[] entries are filled in.
 * \param[in]     pool_uuid	Pool UUID used in the metric paths.
 * \param[in]     cont_uuid	Container UUID used in the metric paths.
 */
static void
op_stats_init(struct dfs_metrics *metrics, uuid_t pool_uuid, uuid_t cont_uuid)
{
/* tmp_path, i and rc are referenced by name from inside ADD_STAT_METRIC; do not rename */
char tmp_path[D_TM_MAX_NAME_LEN] = {0};
int i = 0;
int rc;

if (metrics == NULL)
return;

/* each expansion fills dm_op_stats[i] and then increments i */
D_FOREACH_DFS_OP_STAT(ADD_STAT_METRIC);
}

/**
 * Register the container-level "mount_time" timestamp metric.
 *
 * The metric lives at "pool/<pool_uuid>/container/<cont_uuid>/mount_time" and its node is
 * stored in metrics->dm_mount_time. A failure is logged and otherwise ignored. Nothing is
 * done when \a metrics is NULL.
 *
 * \param[in,out] metrics	DFS metrics receiving the dm_mount_time node.
 * \param[in]     pool_uuid	Pool UUID used in the metric path.
 * \param[in]     cont_uuid	Container UUID used in the metric path.
 */
static void
cont_stats_init(struct dfs_metrics *metrics, uuid_t pool_uuid, uuid_t cont_uuid)
{
	char mount_path[D_TM_MAX_NAME_LEN] = {0};
	int  status;

	if (metrics != NULL) {
		SPRINTF_TM_PATH(mount_path, pool_uuid, cont_uuid, "mount_time");

		status = d_tm_add_metric(&metrics->dm_mount_time, D_TM_TIMESTAMP,
					 "container mount time", NULL, mount_path);
		if (status != 0) {
			DL_ERROR(status, "failed to create mount_time timestamp");
		}
	}
}

/**
 * Register the file I/O size metrics under the DFS metrics root.
 *
 * Two D_TM_STATS_GAUGE metrics are created, <DFS root>/read_bytes and <DFS root>/write_bytes,
 * and each gets a histogram with NR_SIZE_BUCKETS buckets (initial width 256, multiplier 2).
 * Failures are logged and otherwise ignored; read and write are set up independently, so a
 * failure on one does not prevent the other. Nothing is done when \a metrics is NULL.
 *
 * The histogram is only initialised when its gauge was created successfully, so a failed
 * d_tm_add_metric() is reported once and no histogram call is made on a missing node.
 *
 * \param[in,out] metrics	DFS metrics receiving dm_read_bytes / dm_write_bytes.
 * \param[in]     pool_uuid	Pool UUID used in the metric paths.
 * \param[in]     cont_uuid	Container UUID used in the metric paths.
 */
static void
file_stats_init(struct dfs_metrics *metrics, uuid_t pool_uuid, uuid_t cont_uuid)
{
	char tmp_path[D_TM_MAX_NAME_LEN] = {0};
	int  rc                          = 0;

	if (metrics == NULL)
		return;

	SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, DFS_METRICS_ROOT "/read_bytes");
	rc = d_tm_add_metric(&metrics->dm_read_bytes, D_TM_STATS_GAUGE, "dfs read bytes", "bytes",
			     tmp_path);
	if (rc != 0) {
		DL_ERROR(rc, "failed to create dfs read_bytes counter");
	} else {
		rc = d_tm_init_histogram(metrics->dm_read_bytes, tmp_path, NR_SIZE_BUCKETS, 256, 2,
					 "bytes");
		if (rc != 0)
			DL_ERROR(rc, "Failed to init dfs read size histogram");
	}

	SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, DFS_METRICS_ROOT "/write_bytes");
	rc = d_tm_add_metric(&metrics->dm_write_bytes, D_TM_STATS_GAUGE, "dfs write bytes", "bytes",
			     tmp_path);
	if (rc != 0) {
		DL_ERROR(rc, "failed to create dfs write_bytes counter");
	} else {
		rc = d_tm_init_histogram(metrics->dm_write_bytes, tmp_path, NR_SIZE_BUCKETS, 256, 2,
					 "bytes");
		if (rc != 0)
			DL_ERROR(rc, "Failed to init dfs write size histogram");
	}
}

/**
 * Report whether client-side metrics are enabled.
 *
 * \return	The current value of the daos_client_metric flag.
 */
bool
dfs_metrics_enabled(void)
{
	/* set in client/api/metrics.c */
	return daos_client_metric;
}

/**
 * Set up the client-side DFS metrics for a mounted container.
 *
 * Steps, in order:
 *  1. resolve the pool and container UUIDs from the handles in \a dfs;
 *  2. open or create the per-process telemetry root, named after the PID (an already
 *     initialised root, -DER_ALREADY, is accepted);
 *  3. allocate dfs->metrics;
 *  4. add the ephemeral directory pool/<uuid>/container/<uuid>/dfs;
 *  5. create the container, per-operation and file I/O metrics, then record the mount time.
 *
 * The function returns nothing. If any of steps 1-4 fails, the error is logged and
 * dfs->metrics is released with D_FREE(). Failures while creating individual metrics in
 * step 5 are logged by the helpers and do not undo the setup.
 *
 * \param[in,out] dfs	DFS mount to attach metrics to; ignored when NULL.
 */
void
dfs_metrics_init(dfs_t *dfs)
{
	uuid_t pool_uuid;
	uuid_t cont_uuid;
	char   root_name[D_TM_MAX_NAME_LEN];
	pid_t  pid = getpid();
	/* NOTE(review): the reason for the 3 extra metric slots is not visible here -- confirm */
	size_t root_size = DFS_METRICS_SIZE + (D_TM_METRIC_SIZE * 3);
	int    rc;

	if (dfs == NULL)
		return;

	rc = dc_pool_hdl2uuid(dfs->poh, NULL, &pool_uuid);
	if (rc != 0) {
		DL_ERROR(rc, "failed to get pool UUID");
		goto error;
	}

	rc = dc_cont_hdl2uuid(dfs->coh, NULL, &cont_uuid);
	if (rc != 0) {
		DL_ERROR(rc, "failed to get container UUID");
		goto error;
	}

	snprintf(root_name, sizeof(root_name), "%d", pid);
	/* if only container-level metrics are enabled; this will init a root for them */
	rc = d_tm_init_with_name(d_tm_cli_pid_key(pid), root_size, D_TM_OPEN_OR_CREATE, root_name);
	if (rc != 0 && rc != -DER_ALREADY) {
		DL_ERROR(rc, "failed to init DFS metrics");
		goto error;
	}

	D_ALLOC_PTR(dfs->metrics);
	if (dfs->metrics == NULL) {
		/* D_ERROR messages carry their own newline, like the other D_ERROR calls in DFS */
		D_ERROR("failed to alloc DFS metrics\n");
		goto error;
	}

	/* root_name is reused as scratch space for the DFS directory path */
	SPRINTF_TM_PATH(root_name, pool_uuid, cont_uuid, DFS_METRICS_ROOT);
	rc = d_tm_add_ephemeral_dir(NULL, DFS_METRICS_SIZE, root_name);
	if (rc != 0) {
		DL_ERROR(rc, "failed to add DFS metrics dir");
		goto error;
	}

	cont_stats_init(dfs->metrics, pool_uuid, cont_uuid);
	op_stats_init(dfs->metrics, pool_uuid, cont_uuid);
	file_stats_init(dfs->metrics, pool_uuid, cont_uuid);

	d_tm_record_timestamp(dfs->metrics->dm_mount_time);
	return;

error:
	/* no NULL guard needed: freeing a NULL pointer is a no-op */
	D_FREE(dfs->metrics);
}

/**
 * Release the DFS metrics attached to a mount.
 *
 * Frees dfs->metrics with D_FREE(). Safe to call when no metrics were attached
 * (dfs->metrics is NULL) and, like dfs_metrics_init(), when \a dfs itself is NULL.
 *
 * \param[in,out] dfs	DFS mount whose metrics are released; ignored when NULL.
 */
void
dfs_metrics_fini(dfs_t *dfs)
{
	if (dfs == NULL)
		return;

	D_FREE(dfs->metrics);
}
Loading

0 comments on commit 46ccc2f

Please sign in to comment.