Skip to content

Commit

Permalink
DAOS-14561 vos: Adjust GC metrics (#13368)
Browse files Browse the repository at this point in the history
  * Switch the GC gauges to counters so that they can be
    monitored as rates instead of absolute values
  * Add an aggregation failure counter

Also adds a small change to the Go builder so that it handles
Go versions carrying a '-' suffix (the suffix is dropped before the version check).

Signed-off-by: Michael MacDonald <[email protected]>
  • Loading branch information
mjmac authored Nov 29, 2023
1 parent c7df5df commit ea3ae8e
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 14 deletions.
2 changes: 2 additions & 0 deletions site_scons/site_tools/go_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def _check_go_version(context):

# go version go1.2.3 Linux/amd64
go_version = out.split(' ')[2].replace('go', '')
if '-' in go_version:
go_version = go_version.split('-')[0]
if len([x for x, y in
zip(go_version.split('.'), MIN_GO_VERSION.split('.'))
if int(x) < int(y)]) > 0:
Expand Down
18 changes: 16 additions & 2 deletions src/vos/vos_aggregate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2355,7 +2355,12 @@ vos_aggregate_pre_cb(daos_handle_t ih, vos_iter_entry_t *entry,
}

if (rc < 0) {
struct vos_agg_metrics *vam = agg_cont2metrics(cont);

D_ERROR("VOS aggregation failed: "DF_RC"\n", DP_RC(rc));
if (vam && vam->vam_fail_count)
d_tm_inc_counter(vam->vam_fail_count, 1);

return rc;
}

Expand Down Expand Up @@ -2428,7 +2433,11 @@ vos_aggregate_post_cb(daos_handle_t ih, vos_iter_entry_t *entry,
inc_agg_counter(agg_param, type, AGG_OP_DEL);
rc = 0;
} else if (rc != 0) {
struct vos_agg_metrics *vam = agg_cont2metrics(cont);

D_ERROR("VOS aggregation failed: %d\n", rc);
if (vam && vam->vam_fail_count)
d_tm_inc_counter(vam->vam_fail_count, 1);

/*
* -DER_TX_BUSY error indicates current ilog aggregation
Expand All @@ -2439,8 +2448,6 @@ vos_aggregate_post_cb(daos_handle_t ih, vos_iter_entry_t *entry,
* orphan the current entry due to incarnation log semantics.
*/
if (rc == -DER_TX_BUSY) {
struct vos_agg_metrics *vam = agg_cont2metrics(cont);

agg_param->ap_in_progress = 1;
rc = 0;
switch (type) {
Expand Down Expand Up @@ -2733,6 +2740,13 @@ vos_aggregate(daos_handle_t coh, daos_epoch_range_t *epr,
free_agg_data:
D_FREE(ad);

if (rc < 0) {
struct vos_agg_metrics *vam = agg_cont2metrics(cont);

if (vam && vam->vam_fail_count)
d_tm_inc_counter(vam->vam_fail_count, 1);
}

return rc;
}

Expand Down
6 changes: 6 additions & 0 deletions src/vos/vos_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,12 @@ vos_metrics_alloc(const char *path, int tgt_id)
if (rc)
D_WARN("Failed to create 'merged_size' telemetry : "DF_RC"\n", DP_RC(rc));

/* Counter of VOS aggregation failures */
rc = d_tm_add_metric(&vam->vam_fail_count, D_TM_COUNTER, "aggregation failures", NULL,
"%s/%s/fail_count/tgt_%u", path, VOS_AGG_DIR, tgt_id);
if (rc)
DL_WARN(rc, "Failed to create 'fail_count' telemetry");

/* Metrics related to VOS checkpointing */
vos_chkpt_metrics_init(&vp_metrics->vp_chkpt_metrics, path, tgt_id);

Expand Down
24 changes: 12 additions & 12 deletions src/vos/vos_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -684,12 +684,12 @@ gc_update_stats(struct vos_pool *pool)

if (pool->vp_metrics != NULL) {
vgm = &pool->vp_metrics->vp_gc_metrics;
d_tm_set_gauge(vgm->vgm_cont_del, stat->gs_conts);
d_tm_set_gauge(vgm->vgm_obj_del, stat->gs_objs);
d_tm_set_gauge(vgm->vgm_dkey_del, stat->gs_dkeys);
d_tm_set_gauge(vgm->vgm_akey_del, stat->gs_akeys);
d_tm_set_gauge(vgm->vgm_ev_del, stat->gs_recxs);
d_tm_set_gauge(vgm->vgm_sv_del, stat->gs_singvs);
d_tm_inc_counter(vgm->vgm_cont_del, stat->gs_conts);
d_tm_inc_counter(vgm->vgm_obj_del, stat->gs_objs);
d_tm_inc_counter(vgm->vgm_dkey_del, stat->gs_dkeys);
d_tm_inc_counter(vgm->vgm_akey_del, stat->gs_akeys);
d_tm_inc_counter(vgm->vgm_ev_del, stat->gs_recxs);
d_tm_inc_counter(vgm->vgm_sv_del, stat->gs_singvs);
}

gstat->gs_conts += stat->gs_conts;
Expand Down Expand Up @@ -1286,37 +1286,37 @@ vos_gc_metrics_init(struct vos_gc_metrics *vgm, const char *path, int tgt_id)
D_WARN("Failed to create 'duration' telemetry: " DF_RC "\n", DP_RC(rc));

/* GC container deletion */
rc = d_tm_add_metric(&vgm->vgm_cont_del, D_TM_STATS_GAUGE, "GC containers deleted", NULL,
rc = d_tm_add_metric(&vgm->vgm_cont_del, D_TM_COUNTER, "GC containers deleted", NULL,
"%s/%s/cont_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
if (rc)
D_WARN("Failed to create 'cont_del' telemetry: " DF_RC "\n", DP_RC(rc));

/* GC object deletion */
rc = d_tm_add_metric(&vgm->vgm_obj_del, D_TM_STATS_GAUGE, "GC objects deleted", NULL,
rc = d_tm_add_metric(&vgm->vgm_obj_del, D_TM_COUNTER, "GC objects deleted", NULL,
"%s/%s/obj_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
if (rc)
D_WARN("Failed to create 'obj_del' telemetry: " DF_RC "\n", DP_RC(rc));

/* GC dkey deletion */
rc = d_tm_add_metric(&vgm->vgm_dkey_del, D_TM_STATS_GAUGE, "GC dkeys deleted", NULL,
rc = d_tm_add_metric(&vgm->vgm_dkey_del, D_TM_COUNTER, "GC dkeys deleted", NULL,
"%s/%s/dkey_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
if (rc)
D_WARN("Failed to create 'dkey_del' telemetry: " DF_RC "\n", DP_RC(rc));

/* GC akey deletion */
rc = d_tm_add_metric(&vgm->vgm_akey_del, D_TM_STATS_GAUGE, "GC akeys deleted", NULL,
rc = d_tm_add_metric(&vgm->vgm_akey_del, D_TM_COUNTER, "GC akeys deleted", NULL,
"%s/%s/akey_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
if (rc)
D_WARN("Failed to create 'akey_del' telemetry: " DF_RC "\n", DP_RC(rc));

/* GC ev deletion */
rc = d_tm_add_metric(&vgm->vgm_ev_del, D_TM_STATS_GAUGE, "GC ev deleted", NULL,
rc = d_tm_add_metric(&vgm->vgm_ev_del, D_TM_COUNTER, "GC ev deleted", NULL,
"%s/%s/ev_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
if (rc)
D_WARN("Failed to create 'ev_del' telemetry: " DF_RC "\n", DP_RC(rc));

/* GC sv deletion */
rc = d_tm_add_metric(&vgm->vgm_sv_del, D_TM_STATS_GAUGE, "GC sv deleted", NULL,
rc = d_tm_add_metric(&vgm->vgm_sv_del, D_TM_COUNTER, "GC sv deleted", NULL,
"%s/%s/sv_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
if (rc)
D_WARN("Failed to create 'sv_del' telemetry: " DF_RC "\n", DP_RC(rc));
Expand Down
1 change: 1 addition & 0 deletions src/vos/vos_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ struct vos_agg_metrics {
struct d_tm_node_t *vam_del_ev; /* Deleted EV records */
struct d_tm_node_t *vam_merge_recs; /* Total merged EV records */
struct d_tm_node_t *vam_merge_size; /* Total merged size */
struct d_tm_node_t *vam_fail_count; /* Aggregation failed */
};

struct vos_gc_metrics {
Expand Down

0 comments on commit ea3ae8e

Please sign in to comment.