VOS: add garbage-collection and total-space telemetry

What this patch does:
  - vos_gc.c:       new gc_update_stats() and vos_gc_metrics_init(); every
                    gc_reclaim_pool() exit now goes through gc_update_stats();
                    vos_gc_pool() times each loop iteration and counts
                    tight/slack iterations.
  - vos_internal.h: new struct vos_gc_metrics; pool GC statistics are split
                    into per-slice (vp_gc_stat) and accumulated
                    (vp_gc_stat_global) counters.
  - vos_common.c / vos_space.c: new scm_total / nvme_total gauges.
  - vos_pool.c / vts_gc.c: readers switched to vp_gc_stat_global; test uses
                    assert_rc_equal().

Notes on this copy of the patch:
  - It was rebuilt from a copy whose line breaks and intra-line whitespace
    had been lost.  Blank lines were re-derived from the hunk line counts and
    indentation was re-created, so apply it with whitespace differences
    ignored (e.g. "git apply --ignore-whitespace").
  - The "index" lines are carried over unchanged.  The post-image hashes for
    vos_gc.c and vos_internal.h no longer match, because header comments were
    added and the vos_gc_metrics_init() prototype parameter was renamed.

diff --git a/src/vos/tests/vts_gc.c b/src/vos/tests/vts_gc.c
index 21c37219e7a..cdc08369cb6 100644
--- a/src/vos/tests/vts_gc.c
+++ b/src/vos/tests/vts_gc.c
@@ -298,7 +298,7 @@ gc_key_test(void **state)
 	int rc;
 
 	rc = gc_key_run(args);
-	assert_int_equal(rc, 0);
+	assert_rc_equal(rc, 0);
 }
 
 static int
diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c
index 45252f9da0e..d4f8e31de8a 100644
--- a/src/vos/vos_common.c
+++ b/src/vos/vos_common.c
@@ -774,6 +774,21 @@ vos_metrics_alloc(const char *path, int tgt_id)
 	if (rc)
 		D_WARN("Failed to create 'nvme_used' telemetry : "DF_RC"\n", DP_RC(rc));
 
+	/* VOS space SCM total metric */
+	rc = d_tm_add_metric(&vsm->vsm_scm_total, D_TM_GAUGE, "SCM space total", "bytes",
+			     "%s/%s/scm_total/tgt_%u", path, VOS_SPACE_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'scm_total' telemetry : " DF_RC "\n", DP_RC(rc));
+
+	/* VOS space NVME total metric */
+	rc = d_tm_add_metric(&vsm->vsm_nvme_total, D_TM_GAUGE, "NVME space total", "bytes",
+			     "%s/%s/nvme_total/tgt_%u", path, VOS_SPACE_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'nvme_total' telemetry : " DF_RC "\n", DP_RC(rc));
+
+	/** garbage collection metrics */
+	vos_gc_metrics_init(&vp_metrics->vp_gc_metrics, path, tgt_id);
+
 	/* Initialize the vos_space_metrics timeout counter */
 	vsm->vsm_last_update_ts = 0;
 
diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c
index 136166f93da..16779819bd5 100644
--- a/src/vos/vos_gc.c
+++ b/src/vos/vos_gc.c
@@ -675,6 +675,38 @@ gc_get_container(struct vos_pool *pool)
 	return cont;
 }
 
+/**
+ * Close one GC slice: export the per-slice counters in vp_gc_stat as gauges
+ * (only if the pool has metrics), add them to vp_gc_stat_global, and zero
+ * vp_gc_stat so the next slice starts from scratch.
+ */
+static void
+gc_update_stats(struct vos_pool *pool)
+{
+	struct vos_gc_stat    *stat  = &pool->vp_gc_stat;
+	struct vos_gc_stat    *gstat = &pool->vp_gc_stat_global;
+	struct vos_gc_metrics *vgm;
+
+	if (pool->vp_metrics != NULL) {
+		vgm = &pool->vp_metrics->vp_gc_metrics;
+		d_tm_set_gauge(vgm->vgm_cont_del, stat->gs_conts);
+		d_tm_set_gauge(vgm->vgm_obj_del, stat->gs_objs);
+		d_tm_set_gauge(vgm->vgm_dkey_del, stat->gs_dkeys);
+		d_tm_set_gauge(vgm->vgm_akey_del, stat->gs_akeys);
+		d_tm_set_gauge(vgm->vgm_ev_del, stat->gs_recxs);
+		d_tm_set_gauge(vgm->vgm_sv_del, stat->gs_singvs);
+	}
+
+	gstat->gs_conts += stat->gs_conts;
+	gstat->gs_objs += stat->gs_objs;
+	gstat->gs_dkeys += stat->gs_dkeys;
+	gstat->gs_akeys += stat->gs_akeys;
+	gstat->gs_recxs += stat->gs_recxs;
+	gstat->gs_singvs += stat->gs_singvs;
+
+	memset(stat, 0, sizeof(*stat));
+}
+
 /**
  * Run garbage collector for a pool, it returns if all @credits are consumed
  * or there is nothing to be reclaimed.
@@ -689,7 +721,7 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret)
 
 	if (pool->vp_dying) {
 		*empty_ret = true;
-		return 0;
+		D_GOTO(done, rc = 0);
 	}
 
 	/* take an extra ref to avoid concurrent container destroy/free */
@@ -702,7 +734,8 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret)
 			DP_UUID(pool->vp_id), DP_RC(rc));
 		if (cont != NULL)
 			vos_cont_decref(cont);
-		return rc;
+		*empty_ret = false;
+		goto done;
 	}
 
 	*empty_ret = false;
@@ -799,6 +832,9 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret)
 	if (cont != NULL)
 		vos_cont_decref(cont);
 
+done:
+	gc_update_stats(pool);
+
 	return rc;
 }
 
@@ -934,7 +970,7 @@ gc_have_pool(struct vos_pool *pool)
 static void
 gc_log_pool(struct vos_pool *pool)
 {
-	struct vos_gc_stat *stat = &pool->vp_gc_stat;
+	struct vos_gc_stat *stat = &pool->vp_gc_stat_global;
 
 	D_DEBUG(DB_TRACE,
 		"Pool="DF_UUID", GC reclaimed:\n"
@@ -1124,6 +1160,9 @@ int
 vos_gc_pool(daos_handle_t poh, int credits, int (*yield_func)(void *arg),
 	    void *yield_arg)
 {
+	struct d_tm_node_t *duration = NULL;
+	struct d_tm_node_t *tight = NULL;
+	struct d_tm_node_t *slack = NULL;
 	struct vos_pool *pool = vos_hdl2pool(poh);
 	struct vos_tls *tls = vos_tls_get(pool->vp_sysdb);
 	struct vos_gc_param param;
@@ -1149,24 +1188,44 @@ vos_gc_pool(daos_handle_t poh, int credits, int (*yield_func)(void *arg),
 
 	tls->vtl_gc_running++;
 
+	if (pool->vp_metrics != NULL) {
+		duration = pool->vp_metrics->vp_gc_metrics.vgm_duration;
+		slack = pool->vp_metrics->vp_gc_metrics.vgm_slack_cnt;
+		tight = pool->vp_metrics->vp_gc_metrics.vgm_tight_cnt;
+	}
+
 	while (1) {
 		int creds = param.vgc_credits;
 
+		d_tm_mark_duration_start(duration, D_TM_CLOCK_THREAD_CPUTIME);
+		if (creds == GC_CREDS_TIGHT)
+			d_tm_inc_counter(tight, 1);
+		else
+			d_tm_inc_counter(slack, 1);
+
 		if (credits > 0 && (credits - total) < creds)
 			creds = credits - total;
 
 		total += creds;
 		rc = vos_gc_pool_tight(poh, &creds);
+
 		if (rc) {
 			D_ERROR("GC pool failed: " DF_RC "\n", DP_RC(rc));
+			d_tm_mark_duration_end(duration);
 			break;
 		}
 		total -= creds; /* subtract the remainded credits */
-		if (creds != 0)
+		if (creds != 0) {
+			d_tm_mark_duration_end(duration);
 			break; /* reclaimed everything */
+		}
 
-		if (credits > 0 && total >= credits)
+		if (credits > 0 && total >= credits) {
+			d_tm_mark_duration_end(duration);
 			break; /* consumed all credits */
+		}
+
+		d_tm_mark_duration_end(duration);
 
 		if (vos_gc_yield(&param)) {
 			D_DEBUG(DB_TRACE, "GC pool run aborted\n");
@@ -1217,3 +1276,72 @@ vos_flush_pool(daos_handle_t poh, uint32_t nr_flush, uint32_t *nr_flushed)
 
 	return rc;
 }
+
+#define VOS_GC_DIR "vos_gc"
+/**
+ * Register the GC telemetry nodes of one target and store their handles in
+ * @vgm. Every node is created as "<path>/vos_gc/<metric>/tgt_<tgt_id>".
+ *
+ * A registration failure is only logged with D_WARN(); it is not reported to
+ * the caller, and the remaining metrics are still attempted.
+ */
+void
+vos_gc_metrics_init(struct vos_gc_metrics *vgm, const char *path, int tgt_id)
+{
+	int rc;
+
+	/* GC slice duration */
+	rc = d_tm_add_metric(&vgm->vgm_duration, D_TM_DURATION | D_TM_CLOCK_THREAD_CPUTIME,
+			     "GC slice duration", NULL, "%s/%s/duration/tgt_%u", path, VOS_GC_DIR,
+			     tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'duration' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC container deletion */
+	rc = d_tm_add_metric(&vgm->vgm_cont_del, D_TM_STATS_GAUGE, "GC containers deleted", NULL,
+			     "%s/%s/cont_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'cont_del' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC object deletion */
+	rc = d_tm_add_metric(&vgm->vgm_obj_del, D_TM_STATS_GAUGE, "GC objects deleted", NULL,
+			     "%s/%s/obj_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'obj_del' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC dkey deletion */
+	rc = d_tm_add_metric(&vgm->vgm_dkey_del, D_TM_STATS_GAUGE, "GC dkeys deleted", NULL,
+			     "%s/%s/dkey_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'dkey_del' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC akey deletion */
+	rc = d_tm_add_metric(&vgm->vgm_akey_del, D_TM_STATS_GAUGE, "GC akeys deleted", NULL,
+			     "%s/%s/akey_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'akey_del' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC ev deletion */
+	rc = d_tm_add_metric(&vgm->vgm_ev_del, D_TM_STATS_GAUGE, "GC ev deleted", NULL,
+			     "%s/%s/ev_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'ev_del' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC sv deletion */
+	rc = d_tm_add_metric(&vgm->vgm_sv_del, D_TM_STATS_GAUGE, "GC sv deleted", NULL,
+			     "%s/%s/sv_del/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'sv_del' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC slack mode runs */
+	rc = d_tm_add_metric(&vgm->vgm_slack_cnt, D_TM_COUNTER, "GC slack mode count", NULL,
+			     "%s/%s/slack_cnt/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'slack_cnt' telemetry: " DF_RC "\n", DP_RC(rc));
+
+	/* GC tight mode runs */
+	rc = d_tm_add_metric(&vgm->vgm_tight_cnt, D_TM_COUNTER, "GC tight mode count", NULL,
+			     "%s/%s/tight_cnt/tgt_%u", path, VOS_GC_DIR, tgt_id);
+	if (rc)
+		D_WARN("Failed to create 'tight_cnt' telemetry: " DF_RC "\n", DP_RC(rc));
+}
diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h
index 8c04f3f4cd6..400ebc0cd7e 100644
--- a/src/vos/vos_internal.h
+++ b/src/vos/vos_internal.h
@@ -188,6 +188,18 @@ struct vos_agg_metrics {
 	struct d_tm_node_t *vam_merge_size; /* Total merged size */
 };
 
+struct vos_gc_metrics {
+	struct d_tm_node_t *vgm_duration;  /* Duration of each gc scan */
+	struct d_tm_node_t *vgm_cont_del;  /* containers reclaimed */
+	struct d_tm_node_t *vgm_obj_del;   /* objects reclaimed */
+	struct d_tm_node_t *vgm_dkey_del;  /* dkeys reclaimed */
+	struct d_tm_node_t *vgm_akey_del;  /* akeys reclaimed */
+	struct d_tm_node_t *vgm_ev_del;    /* EV records reclaimed */
+	struct d_tm_node_t *vgm_sv_del;    /* SV records reclaimed */
+	struct d_tm_node_t *vgm_slack_cnt; /* Slack mode count */
+	struct d_tm_node_t *vgm_tight_cnt; /* Tight mode count */
+};
+
 /*
  * VOS Pool metrics for checkpoint activity.
  */
@@ -200,10 +212,14 @@ struct vos_chkpt_metrics {
 };
 
 void vos_chkpt_metrics_init(struct vos_chkpt_metrics *vc_metrics, const char *path, int tgt_id);
+void
+vos_gc_metrics_init(struct vos_gc_metrics *vgm, const char *path, int tgt_id);
 
 struct vos_space_metrics {
 	struct d_tm_node_t *vsm_scm_used; /* SCM space used */
 	struct d_tm_node_t *vsm_nvme_used; /* NVMe space used */
+	struct d_tm_node_t *vsm_scm_total; /* SCM space total */
+	struct d_tm_node_t *vsm_nvme_total; /* NVMe space total */
 	uint64_t vsm_last_update_ts; /* Timeout counter */
 };
 
@@ -219,6 +235,7 @@ struct vos_rh_metrics {
 struct vos_pool_metrics {
 	void *vp_vea_metrics;
 	struct vos_agg_metrics vp_agg_metrics;
+	struct vos_gc_metrics vp_gc_metrics;
 	struct vos_space_metrics vp_space_metrics;
 	struct vos_chkpt_metrics vp_chkpt_metrics;
 	struct vos_rh_metrics vp_rh_metrics;
@@ -255,6 +272,8 @@ struct vos_pool {
 	/** btr handle for the container table */
 	daos_handle_t vp_cont_th;
 	/** GC statistics of this pool */
+	struct vos_gc_stat vp_gc_stat_global;
+	/** GC per slice statistics of this pool */
 	struct vos_gc_stat vp_gc_stat;
 	/** link chain on vos_tls::vtl_gc_pools */
 	d_list_t vp_gc_link;
diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c
index c0dbfc950e4..37690ae3a9a 100644
--- a/src/vos/vos_pool.c
+++ b/src/vos/vos_pool.c
@@ -1504,7 +1504,7 @@ vos_pool_query(daos_handle_t poh, vos_pool_info_t *pinfo)
 
 	D_ASSERT(pinfo != NULL);
 	pinfo->pif_cont_nr = pool_df->pd_cont_nr;
-	pinfo->pif_gc_stat = pool->vp_gc_stat;
+	pinfo->pif_gc_stat = pool->vp_gc_stat_global;
 
 	rc = vos_space_query(pool, &pinfo->pif_space, true);
 	if (rc)
@@ -1562,7 +1562,7 @@ vos_pool_ctl(daos_handle_t poh, enum vos_pool_opc opc, void *param)
 	default:
 		return -DER_NOSYS;
 	case VOS_PO_CTL_RESET_GC:
-		memset(&pool->vp_gc_stat, 0, sizeof(pool->vp_gc_stat));
+		memset(&pool->vp_gc_stat_global, 0, sizeof(pool->vp_gc_stat_global));
 		break;
 	case VOS_PO_CTL_SET_POLICY:
 		if (param == NULL)
diff --git a/src/vos/vos_space.c b/src/vos/vos_space.c
index 28b7fd694b7..811058c4f8d 100644
--- a/src/vos/vos_space.c
+++ b/src/vos/vos_space.c
@@ -385,6 +385,12 @@ vos_space_update_metrics(struct vos_pool *pool)
 	if (!vpm)
 		return;
 
+	if (vpm->vp_space_metrics.vsm_last_update_ts == 0) {
+		/* Set the constant values */
+		d_tm_set_gauge(vpm->vp_space_metrics.vsm_scm_total, pool->vp_pool_df->pd_scm_sz);
+		d_tm_set_gauge(vpm->vp_space_metrics.vsm_nvme_total, pool->vp_pool_df->pd_nvme_sz);
+	}
+
 	now = daos_gettime_coarse();
 	if (now < vpm->vp_space_metrics.vsm_last_update_ts + VOS_SPACE_METRICS_INTV) {
 		return;