diff --git a/site_scons/site_tools/go_builder.py b/site_scons/site_tools/go_builder.py index b3706a8976d..51fedbe88cf 100644 --- a/site_scons/site_tools/go_builder.py +++ b/site_scons/site_tools/go_builder.py @@ -70,6 +70,8 @@ def _check_go_version(context): # go version go1.2.3 Linux/amd64 go_version = out.split(' ')[2].replace('go', '') + if '-' in go_version: + go_version = go_version.split('-')[0] if len([x for x, y in zip(go_version.split('.'), MIN_GO_VERSION.split('.')) if int(x) < int(y)]) > 0: diff --git a/src/vos/vos_aggregate.c b/src/vos/vos_aggregate.c index a50661d748f..ccad7b5b26b 100644 --- a/src/vos/vos_aggregate.c +++ b/src/vos/vos_aggregate.c @@ -2355,7 +2355,12 @@ vos_aggregate_pre_cb(daos_handle_t ih, vos_iter_entry_t *entry, } if (rc < 0) { + struct vos_agg_metrics *vam = agg_cont2metrics(cont); + D_ERROR("VOS aggregation failed: "DF_RC"\n", DP_RC(rc)); + if (vam && vam->vam_fail_count) + d_tm_inc_counter(vam->vam_fail_count, 1); + return rc; } @@ -2428,7 +2433,11 @@ vos_aggregate_post_cb(daos_handle_t ih, vos_iter_entry_t *entry, inc_agg_counter(agg_param, type, AGG_OP_DEL); rc = 0; } else if (rc != 0) { + struct vos_agg_metrics *vam = agg_cont2metrics(cont); + D_ERROR("VOS aggregation failed: %d\n", rc); + if (vam && vam->vam_fail_count) + d_tm_inc_counter(vam->vam_fail_count, 1); /* * -DER_TX_BUSY error indicates current ilog aggregation @@ -2439,8 +2448,6 @@ vos_aggregate_post_cb(daos_handle_t ih, vos_iter_entry_t *entry, * orphan the current entry due to incarnation log semantics. */ if (rc == -DER_TX_BUSY) { - struct vos_agg_metrics *vam = agg_cont2metrics(cont); - agg_param->ap_in_progress = 1; rc = 0; switch (type) { @@ -2733,6 +2740,13 @@ vos_aggregate(daos_handle_t coh, daos_epoch_range_t *epr, free_agg_data: D_FREE(ad); + if (rc < 0) { + struct vos_agg_metrics *vam = agg_cont2metrics(cont); + + if (vam && vam->vam_fail_count) + d_tm_inc_counter(vam->vam_fail_count, 1); + } + return rc; } diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index d4f8e31de8a..8d53124c30c 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -759,6 +759,12 @@ vos_metrics_alloc(const char *path, int tgt_id) if (rc) D_WARN("Failed to create 'merged_size' telemetry : "DF_RC"\n", DP_RC(rc)); + /* VOS aggregation failed */ + rc = d_tm_add_metric(&vam->vam_fail_count, D_TM_COUNTER, "aggregation failures", NULL, + "%s/%s/fail_count/tgt_%u", path, VOS_AGG_DIR, tgt_id); + if (rc) + DL_WARN(rc, "Failed to create 'fail_count' telemetry"); + /* Metrics related to VOS checkpointing */ vos_chkpt_metrics_init(&vp_metrics->vp_chkpt_metrics, path, tgt_id); diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c index 16779819bd5..0937b883f33 100644 --- a/src/vos/vos_gc.c +++ b/src/vos/vos_gc.c @@ -684,12 +684,12 @@ gc_update_stats(struct vos_pool *pool) if (pool->vp_metrics != NULL) { vgm = &pool->vp_metrics->vp_gc_metrics; - d_tm_set_gauge(vgm->vgm_cont_del, stat->gs_conts); - d_tm_set_gauge(vgm->vgm_obj_del, stat->gs_objs); - d_tm_set_gauge(vgm->vgm_dkey_del, stat->gs_dkeys); - d_tm_set_gauge(vgm->vgm_akey_del, stat->gs_akeys); - d_tm_set_gauge(vgm->vgm_ev_del, stat->gs_recxs); - d_tm_set_gauge(vgm->vgm_sv_del, stat->gs_singvs); + d_tm_inc_counter(vgm->vgm_cont_del, stat->gs_conts); + d_tm_inc_counter(vgm->vgm_obj_del, stat->gs_objs); + d_tm_inc_counter(vgm->vgm_dkey_del, stat->gs_dkeys); + d_tm_inc_counter(vgm->vgm_akey_del, stat->gs_akeys); + d_tm_inc_counter(vgm->vgm_ev_del, stat->gs_recxs); + d_tm_inc_counter(vgm->vgm_sv_del, stat->gs_singvs); } gstat->gs_conts += stat->gs_conts; @@ -1286,37 +1286,37 @@ vos_gc_metrics_init(struct vos_gc_metrics *vgm, const char *path, int tgt_id) D_WARN("Failed to create 'duration' telemetry: " DF_RC "\n", DP_RC(rc)); /* GC container deletion */ - rc = d_tm_add_metric(&vgm->vgm_cont_del, D_TM_STATS_GAUGE, "GC containers deleted", NULL, + rc = d_tm_add_metric(&vgm->vgm_cont_del, D_TM_COUNTER, "GC containers deleted", NULL, "%s/%s/cont_del/tgt_%u", path, VOS_GC_DIR, tgt_id); if (rc) D_WARN("Failed to create 'cont_del' telemetry: " DF_RC "\n", DP_RC(rc)); /* GC object deletion */ - rc = d_tm_add_metric(&vgm->vgm_obj_del, D_TM_STATS_GAUGE, "GC objects deleted", NULL, + rc = d_tm_add_metric(&vgm->vgm_obj_del, D_TM_COUNTER, "GC objects deleted", NULL, "%s/%s/obj_del/tgt_%u", path, VOS_GC_DIR, tgt_id); if (rc) D_WARN("Failed to create 'obj_del' telemetry: " DF_RC "\n", DP_RC(rc)); /* GC dkey deletion */ - rc = d_tm_add_metric(&vgm->vgm_dkey_del, D_TM_STATS_GAUGE, "GC dkeys deleted", NULL, + rc = d_tm_add_metric(&vgm->vgm_dkey_del, D_TM_COUNTER, "GC dkeys deleted", NULL, "%s/%s/dkey_del/tgt_%u", path, VOS_GC_DIR, tgt_id); if (rc) D_WARN("Failed to create 'dkey_del' telemetry: " DF_RC "\n", DP_RC(rc)); /* GC akey deletion */ - rc = d_tm_add_metric(&vgm->vgm_akey_del, D_TM_STATS_GAUGE, "GC akeys deleted", NULL, + rc = d_tm_add_metric(&vgm->vgm_akey_del, D_TM_COUNTER, "GC akeys deleted", NULL, "%s/%s/akey_del/tgt_%u", path, VOS_GC_DIR, tgt_id); if (rc) D_WARN("Failed to create 'akey_del' telemetry: " DF_RC "\n", DP_RC(rc)); /* GC ev deletion */ - rc = d_tm_add_metric(&vgm->vgm_ev_del, D_TM_STATS_GAUGE, "GC ev deleted", NULL, + rc = d_tm_add_metric(&vgm->vgm_ev_del, D_TM_COUNTER, "GC ev deleted", NULL, "%s/%s/ev_del/tgt_%u", path, VOS_GC_DIR, tgt_id); if (rc) D_WARN("Failed to create 'ev_del' telemetry: " DF_RC "\n", DP_RC(rc)); /* GC sv deletion */ - rc = d_tm_add_metric(&vgm->vgm_sv_del, D_TM_STATS_GAUGE, "GC sv deleted", NULL, + rc = d_tm_add_metric(&vgm->vgm_sv_del, D_TM_COUNTER, "GC sv deleted", NULL, "%s/%s/sv_del/tgt_%u", path, VOS_GC_DIR, tgt_id); if (rc) D_WARN("Failed to create 'sv_del' telemetry: " DF_RC "\n", DP_RC(rc)); diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 400ebc0cd7e..20aaa565077 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -186,6 +186,7 @@ struct vos_agg_metrics { struct d_tm_node_t *vam_del_ev; /* Deleted EV records */ struct d_tm_node_t *vam_merge_recs; /* Total merged EV records */ struct d_tm_node_t *vam_merge_size; /* Total merged size */ + struct d_tm_node_t *vam_fail_count; /* Aggregation failed */ }; struct vos_gc_metrics {