From 582a24e50f02ec1ab95b7cc7290bf2436c535dc7 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sat, 31 Aug 2024 09:27:36 +0800 Subject: [PATCH] DAOS-16483 vos: handle empty DTX when vos_tx_end - b26 It is possible that the DTX has modified nothing when stopping the current backend transaction. In such a case, we may not generate a persistent DTX entry, so we need to bypass such a case before checking the on-disk DTX entry status. The patch also makes some cleanups and removes redundant metrics for committed DTX entries. Enhance vos_dtx_deregister_record() to handle the GC case. Signed-off-by: Fan Yong --- src/dtx/dtx_common.c | 2 +- src/tests/ftest/util/telemetry_utils.py | 1 - src/vos/vos_common.c | 31 ++++----- src/vos/vos_dtx.c | 86 ++++++++++++++++++++----- src/vos/vos_tls.h | 1 - 5 files changed, 83 insertions(+), 38 deletions(-) diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index 353bd880009..ff4f2dfe4ef 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1341,7 +1341,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul * it persistently. Otherwise, the subsequent DTX resync may not find it as * to regard it as failed transaction and abort it. */ - if (result == 0 && !dth->dth_active && !dth->dth_prepared && + if (result == 0 && !dth->dth_active && !dth->dth_prepared && !dth->dth_solo && (dth->dth_dist || dth->dth_modification_cnt > 0)) { result = vos_dtx_attach(dth, true, dth->dth_ent != NULL ?
true : false); if (unlikely(result < 0)) { diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index aec831b3b8a..db424b6de68 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -421,7 +421,6 @@ class TelemetryUtils(): ENGINE_NVME_CRIT_WARN_METRICS +\ ENGINE_NVME_INTEL_VENDOR_METRICS ENGINE_MEM_USAGE_METRICS = [ - "engine_mem_vos_dtx_cmt_ent_48", "engine_mem_vos_vos_obj_360", "engine_mem_vos_vos_lru_size", "engine_mem_dtx_dtx_leader_handle_360"] diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index fb8461e2931..93bf1757f10 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -405,16 +405,24 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, } } else if (dae != NULL) { if (dth->dth_solo) { - if (err == 0 && cont->vc_solo_dtx_epoch < dth->dth_epoch) + if (err == 0 && dae->dae_committing && + cont->vc_solo_dtx_epoch < dth->dth_epoch) cont->vc_solo_dtx_epoch = dth->dth_epoch; vos_dtx_post_handle(cont, &dae, &dce, 1, false, err != 0); } else { D_ASSERT(dce == NULL); - if (err == 0) { - dae->dae_prepared = 1; + if (err == 0 && dth->dth_active) { + D_ASSERTF(!UMOFF_IS_NULL(dae->dae_df_off), + "Non-prepared DTX " DF_DTI "\n", + DP_DTI(&dth->dth_xid)); + dae_df = umem_off2ptr(umm, dae->dae_df_off); - D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + D_ASSERTF(!(dae_df->dae_flags & DTE_INVALID), + "Invalid status for DTX " DF_DTI "\n", + DP_DTI(&dth->dth_xid)); + + dae->dae_prepared = 1; } } } @@ -563,13 +571,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id) } } - rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, - "Number of committed entries kept around for reply" - " reconstruction", "entries", - "io/dtx/committed/tgt_%u", tgt_id); - if (rc) - D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", - DP_RC(rc)); if (tgt_id >= 0) { rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE, "Number of committed entries kept around 
for reply" @@ -579,14 +580,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); - rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE, - "Number of committed entries", "entry", - "mem/vos/dtx_cmt_ent_%u/tgt_%u", - sizeof(struct vos_dtx_cmt_ent), tgt_id); - if (rc) - D_WARN("Failed to create committed cnt: "DF_RC"\n", - DP_RC(rc)); - rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, "Number of cached vos object", "entry", "mem/vos/vos_obj_%u/tgt_%u", diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 0e70133629f..1c60f781507 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -769,7 +769,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p, struct vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal) { - struct vos_tls *tls = vos_tls_get(false); struct vos_dtx_act_ent *dae = NULL; struct vos_dtx_cmt_ent *dce = NULL; d_iov_t kiov; @@ -834,7 +833,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t if (dce == NULL) D_GOTO(out, rc = -DER_NOMEM); - d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); DCE_CMT_TIME(dce) = cmt_time; if (dae != NULL) { DCE_XID(dce) = DAE_XID(dae); @@ -1535,10 +1533,14 @@ int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record) { + struct dtx_handle *dth = vos_dth_get(false); struct vos_container *cont; struct vos_dtx_act_ent *dae; + struct vos_dtx_act_ent_df *dae_df; + umem_off_t *rec_df; bool found; int count; + int rc; int i; if (!vos_dtx_is_normal_entry(entry)) @@ -1567,10 +1569,54 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, * by another prepared (but non-committed) DTX, then do not allow current transaction * to modify it. 
Because if current transaction is aborted or failed for some reason, * there is no efficient way to recover such former non-committed DTX. + * + * If dth is NULL, then it is for GC. Under such case, deregister the record anyway. */ - if (dae->dae_dbd != NULL) - return dtx_inprogress(dae, vos_dth_get(cont->vc_pool->vp_sysdb), false, false, 8); + if (dae->dae_dbd != NULL) { + if (dth != NULL) + return dtx_inprogress(dae, dth, false, false, 8); + + dae_df = umem_off2ptr(umm, dae->dae_df_off); + D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + if (dae_df->dae_rec_cnt > DTX_INLINE_REC_CNT) + count = DTX_INLINE_REC_CNT; + else + count = dae_df->dae_rec_cnt; + + rec_df = dae_df->dae_rec_inline; + for (i = 0; i < count; i++) { + if (record == umem_off2offset(rec_df[i])) { + rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i])); + if (rc != 0) + return rc; + + rec_df[i] = UMOFF_NULL; + goto cache; + } + } + + rec_df = umem_off2ptr(umm, dae_df->dae_rec_off); + if (rec_df == NULL) + /* If non-exist on disk, then must be non-exist in cache. */ + return 0; + + for (i = 0; i < dae_df->dae_rec_cnt - DTX_INLINE_REC_CNT; i++) { + if (record == umem_off2offset(rec_df[i])) { + rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i])); + if (rc != 0) + return rc; + + rec_df[i] = UMOFF_NULL; + goto cache; + } + } + + /* If non-exist on disk, then must be non-exist in cache. 
*/ + return 0; + } + +cache: if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) count = DTX_INLINE_REC_CNT; else @@ -2116,14 +2162,18 @@ vos_dtx_post_handle(struct vos_container *cont, if (!abort && dces != NULL) { struct vos_tls *tls = vos_tls_get(false); + int j = 0; D_ASSERT(cont->vc_pool->vp_sysdb == false); for (i = 0; i < count; i++) { - if (dces[i] != NULL) { - cont->vc_dtx_committed_count++; - cont->vc_pool->vp_dtx_committed_count++; - d_tm_inc_gauge(tls->vtl_committed, 1); - } + if (dces[i] != NULL) + j++; + } + + if (j > 0) { + cont->vc_dtx_committed_count += j; + cont->vc_pool->vp_dtx_committed_count += j; + d_tm_inc_gauge(tls->vtl_committed, j); } } @@ -2439,6 +2489,7 @@ vos_dtx_aggregate(daos_handle_t coh) uint64_t epoch; umem_off_t dbd_off; umem_off_t next = UMOFF_NULL; + int count = 0; int rc; int i; @@ -2481,13 +2532,10 @@ vos_dtx_aggregate(daos_handle_t coh) UMOFF_P(dbd_off), DP_RC(rc)); goto out; } - - cont->vc_dtx_committed_count--; - cont->vc_pool->vp_dtx_committed_count--; - d_tm_dec_gauge(tls->vtl_committed, 1); - d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1); } + count = dbd->dbd_count; + if (epoch != cont_df->cd_newest_aggregated) { rc = umem_tx_add_ptr(umm, &cont_df->cd_newest_aggregated, sizeof(cont_df->cd_newest_aggregated)); @@ -2545,8 +2593,14 @@ vos_dtx_aggregate(daos_handle_t coh) out: rc = umem_tx_end(umm, rc); - if (rc == 0 && cont->vc_cmt_dtx_reindex_pos == dbd_off) - cont->vc_cmt_dtx_reindex_pos = next; + if (rc == 0) { + if (cont->vc_cmt_dtx_reindex_pos == dbd_off) + cont->vc_cmt_dtx_reindex_pos = next; + + cont->vc_dtx_committed_count -= count; + cont->vc_pool->vp_dtx_committed_count -= count; + d_tm_dec_gauge(tls->vtl_committed, count); + } DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Release DTX committed blob %p (" UMOFF_PF ") for cont " DF_UUID, dbd, diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 981cce10be5..2fc328457d0 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -64,7 +64,6 @@ struct vos_tls { }; struct 
d_tm_node_t *vtl_committed; struct d_tm_node_t *vtl_obj_cnt; - struct d_tm_node_t *vtl_dtx_cmt_ent_cnt; struct d_tm_node_t *vtl_lru_alloc_size; };