Skip to content

Commit

Permalink
DAOS-16483 vos: handle empty DTX when vos_tx_end - b26 (#15055)
Browse files Browse the repository at this point in the history
It is possible that the DTX has modified nothing by the time the current
backend transaction is stopped. In that case, we may not generate a persistent
DTX entry, so we need to bypass that case before checking the on-disk DTX entry status.

The patch also does some cleanup and removes redundant metrics for committed
DTX entries.

Enhance vos_dtx_deregister_record() to handle the GC case.

Signed-off-by: Fan Yong <[email protected]>
  • Loading branch information
Nasf-Fan authored Sep 5, 2024
1 parent 35334aa commit 906f0a4
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/dtx/dtx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul
* it persistently. Otherwise, the subsequent DTX resync may not find it as
* to regard it as failed transaction and abort it.
*/
if (result == 0 && !dth->dth_active && !dth->dth_prepared &&
if (result == 0 && !dth->dth_active && !dth->dth_prepared && !dth->dth_solo &&
(dth->dth_dist || dth->dth_modification_cnt > 0)) {
result = vos_dtx_attach(dth, true, dth->dth_ent != NULL ? true : false);
if (unlikely(result < 0)) {
Expand Down
1 change: 0 additions & 1 deletion src/tests/ftest/util/telemetry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,6 @@ class TelemetryUtils():
ENGINE_NVME_CRIT_WARN_METRICS +\
ENGINE_NVME_INTEL_VENDOR_METRICS
ENGINE_MEM_USAGE_METRICS = [
"engine_mem_vos_dtx_cmt_ent_48",
"engine_mem_vos_vos_obj_360",
"engine_mem_vos_vos_lru_size",
"engine_mem_dtx_dtx_leader_handle_360"]
Expand Down
31 changes: 12 additions & 19 deletions src/vos/vos_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,16 +405,24 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in,
}
} else if (dae != NULL) {
if (dth->dth_solo) {
if (err == 0 && cont->vc_solo_dtx_epoch < dth->dth_epoch)
if (err == 0 && dae->dae_committing &&
cont->vc_solo_dtx_epoch < dth->dth_epoch)
cont->vc_solo_dtx_epoch = dth->dth_epoch;

vos_dtx_post_handle(cont, &dae, &dce, 1, false, err != 0);
} else {
D_ASSERT(dce == NULL);
if (err == 0) {
dae->dae_prepared = 1;
if (err == 0 && dth->dth_active) {
D_ASSERTF(!UMOFF_IS_NULL(dae->dae_df_off),
"Non-prepared DTX " DF_DTI "\n",
DP_DTI(&dth->dth_xid));

dae_df = umem_off2ptr(umm, dae->dae_df_off);
D_ASSERT(!(dae_df->dae_flags & DTE_INVALID));
D_ASSERTF(!(dae_df->dae_flags & DTE_INVALID),
"Invalid status for DTX " DF_DTI "\n",
DP_DTI(&dth->dth_xid));

dae->dae_prepared = 1;
}
}
}
Expand Down Expand Up @@ -563,13 +571,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id)
}
}

rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE,
"Number of committed entries kept around for reply"
" reconstruction", "entries",
"io/dtx/committed/tgt_%u", tgt_id);
if (rc)
D_WARN("Failed to create committed cnt sensor: "DF_RC"\n",
DP_RC(rc));
if (tgt_id >= 0) {
rc = d_tm_add_metric(&tls->vtl_committed, D_TM_STATS_GAUGE,
"Number of committed entries kept around for reply"
Expand All @@ -579,14 +580,6 @@ vos_tls_init(int tags, int xs_id, int tgt_id)
D_WARN("Failed to create committed cnt sensor: "DF_RC"\n",
DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_dtx_cmt_ent_cnt, D_TM_GAUGE,
"Number of committed entries", "entry",
"mem/vos/dtx_cmt_ent_%u/tgt_%u",
sizeof(struct vos_dtx_cmt_ent), tgt_id);
if (rc)
D_WARN("Failed to create committed cnt: "DF_RC"\n",
DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE,
"Number of cached vos object", "entry",
"mem/vos/vos_obj_%u/tgt_%u",
Expand Down
86 changes: 70 additions & 16 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t
daos_epoch_t cmt_time, struct vos_dtx_cmt_ent **dce_p,
struct vos_dtx_act_ent **dae_p, bool *rm_cos, bool *fatal)
{
struct vos_tls *tls = vos_tls_get(false);
struct vos_dtx_act_ent *dae = NULL;
struct vos_dtx_cmt_ent *dce = NULL;
d_iov_t kiov;
Expand Down Expand Up @@ -834,7 +833,6 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t
if (dce == NULL)
D_GOTO(out, rc = -DER_NOMEM);

d_tm_inc_gauge(tls->vtl_dtx_cmt_ent_cnt, 1);
DCE_CMT_TIME(dce) = cmt_time;
if (dae != NULL) {
DCE_XID(dce) = DAE_XID(dae);
Expand Down Expand Up @@ -1535,10 +1533,14 @@ int
vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh,
uint32_t entry, daos_epoch_t epoch, umem_off_t record)
{
struct dtx_handle *dth = vos_dth_get(false);
struct vos_container *cont;
struct vos_dtx_act_ent *dae;
struct vos_dtx_act_ent_df *dae_df;
umem_off_t *rec_df;
bool found;
int count;
int rc;
int i;

if (!vos_dtx_is_normal_entry(entry))
Expand Down Expand Up @@ -1567,10 +1569,54 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh,
* by another prepared (but non-committed) DTX, then do not allow current transaction
* to modify it. Because if current transaction is aborted or failed for some reason,
* there is no efficient way to recover such former non-committed DTX.
*
* If dth is NULL, then it is for GC. Under such case, deregister the record anyway.
*/
if (dae->dae_dbd != NULL)
return dtx_inprogress(dae, vos_dth_get(cont->vc_pool->vp_sysdb), false, false, 8);
if (dae->dae_dbd != NULL) {
if (dth != NULL)
return dtx_inprogress(dae, dth, false, false, 8);

dae_df = umem_off2ptr(umm, dae->dae_df_off);
D_ASSERT(!(dae_df->dae_flags & DTE_INVALID));

if (dae_df->dae_rec_cnt > DTX_INLINE_REC_CNT)
count = DTX_INLINE_REC_CNT;
else
count = dae_df->dae_rec_cnt;

rec_df = dae_df->dae_rec_inline;
for (i = 0; i < count; i++) {
if (record == umem_off2offset(rec_df[i])) {
rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i]));
if (rc != 0)
return rc;

rec_df[i] = UMOFF_NULL;
goto cache;
}
}

rec_df = umem_off2ptr(umm, dae_df->dae_rec_off);
if (rec_df == NULL)
/* If non-exist on disk, then must be non-exist in cache. */
return 0;

for (i = 0; i < dae_df->dae_rec_cnt - DTX_INLINE_REC_CNT; i++) {
if (record == umem_off2offset(rec_df[i])) {
rc = umem_tx_add_ptr(umm, &rec_df[i], sizeof(rec_df[i]));
if (rc != 0)
return rc;

rec_df[i] = UMOFF_NULL;
goto cache;
}
}

/* If non-exist on disk, then must be non-exist in cache. */
return 0;
}

cache:
if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT)
count = DTX_INLINE_REC_CNT;
else
Expand Down Expand Up @@ -2116,14 +2162,18 @@ vos_dtx_post_handle(struct vos_container *cont,

if (!abort && dces != NULL) {
struct vos_tls *tls = vos_tls_get(false);
int j = 0;

D_ASSERT(cont->vc_pool->vp_sysdb == false);
for (i = 0; i < count; i++) {
if (dces[i] != NULL) {
cont->vc_dtx_committed_count++;
cont->vc_pool->vp_dtx_committed_count++;
d_tm_inc_gauge(tls->vtl_committed, 1);
}
if (dces[i] != NULL)
j++;
}

if (j > 0) {
cont->vc_dtx_committed_count += j;
cont->vc_pool->vp_dtx_committed_count += j;
d_tm_inc_gauge(tls->vtl_committed, j);
}
}

Expand Down Expand Up @@ -2439,6 +2489,7 @@ vos_dtx_aggregate(daos_handle_t coh)
uint64_t epoch;
umem_off_t dbd_off;
umem_off_t next = UMOFF_NULL;
int count = 0;
int rc;
int i;

Expand Down Expand Up @@ -2481,13 +2532,10 @@ vos_dtx_aggregate(daos_handle_t coh)
UMOFF_P(dbd_off), DP_RC(rc));
goto out;
}

cont->vc_dtx_committed_count--;
cont->vc_pool->vp_dtx_committed_count--;
d_tm_dec_gauge(tls->vtl_committed, 1);
d_tm_dec_gauge(tls->vtl_dtx_cmt_ent_cnt, 1);
}

count = dbd->dbd_count;

if (epoch != cont_df->cd_newest_aggregated) {
rc = umem_tx_add_ptr(umm, &cont_df->cd_newest_aggregated,
sizeof(cont_df->cd_newest_aggregated));
Expand Down Expand Up @@ -2545,8 +2593,14 @@ vos_dtx_aggregate(daos_handle_t coh)

out:
rc = umem_tx_end(umm, rc);
if (rc == 0 && cont->vc_cmt_dtx_reindex_pos == dbd_off)
cont->vc_cmt_dtx_reindex_pos = next;
if (rc == 0) {
if (cont->vc_cmt_dtx_reindex_pos == dbd_off)
cont->vc_cmt_dtx_reindex_pos = next;

cont->vc_dtx_committed_count -= count;
cont->vc_pool->vp_dtx_committed_count -= count;
d_tm_dec_gauge(tls->vtl_committed, count);
}

DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc,
"Release DTX committed blob %p (" UMOFF_PF ") for cont " DF_UUID, dbd,
Expand Down
1 change: 0 additions & 1 deletion src/vos/vos_tls.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ struct vos_tls {
};
struct d_tm_node_t *vtl_committed;
struct d_tm_node_t *vtl_obj_cnt;
struct d_tm_node_t *vtl_dtx_cmt_ent_cnt;
struct d_tm_node_t *vtl_lru_alloc_size;
};

Expand Down

0 comments on commit 906f0a4

Please sign in to comment.