diff --git a/ci/provisioning/post_provision_config.sh b/ci/provisioning/post_provision_config.sh index 11b308f513e..13f78358a83 100755 --- a/ci/provisioning/post_provision_config.sh +++ b/ci/provisioning/post_provision_config.sh @@ -20,7 +20,7 @@ source ci/provisioning/post_provision_config_common_functions.sh source ci/junit.sh -: "${MLNX_VER_NUM:=latest-5.8}" +: "${MLNX_VER_NUM:=24.04-0.6.6.0}" : "${DISTRO:=EL_7}" DSL_REPO_var="DAOS_STACK_${DISTRO}_LOCAL_REPO" diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 97b4bd9b5da..dd4ea4ad33b 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -672,6 +672,8 @@ crt_get_info_string(bool primary, crt_provider_t provider, int iface_idx, start_port = crt_provider_ctx0_port_get(primary, provider); domain_str = crt_provider_domain_str_get(primary, provider, iface_idx); + D_ASSERTF(provider_str != NULL, "String for provider=%d not found\n", provider); + /* CXI provider uses domain names for info string */ if (provider == CRT_PROV_OFI_CXI) iface_str = NULL; @@ -690,8 +692,7 @@ crt_get_info_string(bool primary, crt_provider_t provider, int iface_idx, D_GOTO(out, rc); } - if (provider_str) - size += strlen(provider_str); + size = strlen(provider_str); if (domain_str) size += strlen(domain_str); if (iface_str) diff --git a/src/dtx/dtx_rpc.c b/src/dtx/dtx_rpc.c index cc18a1ac915..1c0e73c9640 100644 --- a/src/dtx/dtx_rpc.c +++ b/src/dtx/dtx_rpc.c @@ -1220,6 +1220,9 @@ dtx_refresh_internal(struct ds_cont_child *cont, int *check_count, d_list_t *che /* Handle the entries whose leaders are on current server. */ d_list_for_each_entry_safe(dsp, tmp, &self, dsp_link) { struct dtx_entry dte; + struct dtx_entry *pdte = &dte; + struct dtx_cos_key dck; + d_list_del(&dsp->dsp_link); @@ -1228,13 +1231,31 @@ dtx_refresh_internal(struct ds_cont_child *cont, int *check_count, d_list_t *che dte.dte_refs = 1; dte.dte_mbs = dsp->dsp_mbs; + if (for_io) { + rc = vos_dtx_check(cont->sc_hdl, &dsp->dsp_xid, NULL, NULL, NULL, false); + switch(rc) { + case DTX_ST_COMMITTABLE: + dck.oid = dsp->dsp_oid; + dck.dkey_hash = dsp->dsp_dkey_hash; + rc = dtx_commit(cont, &pdte, &dck, 1); + if (rc < 0 && rc != -DER_NONEXIST && for_io) + d_list_add_tail(&dsp->dsp_link, cmt_list); + else + dtx_dsp_free(dsp); + continue; + case DTX_ST_COMMITTED: + case -DER_NONEXIST: /* Aborted */ + dtx_dsp_free(dsp); + continue; + default: + break; + } + } + rc = dtx_status_handle_one(cont, &dte, dsp->dsp_oid, dsp->dsp_dkey_hash, dsp->dsp_epoch, NULL, NULL); switch (rc) { case DSHR_NEED_COMMIT: { - struct dtx_entry *pdte = &dte; - struct dtx_cos_key dck; - dck.oid = dsp->dsp_oid; dck.dkey_hash = dsp->dsp_dkey_hash; rc = dtx_commit(cont, &pdte, &dck, 1); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index a2cb1f41adb..3c950615034 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -5655,7 +5655,8 @@ ds_obj_coll_punch_handler(crt_rpc_t *rpc) 1 /* start, [0] is for current engine */, ocpi->ocpi_disp_width, &exec_arg.coll_cur); - rc = dtx_leader_begin(ioc.ioc_vos_coh, &odm->odm_xid, &epoch, 1, version, + rc = dtx_leader_begin(ioc.ioc_vos_coh, &odm->odm_xid, &epoch, + dcts[0].dct_shards[dmi->dmi_tgt_id].dcs_nr, version, &ocpi->ocpi_oid, NULL /* dti_cos */, 0 /* dti_cos_cnt */, NULL /* tgts */, exec_arg.coll_cur.grp_nr /* tgt_cnt */, dtx_flags, odm->odm_mbs, dce, &dlh); diff --git a/src/tests/ftest/telemetry/wal_metrics.py b/src/tests/ftest/telemetry/wal_metrics.py index 96ac57e30a3..105015aaf29 100644 --- a/src/tests/ftest/telemetry/wal_metrics.py +++ b/src/tests/ftest/telemetry/wal_metrics.py @@ -100,8 +100,8 @@ def test_wal_replay_metrics(self): # Replay size should be > 0 after pool create for MD on SSD ranges[metric][label] = [1] elif metric.endswith('_replay_time'): - # Replay time should be 10 - 50,000 after pool create for MD on SSD - ranges[metric][label] = [10, 50000] + # Replay time should be 1 - 1,000,000 us after pool create for MD on SSD + ranges[metric][label] = [1, 1000000] elif metric.endswith('_replay_transactions'): # Replay transactions should be > 0 after pool create for MD on SSD ranges[metric][label] = [1] diff --git a/src/tests/ftest/util/file_count_test_base.py b/src/tests/ftest/util/file_count_test_base.py index ef6c83dd275..58b1a49f0a5 100644 --- a/src/tests/ftest/util/file_count_test_base.py +++ b/src/tests/ftest/util/file_count_test_base.py @@ -117,7 +117,7 @@ def run_file_count(self): try: self.processes = ior_np self.ppn = ior_ppn - if self.ior_cmd.api.value == 'HDF5-VOL': + if api == 'HDF5-VOL': self.ior_cmd.api.update('HDF5') self.run_ior_with_pool( create_pool=False, plugin_path=hdf5_plugin_path, mount_dir=mount_dir) diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index 2ed42ce7a9b..a86df01ea55 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -1015,8 +1015,8 @@ def update_test_yaml(self, logger, scm_size, scm_mount, extra_yaml, multiplier, if new_yaml_file: if verbose > 0: # Optionally display a diff of the yaml file - if not run_local(logger, f"diff -y {test.yaml_file} {new_yaml_file}").passed: - raise RunException(f"Error diff'ing {test.yaml_file}") + # diff returns rc=1 if the files are different, so ignore errors + run_local(logger, f"diff -y {test.yaml_file} {new_yaml_file}") test.yaml_file = new_yaml_file # Display the modified yaml file variants with debug diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index 28e2ac86757..fb8461e2931 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -269,13 +269,15 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, struct umem_rsrvd_act **rsrvd_scmp, d_list_t *nvme_exts, bool started, struct bio_desc *biod, int err) { - struct vos_pool *pool; - struct dtx_handle *dth = dth_in; - struct vos_dtx_act_ent *dae; - struct dtx_rsrvd_uint *dru; - struct vos_dtx_cmt_ent *dce = NULL; - struct dtx_handle tmp = {0}; - int rc; + struct vos_pool *pool; + struct umem_instance *umm; + struct dtx_handle *dth = dth_in; + struct vos_dtx_act_ent *dae; + struct vos_dtx_act_ent_df *dae_df; + struct dtx_rsrvd_uint *dru; + struct vos_dtx_cmt_ent *dce = NULL; + struct dtx_handle tmp = {0}; + int rc = 0; if (!dtx_is_valid_handle(dth)) { /** Created a dummy dth handle for publishing extents */ @@ -287,11 +289,11 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, D_INIT_LIST_HEAD(&tmp.dth_deferred_nvme); } - if (dth->dth_local) { + if (dth->dth_local) pool = vos_hdl2pool(dth_in->dth_poh); - } else { + else pool = cont->vc_pool; - } + umm = vos_pool2umm(pool); if (rsrvd_scmp != NULL) { D_ASSERT(nvme_exts != NULL); @@ -300,7 +302,7 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, * Just do your best to release the SCM reservation. Can't handle another * error while handling one already anyway. */ - (void)vos_publish_scm(vos_pool2umm(pool), *rsrvd_scmp, false /* publish */); + (void)vos_publish_scm(umm, *rsrvd_scmp, false /* publish */); D_FREE(*rsrvd_scmp); *rsrvd_scmp = NULL; err = -DER_NOMEM; @@ -341,9 +343,9 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, vos_dth_set(NULL, pool->vp_sysdb); if (bio_nvme_configured(SMD_DEV_TYPE_META) && biod != NULL) - err = umem_tx_end_ex(vos_pool2umm(pool), err, biod); + err = umem_tx_end_ex(umm, err, biod); else - err = umem_tx_end(vos_pool2umm(pool), err); + err = umem_tx_end(umm, err); cancel: if (dtx_is_valid_handle(dth_in)) { @@ -409,8 +411,11 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, vos_dtx_post_handle(cont, &dae, &dce, 1, false, err != 0); } else { D_ASSERT(dce == NULL); - if (err == 0) + if (err == 0) { dae->dae_prepared = 1; + dae_df = umem_off2ptr(umm, dae->dae_df_off); + D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + } } } } diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 27a41ce5776..0e70133629f 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -639,17 +639,25 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, return 0; /* In spite of for commit or abort, the DTX must be local preparing/prepared. */ - D_ASSERTF(vos_dae_is_prepare(dae), "Unexpected DTX "DF_DTI" status for %s\n", - DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit"); + D_ASSERTF(vos_dae_is_prepare(dae), + "Unexpected DTX "DF_DTI" status for %s in pool "DF_UUID" cont "DF_UUID"\n", + DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit", + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); dbd = dae->dae_dbd; dae_df = umem_off2ptr(umm, dae->dae_df_off); - D_ASSERTF(dae_df != NULL, "Hit invalid DTX entry "DF_DTI" when release for %s\n", - DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit"); + D_ASSERTF(dae_df != NULL, "Hit invalid DTX entry "DF_DTI" when release for %s in pool " + DF_UUID" cont "DF_UUID"\n", DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit", + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); D_ASSERTF(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC, - "Invalid blob %p magic %x for "DF_DTI" (lid %x)\n", - dbd, dbd->dbd_magic, DP_DTI(&DAE_XID(dae)), DAE_LID(dae)); + "Bad blob %p magic %x for "DF_DTI" (lid %x) in pool "DF_UUID" cont "DF_UUID"\n", + dbd, dbd->dbd_magic, DP_DTI(&DAE_XID(dae)), DAE_LID(dae), + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); + D_ASSERTF(dbd->dbd_index > 0, + "%s DTX "DF_DTI" against new DTX blob %p in pool "DF_UUID" cont "DF_UUID"\n", + abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd, + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); if (!UMOFF_IS_NULL(dae_df->dae_mbs_off)) { /* dae_mbs_off will be invalid via flag DTE_INVALID. */ @@ -688,19 +696,16 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, } if (dbd->dbd_count > 1 || dbd->dbd_index < dbd->dbd_cap) { - rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, - sizeof(dae_df->dae_flags)); + rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, sizeof(dae_df->dae_flags)); if (rc != 0) return rc; - /* Mark the DTX entry as invalid in SCM. */ - dae_df->dae_flags = DTE_INVALID; - - rc = umem_tx_add_ptr(umm, &dbd->dbd_count, - sizeof(dbd->dbd_count)); + rc = umem_tx_add_ptr(umm, &dbd->dbd_count, sizeof(dbd->dbd_count)); if (rc != 0) return rc; + /* Mark the DTX entry as invalid persistently. */ + dae_df->dae_flags = DTE_INVALID; dbd->dbd_count--; } else { struct vos_cont_df *cont_df = cont->vc_cont_df; @@ -922,6 +927,8 @@ vos_dtx_extend_act_table(struct vos_container *cont) dbd->dbd_magic = DTX_ACT_BLOB_MAGIC; dbd->dbd_cap = (DTX_BLOB_SIZE - sizeof(struct vos_dtx_blob_df)) / sizeof(struct vos_dtx_act_ent_df); + dbd->dbd_count = 0; + dbd->dbd_index = 0; tmp = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); if (tmp == NULL) { @@ -932,14 +939,14 @@ vos_dtx_extend_act_table(struct vos_container *cont) sizeof(cont_df->cd_dtx_active_head) + sizeof(cont_df->cd_dtx_active_tail)); if (rc != 0) - return rc; + goto out; cont_df->cd_dtx_active_head = dbd_off; } else { rc = umem_tx_add_ptr(umm, &tmp->dbd_next, sizeof(tmp->dbd_next)); if (rc != 0) - return rc; + goto out; tmp->dbd_next = dbd_off; @@ -947,19 +954,20 @@ vos_dtx_extend_act_table(struct vos_container *cont) rc = umem_tx_add_ptr(umm, &cont_df->cd_dtx_active_tail, sizeof(cont_df->cd_dtx_active_tail)); if (rc != 0) - return rc; + goto out; } cont_df->cd_dtx_active_tail = dbd_off; - D_DEBUG(DB_IO, "Allocated DTX active blob %p ("UMOFF_PF") for cont "DF_UUID"\n", - dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); - - return 0; +out: + DL_CDEBUG(rc == 0, DB_IO, DLOG_ERR, rc, + "Allocated DTX active blob %p ("UMOFF_PF") for cont "DF_UUID, + dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); + return rc; } static int -vos_dtx_alloc(struct umem_instance *umm, struct vos_dtx_blob_df *dbd, struct dtx_handle *dth) +vos_dtx_alloc(struct umem_instance *umm, struct dtx_handle *dth) { struct vos_dtx_act_ent *dae = NULL; struct vos_container *cont; @@ -1005,21 +1013,12 @@ vos_dtx_alloc(struct umem_instance *umm, struct vos_dtx_blob_df *dbd, struct dtx DAE_MBS_FLAGS(dae) = 0; } - if (dbd != NULL) { - D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); - - dae->dae_df_off = umem_ptr2off(umm, dbd) + - offsetof(struct vos_dtx_blob_df, dbd_active_data) + - sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; - } - /* Will be set as dbd::dbd_index via vos_dtx_prepared(). */ DAE_INDEX(dae) = DTX_INDEX_INVAL; - dae->dae_dbd = dbd; dae->dae_dth = dth; - D_DEBUG(DB_IO, "Allocated new lid DTX: "DF_DTI" lid=%lx, dae=%p, dae_dbd=%p\n", - DP_DTI(&dth->dth_xid), DAE_LID(dae) & DTX_LID_SOLO_MASK, dae, dbd); + D_DEBUG(DB_IO, "Allocated new lid DTX: "DF_DTI" lid=%lx, dae=%p\n", + DP_DTI(&dth->dth_xid), DAE_LID(dae) & DTX_LID_SOLO_MASK, dae); d_iov_set(&kiov, &DAE_XID(dae), sizeof(DAE_XID(dae))); d_iov_set(&riov, dae, sizeof(*dae)); @@ -1445,46 +1444,6 @@ vos_dtx_validation(struct dtx_handle *dth) return rc; } -static int -vos_dtx_active(struct dtx_handle *dth) -{ - struct vos_dtx_act_ent *dae = dth->dth_ent; - struct vos_container *cont; - struct vos_cont_df *cont_df; - struct umem_instance *umm; - struct vos_dtx_blob_df *dbd; - int rc = 0; - - if (dae->dae_dbd != NULL) - goto out; - - cont = vos_hdl2cont(dth->dth_coh); - cont_df = cont->vc_cont_df; - umm = vos_cont2umm(cont); - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - - if (dbd == NULL || dbd->dbd_index >= dbd->dbd_cap) { - rc = vos_dtx_extend_act_table(cont); - if (rc != 0) - goto out; - - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - } - - D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); - - dae->dae_df_off = umem_ptr2off(umm, dbd) + - offsetof(struct vos_dtx_blob_df, dbd_active_data) + - sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; - dae->dae_dbd = dbd; - -out: - if (rc == 0) - dth->dth_active = 1; - - return rc; -} - /* The caller has started local transaction. */ int vos_dtx_register_record(struct umem_instance *umm, umem_off_t record, @@ -1552,15 +1511,10 @@ vos_dtx_register_record(struct umem_instance *umm, umem_off_t record, return 0; } - if (!dth->dth_active) { - rc = vos_dtx_active(dth); - if (rc != 0) - goto out; - } - rc = vos_dtx_append(dth, record, type); if (rc == 0) { /* Incarnation log entry implies a share */ + dth->dth_active = 1; *tx_id = DAE_LID(dae); if (type == DTX_RT_ILOG) dth->dth_modify_shared = 1; @@ -1577,20 +1531,18 @@ vos_dtx_register_record(struct umem_instance *umm, umem_off_t record, } /* The caller has started local transaction. */ -void +int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record) { struct vos_container *cont; struct vos_dtx_act_ent *dae; - struct vos_dtx_act_ent_df *dae_df; - umem_off_t *rec_df; bool found; int count; int i; if (!vos_dtx_is_normal_entry(entry)) - return; + return 0; D_ASSERT(entry >= DTX_LID_RESERVED); @@ -1600,20 +1552,24 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, * The on-disk entry will be destroyed soon. */ if (cont == NULL) - return; + return 0; found = lrua_lookupx(cont->vc_dtx_array, entry - DTX_LID_RESERVED, epoch, &dae); if (!found) { D_WARN("Could not find active DTX record for lid=%d, epoch=" DF_U64"\n", entry, epoch); - return; + return 0; } - dae_df = umem_off2ptr(umm, dae->dae_df_off); - if (daos_is_zero_dti(&dae_df->dae_xid) || - dae_df->dae_flags & DTE_INVALID) - return; + /* + * NOTE: If the record to be deregistered (for free or overwrite, and so on) is referenced + * by another prepared (but non-committed) DTX, then do not allow current transaction + * to modify it. Because if current transaction is aborted or failed for some reason, + * there is no efficient way to recover such former non-committed DTX. + */ + if (dae->dae_dbd != NULL) + return dtx_inprogress(dae, vos_dth_get(cont->vc_pool->vp_sysdb), false, false, 8); if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) count = DTX_INLINE_REC_CNT; @@ -1623,46 +1579,18 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, for (i = 0; i < count; i++) { if (record == umem_off2offset(DAE_REC_INLINE(dae)[i])) { DAE_REC_INLINE(dae)[i] = UMOFF_NULL; - goto handle_df; + return 0; } } for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) { if (record == umem_off2offset(dae->dae_records[i])) { dae->dae_records[i] = UMOFF_NULL; - goto handle_df; - } - } - - /* Not found */ - return; - -handle_df: - if (dae_df->dae_rec_cnt > DTX_INLINE_REC_CNT) - count = DTX_INLINE_REC_CNT; - else - count = dae_df->dae_rec_cnt; - - rec_df = dae_df->dae_rec_inline; - for (i = 0; i < count; i++) { - if (umem_off2offset(rec_df[i]) == record) { - rec_df[i] = UMOFF_NULL; - return; + return 0; } } - rec_df = umem_off2ptr(umm, dae_df->dae_rec_off); - - /* Not found */ - if (rec_df == NULL) - return; - - for (i = 0; i < dae_df->dae_rec_cnt - DTX_INLINE_REC_CNT; i++) { - if (umem_off2offset(rec_df[i]) == record) { - rec_df[i] = UMOFF_NULL; - return; - } - } + return 0; } int @@ -1670,6 +1598,8 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) { struct vos_dtx_act_ent *dae = dth->dth_ent; struct vos_container *cont = vos_hdl2cont(dth->dth_coh); + struct vos_dtx_act_ent_df *dae_df; + struct vos_cont_df *cont_df; struct umem_instance *umm; struct vos_dtx_blob_df *dbd; umem_off_t rec_off; @@ -1705,9 +1635,26 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) return rc; } + D_ASSERT(dae->dae_dbd == NULL); + + cont_df = cont->vc_cont_df; umm = vos_cont2umm(cont); - dbd = dae->dae_dbd; - D_ASSERT(dbd != NULL); + dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); + if (dbd == NULL || dbd->dbd_index >= dbd->dbd_cap) { + rc = vos_dtx_extend_act_table(cont); + if (rc != 0) + return rc; + + dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); + } + + D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); + + dae->dae_dbd = dbd; + dae->dae_df_off = umem_ptr2off(umm, dbd) + + offsetof(struct vos_dtx_blob_df, dbd_active_data) + + sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; + dae_df = umem_off2ptr(umm, dae->dae_df_off); /* Use the dkey_hash for the last modification as the dkey_hash * for the whole transaction. It will used as the index for DTX @@ -1784,27 +1731,30 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) DAE_INDEX(dae) = dbd->dbd_index; if (DAE_INDEX(dae) > 0) { - rc = umem_tx_xadd_ptr(umm, umem_off2ptr(umm, dae->dae_df_off), - sizeof(struct vos_dtx_act_ent_df), UMEM_XADD_NO_SNAPSHOT); + rc = umem_tx_xadd_ptr(umm, dae_df, sizeof(*dae_df), UMEM_XADD_NO_SNAPSHOT); if (rc != 0) - return rc; + goto out; /* dbd_index is next to dbd_count */ rc = umem_tx_add_ptr(umm, &dbd->dbd_count, sizeof(dbd->dbd_count) + sizeof(dbd->dbd_index)); if (rc != 0) - return rc; + goto out; } - memcpy(umem_off2ptr(umm, dae->dae_df_off), - &dae->dae_base, sizeof(struct vos_dtx_act_ent_df)); + memcpy(dae_df, &dae->dae_base, sizeof(*dae_df)); dbd->dbd_count++; dbd->dbd_index++; dae->dae_preparing = 1; dae->dae_need_release = 1; - return 0; +out: + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, + "Preparing DTX "DF_DTI" in dbd "UMOFF_PF" at index %u, count %u, cap %u", + DP_DTI(&DAE_XID(dae)), UMOFF_P(cont_df->cd_dtx_active_tail), + dbd->dbd_index, dbd->dbd_count, dbd->dbd_cap); + return rc; } static struct dtx_memberships * @@ -2827,12 +2777,16 @@ vos_dtx_act_reindex(struct vos_container *cont) dbd_count++; } - D_ASSERTF(dbd_count == dbd->dbd_count, - "Unmatched active DTX count %d/%d, cap %d, idx %d for blob %p (" - UMOFF_PF"), head "UMOFF_PF", tail "UMOFF_PF"\n", - dbd_count, dbd->dbd_count, dbd->dbd_cap, dbd->dbd_index, dbd, - UMOFF_P(dbd_off), UMOFF_P(cont_df->cd_dtx_active_head), - UMOFF_P(cont_df->cd_dtx_active_tail)); + if (unlikely(dbd_count != dbd->dbd_count)) { + D_ERROR("Unmatched active DTX count %d/%d, cap %d, idx %d for blob %p (" + UMOFF_PF"), head "UMOFF_PF", tail "UMOFF_PF" in pool " + DF_UUID" cont "DF_UUID"\n", dbd_count, dbd->dbd_count, dbd->dbd_cap, + dbd->dbd_index, dbd, UMOFF_P(dbd_off), + UMOFF_P(cont_df->cd_dtx_active_head), + UMOFF_P(cont_df->cd_dtx_active_tail), DP_UUID(cont->vc_pool->vp_id), + DP_UUID(cont->vc_id)); + D_GOTO(out, rc = -DER_IO); + } dbd_off = dbd->dbd_next; } @@ -3012,13 +2966,12 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) { struct vos_container *cont; struct umem_instance *umm = NULL; - struct vos_dtx_blob_df *dbd = NULL; struct vos_dtx_cmt_ent *dce = NULL; - struct vos_cont_df *cont_df = NULL; struct vos_dtx_act_ent *dae; d_iov_t kiov; d_iov_t riov; int rc = 0; + bool tx = false; if (!dtx_is_valid_handle(dth)) return 0; @@ -3055,31 +3008,11 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) if (rc != 0) goto out; - cont_df = cont->vc_cont_df; - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - if (dbd == NULL || dbd->dbd_index >= dbd->dbd_cap) { - rc = vos_dtx_extend_act_table(cont); - if (rc != 0) - goto out; - - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - } + tx = true; } - if (dth->dth_ent == NULL) { - rc = vos_dtx_alloc(umm, dbd, dth); - } else if (persistent) { - D_ASSERT(dbd != NULL); - D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); - - dae = dth->dth_ent; - D_ASSERT(dae->dae_dbd == NULL); - - dae->dae_df_off = umem_ptr2off(umm, dbd) + - offsetof(struct vos_dtx_blob_df, dbd_active_data) + - sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; - dae->dae_dbd = dbd; - } + if (dth->dth_ent == NULL) + rc = vos_dtx_alloc(umm, dth); out: if (rc == 0) { @@ -3094,7 +3027,7 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) } if (persistent) { - if (cont_df != NULL) { + if (tx) { if (rc == 0) { rc = umem_tx_commit(umm); D_ASSERTF(rc == 0, "local TX commit failure %d\n", rc); diff --git a/src/vos/vos_ilog.c b/src/vos/vos_ilog.c index ae67f9a00ac..54abf2f407f 100644 --- a/src/vos/vos_ilog.c +++ b/src/vos/vos_ilog.c @@ -82,8 +82,7 @@ vos_ilog_del(struct umem_instance *umm, umem_off_t ilog_off, uint32_t tx_id, return 0; coh.cookie = (unsigned long)args; - vos_dtx_deregister_record(umm, coh, tx_id, epoch, ilog_off); - return 0; + return vos_dtx_deregister_record(umm, coh, tx_id, epoch, ilog_off); } void diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index a6c7fa9ac00..7d4dd3ac166 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -747,8 +747,10 @@ vos_dtx_get(bool standalone); * \param epoch [IN] Epoch for the DTX. * \param record [IN] Address (offset) of the record to be * deregistered. + * + * \return 0 on success and negative on failure. */ -void +int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record); diff --git a/src/vos/vos_tree.c b/src/vos/vos_tree.c index e48467f9364..c36fcaa88c5 100644 --- a/src/vos/vos_tree.c +++ b/src/vos/vos_tree.c @@ -601,6 +601,7 @@ svt_rec_free_internal(struct btr_instance *tins, struct btr_record *rec, struct dtx_handle *dth = NULL; struct umem_rsrvd_act *rsrvd_scm; struct vos_container *cont = vos_hdl2cont(tins->ti_coh); + int rc; if (UMOFF_IS_NULL(rec->rec_off)) return 0; @@ -611,12 +612,12 @@ svt_rec_free_internal(struct btr_instance *tins, struct btr_record *rec, return -DER_NO_PERM; /* Not allowed */ } - vos_dtx_deregister_record(&tins->ti_umm, tins->ti_coh, - irec->ir_dtx, *epc, rec->rec_off); + rc = vos_dtx_deregister_record(&tins->ti_umm, tins->ti_coh, + irec->ir_dtx, *epc, rec->rec_off); + if (rc != 0) + return rc; if (!overwrite) { - int rc; - /* SCM value is stored together with vos_irec_df */ if (addr->ba_type == DAOS_MEDIA_NVME) { struct vos_pool *pool = tins->ti_priv; @@ -796,9 +797,8 @@ evt_dop_log_del(struct umem_instance *umm, daos_epoch_t epoch, daos_handle_t coh; coh.cookie = (unsigned long)args; - vos_dtx_deregister_record(umm, coh, desc->dc_dtx, epoch, - umem_ptr2off(umm, desc)); - return 0; + return vos_dtx_deregister_record(umm, coh, desc->dc_dtx, epoch, + umem_ptr2off(umm, desc)); } void