From 08b5847166bb0db4c149aa3bb55a75de28901e93 Mon Sep 17 00:00:00 2001
From: Niu Yawei
Date: Mon, 26 Aug 2024 00:19:26 -0400
Subject: [PATCH] DAOS-14317 vos: initial changes for the phase2 object pre-load

- Introduced the new durable format 'vos_obj_p2_df' for the md-on-ssd phase2
  object; at most 4 evict-able bucket IDs can be stored.
- Changed vos_obj_hold() & vos_obj_release() to pin or unpin the object,
  respectively.
- Changed the private data of the VOS dkey/akey/value trees from 'vos_pool'
  to 'vos_object'; the private data will be used for allocating/reserving
  from the evict-able bucket.

TODO:
- Move vos_obj_hold() from vos_update_end() to vos_update_begin(), and change
  the VOS I/O code to reserve from the evict-able bucket.
- Reorg the GC code to pre-load objects before starting a transaction.
- Reorg the DTX commit code to pre-load objects before starting a transaction.
- Reorg the multiple-objects tx code to pre-load objects.
- Improve the engine scheduler to take the space pressure of both the
  non-evict-able & evict-able zones into account.

Required-githooks: true

Signed-off-by: Niu Yawei
---
 src/common/btree.c            |  9 ++++--
 src/include/daos/btree.h      |  2 ++
 src/include/daos/mem.h        |  2 --
 src/include/daos_srv/evtree.h |  6 +++-
 src/vos/evtree.c              | 15 +++++++--
 src/vos/vos_gc.c              |  4 +--
 src/vos/vos_internal.h        | 43 +++++++++++++++++++++++++-
 src/vos/vos_layout.h          | 13 ++++++++
 src/vos/vos_obj.c             | 11 ++++---
 src/vos/vos_obj.h             |  8 ++++-
 src/vos/vos_obj_cache.c       | 58 ++++++++++++++++++++++++++++++++---
 src/vos/vos_obj_index.c       | 31 +++++++++++++------
 src/vos/vos_query.c           |  6 ++--
 src/vos/vos_tree.c            | 44 ++++++++++++++++----------
 14 files changed, 203 insertions(+), 49 deletions(-)

diff --git a/src/common/btree.c b/src/common/btree.c
index 6bf1bdb2b15e..0657bf384dc9 100644
--- a/src/common/btree.c
+++ b/src/common/btree.c
@@ -945,8 +945,12 @@ btr_root_alloc(struct btr_context *tcx)
 	struct btr_instance *tins = &tcx->tc_tins;
 	struct btr_root *root;
 
-	tins->ti_root_off = umem_zalloc(btr_umm(tcx),
-					sizeof(struct btr_root));
+	if (btr_ops(tcx)->to_node_alloc != NULL)
+		tins->ti_root_off = btr_ops(tcx)->to_node_alloc(&tcx->tc_tins,
+								sizeof(struct btr_root));
+	else
+		tins->ti_root_off = umem_zalloc(btr_umm(tcx), sizeof(struct btr_root));
+
 	if (UMOFF_IS_NULL(tins->ti_root_off))
 		return btr_umm(tcx)->umm_nospc_rc;
 
@@ -3884,6 +3888,7 @@ btr_tree_destroy(struct btr_context *tcx, void *args, bool *destroyed)
 		tcx->tc_tins.ti_root_off, tcx->tc_order);
 
 	root = tcx->tc_tins.ti_root;
+	tcx->tc_tins.ti_destroy = 1;
 	if (root && !UMOFF_IS_NULL(root->tr_node)) {
 		/* destroy the root and all descendants */
 		rc = btr_node_destroy(tcx, root->tr_node, args, &empty);
diff --git a/src/include/daos/btree.h b/src/include/daos/btree.h
index 24c7b95cbe4d..51f7999e47af 100644
--- a/src/include/daos/btree.h
+++ b/src/include/daos/btree.h
@@ -429,6 +429,8 @@ struct btr_instance {
 	struct btr_root *ti_root;
 	/** Customized operations for the tree */
 	btr_ops_t *ti_ops;
+	/** The instance is being used for tree destroy */
+	unsigned int ti_destroy : 1;
 };
 
 /**
diff --git a/src/include/daos/mem.h b/src/include/daos/mem.h
index 7a7ca3dcf7c6..b94f75b22c41 100644
--- a/src/include/daos/mem.h
+++ b/src/include/daos/mem.h
@@ -451,8 +451,6 @@ typedef void umem_cache_wait_cb_t(void *arg, uint64_t chkpt_tx,
 				  uint64_t *committed_tx);
 
 /**
- * Write all dirty pages before @wal_tx to MD blob. (XXX: not yet implemented)
- *
  * This function can yield internally, it is called by checkpoint service of upper level stack.
  *
  * \param[in]	store	The umem store
diff --git a/src/include/daos_srv/evtree.h b/src/include/daos_srv/evtree.h
index 63224259cccb..292c8848c87e 100644
--- a/src/include/daos_srv/evtree.h
+++ b/src/include/daos_srv/evtree.h
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2017-2023 Intel Corporation.
+ * (C) Copyright 2017-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -70,6 +70,10 @@ struct evt_desc_cbs {
 				     struct evt_desc *desc,
 				     daos_size_t nob, void *args);
 	void *dc_bio_free_args;
+	/**
+	 * Argument for allocation.
+	 */
+	void *dc_alloc_arg;
 	/**
 	 * Availability check, it is for data tracked by DTX undo log.
 	 * It is optional, EVTree always treats data extent is available if
diff --git a/src/vos/evtree.c b/src/vos/evtree.c
index d635453f8b2e..b0bb7a7be6f4 100644
--- a/src/vos/evtree.c
+++ b/src/vos/evtree.c
@@ -1435,6 +1435,17 @@ evt_node_size(struct evt_context *tcx, bool leaf)
 	return evt_order2size(tcx->tc_order, leaf);
 }
 
+static inline umem_off_t
+evt_zalloc(struct evt_context *tcx, size_t size)
+{
+	struct vos_object *obj = tcx->tc_desc_cbs.dc_alloc_arg;
+
+	if (obj != NULL)
+		return vos_obj_zalloc(obj, size);
+
+	return umem_zalloc(evt_umm(tcx), size);
+}
+
 /** Allocate a evtree node */
 static int
 evt_node_alloc(struct evt_context *tcx, unsigned int flags,
@@ -1444,7 +1455,7 @@ evt_node_alloc(struct evt_context *tcx, unsigned int flags,
 	umem_off_t nd_off;
 	bool leaf = (flags & EVT_NODE_LEAF);
 
-	nd_off = umem_zalloc(evt_umm(tcx), evt_node_size(tcx, leaf));
+	nd_off = evt_zalloc(tcx, evt_node_size(tcx, leaf));
 	if (UMOFF_IS_NULL(nd_off))
 		return -DER_NOSPACE;
 
@@ -3263,7 +3274,7 @@ evt_common_insert(struct evt_context *tcx, struct evt_node *nd,
 			D_DEBUG(DB_TRACE, "Allocating an extra %d bytes "
 				"for checksum", csum_buf_size);
 		}
-		desc_off = umem_zalloc(evt_umm(tcx), desc_size);
+		desc_off = evt_zalloc(tcx, desc_size);
 		if (UMOFF_IS_NULL(desc_off))
 			return -DER_NOSPACE;
 
diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c
index 0937b883f336..b90cd5d1199b 100644
--- a/src/vos/vos_gc.c
+++ b/src/vos/vos_gc.c
@@ -1,5 +1,5 @@
 /**
- * (C) Copyright 2019-2023 Intel Corporation.
+ * (C) Copyright 2019-2024 Intel Corporation.
  *
  * SPDX-License-Identifier: BSD-2-Clause-Patent
  */
@@ -115,7 +115,7 @@ gc_drain_evt(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,
 	daos_handle_t toh;
 	int rc;
 
-	vos_evt_desc_cbs_init(&cbs, pool, coh);
+	vos_evt_desc_cbs_init(&cbs, pool, coh, NULL);
 	rc = evt_open(root, &pool->vp_uma, &cbs, &toh);
 	if (rc == -DER_NONEXIST) {
 		*empty = true;
diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h
index f6a74fce7e6a..8d1de73ed2ed 100644
--- a/src/vos/vos_internal.h
+++ b/src/vos/vos_internal.h
@@ -1258,7 +1258,7 @@ vos_bio_addr_free(struct vos_pool *pool, bio_addr_t *addr, daos_size_t nob);
 
 void
 vos_evt_desc_cbs_init(struct evt_desc_cbs *cbs, struct vos_pool *pool,
-		      daos_handle_t coh);
+		      daos_handle_t coh, struct vos_object *obj);
 
 int
 vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb);
@@ -1329,6 +1329,12 @@ vos_pool2umm(struct vos_pool *pool)
 	return &pool->vp_umm;
 }
 
+static inline struct umem_store *
+vos_pool2store(struct vos_pool *pool)
+{
+	return &pool->vp_umm.umm_pool->up_store;
+}
+
 static inline struct umem_instance *
 vos_cont2umm(struct vos_container *cont)
 {
@@ -1844,4 +1850,39 @@ vos_io_scm(struct vos_pool *pool, daos_iod_type_t type, daos_size_t size, enum v
 int
 vos_insert_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t *oid);
 
+static inline bool
+vos_pool_is_p2(struct vos_pool *pool)
+{
+	struct umem_store *store = vos_pool2store(pool);
+
+	return store->store_type == DAOS_MD_BMEM_V2;
+}
+
+static inline umem_off_t
+vos_obj_zalloc(struct vos_object *obj, size_t size)
+{
+	struct vos_pool *pool = vos_obj2pool(obj);
+
+	if (vos_pool_is_p2(pool)) {
+		D_ASSERT(obj->obj_bkt_allot == 1);
+		return umem_zalloc_from_bucket(vos_pool2umm(pool), size, obj->obj_bkt_ids[0]);
+	}
+
+	return umem_zalloc(vos_pool2umm(pool), size);
+}
+
+static inline umem_off_t
+vos_obj_reserve(struct vos_object *obj, struct umem_rsrvd_act *rsrvd_scm, daos_size_t size)
+{
+	struct vos_pool *pool = vos_obj2pool(obj);
+
+	if (vos_pool_is_p2(pool)) {
+		D_ASSERT(obj->obj_bkt_allot == 1);
+		return umem_reserve_from_bucket(vos_pool2umm(pool), rsrvd_scm, size,
+						obj->obj_bkt_ids[0]);
+	}
+
+	return umem_reserve(vos_pool2umm(pool), rsrvd_scm, size);
+}
+
 #endif /* __VOS_INTERNAL_H__ */
diff --git a/src/vos/vos_layout.h b/src/vos/vos_layout.h
index 72459544c27b..0400a351175c 100644
--- a/src/vos/vos_layout.h
+++ b/src/vos/vos_layout.h
@@ -377,4 +377,17 @@ struct vos_obj_df {
 	struct btr_root vo_tree;
 };
 
+#define VOS_OBJ_BKTS_MAX	4
+
+/*
+ * VOS object durable format for md-on-ssd phase2. The size fits the 128-byte
+ * slab (see slab_map[] defined in mem.c).
+ */
+struct vos_obj_p2_df {
+	struct vos_obj_df p2_obj_df;
+	uint32_t p2_bkt_ids[VOS_OBJ_BKTS_MAX];
+	uint64_t p2_reserved;
+};
+D_CASSERT(sizeof(struct vos_obj_p2_df) == D_ALIGNUP(sizeof(struct vos_obj_df), 32));
+
 #endif
diff --git a/src/vos/vos_obj.c b/src/vos/vos_obj.c
index ee56a01b629e..817430f94b08 100644
--- a/src/vos/vos_obj.c
+++ b/src/vos/vos_obj.c
@@ -1065,7 +1066,8 @@ key_iter_fetch_root(struct vos_obj_iter *oiter, vos_iter_type_t type,
 	 * subtree
 	 */
 	if (krec->kr_bmap & KREC_BF_EVT) {
-		vos_evt_desc_cbs_init(&cbs, vos_obj2pool(obj), vos_cont2hdl(obj->obj_cont));
+		vos_evt_desc_cbs_init(&cbs, vos_obj2pool(obj), vos_cont2hdl(obj->obj_cont),
+				      obj);
 		rc = evt_open(&krec->kr_evt, info->ii_uma, &cbs, &info->ii_tree_hdl);
 		if (rc) {
 			D_DEBUG(DB_TRACE,
@@ -1077,7 +1078,7 @@ key_iter_fetch_root(struct vos_obj_iter *oiter, vos_iter_type_t type,
 		info->ii_fake_akey_flag = VOS_IT_DKEY_EV;
 	} else {
 		rc = dbtree_open_inplace_ex(&krec->kr_btr, info->ii_uma,
-					    vos_cont2hdl(obj->obj_cont), vos_obj2pool(obj),
+					    vos_cont2hdl(obj->obj_cont), obj,
 					    &info->ii_tree_hdl);
 		if (rc) {
 			D_DEBUG(DB_TRACE,
@@ -2030,7 +2031,7 @@ vos_obj_akey_iter_nested_prep(vos_iter_type_t type, struct vos_iter_info *info,
 	}
 
 	rc = dbtree_open_inplace_ex(info->ii_btr, info->ii_uma, vos_cont2hdl(obj->obj_cont),
-				    vos_obj2pool(obj), &toh);
+				    obj, &toh);
 	if (rc) {
 		D_DEBUG(DB_TRACE,
 			"Failed to open tree for iterator:"
@@ -2087,7 +2088,7 @@ vos_obj_iter_sv_nested_prep(vos_iter_type_t type, struct vos_iter_info *info,
 	}
 
 	rc = dbtree_open_inplace_ex(info->ii_btr, info->ii_uma, vos_cont2hdl(obj->obj_cont),
-				    vos_obj2pool(obj), &toh);
+				    obj, &toh);
 	if (rc) {
 		D_DEBUG(DB_TRACE,
 			"Failed to open tree for iterator:"
@@ -2147,7 +2148,7 @@ vos_obj_ev_iter_nested_prep(vos_iter_type_t type, struct vos_iter_info *info,
 		goto prepare;
 	}
 
-	vos_evt_desc_cbs_init(&cbs, vos_obj2pool(obj), vos_cont2hdl(obj->obj_cont));
+	vos_evt_desc_cbs_init(&cbs, vos_obj2pool(obj), vos_cont2hdl(obj->obj_cont), obj);
 	rc = evt_open(info->ii_evt, info->ii_uma, &cbs, &toh);
 	if (rc) {
 		D_DEBUG(DB_TRACE,
diff --git a/src/vos/vos_obj.h b/src/vos/vos_obj.h
index 2ccc8d71988a..7b254ad2d75c 100644
--- a/src/vos/vos_obj.h
+++ b/src/vos/vos_obj.h
@@ -47,12 +47,18 @@ struct vos_object {
 	struct vos_obj_df *obj_df;
 	/** backref to container */
 	struct vos_container *obj_cont;
+	/* Handle for the pinned object */
+	struct umem_pin_handle *obj_pin_hdl;
+	/** Bucket IDs for the object */
+	uint32_t obj_bkt_ids[VOS_OBJ_BKTS_MAX];
 	/** nobody should access this object */
 	bool obj_zombie;
 	/** Object is held for discard */
 	uint32_t obj_discard : 1,
 	/** If non-zero, object is held for aggregation */
-		 obj_aggregate : 1;
+		 obj_aggregate : 1,
+	/** Evict-able bucket is already allocated */
+		 obj_bkt_allot : 1;
 };
diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c
index 8845eae00858..65e61c07a933 100644
--- a/src/vos/vos_obj_cache.c
+++ b/src/vos/vos_obj_cache.c
@@ -245,12 +245,56 @@ obj_get(struct daos_lru_cache *occ, struct vos_container *cont, daos_unit_oid_t
 	return rc;
 }
 
+static inline void
+vos_obj_unpin(struct vos_object *obj)
+{
+	struct vos_pool *pool = vos_obj2pool(obj);
+	struct umem_store *store = vos_pool2store(pool);
+
+	if (obj->obj_pin_hdl != NULL) {
+		umem_cache_unpin(store, obj->obj_pin_hdl);
+		obj->obj_pin_hdl = NULL;
+	}
+}
+
+/* Only a single evict-able bucket is supported for the moment */
+static inline int
+vos_obj_pin(struct vos_object *obj)
+{
+	struct vos_pool *pool = vos_obj2pool(obj);
+	struct umem_store *store = vos_pool2store(pool);
+	struct umem_cache_range rg;
+
+	if (!vos_pool_is_p2(pool))
+		return 0;
+
+	if (!obj->obj_bkt_allot) {
+		if (!obj->obj_df) {
+			obj->obj_bkt_ids[0] = umem_allot_mb_evictable(vos_pool2umm(pool), 0);
+		} else {
+			struct vos_obj_p2_df *p2 = (struct vos_obj_p2_df *)obj->obj_df;
+
+			obj->obj_bkt_ids[0] = p2->p2_bkt_ids[0];
+		}
+		obj->obj_bkt_allot = 1;
+	}
+
+	D_ASSERT(obj->obj_pin_hdl == NULL);
+	if (obj->obj_bkt_ids[0] == UMEM_DEFAULT_MBKT_ID)
+		return 0;
+
+	rg.cr_off = umem_get_mb_base_offset(vos_pool2umm(pool), obj->obj_bkt_ids[0]);
+	rg.cr_size = store->cache->ca_page_sz;
+
+	return umem_cache_pin(store, &rg, 1, false, &obj->obj_pin_hdl);
+}
+
 static inline void
 obj_release(struct daos_lru_cache *occ, struct vos_object *obj, bool evict)
 {
 	D_ASSERT(obj != NULL);
 
-	/* TODO: Unpin the object in md-on-ssd phase II */
+	vos_obj_unpin(obj);
 
 	if (obj == &obj_local) {
 		clean_object(obj);
@@ -301,6 +345,8 @@ cache_object(struct daos_lru_cache *occ, struct vos_object **objp)
 	obj_new->obj_sync_epoch = obj_local.obj_sync_epoch;
 	obj_new->obj_df = obj_local.obj_df;
 	obj_new->obj_zombie = obj_local.obj_zombie;
+	obj_new->obj_bkt_allot = obj_local.obj_bkt_allot;
+	obj_new->obj_pin_hdl = obj_local.obj_pin_hdl;
 
 	obj_local.obj_toh = DAOS_HDL_INVAL;
 	obj_local.obj_ih = DAOS_HDL_INVAL;
@@ -363,7 +409,9 @@ vos_obj_check_discard(struct vos_container *cont, daos_unit_oid_t oid, uint64_t
 	if (rc)
 		return rc;
 
-	/* TODO: Pin object in memory */
+	rc = vos_obj_pin(obj);
+	if (rc)
+		return rc;
 
 	if (check_discard(obj, flags))
 		/* Update request will retry with this error */
@@ -507,8 +555,10 @@ vos_obj_hold(struct vos_container *cont, daos_unit_oid_t oid, daos_epoch_range_t
 		D_ASSERT(tmprc == 0); /* Non-zero only valid for akey */
 	}
 
-	/* TODO: Pin the object in memory in md-on-ssd phase II. Revise the 'obj_local' implementation
-	 * then, since this function could yield. */
+	/* TODO: Revise the 'obj_local' implementation later, since this function could yield. */
+	rc = vos_obj_pin(obj);
+	if (rc)
+		goto failed;
 
 	/* It's done for DAOS_INTENT_UPDATE or DAOS_INTENT_PUNCH or DAOS_INTENT_KILL */
 	if (intent == DAOS_INTENT_UPDATE || intent == DAOS_INTENT_PUNCH ||
diff --git a/src/vos/vos_obj_index.c b/src/vos/vos_obj_index.c
index 72870cfd76e2..e6111cd07469 100644
--- a/src/vos/vos_obj_index.c
+++ b/src/vos/vos_obj_index.c
@@ -47,7 +47,8 @@ oi_hkey_size(void)
 static int
 oi_rec_msize(int alloc_overhead)
 {
-	return alloc_overhead + sizeof(struct vos_obj_df);
+	/* This function is only used for metadata overhead estimation. */
+	return alloc_overhead + D_ALIGNUP(sizeof(struct vos_obj_df), 32);
 }
 
 static void
@@ -67,6 +68,15 @@ oi_hkey_cmp(struct btr_instance *tins, struct btr_record *rec, void *hkey)
 	return dbtree_key_cmp_rc(memcmp(oid1, oid2, sizeof(*oid1)));
 }
 
+static inline unsigned int
+vos_obj_df_size(struct vos_pool *pool)
+{
+	if (vos_pool_is_p2(pool))
+		return sizeof(struct vos_obj_p2_df);
+
+	return sizeof(struct vos_obj_df);
+}
+
 static int
 oi_rec_alloc(struct btr_instance *tins, d_iov_t *key_iov,
 	     d_iov_t *val_iov, struct btr_record *rec, d_iov_t *val_out)
@@ -76,10 +86,11 @@ oi_rec_alloc(struct btr_instance *tins, d_iov_t *key_iov,
 	struct vos_obj_df *obj;
 	daos_unit_oid_t *key;
 	umem_off_t obj_off;
+	struct vos_pool *pool = (struct vos_pool *)tins->ti_priv;
 	int rc;
 
 	/* Allocate a PMEM value of type vos_obj_df */
-	obj_off = umem_zalloc(&tins->ti_umm, sizeof(struct vos_obj_df));
+	obj_off = umem_zalloc(&tins->ti_umm, vos_obj_df_size(pool));
 	if (UMOFF_IS_NULL(obj_off))
 		return -DER_NOSPACE;
 
@@ -100,11 +111,11 @@
 	} else {
 		struct vos_obj_df *new_obj = val_out->iov_buf;
 
-		memcpy(obj, new_obj, sizeof(*obj));
+		memcpy(obj, new_obj, vos_obj_df_size(pool));
 		obj->vo_id = *key;
 	}
 
-	d_iov_set(val_iov, obj, sizeof(struct vos_obj_df));
+	d_iov_set(val_iov, obj, vos_obj_df_size(pool));
 	rec->rec_off = obj_off;
 
 	/* For new created object, commit it synchronously to reduce
@@ -176,7 +187,7 @@ oi_rec_fetch(struct btr_instance *tins, struct btr_record *rec,
 		DP_UOID(obj->vo_id), rec->rec_off);
 
 	D_ASSERT(val_iov != NULL);
-	d_iov_set(val_iov, obj, sizeof(struct vos_obj_df));
+	d_iov_set(val_iov, obj, vos_obj_df_size((struct vos_pool *)tins->ti_priv));
 
 	return 0;
 }
@@ -504,7 +515,7 @@ oi_iter_nested_tree_fetch(struct vos_iterator *iter, vos_iter_type_t type,
 		return rc;
 	}
 
-	D_ASSERT(rec_iov.iov_len == sizeof(struct vos_obj_df));
+	D_ASSERT(rec_iov.iov_len == vos_obj_df_size(oiter->oit_cont->vc_pool));
 	obj = (struct vos_obj_df *)rec_iov.iov_buf;
 
 	rc = oi_iter_ilog_check(obj, oiter, &info->ii_epr, false);
@@ -610,7 +621,7 @@ oi_iter_match_probe(struct vos_iterator *iter, daos_anchor_t *anchor, uint32_t f
 			goto failed;
 		}
 
-		D_ASSERT(iov.iov_len == sizeof(struct vos_obj_df));
+		D_ASSERT(iov.iov_len == vos_obj_df_size(oiter->oit_cont->vc_pool));
 		obj = (struct vos_obj_df *)iov.iov_buf;
 
 		if (iter->it_filter_cb != NULL && (flags & VOS_ITER_PROBE_AGAIN) == 0) {
@@ -767,7 +778,7 @@ oi_iter_fetch(struct vos_iterator *iter, vos_iter_entry_t *it_entry,
 		return rc;
 	}
 
-	D_ASSERT(rec_iov.iov_len == sizeof(struct vos_obj_df));
+	D_ASSERT(rec_iov.iov_len == vos_obj_df_size(oiter->oit_cont->vc_pool));
 	return oi_iter_fill(rec_iov.iov_buf, oiter, false, it_entry);
 }
 
@@ -818,7 +829,7 @@ oi_iter_check_punch(daos_handle_t ih)
 		  "Probe should be done before aggregation\n");
 	if (rc != 0)
 		return rc;
-	D_ASSERT(rec_iov.iov_len == sizeof(struct vos_obj_df));
+	D_ASSERT(rec_iov.iov_len == vos_obj_df_size(oiter->oit_cont->vc_pool));
 	obj = (struct vos_obj_df *)rec_iov.iov_buf;
 	oid = obj->vo_id;
 
@@ -873,7 +884,7 @@ oi_iter_aggregate(daos_handle_t ih, bool range_discard)
 		  "Probe should be done before aggregation\n");
 	if (rc != 0)
 		return rc;
-	D_ASSERT(rec_iov.iov_len == sizeof(struct vos_obj_df));
+	D_ASSERT(rec_iov.iov_len == vos_obj_df_size(oiter->oit_cont->vc_pool));
 	obj = (struct vos_obj_df *)rec_iov.iov_buf;
 	oid = obj->vo_id;
 
diff --git a/src/vos/vos_query.c b/src/vos/vos_query.c
index e924e4016b69..b4d414012e50 100644
--- a/src/vos/vos_query.c
+++ b/src/vos/vos_query.c
@@ -162,7 +162,7 @@ query_normal_recx(struct open_query *query, daos_recx_t *recx)
 	uint32_t inob;
 
-	vos_evt_desc_cbs_init(&cbs, query->qt_pool, query->qt_coh);
+	vos_evt_desc_cbs_init(&cbs, query->qt_pool, query->qt_coh, query->qt_obj);
 	rc = evt_open(query->qt_recx_root, &query->qt_pool->vp_uma, &cbs, &toh);
 	if (rc != 0)
 		return rc;
@@ -344,7 +344,7 @@ query_ec_recx(struct open_query *query, daos_recx_t *recx)
 	bool prefresh = true;
 
-	vos_evt_desc_cbs_init(&cbs, query->qt_pool, query->qt_coh);
+	vos_evt_desc_cbs_init(&cbs, query->qt_pool, query->qt_coh, query->qt_obj);
 	rc = evt_open(query->qt_recx_root, &query->qt_pool->vp_uma, &cbs, &toh);
 	if (rc != 0)
 		return rc;
@@ -517,7 +517,7 @@ open_and_query_key(struct open_query *query, daos_key_t *key,
 		return -DER_NONEXIST;
 
 	rc = dbtree_open_inplace_ex(to_open, &query->qt_pool->vp_uma,
-				    query->qt_coh, query->qt_pool, toh);
+				    query->qt_coh, query->qt_obj, toh);
 	if (rc != 0)
 		return rc;
 
diff --git a/src/vos/vos_tree.c b/src/vos/vos_tree.c
index c36fcaa88c58..5e6f3822e9a6 100644
--- a/src/vos/vos_tree.c
+++ b/src/vos/vos_tree.c
@@ -154,8 +154,10 @@ ktr_hkey_gen(struct btr_instance *tins, d_iov_t *key_iov, void *hkey)
 {
 	struct ktr_hkey *kkey = (struct ktr_hkey *)hkey;
 	struct umem_pool *umm_pool = tins->ti_umm.umm_pool;
-	struct vos_pool *pool = (struct vos_pool *)tins->ti_priv;
+	struct vos_pool *pool;
 
+	D_ASSERT(tins->ti_destroy == 0);
+	pool = vos_obj2pool(tins->ti_priv);
 	D_ASSERT(key_iov->iov_len < pool->vp_pool_df->pd_scm_sz);
 
 	hkey_common_gen(key_iov, hkey);
@@ -255,7 +257,8 @@ ktr_rec_alloc(struct btr_instance *tins, d_iov_t *key_iov,
 
 	rbund = iov2rec_bundle(val_iov);
 
-	rec->rec_off = umem_zalloc(&tins->ti_umm, vos_krec_size(rbund));
+	D_ASSERT(tins->ti_destroy == 0);
+	rec->rec_off = vos_obj_zalloc(tins->ti_priv, vos_krec_size(rbund));
 	if (UMOFF_IS_NULL(rec->rec_off))
 		return -DER_NOSPACE;
 
@@ -298,11 +301,15 @@ ktr_rec_free(struct btr_instance *tins, struct btr_record *rec, void *args)
 	if (rc != 0)
 		return rc;
 
-	pool = (struct vos_pool *)tins->ti_priv;
+	D_ASSERT(tins->ti_priv);
+	if (tins->ti_destroy)
+		pool = (struct vos_pool *)tins->ti_priv;
+	else
+		pool = vos_obj2pool(tins->ti_priv);
+
 	vos_ilog_ts_evict(&krec->kr_ilog, (krec->kr_bmap & KREC_BF_DKEY) ?
 			  VOS_TS_TYPE_DKEY : VOS_TS_TYPE_AKEY, pool->vp_sysdb);
 
-	D_ASSERT(tins->ti_priv);
 	gc = (krec->kr_bmap & KREC_BF_DKEY) ?
	     GC_DKEY : GC_AKEY;
 	coh = vos_cont2hdl(args);
 	return gc_add_item(pool, coh, gc, rec->rec_off, 0);
@@ -351,7 +358,8 @@ ktr_rec_update(struct btr_instance *tins, struct btr_record *rec,
 static umem_off_t
 ktr_node_alloc(struct btr_instance *tins, int size)
 {
-	return umem_zalloc(&tins->ti_umm, size);
+	D_ASSERT(tins->ti_destroy == 0);
+	return vos_obj_zalloc(tins->ti_priv, size);
 }
 
 static btr_ops_t key_btr_ops = {
@@ -620,9 +628,13 @@ svt_rec_free_internal(struct btr_instance *tins, struct btr_record *rec,
 	if (!overwrite) {
 		/* SCM value is stored together with vos_irec_df */
 		if (addr->ba_type == DAOS_MEDIA_NVME) {
-			struct vos_pool *pool = tins->ti_priv;
+			struct vos_pool *pool;
 
-			D_ASSERT(pool != NULL);
+			D_ASSERT(tins->ti_priv != NULL);
+			if (tins->ti_destroy)
+				pool = (struct vos_pool *)tins->ti_priv;
+			else
+				pool = vos_obj2pool(tins->ti_priv);
+
 			rc = vos_bio_addr_free(pool, addr, irec->ir_size);
 			if (rc)
 				return rc;
@@ -714,7 +726,7 @@ svt_check_availability(struct btr_instance *tins, struct btr_record *rec,
 static umem_off_t
 svt_node_alloc(struct btr_instance *tins, int size)
 {
-	return umem_zalloc(&tins->ti_umm, size);
+	return vos_obj_zalloc(tins->ti_priv, size);
 }
 
 static btr_ops_t singv_btr_ops = {
@@ -802,12 +814,13 @@ evt_dop_log_del(struct umem_instance *umm, daos_epoch_t epoch,
 }
 
 void
-vos_evt_desc_cbs_init(struct evt_desc_cbs *cbs, struct vos_pool *pool,
-		      daos_handle_t coh)
+vos_evt_desc_cbs_init(struct evt_desc_cbs *cbs, struct vos_pool *pool, daos_handle_t coh,
+		      struct vos_object *obj)
 {
 	/* NB: coh is not required for destroy */
 	cbs->dc_bio_free_cb = evt_dop_bio_free;
 	cbs->dc_bio_free_args = (void *)pool;
+	cbs->dc_alloc_arg = (void *)obj;
 	cbs->dc_log_status_cb = evt_dop_log_status;
 	cbs->dc_log_status_args = (void *)(unsigned long)coh.cookie;
 	cbs->dc_log_add_cb = evt_dop_log_add;
@@ -829,7 +842,7 @@ tree_open_create(struct vos_object *obj, enum vos_tree_class tclass, int flags,
 	int unexpected_flag;
 	int rc = 0;
 
-	vos_evt_desc_cbs_init(&cbs, pool, coh);
+	vos_evt_desc_cbs_init(&cbs, pool, coh, obj);
 
 	if ((krec->kr_bmap & (KREC_BF_BTR | KREC_BF_EVT)) == 0)
 		goto create;
@@ -855,7 +868,7 @@ tree_open_create(struct vos_object *obj, enum vos_tree_class tclass, int flags,
 		if (expected_flag == KREC_BF_EVT) {
 			rc = evt_open(&krec->kr_evt, uma, &cbs, sub_toh);
 		} else {
-			rc = dbtree_open_inplace_ex(&krec->kr_btr, uma, coh, pool, sub_toh);
+			rc = dbtree_open_inplace_ex(&krec->kr_btr, uma, coh, obj, sub_toh);
 		}
 		if (rc != 0)
 			D_ERROR("Failed to open tree: " DF_RC "\n", DP_RC(rc));
@@ -924,7 +937,7 @@ tree_open_create(struct vos_object *obj, enum vos_tree_class tclass, int flags,
 
 		rc = dbtree_create_inplace_ex(ta->ta_class, tree_feats,
 					      ta->ta_order, uma, &krec->kr_btr,
-					      coh, pool, sub_toh);
+					      coh, obj, sub_toh);
 		if (rc != 0) {
 			D_ERROR("Failed to create btree: "DF_RC"\n", DP_RC(rc));
 			goto out;
@@ -1206,14 +1219,13 @@ obj_tree_init(struct vos_object *obj)
 					       ta->ta_order, vos_obj2uma(obj),
 					       &obj->obj_df->vo_tree,
 					       vos_cont2hdl(obj->obj_cont),
-					       vos_obj2pool(obj),
-					       &obj->obj_toh);
+					       obj, &obj->obj_toh);
 	} else {
 		D_DEBUG(DB_DF, "Open btree for object\n");
 		rc = dbtree_open_inplace_ex(&obj->obj_df->vo_tree,
 					    vos_obj2uma(obj),
 					    vos_cont2hdl(obj->obj_cont),
-					    vos_obj2pool(obj), &obj->obj_toh);
+					    obj, &obj->obj_toh);
 	}
 	if (rc)
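
Note on the ti_destroy bit added to struct btr_instance: after this patch the
private data of the dkey/akey/value trees is normally a vos_object, but tree
destroy can still run with the vos_pool as private data, so callbacks such as
ktr_rec_free() and svt_rec_free_internal() consult the bit to pick the right
interpretation. A minimal standalone C sketch of that convention follows; the
types and names below are local stand-ins, not the DAOS structures:

    #include <assert.h>
    #include <stdio.h>

    struct pool   { const char *name; };
    struct object { struct pool *pool; };

    struct tree_instance {
            void         *priv;        /* object, or pool during destroy */
            unsigned int  destroy : 1; /* mirrors ti_destroy */
    };

    /* Mirrors the ktr_rec_free()/svt_rec_free_internal() dispatch. */
    static struct pool *tins2pool(struct tree_instance *tins)
    {
            assert(tins->priv != NULL);
            if (tins->destroy)
                    return (struct pool *)tins->priv;        /* destroy path */
            return ((struct object *)tins->priv)->pool;      /* normal path */
    }

    int main(void)
    {
            struct pool          p = { "pool-0" };
            struct object        o = { &p };
            struct tree_instance normal  = { .priv = &o, .destroy = 0 };
            struct tree_instance destroy = { .priv = &p, .destroy = 1 };

            printf("%s / %s\n", tins2pool(&normal)->name,
                   tins2pool(&destroy)->name);
            return 0;
    }

Setting the bit once in btr_tree_destroy() keeps the two interpretations from
ever being live at the same time within one tree context.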
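The evt_zalloc() helper added in evtree.c follows the same idea from the
allocation side: when the descriptor callbacks carry an object argument
(dc_alloc_arg), node and descriptor memory comes from the object's evict-able
bucket, and callers without an object context (e.g. gc_drain_evt() passes
NULL) fall back to the plain allocator. A compilable toy model of that
dispatch, with stand-in allocators rather than the umem API:

    #include <stdio.h>
    #include <stdlib.h>

    struct object { unsigned bkt_id; };

    static void *zalloc_from_bucket(size_t size, unsigned bkt_id)
    {
            printf("alloc %zu bytes from bucket %u\n", size, bkt_id);
            return calloc(1, size);
    }

    static void *zalloc_default(size_t size)
    {
            printf("alloc %zu bytes from default zone\n", size);
            return calloc(1, size);
    }

    /* Mirrors the evt_zalloc() fallback logic. */
    static void *tree_zalloc(struct object *alloc_arg, size_t size)
    {
            if (alloc_arg != NULL)
                    return zalloc_from_bucket(size, alloc_arg->bkt_id);
            return zalloc_default(size);
    }

    int main(void)
    {
            struct object obj = { .bkt_id = 3 };
            void *a = tree_zalloc(&obj, 64);  /* I/O path: bucket-aware */
            void *b = tree_zalloc(NULL, 64);  /* GC path: default zone */

            free(a);
            free(b);
            return 0;
    }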
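The D_CASSERT in vos_layout.h encodes the sizing rule for the new durable
format: the phase2 members must exactly fill the padding that takes
vos_obj_df up to the next 32-byte boundary, i.e. the 128-byte slab. A
standalone model of the same check; the 104-byte base size here is an
illustrative stand-in, only sizeof(struct vos_obj_df) matters in the real
layout:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define OBJ_BKTS_MAX  4
    #define ALIGNUP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

    /* Stand-in base format: 104 bytes, ending 8 bytes past a 32-byte
     * boundary, so 24 bytes of slab padding remain. */
    struct obj_df {
            uint64_t words[13];
    };

    /* Stand-in for struct vos_obj_p2_df: base format plus bucket IDs. */
    struct obj_p2_df {
            struct obj_df obj_df;
            uint32_t      bkt_ids[OBJ_BKTS_MAX]; /* 16 bytes */
            uint64_t      reserved;              /*  8 bytes */
    };

    /* Same shape as the patch's D_CASSERT: the extension must land exactly
     * on the padded slab size, never grow past it. */
    static_assert(sizeof(struct obj_p2_df) == ALIGNUP(sizeof(struct obj_df), 32),
                  "phase2 object df must fill the padded slab size");

    int main(void)
    {
            printf("base %zu -> phase2 %zu bytes\n",
                   sizeof(struct obj_df), sizeof(struct obj_p2_df));
            return 0;
    }

This is also why oi_rec_msize() now reports D_ALIGNUP(sizeof(struct
vos_obj_df), 32): the overhead estimate covers both formats.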
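Finally, the pin path added in vos_obj_pin() is an allot-or-reload state
machine: the first hold of a brand-new object allots an evict-able bucket,
later holds reload the ID recorded in the phase2 durable format, and objects
in the default (non-evict-able) bucket are never pinned. A compilable toy
model of that flow; all names are local stand-ins, not the umem cache API:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DEFAULT_BKT_ID 0u

    struct object {
            bool     bkt_allot; /* mirrors obj_bkt_allot */
            bool     has_df;    /* durable format already exists? */
            uint32_t df_bkt_id; /* ID recorded in the durable format */
            uint32_t bkt_id;    /* mirrors obj_bkt_ids[0] */
            bool     pinned;    /* mirrors obj_pin_hdl != NULL */
    };

    /* Stand-in for umem_allot_mb_evictable(): hand out fresh bucket IDs. */
    static uint32_t allot_evictable(void)
    {
            static uint32_t next = 7;

            return next++;
    }

    /* Mirrors vos_obj_pin(): resolve the bucket ID once, then pin unless
     * the object lives in the default zone. */
    static int obj_pin(struct object *obj)
    {
            if (!obj->bkt_allot) {
                    obj->bkt_id = obj->has_df ? obj->df_bkt_id
                                              : allot_evictable();
                    obj->bkt_allot = true;
            }
            if (obj->bkt_id == DEFAULT_BKT_ID)
                    return 0;      /* nothing to pin */
            obj->pinned = true;    /* real code: umem_cache_pin() on the range */
            return 0;
    }

    /* Mirrors vos_obj_unpin(), called from obj_release(). */
    static void obj_unpin(struct object *obj)
    {
            obj->pinned = false;   /* real code: umem_cache_unpin() */
    }

    int main(void)
    {
            struct object obj = { .has_df = false };

            obj_pin(&obj);         /* hold: allots a bucket and pins it */
            printf("bucket %u pinned %d\n", obj.bkt_id, obj.pinned);
            obj_unpin(&obj);       /* release: unpins */
            return 0;
    }

Pairing the pin with vos_obj_hold() and the unpin with vos_obj_release() is
what keeps the bucket resident across any yield inside the hold, which is the
point of the pre-load work.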