Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-14317 vos: initial changes for the phase2 object pre-load #15001

Merged
merged 1 commit into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/common/btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -945,8 +945,12 @@ btr_root_alloc(struct btr_context *tcx)
struct btr_instance *tins = &tcx->tc_tins;
struct btr_root *root;

tins->ti_root_off = umem_zalloc(btr_umm(tcx),
sizeof(struct btr_root));
if (btr_ops(tcx)->to_node_alloc != NULL)
tins->ti_root_off = btr_ops(tcx)->to_node_alloc(&tcx->tc_tins,
sizeof(struct btr_root));
else
tins->ti_root_off = umem_zalloc(btr_umm(tcx), sizeof(struct btr_root));

if (UMOFF_IS_NULL(tins->ti_root_off))
return btr_umm(tcx)->umm_nospc_rc;

Expand Down Expand Up @@ -3884,6 +3888,7 @@ btr_tree_destroy(struct btr_context *tcx, void *args, bool *destroyed)
tcx->tc_tins.ti_root_off, tcx->tc_order);

root = tcx->tc_tins.ti_root;
tcx->tc_tins.ti_destroy = 1;
if (root && !UMOFF_IS_NULL(root->tr_node)) {
/* destroy the root and all descendants */
rc = btr_node_destroy(tcx, root->tr_node, args, &empty);
Expand Down
2 changes: 2 additions & 0 deletions src/include/daos/btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ struct btr_instance {
struct btr_root *ti_root;
/** Customized operations for the tree */
btr_ops_t *ti_ops;
/** Set when this tree instance is being used to destroy the tree */
unsigned int ti_destroy : 1;
};

/**
Expand Down
2 changes: 0 additions & 2 deletions src/include/daos/mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,6 @@ typedef void
umem_cache_wait_cb_t(void *arg, uint64_t chkpt_tx, uint64_t *committed_tx);

/**
* Write all dirty pages before @wal_tx to MD blob. (XXX: not yet implemented)
*
* This function can yield internally, it is called by checkpoint service of upper level stack.
*
* \param[in] store The umem store
Expand Down
6 changes: 5 additions & 1 deletion src/include/daos_srv/evtree.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2017-2023 Intel Corporation.
* (C) Copyright 2017-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -70,6 +70,10 @@ struct evt_desc_cbs {
struct evt_desc *desc,
daos_size_t nob, void *args);
void *dc_bio_free_args;
/**
* Argument for allocation.
*/
void *dc_alloc_arg;
/**
* Availability check, it is for data tracked by DTX undo log.
* It is optional, EVTree always treats data extent is available if
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/util/telemetry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ class TelemetryUtils():
ENGINE_NVME_INTEL_VENDOR_METRICS
ENGINE_MEM_USAGE_METRICS = [
"engine_mem_vos_dtx_cmt_ent_48",
"engine_mem_vos_vos_obj_360",
"engine_mem_vos_vos_obj_384",
daltonbohning marked this conversation as resolved.
Show resolved Hide resolved
"engine_mem_vos_vos_lru_size",
"engine_mem_dtx_dtx_leader_handle_360"]
ENGINE_MEM_TOTAL_USAGE_METRICS = [
Expand Down
10 changes: 6 additions & 4 deletions src/vos/evtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1443,8 +1443,9 @@ evt_node_alloc(struct evt_context *tcx, unsigned int flags,
struct evt_node *nd;
umem_off_t nd_off;
bool leaf = (flags & EVT_NODE_LEAF);
struct vos_object *obj = tcx->tc_desc_cbs.dc_alloc_arg;

nd_off = umem_zalloc(evt_umm(tcx), evt_node_size(tcx, leaf));
nd_off = vos_obj_alloc(evt_umm(tcx), obj, evt_node_size(tcx, leaf), true);
if (UMOFF_IS_NULL(nd_off))
return -DER_NOSPACE;

Expand Down Expand Up @@ -3249,8 +3250,9 @@ evt_common_insert(struct evt_context *tcx, struct evt_node *nd,
}

if (leaf) {
umem_off_t desc_off;
uint32_t csum_buf_size = 0;
umem_off_t desc_off;
uint32_t csum_buf_size = 0;
struct vos_object *obj = tcx->tc_desc_cbs.dc_alloc_arg;

if (ci_is_valid(&ent->ei_csum))
csum_buf_size = ci_csums_len(ent->ei_csum);
Expand All @@ -3263,7 +3265,7 @@ evt_common_insert(struct evt_context *tcx, struct evt_node *nd,
D_DEBUG(DB_TRACE, "Allocating an extra %d bytes "
"for checksum", csum_buf_size);
}
desc_off = umem_zalloc(evt_umm(tcx), desc_size);
desc_off = vos_obj_alloc(evt_umm(tcx), obj, desc_size, true);
if (UMOFF_IS_NULL(desc_off))
return -DER_NOSPACE;

Expand Down
54 changes: 27 additions & 27 deletions src/vos/tests/vts_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ io_update_and_fetch_dkey(struct io_test_args *arg, daos_epoch_t update_epoch,
static inline int
hold_obj(struct vos_container *cont, daos_unit_oid_t oid, daos_epoch_range_t *epr,
daos_epoch_t bound, uint64_t flags, uint32_t intent, struct vos_object **obj_p,
struct vos_ts_set *ts_set)
struct vos_ts_set *ts_set, struct umem_instance *umm)
{
int rc;

Expand All @@ -908,7 +908,16 @@ hold_obj(struct vos_container *cont, daos_unit_oid_t oid, daos_epoch_range_t *ep

if (flags & VOS_OBJ_CREATE) {
assert_ptr_not_equal(*obj_p, NULL);

if (umm != NULL) {
rc = umem_tx_begin(umm, NULL);
assert_rc_equal(rc, 0);
}

rc = vos_obj_incarnate(*obj_p, epr, bound, flags, intent, ts_set);

if (umm != NULL)
rc = umem_tx_end(umm, rc);
}

return rc;
Expand All @@ -926,7 +935,8 @@ hold_objects(struct vos_object **objs, daos_handle_t *coh, daos_unit_oid_t *oid,
hold_flags |= VOS_OBJ_VISIBLE;
for (i = start; i < end; i++) {
rc = hold_obj(vos_hdl2cont(*coh), *oid, &epr, 0, hold_flags,
no_create ? DAOS_INTENT_DEFAULT : DAOS_INTENT_UPDATE, &objs[i], 0);
no_create ? DAOS_INTENT_DEFAULT : DAOS_INTENT_UPDATE,
&objs[i], 0, NULL);
if (rc != exp_rc)
return 1;
}
Expand Down Expand Up @@ -1006,82 +1016,72 @@ io_obj_cache_test(void **state)

ummg = vos_cont2umm(vos_hdl2cont(ctx->tc_co_hdl));
umml = vos_cont2umm(vos_hdl2cont(l_coh));
rc = umem_tx_begin(ummg, NULL);
assert_rc_equal(rc, 0);

rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0,
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &objs[0], 0);
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &objs[0], 0, ummg);
assert_rc_equal(rc, 0);

/** Hold object for discard */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_DISCARD,
DAOS_INTENT_DISCARD, &obj1, 0);
DAOS_INTENT_DISCARD, &obj1, 0, ummg);
assert_rc_equal(rc, 0);
/** Second discard should fail */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_DISCARD,
DAOS_INTENT_DISCARD, &obj2, 0);
DAOS_INTENT_DISCARD, &obj2, 0, ummg);
assert_rc_equal(rc, -DER_BUSY);
/** Should prevent simultaneous aggregation */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_AGGREGATE,
DAOS_INTENT_PURGE, &obj2, 0);
DAOS_INTENT_PURGE, &obj2, 0, ummg);
assert_rc_equal(rc, -DER_BUSY);
/** Should prevent simultaneous hold for create as well */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0,
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &obj2, 0);
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &obj2,
0, ummg);
assert_rc_equal(rc, -DER_UPDATE_AGAIN);

/** Need to be able to hold for read though or iteration won't work */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_VISIBLE,
DAOS_INTENT_DEFAULT, &obj2, 0);
DAOS_INTENT_DEFAULT, &obj2, 0, ummg);
vos_obj_release(obj2, 0, false);
vos_obj_release(obj1, VOS_OBJ_DISCARD, false);

/** Hold object for aggregation */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_AGGREGATE,
DAOS_INTENT_PURGE, &obj1, 0);
DAOS_INTENT_PURGE, &obj1, 0, ummg);
assert_rc_equal(rc, 0);
/** Discard should fail */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_DISCARD,
DAOS_INTENT_DISCARD, &obj2, 0);
DAOS_INTENT_DISCARD, &obj2, 0, ummg);
assert_rc_equal(rc, -DER_BUSY);
/** Second aggregation should fail */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_AGGREGATE,
DAOS_INTENT_PURGE, &obj2, 0);
DAOS_INTENT_PURGE, &obj2, 0, ummg);
assert_rc_equal(rc, -DER_BUSY);
/** Simultaneous create should work */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0,
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &obj2, 0);
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &obj2, 0, ummg);
assert_rc_equal(rc, 0);
vos_obj_release(obj2, 0, false);

/** Need to be able to hold for read though or iteration won't work */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_VISIBLE,
DAOS_INTENT_DEFAULT, &obj2, 0);
DAOS_INTENT_DEFAULT, &obj2, 0, ummg);
vos_obj_release(obj2, 0, false);
vos_obj_release(obj1, VOS_OBJ_AGGREGATE, false);

/** Now that other one is done, this should work */
rc = hold_obj(vos_hdl2cont(ctx->tc_co_hdl), oids[0], &epr, 0, VOS_OBJ_DISCARD,
DAOS_INTENT_DISCARD, &obj2, 0);
DAOS_INTENT_DISCARD, &obj2, 0, ummg);
assert_rc_equal(rc, 0);
vos_obj_release(obj2, VOS_OBJ_DISCARD, false);

rc = umem_tx_end(ummg, 0);
assert_rc_equal(rc, 0);

vos_obj_release(objs[0], 0, false);

rc = umem_tx_begin(umml, NULL);
assert_rc_equal(rc, 0);

rc = hold_obj(vos_hdl2cont(l_coh), oids[1], &epr, 0,
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &objs[0], 0);
VOS_OBJ_CREATE | VOS_OBJ_VISIBLE, DAOS_INTENT_UPDATE, &objs[0], 0, umml);
assert_rc_equal(rc, 0);
vos_obj_release(objs[0], 0, false);

rc = umem_tx_end(umml, 0);
assert_rc_equal(rc, 0);

rc = hold_objects(objs, &ctx->tc_co_hdl, &oids[0], 0, 10, true, 0);
assert_int_equal(rc, 0);

Expand All @@ -1091,7 +1091,7 @@ io_obj_cache_test(void **state)
rc = hold_objects(objs, &l_coh, &oids[1], 10, 15, true, 0);
assert_int_equal(rc, 0);
rc = hold_obj(vos_hdl2cont(l_coh), oids[1], &epr, 0, VOS_OBJ_VISIBLE,
DAOS_INTENT_DEFAULT, &objs[16], 0);
DAOS_INTENT_DEFAULT, &objs[16], 0, NULL);
assert_rc_equal(rc, 0);

vos_obj_release(objs[16], 0, false);
Expand Down
2 changes: 1 addition & 1 deletion src/vos/vos_aggregate.c
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ reserve_segment(struct vos_object *obj, struct agg_io_context *io,

if (vos_io_scm(vos_obj2pool(obj), DAOS_IOD_ARRAY, size, VOS_IOS_AGGREGATION)) {
/** Store on SCM */
off = vos_reserve_scm(obj->obj_cont, io->ic_rsrvd_scm, size);
off = vos_reserve_scm(obj->obj_cont, io->ic_rsrvd_scm, size, obj);
if (UMOFF_IS_NULL(off)) {
now = daos_gettime_coarse();
if (now - obj->obj_cont->vc_agg_nospc_ts > VOS_NOSPC_ERROR_INTVL) {
Expand Down
4 changes: 2 additions & 2 deletions src/vos/vos_gc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2019-2023 Intel Corporation.
* (C) Copyright 2019-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -115,7 +115,7 @@ gc_drain_evt(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,
daos_handle_t toh;
int rc;

vos_evt_desc_cbs_init(&cbs, pool, coh);
vos_evt_desc_cbs_init(&cbs, pool, coh, NULL);
rc = evt_open(root, &pool->vp_uma, &cbs, &toh);
if (rc == -DER_NONEXIST) {
*empty = true;
Expand Down
48 changes: 46 additions & 2 deletions src/vos/vos_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@ vos_bio_addr_free(struct vos_pool *pool, bio_addr_t *addr, daos_size_t nob);

void
vos_evt_desc_cbs_init(struct evt_desc_cbs *cbs, struct vos_pool *pool,
daos_handle_t coh);
daos_handle_t coh, struct vos_object *obj);

int
vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb);
Expand Down Expand Up @@ -1312,7 +1312,7 @@ vos_dedup_invalidate(struct vos_pool *pool);

umem_off_t
vos_reserve_scm(struct vos_container *cont, struct umem_rsrvd_act *rsrvd_scm,
daos_size_t size);
daos_size_t size, struct vos_object *obj);
int
vos_publish_scm(struct umem_instance *umm, struct umem_rsrvd_act *rsrvd_scm, bool publish);
int
Expand All @@ -1329,6 +1329,12 @@ vos_pool2umm(struct vos_pool *pool)
return &pool->vp_umm;
}

/**
 * Return the umem store reached through the pool's umem instance.
 *
 * \param[in] pool	VOS pool whose store is wanted.
 *
 * \return		Address of the up_store member of the umem pool referenced
 *			by pool->vp_umm.
 */
static inline struct umem_store *
vos_pool2store(struct vos_pool *pool)
{
	struct umem_store *store = &pool->vp_umm.umm_pool->up_store;

	return store;
}

static inline struct umem_instance *
vos_cont2umm(struct vos_container *cont)
{
Expand Down Expand Up @@ -1844,4 +1850,42 @@ vos_io_scm(struct vos_pool *pool, daos_iod_type_t type, daos_size_t size, enum v
int
vos_insert_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t *oid);

static inline bool
vos_pool_is_p2(struct vos_pool *pool)
{
struct umem_store *store = vos_pool2store(pool);

return store->store_type == DAOS_MD_BMEM_V2;
}

/**
 * Allocate @size bytes on behalf of an object.
 *
 * When @obj is provided and its pool is a "p2" pool (see vos_pool_is_p2()),
 * the allocation is taken from the object's first bucket, obj_bkt_ids[0];
 * the object is asserted to have exactly one bucket allotted. Otherwise the
 * regular umem allocator is used.
 *
 * \param[in] umm	umem instance to allocate from.
 * \param[in] obj	Object the allocation belongs to, may be NULL.
 * \param[in] size	Number of bytes to allocate.
 * \param[in] zeroing	Use the zeroing allocator variant when true.
 *
 * \return		Offset returned by the underlying umem allocator.
 */
static inline umem_off_t
vos_obj_alloc(struct umem_instance *umm, struct vos_object *obj, size_t size, bool zeroing)
{
	/* No object, or not a p2 pool: fall back to the plain allocator */
	if (obj == NULL || !vos_pool_is_p2(vos_obj2pool(obj)))
		return zeroing ? umem_zalloc(umm, size) : umem_alloc(umm, size);

	D_ASSERT(obj->obj_bkt_allot == 1);

	return zeroing ? umem_zalloc_from_bucket(umm, size, obj->obj_bkt_ids[0])
		       : umem_alloc_from_bucket(umm, size, obj->obj_bkt_ids[0]);
}

/**
 * Reserve @size bytes on behalf of an object.
 *
 * When @obj is provided and its pool is a "p2" pool (see vos_pool_is_p2()),
 * the reservation is made from the object's first bucket, obj_bkt_ids[0];
 * the object is asserted to have exactly one bucket allotted. Otherwise the
 * regular umem reservation is used.
 *
 * \param[in] umm	umem instance to reserve from.
 * \param[in] obj	Object the reservation belongs to, may be NULL.
 * \param[in] rsrvd_scm	Reserved-action handle passed to the umem reserve call.
 * \param[in] size	Number of bytes to reserve.
 *
 * \return		Offset returned by the underlying umem reserve call.
 */
static inline umem_off_t
vos_obj_reserve(struct umem_instance *umm, struct vos_object *obj,
		struct umem_rsrvd_act *rsrvd_scm, daos_size_t size)
{
	bool from_bucket = (obj != NULL) && vos_pool_is_p2(vos_obj2pool(obj));

	if (!from_bucket)
		return umem_reserve(umm, rsrvd_scm, size);

	D_ASSERT(obj->obj_bkt_allot == 1);

	return umem_reserve_from_bucket(umm, rsrvd_scm, size, obj->obj_bkt_ids[0]);
}

#endif /* __VOS_INTERNAL_H__ */
Loading
Loading