Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/feature/vos_on_blob_p2' into tan…
Browse files Browse the repository at this point in the history
…abarr/control-display-poolquery-mdonssd

Required-githooks: true

Signed-off-by: Tom Nabarro <[email protected]>
  • Loading branch information
tanabarr committed Sep 6, 2024
2 parents 8b78d43 + 16df8c9 commit 8ea108f
Show file tree
Hide file tree
Showing 9 changed files with 125 additions and 45 deletions.
1 change: 0 additions & 1 deletion src/common/btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3888,7 +3888,6 @@ btr_tree_destroy(struct btr_context *tcx, void *args, bool *destroyed)
tcx->tc_tins.ti_root_off, tcx->tc_order);

root = tcx->tc_tins.ti_root;
tcx->tc_tins.ti_destroy = 1;
if (root && !UMOFF_IS_NULL(root->tr_node)) {
/* destroy the root and all descendants */
rc = btr_node_destroy(tcx, root->tr_node, args, &empty);
Expand Down
2 changes: 0 additions & 2 deletions src/include/daos/btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,6 @@ struct btr_instance {
struct btr_root *ti_root;
/** Customized operations for the tree */
btr_ops_t *ti_ops;
/** The context is used for tree destroy */
unsigned int ti_destroy : 1;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion src/vos/vos_container.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ cont_df_rec_free(struct btr_instance *tins, struct btr_record *rec, void *args)
cont_df = umem_off2ptr(&tins->ti_umm, rec->rec_off);
vos_ts_evict(&cont_df->cd_ts_idx, VOS_TS_TYPE_CONT, vos_pool->vp_sysdb);

return gc_add_item(vos_pool, DAOS_HDL_INVAL, GC_CONT, rec->rec_off, 0);
return gc_add_item(vos_pool, DAOS_HDL_INVAL, GC_CONT, rec->rec_off, NULL);
}

static int
Expand Down
88 changes: 70 additions & 18 deletions src/vos/vos_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,26 @@ struct vos_gc {
*/
static int
gc_drain_btr(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,
struct btr_root *root, int *credits, bool *empty)
struct vos_gc_item *item, struct btr_root *root, int *credits, bool *empty)
{
daos_handle_t toh;
int rc;
struct vos_object dummy_obj = { 0 };
struct vos_container dummy_cont = { 0 };
daos_handle_t toh;
void *priv;
int rc, i;

if (gc->gc_type == GC_CONT) {
priv = pool;
} else {
dummy_cont.vc_pool = pool;
dummy_obj.obj_cont = &dummy_cont;
dummy_obj.obj_bkt_allot = 1;
for (i = 0; i < VOS_GC_BKTS_MAX; i++)
dummy_obj.obj_bkt_ids[i] = item->it_bkt_ids[i];
priv = &dummy_obj;
}

rc = dbtree_open_inplace_ex(root, &pool->vp_uma, coh, pool, &toh);
rc = dbtree_open_inplace_ex(root, &pool->vp_uma, coh, priv, &toh);
if (rc == -DER_NONEXIST) { /* empty tree */
*empty = true;
return 0;
Expand Down Expand Up @@ -126,7 +140,7 @@ gc_drain_evt(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,

D_DEBUG(DB_TRACE, "drain %s evtree, creds=%d\n", gc->gc_name, *credits);
rc = evt_drain(toh, credits, empty);
D_ASSERT(evt_close(toh) == 0);
evt_close(toh);
if (rc)
goto failed;

Expand Down Expand Up @@ -160,7 +174,7 @@ gc_drain_key(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,
}

if (key->kr_bmap & KREC_BF_BTR) {
rc = gc_drain_btr(gc, pool, coh, &key->kr_btr, credits, empty);
rc = gc_drain_btr(gc, pool, coh, item, &key->kr_btr, credits, empty);

} else if (key->kr_bmap & KREC_BF_EVT) {
D_ASSERT(gc->gc_type == GC_AKEY);
Expand Down Expand Up @@ -195,7 +209,7 @@ gc_free_dkey(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh, struct

D_ASSERT(krec->kr_bmap & KREC_BF_DKEY);
if (krec->kr_bmap & KREC_BF_NO_AKEY)
gc_add_item(pool, coh, GC_AKEY, item->it_addr, item->it_args);
gc_add_item(pool, coh, GC_AKEY, item->it_addr, &item->it_bkt_ids[0]);
else
umem_free(&pool->vp_umm, item->it_addr);
return 0;
Expand All @@ -211,7 +225,7 @@ gc_drain_obj(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,
{
struct vos_obj_df *obj = umem_off2ptr(&pool->vp_umm, item->it_addr);

return gc_drain_btr(gc, pool, coh, &obj->vo_tree, credits, empty);
return gc_drain_btr(gc, pool, coh, item, &obj->vo_tree, credits, empty);
}

static int
Expand Down Expand Up @@ -298,8 +312,7 @@ gc_drain_cont(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh,
}

D_ASSERT(daos_handle_is_inval(coh));
return gc_drain_btr(gc, pool, coh, &cont->cd_obj_root,
credits, empty);
return gc_drain_btr(gc, pool, coh, item, &cont->cd_obj_root, credits, empty);
}

static int
Expand Down Expand Up @@ -627,12 +640,12 @@ gc_free_item(struct vos_gc *gc, struct vos_pool *pool,
*/
int
gc_add_item(struct vos_pool *pool, daos_handle_t coh,
enum vos_gc_type type, umem_off_t item_off, uint64_t args)
enum vos_gc_type type, umem_off_t item_off, uint32_t *bkt_ids)
{
struct vos_container *cont = vos_hdl2cont(coh);
struct vos_gc_bin_df *bin = gc_type2bin(pool, cont, type);
struct vos_gc_item item;
int rc;
int rc, i;

D_DEBUG(DB_TRACE, "Add %s addr="DF_X64"\n",
gc_type2name(type), item_off);
Expand All @@ -641,7 +654,9 @@ gc_add_item(struct vos_pool *pool, daos_handle_t coh,
return 0; /* OK to ignore because the pool is being deleted */

item.it_addr = item_off;
item.it_args = args;
for (i = 0; i < VOS_GC_BKTS_MAX; i++)
item.it_bkt_ids[i] = bkt_ids ? bkt_ids[i] : UMEM_DEFAULT_MBKT_ID;

rc = gc_bin_add_item(&pool->vp_umm, bin, &item);
if (rc) {
D_ERROR("Failed to add item, pool=" DF_UUID ", rc=" DF_RC "\n",
Expand Down Expand Up @@ -712,28 +727,41 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret)
struct vos_container *cont = gc_get_container(pool);
struct vos_gc *gc = &gc_table[0]; /* start from akey */
int creds = *credits;
uint32_t bkt = UMEM_DEFAULT_MBKT_ID, pinned_bkt = UMEM_DEFAULT_MBKT_ID;
struct umem_pin_handle *pin_hdl = NULL;
struct umem_cache_range rg;
int rc;

if (pool->vp_dying) {
*empty_ret = true;
D_GOTO(done, rc = 0);
}
*empty_ret = false;

/* take an extra ref to avoid concurrent container destroy/free */
if (cont != NULL)
vos_cont_addref(cont);

pin_obj:
if (bkt != UMEM_DEFAULT_MBKT_ID) {
rg.cr_off = umem_get_mb_base_offset(vos_pool2umm(pool), bkt);
rg.cr_size = vos_pool2store(pool)->cache->ca_page_sz;

rc = umem_cache_pin(vos_pool2store(pool), &rg, 1, false, &pin_hdl);
if (rc) {
DL_ERROR(rc, "Failed to pin bucket %u.", bkt);
goto tx_error;
}
pinned_bkt = bkt;
}

rc = umem_tx_begin(&pool->vp_umm, NULL);
if (rc) {
D_ERROR("Failed to start transacton for " DF_UUID ": " DF_RC "\n",
DP_UUID(pool->vp_id), DP_RC(rc));
if (cont != NULL)
vos_cont_decref(cont);
*empty_ret = false;
goto done;
goto tx_error;
}

*empty_ret = false;
while (creds > 0) {
struct vos_gc_item *item;
bool empty = false;
Expand Down Expand Up @@ -769,6 +797,25 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret)
if (DAOS_FAIL_CHECK(DAOS_VOS_GC_CONT))
D_ASSERT(cont != NULL);

bkt = item->it_bkt_ids[0];
if (bkt != UMEM_DEFAULT_MBKT_ID && bkt != pinned_bkt) {
D_ASSERT(gc->gc_type != GC_CONT);
D_ASSERT(vos_pool_is_p2(pool));

rc = umem_tx_end(&pool->vp_umm, rc);
if (rc != 0) {
DL_ERROR(rc, "Transaction commit failed.");
goto tx_error;
}

if (pin_hdl != NULL) {
umem_cache_unpin(vos_pool2store(pool), pin_hdl);
pin_hdl = NULL;
}

goto pin_obj;
}

rc = gc_drain_item(gc, pool, vos_cont2hdl(cont), item, &creds,
&empty);
if (rc < 0) {
Expand Down Expand Up @@ -815,6 +862,11 @@ gc_reclaim_pool(struct vos_pool *pool, int *credits, bool *empty_ret)
rc = umem_tx_end(&pool->vp_umm, rc);
if (rc == 0)
*credits = creds;
tx_error:
if (pin_hdl != NULL) {
umem_cache_unpin(vos_pool2store(pool), pin_hdl);
pin_hdl = NULL;
}

if (cont != NULL && d_list_empty(&cont->vc_gc_link)) {
/** The container may not be empty so add it back to end of
Expand Down
2 changes: 1 addition & 1 deletion src/vos/vos_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,7 @@ void
gc_check_cont(struct vos_container *cont);
int
gc_add_item(struct vos_pool *pool, daos_handle_t coh,
enum vos_gc_type type, umem_off_t item_off, uint64_t args);
enum vos_gc_type type, umem_off_t item_off, uint32_t *bkt_ids);
int
vos_gc_pool_tight(daos_handle_t poh, int *credits);
void
Expand Down
22 changes: 13 additions & 9 deletions src/vos/vos_layout.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ struct vos_gc_bin_df {
uint16_t bin_pad16;
};

/*
* This is smaller than VOS_OBJ_BKTS_MAX for the object durable format, because
* increasing the size of every GC item (the number of GC items is massive) is
* not justified by a hypothetical requirement.
*
* If we really need to support more than 2 evict-able buckets per object in the
* future, we can enlarge the GC item then.
*/
#define VOS_GC_BKTS_MAX 2

struct vos_gc_bag_df {
/** index of the first item in FIFO */
uint16_t bag_item_first;
Expand All @@ -57,19 +67,12 @@ struct vos_gc_bag_df {
struct vos_gc_item {
/* address of the item to be freed */
umem_off_t it_addr;
/** Reserved, argument for GC_VEA/BIO (e.g. size of extent) */
uint64_t it_args;
/* object buckets for GC_AKEY/DKEY/OBJ of the md-on-ssd p2 pool */
uint32_t it_bkt_ids[VOS_GC_BKTS_MAX];
} bag_items[0];
};

enum vos_gc_type {
/* XXX: we could define GC_VEA, which can free NVMe/SCM space.
* So svt_rec_free() and evt_desc_bio_free() only need to call
* gc_add_item() to register BIO address for GC.
*
* However, GC_VEA could have extra overhead of reassigning SCM
* pointers, but it also has low latency for undo changes.
*/
GC_AKEY,
GC_DKEY,
GC_OBJ,
Expand Down Expand Up @@ -378,6 +381,7 @@ struct vos_obj_df {
};

#define VOS_OBJ_BKTS_MAX 4
D_CASSERT(VOS_GC_BKTS_MAX <= VOS_OBJ_BKTS_MAX);

/*
* VOS object durable format for md-on-ssd phase2. The size is fit to the 128 bytes
Expand Down
19 changes: 19 additions & 0 deletions src/vos/vos_obj_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,25 @@ vos_obj_incarnate(struct vos_object *obj, daos_epoch_range_t *epr, daos_epoch_t
return -DER_TX_RESTART;
}

if (obj->obj_bkt_ids[0] != UMEM_DEFAULT_MBKT_ID) {
struct vos_obj_p2_df *p2 = (struct vos_obj_p2_df *)obj->obj_df;

D_ASSERT(vos_pool_is_p2(vos_obj2pool(obj)));
D_ASSERT(obj->obj_bkt_allot);

if (p2->p2_bkt_ids[0] == UMEM_DEFAULT_MBKT_ID) {
p2->p2_bkt_ids[0] = obj->obj_bkt_ids[0];
rc = umem_tx_add_ptr(vos_cont2umm(cont), &p2->p2_bkt_ids[0],
sizeof(p2->p2_bkt_ids[0]));
if (rc) {
DL_ERROR(rc, "Add bucket ID failed.");
return rc;
}
} else {
D_ASSERT(p2->p2_bkt_ids[0] == obj->obj_bkt_ids[0]);
}
}

/* It's done for DAOS_INTENT_PUNCH case */
if (intent == DAOS_INTENT_PUNCH)
return 0;
Expand Down
10 changes: 9 additions & 1 deletion src/vos/vos_obj_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ oi_rec_free(struct btr_instance *tins, struct btr_record *rec, void *args)
daos_handle_t coh = { 0 };
int rc;
struct vos_pool *pool;
uint32_t *bkt_ids = NULL;

obj = umem_off2ptr(umm, rec->rec_off);

Expand Down Expand Up @@ -173,7 +174,14 @@ oi_rec_free(struct btr_instance *tins, struct btr_record *rec, void *args)

if (del_arg != NULL)
coh = vos_cont2hdl((struct vos_container *)del_arg->cont);
return gc_add_item(tins->ti_priv, coh, GC_OBJ, rec->rec_off, 0);

if (vos_pool_is_p2(pool)) {
struct vos_obj_p2_df *p2 = (struct vos_obj_p2_df *)obj;

bkt_ids = &p2->p2_bkt_ids[0];
}

return gc_add_item(tins->ti_priv, coh, GC_OBJ, rec->rec_off, bkt_ids);
}

static int
Expand Down
24 changes: 12 additions & 12 deletions src/vos/vos_tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ ktr_hkey_gen(struct btr_instance *tins, d_iov_t *key_iov, void *hkey)
struct umem_pool *umm_pool = tins->ti_umm.umm_pool;
struct vos_pool *pool;

D_ASSERT(tins->ti_destroy == 0);
pool = vos_obj2pool(tins->ti_priv);
D_ASSERT(key_iov->iov_len < pool->vp_pool_df->pd_scm_sz);
hkey_common_gen(key_iov, hkey);
Expand Down Expand Up @@ -257,7 +256,6 @@ ktr_rec_alloc(struct btr_instance *tins, d_iov_t *key_iov,

rbund = iov2rec_bundle(val_iov);

D_ASSERT(tins->ti_destroy == 0);
rec->rec_off = vos_obj_alloc(&tins->ti_umm, tins->ti_priv, vos_krec_size(rbund), true);
if (UMOFF_IS_NULL(rec->rec_off))
return -DER_NOSPACE;
Expand Down Expand Up @@ -289,6 +287,8 @@ ktr_rec_free(struct btr_instance *tins, struct btr_record *rec, void *args)
int gc;
int rc;
struct vos_pool *pool;
struct vos_object *obj;
uint32_t *bkt_ids = NULL;

if (UMOFF_IS_NULL(rec->rec_off))
return 0;
Expand All @@ -302,17 +302,21 @@ ktr_rec_free(struct btr_instance *tins, struct btr_record *rec, void *args)
return rc;

D_ASSERT(tins->ti_priv);
if (tins->ti_destroy)
pool = (struct vos_pool *)tins->ti_priv;
else
pool = vos_obj2pool(tins->ti_priv);
obj = tins->ti_priv;
pool = vos_obj2pool(obj);

vos_ilog_ts_evict(&krec->kr_ilog, (krec->kr_bmap & KREC_BF_DKEY) ?
VOS_TS_TYPE_DKEY : VOS_TS_TYPE_AKEY, pool->vp_sysdb);

gc = (krec->kr_bmap & KREC_BF_DKEY) ? GC_DKEY : GC_AKEY;
coh = vos_cont2hdl(args);
return gc_add_item(pool, coh, gc, rec->rec_off, 0);

if (vos_pool_is_p2(pool)) {
D_ASSERT(obj->obj_bkt_allot == 1);
bkt_ids = &obj->obj_bkt_ids[0];
}

return gc_add_item(pool, coh, gc, rec->rec_off, bkt_ids);
}

static int
Expand Down Expand Up @@ -358,7 +362,6 @@ ktr_rec_update(struct btr_instance *tins, struct btr_record *rec,
static umem_off_t
ktr_node_alloc(struct btr_instance *tins, int size)
{
D_ASSERT(tins->ti_destroy == 0);
return vos_obj_alloc(&tins->ti_umm, tins->ti_priv, size, true);
}

Expand Down Expand Up @@ -631,10 +634,7 @@ svt_rec_free_internal(struct btr_instance *tins, struct btr_record *rec,
struct vos_pool *pool;

D_ASSERT(tins->ti_priv != NULL);
if (tins->ti_destroy)
pool = (struct vos_pool *)tins->ti_priv;
else
pool = vos_obj2pool(tins->ti_priv);
pool = vos_obj2pool(tins->ti_priv);
rc = vos_bio_addr_free(pool, addr, irec->ir_size);
if (rc)
return rc;
Expand Down

0 comments on commit 8ea108f

Please sign in to comment.