Skip to content

Commit

Permalink
DAOS-16278 vos: per pool backend type (#14946)
Browse files Browse the repository at this point in the history
Use the user-specified backend type when possible. If the user specifies the
BMEM V1 backend and tries to create a pool with "meta_size > scm_size", use
the BMEM V2 backend instead.

Store the per-pool backend type in the meta blob header so that it can be read back at pool open.

Signed-off-by: Niu Yawei <[email protected]>
  • Loading branch information
NiuYawei authored Aug 20, 2024
1 parent 35f7652 commit 0ba3bcf
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 25 deletions.
5 changes: 3 additions & 2 deletions src/bio/bio_context.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2018-2023 Intel Corporation.
* (C) Copyright 2018-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -630,7 +630,7 @@ default_wal_sz(uint64_t meta_sz)
}

int bio_mc_create(struct bio_xs_context *xs_ctxt, uuid_t pool_id, uint64_t scm_sz, uint64_t meta_sz,
uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags)
uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags, uint8_t backend_type)
{
int rc = 0, rc1;
spdk_blob_id data_blobid = SPDK_BLOBID_INVALID;
Expand Down Expand Up @@ -734,6 +734,7 @@ int bio_mc_create(struct bio_xs_context *xs_ctxt, uuid_t pool_id, uint64_t scm_s
fi->fi_wal_size = wal_sz;
fi->fi_data_size = data_sz;
fi->fi_vos_id = xs_ctxt->bxc_tgt_id;
fi->fi_backend_type = backend_type;

rc = meta_format(mc, fi, true);
if (rc)
Expand Down
4 changes: 3 additions & 1 deletion src/bio/bio_wal.c
Original file line number Diff line number Diff line change
Expand Up @@ -1861,13 +1861,14 @@ bio_wal_checkpoint(struct bio_meta_context *mc, uint64_t tx_id, uint64_t *purged

/**
 * Report the attributes recorded in the meta blob header of a meta context.
 *
 * \param[in]  mc		Meta context to query; may be NULL.
 * \param[out] capacity		Meta blob capacity in bytes (total blocks * block size).
 * \param[out] blk_sz		Meta block size in bytes.
 * \param[out] hdr_blks		Number of meta blob header blocks.
 * \param[out] backend_type	Backend allocator type stored in the meta header.
 *
 * None of the output parameters is written when \a mc is NULL, so a caller that
 * may pass a NULL context has to pre-initialize them.
 */
void
bio_meta_get_attr(struct bio_meta_context *mc, uint64_t *capacity, uint32_t *blk_sz,
uint32_t *hdr_blks)
uint32_t *hdr_blks, uint8_t *backend_type)
{
/* The mc could be NULL when md on SSD not enabled & data blob not existing */
if (mc != NULL) {
/* blk_sz is stored first because the capacity computation reads it back */
*blk_sz = mc->mc_meta_hdr.mh_blk_bytes;
*capacity = mc->mc_meta_hdr.mh_tot_blks * (*blk_sz);
*hdr_blks = mc->mc_meta_hdr.mh_hdr_blks;
*backend_type = mc->mc_meta_hdr.mh_backend_type;
}
}

Expand Down Expand Up @@ -2069,6 +2070,7 @@ meta_format(struct bio_meta_context *mc, struct meta_fmt_info *fi, bool force)
meta_hdr->mh_tot_blks = (fi->fi_meta_size / META_BLK_SZ) - META_HDR_BLKS;
meta_hdr->mh_vos_id = fi->fi_vos_id;
meta_hdr->mh_flags = META_HDR_FL_EMPTY;
meta_hdr->mh_backend_type = fi->fi_backend_type;

rc = write_header(mc, mc->mc_meta, meta_hdr, sizeof(*meta_hdr), &meta_hdr->mh_csum);
if (rc) {
Expand Down
6 changes: 5 additions & 1 deletion src/bio/bio_wal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ struct meta_header {
uint64_t mh_tot_blks; /* Meta blob capacity, in blocks */
uint32_t mh_vos_id; /* Associated per-engine target ID */
uint32_t mh_flags; /* Meta header flags */
uint32_t mh_padding[5]; /* Reserved */
uint8_t mh_backend_type; /* Backend allocator type */
uint8_t mh_padding1; /* Reserved */
uint16_t mh_padding2; /* Reserved */
uint32_t mh_padding[4]; /* Reserved */
uint32_t mh_csum; /* Checksum of this header */
};

Expand Down Expand Up @@ -124,6 +127,7 @@ struct meta_fmt_info {
uint64_t fi_wal_size; /* WAL blob size in bytes */
uint64_t fi_data_size; /* Data blob size in bytes */
uint32_t fi_vos_id; /* Associated per-engine target ID */
uint8_t fi_backend_type; /* Backend allocator type */
};

int meta_format(struct bio_meta_context *mc, struct meta_fmt_info *fi, bool force);
Expand Down
2 changes: 2 additions & 0 deletions src/include/daos/mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ int umempobj_backend_type2class_id(int backend);
#define UMEMPOBJ_ENABLE_STATS 0x1

#ifdef DAOS_PMEM_BUILD

/* The backend type is stored in meta blob header, don't change the value */
enum {
DAOS_MD_PMEM = 0,
DAOS_MD_BMEM = 1,
Expand Down
6 changes: 4 additions & 2 deletions src/include/daos_srv/bio.h
Original file line number Diff line number Diff line change
Expand Up @@ -948,11 +948,13 @@ enum bio_mc_flags {
* \param[in] wal_sz WAL blob in bytes
* \param[in] data_sz Data blob in bytes
* \param[in] flags bio_mc_flags
* \param[in] backend_type Backend allocator type
*
* \return Zero on success, negative value on error.
*/
int bio_mc_create(struct bio_xs_context *xs_ctxt, uuid_t pool_id, uint64_t scm_sz,
uint64_t meta_sz, uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags);
uint64_t meta_sz, uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags,
uint8_t backend_type);

/*
* Destroy Meta/Data/WAL blobs
Expand Down Expand Up @@ -1081,7 +1083,7 @@ int bio_wal_checkpoint(struct bio_meta_context *mc, uint64_t tx_id, uint64_t *pu
* Query meta capacity, meta block size, meta blob header blocks & backend allocator type.
*/
void bio_meta_get_attr(struct bio_meta_context *mc, uint64_t *capacity, uint32_t *blk_sz,
uint32_t *hdr_blks);
uint32_t *hdr_blks, uint8_t *backend_type);

struct bio_wal_info {
uint32_t wi_tot_blks; /* Total blocks */
Expand Down
2 changes: 1 addition & 1 deletion src/vos/tests/wal_ut.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ut_mc_init(struct bio_ut_args *args, uint64_t meta_sz, uint64_t wal_sz, uint64_t
int rc, ret;

uuid_generate(args->bua_pool_id);
rc = bio_mc_create(args->bua_xs_ctxt, args->bua_pool_id, 0, meta_sz, wal_sz, data_sz, 0);
rc = bio_mc_create(args->bua_xs_ctxt, args->bua_pool_id, 0, meta_sz, wal_sz, data_sz, 0, 0);
if (rc) {
D_ERROR("UT MC create failed. "DF_RC"\n", DP_RC(rc));
return rc;
Expand Down
49 changes: 31 additions & 18 deletions src/vos/vos_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,19 @@ vos2mc_flags(unsigned int vos_flags)
return mc_flags;
}

/**
 * Fill in a umem_store from the attributes recorded in a BIO meta context.
 *
 * \param[out] store	Store to initialize: size, block size, header blocks and backend
 *			type are queried from \a mc; stor_priv is set to \a mc and stor_ops
 *			to the VOS store operations.
 * \param[in]  mc	BIO meta context to query. When it is NULL, bio_meta_get_attr()
 *			reports nothing, so the size fields and the backend type of
 *			\a store keep whatever the caller put there.
 */
static inline void
init_umem_store(struct umem_store *store, struct bio_meta_context *mc)
{
	/*
	 * Receive the backend type in a genuine uint8_t instead of passing
	 * (uint8_t *)&store->store_type. If store_type is wider than one byte (the cast
	 * would not be needed otherwise), the callee would overwrite a single byte of it,
	 * leaving the other bytes stale and hitting the wrong byte on big-endian hosts.
	 */
	uint8_t backend_type = DAOS_MD_PMEM;

	bio_meta_get_attr(mc, &store->stor_size, &store->stor_blk_size, &store->stor_hdr_blks,
			  &backend_type);

	/* Nothing was reported for a NULL mc: leave the caller's store_type untouched */
	if (mc != NULL)
		store->store_type = backend_type;

	store->stor_priv = mc;
	store->stor_ops  = &vos_store_ops;

	/*
	 * Legacy BMEM V1 pool without backend type stored: the type reads back as
	 * DAOS_MD_PMEM (presumably because the formerly reserved header bytes are zero),
	 * which cannot be right when meta is on NVMe, so treat it as BMEM.
	 */
	if (bio_nvme_configured(SMD_DEV_TYPE_META) && store->store_type == DAOS_MD_PMEM)
		store->store_type = DAOS_MD_BMEM;
}

static int
vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout,
size_t scm_sz, size_t nvme_sz, size_t wal_sz, size_t meta_sz,
Expand All @@ -754,15 +767,17 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout,

*ph = NULL;
/* always use PMEM mode for SMD */
store.store_type = umempobj_get_backend_type();
if (flags & VOS_POF_SYSDB) {
store.store_type = DAOS_MD_PMEM;
store.store_standalone = true;
goto umem_create;
}

/* No NVMe is configured or current xstream doesn't have NVMe context */
if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL)
if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL) {
store.store_type = DAOS_MD_PMEM;
goto umem_create;
}

/* If meta_sz is set then use it, otherwise derive from VOS file size or scm_sz */
if (!meta_sz) {
Expand All @@ -779,11 +794,16 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout,
}
}

store.store_type = umempobj_get_backend_type();
if (store.store_type == DAOS_MD_BMEM && meta_sz > scm_sz)
store.store_type = DAOS_MD_BMEM_V2;

D_DEBUG(DB_MGMT, "Create BIO meta context for xs:%p pool:"DF_UUID" "
"scm_sz: %zu meta_sz: %zu, nvme_sz: %zu wal_sz:%zu\n",
xs_ctxt, DP_UUID(pool_id), scm_sz, meta_sz, nvme_sz, wal_sz);
"scm_sz: %zu meta_sz: %zu, nvme_sz: %zu wal_sz:%zu backend:%d\n",
xs_ctxt, DP_UUID(pool_id), scm_sz, meta_sz, nvme_sz, wal_sz, store.store_type);

rc = bio_mc_create(xs_ctxt, pool_id, scm_sz, meta_sz, wal_sz, nvme_sz, mc_flags);
rc = bio_mc_create(xs_ctxt, pool_id, scm_sz, meta_sz, wal_sz, nvme_sz, mc_flags,
store.store_type);
if (rc != 0) {
D_ERROR("Failed to create BIO meta context for xs:%p pool:"DF_UUID". "DF_RC"\n",
xs_ctxt, DP_UUID(pool_id), DP_RC(rc));
Expand All @@ -802,14 +822,7 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout,
return rc;
}

// TODO DAOS-13690: When DAV allocator supports different MD-blob and VOS-file sizes, pass
// meta_sz and scm_sz to umem_store. The poolsize umempobj_create()
// param will continue to be used as it is currently (non-zero for create,
// zero to use existing) to keep compatibility with PMEM mode.

bio_meta_get_attr(mc, &store.stor_size, &store.stor_blk_size, &store.stor_hdr_blks);
store.stor_priv = mc;
store.stor_ops = &vos_store_ops;
init_umem_store(&store, mc);

umem_create:
D_DEBUG(DB_MGMT, "umempobj_create sz: " DF_U64 " store_sz: " DF_U64, scm_sz,
Expand Down Expand Up @@ -849,15 +862,17 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned

*ph = NULL;
/* always use PMEM mode for SMD */
store.store_type = umempobj_get_backend_type();
if (flags & VOS_POF_SYSDB) {
store.store_type = DAOS_MD_PMEM;
store.store_standalone = true;
goto umem_open;
}

/* No NVMe is configured or current xstream doesn't have NVMe context */
if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL)
if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL) {
store.store_type = DAOS_MD_PMEM;
goto umem_open;
}

D_DEBUG(DB_MGMT, "Open BIO meta context for xs:%p pool:"DF_UUID"\n",
xs_ctxt, DP_UUID(pool_id));
Expand All @@ -869,9 +884,7 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned
return rc;
}

bio_meta_get_attr(mc, &store.stor_size, &store.stor_blk_size, &store.stor_hdr_blks);
store.stor_priv = mc;
store.stor_ops = &vos_store_ops;
init_umem_store(&store, mc);
if (metrics != NULL) {
struct vos_pool_metrics *vpm = (struct vos_pool_metrics *)metrics;

Expand Down

0 comments on commit 0ba3bcf

Please sign in to comment.