From 0ba3bcf85670d7be4af43b3ba6457d72f5b4b7a4 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Tue, 20 Aug 2024 09:21:04 +0800 Subject: [PATCH] DAOS-16278 vos: per pool backend type (#14946) Use the user-specified backend type when possible. If the user specifies the BMEM V1 backend and tries to create a pool with "meta_size > scm_size", use BMEM V2 instead. Store the per-pool backend type in the meta blob header so that it is available on pool open. Signed-off-by: Niu Yawei --- src/bio/bio_context.c | 5 ++-- src/bio/bio_wal.c | 4 +++- src/bio/bio_wal.h | 6 ++++- src/include/daos/mem.h | 2 ++ src/include/daos_srv/bio.h | 6 +++-- src/vos/tests/wal_ut.c | 2 +- src/vos/vos_pool.c | 49 ++++++++++++++++++++++++-------------- 7 files changed, 49 insertions(+), 25 deletions(-) diff --git a/src/bio/bio_context.c b/src/bio/bio_context.c index e827a9a6f56..f8a7dd87897 100644 --- a/src/bio/bio_context.c +++ b/src/bio/bio_context.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2023 Intel Corporation. + * (C) Copyright 2018-2024 Intel Corporation.
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -630,7 +630,7 @@ default_wal_sz(uint64_t meta_sz) } int bio_mc_create(struct bio_xs_context *xs_ctxt, uuid_t pool_id, uint64_t scm_sz, uint64_t meta_sz, - uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags) + uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags, uint8_t backend_type) { int rc = 0, rc1; spdk_blob_id data_blobid = SPDK_BLOBID_INVALID; @@ -734,6 +734,7 @@ int bio_mc_create(struct bio_xs_context *xs_ctxt, uuid_t pool_id, uint64_t scm_s fi->fi_wal_size = wal_sz; fi->fi_data_size = data_sz; fi->fi_vos_id = xs_ctxt->bxc_tgt_id; + fi->fi_backend_type = backend_type; rc = meta_format(mc, fi, true); if (rc) diff --git a/src/bio/bio_wal.c b/src/bio/bio_wal.c index 6c99a203966..ff3427bdf1a 100644 --- a/src/bio/bio_wal.c +++ b/src/bio/bio_wal.c @@ -1861,13 +1861,14 @@ bio_wal_checkpoint(struct bio_meta_context *mc, uint64_t tx_id, uint64_t *purged void bio_meta_get_attr(struct bio_meta_context *mc, uint64_t *capacity, uint32_t *blk_sz, - uint32_t *hdr_blks) + uint32_t *hdr_blks, uint8_t *backend_type) { /* The mc could be NULL when md on SSD not enabled & data blob not existing */ if (mc != NULL) { *blk_sz = mc->mc_meta_hdr.mh_blk_bytes; *capacity = mc->mc_meta_hdr.mh_tot_blks * (*blk_sz); *hdr_blks = mc->mc_meta_hdr.mh_hdr_blks; + *backend_type = mc->mc_meta_hdr.mh_backend_type; } } @@ -2069,6 +2070,7 @@ meta_format(struct bio_meta_context *mc, struct meta_fmt_info *fi, bool force) meta_hdr->mh_tot_blks = (fi->fi_meta_size / META_BLK_SZ) - META_HDR_BLKS; meta_hdr->mh_vos_id = fi->fi_vos_id; meta_hdr->mh_flags = META_HDR_FL_EMPTY; + meta_hdr->mh_backend_type = fi->fi_backend_type; rc = write_header(mc, mc->mc_meta, meta_hdr, sizeof(*meta_hdr), &meta_hdr->mh_csum); if (rc) { diff --git a/src/bio/bio_wal.h b/src/bio/bio_wal.h index 6eb187c61e6..8623adf1025 100644 --- a/src/bio/bio_wal.h +++ b/src/bio/bio_wal.h @@ -28,7 +28,10 @@ struct meta_header { uint64_t mh_tot_blks; /* Meta blob capacity, 
in blocks */ uint32_t mh_vos_id; /* Associated per-engine target ID */ uint32_t mh_flags; /* Meta header flags */ - uint32_t mh_padding[5]; /* Reserved */ + uint8_t mh_backend_type; /* Backend allocator type */ + uint8_t mh_padding1; /* Reserved */ + uint16_t mh_padding2; /* Reserved */ + uint32_t mh_padding[4]; /* Reserved */ uint32_t mh_csum; /* Checksum of this header */ }; @@ -124,6 +127,7 @@ struct meta_fmt_info { uint64_t fi_wal_size; /* WAL blob size in bytes */ uint64_t fi_data_size; /* Data blob size in bytes */ uint32_t fi_vos_id; /* Associated per-engine target ID */ + uint8_t fi_backend_type; /* Backend allocator type */ }; int meta_format(struct bio_meta_context *mc, struct meta_fmt_info *fi, bool force); diff --git a/src/include/daos/mem.h b/src/include/daos/mem.h index d49afa797e7..7a7ca3dcf7c 100644 --- a/src/include/daos/mem.h +++ b/src/include/daos/mem.h @@ -34,6 +34,8 @@ int umempobj_backend_type2class_id(int backend); #define UMEMPOBJ_ENABLE_STATS 0x1 #ifdef DAOS_PMEM_BUILD + +/* The backend type is stored in meta blob header, don't change the value */ enum { DAOS_MD_PMEM = 0, DAOS_MD_BMEM = 1, diff --git a/src/include/daos_srv/bio.h b/src/include/daos_srv/bio.h index 0aa0a63eb49..f27c5f7ebba 100644 --- a/src/include/daos_srv/bio.h +++ b/src/include/daos_srv/bio.h @@ -948,11 +948,13 @@ enum bio_mc_flags { * \param[in] wal_sz WAL blob in bytes * \param[in] data_sz Data blob in bytes * \param[in] flags bio_mc_flags + * \param[in] backend_type Backend allocator type * * \return Zero on success, negative value on error. 
*/ int bio_mc_create(struct bio_xs_context *xs_ctxt, uuid_t pool_id, uint64_t scm_sz, - uint64_t meta_sz, uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags); + uint64_t meta_sz, uint64_t wal_sz, uint64_t data_sz, enum bio_mc_flags flags, + uint8_t backend_type); /* * Destroy Meta/Data/WAL blobs @@ -1081,7 +1083,7 @@ int bio_wal_checkpoint(struct bio_meta_context *mc, uint64_t tx_id, uint64_t *pu * Query meta capacity & meta block size & meta blob header blocks. */ void bio_meta_get_attr(struct bio_meta_context *mc, uint64_t *capacity, uint32_t *blk_sz, - uint32_t *hdr_blks); + uint32_t *hdr_blks, uint8_t *backend_type); struct bio_wal_info { uint32_t wi_tot_blks; /* Total blocks */ diff --git a/src/vos/tests/wal_ut.c b/src/vos/tests/wal_ut.c index fda61ab3608..32b4b4c9957 100644 --- a/src/vos/tests/wal_ut.c +++ b/src/vos/tests/wal_ut.c @@ -29,7 +29,7 @@ ut_mc_init(struct bio_ut_args *args, uint64_t meta_sz, uint64_t wal_sz, uint64_t int rc, ret; uuid_generate(args->bua_pool_id); - rc = bio_mc_create(args->bua_xs_ctxt, args->bua_pool_id, 0, meta_sz, wal_sz, data_sz, 0); + rc = bio_mc_create(args->bua_xs_ctxt, args->bua_pool_id, 0, meta_sz, wal_sz, data_sz, 0, 0); if (rc) { D_ERROR("UT MC create failed. 
"DF_RC"\n", DP_RC(rc)); return rc; diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index 3f0bdb03707..60a17b1aa11 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -740,6 +740,19 @@ vos2mc_flags(unsigned int vos_flags) return mc_flags; } +static inline void +init_umem_store(struct umem_store *store, struct bio_meta_context *mc) +{ + bio_meta_get_attr(mc, &store->stor_size, &store->stor_blk_size, &store->stor_hdr_blks, + (uint8_t *)&store->store_type); + store->stor_priv = mc; + store->stor_ops = &vos_store_ops; + + /* Legacy BMEM V1 pool without backend type stored */ + if (bio_nvme_configured(SMD_DEV_TYPE_META) && store->store_type == DAOS_MD_PMEM) + store->store_type = DAOS_MD_BMEM; +} + static int vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, size_t scm_sz, size_t nvme_sz, size_t wal_sz, size_t meta_sz, @@ -754,15 +767,17 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, *ph = NULL; /* always use PMEM mode for SMD */ - store.store_type = umempobj_get_backend_type(); if (flags & VOS_POF_SYSDB) { store.store_type = DAOS_MD_PMEM; store.store_standalone = true; + goto umem_create; } /* No NVMe is configured or current xstream doesn't have NVMe context */ - if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL) + if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL) { + store.store_type = DAOS_MD_PMEM; goto umem_create; + } /* Is meta_sz is set then use it, otherwise derive from VOS file size or scm_sz */ if (!meta_sz) { @@ -779,11 +794,16 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, } } + store.store_type = umempobj_get_backend_type(); + if (store.store_type == DAOS_MD_BMEM && meta_sz > scm_sz) + store.store_type = DAOS_MD_BMEM_V2; + D_DEBUG(DB_MGMT, "Create BIO meta context for xs:%p pool:"DF_UUID" " - "scm_sz: %zu meta_sz: %zu, nvme_sz: %zu wal_sz:%zu\n", - xs_ctxt, DP_UUID(pool_id), scm_sz, meta_sz, nvme_sz, wal_sz); + "scm_sz: %zu meta_sz: %zu, 
nvme_sz: %zu wal_sz:%zu backend:%d\n", + xs_ctxt, DP_UUID(pool_id), scm_sz, meta_sz, nvme_sz, wal_sz, store.store_type); - rc = bio_mc_create(xs_ctxt, pool_id, scm_sz, meta_sz, wal_sz, nvme_sz, mc_flags); + rc = bio_mc_create(xs_ctxt, pool_id, scm_sz, meta_sz, wal_sz, nvme_sz, mc_flags, + store.store_type); if (rc != 0) { D_ERROR("Failed to create BIO meta context for xs:%p pool:"DF_UUID". "DF_RC"\n", xs_ctxt, DP_UUID(pool_id), DP_RC(rc)); @@ -802,14 +822,7 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, return rc; } - // TODO DAOS-13690: When DAV allocator supports different MD-blob and VOS-file sizes, pass - // meta_sz and scm_sz to umem_store. The poolsize umempobj_create() - // param will continue to be used as it is currently (non-zero for create, - // zero to use existing) to keep compatibility with PMEM mode. - - bio_meta_get_attr(mc, &store.stor_size, &store.stor_blk_size, &store.stor_hdr_blks); - store.stor_priv = mc; - store.stor_ops = &vos_store_ops; + init_umem_store(&store, mc); umem_create: D_DEBUG(DB_MGMT, "umempobj_create sz: " DF_U64 " store_sz: " DF_U64, scm_sz, @@ -849,15 +862,17 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned *ph = NULL; /* always use PMEM mode for SMD */ - store.store_type = umempobj_get_backend_type(); if (flags & VOS_POF_SYSDB) { store.store_type = DAOS_MD_PMEM; store.store_standalone = true; + goto umem_open; } /* No NVMe is configured or current xstream doesn't have NVMe context */ - if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL) + if (!bio_nvme_configured(SMD_DEV_TYPE_MAX) || xs_ctxt == NULL) { + store.store_type = DAOS_MD_PMEM; goto umem_open; + } D_DEBUG(DB_MGMT, "Open BIO meta context for xs:%p pool:"DF_UUID"\n", xs_ctxt, DP_UUID(pool_id)); @@ -869,9 +884,7 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned return rc; } - bio_meta_get_attr(mc, &store.stor_size, &store.stor_blk_size, &store.stor_hdr_blks); - 
store.stor_priv = mc; - store.stor_ops = &vos_store_ops; + init_umem_store(&store, mc); if (metrics != NULL) { struct vos_pool_metrics *vpm = (struct vos_pool_metrics *)metrics;