From 988bd58df906c1e7c46b0b567c28c10a5339d46a Mon Sep 17 00:00:00 2001
From: Niu Yawei
Date: Mon, 9 Dec 2024 22:33:35 -0500
Subject: [PATCH] DAOS-16866 bio: pre-allocate more DMA chunks on engine start

Each VOS xstream used to pre-allocate 24 DMA chunks (192MB) on engine
start; the per-xstream DMA buffer was then expanded on demand until it
hit the upper bound (128 chunks, 1GB by default).

This patch bumps the pre-allocated size to 60% of the upper bound and
makes the pre-allocation percentage configurable via the
DAOS_DMA_INIT_PCT environment variable.

Required-githooks: true

Signed-off-by: Niu Yawei
---
 src/bio/bio_buffer.c     |  2 ++
 src/bio/bio_xstream.c    | 31 ++++++++++++++++++++++---------
 utils/test_memcheck.supp | 11 +++++++++++
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/src/bio/bio_buffer.c b/src/bio/bio_buffer.c
index 1f6baae521b8..3f0137ad23ee 100644
--- a/src/bio/bio_buffer.c
+++ b/src/bio/bio_buffer.c
@@ -49,6 +49,7 @@ dma_alloc_chunk(unsigned int cnt)
 	}
 
 	if (chunk->bdc_ptr == NULL) {
+		D_ERROR("Failed to allocate %zu bytes for DMA buffer\n", bytes);
 		D_FREE(chunk);
 		return NULL;
 	}
@@ -88,6 +89,7 @@ dma_buffer_grow(struct bio_dma_buffer *buf, unsigned int cnt)
 	for (i = 0; i < cnt; i++) {
 		chunk = dma_alloc_chunk(bio_chk_sz);
 		if (chunk == NULL) {
+			D_ERROR("Failed to grow DMA buffer (%u chunks)\n", buf->bdb_tot_cnt);
 			rc = -DER_NOMEM;
 			break;
 		}
diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c
index 1396689e892d..d26a5f4bee19 100644
--- a/src/bio/bio_xstream.c
+++ b/src/bio/bio_xstream.c
@@ -31,8 +31,8 @@
 /* SPDK blob parameters */
 #define DAOS_BS_CLUSTER_SZ	(1ULL << 25)	/* 32MB */
 /* DMA buffer parameters */
-#define DAOS_DMA_CHUNK_CNT_INIT	24	/* Per-xstream init chunks, 192MB */
-#define DAOS_DMA_CHUNK_CNT_MAX	128	/* Per-xstream max chunks, 1GB */
+#define DAOS_DMA_CHUNK_INIT_PCT	60	/* Default per-xstream init chunks, in percent of max */
+#define DAOS_DMA_CHUNK_CNT_MAX	128	/* Default per-xstream max chunks, 1GB */
 #define DAOS_DMA_CHUNK_CNT_MIN	32	/* Per-xstream min chunks, 256MB */
 
 /* Max in-flight blob IOs per io channel */
@@ -48,8 +48,8 @@ unsigned int bio_chk_sz;
 unsigned int bio_chk_cnt_max;
 /* NUMA node affinity */
 unsigned int bio_numa_node;
-/* Per-xstream initial DMA buffer size (in chunk count) */
-static unsigned int bio_chk_cnt_init;
+/* Per-xstream initial DMA buffer size (in percentage) */
+static unsigned int bio_chk_init_pct;
 /* Diret RDMA over SCM */
 bool bio_scm_rdma;
 /* Whether SPDK inited */
@@ -203,6 +203,14 @@ bypass_health_collect()
 	return nvme_glb.bd_bypass_health_collect;
 }
 
+static inline unsigned int
+init_chk_cnt()
+{
+	unsigned init_cnt = (bio_chk_cnt_max * bio_chk_init_pct / 100);
+
+	return (init_cnt == 0) ? 1 : init_cnt;
+}
+
 int
 bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
 	      unsigned int hugepage_size, unsigned int tgt_nr, bool bypass_health_collect)
@@ -249,7 +257,7 @@ bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
 	 */
 	glb_criteria.fc_max_csum_errs = UINT32_MAX;
 
-	bio_chk_cnt_init = DAOS_DMA_CHUNK_CNT_INIT;
+	bio_chk_init_pct = DAOS_DMA_CHUNK_INIT_PCT;
 	bio_chk_cnt_max  = DAOS_DMA_CHUNK_CNT_MAX;
 
 	bio_chk_sz = ((uint64_t)size_mb << 20) >> BIO_DMA_PAGE_SHIFT;
@@ -291,8 +299,13 @@ bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
 		       mem_size, tgt_nr);
 		return -DER_INVAL;
 	}
-	D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks\n",
-	       bio_chk_cnt_max, size_mb);
+
+	d_getenv_uint("DAOS_DMA_INIT_PCT", &bio_chk_init_pct);
+	if (bio_chk_init_pct == 0 || bio_chk_init_pct >= 100)
+		bio_chk_init_pct = DAOS_DMA_CHUNK_INIT_PCT;
+
+	D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks, prealloc %u chunks\n",
+	       bio_chk_cnt_max, size_mb, init_chk_cnt());
 
 	spdk_bs_opts_init(&nvme_glb.bd_bs_opts, sizeof(nvme_glb.bd_bs_opts));
 	nvme_glb.bd_bs_opts.cluster_sz = DAOS_BS_CLUSTER_SZ;
@@ -1560,7 +1573,7 @@ bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling)
 
 	/* Skip NVMe context setup if the daos_nvme.conf isn't present */
 	if (!bio_nvme_configured(SMD_DEV_TYPE_MAX)) {
-		ctxt->bxc_dma_buf = dma_buffer_create(bio_chk_cnt_init, tgt_id);
+		ctxt->bxc_dma_buf = dma_buffer_create(init_chk_cnt(), tgt_id);
 		if (ctxt->bxc_dma_buf == NULL) {
 			D_FREE(ctxt);
 			*pctxt = NULL;
@@ -1673,7 +1686,7 @@ bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling)
 		D_ASSERT(d_bdev != NULL);
 	}
 
-	ctxt->bxc_dma_buf = dma_buffer_create(bio_chk_cnt_init, tgt_id);
+	ctxt->bxc_dma_buf = dma_buffer_create(init_chk_cnt(), tgt_id);
 	if (ctxt->bxc_dma_buf == NULL) {
 		D_ERROR("failed to initialize dma buffer\n");
 		rc = -DER_NOMEM;
diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp
index 3134d89d0eed..ce2399058585 100644
--- a/utils/test_memcheck.supp
+++ b/utils/test_memcheck.supp
@@ -418,3 +418,14 @@
    ...
    fun:runtime.persistentalloc
 }
+{
+   DAOS-16866
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:mem_map_get_map_1gb
+   fun:spdk_mem_map_set_translation
+   fun:vtophys_notify
+   fun:spdk_mem_register
+   ...
+}
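
For illustration only, not part of the patch: a minimal standalone sketch of the pre-allocation arithmetic introduced above. The constants below are assumptions mirroring the stated defaults (128 max chunks per xstream, 8MB chunks, 60% init percentage), and plain getenv()/strtoul() parsing stands in for the d_getenv_uint() call used in bio_nvme_init().

#include <stdio.h>
#include <stdlib.h>

#define CHUNK_CNT_MAX  128 /* assumed per-xstream upper bound: 128 chunks * 8MB = 1GB */
#define CHUNK_SZ_MB    8   /* assumed DMA chunk size, used only for the MB figure below */
#define INIT_PCT_DFLT  60  /* default pre-allocation percentage */

int main(void)
{
	unsigned int pct = INIT_PCT_DFLT;
	const char  *env = getenv("DAOS_DMA_INIT_PCT");

	/* Out-of-range or unset values fall back to the default, as in the patch */
	if (env != NULL) {
		unsigned int val = (unsigned int)strtoul(env, NULL, 10);

		if (val > 0 && val < 100)
			pct = val;
	}

	unsigned int init_cnt = CHUNK_CNT_MAX * pct / 100;

	if (init_cnt == 0)
		init_cnt = 1; /* never pre-allocate zero chunks */

	/* Defaults: 128 * 60 / 100 = 76 chunks, i.e. 608MB pre-allocated per xstream */
	printf("pre-allocate %u of %u chunks (%uMB)\n",
	       init_cnt, CHUNK_CNT_MAX, init_cnt * CHUNK_SZ_MB);
	return 0;
}

With the defaults this works out to 76 of 128 chunks (608MB of the 1GB per-xstream bound) pre-allocated at engine start, versus the previous fixed 24 chunks (192MB).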