Skip to content

Commit

Permalink
DAOS-16763 common: Tunable to control max NEMB (#15422)
Browse files Browse the repository at this point in the history
A new tunable, DAOS_MD_ON_SSD_NEMB_PCT, is introduced to define the
percentage of memory cache that non-evictable memory buckets can
expand to. This tunable will be read during pool creation and
persisted, ensuring that each time the pool is reopened,
it retains the value set during its creation.

Signed-off-by: Sherin T George <[email protected]>
  • Loading branch information
sherintg authored and NiuYawei committed Nov 1, 2024
1 parent 5187fa9 commit 54c018a
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 36 deletions.
54 changes: 28 additions & 26 deletions src/common/dav_v2/dav_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,15 @@ dav_obj_open_internal(int fd, int flags, size_t scm_sz, const char *path, struct
struct heap_zone_limits hzl;
struct zone *z0;

hzl = heap_get_zone_limits(store->stor_size, scm_sz);
hzl = heap_get_zone_limits(store->stor_size, scm_sz, 100);

if (hzl.nzones_heap == 0) {
ERR("Insufficient heap size.");
errno = EINVAL;
return NULL;
}

if (hzl.nzones_ne_max < 2) {
if ((hzl.nzones_cache < 2) && (hzl.nzones_heap > hzl.nzones_cache)) {
ERR("Insufficient scm size.");
errno = EINVAL;
return NULL;
Expand Down Expand Up @@ -117,24 +117,15 @@ dav_obj_open_internal(int fd, int flags, size_t scm_sz, const char *path, struct
goto out1;
}

rc = umem_cache_alloc(store, ZONE_MAX_SIZE, hzl.nzones_heap, hzl.nzones_cache,
hzl.nzones_ne_max, 4096, mmap_base, is_zone_evictable,
dav_uc_callback, hdl);
if (rc != 0) {
D_ERROR("Could not allocate page cache: rc=" DF_RC "\n", DP_RC(rc));
err = daos_der2errno(rc);
goto out1;
}

D_STRNDUP(hdl->do_path, path, strlen(path));

if (flags & DAV_HEAP_INIT) {
rc = heap_init(mmap_base, scm_sz, store);
if (rc) {
err = errno;
goto out2;
goto out1;
}
}

D_STRNDUP(hdl->do_path, path, strlen(path));
D_ALLOC_PTR(hdl->do_heap);
if (hdl->do_heap == NULL) {
err = ENOMEM;
Expand All @@ -155,37 +146,46 @@ dav_obj_open_internal(int fd, int flags, size_t scm_sz, const char *path, struct
heap_set_root_ptrs(hdl->do_heap, &hdl->do_root_offsetp, &hdl->do_root_sizep);
heap_set_stats_ptr(hdl->do_heap, &hdl->do_stats->persistent);

rc = umem_cache_alloc(store, ZONE_MAX_SIZE, hzl.nzones_heap, hzl.nzones_cache,
heap_get_max_nemb(hdl->do_heap), 4096, mmap_base, is_zone_evictable,
dav_uc_callback, hdl);
if (rc != 0) {
D_ERROR("Could not allocate page cache: rc=" DF_RC "\n", DP_RC(rc));
err = daos_der2errno(rc);
goto out3;
}

if (!(flags & DAV_HEAP_INIT)) {
rc = heap_zone_load(hdl->do_heap, 0);
if (rc) {
err = rc;
goto out3;
goto out4;
}
D_ASSERT(store != NULL);
rc = hdl->do_store->stor_ops->so_wal_replay(hdl->do_store, dav_wal_replay_cb, hdl);
if (rc) {
err = daos_der2errno(rc);
goto out3;
goto out4;
}
}

rc = dav_create_clogs(hdl);
if (rc) {
err = rc;
goto out3;
goto out4;
}

rc = lw_tx_begin(hdl);
if (rc) {
D_ERROR("lw_tx_begin failed with err %d\n", rc);
err = ENOMEM;
goto out3;
goto out5;
}
rc = heap_ensure_zone0_initialized(hdl->do_heap);
if (rc) {
lw_tx_end(hdl, NULL);
D_ERROR("Failed to initialize zone0, rc = %d", daos_errno2der(rc));
goto out3;
goto out5;
}
lw_tx_end(hdl, NULL);

Expand All @@ -198,35 +198,35 @@ dav_obj_open_internal(int fd, int flags, size_t scm_sz, const char *path, struct
if (rc) {
D_ERROR("Failed to update mbrt with zinfo errno = %d", rc);
err = rc;
goto out3;
goto out5;
}

rc = heap_load_nonevictable_zones(hdl->do_heap);
if (rc) {
D_ERROR("Failed to load required zones during boot, errno= %d", rc);
err = rc;
goto out3;
goto out5;
}
} else {
D_ASSERT(z0->header.zone0_zinfo_size == 0);
rc = lw_tx_begin(hdl);
if (rc) {
D_ERROR("lw_tx_begin failed with err %d\n", rc);
err = ENOMEM;
goto out3;
goto out5;
}
rc = obj_realloc(hdl, &z0->header.zone0_zinfo_off, &z0->header.zone0_zinfo_size,
heap_zinfo_get_size(hzl.nzones_heap));
if (rc != 0) {
lw_tx_end(hdl, NULL);
D_ERROR("Failed to setup zinfo");
goto out3;
goto out5;
}
rc = heap_update_mbrt_zinfo(hdl->do_heap, true);
if (rc) {
D_ERROR("Failed to update mbrt with zinfo errno = %d", rc);
err = rc;
goto out3;
goto out5;
}
lw_tx_end(hdl, NULL);
}
Expand All @@ -240,7 +240,10 @@ dav_obj_open_internal(int fd, int flags, size_t scm_sz, const char *path, struct
hdl->do_booted = 1;

return hdl;

out5:
dav_destroy_clogs(hdl);
out4:
umem_cache_free(hdl->do_store);
out3:
heap_cleanup(hdl->do_heap);
out2:
Expand All @@ -253,7 +256,6 @@ dav_obj_open_internal(int fd, int flags, size_t scm_sz, const char *path, struct
D_FREE(hdl->do_utx);
}
D_FREE(hdl->do_path);
umem_cache_free(hdl->do_store);
out1:
D_FREE(hdl);
out0:
Expand Down
50 changes: 45 additions & 5 deletions src/common/dav_v2/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "alloc_class.h"
#include "meta_io.h"

/*
 * Default nemb_pct value (a percentage). Used as the starting value before
 * the DAOS_MD_ON_SSD_NEMB_PCT tunable is read, as the replacement for a
 * tunable value of 0 or above 100, and at boot when the heap header stores
 * nemb_pct == 0.
 */
#define HEAP_NEMB_PCT_DEFAULT 80

static void
heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, uint32_t zone_id);

Expand Down Expand Up @@ -96,6 +98,7 @@ struct heap_rt {
unsigned zinfo_vec_size;
unsigned mb_create_waiters;
unsigned mb_pressure;
unsigned nemb_pct;
void *mb_create_wq;
struct zinfo_vec *zinfo_vec;
struct mbrt *default_mb;
Expand Down Expand Up @@ -1397,7 +1400,8 @@ heap_create_alloc_class_buckets(struct palloc_heap *heap, struct alloc_class *c)
* heap_write_header -- (internal) creates a clean header
*/
static int
heap_write_header(struct umem_store *store, size_t heap_size, size_t umem_cache_size)
heap_write_header(struct umem_store *store, size_t heap_size, size_t umem_cache_size,
uint32_t nemb_pct)
{
struct heap_header *newhdr;
int rc;
Expand All @@ -1414,6 +1418,7 @@ heap_write_header(struct umem_store *store, size_t heap_size, size_t umem_cache_
newhdr->heap_hdr_size = sizeof(struct heap_header);
newhdr->chunksize = CHUNKSIZE;
newhdr->chunks_per_zone = MAX_CHUNK;
newhdr->nemb_pct = (uint8_t)nemb_pct;
newhdr->checksum = 0;

util_checksum(newhdr, sizeof(*newhdr), &newhdr->checksum, 1, 0);
Expand Down Expand Up @@ -1483,6 +1488,11 @@ heap_verify_header(struct heap_header *hdr, size_t heap_size, size_t cache_size)
return -1;
}

if (hdr->nemb_pct > 100) {
D_ERROR("nemb pct value (%d) in heap header is incorrect\n", hdr->nemb_pct);
return -1;
}

if ((hdr->heap_hdr_size != sizeof(struct heap_header)) || (hdr->chunksize != CHUNKSIZE) ||
(hdr->chunks_per_zone != MAX_CHUNK)) {
D_ERROR("incompatible heap layout: hdr_sz=%lu, chunk_sz=%lu, max_chunks=%lu\n",
Expand Down Expand Up @@ -1558,6 +1568,7 @@ heap_boot(struct palloc_heap *heap, void *mmap_base, uint64_t heap_size, uint64_
struct heap_header *newhdr;
int err;
struct heap_zone_limits hzl;
uint32_t nemb_pct = HEAP_NEMB_PCT_DEFAULT;

D_ALLOC_PTR(newhdr);
if (!newhdr)
Expand All @@ -1575,6 +1586,8 @@ heap_boot(struct palloc_heap *heap, void *mmap_base, uint64_t heap_size, uint64_
D_FREE(newhdr);
return EINVAL;
}
if (newhdr->nemb_pct)
nemb_pct = newhdr->nemb_pct;
D_FREE(newhdr);

D_ALLOC_PTR_NZ(h);
Expand All @@ -1589,7 +1602,7 @@ heap_boot(struct palloc_heap *heap, void *mmap_base, uint64_t heap_size, uint64_
goto error_alloc_classes_new;
}

hzl = heap_get_zone_limits(heap_size, cache_size);
hzl = heap_get_zone_limits(heap_size, cache_size, nemb_pct);

h->nzones = hzl.nzones_heap;
h->nzones_ne = hzl.nzones_ne_max;
Expand Down Expand Up @@ -1630,6 +1643,28 @@ heap_boot(struct palloc_heap *heap, void *mmap_base, uint64_t heap_size, uint64_
return err;
}

/*
 * heap_get_nemb_pct -- (internal) reads the DAOS_MD_ON_SSD_NEMB_PCT tunable
 *
 * Starts from HEAP_NEMB_PCT_DEFAULT and passes the value to d_getenv_uint()
 * so the environment can override it. A resulting value of 0 or above 100
 * is rejected: an error is logged and HEAP_NEMB_PCT_DEFAULT is used instead.
 *
 * Returns the percentage to be used.
 */
static unsigned int
heap_get_nemb_pct(void)
{
	unsigned int nemb_pct = HEAP_NEMB_PCT_DEFAULT;

	/*
	 * The return value is not checked; presumably d_getenv_uint() leaves
	 * nemb_pct untouched when the variable is unset -- TODO confirm.
	 */
	d_getenv_uint("DAOS_MD_ON_SSD_NEMB_PCT", &nemb_pct);
	if ((nemb_pct > 100) || (nemb_pct == 0)) {
		/* %u: the value is unsigned, %d would misreport large inputs */
		D_ERROR("Invalid value %u for tunable DAOS_MD_ON_SSD_NEMB_PCT", nemb_pct);
		nemb_pct = HEAP_NEMB_PCT_DEFAULT;
	}
	D_INFO("DAOS_MD_ON_SSD_NEMB_PCT set to %u", nemb_pct);

	return nemb_pct;
}

/*
 * heap_get_max_nemb -- returns the nzones_ne value kept in the heap runtime
 */
int
heap_get_max_nemb(struct palloc_heap *heap)
{
	int max_nemb = heap->rt->nzones_ne;

	return max_nemb;
}

/*
* heap_init -- initializes the heap
*
Expand All @@ -1639,6 +1674,7 @@ int
heap_init(void *heap_start, uint64_t umem_cache_size, struct umem_store *store)
{
int nzones;
uint32_t nemb_pct = heap_get_nemb_pct();
uint64_t heap_size = store->stor_size;

if (heap_size < HEAP_MIN_SIZE)
Expand All @@ -1649,7 +1685,7 @@ heap_init(void *heap_start, uint64_t umem_cache_size, struct umem_store *store)
nzones = heap_max_zone(heap_size);
meta_clear_pages(store, sizeof(struct heap_header), 4096, ZONE_MAX_SIZE, nzones);

if (heap_write_header(store, heap_size, umem_cache_size))
if (heap_write_header(store, heap_size, umem_cache_size, nemb_pct))
return ENOMEM;

return 0;
Expand Down Expand Up @@ -1885,6 +1921,8 @@ heap_update_mbrt_zinfo(struct palloc_heap *heap, bool init)
heap->rt->zones_exhausted_ne = nemb_cnt;
heap->rt->zones_exhausted_e = emb_cnt;

D_ASSERT(heap->rt->nzones_e >= heap->rt->zones_exhausted_e);
D_ASSERT(heap->rt->nzones_ne >= heap->rt->zones_exhausted_ne);
return 0;
}

Expand Down Expand Up @@ -2058,10 +2096,12 @@ heap_foreach_object(struct palloc_heap *heap, object_callback cb, void *arg,
}

struct heap_zone_limits
heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size)
heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size, uint32_t nemb_pct)
{
struct heap_zone_limits zd = {0};

D_ASSERT(nemb_pct <= 100);

if (heap_size < sizeof(struct heap_header))
zd.nzones_heap = 0;
else
Expand All @@ -2075,7 +2115,7 @@ heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size)
if (zd.nzones_heap < (zd.nzones_cache + UMEM_CACHE_MIN_EVICTABLE_PAGES))
zd.nzones_ne_max = zd.nzones_cache - UMEM_CACHE_MIN_EVICTABLE_PAGES;
else
zd.nzones_ne_max = zd.nzones_cache * 8 / 10;
zd.nzones_ne_max = ((unsigned long)zd.nzones_cache * nemb_pct) / 100;
if (zd.nzones_cache < (zd.nzones_ne_max + UMEM_CACHE_MIN_EVICTABLE_PAGES))
zd.nzones_ne_max = zd.nzones_cache - UMEM_CACHE_MIN_EVICTABLE_PAGES;
} else
Expand Down
4 changes: 3 additions & 1 deletion src/common/dav_v2/heap.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ heap_cleanup(struct palloc_heap *heap);
int
heap_check(void *heap_start, uint64_t heap_size);
int
heap_get_max_nemb(struct palloc_heap *heap);
int
heap_create_alloc_class_buckets(struct palloc_heap *heap, struct alloc_class *c);
int
heap_mbrt_update_alloc_class_buckets(struct palloc_heap *heap, struct mbrt *mb,
Expand Down Expand Up @@ -148,5 +150,5 @@ uint32_t
heap_off2mbid(struct palloc_heap *heap, uint64_t offset);

struct heap_zone_limits
heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size);
heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size, uint32_t nemb_pct);
#endif /* __DAOS_COMMON_HEAP_H */
3 changes: 2 additions & 1 deletion src/common/dav_v2/heap_layout.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ struct heap_header {
uint64_t heap_hdr_size;
uint64_t chunksize;
uint64_t chunks_per_zone;
uint8_t reserved[4016];
uint8_t nemb_pct;
uint8_t reserved[4015];
uint64_t checksum;
};

Expand Down
Loading

0 comments on commit 54c018a

Please sign in to comment.