diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index 223087fee07..dbe4a5f2263 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -10,7 +10,7 @@ env: # the organizational defaults values for these variables # TODO: we really need to define a list of supported versions (ideally it's no more than 2) # build is done on the lowest version and test on the highest with a "sanity test" - # stage done on all versions in the list ecept the highest + # stage done on all versions in the list except the highest EL8_BUILD_VERSION: ${{ vars.EL8_BUILD_VERSION_MASTER }} EL8_VERSION: ${{ vars.EL8_VERSION_MASTER }} EL9_BUILD_VERSION: ${{ vars.EL9_BUILD_VERSION_MASTER }} @@ -365,7 +365,8 @@ jobs: - name: Publish test results if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: EnricoMi/publish-unit-test-result-action@v2 + # yamllint disable-line rule:line-length + uses: EnricoMi/publish-unit-test-result-action@4e7013f9576bd22ffdae979dc6e68cb9ec2aeece # v2.7.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -632,7 +633,8 @@ jobs: - name: Publish test results if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: EnricoMi/publish-unit-test-result-action@v2 + # yamllint disable-line rule:line-length + uses: EnricoMi/publish-unit-test-result-action@4e7013f9576bd22ffdae979dc6e68cb9ec2aeece # v2.7.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index 50063bbad2b..a487173347b 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -67,7 +67,8 @@ jobs: run: docker cp build-post:/home/daos/daos/nlt-junit.xml ./ - name: Publish NLT test results if: always() - uses: EnricoMi/publish-unit-test-result-action@v1.17 + # yamllint disable-line rule:line-length + uses: EnricoMi/publish-unit-test-result-action@4e7013f9576bd22ffdae979dc6e68cb9ec2aeece # v2.7.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index aeedd80b9d6..09ba348aec3 100644 --- a/.github/workflows/landing-builds.yml +++ b/.github/workflows/landing-builds.yml @@ -138,7 +138,8 @@ jobs: run: docker cp build-post:/home/daos/daos/nlt-junit.xml ./ - name: Publish NLT test results if: always() - uses: EnricoMi/publish-unit-test-result-action@v1.17 + # yamllint disable-line rule:line-length + uses: EnricoMi/publish-unit-test-result-action@4e7013f9576bd22ffdae979dc6e68cb9ec2aeece # v2.7.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index af3059c22fe..e71094ff640 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -225,6 +225,7 @@ jobs: - codespell # - clang-format # not required - yaml-lint + - copyright if: (!cancelled()) steps: - name: Check if any job failed diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 6f33efc1ad0..42d8ae60489 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -10,7 +10,7 @@ env: # the organizational defaults values for these variables # TODO: we really need to define a list of supported versions (ideally it's no more than 2) # build is done on 
the lowest version and test on the highest with a "sanity test" - # stage done on all versions in the list ecept the highest + # stage done on all versions in the list except the highest EL8_BUILD_VERSION: ${{ vars.EL8_BUILD_VERSION_MASTER }} EL8_VERSION: ${{ vars.EL8_VERSION_MASTER }} EL9_BUILD_VERSION: ${{ vars.EL9_BUILD_VERSION_MASTER }} @@ -373,7 +373,8 @@ jobs: - name: Publish test results if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: EnricoMi/publish-unit-test-result-action@v2 + # yamllint disable-line rule:line-length + uses: EnricoMi/publish-unit-test-result-action@4e7013f9576bd22ffdae979dc6e68cb9ec2aeece # v2.7.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -640,7 +641,8 @@ jobs: - name: Publish test results if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: EnricoMi/publish-unit-test-result-action@v2 + # yamllint disable-line rule:line-length + uses: EnricoMi/publish-unit-test-result-action@4e7013f9576bd22ffdae979dc6e68cb9ec2aeece # v2.7.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/admin/troubleshooting.md b/docs/admin/troubleshooting.md index 5de7b95412d..92e48e6e83e 100644 --- a/docs/admin/troubleshooting.md +++ b/docs/admin/troubleshooting.md @@ -556,7 +556,7 @@ Alternately, the administrator may erase and re-format the DAOS system to start ### Engines become unavailable -Engines may become unavailable due to server power losses and reboots, network switch failures, etc. After staying unavailable for a certain period of time, these engines may become "excluded" or "errored" in `dmg system query` output. Once the states of all engines stabilize (see [`CRT_EVENT_DELAY`](env_variables.md)), each pool will check whether there is enough redundancy (see [Pool RF](pool_operations.md#pool-redundancy-factor)) to tolerate the unavailability of the "excluded" or "errored" engines. If there is enough redundancy, these engines will be excluded from the pool ("disabled ranks" in `dmg pool query --health-only` output); otherwise, the pool will perform no exclusion ("suspect ranks" in `dmg pool query --health-only` output as described in [Querying a Pool](pool_operations.md#querying-a-pool)) and may become temporarily unavailable (as seen by timeouts of `dmg pool query`, `dmg pool list`, etc.). Similarly, when engines become available, whenever the states of all engines stabilize, each pool will perform the aforementioned check for any unavailable engines that remain. +Engines may become unavailable due to server power losses and reboots, network switch failures, etc. After staying unavailable for a certain period of time, these engines may become "excluded" or "errored" in `dmg system query` output. Once the states of all engines stabilize (see [`CRT_EVENT_DELAY`](env_variables.md)), each pool will check whether there is enough redundancy (see [Pool RF](pool_operations.md#pool-redundancy-factor)) to tolerate the unavailability of the "excluded" or "errored" engines. If there is enough redundancy, these engines will be excluded from the pool ("Disabled ranks" in `dmg pool query --health-only` output); otherwise, the pool will perform no exclusion ("Dead ranks" in `dmg pool query --health-only` output as described in [Querying a Pool](pool_operations.md#querying-a-pool)) and may become temporarily unavailable (as seen by timeouts of `dmg pool query`, `dmg pool list`, etc.). 
Similarly, when engines become available, whenever the states of all engines stabilize, each pool will perform the aforementioned check for any unavailable engines that remain. To restore availability as well as capacity and performance, try to start all "excluded" or "errored" engines. Starting all of them at the same time minimizes the chance of triggering rebuild jobs. In many cases, the following command suffices: ``` diff --git a/src/client/dfs/SConscript b/src/client/dfs/SConscript index 5b98d6d7e8a..906b1bfd7d4 100644 --- a/src/client/dfs/SConscript +++ b/src/client/dfs/SConscript @@ -44,7 +44,7 @@ def scons(): libraries = ['daos_common', 'daos', 'uuid', 'gurt'] dfs_src = ['common.c', 'cont.c', 'dir.c', 'file.c', 'io.c', 'lookup.c', 'mnt.c', 'obj.c', - 'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c'] + 'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c', 'metrics.c'] dfs = denv.d_library('dfs', dfs_src, LIBS=libraries) denv.Install('$PREFIX/lib64/', dfs) diff --git a/src/client/dfs/common.c b/src/client/dfs/common.c index 92e7470d54a..acbc7eb11f7 100644 --- a/src/client/dfs/common.c +++ b/src/client/dfs/common.c @@ -625,6 +625,8 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz stbuf->st_atim.tv_sec = stbuf->st_mtim.tv_sec; stbuf->st_atim.tv_nsec = stbuf->st_mtim.tv_nsec; } + + DFS_OP_STAT_INCR(dfs, DOS_STAT); return 0; } @@ -710,6 +712,7 @@ open_dir(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, struct D_ASSERT(rc == 0); dir->d.chunk_size = entry->chunk_size; dir->d.oclass = entry->oclass; + DFS_OP_STAT_INCR(dfs, DOS_MKDIR); return 0; } } @@ -742,6 +745,7 @@ open_dir(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, struct oid_cp(&dir->oid, entry->oid); dir->d.chunk_size = entry->chunk_size; dir->d.oclass = entry->oclass; + DFS_OP_STAT_INCR(dfs, DOS_OPENDIR); return 0; } diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 7425fc2f00d..41be576c349 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -15,6 +15,8 @@ #include #include +#include "metrics.h" + /** D-key name of SB metadata */ #define SB_DKEY "DFS_SB_METADATA" @@ -190,6 +192,8 @@ struct dfs { struct dfs_mnt_hdls *cont_hdl; /** the root dir stat buf */ struct stat root_stbuf; + /** DFS top-level metrics */ + struct dfs_metrics *metrics; }; struct dfs_entry { diff --git a/src/client/dfs/dir.c b/src/client/dfs/dir.c index 000c0625f58..188b79c917d 100644 --- a/src/client/dfs/dir.c +++ b/src/client/dfs/dir.c @@ -65,6 +65,7 @@ dfs_mkdir(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, daos_ocl if (rc != 0) return daos_der2errno(rc); + DFS_OP_STAT_INCR(dfs, DOS_MKDIR); return rc; } @@ -220,6 +221,7 @@ dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj if (oid) oid_cp(oid, entry.oid); + DFS_OP_STAT_INCR(dfs, DOS_UNLINK); out: rc = check_tx(th, rc); if (rc == ERESTART) diff --git a/src/client/dfs/io.c b/src/client/dfs/io.c index 3919d8cfe19..db8f79ab105 100644 --- a/src/client/dfs/io.c +++ b/src/client/dfs/io.c @@ -15,7 +15,20 @@ #include "dfs_internal.h" +static void +dfs_update_file_metrics(dfs_t *dfs, daos_size_t read_bytes, daos_size_t write_bytes) +{ + if (dfs == NULL || dfs->metrics == NULL) + return; + + if (read_bytes > 0) + d_tm_inc_gauge(dfs->metrics->dm_read_bytes, read_bytes); + if (write_bytes > 0) + d_tm_inc_gauge(dfs->metrics->dm_write_bytes, write_bytes); +} + struct dfs_read_params { + dfs_t *dfs; daos_size_t *read_size; 
daos_array_iod_t arr_iod; daos_range_t rg; @@ -35,6 +48,8 @@ read_cb(tse_task_t *task, void *data) D_GOTO(out, rc); } + DFS_OP_STAT_INCR(params->dfs, DOS_READ); + dfs_update_file_metrics(params->dfs, params->arr_iod.arr_nr_read, 0); *params->read_size = params->arr_iod.arr_nr_read; out: D_FREE(params); @@ -61,6 +76,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_li if (params == NULL) D_GOTO(err_task, rc = -DER_NOMEM); + params->dfs = dfs; params->read_size = read_size; /** set array location */ @@ -90,6 +106,7 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_li * completion cb that frees params in this case, so we can just ignore the rc here. */ dc_task_schedule(task, true); + return 0; err_params: @@ -125,6 +142,7 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size daos_event_launch(ev); daos_event_complete(ev, 0); } + DFS_OP_STAT_INCR(dfs, DOS_READ); return 0; } @@ -146,7 +164,9 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size return daos_der2errno(rc); } + DFS_OP_STAT_INCR(dfs, DOS_READ); *read_size = iod.arr_nr_read; + dfs_update_file_metrics(dfs, iod.arr_nr_read, 0); return 0; } @@ -173,6 +193,7 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz daos_event_launch(ev); daos_event_complete(ev, 0); } + DFS_OP_STAT_INCR(dfs, DOS_READ); return 0; } @@ -189,7 +210,9 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz return daos_der2errno(rc); } + DFS_OP_STAT_INCR(dfs, DOS_READ); *read_size = arr_iod.arr_nr_read; + dfs_update_file_metrics(dfs, arr_iod.arr_nr_read, 0); return 0; } @@ -223,6 +246,7 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve daos_event_launch(ev); daos_event_complete(ev, 0); } + DFS_OP_STAT_INCR(dfs, DOS_WRITE); return 0; } @@ -238,8 +262,12 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve daos_event_errno_rc(ev); rc = daos_array_write(obj->oh, DAOS_TX_NONE, &iod, sgl, ev); - if (rc) + if (rc == 0) { + DFS_OP_STAT_INCR(dfs, DOS_WRITE); + dfs_update_file_metrics(dfs, 0, buf_size); + } else { D_ERROR("daos_array_write() failed, " DF_RC "\n", DP_RC(rc)); + } return daos_der2errno(rc); } @@ -248,6 +276,8 @@ int dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev) { daos_array_iod_t arr_iod; + daos_size_t buf_size; + int i; int rc; if (dfs == NULL || !dfs->mounted) @@ -266,6 +296,7 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev daos_event_launch(ev); daos_event_complete(ev, 0); } + DFS_OP_STAT_INCR(dfs, DOS_WRITE); return 0; } @@ -276,9 +307,18 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev if (ev) daos_event_errno_rc(ev); + buf_size = 0; + if (dfs->metrics != NULL && sgl != NULL) + for (i = 0; i < sgl->sg_nr; i++) + buf_size += sgl->sg_iovs[i].iov_len; + rc = daos_array_write(obj->oh, DAOS_TX_NONE, &arr_iod, sgl, ev); - if (rc) + if (rc == 0) { + DFS_OP_STAT_INCR(dfs, DOS_WRITE); + dfs_update_file_metrics(dfs, 0, buf_size); + } else { D_ERROR("daos_array_write() failed (%d)\n", rc); + } return daos_der2errno(rc); } diff --git a/src/client/dfs/metrics.c b/src/client/dfs/metrics.c new file mode 100644 index 00000000000..40aefc90e73 --- /dev/null +++ b/src/client/dfs/metrics.c @@ -0,0 +1,174 @@ +/** + * (C) Copyright 2024 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(dfs) + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "metrics.h" +#include "dfs_internal.h" + +#define DFS_METRICS_ROOT "dfs" + +#define STAT_METRICS_SIZE (D_TM_METRIC_SIZE * DOS_LIMIT) +#define FILE_METRICS_SIZE (((D_TM_METRIC_SIZE * NR_SIZE_BUCKETS) * 2) + D_TM_METRIC_SIZE * 2) +#define DFS_METRICS_SIZE (STAT_METRICS_SIZE + FILE_METRICS_SIZE) + +#define SPRINTF_TM_PATH(buf, pool_uuid, cont_uuid, path) \ + snprintf(buf, sizeof(buf), "pool/" DF_UUIDF "/container/" DF_UUIDF "/%s", \ + DP_UUID(pool_uuid), DP_UUID(cont_uuid), path); + +#define ADD_STAT_METRIC(name, ...) \ + SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, DFS_METRICS_ROOT "/ops/" #name); \ + rc = d_tm_add_metric(&metrics->dm_op_stats[i], D_TM_COUNTER, "Count of " #name " calls", \ + "calls", tmp_path); \ + if (rc != 0) { \ + DL_ERROR(rc, "failed to create " #name " counter"); \ + return; \ + } \ + i++; + +static void +op_stats_init(struct dfs_metrics *metrics, uuid_t pool_uuid, uuid_t cont_uuid) +{ + char tmp_path[D_TM_MAX_NAME_LEN] = {0}; + int i = 0; + int rc; + + if (metrics == NULL) + return; + + D_FOREACH_DFS_OP_STAT(ADD_STAT_METRIC); +} + +static void +cont_stats_init(struct dfs_metrics *metrics, uuid_t pool_uuid, uuid_t cont_uuid) +{ + char tmp_path[D_TM_MAX_NAME_LEN] = {0}; + int rc = 0; + + if (metrics == NULL) + return; + + SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, "mount_time"); + rc = d_tm_add_metric(&metrics->dm_mount_time, D_TM_TIMESTAMP, "container mount time", NULL, + tmp_path); + if (rc != 0) + DL_ERROR(rc, "failed to create mount_time timestamp"); +} + +static void +file_stats_init(struct dfs_metrics *metrics, uuid_t pool_uuid, uuid_t cont_uuid) +{ + char tmp_path[D_TM_MAX_NAME_LEN] = {0}; + int rc = 0; + + if (metrics == NULL) + return; + + SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, DFS_METRICS_ROOT "/read_bytes"); + rc = d_tm_add_metric(&metrics->dm_read_bytes, D_TM_STATS_GAUGE, "dfs read bytes", "bytes", + tmp_path); + if (rc != 0) + DL_ERROR(rc, "failed to create dfs read_bytes counter"); + rc = + d_tm_init_histogram(metrics->dm_read_bytes, tmp_path, NR_SIZE_BUCKETS, 256, 2, "bytes"); + if (rc) + DL_ERROR(rc, "Failed to init dfs read size histogram"); + + SPRINTF_TM_PATH(tmp_path, pool_uuid, cont_uuid, DFS_METRICS_ROOT "/write_bytes"); + rc = d_tm_add_metric(&metrics->dm_write_bytes, D_TM_STATS_GAUGE, "dfs write bytes", "bytes", + tmp_path); + if (rc != 0) + DL_ERROR(rc, "failed to create dfs write_bytes counter"); + rc = d_tm_init_histogram(metrics->dm_write_bytes, tmp_path, NR_SIZE_BUCKETS, 256, 2, + "bytes"); + if (rc) + DL_ERROR(rc, "Failed to init dfs write size histogram"); +} + +bool +dfs_metrics_enabled() +{ + /* set in client/api/metrics.c */ + return daos_client_metric; +} + +void +dfs_metrics_init(dfs_t *dfs) +{ + uuid_t pool_uuid; + uuid_t cont_uuid; + char root_name[D_TM_MAX_NAME_LEN]; + pid_t pid = getpid(); + size_t root_size = DFS_METRICS_SIZE + (D_TM_METRIC_SIZE * 3); + int rc; + + if (dfs == NULL) + return; + + rc = dc_pool_hdl2uuid(dfs->poh, NULL, &pool_uuid); + if (rc != 0) { + DL_ERROR(rc, "failed to get pool UUID"); + goto error; + } + + rc = dc_cont_hdl2uuid(dfs->coh, NULL, &cont_uuid); + if (rc != 0) { + DL_ERROR(rc, "failed to get container UUID"); + goto error; + } + + snprintf(root_name, sizeof(root_name), "%d", pid); + /* if only container-level metrics are enabled; this will init a root 
for them */
+	rc = d_tm_init_with_name(d_tm_cli_pid_key(pid), root_size, D_TM_OPEN_OR_CREATE, root_name);
+	if (rc != 0 && rc != -DER_ALREADY) {
+		DL_ERROR(rc, "failed to init DFS metrics");
+		goto error;
+	}
+
+	D_ALLOC_PTR(dfs->metrics);
+	if (dfs->metrics == NULL) {
+		D_ERROR("failed to alloc DFS metrics");
+		goto error;
+	}
+
+	SPRINTF_TM_PATH(root_name, pool_uuid, cont_uuid, DFS_METRICS_ROOT);
+	rc = d_tm_add_ephemeral_dir(NULL, DFS_METRICS_SIZE, root_name);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to add DFS metrics dir");
+		goto error;
+	}
+
+	cont_stats_init(dfs->metrics, pool_uuid, cont_uuid);
+	op_stats_init(dfs->metrics, pool_uuid, cont_uuid);
+	file_stats_init(dfs->metrics, pool_uuid, cont_uuid);
+
+	d_tm_record_timestamp(dfs->metrics->dm_mount_time);
+	return;
+
+error:
+	if (dfs->metrics != NULL)
+		D_FREE(dfs->metrics);
+}
+
+void
+dfs_metrics_fini(dfs_t *dfs)
+{
+	D_FREE(dfs->metrics);
+}
\ No newline at end of file
diff --git a/src/client/dfs/metrics.h b/src/client/dfs/metrics.h
new file mode 100644
index 00000000000..722a57590d7
--- /dev/null
+++ b/src/client/dfs/metrics.h
@@ -0,0 +1,79 @@
+/**
+ * (C) Copyright 2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+#ifndef __DFS_METRICS_H__
+#define __DFS_METRICS_H__
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include
+#include
+#include
+#include
+
+/*
+ * Report read/write counts per I/O size.
+ * Buckets start at [0; 256B[ and are increased in powers of 2
+ * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[.
+ * Since 4MB = 2^22 and 256B = 2^8, this means
+ * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so
+ * 16 buckets in total.
+ */
+#define NR_SIZE_BUCKETS 16
+
+/* define a set of ops that we'll count if metrics are enabled */
+#define D_FOREACH_DFS_OP_STAT(ACTION) \
+	ACTION(CHMOD) \
+	ACTION(CHOWN) \
+	ACTION(CREATE) \
+	ACTION(GETSIZE) \
+	ACTION(GETXATTR) \
+	ACTION(LSXATTR) \
+	ACTION(MKDIR) \
+	ACTION(OPEN) \
+	ACTION(OPENDIR) \
+	ACTION(READ) \
+	ACTION(READDIR) \
+	ACTION(READLINK) \
+	ACTION(RENAME) \
+	ACTION(RMXATTR) \
+	ACTION(SETATTR) \
+	ACTION(SETXATTR) \
+	ACTION(STAT) \
+	ACTION(SYMLINK) \
+	ACTION(SYNC) \
+	ACTION(TRUNCATE) \
+	ACTION(UNLINK) \
+	ACTION(WRITE)
+
+#define DFS_OP_STAT_DEFINE(name, ...) DOS_##name,
+
+enum dfs_op_stat {
+	D_FOREACH_DFS_OP_STAT(DFS_OP_STAT_DEFINE) DOS_LIMIT,
+};
+
+#define DFS_OP_STAT_INCR(_dfs, _name) \
+	if (_dfs->metrics != NULL) \
+		d_tm_inc_counter(_dfs->metrics->dm_op_stats[(_name)], 1);
+
+struct dfs_metrics {
+	struct d_tm_node_t *dm_op_stats[DOS_LIMIT];
+	struct d_tm_node_t *dm_read_bytes;
+	struct d_tm_node_t *dm_write_bytes;
+	struct d_tm_node_t *dm_mount_time;
+};
+
+bool
+dfs_metrics_enabled();
+
+void
+dfs_metrics_init(dfs_t *dfs);
+
+void
+dfs_metrics_fini(dfs_t *dfs);
+
+#endif /* __DFS_METRICS_H__ */
\ No newline at end of file
diff --git a/src/client/dfs/mnt.c b/src/client/dfs/mnt.c
index b0e373a2820..a4955bbde1d 100644
--- a/src/client/dfs/mnt.c
+++ b/src/client/dfs/mnt.c
@@ -729,6 +729,9 @@ dfs_mount_int(daos_handle_t poh, daos_handle_t coh, int flags, daos_epoch_t epoc
 		daos_obj_oid_cycle(&dfs->oid);
 	}
 
+	if (dfs_metrics_enabled())
+		dfs_metrics_init(dfs);
+
 	dfs->mounted = DFS_MOUNT;
 	*_dfs = dfs;
 	daos_prop_free(prop);
@@ -844,6 +847,8 @@ dfs_umount(dfs_t *dfs)
 	daos_obj_close(dfs->root.oh, NULL);
 	daos_obj_close(dfs->super_oh, NULL);
 
+	dfs_metrics_fini(dfs);
+
 	D_FREE(dfs->prefix);
 	D_MUTEX_DESTROY(&dfs->lock);
 	D_FREE(dfs);
diff --git a/src/client/dfs/obj.c b/src/client/dfs/obj.c
index 309439bc807..21c09027bef 100644
--- a/src/client/dfs/obj.c
+++ b/src/client/dfs/obj.c
@@ -208,6 +208,7 @@ open_file(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, daos_s
 			return rc;
 		} else {
 			D_ASSERT(rc == 0);
+			DFS_OP_STAT_INCR(dfs, DOS_CREATE);
 			return 0;
 		}
 	}
@@ -261,6 +262,7 @@ open_file(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, daos_s
 		}
 	}
 	oid_cp(&file->oid, entry->oid);
+	DFS_OP_STAT_INCR(dfs, DOS_OPEN);
 	return 0;
 }
@@ -320,6 +322,8 @@ open_symlink(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, con
 		D_FREE(sym->value);
 		D_ERROR("Inserting entry '%s' failed: %d (%s)\n", sym->name, rc, strerror(rc));
+	} else if (rc == 0) {
+		DFS_OP_STAT_INCR(dfs, DOS_SYMLINK);
 	}
 	return rc;
 }
@@ -882,6 +886,8 @@ ostatx_cb(tse_task_t *task, void *data)
 	rc2 = daos_obj_close(args->parent_oh, NULL);
 	if (rc == 0)
 		rc = rc2;
+	if (rc == 0)
+		DFS_OP_STAT_INCR(args->dfs, DOS_STAT);
 	return rc;
 }
@@ -1013,6 +1019,7 @@ statx_task(tse_task_t *task)
 err1_out:
 	D_FREE(op_args);
 	daos_obj_close(args->parent_oh, NULL);
+
 	return rc;
 }
@@ -1243,6 +1250,7 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode)
 		D_GOTO(out, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_CHMOD);
 out:
 	if (S_ISLNK(entry.mode)) {
 		dfs_release(sym);
@@ -1378,6 +1386,7 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid,
 		D_GOTO(out, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_CHOWN);
 out:
 	if (!(flags & O_NOFOLLOW) && S_ISLNK(entry.mode)) {
 		dfs_release(sym);
@@ -1598,6 +1607,7 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags)
 		D_GOTO(out_obj, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_SETATTR);
 out_stat:
 	*stbuf = rstat;
 out_obj:
@@ -1662,6 +1672,7 @@ dfs_punch(dfs_t *dfs, dfs_obj_t *obj, daos_off_t offset, daos_size_t len)
 		return daos_der2errno(rc);
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_TRUNCATE);
 	return rc;
 }
@@ -1697,6 +1708,7 @@ dfs_get_symlink_value(dfs_obj_t *obj, char *buf, daos_size_t *size)
 	strcpy(buf, obj->value);
 	*size = val_size;
 
+	DFS_OP_STAT_INCR(obj->dfs, DOS_READLINK);
 	return 0;
 }
@@ -1709,6 +1721,7 @@ dfs_sync(dfs_t *dfs)
 		return EPERM;
 
 	/** Take a snapshot here and allow rollover to that when supported. */
+	/** Uncomment this when supported.
DFS_OP_STAT_INCR(dfs, DOS_SYNC); */ return 0; } diff --git a/src/client/dfs/readdir.c b/src/client/dfs/readdir.c index a284b92e4c2..ba748a8d4e9 100644 --- a/src/client/dfs/readdir.c +++ b/src/client/dfs/readdir.c @@ -81,6 +81,7 @@ readdir_int(dfs_t *dfs, dfs_obj_t *obj, daos_anchor_t *anchor, uint32_t *nr, str break; } *nr = key_nr; + DFS_OP_STAT_INCR(dfs, DOS_READDIR); out: D_FREE(enum_buf); @@ -180,6 +181,7 @@ dfs_iterate(dfs_t *dfs, dfs_obj_t *obj, daos_anchor_t *anchor, uint32_t *nr, siz } *nr = keys_nr; + DFS_OP_STAT_INCR(dfs, DOS_READDIR); out: D_FREE(kds); D_FREE(enum_buf); diff --git a/src/client/dfs/rename.c b/src/client/dfs/rename.c index a7431f03536..cb6991e0e0b 100644 --- a/src/client/dfs/rename.c +++ b/src/client/dfs/rename.c @@ -299,6 +299,8 @@ dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char rc = check_tx(th, rc); if (rc == ERESTART) goto restart; + if (rc == 0) + DFS_OP_STAT_INCR(dfs, DOS_RENAME); if (entry.value) { D_ASSERT(S_ISLNK(entry.mode)); diff --git a/src/client/dfs/xattr.c b/src/client/dfs/xattr.c index 49b700e3def..b3a13a31a8d 100644 --- a/src/client/dfs/xattr.c +++ b/src/client/dfs/xattr.c @@ -122,6 +122,7 @@ dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, da } } + DFS_OP_STAT_INCR(dfs, DOS_SETXATTR); out: daos_obj_close(oh, NULL); free: @@ -194,6 +195,7 @@ dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, daos_siz } *size = iod.iod_size; + DFS_OP_STAT_INCR(dfs, DOS_GETXATTR); close: daos_obj_close(oh, NULL); @@ -277,6 +279,7 @@ dfs_removexattr(dfs_t *dfs, dfs_obj_t *obj, const char *name) D_GOTO(out, rc = daos_der2errno(rc)); } + DFS_OP_STAT_INCR(dfs, DOS_RMXATTR); out: daos_obj_close(oh, NULL); free: @@ -354,6 +357,7 @@ dfs_listxattr(dfs_t *dfs, dfs_obj_t *obj, char *list, daos_size_t *size) } *size = ret_size; + DFS_OP_STAT_INCR(dfs, DOS_LSXATTR); out: daos_obj_close(oh, NULL); return rc; diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 84def83db13..6f6d48f3320 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -1198,7 +1198,7 @@ bool dfuse_dcache_get_valid(struct dfuse_inode_entry *ie, double max_age); void -dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh); +dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie); int check_for_uns_ep(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, char *attr, diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index 2cb6a12a2bf..6abe707a907 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -122,7 +122,7 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) * release from completing which also holds open the inode. 
*/ if (preread) - dfuse_pre_read(dfuse_info, oh); + dfuse_pre_read(dfuse_info, ie); return; decref: diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 7dd0898101c..46286e7ae11 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -600,13 +600,13 @@ dfuse_cb_pre_read_complete(struct dfuse_event *ev) } void -dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh) +dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) { - struct active_inode *active = oh->doh_ie->ie_active; + struct active_inode *active = ie->ie_active; struct dfuse_eq *eqt; int rc; struct dfuse_event *ev; - size_t len = oh->doh_ie->ie_stat.st_size; + size_t len = ie->ie_stat.st_size; eqt = pick_eqt(dfuse_info); ev = d_slab_acquire(eqt->de_pre_read_slab); @@ -616,7 +616,7 @@ dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh) ev->de_iov.iov_len = len; ev->de_req = 0; ev->de_sgl.sg_nr = 1; - ev->de_ie = oh->doh_ie; + ev->de_ie = ie; ev->de_readahead_len = len; ev->de_req_position = 0; ev->de_di = dfuse_info; @@ -624,7 +624,7 @@ dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh) ev->de_complete_cb = dfuse_cb_pre_read_complete; active->readahead->dra_ev = ev; - rc = dfs_read(oh->doh_dfs, oh->doh_obj, &ev->de_sgl, 0, &ev->de_len, &ev->de_ev); + rc = dfs_read(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_sgl, 0, &ev->de_len, &ev->de_ev); if (rc != 0) goto err; @@ -642,6 +642,6 @@ dfuse_pre_read(struct dfuse_info *dfuse_info, struct dfuse_obj_hdl *oh) d_slab_release(eqt->de_pre_read_slab, ev); active->readahead->dra_ev = NULL; } - active_ie_decref(dfuse_info, oh->doh_ie); + active_ie_decref(dfuse_info, ie); pre_read_mark_done(active); } diff --git a/src/common/dav_v2/dav_iface.c b/src/common/dav_v2/dav_iface.c index ede29fafc56..52e48fdefbb 100644 --- a/src/common/dav_v2/dav_iface.c +++ b/src/common/dav_v2/dav_iface.c @@ -51,7 +51,7 @@ dav_uc_callback(int evt_type, void *arg, uint32_t zid) break; case UMEM_CACHE_EVENT_PGEVICT: if (hdl->do_booted) { - VALGRIND_DO_DESTROY_MEMPOOL(z); + VALGRIND_DO_DESTROY_MEMPOOL_COND(z); } break; default: diff --git a/src/common/dav_v2/dav_v2.h b/src/common/dav_v2/dav_v2.h index 6147d33ba4e..a5b1ce65f5f 100644 --- a/src/common/dav_v2/dav_v2.h +++ b/src/common/dav_v2/dav_v2.h @@ -319,4 +319,15 @@ dav_allot_mb_evictable_v2(dav_obj_t *pop, int flags); size_t dav_obj_pgsz_v2(); +/** Force GC to reclaim freeblocks and mark empty non-evictable + * memory buckets as unused, thus allowing more umem_cache + * for non-evictable memory buckets. + * + * \param[in] pop pool handle + * + * \return 0, success + * < 0, error and errno is set to appropriate value. 
+ */ +int +dav_force_gc_v2(dav_obj_t *pop); #endif /* __DAOS_COMMON_DAV_V2_H */ diff --git a/src/common/dav_v2/heap.c b/src/common/dav_v2/heap.c index d730fed7bc4..61b7ed2e2b1 100644 --- a/src/common/dav_v2/heap.c +++ b/src/common/dav_v2/heap.c @@ -26,9 +26,7 @@ #include "meta_io.h" #define HEAP_NEMB_PCT_DEFAULT 80 - -static void -heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, uint32_t zone_id); +#define HEAP_NEMB_EMPTY_THRESHOLD 16 #define MAX_RUN_LOCKS MAX_CHUNK #define MAX_RUN_LOCKS_VG MAX_CHUNK /* avoid perf issues /w drd */ @@ -92,13 +90,14 @@ struct heap_rt { unsigned zones_exhausted; unsigned zones_exhausted_e; unsigned zones_exhausted_ne; - unsigned zones_ne_gc; - unsigned zones_lastne_gc; + unsigned zones_nextne_gc; unsigned zones_unused_first; unsigned zinfo_vec_size; unsigned mb_create_waiters; unsigned mb_pressure; unsigned nemb_pct; + unsigned empty_nemb_cnt; + unsigned empty_nemb_gcth; void *mb_create_wq; struct zinfo_vec *zinfo_vec; struct mbrt *default_mb; @@ -112,23 +111,37 @@ struct heap_rt { #define MBRT_NON_EVICTABLE ((struct mbrt *)(-1UL)) +static void +heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, uint32_t zone_id); + static inline void heap_zinfo_set(struct palloc_heap *heap, uint32_t zid, bool allotted, bool evictable) { - struct zinfo_element *ze = heap->rt->zinfo_vec->z; + struct zinfo_element *ze; - ze[zid].z_allotted = allotted; - ze[zid].z_evictable = evictable; - mo_wal_persist(&heap->p_ops, &ze[zid], sizeof(ze[zid])); + if (heap->rt->zinfo_vec) { + ze = heap->rt->zinfo_vec->z; + ze[zid].z_allotted = allotted; + ze[zid].z_evictable = evictable; + mo_wal_persist(&heap->p_ops, &ze[zid], sizeof(ze[zid])); + } else + D_ASSERT(zid == 0); } static inline void heap_zinfo_get(struct palloc_heap *heap, uint32_t zid, bool *allotted, bool *evictable) { - struct zinfo_element *ze = heap->rt->zinfo_vec->z; + struct zinfo_element *ze; - *allotted = ze[zid].z_allotted; - *evictable = ze[zid].z_evictable; + if (heap->rt->zinfo_vec) { + ze = heap->rt->zinfo_vec->z; + *allotted = ze[zid].z_allotted; + *evictable = ze[zid].z_evictable; + } else { + D_ASSERT(zid == 0); + *allotted = false; + *evictable = false; + } } static inline void @@ -136,7 +149,7 @@ heap_zinfo_set_usage(struct palloc_heap *heap, uint32_t zid, enum mb_usage_hint { struct zinfo_element *ze = heap->rt->zinfo_vec->z; - D_ASSERT(ze[zid].z_allotted && ze[zid].z_evictable && val < MB_UMAX_HINT); + D_ASSERT(heap->rt->zinfo_vec && ze[zid].z_allotted && val < MB_UMAX_HINT); ze[zid].z_usage_hint = val; mo_wal_persist(&heap->p_ops, &ze[zid], sizeof(ze[zid])); } @@ -146,7 +159,8 @@ heap_zinfo_get_usage(struct palloc_heap *heap, uint32_t zid, enum mb_usage_hint { struct zinfo_element *ze = heap->rt->zinfo_vec->z; - D_ASSERT(ze[zid].z_allotted && ze[zid].z_evictable && ze[zid].z_usage_hint < MB_UMAX_HINT); + D_ASSERT(heap->rt->zinfo_vec && ze[zid].z_allotted && ze[zid].z_evictable && + ze[zid].z_usage_hint < MB_UMAX_HINT); *val = ze[zid].z_usage_hint; } @@ -213,6 +227,13 @@ heap_mbrt_setmb_evictable(struct palloc_heap *heap, struct mbrt *mb) heap->rt->evictable_mbs[mb->mb_id] = mb; } +void +heap_mbrt_setmb_unused(struct palloc_heap *heap, uint32_t zid) +{ + D_ASSERT((zid < heap->rt->nzones) && (heap->rt->evictable_mbs[zid] == MBRT_NON_EVICTABLE)); + heap->rt->evictable_mbs[zid] = NULL; +} + bool heap_mbrt_ismb_evictable(struct palloc_heap *heap, uint32_t zid) { @@ -361,7 +382,14 @@ heap_mbrt_init(struct palloc_heap *heap) rt->mb_create_waiters = 0; rt->mb_create_wq 
= NULL; rt->mb_pressure = 0; - ret = store->stor_ops->so_waitqueue_create(&rt->mb_create_wq); + rt->empty_nemb_cnt = 0; + rt->empty_nemb_gcth = HEAP_NEMB_EMPTY_THRESHOLD; + + d_getenv_uint("DAOS_NEMB_EMPTY_RECYCLE_THRESHOLD", &rt->empty_nemb_gcth); + if (!rt->empty_nemb_gcth) + rt->empty_nemb_gcth = HEAP_NEMB_EMPTY_THRESHOLD; + + ret = store->stor_ops->so_waitqueue_create(&rt->mb_create_wq); if (ret) { ret = daos_der2errno(ret); goto error; @@ -425,7 +453,7 @@ heap_mbrt_get_mb(struct palloc_heap *heap, uint32_t zone_id) if (!heap_mbrt_ismb_evictable(heap, zone_id)) return heap->rt->default_mb; - D_ASSERT(heap->rt->evictable_mbs[zone_id] != NULL); + D_ASSERTF(heap->rt->evictable_mbs[zone_id] != NULL, "zone_id %d is marked unused", zone_id); return heap->rt->evictable_mbs[zone_id]; } @@ -955,22 +983,21 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, } static int -heap_getnext_ne_zone(struct palloc_heap *heap, uint32_t *zone_id) +heap_reclaim_next_ne(struct palloc_heap *heap, uint32_t *zone_id) { bool allotted, evictable; - int i; + uint32_t i; struct heap_rt *h = heap->rt; - if (h->zones_ne_gc == h->zones_exhausted_ne) + if (h->zones_nextne_gc >= h->zones_exhausted) return -1; - i = h->zones_ne_gc ? h->zones_lastne_gc + 1 : 0; - - for (; i < h->zones_exhausted; i++) { + for (i = h->zones_nextne_gc; i < h->zones_exhausted; i++) { heap_zinfo_get(heap, i, &allotted, &evictable); if (!allotted) - break; + continue; if (!evictable) { + h->zones_nextne_gc = i + 1; *zone_id = i; return 0; } @@ -978,13 +1005,139 @@ heap_getnext_ne_zone(struct palloc_heap *heap, uint32_t *zone_id) return -1; } +static void +heap_reclaim_setlast_ne(struct palloc_heap *heap, uint32_t zone_id) +{ + if (zone_id >= heap->rt->zones_nextne_gc) + heap->rt->zones_nextne_gc = zone_id + 1; +} + +static int +heap_get_next_unused_zone(struct palloc_heap *heap, uint32_t *zone_id) +{ + bool allotted, evictable; + uint32_t i; + + for (i = heap->rt->zones_unused_first; i < heap->rt->nzones; i++) { + heap_zinfo_get(heap, i, &allotted, &evictable); + if (!allotted) + break; + } + if (i == heap->rt->nzones) { + heap->rt->zones_unused_first = heap->rt->nzones; + return -1; + } + + *zone_id = i; + return 0; +} + +static int +heap_mark_zone_used_transient(struct palloc_heap *heap, uint32_t zone_id, bool is_evictable) +{ + struct mbrt *mb; + + if (is_evictable) { + mb = heap_mbrt_setup_mb(heap, zone_id); + if (mb == NULL) { + ERR("Failed to setup mbrt for zone %u\n", zone_id); + return -1; + } + heap_mbrt_setmb_evictable(heap, mb); + } else + heap_mbrt_setmb_nonevictable(heap, zone_id); + + heap->rt->zones_unused_first = zone_id + 1; + if (heap->rt->zones_exhausted < heap->rt->zones_unused_first) + heap->rt->zones_exhausted = heap->rt->zones_unused_first; + if (is_evictable) + heap->rt->zones_exhausted_e++; + else + heap->rt->zones_exhausted_ne++; + return 0; +} + +static void +heap_mark_zone_used_persist(struct palloc_heap *heap, uint32_t zone_id) +{ + bool is_evictable = heap_mbrt_ismb_evictable(heap, zone_id); + + if (zone_id) + heap_zinfo_set(heap, zone_id, true, is_evictable); +} + +static void +heap_mark_zone_unused_transient(struct palloc_heap *heap, uint32_t zone_id) +{ + struct mbrt *mb = heap_mbrt_get_mb(heap, zone_id); + + if (heap_mbrt_ismb_evictable(heap, zone_id)) { + D_ASSERT(mb != NULL); + heap_mbrt_cleanup_mb(mb); + heap->rt->zones_exhausted_e--; + } else + heap->rt->zones_exhausted_ne--; + + heap_mbrt_setmb_unused(heap, zone_id); + + if (heap->rt->zones_unused_first > zone_id) + 
heap->rt->zones_unused_first = zone_id; + if (heap->rt->zones_exhausted == (zone_id + 1)) + heap->rt->zones_exhausted = zone_id; +} + +static void +heap_mark_zone_unused(struct palloc_heap *heap, uint32_t zone_id) +{ + struct umem_cache_range rg = {0}; + bool is_evictable = heap_mbrt_ismb_evictable(heap, zone_id); + int rc; + + D_ASSERT(is_evictable == false); + + heap_mark_zone_unused_transient(heap, zone_id); + rg.cr_off = GET_ZONE_OFFSET(zone_id); + rg.cr_size = + ((heap->size - rg.cr_off) > ZONE_MAX_SIZE) ? ZONE_MAX_SIZE : heap->size - rg.cr_off; + rc = umem_cache_map(heap->layout_info.store, &rg, 1); + if (rc != 0) { + rc = daos_der2errno(rc); + ERR("Failed to remap zone %d in umem cache as unused rc=%d\n", zone_id, rc); + heap_mark_zone_used_transient(heap, zone_id, is_evictable); + } + heap_zinfo_set_usage(heap, zone_id, MB_U0_HINT); + heap_zinfo_set(heap, zone_id, false, false); +} + +int +heap_populate_nemb_unused(struct palloc_heap *heap) +{ + struct bucket *defb; + struct memory_block m = MEMORY_BLOCK_NONE; + struct mbrt *mb; + + m.size_idx = MAX_CHUNK; + + mb = heap_mbrt_get_mb(heap, 0); + defb = mbrt_bucket_acquire(mb, DEFAULT_ALLOC_CLASS_ID); + while (bucket_alloc_block(defb, &m) == 0) { + heap->rt->empty_nemb_cnt--; + heap_mark_zone_unused(heap, m.zone_id); + + m = MEMORY_BLOCK_NONE; + m.size_idx = MAX_CHUNK; + } + mbrt_bucket_release(defb); + + return 0; +} + /* * heap_populate_bucket -- (internal) creates volatile state of memory blocks */ static int heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket) { - struct heap_rt *h = heap->rt; struct mbrt *mb = bucket_get_mbrt(bucket); struct umem_cache_range rg = {0}; int rc; @@ -999,28 +1152,32 @@ heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket) return ENOMEM; } - rc = heap_getnext_ne_zone(heap, &zone_id); + rc = heap_reclaim_next_ne(heap, &zone_id); if (!rc) goto reclaim_garbage; - /* at this point we are sure that there's no more memory in the heap */ - if (h->zones_exhausted_ne == h->nzones_ne) + if (heap->rt->zones_exhausted_ne >= heap->rt->nzones_ne) + return ENOMEM; + + rc = heap_get_next_unused_zone(heap, &zone_id); + if (rc) + return ENOMEM; + + rc = heap_mark_zone_used_transient(heap, zone_id, false); + if (rc) return ENOMEM; - zone_id = h->zones_exhausted++; /* Create a umem cache map for the new zone */ rg.cr_off = GET_ZONE_OFFSET(zone_id); rg.cr_size = ((heap->size - rg.cr_off) > ZONE_MAX_SIZE) ? 
ZONE_MAX_SIZE : heap->size - rg.cr_off; - heap_mbrt_setmb_nonevictable(heap, zone_id); rc = umem_cache_map(heap->layout_info.store, &rg, 1); if (rc != 0) { rc = daos_der2errno(rc); ERR("Failed to map zone %d to umem cache rc=%d\n", zone_id, rc); - h->zones_exhausted--; + heap_mark_zone_unused_transient(heap, zone_id); return rc; } - h->zones_exhausted_ne++; struct zone *z = ZID_TO_ZONE(&heap->layout_info, zone_id); @@ -1040,13 +1197,11 @@ heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket) sizeof(z->chunk_headers)); heap_zone_init(heap, zone_id, 0, false); - if (zone_id) - heap_zinfo_set(heap, zone_id, true, false); + heap_mark_zone_used_persist(heap, zone_id); reclaim_garbage: heap_reclaim_zone_garbage(heap, bucket, zone_id); - h->zones_lastne_gc = zone_id; - h->zones_ne_gc++; + heap_reclaim_setlast_ne(heap, zone_id); /* * It doesn't matter that this function might not have found any @@ -1610,9 +1765,9 @@ heap_boot(struct palloc_heap *heap, void *mmap_base, uint64_t heap_size, uint64_ h->zones_exhausted = 0; h->zones_exhausted_e = 0; h->zones_exhausted_ne = 0; - h->zones_ne_gc = 0; - h->zones_lastne_gc = 0; + h->zones_nextne_gc = 0; h->zones_unused_first = 0; + h->zinfo_vec = NULL; h->nlocks = On_valgrind ? MAX_RUN_LOCKS_VG : MAX_RUN_LOCKS; for (unsigned i = 0; i < h->nlocks; ++i) @@ -1695,7 +1850,6 @@ static inline int heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) { uint32_t zone_id; - struct mbrt *mb; struct umem_cache_range rg = {0}; int rc; struct zone *z; @@ -1717,25 +1871,21 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) goto out; } - for (zone_id = heap->rt->zones_unused_first; zone_id < heap->rt->nzones; zone_id++) { - if (!heap_mbrt_ismb_initialized(heap, zone_id)) - break; + rc = heap_get_next_unused_zone(heap, &zone_id); + if (rc) { + D_ERROR("Failed to obtain free zone for evictable mb"); + rc = 1; + errno = ENOMEM; + goto out; } - D_ASSERT(zone_id < heap->rt->nzones); - mb = heap_mbrt_setup_mb(heap, zone_id); - if (mb == NULL) { - ERR("Failed to setup mbrt for zone %u\n", zone_id); - rc = -1; + rc = heap_mark_zone_used_transient(heap, zone_id, true); + if (rc) { + rc = 1; + errno = ENOMEM; goto out; } - heap->rt->zones_unused_first = zone_id + 1; - if (heap->rt->zones_exhausted < heap->rt->zones_unused_first) - heap->rt->zones_exhausted = heap->rt->zones_unused_first; - heap->rt->zones_exhausted_e++; - heap_mbrt_setmb_evictable(heap, mb); - /* Create a umem cache map for the new zone */ rg.cr_off = GET_ZONE_OFFSET(zone_id); rg.cr_size = @@ -1778,7 +1928,7 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) lw_tx_end(heap->p_ops.base, NULL); goto error; } - heap_zinfo_set(heap, zone_id, true, true); + heap_mark_zone_used_persist(heap, zone_id); lw_tx_end(heap->p_ops.base, NULL); umem_cache_unpin(heap->layout_info.store, pin_handle); @@ -1789,11 +1939,7 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) error: if (pin_handle) umem_cache_unpin(heap->layout_info.store, pin_handle); - heap_mbrt_cleanup_mb(mb); - heap->rt->evictable_mbs[zone_id] = NULL; - heap->rt->zones_exhausted_e--; - if (heap->rt->zones_unused_first > zone_id) - heap->rt->zones_unused_first = zone_id; + heap_mark_zone_unused_transient(heap, zone_id); rc = -1; out: @@ -1934,14 +2080,21 @@ heap_load_nonevictable_zones(struct palloc_heap *heap) { int i, rc; bool allotted, evictable; + struct zone *zone; for (i = 1; i < heap->rt->zones_exhausted; i++) { heap_zinfo_get(heap, i, &allotted, &evictable); - 
D_ASSERT(allotted); + if (!allotted) + continue; if (!evictable) { rc = heap_zone_load(heap, i); if (rc) return rc; + zone = ZID_TO_ZONE(&heap->layout_info, i); + if (!zone->header.sp_usage) + heap_incr_empty_nemb_cnt(heap); + else + heap_mbrt_incrmb_usage(heap, 0, zone->header.sp_usage); } } return 0; @@ -2126,6 +2279,59 @@ heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size, uint32_t nemb_pct) return zd; } +int +heap_incr_empty_nemb_cnt(struct palloc_heap *heap) +{ + return ++heap->rt->empty_nemb_cnt; +} + +int +heap_decr_empty_nemb_cnt(struct palloc_heap *heap) +{ + return heap->rt->empty_nemb_cnt ? --heap->rt->empty_nemb_cnt : 0; +} + +int +heap_force_recycle(struct palloc_heap *heap) +{ + struct bucket *defb; + struct mbrt *mb; + uint32_t zone_id; + uint32_t max_reclaim = heap->rt->empty_nemb_gcth * 2; + + mb = heap_mbrt_get_mb(heap, 0); + + if (heap->rt->empty_nemb_cnt < heap->rt->empty_nemb_gcth) { + if ((mb->space_usage > mb->prev_usage) || + ((mb->prev_usage - mb->space_usage) < + (ZONE_MAX_SIZE * heap->rt->empty_nemb_gcth))) { + if (mb->space_usage > mb->prev_usage) + mb->prev_usage = mb->space_usage; + return 0; + } + } + + + defb = mbrt_bucket_acquire(mb, DEFAULT_ALLOC_CLASS_ID); + while (heap_reclaim_next_ne(heap, &zone_id) == 0) { + heap_reclaim_zone_garbage(heap, defb, zone_id); + heap_reclaim_setlast_ne(heap, zone_id); + if (--max_reclaim == 0) + break; + } + + heap_reclaim_garbage(heap, defb); + mbrt_bucket_release(defb); + heap_populate_nemb_unused(heap); + mb->prev_usage = mb->space_usage; + + if (max_reclaim && (heap->rt->empty_nemb_cnt >= heap->rt->empty_nemb_gcth)) + D_WARN("Force GC failed to free up enough nembs, cnt = %d", + heap->rt->empty_nemb_cnt); + + return 0; +} + #if VG_MEMCHECK_ENABLED void heap_vg_zone_open(struct palloc_heap *heap, uint32_t zone_id, object_callback cb, void *args, @@ -2186,6 +2392,9 @@ heap_vg_open(struct palloc_heap *heap, object_callback cb, void *arg, int object if (!umem_cache_offisloaded(heap->layout_info.store, GET_ZONE_OFFSET(i))) continue; + if (!heap_mbrt_ismb_initialized(heap, i)) + continue; + if (heap_mbrt_ismb_evictable(heap, i)) VALGRIND_DO_CREATE_MEMPOOL(ZID_TO_ZONE(&heap->layout_info, i), 0, 0); diff --git a/src/common/dav_v2/heap.h b/src/common/dav_v2/heap.h index 8ceeff9a5cd..f2e710b4ce9 100644 --- a/src/common/dav_v2/heap.h +++ b/src/common/dav_v2/heap.h @@ -151,4 +151,11 @@ heap_off2mbid(struct palloc_heap *heap, uint64_t offset); struct heap_zone_limits heap_get_zone_limits(uint64_t heap_size, uint64_t cache_size, uint32_t nemb_pct); + +int +heap_force_recycle(struct palloc_heap *heap); +int +heap_incr_empty_nemb_cnt(struct palloc_heap *heap); +int +heap_decr_empty_nemb_cnt(struct palloc_heap *heap); #endif /* __DAOS_COMMON_HEAP_H */ diff --git a/src/common/dav_v2/palloc.c b/src/common/dav_v2/palloc.c index 3b929583e9a..a82c887f5b1 100644 --- a/src/common/dav_v2/palloc.c +++ b/src/common/dav_v2/palloc.c @@ -305,6 +305,7 @@ palloc_heap_action_exec(struct palloc_heap *heap, struct operation_context *ctx) { struct zone *zone; + bool is_evictable = false; #ifdef DAV_EXTRA_DEBUG if (act->m.m_ops->get_state(&act->m) == act->new_state) { D_CRIT("invalid operation or heap corruption\n"); @@ -324,14 +325,19 @@ palloc_heap_action_exec(struct palloc_heap *heap, * Update the memory bucket utilization info. 
*/ if (heap_mbrt_ismb_evictable(heap, act->m.zone_id)) - zone = ZID_TO_ZONE(&heap->layout_info, act->m.zone_id); - else - zone = heap->layout_info.zone0; + is_evictable = true; - if (act->new_state == MEMBLOCK_FREE) + zone = ZID_TO_ZONE(&heap->layout_info, act->m.zone_id); + + if (act->new_state == MEMBLOCK_FREE) { zone->header.sp_usage -= act->m.m_ops->get_real_size(&act->m); - else + if (!is_evictable && !zone->header.sp_usage) + heap_incr_empty_nemb_cnt(heap); + } else { + if (!is_evictable && !zone->header.sp_usage) + heap_decr_empty_nemb_cnt(heap); zone->header.sp_usage += act->m.m_ops->get_real_size(&act->m); + } operation_add_entry(ctx, &zone->header.sp_usage, zone->header.sp_usage, ULOG_OPERATION_SET); } diff --git a/src/common/dav_v2/tx.c b/src/common/dav_v2/tx.c index d50c3f52299..98e6d6d314e 100644 --- a/src/common/dav_v2/tx.c +++ b/src/common/dav_v2/tx.c @@ -459,12 +459,12 @@ lw_tx_begin(dav_obj_t *pop) rc = umem_cache_reserve(pop->do_store); if (rc) { D_ERROR("umem_cache_reserve failed, " DF_RC "\n", DP_RC(rc)); - return rc; + return daos_der2errno(rc); } rc = dav_wal_tx_reserve(pop, &wal_id); if (rc) { D_ERROR("so_wal_reserv failed, "DF_RC"\n", DP_RC(rc)); - return rc; + return daos_der2errno(rc); } if (pop->do_utx == NULL) { utx = dav_umem_wtx_new(pop); @@ -1893,3 +1893,29 @@ obj_realloc(dav_obj_t *pop, uint64_t *offp, size_t *sizep, size_t size) return ret; } + +DAV_FUNC_EXPORT int +dav_force_gc_v2(dav_obj_t *pop) +{ + int tx_inprogress = 0; + int rc, ret; + + if (get_tx()->stage != DAV_TX_STAGE_NONE) + tx_inprogress = 1; + + if (!tx_inprogress) { + rc = lw_tx_begin(pop); + if (rc) { + errno = rc; + return -1; + } + } + + ret = heap_force_recycle(pop->do_heap); + + if (!tx_inprogress) { + rc = lw_tx_end(pop, NULL); + D_ASSERT(rc == 0); + } + return ret; +} diff --git a/src/common/dav_v2/valgrind_internal.h b/src/common/dav_v2/valgrind_internal.h index 86fe9d47a19..44d4d68744b 100644 --- a/src/common/dav_v2/valgrind_internal.h +++ b/src/common/dav_v2/valgrind_internal.h @@ -227,6 +227,13 @@ extern unsigned _On_memcheck; VALGRIND_DESTROY_MEMPOOL(heap);\ } while (0) +#define VALGRIND_DO_DESTROY_MEMPOOL_COND(heap) \ + do { \ + if (On_memcheck) \ + if (VALGRIND_MEMPOOL_EXISTS(heap)) \ + VALGRIND_DESTROY_MEMPOOL(heap); \ + } while (0) + #define VALGRIND_DO_MEMPOOL_ALLOC(heap, addr, size) do {\ if (On_memcheck)\ VALGRIND_MEMPOOL_ALLOC(heap, addr, size);\ @@ -270,6 +277,8 @@ extern unsigned _On_memcheck; #define VALGRIND_DO_DESTROY_MEMPOOL(heap) { (void) (heap); } +#define VALGRIND_DO_DESTROY_MEMPOOL_COND(heap) { (void) (heap); } + #define VALGRIND_DO_MEMPOOL_ALLOC(heap, addr, size)\ do { (void) (heap); (void) (addr); (void) (size); } while (0) diff --git a/src/common/mem.c b/src/common/mem.c index beccab45266..bdc414e5bfd 100644 --- a/src/common/mem.c +++ b/src/common/mem.c @@ -615,7 +615,7 @@ umempobj_get_heapusage(struct umem_pool *ph_p, daos_size_t *curr_allocated) * statistics for an evictable memory bucket can be approximate value if * memory bucket is not yet loaded on to the umem cache. * - * \param pool[IN] Pointer to the persistent object. + * \param ph_p[IN] Pointer to the persistent object. * \param mb_id[IN] memory bucket id. * \param curr_allocated[IN|OUT] Total bytes currently allocated * \param maxsz[IN|OUT] Max size the memory bucket can grow. @@ -651,6 +651,23 @@ umempobj_get_mbusage(struct umem_pool *ph_p, uint32_t mb_id, daos_size_t *curr_a return rc; } +/** Force GC within the heap to optimize umem_cache usage. This is only + * with DAV v2 allocator. 
+ * + * \param ph_p[IN] Pointer to the persistent object. + * + * \return zero on success and non-zero on failure. + */ +int +umem_heap_gc(struct umem_instance *umm) +{ + struct umem_pool *ph_p = umm->umm_pool; + + if (ph_p->up_store.store_type == DAOS_MD_BMEM_V2) + return dav_force_gc_v2((dav_obj_t *)ph_p->up_priv); + return 0; +} + /** Log fragmentation related info for the pool. * * \param pool[IN] Pointer to the persistent object. @@ -2073,7 +2090,8 @@ struct umem_page_info { pi_copying : 1, /** Page is being copied. Blocks writes. */ pi_mapped : 1, /** Page is mapped to a MD page */ pi_sys : 1, /** Page is brought to cache by system internal access */ - pi_loaded : 1; /** Page is loaded */ + pi_loaded : 1, /** Page is loaded */ + pi_evictable : 1; /** Last known state on whether the page is evictable */ /** Highest transaction ID checkpointed. This is set before the page is copied. The * checkpoint will not be executed until the last committed ID is greater than or * equal to this value. If that's not the case immediately, the waiting flag is set @@ -2480,9 +2498,9 @@ cache_map_page(struct umem_cache *cache, struct umem_page_info *pinfo, unsigned pinfo->pi_mapped = 1; pinfo->pi_pg_id = pg_id; cache->ca_pages[pg_id].pg_info = pinfo; - if (!is_id_evictable(cache, pg_id)) + pinfo->pi_evictable = is_id_evictable(cache, pg_id); + if (!pinfo->pi_evictable) cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE] += 1; - } static inline void @@ -2491,7 +2509,7 @@ cache_add2lru(struct umem_cache *cache, struct umem_page_info *pinfo) D_ASSERT(d_list_empty(&pinfo->pi_lru_link)); D_ASSERT(pinfo->pi_ref == 0); - if (is_id_evictable(cache, pinfo->pi_pg_id)) + if (pinfo->pi_evictable) d_list_add_tail(&pinfo->pi_lru_link, &cache->ca_pgs_lru[1]); else d_list_add_tail(&pinfo->pi_lru_link, &cache->ca_pgs_lru[0]); @@ -3231,6 +3249,10 @@ cache_evict_page(struct umem_cache *cache, bool for_sys) /* The page is referenced by others while flushing */ if ((pinfo->pi_ref > 0) || is_page_dirty(pinfo) || pinfo->pi_io == 1) return -DER_AGAIN; + + /* The status of the page changed to non-evictable? */ + if (!pinfo->pi_evictable) + return -DER_AGAIN; } if (cache->ca_evtcb_fn) { @@ -3310,6 +3332,7 @@ cache_get_free_page(struct umem_cache *cache, struct umem_page_info **ret_pinfo, /* * Only allow map empty pages. It could yield when mapping an evictable page, * so when caller tries to map non-evictable page, the page_nr must be 1. + * If a page is already mapped, check and update the evictability status. */ static int cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) @@ -3336,6 +3359,15 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) cache_push_free_page(cache, free_pinfo); free_pinfo = NULL; } + if (is_id_evictable(cache, pg_id) != pinfo->pi_evictable) { + pinfo->pi_evictable = is_id_evictable(cache, pg_id); + D_ASSERT(!pinfo->pi_evictable || + (cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE] > 0)); + cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE] += + pinfo->pi_evictable ? 
(-1) : 1; + d_list_del_init(&pinfo->pi_lru_link); + cache_add2lru(cache, pinfo); + } continue; } @@ -3520,6 +3552,7 @@ umem_cache_post_replay(struct umem_store *store) if (!is_id_evictable(cache, pinfo[idx].pi_pg_id)) { d_list_del_init(&pinfo[idx].pi_lru_link); d_list_add_tail(&pinfo[idx].pi_lru_link, &cache->ca_pgs_lru[0]); + pinfo[idx].pi_evictable = 0; cnt++; } } diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 0066f3959b7..831a775db1a 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -295,21 +295,37 @@ func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { return poolInfo, nil } -func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { - var rlPtr **C.d_rank_list_t = nil - var rl *C.d_rank_list_t = nil - - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) || - queryMask.HasOption(daos.PoolQueryOptionDeadEngines) { - rlPtr = &rl - } +func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { + var enabledRanks *C.d_rank_list_t + var disabledRanks *C.d_rank_list_t + defer func() { + C.d_rank_list_free(enabledRanks) + C.d_rank_list_free(disabledRanks) + }() + var rc C.int cPoolInfo := C.daos_pool_info_t{ pi_bits: C.uint64_t(queryMask), } + if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) && queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + enaQm := queryMask + enaQm.ClearOptions(daos.PoolQueryOptionDisabledEngines) + cPoolInfo.pi_bits = C.uint64_t(enaQm) + rc = C.daos_pool_query(poolHdl, &enabledRanks, &cPoolInfo, nil, nil) + if err := daosError(rc); err != nil { + return nil, err + } + + /* second query to just get disabled ranks */ + rc = C.daos_pool_query(poolHdl, &disabledRanks, nil, nil, nil) + } else if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + rc = C.daos_pool_query(poolHdl, &enabledRanks, &cPoolInfo, nil, nil) + } else if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + rc = C.daos_pool_query(poolHdl, &disabledRanks, &cPoolInfo, nil, nil) + } else { + rc = C.daos_pool_query(poolHdl, nil, &cPoolInfo, nil, nil) + } - rc := C.daos_pool_query(poolHdl, rlPtr, &cPoolInfo, nil, nil) - defer C.d_rank_list_free(rl) if err := daosError(rc); err != nil { return nil, err } @@ -318,79 +334,18 @@ func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) ( if err != nil { return nil, err } + poolInfo.QueryMask = queryMask - if rlPtr != nil { - rs, err := rankSetFromC(rl) + if enabledRanks != nil { + poolInfo.EnabledRanks, err = rankSetFromC(enabledRanks) if err != nil { return nil, err } - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { - poolInfo.EnabledRanks = rs - } - if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - poolInfo.DisabledRanks = rs - } - if queryMask.HasOption(daos.PoolQueryOptionDeadEngines) { - poolInfo.DeadRanks = rs - } } - - return poolInfo, nil -} -func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { - poolInfo := &daos.PoolInfo{} - originalMask := queryMask // Save the original queryMask - - // Function to handle the query and return a single RankList - queryAndUpdate := func(option string) error { - // Clear previous options and set new option - queryMask.ClearAll() - queryMask.SetOptions(option) - - poolInfo1, err := queryPoolRankLists(poolHdl, queryMask) + if disabledRanks != nil { + 
poolInfo.DisabledRanks, err = rankSetFromC(disabledRanks) if err != nil { - return err - } - - switch option { - case daos.PoolQueryOptionEnabledEngines: - poolInfo.EnabledRanks = poolInfo1.EnabledRanks - case daos.PoolQueryOptionDisabledEngines: - poolInfo.DisabledRanks = poolInfo1.DisabledRanks - case daos.PoolQueryOptionDeadEngines: - poolInfo.DeadRanks = poolInfo1.DeadRanks - } - return nil - } - - // Preprocess queryMask, select one option for the first query - var firstOption string - if originalMask.HasOption(daos.PoolQueryOptionEnabledEngines) { - firstOption = daos.PoolQueryOptionEnabledEngines - } else if originalMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - firstOption = daos.PoolQueryOptionDisabledEngines - } else if originalMask.HasOption(daos.PoolQueryOptionDeadEngines) { - firstOption = daos.PoolQueryOptionDeadEngines - } - - // Perform the first query to get basic information - if err := queryAndUpdate(firstOption); err != nil { - return nil, err - } - - // Check the original query mask and update fields as needed - queryOptions := []string{ - daos.PoolQueryOptionEnabledEngines, - daos.PoolQueryOptionDisabledEngines, - daos.PoolQueryOptionDeadEngines, - } - - // Process each option sequentially - for _, opt := range queryOptions { - if originalMask.HasOption(opt) && opt != firstOption { - if err := queryAndUpdate(opt); err != nil { - return nil, err - } + return nil, err } } diff --git a/src/control/cmd/daos_agent/support.go b/src/control/cmd/daos_agent/support.go index 2729c15c13f..feab1137f04 100644 --- a/src/control/cmd/daos_agent/support.go +++ b/src/control/cmd/daos_agent/support.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2022-2023 Intel Corporation. +// (C) Copyright 2022-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -90,7 +90,7 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) } if cmd.Archive { @@ -106,7 +106,7 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) return nil } diff --git a/src/control/cmd/daos_server/support.go b/src/control/cmd/daos_server/support.go index 975155c635f..4e23a81dddc 100644 --- a/src/control/cmd/daos_server/support.go +++ b/src/control/cmd/daos_server/support.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2022-2023 Intel Corporation. +// (C) Copyright 2022-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -100,7 +100,7 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) } if cmd.Archive { @@ -116,7 +116,7 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) return nil } diff --git a/src/control/cmd/dmg/fi.go b/src/control/cmd/dmg/fi.go index 222d33c9308..edd66415d62 100644 --- a/src/control/cmd/dmg/fi.go +++ b/src/control/cmd/dmg/fi.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2022 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -11,8 +11,8 @@ package main import ( "context" "encoding/json" - "io/ioutil" "math/rand" + "os" "strings" "time" @@ -78,7 +78,7 @@ func (cmd *addCheckerReportCmd) Execute(_ []string) (errOut error) { var rpt *chkpb.CheckReport if cmd.File != "" { - buf, err := ioutil.ReadFile(cmd.File) + buf, err := os.ReadFile(cmd.File) if err != nil { return errors.Wrapf(err, "failed to open file %s", cmd.File) } diff --git a/src/control/cmd/dmg/support.go b/src/control/cmd/dmg/support.go index 8ba3648d1fa..fdd30691f97 100644 --- a/src/control/cmd/dmg/support.go +++ b/src/control/cmd/dmg/support.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2022-2023 Intel Corporation. +// (C) Copyright 2022-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -202,7 +202,7 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) } // Run dmg command info collection set @@ -224,13 +224,13 @@ func (cmd *collectLogCmd) Execute(_ []string) error { } } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) } params.FileTransferExecArgs = cmd.FileTransferExecArgs // R sync the logs from servers rsyncerr := cmd.rsyncLog() - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) if rsyncerr != nil && cmd.StopOnError { return rsyncerr } @@ -252,10 +252,10 @@ func (cmd *collectLogCmd) Execute(_ []string) error { return err } } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) } - fmt.Printf(progress.Display()) + fmt.Print(progress.Display()) if cmd.JSONOutputEnabled() { return cmd.OutputJSON(nil, err) diff --git a/src/control/cmd/dmg/telemetry.go b/src/control/cmd/dmg/telemetry.go index ac03a47fdfe..5806e44cef4 100644 --- a/src/control/cmd/dmg/telemetry.go +++ b/src/control/cmd/dmg/telemetry.go @@ -12,7 +12,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "net/http" "os" "os/exec" @@ -61,7 +60,7 @@ func (cmd *telemConfigCmd) fetchAsset(repo, platform string) (*os.File, error) { return nil, err } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { return nil, err } @@ -106,7 +105,7 @@ func (cmd *telemConfigCmd) fetchAsset(repo, platform string) (*os.File, error) { } defer resp.Body.Close() - outFile, err := ioutil.TempFile("", dlName) + outFile, err := os.CreateTemp("", dlName) if err != nil { return nil, err } @@ -219,7 +218,7 @@ type ( ) func (cmd *telemConfigCmd) loadPromCfg(cfgPath string) (*promCfg, error) { - data, err := ioutil.ReadFile(cfgPath) + data, err := os.ReadFile(cfgPath) if err != nil { return nil, err } @@ -290,7 +289,7 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) { return nil, err } - if err := ioutil.WriteFile(promInfo.cfgPath, data, 0644); err != nil { + if err := os.WriteFile(promInfo.cfgPath, data, 0644); err != nil { return nil, errors.Wrapf(err, "failed to write %s", promInfo.cfgPath) } cmd.Infof("Wrote DAOS monitoring config to %s)", promInfo.cfgPath) diff --git a/src/control/common/file_utils.go b/src/control/common/file_utils.go index 8b5ebe81875..e0c6eb86d67 100644 --- a/src/control/common/file_utils.go +++ b/src/control/common/file_utils.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,7 +9,6 @@ package common import ( "fmt" "io" - "io/ioutil" "os" "os/exec" "path" @@ -27,7 +26,7 @@ const UtilLogDepth = 4 // GetFilePaths return full file paths in given directory with // matching file extensions func GetFilePaths(dir string, ext string) ([]string, error) { - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) if err != nil { return nil, err } diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index 7a9677ed2d8..627f92cfc64 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -116,6 +116,22 @@ func Debug(msg proto.Message) string { } fmt.Fprintf(&bld, "meta-file-size: %s (%d)", humanize.Bytes(m.MemFileBytes), m.MemFileBytes) + case *mgmtpb.PoolQueryReq: + fmt.Fprintf(&bld, "%T id:%s qm:%s", m, m.Id, daos.PoolQueryMask(m.QueryMask)) + case *mgmtpb.PoolQueryResp: + fmt.Fprintf(&bld, "%T status:%s uuid:%s qm:%s map:%d tot(eng/tgts):%d/%d ver(p/u):%d/%d svc_ldr:%d ", + m, daos.Status(m.Status), m.Uuid, daos.PoolQueryMask(m.QueryMask), m.Version, + m.TotalEngines, m.TotalTargets, m.PoolLayoutVer, m.UpgradeLayoutVer, m.SvcLdr) + ranks := &ranklist.RankSet{} + for _, r := range m.SvcReps { + ranks.Add(ranklist.Rank(r)) + } + fmt.Fprintf(&bld, "svc_ranks:%s ", ranks.String()) + fmt.Fprintf(&bld, "ena_ranks:%s ", m.EnabledRanks) + fmt.Fprintf(&bld, "dis_ranks:%s ", m.DisabledRanks) + fmt.Fprintf(&bld, "dead_ranks:%s ", m.DeadRanks) + fmt.Fprintf(&bld, "rebuild:%+v ", m.Rebuild) + fmt.Fprintf(&bld, "tier_stats:%+v ", m.TierStats) case *mgmtpb.PoolEvictReq: fmt.Fprintf(&bld, "%T pool:%s", m, m.Id) if len(m.Handles) > 0 { diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go index 2c7610a82d4..ee685bf568a 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -12,7 +12,6 @@ import ( "fmt" "io" "io/fs" - "io/ioutil" "net" "os" "path/filepath" @@ -250,7 +249,7 @@ func CreateTestDir(t *testing.T) (string, func()) { t.Helper() name := strings.Replace(t.Name(), "/", "-", -1) - tmpDir, err := ioutil.TempDir("", name) + tmpDir, err := os.MkdirTemp("", name) if err != nil { t.Fatalf("Couldn't create temporary directory: %v", err) } @@ -268,7 +267,7 @@ func CreateTestDir(t *testing.T) (string, func()) { func CreateTestFile(t *testing.T, dir, content string) string { t.Helper() - file, err := ioutil.TempFile(dir, "") + file, err := os.CreateTemp(dir, "") if err != nil { t.Fatal(err) } diff --git a/src/control/drpc/drpc_client_test.go b/src/control/drpc/drpc_client_test.go index 10bf5a527a6..42258ce2120 100644 --- a/src/control/drpc/drpc_client_test.go +++ b/src/control/drpc/drpc_client_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,13 +8,13 @@ package drpc import ( "context" - "fmt" "net" "testing" - "github.com/daos-stack/daos/src/control/common/test" "github.com/pkg/errors" "google.golang.org/protobuf/proto" + + "github.com/daos-stack/daos/src/control/common/test" ) // testSockPath is an arbitrary path string to use for testing. 
These tests @@ -41,7 +41,7 @@ func (m *mockDialer) dial(ctx context.Context, socketPath string) (net.Conn, err } func (m *mockDialer) SetError(errStr string) { - m.OutputErr = fmt.Errorf(errStr) + m.OutputErr = errors.New(errStr) m.OutputConn = nil } diff --git a/src/control/lib/control/config.go b/src/control/lib/control/config.go index 37c61056e98..e4a5452b841 100644 --- a/src/control/lib/control/config.go +++ b/src/control/lib/control/config.go @@ -8,7 +8,6 @@ package control import ( "fmt" - "io/ioutil" "os" "path" @@ -82,7 +81,7 @@ func LoadConfig(cfgPath string) (*Config, error) { return nil, ErrNoConfigFile } - data, err := ioutil.ReadFile(cfgPath) + data, err := os.ReadFile(cfgPath) if err != nil { return nil, err } diff --git a/src/control/lib/control/config_test.go b/src/control/lib/control/config_test.go index c1d13dd99e5..1e6bdf8d81d 100644 --- a/src/control/lib/control/config_test.go +++ b/src/control/lib/control/config_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,7 +8,6 @@ package control import ( "fmt" - "io/ioutil" "os" "path" "testing" @@ -38,7 +37,7 @@ func saveConfig(t *testing.T, cfg *Config, cfgPath string) { t.Fatal(err) } - if err := ioutil.WriteFile(cfgPath, data, 0644); err != nil { + if err := os.WriteFile(cfgPath, data, 0644); err != nil { t.Fatal(err) } } diff --git a/src/control/lib/control/control.go b/src/control/lib/control/control.go index 6ec5bb1ae2e..77f6b18f91d 100644 --- a/src/control/lib/control/control.go +++ b/src/control/lib/control/control.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -7,14 +7,14 @@ package control import ( - "io/ioutil" + "io" "github.com/daos-stack/daos/src/control/logging" ) // defaultLogger is used to provide a valid logger when none has // been supplied. -var defaultLogger debugLogger = logging.NewCombinedLogger("", ioutil.Discard) +var defaultLogger debugLogger = logging.NewCombinedLogger("", io.Discard) type ( // debugLogger defines a debug-only logging interface. diff --git a/src/control/lib/control/http.go b/src/control/lib/control/http.go index 8a89cbaf61a..929719c90f5 100644 --- a/src/control/lib/control/http.go +++ b/src/control/lib/control/http.go @@ -11,7 +11,7 @@ import ( "crypto/tls" "crypto/x509" "fmt" - "io/ioutil" + "io" "net/http" "net/url" "time" @@ -202,7 +202,7 @@ func httpGetBody(ctx context.Context, url *url.URL, get httpGetFn, timeout time. 
return nil, errors.Errorf("HTTP response error: %d %s", resp.StatusCode, http.StatusText(resp.StatusCode)) } - result, err := ioutil.ReadAll(resp.Body) + result, err := io.ReadAll(resp.Body) if err != nil { return nil, errors.Wrap(err, "reading HTTP response body") } diff --git a/src/control/lib/control/server_meta_test.go b/src/control/lib/control/server_meta_test.go index 89fdbb34e9d..b80729451ea 100644 --- a/src/control/lib/control/server_meta_test.go +++ b/src/control/lib/control/server_meta_test.go @@ -408,7 +408,7 @@ func TestControl_SmdQuery(t *testing.T) { for i, gotDev := range sqr.HostStorage.SmdInfo.Devices { hs := tc.expResp.HostStorage expDev := hs[hs.Keys()[0]].HostStorage.SmdInfo.Devices[i] - t.Logf(cmp.Diff(expDev, gotDev, defResCmpOpts()...)) + t.Log(cmp.Diff(expDev, gotDev, defResCmpOpts()...)) } } t.Fatalf("unexpected resp (-want, +got):\n%s\n", diff) @@ -844,7 +844,7 @@ func TestControl_SmdManage(t *testing.T) { } for i, gotDev := range sqr.HostStorage.SmdInfo.Devices { expDev := hs[keys[0]].HostStorage.SmdInfo.Devices[i] - t.Logf(cmp.Diff(expDev, gotDev, defResCmpOpts()...)) + t.Log(cmp.Diff(expDev, gotDev, defResCmpOpts()...)) } } t.Fatalf("unexpected resp (-want, +got):\n%s\n", diff) diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index 3d26b7a2d4e..6555a8cf721 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -101,6 +101,9 @@ type ( MediaType StorageMediaType `json:"media_type"` } + // PoolQueryOption is used to supply pool query options. + PoolQueryOption string + // PoolQueryMask implements a bitmask for pool query options. PoolQueryMask C.uint64_t ) @@ -112,15 +115,15 @@ const ( HealthOnlyPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_SPACE)) // PoolQueryOptionSpace retrieves storage space usage as part of the pool query. - PoolQueryOptionSpace = "space" + PoolQueryOptionSpace PoolQueryOption = "space" // PoolQueryOptionRebuild retrieves pool rebuild status as part of the pool query. - PoolQueryOptionRebuild = "rebuild" + PoolQueryOptionRebuild PoolQueryOption = "rebuild" // PoolQueryOptionEnabledEngines retrieves enabled engines as part of the pool query. - PoolQueryOptionEnabledEngines = "enabled_engines" + PoolQueryOptionEnabledEngines PoolQueryOption = "enabled_engines" // PoolQueryOptionDisabledEngines retrieves disabled engines as part of the pool query. - PoolQueryOptionDisabledEngines = "disabled_engines" + PoolQueryOptionDisabledEngines PoolQueryOption = "disabled_engines" // PoolQueryOptionDeadEngines retrieves dead engines as part of the pool query. - PoolQueryOptionDeadEngines = "dead_engines" + PoolQueryOptionDeadEngines PoolQueryOption = "dead_engines" // PoolConnectFlagReadOnly indicates that the connection is read-only. 
PoolConnectFlagReadOnly = C.DAOS_PC_RO @@ -130,7 +133,11 @@ const ( PoolConnectFlagExclusive = C.DAOS_PC_EX ) -var poolQueryOptMap = map[C.int]string{ +func (pqo PoolQueryOption) String() string { + return string(pqo) +} + +var poolQueryOptMap = map[C.int]PoolQueryOption{ C.DPI_SPACE: PoolQueryOptionSpace, C.DPI_REBUILD_STATUS: PoolQueryOptionRebuild, C.DPI_ENGINES_ENABLED: PoolQueryOptionEnabledEngines, @@ -138,7 +145,7 @@ var poolQueryOptMap = map[C.int]string{ C.DPI_ENGINES_DEAD: PoolQueryOptionDeadEngines, } -func resolvePoolQueryOpt(name string) (C.int, error) { +func resolvePoolQueryOpt(name PoolQueryOption) (C.int, error) { for opt, optName := range poolQueryOptMap { if name == optName { return opt, nil @@ -149,7 +156,7 @@ func resolvePoolQueryOpt(name string) (C.int, error) { // MustNewPoolQueryMask returns a PoolQueryMask initialized with the specified options. // NB: If an error occurs due to an invalid option, it panics. -func MustNewPoolQueryMask(options ...string) (mask PoolQueryMask) { +func MustNewPoolQueryMask(options ...PoolQueryOption) (mask PoolQueryMask) { if err := mask.SetOptions(options...); err != nil { panic(err) } @@ -157,8 +164,8 @@ func MustNewPoolQueryMask(options ...string) (mask PoolQueryMask) { } // SetOptions sets the pool query mask to include the specified options. -func (pqm *PoolQueryMask) SetOptions(optNames ...string) error { - for _, optName := range optNames { +func (pqm *PoolQueryMask) SetOptions(options ...PoolQueryOption) error { + for _, optName := range options { if opt, err := resolvePoolQueryOpt(optName); err != nil { return err } else { @@ -169,8 +176,8 @@ func (pqm *PoolQueryMask) SetOptions(optNames ...string) error { } // ClearOptions clears the pool query mask of the specified options. -func (pqm *PoolQueryMask) ClearOptions(optNames ...string) error { - for _, optName := range optNames { +func (pqm *PoolQueryMask) ClearOptions(options ...PoolQueryOption) error { + for _, optName := range options { if opt, err := resolvePoolQueryOpt(optName); err != nil { return err } else { @@ -191,8 +198,8 @@ func (pqm *PoolQueryMask) ClearAll() { } // HasOption returns true if the pool query mask includes the specified option. 
-func (pqm PoolQueryMask) HasOption(optName string) bool { - return strings.Contains(pqm.String(), optName) +func (pqm PoolQueryMask) HasOption(option PoolQueryOption) bool { + return strings.Contains(pqm.String(), option.String()) } func (pqm PoolQueryMask) String() string { @@ -201,7 +208,7 @@ func (pqm PoolQueryMask) String() string { opt := C.int(1 << i) if flag, ok := poolQueryOptMap[opt]; ok { if pqm&PoolQueryMask(opt) != 0 { - flags = append(flags, flag) + flags = append(flags, flag.String()) } } } @@ -228,7 +235,7 @@ func (pqm *PoolQueryMask) UnmarshalJSON(data []byte) error { var newVal PoolQueryMask for _, opt := range strings.Split(strings.Trim(string(data), "\""), ",") { for k, v := range poolQueryOptMap { - if v == opt { + if v.String() == opt { newVal |= PoolQueryMask(k) goto next } diff --git a/src/control/lib/daos/pool_test.go b/src/control/lib/daos/pool_test.go index 8cf5f71312f..4cec1466844 100644 --- a/src/control/lib/daos/pool_test.go +++ b/src/control/lib/daos/pool_test.go @@ -114,8 +114,12 @@ func genTestMask(modifyFn func(pqm *PoolQueryMask)) PoolQueryMask { return testMask } -func genOptsStr(queryOpts ...string) string { - return strings.Join(queryOpts, ",") +func genOptsStr(queryOpts ...PoolQueryOption) string { + optStrs := make([]string, 0, len(queryOpts)) + for _, opt := range queryOpts { + optStrs = append(optStrs, opt.String()) + } + return strings.Join(optStrs, ",") } func TestDaos_PoolQueryMask(t *testing.T) { @@ -156,7 +160,7 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetOptions(PoolQueryOptionSpace) }), - expString: PoolQueryOptionSpace, + expString: PoolQueryOptionSpace.String(), }, "set query space=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -176,7 +180,7 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetOptions(PoolQueryOptionRebuild) }), - expString: PoolQueryOptionRebuild, + expString: PoolQueryOptionRebuild.String(), }, "set query rebuild=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -189,7 +193,7 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetOptions(PoolQueryOptionEnabledEngines) }), - expString: PoolQueryOptionEnabledEngines, + expString: PoolQueryOptionEnabledEngines.String(), }, "set query enabled_engines=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -202,7 +206,7 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetOptions(PoolQueryOptionDisabledEngines) }), - expString: PoolQueryOptionDisabledEngines, + expString: PoolQueryOptionDisabledEngines.String(), }, "set query disabled_engines=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -220,6 +224,33 @@ func TestDaos_PoolQueryMask(t *testing.T) { } } +func TestDaos_PoolQueryMaskHasOption(t *testing.T) { + for name, tc := range map[string]struct { + testMask PoolQueryMask + testOpt PoolQueryOption + expHasOpt bool + }{ + "empty shouldn't match anything": { + testOpt: PoolQueryOptionSpace, + expHasOpt: false, + }, + "health-only shouldn't match space": { + testMask: HealthOnlyPoolQueryMask, + testOpt: PoolQueryOptionSpace, + expHasOpt: false, + }, + "default should match space": { + testMask: DefaultPoolQueryMask, + testOpt: PoolQueryOptionSpace, + expHasOpt: true, + }, + } { + t.Run(name, func(t *testing.T) { + test.AssertEqual(t, tc.expHasOpt, tc.testMask.HasOption(tc.testOpt), "unexpected HasOption result") + }) + } +} + 
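The typed PoolQueryOption values introduced above ultimately select the DPI_* bits carried in daos_pool_info_t.pi_bits, and the reworked queryPool() earlier in this patch issues up to two daos_pool_query() calls so that both the enabled and disabled engine lists can be returned. Below is a minimal C-level sketch of that sequence, not part of the patch: the function name is hypothetical, error handling is trimmed, and it assumes the public libdaos entry points behave exactly as the cgo calls in queryPool() use them.

#include <daos.h>

/*
 * Sketch only: fetch the enabled engines (plus space and rebuild status) in a
 * first daos_pool_query() call, then fetch the disabled engines with a second
 * call that passes a NULL info pointer, mirroring queryPool().
 */
static int
query_enabled_then_disabled(daos_handle_t poh)
{
	daos_pool_info_t  info = {0};
	d_rank_list_t	 *enabled = NULL;
	d_rank_list_t	 *disabled = NULL;
	int		  rc;

	/* First query: space, rebuild status and the enabled-engine list. */
	info.pi_bits = DPI_SPACE | DPI_REBUILD_STATUS | DPI_ENGINES_ENABLED;
	rc = daos_pool_query(poh, &enabled, &info, NULL, NULL);
	if (rc != 0)
		goto out;

	/* Second query returns just the disabled-engine list. */
	rc = daos_pool_query(poh, &disabled, NULL, NULL, NULL);
out:
	/* d_rank_list_free() is NULL-safe, as the unconditional defer in
	 * queryPool() also assumes.
	 */
	d_rank_list_free(enabled);
	d_rank_list_free(disabled);
	return rc;
}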
func TestDaos_PoolQueryMaskMarshalJSON(t *testing.T) { // NB: The MarshalJSON implementation uses the stringer, so // there's no point in testing all of the options here. diff --git a/src/control/lib/hardware/sysfs/provider.go b/src/control/lib/hardware/sysfs/provider.go index 301845d40cc..0a253188344 100644 --- a/src/control/lib/hardware/sysfs/provider.go +++ b/src/control/lib/hardware/sysfs/provider.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2023 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,7 +9,6 @@ package sysfs import ( "context" "io" - "io/ioutil" "os" "path/filepath" "strconv" @@ -45,7 +44,7 @@ func isNetvscDevice(path string, subsystem string) bool { return false } - content, err := ioutil.ReadFile(filepath.Join(path, "device", "uevent")) + content, err := os.ReadFile(filepath.Join(path, "device", "uevent")) if err != nil { return false } @@ -87,7 +86,7 @@ func (s *Provider) GetNetDevClass(dev string) (hardware.NetDevClass, error) { return 0, errors.New("device name required") } - devClass, err := ioutil.ReadFile(s.sysPath("class", "net", dev, "type")) + devClass, err := os.ReadFile(s.sysPath("class", "net", dev, "type")) if err != nil { return 0, err } @@ -189,7 +188,7 @@ func (s *Provider) addPCIDevice(topo *hardware.Topology, subsystem string, path func (s *Provider) getNetworkDevice(path, subsystem string) (*hardware.PCIDevice, error) { // Network devices will have the device/net subdirectory structure - netDev, err := ioutil.ReadDir(filepath.Join(path, "device", "net")) + netDev, err := os.ReadDir(filepath.Join(path, "device", "net")) if err != nil { return nil, errors.Wrapf(err, "failed to read net device") } @@ -215,7 +214,7 @@ func (s *Provider) getNetworkDevice(path, subsystem string) (*hardware.PCIDevice func (s *Provider) getNUMANode(path string) (uint, error) { numaPath := filepath.Join(path, "device", "numa_node") - numaBytes, err := ioutil.ReadFile(numaPath) + numaBytes, err := os.ReadFile(numaPath) if err != nil { return 0, err } @@ -277,7 +276,7 @@ func (s *Provider) addVirtualNetDevices(topo *hardware.Topology) error { addedDevices := topo.AllDevices() netPath := s.sysPath("devices", "virtual", "net") - netIfaces, err := ioutil.ReadDir(netPath) + netIfaces, err := os.ReadDir(netPath) if err != nil { s.log.Tracef("unable to read any virtual net interfaces: %s", err.Error()) return nil @@ -325,7 +324,7 @@ func (s *Provider) getBackingDevice(ifacePath string, devices map[string]hardwar } } - ifaceFiles, err := ioutil.ReadDir(ifacePath) + ifaceFiles, err := os.ReadDir(ifacePath) if err != nil { s.log.Tracef("unable to read contents of %s", ifacePath) return nil, err @@ -355,7 +354,7 @@ func (s *Provider) getBackingDevice(ifacePath string, devices map[string]hardwar } func (s *Provider) getParentDevName(iface string) (string, error) { - parentBytes, err := ioutil.ReadFile(s.sysPath("class", "net", iface, "parent")) + parentBytes, err := os.ReadFile(s.sysPath("class", "net", iface, "parent")) if err != nil { return "", err } @@ -381,7 +380,7 @@ func (s *Provider) GetFabricInterfaces(ctx context.Context, provider string) (*h } func (s *Provider) getCXIFabricInterfaces() ([]*hardware.FabricInterface, error) { - cxiDevs, err := ioutil.ReadDir(s.sysPath("class", "cxi")) + cxiDevs, err := os.ReadDir(s.sysPath("class", "cxi")) if os.IsNotExist(err) { s.log.Tracef("no cxi subsystem in sysfs") return []*hardware.FabricInterface{}, nil @@ -446,7 +445,7 @@ func (s *Provider) 
getLoopbackDevState(iface string) (hardware.NetDevState, erro func (s *Provider) getNetOperState(iface string) (hardware.NetDevState, error) { statePath := s.sysPath("class", "net", iface, "operstate") - stateBytes, err := ioutil.ReadFile(statePath) + stateBytes, err := os.ReadFile(statePath) if err != nil { return hardware.NetDevStateUnknown, errors.Wrapf(err, "failed to get %q operational state", iface) } @@ -491,7 +490,7 @@ func (s *Provider) getInfinibandDevState(iface string) (hardware.NetDevState, er // The best way to determine that an Infiniband interface is ready is to check the state // of its ports. Ports in the "ACTIVE" state are either fully ready or will be very soon. ibPath := s.sysPath("class", "net", iface, "device", "infiniband") - ibDevs, err := ioutil.ReadDir(ibPath) + ibDevs, err := os.ReadDir(ibPath) if err != nil { return hardware.NetDevStateUnknown, errors.Wrapf(err, "can't access Infiniband details for %q", iface) } @@ -499,7 +498,7 @@ func (s *Provider) getInfinibandDevState(iface string) (hardware.NetDevState, er ibDevState := make([]hardware.NetDevState, 0) for _, dev := range ibDevs { portPath := filepath.Join(ibPath, dev.Name(), "ports") - ports, err := ioutil.ReadDir(portPath) + ports, err := os.ReadDir(portPath) if err != nil { return hardware.NetDevStateUnknown, errors.Wrapf(err, "unable to get ports for %s/%s", iface, dev.Name()) } @@ -507,7 +506,7 @@ func (s *Provider) getInfinibandDevState(iface string) (hardware.NetDevState, er portState := make([]hardware.NetDevState, 0) for _, port := range ports { statePath := filepath.Join(portPath, port.Name(), "state") - stateBytes, err := ioutil.ReadFile(statePath) + stateBytes, err := os.ReadFile(statePath) if err != nil { return hardware.NetDevStateUnknown, errors.Wrapf(err, "unable to get state for %s/%s port %s", iface, dev.Name(), port.Name()) @@ -595,7 +594,7 @@ func (s *Provider) IsIOMMUEnabled() (bool, error) { // Simple test for now -- if the path exists and contains // DMAR entries, we assume that's good enough. - dmars, err := ioutil.ReadDir(s.sysPath("class", "iommu")) + dmars, err := os.ReadDir(s.sysPath("class", "iommu")) if err != nil && !os.IsNotExist(err) { return false, err } diff --git a/src/control/lib/hardware/sysfs/provider_test.go b/src/control/lib/hardware/sysfs/provider_test.go index 1a6b539ac87..6375654d815 100644 --- a/src/control/lib/hardware/sysfs/provider_test.go +++ b/src/control/lib/hardware/sysfs/provider_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2021-2023 Intel Corporation. +// (C) Copyright 2021-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,7 +9,6 @@ package sysfs import ( "errors" "fmt" - "io/ioutil" "os" "path" "path/filepath" @@ -46,7 +45,7 @@ func TestSysfs_isNetvscDevice(t *testing.T) { } filePath := path.Join(dirPath, "uevent") - if err := ioutil.WriteFile(filePath, []byte(content), 0644); err != nil { + if err := os.WriteFile(filePath, []byte(content), 0644); err != nil { t.Fatal(err) } } @@ -181,7 +180,7 @@ func TestSysfs_Provider_GetNetDevClass(t *testing.T) { func writeTestFile(t *testing.T, path, contents string) { t.Helper() - if err := ioutil.WriteFile(path, []byte(contents), 0644); err != nil { + if err := os.WriteFile(path, []byte(contents), 0644); err != nil { t.Fatal(err) } } @@ -279,7 +278,7 @@ func setupNetvscDev(t *testing.T, root, devName, backingDevName string) { } filePath := path.Join(dirPath, "uevent") - if err := ioutil.WriteFile(filePath, []byte("DRIVER=hv_netvsc"), 0644); err != nil { + if err := os.WriteFile(filePath, []byte("DRIVER=hv_netvsc"), 0644); err != nil { t.Fatal(err) } } @@ -775,7 +774,7 @@ func TestSysfs_Provider_GetNetDevState(t *testing.T) { } statePath := filepath.Join(portPath, "state") - if err := ioutil.WriteFile(statePath, []byte(state), 0644); err != nil { + if err := os.WriteFile(statePath, []byte(state), 0644); err != nil { t.Fatal(err) } } diff --git a/src/control/lib/support/log.go b/src/control/lib/support/log.go index f6446159d75..721fc18fb50 100644 --- a/src/control/lib/support/log.go +++ b/src/control/lib/support/log.go @@ -12,7 +12,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -318,7 +317,7 @@ func cpOutputToFile(target string, log logging.Logger, cp ...logCopy) (string, e cmd = strings.ReplaceAll(cmd, " ", "_") log.Debugf("Collecting DAOS command output = %s > %s ", runCmd, filepath.Join(target, cmd)) - if err := ioutil.WriteFile(filepath.Join(target, cmd), out, 0644); err != nil { + if err := os.WriteFile(filepath.Join(target, cmd), out, 0644); err != nil { return "", errors.Wrapf(err, "failed to write %s", filepath.Join(target, cmd)) } @@ -611,7 +610,7 @@ func collectAgentLog(log logging.Logger, opts ...CollectLogsParams) error { return err } - agentFile, err := ioutil.ReadFile(opts[0].Config) + agentFile, err := os.ReadFile(opts[0].Config) if err != nil { return err } diff --git a/src/control/lib/support/log_test.go b/src/control/lib/support/log_test.go index 12bb4214236..db7a5c048eb 100644 --- a/src/control/lib/support/log_test.go +++ b/src/control/lib/support/log_test.go @@ -8,7 +8,6 @@ package support import ( "fmt" - "io/ioutil" "os" "path/filepath" "reflect" @@ -662,7 +661,7 @@ func TestSupport_copyServerConfig(t *testing.T) { if tc.createFile { data := []byte("hello\nDAOS\n") if err := os.WriteFile(defaultSeverConfig, data, 0644); err != nil { - t.Fatalf(err.Error()) + t.Fatal(err.Error()) } } collLogParams.TargetFolder = tc.targetFolder @@ -672,7 +671,7 @@ func TestSupport_copyServerConfig(t *testing.T) { if tc.createFile { if err := os.Remove(defaultSeverConfig); err != nil { - t.Fatalf(err.Error()) + t.Fatal(err.Error()) } } }) @@ -1194,9 +1193,9 @@ INFO 2023/12/12 23:59:59.441241 LOG LINE 12 if tc.verifyLog != "" { readFile := filepath.Join(tc.destFile, filepath.Base(tc.srcFile)) - b, err := ioutil.ReadFile(readFile) + b, err := os.ReadFile(readFile) if err != nil { - t.Fatalf(err.Error()) + t.Fatal(err.Error()) } if strings.Contains(string(b), tc.verifyLog) == false { diff --git a/src/control/lib/telemetry/promexp/client_test.go 
b/src/control/lib/telemetry/promexp/client_test.go index d533b9c7931..5c5a0541adb 100644 --- a/src/control/lib/telemetry/promexp/client_test.go +++ b/src/control/lib/telemetry/promexp/client_test.go @@ -17,6 +17,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/telemetry" "github.com/daos-stack/daos/src/control/logging" ) @@ -25,6 +26,8 @@ func TestPromExp_extractClientLabels(t *testing.T) { jobID := "testJob" pid := "12345" tid := "67890" + poolUUID := test.MockPoolUUID(1) + contUUID := test.MockPoolUUID(2) testPath := func(suffix string) string { return fmt.Sprintf("ID: %d/%s/%s/%s/%s", shmID, jobID, pid, tid, suffix) @@ -74,12 +77,32 @@ func TestPromExp_extractClientLabels(t *testing.T) { }, }, "pool ops": { - input: fmt.Sprintf("ID: %d/%s/%s/pool/%s/ops/foo", shmID, jobID, pid, test.MockPoolUUID(1)), + input: fmt.Sprintf("ID: %d/%s/%s/pool/%s/ops/foo", shmID, jobID, pid, poolUUID), expName: "pool_ops_foo", expLabels: labelMap{ "jobid": jobID, "pid": pid, - "pool": test.MockPoolUUID(1).String(), + "pool": poolUUID.String(), + }, + }, + "dfs ops": { + input: fmt.Sprintf("ID: %d/%s/%s/pool/%s/container/%s/dfs/ops/CHMOD", shmID, jobID, pid, poolUUID, contUUID), + expName: "dfs_ops_chmod", + expLabels: labelMap{ + "jobid": jobID, + "pid": pid, + "pool": poolUUID.String(), + "container": contUUID.String(), + }, + }, + "dfs read bytes": { + input: fmt.Sprintf("ID: %d/%s/%s/pool/%s/container/%s/dfs/read_bytes", shmID, jobID, pid, poolUUID, contUUID), + expName: "dfs_read_bytes", + expLabels: labelMap{ + "jobid": jobID, + "pid": pid, + "pool": poolUUID.String(), + "container": contUUID.String(), }, }, } { @@ -136,6 +159,7 @@ func TestPromExp_NewClientCollector(t *testing.T) { if err != nil { t.Fatal(err) } + defer telemetry.Fini() result, err := NewClientCollector(ctx, log, cs, tc.opts) test.CmpErr(t, tc.expErr, err) diff --git a/src/control/lib/telemetry/promexp/engine.go b/src/control/lib/telemetry/promexp/engine.go index bb0481f12a9..8c61528381c 100644 --- a/src/control/lib/telemetry/promexp/engine.go +++ b/src/control/lib/telemetry/promexp/engine.go @@ -148,6 +148,13 @@ func extractLabels(log logging.Logger, in string) (labels labelMap, name string) compsIdx++ name += "_ops_" + comps[compsIdx] compsIdx++ + case "container": + compsIdx++ + labels["container"] = comps[compsIdx] + compsIdx++ + if comps[compsIdx] == "dfs" { + name = "" // dfs metrics shouldn't go under pool + } } case "io": name = "io" @@ -204,7 +211,7 @@ func extractLabels(log logging.Logger, in string) (labels labelMap, name string) } } - name = sanitizeMetricName(name) + name = strings.ToLower(sanitizeMetricName(name)) return } diff --git a/src/control/lib/telemetry/telemetry.go b/src/control/lib/telemetry/telemetry.go index 479c41e2aab..2e5f763c9cb 100644 --- a/src/control/lib/telemetry/telemetry.go +++ b/src/control/lib/telemetry/telemetry.go @@ -612,6 +612,29 @@ func CollectMetrics(ctx context.Context, s *Schema, out chan<- Metric) error { return nil } +type pruneMap map[string]struct{} + +func (pm pruneMap) add(path string) { + pm[path] = struct{}{} +} + +func (pm pruneMap) removeParents(path string) { + for parent := range pm { + if strings.HasPrefix(path, parent) { + delete(pm, parent) + } + } +} + +func (pm pruneMap) toPrune() []string { + var paths []string + for path := range pm { + paths = append(paths, path) + } + sort.Sort(sort.Reverse(sort.StringSlice(paths))) + return 
paths +} + // PruneUnusedSegments removes shared memory segments associated with // unused ephemeral subdirectories. func PruneUnusedSegments(ctx context.Context, maxSegAge time.Duration) error { @@ -628,7 +651,7 @@ func PruneUnusedSegments(ctx context.Context, maxSegAge time.Duration) error { return errors.New("invalid handle") } - var toPrune []string + pruneCandidates := make(pruneMap) procNode := func(hdl *handle, id string, node *C.struct_d_tm_node_t) { if node == nil || node.dtn_type != C.D_TM_DIRECTORY { return @@ -652,21 +675,22 @@ func PruneUnusedSegments(ctx context.Context, maxSegAge time.Duration) error { // If the creator process was someone other than us, and it's still // around, don't mess with the segment. if _, err := common.GetProcName(st.Cpid()); err == nil && st.Cpid() != unix.Getpid() { + pruneCandidates.removeParents(path) return } if time.Since(st.Ctime()) <= maxSegAge { + pruneCandidates.removeParents(path) return } - log.Tracef("adding %s to prune list", path) - toPrune = append(toPrune, path) + log.Tracef("adding %s to prune candidates list", path) + pruneCandidates.add(path) } visit(hdl, hdl.root, "", true, procNode) - sort.Sort(sort.Reverse(sort.StringSlice(toPrune))) - for _, path := range toPrune { + for _, path := range pruneCandidates.toPrune() { log.Tracef("pruning %s", path) if err := removeLink(hdl, path); err != nil { log.Errorf("failed to prune %s: %s", path, err) diff --git a/src/control/lib/telemetry/telemetry_test.go b/src/control/lib/telemetry/telemetry_test.go index bc63cc81399..84abed5daf7 100644 --- a/src/control/lib/telemetry/telemetry_test.go +++ b/src/control/lib/telemetry/telemetry_test.go @@ -16,6 +16,7 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/common/test" @@ -461,3 +462,56 @@ func TestTelemetry_garbageCollection(t *testing.T) { }) } } + +func TestTelemetry_pruneMap(t *testing.T) { + type inputPath struct { + path string + shouldPrune bool + } + for name, tc := range map[string]struct { + inputPaths []inputPath + expToPrune []string + }{ + "empty": {}, + "no shared parents": { + inputPaths: []inputPath{ + {"/a", true}, + {"/b", true}, + {"/c", true}, + }, + expToPrune: []string{"/c", "/b", "/a"}, + }, + "deeply nested should not be pruned": { + inputPaths: []inputPath{ + {"/a", true}, + {"/a/b", true}, + {"/a/b/c", false}, + }, + expToPrune: nil, + }, + "deeply nested should be pruned": { + inputPaths: []inputPath{ + {"/a", true}, + {"/a/b", true}, + {"/a/b/c", true}, + }, + expToPrune: []string{"/a/b/c", "/a/b", "/a"}, + }, + } { + t.Run(name, func(t *testing.T) { + pm := make(pruneMap) + + for _, ip := range tc.inputPaths { + if ip.shouldPrune { + pm.add(ip.path) + } else { + pm.removeParents(ip.path) + } + } + + if diff := cmp.Diff(tc.expToPrune, pm.toPrune()); diff != "" { + t.Fatalf("unexpected toPrune list (-want, +got): %s", diff) + } + }) + } +} diff --git a/src/control/pbin/app.go b/src/control/pbin/app.go index 5aa32a41d29..89a8e936413 100644 --- a/src/control/pbin/app.go +++ b/src/control/pbin/app.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2021 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -10,7 +10,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "os" "strings" @@ -69,7 +68,7 @@ func (a *App) WithLogFile(path string) *App { // is empty, non-error messages are not logged. 
func (a *App) configureLogging(logPath string) { logLevel := logging.LogLevelError - combinedOut := ioutil.Discard + combinedOut := io.Discard if logPath != "" { lf, err := common.AppendFile(logPath) if err == nil { diff --git a/src/control/pbin/exec_test.go b/src/control/pbin/exec_test.go index 4fc1e4dc1ab..6ce665cdb06 100644 --- a/src/control/pbin/exec_test.go +++ b/src/control/pbin/exec_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2022 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -10,7 +10,6 @@ import ( "bytes" "encoding/json" "fmt" - "io/ioutil" "os" "testing" @@ -88,7 +87,7 @@ func generatePayload(size int) []byte { } func loadJSONPayload(t *testing.T, name string) []byte { - loadBuf, err := ioutil.ReadFile(fmt.Sprintf("testdata/%s.json", name)) + loadBuf, err := os.ReadFile(fmt.Sprintf("testdata/%s.json", name)) if err != nil { t.Fatal(err) } diff --git a/src/control/security/config_test.go b/src/control/security/config_test.go index 4e33f3769b8..2407c50b726 100644 --- a/src/control/security/config_test.go +++ b/src/control/security/config_test.go @@ -12,7 +12,6 @@ import ( "crypto/x509" "encoding/pem" "fmt" - "io/ioutil" "os" "path/filepath" "syscall" @@ -81,7 +80,7 @@ func SetupTCFilePerms(t *testing.T, conf *TransportConfig) { } func getCert(t *testing.T, path string) *x509.Certificate { - buf, err := ioutil.ReadFile(path) + buf, err := os.ReadFile(path) if err != nil { t.Fatal(err) } diff --git a/src/control/security/pem.go b/src/control/security/pem.go index 8404c9a4e85..707c771c592 100644 --- a/src/control/security/pem.go +++ b/src/control/security/pem.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -13,7 +13,6 @@ import ( "crypto/x509" "encoding/pem" "fmt" - "io/ioutil" "os" "strings" @@ -129,7 +128,7 @@ func LoadPEMData(filePath string, perms os.FileMode) ([]byte, error) { return nil, err } - fileContents, err := ioutil.ReadFile(filePath) + fileContents, err := os.ReadFile(filePath) if err != nil { return nil, err } diff --git a/src/control/security/signature_test.go b/src/control/security/signature_test.go index ac1cfc1b871..601c37432fb 100644 --- a/src/control/security/signature_test.go +++ b/src/control/security/signature_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -14,7 +14,6 @@ import ( crand "crypto/rand" "encoding/hex" "flag" - "io/ioutil" mrand "math/rand" "os" "path/filepath" @@ -44,7 +43,7 @@ func SignTestSetup(t *testing.T) (rsaKey, ecdsaKey crypto.PrivateKey, source []b if err != nil { t.Fatal("Failed to generate ecdsa key for testing") } - source, err = ioutil.ReadFile("testdata/certs/source.txt") + source, err = os.ReadFile("testdata/certs/source.txt") if err != nil { t.Fatal("Failed to read in source file for Sign test.") } @@ -73,12 +72,12 @@ func TestSign(t *testing.T) { result = []byte(err.Error()) } if *update { - err := ioutil.WriteFile(golden, result, 0644) + err := os.WriteFile(golden, result, 0644) if err != nil { t.Errorf("failed to update golden file %s", golden) } } - expected, err := ioutil.ReadFile(golden) + expected, err := os.ReadFile(golden) if err != nil { t.Errorf("unable to read golden file %s", golden) } @@ -105,7 +104,7 @@ func VerifyTestSetup(t *testing.T) (rsaKey, ecdsaKey crypto.PublicKey, source [] t.Fatal("Failed to generate ecdsa key for testing") } ecdsaKey = gen.Public() - source, err = ioutil.ReadFile("testdata/certs/source.txt") + source, err = os.ReadFile("testdata/certs/source.txt") if err != nil { t.Fatal("Failed to read in source file for Sign test.") } @@ -128,7 +127,7 @@ func TestVerify(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { golden := filepath.Join("testdata", "certs", tc.name+".golden") - expected, err := ioutil.ReadFile(golden) + expected, err := os.ReadFile(golden) if err != nil { t.Errorf("unable to read golden file %s", golden) } diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index f27308ed740..63eb9814426 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -8,7 +8,6 @@ package config import ( "fmt" - "io/ioutil" "math" "net" "os" @@ -357,7 +356,7 @@ func (cfg *Server) Load(log logging.Logger) error { return FaultConfigNoPath } - bytes, err := ioutil.ReadFile(cfg.Path) + bytes, err := os.ReadFile(cfg.Path) if err != nil { return errors.WithMessage(err, "reading file") } @@ -403,7 +402,7 @@ func (cfg *Server) SaveToFile(filename string) error { return err } - return ioutil.WriteFile(filename, bytes, 0644) + return os.WriteFile(filename, bytes, 0644) } // SetPath sets the default path to the configuration file. diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 25661e143f8..b3b03dffcb1 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -11,7 +11,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "math" "os" "path/filepath" @@ -184,7 +183,7 @@ func TestServerConfig_MarshalUnmarshal(t *testing.T) { t.Fatal(err) } - bytes, err := ioutil.ReadFile(testFile) + bytes, err := os.ReadFile(testFile) if err != nil { t.Fatal(errors.WithMessage(err, "reading file")) } @@ -1437,7 +1436,7 @@ func replaceFile(t *testing.T, name, oldTxt, newTxt string) { defer f.Close() // create temp file - tmp, err := ioutil.TempFile("", "replace-*") + tmp, err := os.CreateTemp("", "replace-*") if err != nil { t.Fatal(err) } diff --git a/src/control/server/ctl_ranks_rpc.go b/src/control/server/ctl_ranks_rpc.go index 7006eec4647..ba227742d76 100644 --- a/src/control/server/ctl_ranks_rpc.go +++ b/src/control/server/ctl_ranks_rpc.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2023 Intel Corporation. +// (C) Copyright 2020-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -140,7 +140,7 @@ func (svc *ControlService) memberStateResults(instances []Engine, tgtState syste state := ei.LocalState() if state != tgtState { - results = append(results, system.NewMemberResult(rank, errors.Errorf(failMsg), + results = append(results, system.NewMemberResult(rank, errors.New(failMsg), system.MemberStateErrored)) continue } diff --git a/src/control/server/storage/bdev/backend_class_test.go b/src/control/server/storage/bdev/backend_class_test.go index 0eec7e38980..7d198e33183 100644 --- a/src/control/server/storage/bdev/backend_class_test.go +++ b/src/control/server/storage/bdev/backend_class_test.go @@ -7,7 +7,6 @@ package bdev import ( - "io/ioutil" "os" "path/filepath" "strings" @@ -742,7 +741,7 @@ func TestBackend_writeJSONFile(t *testing.T) { return } - gotOut, err := ioutil.ReadFile(cfgOutputPath) + gotOut, err := os.ReadFile(cfgOutputPath) if err != nil { t.Fatal(err) } diff --git a/src/control/server/storage/bdev/backend_vmd.go b/src/control/server/storage/bdev/backend_vmd.go index ed7d7c39d4f..48aecb83c96 100644 --- a/src/control/server/storage/bdev/backend_vmd.go +++ b/src/control/server/storage/bdev/backend_vmd.go @@ -132,7 +132,7 @@ func substituteVMDAddresses(log logging.Logger, inPCIAddrs *hardware.PCIAddressS dl, err := substVMDAddrs(inPCIAddrs, ctrlrs) if err != nil { - return nil, errors.Wrapf(err, msg) + return nil, errors.Wrap(err, msg) } log.Debugf("%s: new %s", msg, dl) diff --git a/src/control/system/raft/database_test.go b/src/control/system/raft/database_test.go index 9035db01aaf..0f196d5bbdf 100644 --- a/src/control/system/raft/database_test.go +++ b/src/control/system/raft/database_test.go @@ -11,7 +11,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "math/rand" "net" "sort" @@ -228,7 +227,7 @@ func (tss *testSnapshotSink) Write(data []byte) (int, error) { return int(w), err } func (tss *testSnapshotSink) Reader() io.ReadCloser { - return ioutil.NopCloser(tss.contents) + return io.NopCloser(tss.contents) } func TestSystem_Database_SnapshotRestore(t *testing.T) { diff --git a/src/control/system/raft/raft_recovery.go b/src/control/system/raft/raft_recovery.go index 29aaeca6def..0647f43c077 100644 --- a/src/control/system/raft/raft_recovery.go +++ b/src/control/system/raft/raft_recovery.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2022 Intel Corporation. +// (C) Copyright 2022-2024 Intel Corporation. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -10,7 +10,6 @@ import ( "bytes" "encoding/json" "io" - "io/ioutil" "os" "path/filepath" "sort" @@ -146,7 +145,7 @@ func RestoreLocalReplica(log logging.Logger, cfg *DatabaseConfig, snapPath strin } if strings.HasPrefix(snapPath, cfg.RaftDir) { - tmpDir, err := ioutil.TempDir("", "daos-raft-restore") + tmpDir, err := os.MkdirTemp("", "daos-raft-restore") if err != nil { return errors.Wrap(err, "failed to create temporary directory") } @@ -455,7 +454,7 @@ func GetLatestSnapshot(log logging.Logger, cfg *DatabaseConfig) (*SnapshotDetail func readSnapshotMeta(path string, meta *raft.SnapshotMeta) error { metaPath := filepath.Join(path, snapshotMetaFile) - data, err := ioutil.ReadFile(metaPath) + data, err := os.ReadFile(metaPath) if err != nil { return errors.Wrapf(err, "failed to read snapshot metadata from %q", metaPath) } @@ -465,7 +464,7 @@ func readSnapshotMeta(path string, meta *raft.SnapshotMeta) error { func readSnapshotData(path string) ([]byte, error) { dataPath := filepath.Join(path, snapshotDataFile) - data, err := ioutil.ReadFile(dataPath) + data, err := os.ReadFile(dataPath) if err != nil { return nil, errors.Wrapf(err, "failed to read snapshot data from %q", dataPath) } diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index 33b5e410dca..3e285c8dcdc 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -804,6 +804,12 @@ destroy_shmem_with_key(key_t key) return 0; } +static bool +is_initialized(void) +{ + return tm_shmem.ctx != NULL && tm_shmem.ctx->shmem_root != NULL; +} + /** * Initialize an instance of the telemetry and metrics API for the producer * process with the root set to the provided name. @@ -833,6 +839,9 @@ d_tm_init_with_name(int id, uint64_t mem_size, int flags, const char *root_name) int shmid = 0; int rc = DER_SUCCESS; + if (is_initialized()) + return -DER_ALREADY; + if (root_name == NULL || strnlen(root_name, D_TM_MAX_NAME_LEN) == 0) { D_ERROR("root name cannot be empty\n"); return -DER_INVAL; @@ -2253,13 +2262,6 @@ d_tm_find_metric(struct d_tm_context *ctx, char *path) return node; } -static bool -is_initialized(void) -{ - return tm_shmem.ctx != NULL && - tm_shmem.ctx->shmem_root != NULL; -} - /* * Get a pointer to the last token in the path without modifying the original * string. diff --git a/src/include/daos/mem.h b/src/include/daos/mem.h index 5a7c685749f..47260c0b5d2 100644 --- a/src/include/daos/mem.h +++ b/src/include/daos/mem.h @@ -1196,6 +1196,18 @@ umem_get_mb_from_offset(struct umem_instance *umm, umem_off_t off); umem_off_t umem_get_mb_base_offset(struct umem_instance *umm, uint32_t mb_id); +/** + * Force GC within the heap to optimize umem_cache usage with DAV + * v2 allocator. + * + * \param[in] umm umem instance pointer. 
+ * + * \return 0, success + * < 0, error + */ +int +umem_heap_gc(struct umem_instance *umm); + /*********************************************************************************/ /* Type of memory actions */ diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h index 07e861bb54a..4345fd2e9d7 100644 --- a/src/include/daos/pool.h +++ b/src/include/daos/pool.h @@ -158,6 +158,8 @@ void dc_pool_put(struct dc_pool *pool); int dc_pool_local2global(daos_handle_t poh, d_iov_t *glob); int dc_pool_global2local(d_iov_t glob, daos_handle_t *poh); +int + dc_pool_hdl2uuid(daos_handle_t poh, uuid_t *hdl_uuid, uuid_t *pool_uuid); int dc_pool_connect(tse_task_t *task); int dc_pool_disconnect(tse_task_t *task); int dc_pool_query(tse_task_t *task); diff --git a/src/pool/cli.c b/src/pool/cli.c index 94e26e47f5d..694ebc96f4f 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -1574,6 +1574,23 @@ dc_pool_global2local(d_iov_t glob, daos_handle_t *poh) return rc; } +int +dc_pool_hdl2uuid(daos_handle_t poh, uuid_t *hdl_uuid, uuid_t *uuid) +{ + struct dc_pool *dp; + + dp = dc_hdl2pool(poh); + if (dp == NULL) + return -DER_NO_HDL; + + if (hdl_uuid != NULL) + uuid_copy(*hdl_uuid, dp->dp_pool_hdl); + if (uuid != NULL) + uuid_copy(*uuid, dp->dp_pool); + dc_pool_put(dp); + return 0; +} + struct pool_update_state { struct rsvc_client client; struct dc_mgmt_sys *sys; diff --git a/src/tests/ftest/telemetry/basic_client_telemetry.py b/src/tests/ftest/telemetry/basic_client_telemetry.py index 692da134db4..5d0236dbb7a 100644 --- a/src/tests/ftest/telemetry/basic_client_telemetry.py +++ b/src/tests/ftest/telemetry/basic_client_telemetry.py @@ -46,9 +46,7 @@ def test_client_metrics_exist(self): self.log_step('Reading client telemetry (reads & writes should be > 0)') after_metrics = self.telemetry.collect_client_data(metric_names) for metric in metric_names: - msum = 0 - for value in after_metrics[metric].values(): - msum += value - self.assertGreater(msum, 0) + msum = sum(after_metrics[metric].values()) + self.assertGreater(msum, 0, f'{metric} value not greater than zero after I/O') self.log_step('Test passed') diff --git a/src/tests/ftest/telemetry/dfs_client_telemetry.py b/src/tests/ftest/telemetry/dfs_client_telemetry.py new file mode 100644 index 00000000000..2c234bb9341 --- /dev/null +++ b/src/tests/ftest/telemetry/dfs_client_telemetry.py @@ -0,0 +1,62 @@ +""" + (C) Copyright 2024 Intel Corporation. + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +from ior_utils import read_data, write_data +from telemetry_test_base import TestWithClientTelemetry + + +class DFSClientTelemetry(TestWithClientTelemetry): + """Tests to verify DFS telemetry. + + :avocado: recursive + """ + + def test_dfs_metrics(self): + """JIRA ID: DAOS-16837. + + Verify that the DFS metrics are incrementing as expected. + After performing some I/O, the DFS-level metrics should look reasonable. 
+ + Test steps: + 1) Create a pool and container + 2) Perform some I/O with IOR + 3) Verify that the DFS metrics are sane + + :avocado: tags=all,daily_regression + :avocado: tags=vm + :avocado: tags=dfs,telemetry + :avocado: tags=DFSClientTelemetry,test_dfs_metrics + """ + # create pool and container + pool = self.get_pool(connect=True) + container = self.get_container(pool=pool) + + self.log_step('Writing data to the pool (ior)') + ior = write_data(self, container) + self.log_step('Reading data from the pool (ior)') + read_data(self, ior, container) + + # after an IOR run, we'd expect this set of metrics to have values > 0 + val_metric_names = [ + 'client_dfs_ops_create', + 'client_dfs_ops_open', + 'client_dfs_ops_read', + 'client_dfs_ops_write' + ] + bkt_metric_names = [ + 'client_dfs_read_bytes', + 'client_dfs_write_bytes' + ] + + self.log_step('Reading dfs telemetry') + after_metrics = self.telemetry.collect_client_data(val_metric_names + bkt_metric_names) + for metric in val_metric_names: + msum = sum(after_metrics[metric].values()) + self.assertGreater(msum, 0, f'{metric} value not greater than zero after I/O') + for metric in bkt_metric_names: + msum = sum(hist['sample_sum'] for hist in after_metrics[metric].values()) + self.assertGreater(msum, 0, f'{metric} sample_sum not greater than zero after I/O') + + self.log_step('Test passed') diff --git a/src/tests/ftest/telemetry/dfs_client_telemetry.yaml b/src/tests/ftest/telemetry/dfs_client_telemetry.yaml new file mode 100644 index 00000000000..e0dd33d1f87 --- /dev/null +++ b/src/tests/ftest/telemetry/dfs_client_telemetry.yaml @@ -0,0 +1,45 @@ +hosts: + test_servers: 1 + test_clients: 1 + +timeout: 180 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +agent_config: + telemetry_port: 9191 + telemetry_retain: 30s + telemetry_enabled: true + +pool: + scm_size: 2G + +container: + type: POSIX + dfs_oclass: SX + +ior: &ior_base + ppn: 4 + api: DFS + transfer_size: 512K + block_size: 1M + dfs_oclass: SX + +ior_write: + <<: *ior_base + flags: "-k -v -w -W -G 1" + +ior_read: + <<: *ior_base + flags: "-v -r -R -G 1" diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 1b8e0220017..d13a359ee8b 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -1012,6 +1012,34 @@ class ClientTelemetryUtils(TelemetryUtils): CLIENT_IO_OPS_TGT_PUNCH_LATENCY_METRICS +\ CLIENT_IO_OPS_TGT_UPDATE_ACTIVE_METRICS +\ CLIENT_IO_OPS_UPDATE_ACTIVE_METRICS + CLIENT_DFS_OPS_METRICS = [ + "client_dfs_ops_chmod", + "client_dfs_ops_chown", + "client_dfs_ops_create", + "client_dfs_ops_getsize", + "client_dfs_ops_getxattr", + "client_dfs_ops_lsxattr", + "client_dfs_ops_mkdir", + "client_dfs_ops_open", + "client_dfs_ops_opendir", + "client_dfs_ops_read", + "client_dfs_ops_readdir", + "client_dfs_ops_readlink", + "client_dfs_ops_rename", + "client_dfs_ops_rmxattr", + "client_dfs_ops_setattr", + "client_dfs_ops_setxattr", + "client_dfs_ops_stat", + "client_dfs_ops_symlink", + "client_dfs_ops_sync", + "client_dfs_ops_truncate", + "client_dfs_ops_unlink", + "client_dfs_ops_write"] + CLIENT_DFS_IO_METRICS = [ + "client_dfs_read_bytes", + "client_dfs_write_bytes"] + CLIENT_DFS_METRICS = CLIENT_DFS_OPS_METRICS +\ + CLIENT_DFS_IO_METRICS def __init__(self, dmg, servers, clients): """Create a ClientTelemetryUtils object. 
@@ -1025,11 +1053,12 @@ def __init__(self, dmg, servers, clients): super().__init__(dmg, servers) self.clients = NodeSet.fromlist(clients) - def get_all_client_metrics_names(self, with_pools=False): + def get_all_client_metrics_names(self, with_pools=False, with_dfs=False): """Get all the telemetry metrics names for this client. Args: with_pools (bool): if True, include pool metrics in the results + with_dfs (bool): if True, include DFS metrics in the results Returns: list: all of the telemetry metrics names for this client @@ -1039,6 +1068,8 @@ def get_all_client_metrics_names(self, with_pools=False): all_metrics_names.extend(self.CLIENT_IO_METRICS) if with_pools: all_metrics_names.extend(self.CLIENT_POOL_METRICS) + if with_dfs: + all_metrics_names.extend(self.CLIENT_DFS_METRICS) return all_metrics_names @@ -1219,13 +1250,21 @@ def _get_data(self, names, info): if name not in data: data[name] = {} for metric in metrics[name]['metrics']: - if 'labels' not in metric or 'value' not in metric: + if 'labels' not in metric or \ + ('value' not in metric and 'buckets' not in metric): continue labels = [f'host:{host}'] for key, value in metric['labels'].items(): labels.append(":".join([str(key), str(value)])) label_key = ",".join(sorted(labels)) - data[name][label_key] = metric['value'] + if 'value' in metric: + data[name][label_key] = metric['value'] + else: + data[name][label_key] = { + 'sample_count': metric['sample_count'], + 'sample_sum': metric['sample_sum'], + 'buckets': metric['buckets'], + } return data def _set_display(self, compare=None): diff --git a/src/vos/tests/vts_wal.c b/src/vos/tests/vts_wal.c index 7506c7ffa79..83303ad213d 100644 --- a/src/vos/tests/vts_wal.c +++ b/src/vos/tests/vts_wal.c @@ -642,6 +642,7 @@ setup_mb_io(void **state) { int rc; + d_setenv("DAOS_NEMB_EMPTY_RECYCLE_THRESHOLD", "4", true); memset(&test_args, 0, sizeof(test_args)); rc = vts_ctx_init_ex(&test_args.ctx, MDTEST_VOS_SIZE, MDTEST_META_BLOB_SIZE); *state = (void *)&test_args; @@ -654,6 +655,7 @@ teardown_mb_io(void **state) struct io_test_args *args = (struct io_test_args *)*state; vts_ctx_fini(&args->ctx); + d_unsetenv("DAOS_NEMB_EMPTY_RECYCLE_THRESHOLD"); return 0; } @@ -1365,7 +1367,7 @@ alloc_bucket_to_full(struct umem_instance *umm, struct bucket_alloc_info *ainfo, void (*chkpt_fn)(void *arg), void *arg) { umem_off_t umoff, prev_umoff; - size_t alloc_size = 512; + size_t alloc_size = 2048; umem_off_t *ptr; struct umem_cache_range rg = {0}; struct umem_pin_handle *p_hdl; @@ -1383,13 +1385,13 @@ alloc_bucket_to_full(struct umem_instance *umm, struct bucket_alloc_info *ainfo, if (UMOFF_IS_NULL(ainfo->start_umoff)) { umem_tx_begin(umm, NULL); ainfo->start_umoff = umem_alloc_from_bucket(umm, alloc_size, id); - umem_tx_commit(umm); assert_false(UMOFF_IS_NULL(ainfo->start_umoff)); ainfo->num_allocs++; assert_true(umem_get_mb_from_offset(umm, ainfo->start_umoff) == id); prev_umoff = ainfo->start_umoff; ptr = (umem_off_t *)umem_off2ptr(umm, prev_umoff); *ptr = UMOFF_NULL; + umem_tx_commit(umm); } else prev_umoff = ainfo->start_umoff; @@ -1748,6 +1750,124 @@ wal_mb_nemb_pct(void **state) assert_true(i == (MDTEST_META_BLOB_SIZE - maxsz) / MDTEST_MB_SIZE); } +static void +nemb_unused(void **state) +{ + struct io_test_args *arg = *state; + struct vos_container *cont; + struct umem_instance *umm; + int i, j, rc; + struct bucket_alloc_info ainfo[MDTEST_MB_CNT + 1]; + daos_size_t maxsz, nemb_full_size, nemb_init_size, cur_allocated; + uint32_t id, found; + + cont = vos_hdl2cont(arg->ctx.tc_co_hdl); + umm = 
vos_cont2umm(cont); + + rc = umempobj_get_mbusage(umm->umm_pool, 0, &nemb_init_size, &maxsz); + assert_true(rc == 0); + print_message("phase0: nemb space utilization is %lu max is %lu\n", nemb_init_size, maxsz); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + ainfo[0].mb_id = 0; + ainfo[0].num_allocs = 0; + ainfo[0].start_umoff = UMOFF_NULL; + ainfo[0].alloc_size = 512 * 1024; + alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &nemb_full_size, &maxsz); + assert_true(rc == 0); + print_message("phase1: nemb space utilization is %lu max is %lu\n", nemb_full_size, maxsz); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + + free_bucket_by_pct(umm, &ainfo[0], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + assert_true(rc == 0); + print_message("phase2: nemb space utilization is %lu max is %lu\n", cur_allocated, maxsz); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(nemb_init_size == cur_allocated); + + umem_heap_gc(umm); + + for (i = 1; i <= MDTEST_MAX_EMB_CNT; i++) { + /* Create an MB and fill it with allocs */ + ainfo[i].mb_id = umem_allot_mb_evictable(umm, 0); + for (j = 1; j < i; j++) + assert_false(ainfo[i].mb_id == ainfo[j].mb_id); + ainfo[i].num_allocs = 0; + ainfo[i].start_umoff = UMOFF_NULL; + ainfo[i].alloc_size = 512 * 1024; + assert_true(ainfo[i].mb_id != 0); + alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); + } + /* Make sure that we can only create MDTEST_MAX_EMB_CNT evictable MBs */ + id = umem_allot_mb_evictable(umm, 0); + for (j = 1; j <= MDTEST_MAX_EMB_CNT; j++) { + if (id == ainfo[j].mb_id) + break; + } + assert_true(j <= MDTEST_MAX_EMB_CNT); + found = 0; + for (j = 1; j <= MDTEST_MAX_EMB_CNT; j++) + if (umem_cache_offisloaded(&umm->umm_pool->up_store, ainfo[j].start_umoff)) + found++; + print_message("phase3: Found %d evictable MBs loaded\n", found); + D_ASSERT(found > (MDTEST_MB_VOS_CNT - MDTEST_MAX_NEMB_CNT)); + + for (i = 1; i <= MDTEST_MAX_EMB_CNT; i++) + free_bucket_by_pct(umm, &ainfo[i], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); + + alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + assert_true(rc == 0); + print_message("phase4: nemb space utilization is %lu max is %lu\n", cur_allocated, maxsz); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(nemb_full_size == cur_allocated); + + free_bucket_by_pct(umm, &ainfo[0], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + assert_true(rc == 0); + print_message("phase5: nemb space utilization is %lu max is %lu\n", cur_allocated, maxsz); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(nemb_init_size == cur_allocated); + + wal_pool_refill(arg); + cont = vos_hdl2cont(arg->ctx.tc_co_hdl); + umm = vos_cont2umm(cont); + + /* Force GC */ + umem_heap_gc(umm); + umem_heap_gc(umm); + + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + assert_true(rc == 0); + print_message("phase6: nemb space utilization is %lu max is %lu\n", cur_allocated, maxsz); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(nemb_init_size == cur_allocated); + + for (i = 1; i <= MDTEST_MAX_EMB_CNT; i++) + alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); + + /* Make sure that we can only create MDTEST_MAX_EMB_CNT evictable MBs 
*/ + id = umem_allot_mb_evictable(umm, 0); + for (j = 1; j <= MDTEST_MAX_EMB_CNT; j++) { + if (id == ainfo[j].mb_id) + break; + } + assert_true(j <= MDTEST_MAX_EMB_CNT); + found = 0; + for (j = 1; j <= MDTEST_MAX_EMB_CNT; j++) + if (umem_cache_offisloaded(&umm->umm_pool->up_store, ainfo[j].start_umoff)) + found++; + + print_message("phase7: Found %d evictable MBs loaded\n", found); + D_ASSERT(found > (MDTEST_MB_VOS_CNT - MDTEST_MAX_NEMB_CNT)); + + alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + assert_true(rc == 0); + print_message("phase8: nemb space utilization is %lu max is %lu\n", cur_allocated, maxsz); + assert_true(nemb_full_size == cur_allocated); +} + static int umoff_in_freelist(umem_off_t *free_list, int cnt, umem_off_t umoff, bool clear) { @@ -2514,6 +2634,7 @@ static const struct CMUnitTest wal_MB_tests[] = { {"WAL38: P2 basic", p2_basic_test, setup_mb_io, teardown_mb_io}, {"WAL39: P2 fill evictable buckets", p2_fill_test, setup_mb_io, teardown_mb_io}, {"WAL40: nemb pct test", wal_mb_nemb_pct, setup_mb_io_nembpct, teardown_mb_io_nembpct}, + {"WAL41: nemb unused test", nemb_unused, setup_mb_io, teardown_mb_io}, }; int diff --git a/src/vos/vos_aggregate.c b/src/vos/vos_aggregate.c index 00f0f6409f4..63f5603b0e5 100644 --- a/src/vos/vos_aggregate.c +++ b/src/vos/vos_aggregate.c @@ -2765,6 +2765,7 @@ vos_aggregate(daos_handle_t coh, daos_epoch_range_t *epr, if (vam && vam->vam_fail_count) d_tm_inc_counter(vam->vam_fail_count, 1); } + umem_heap_gc(&cont->vc_pool->vp_umm); return rc; } diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c index 5d5383ed766..eb1e2ac995b 100644 --- a/src/vos/vos_gc.c +++ b/src/vos/vos_gc.c @@ -1464,6 +1464,7 @@ gc_reclaim_pool_p2(struct vos_pool *pool, int *credits, bool *empty_ret) *credits = creds; gc_update_stats(pool); + umem_heap_gc(vos_pool2umm(pool)); return rc; }
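The final hunks declare umem_heap_gc() in daos/mem.h and call it at the end of vos_aggregate() and gc_reclaim_pool_p2(), so the heap can be garbage-collected to optimize umem_cache usage with the DAV v2 allocator. Below is a minimal sketch of that call pattern, not part of the patch: the wrapper name is hypothetical, the reclaim step is a placeholder, and propagating the return code is a choice made here (the in-tree callers above ignore it).

#include <daos/mem.h>

/*
 * Sketch only: run a reclaim pass, then ask the allocator to garbage-collect
 * the heap so umem_cache pages backing now-empty buckets can be reused.
 * Returns 0 on success or a negative error code from umem_heap_gc(), matching
 * the contract documented in daos/mem.h above.
 */
static int
reclaim_then_gc(struct umem_instance *umm)
{
	/* ... free or aggregate a batch of allocations here ... */

	return umem_heap_gc(umm);
}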