From e122343ffa92e20f9e83a877cc05b38e6b426610 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Wed, 8 May 2024 10:01:22 -0600 Subject: [PATCH] Better control the thread pool size when mounting datasets Ever since a10d50f999, ZFS has mounted file systems in parallel when importing a pool. It uses a fixed size of 512 for the thread pool. But since c183d164aa1, it has also imported pools in parallel. So the total number of threads at one time is 513 * npools + 1. That can easily exceed the system's limit on the number of threads per process, which will cause one or more pools to be unable to allocate any worker threads, forcing them to fallback to slow serial mounting . To forestall that, manage the threadpool size in /sbin/zpool, not libzfs. Use the same size (512), but divided by the number of pools. This is a backwards-incompatible change to the libzfs abi. Sponsored by: Axcient Signed-off-by: Alan Somers --- cmd/zed/agents/zfs_mod.c | 2 +- cmd/zfs/zfs_main.c | 6 ++++-- cmd/zpool/zpool_main.c | 20 +++++++++++++++----- include/libzfs.h | 5 +++-- lib/libzfs/libzfs_mount.c | 25 +++++++++++++------------ 5 files changed, 36 insertions(+), 22 deletions(-) diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 69163b80bd5a..d0372608c723 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -702,7 +702,7 @@ zfs_enable_ds(void *arg) { unavailpool_t *pool = (unavailpool_t *)arg; - (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0); + (void) zpool_enable_datasets(pool->uap_zhp, NULL, 0, 512); zpool_close(pool->uap_zhp); free(pool); } diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 0bbdd5b18eda..75c0e40b610c 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -7192,6 +7192,8 @@ share_mount(int op, int argc, char **argv) int c, ret = 0; char *options = NULL; int flags = 0; + const uint_t mount_nthr = 512; + uint_t nthr; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":aRlvo:Of" : "al")) @@ -7310,9 +7312,9 @@ share_mount(int op, int argc, char **argv) * be serialized so that we can prompt the user for their keys * in a consistent manner. */ + nthr = op == OP_MOUNT && !(flags & MS_CRYPT) ? mount_nthr : 1; zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, - share_mount_one_cb, &share_mount_state, - op == OP_MOUNT && !(flags & MS_CRYPT)); + share_mount_one_cb, &share_mount_state, nthr); zfs_commit_shares(NULL); ret = share_mount_state.sm_status; diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 300b383af4f6..5bfaa8eec214 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -80,6 +80,8 @@ libzfs_handle_t *g_zfs; +static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ + static int zpool_do_create(int, char **); static int zpool_do_destroy(int, char **); @@ -3342,7 +3344,7 @@ zfs_force_import_required(nvlist_t *config) */ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, - nvlist_t *props, int flags) + nvlist_t *props, int flags, uint_t mntthreads) { int ret = 0; int ms_status = 0; @@ -3442,7 +3444,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY)) { - ms_status = zpool_enable_datasets(zhp, mntopts, 0); + ms_status = zpool_enable_datasets(zhp, mntopts, 0, mntthreads); if (ms_status == EZFS_SHAREFAILED) { (void) fprintf(stderr, gettext("Import was " "successful, but unable to share some datasets\n")); @@ -3461,6 +3463,7 @@ typedef struct import_parameters { const char *ip_mntopts; nvlist_t *ip_props; int ip_flags; + uint_t ip_mntthreads; int *ip_err; } import_parameters_t; @@ -3469,7 +3472,7 @@ do_import_task(void *arg) { import_parameters_t *ip = arg; *ip->ip_err |= do_import(ip->ip_config, NULL, ip->ip_mntopts, - ip->ip_props, ip->ip_flags); + ip->ip_props, ip->ip_flags, ip->ip_mntthreads); free(ip); } @@ -3483,6 +3486,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, uint64_t pool_state; boolean_t pool_specified = (import->poolname != NULL || import->guid != 0); + uint_t npools = 0; tpool_t *tp = NULL; @@ -3500,6 +3504,10 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, int err = 0; nvpair_t *elem = NULL; boolean_t first = B_TRUE; + if (!pool_specified && import->do_all) { + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) + npools++; + } while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { verify(nvpair_value_nvlist(elem, &config) == 0); @@ -3530,6 +3538,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, ip->ip_mntopts = mntopts; ip->ip_props = props; ip->ip_flags = flags; + ip->ip_mntthreads = mount_tp_nthr / npools; ip->ip_err = &err; (void) tpool_dispatch(tp, do_import_task, @@ -3597,7 +3606,7 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, err = B_TRUE; } else { err |= do_import(found_config, new_name, - mntopts, props, flags); + mntopts, props, flags, mount_tp_nthr); } } @@ -7141,7 +7150,8 @@ zpool_do_split(int argc, char **argv) } if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { - ms_status = zpool_enable_datasets(zhp, mntopts, 0); + ms_status = zpool_enable_datasets(zhp, mntopts, 0, + mount_tp_nthr); if (ms_status == EZFS_SHAREFAILED) { (void) fprintf(stderr, gettext("Split was successful, " "datasets are mounted but sharing of some datasets " diff --git a/include/libzfs.h b/include/libzfs.h index 2823b8845827..7836c2325f4e 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -716,7 +716,7 @@ typedef struct get_all_cb { } get_all_cb_t; _LIBZFS_H void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, - size_t, zfs_iter_f, void *, boolean_t); + size_t, zfs_iter_f, void *, uint_t); _LIBZFS_H void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); /* @@ -1004,7 +1004,8 @@ _LIBZFS_H int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ -_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int); +_LIBZFS_H int zpool_enable_datasets(zpool_handle_t *, const char *, int, + uint_t); _LIBZFS_H int zpool_disable_datasets(zpool_handle_t *, boolean_t); _LIBZFS_H void zpool_disable_datasets_os(zpool_handle_t *, boolean_t); _LIBZFS_H void zpool_disable_volume_os(const char *); diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 3084e05e4d4d..42988bf9cbb2 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -83,8 +83,6 @@ #include #define MAXISALEN 257 /* based on sysinfo(2) man page */ -static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ - static void zfs_mount_task(void *); static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = { @@ -1205,19 +1203,20 @@ zfs_mount_task(void *arg) * * Callbacks are issued in one of two ways: * - * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT + * 1. Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT * environment variable is set, then we issue callbacks sequentially. * - * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT + * 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT * environment variable is not set, then we use a tpool to dispatch threads * to mount filesystems in parallel. This function dispatches tasks to mount * the filesystems at the top-level mountpoints, and these tasks in turn * are responsible for recursively mounting filesystems in their children - * mountpoints. + * mountpoints. The value of the nthr argument will be the number of worker + * threads for the thread pool. */ void zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, - size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) + size_t num_handles, zfs_iter_f func, void *data, uint_t nthr) { zoneid_t zoneid = getzoneid(); @@ -1226,7 +1225,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, * variable that can be used as a convenience to do a/b comparison * of serial vs. parallel mounting. */ - boolean_t serial_mount = !parallel || + boolean_t serial_mount = nthr <= 1 || (getenv("ZFS_SERIAL_MOUNT") != NULL); /* @@ -1246,7 +1245,7 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, * Issue the callback function for each dataset using a parallel * algorithm that uses a thread pool to manage threads. */ - tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL); + tpool_t *tp = tpool_create(1, nthr, 0, NULL); /* * There may be multiple "top level" mountpoints outside of the pool's @@ -1273,10 +1272,12 @@ zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, /* * Mount and share all datasets within the given pool. This assumes that no - * datasets within the pool are currently mounted. + * datasets within the pool are currently mounted. nthr will be number of + * worker threads to use while mounting datasets. */ int -zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags, + uint_t nthr) { get_all_cb_t cb = { 0 }; mount_state_t ms = { 0 }; @@ -1302,7 +1303,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) ms.ms_mntopts = mntopts; ms.ms_mntflags = flags; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, - zfs_mount_one, &ms, B_TRUE); + zfs_mount_one, &ms, nthr); if (ms.ms_mntstatus != 0) ret = EZFS_MOUNTFAILED; @@ -1313,7 +1314,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) */ ms.ms_mntstatus = 0; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, - zfs_share_one, &ms, B_FALSE); + zfs_share_one, &ms, 1); if (ms.ms_mntstatus != 0) ret = EZFS_SHAREFAILED; else