Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce number of metaslab preload taskq threads. #15319

Merged
merged 1 commit into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion include/sys/metaslab_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ struct metaslab_group {
int64_t mg_activation_count;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
taskq_t *mg_taskq;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;

Expand Down
4 changes: 2 additions & 2 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,9 @@ struct spa {

hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */
taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */
uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */
list_t spa_leaf_list; /* list of leaf vdevs */
Expand All @@ -448,8 +450,6 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */

taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
};

extern char *spa_config_path;
Expand Down
6 changes: 6 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev.
.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable metaslab group preloading.
.
.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint
Maximum number of metaslabs per group to preload
.
.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint
Percentage of CPUs to run a metaslab preload taskq
.
.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Give more weight to metaslabs with lower LBAs,
assuming they have greater bandwidth,
Expand Down
22 changes: 0 additions & 22 deletions module/os/freebsd/zfs/sysctl_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,28 +614,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
" space map to continue allocations in a first-fit fashion");
/* END CSTYLED */

/*
* Percentage of all cpus that can be used by the metaslab taskq.
*/
extern int metaslab_load_pct;

/* BEGIN CSTYLED */
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
CTLFLAG_RWTUN, &metaslab_load_pct, 0,
"Percentage of cpus that can be used by the metaslab taskq");
/* END CSTYLED */

/*
* Max number of metaslabs per group to preload.
*/
extern uint_t metaslab_preload_limit;

/* BEGIN CSTYLED */
SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
"Max number of metaslabs per group to preload");
/* END CSTYLED */

/* mmp.c */

int
Expand Down
23 changes: 8 additions & 15 deletions module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,6 @@ static const uint32_t metaslab_min_search_count = 100;
*/
static int metaslab_df_use_largest_segment = B_FALSE;

/*
* Percentage of all cpus that can be used by the metaslab taskq.
*/
int metaslab_load_pct = 50;

/*
* These tunables control how long a metaslab will remain loaded after the
* last allocation from it. A metaslab can't be unloaded until at least
Expand Down Expand Up @@ -854,9 +849,6 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
zfs_refcount_create_tracked(&mga->mga_alloc_queue_depth);
}

mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);

return (mg);
}

Expand All @@ -872,7 +864,6 @@ metaslab_group_destroy(metaslab_group_t *mg)
*/
ASSERT(mg->mg_activation_count <= 0);

taskq_destroy(mg->mg_taskq);
avl_destroy(&mg->mg_metaslab_tree);
mutex_destroy(&mg->mg_lock);
mutex_destroy(&mg->mg_ms_disabled_lock);
Expand Down Expand Up @@ -963,7 +954,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
* allocations from taking place and any changes to the vdev tree.
*/
spa_config_exit(spa, locks & ~(SCL_ZIO - 1), spa);
taskq_wait_outstanding(mg->mg_taskq, 0);
taskq_wait_outstanding(spa->spa_metaslab_taskq, 0);
spa_config_enter(spa, locks & ~(SCL_ZIO - 1), spa, RW_WRITER);
metaslab_group_alloc_update(mg);
for (int i = 0; i < mg->mg_allocators; i++) {
Expand Down Expand Up @@ -3562,10 +3553,8 @@ metaslab_group_preload(metaslab_group_t *mg)
avl_tree_t *t = &mg->mg_metaslab_tree;
int m = 0;

if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
taskq_wait_outstanding(mg->mg_taskq, 0);
if (spa_shutting_down(spa) || !metaslab_preload_enabled)
return;
}

mutex_enter(&mg->mg_lock);

Expand All @@ -3585,8 +3574,9 @@ metaslab_group_preload(metaslab_group_t *mg)
continue;
}

VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
msp, TQ_SLEEP) != TASKQID_INVALID);
VERIFY(taskq_dispatch(spa->spa_metaslab_taskq, metaslab_preload,
msp, TQ_SLEEP | (m <= mg->mg_allocators ? TQ_FRONT : 0))
!= TASKQID_INVALID);
}
mutex_exit(&mg->mg_lock);
}
Expand Down Expand Up @@ -6215,6 +6205,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, debug_unload, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_enabled, INT, ZMOD_RW,
"Preload potential metaslabs during reassessment");

ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_limit, UINT, ZMOD_RW,
"Max number of metaslabs per group to preload");

ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW,
"Delay in txgs after metaslab was last used before unloading");

Expand Down
28 changes: 21 additions & 7 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ static int spa_load_impl(spa_t *spa, spa_import_type_t type,
const char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);

/*
* Percentage of all CPUs that can be used by the metaslab preload taskq.
*/
static uint_t metaslab_preload_pct = 50;

static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */
static uint_t zio_taskq_batch_tpq; /* threads per taskq */
static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
Expand Down Expand Up @@ -1399,6 +1404,13 @@ spa_activate(spa_t *spa, spa_mode_t mode)
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
1, INT_MAX, 0);

/*
* The taskq to preload metaslabs.
*/
spa->spa_metaslab_taskq = taskq_create("z_metaslab",
metaslab_preload_pct, maxclsyspri, 1, INT_MAX,
TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);

/*
* Taskq dedicated to prefetcher threads: this is used to prevent the
* pool traverse code from monopolizing the global (and limited)
Expand Down Expand Up @@ -1434,6 +1446,11 @@ spa_deactivate(spa_t *spa)
spa->spa_zvol_taskq = NULL;
}

if (spa->spa_metaslab_taskq) {
taskq_destroy(spa->spa_metaslab_taskq);
spa->spa_metaslab_taskq = NULL;
}

if (spa->spa_prefetch_taskq) {
taskq_destroy(spa->spa_prefetch_taskq);
spa->spa_prefetch_taskq = NULL;
Expand Down Expand Up @@ -1706,13 +1723,7 @@ spa_unload(spa_t *spa)
* This ensures that there is no async metaslab prefetching
* while we attempt to unload the spa.
*/
if (spa->spa_root_vdev != NULL) {
for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) {
vdev_t *vc = spa->spa_root_vdev->vdev_child[c];
if (vc->vdev_mg != NULL)
taskq_wait(vc->vdev_mg->mg_taskq);
}
}
taskq_wait(spa->spa_metaslab_taskq);

if (spa->spa_mmp.mmp_thread)
mmp_thread_stop(spa);
Expand Down Expand Up @@ -10134,6 +10145,9 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify);

ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW,
"Percentage of CPUs to run a metaslab preload taskq");

/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW,
"log2 fraction of arc that can be used by inflight I/Os when "
Expand Down