From caae5eae441cd5e41b0998770e0827db7c32948f Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 24 Jun 2019 14:20:44 -0400 Subject: [PATCH] Avoid extra taskq_dispatch() calls by DMU. DMU sync code calls taskq_dispatch() for each sublist of os_dirty_dnodes and os_synced_dnodes. Since the number of sublists by default is equal to number of CPUs, it will dispatch equal, potentially large, number of tasks, waking up many CPUs to handle them, even if only one or few of sublists actually have any work to do. This change adds check for empty sublists to avoid this. Signed-off-by: Alexander Motin --- include/sys/multilist.h | 2 ++ module/zfs/dmu_objset.c | 19 ++++++++++++++----- module/zfs/multilist.c | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 439540685971..0c7b4075d9a3 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -89,6 +89,8 @@ void multilist_sublist_insert_head(multilist_sublist_t *, void *); void multilist_sublist_insert_tail(multilist_sublist_t *, void *); void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj); void multilist_sublist_remove(multilist_sublist_t *, void *); +int multilist_sublist_is_empty(multilist_sublist_t *); +int multilist_sublist_is_empty_idx(multilist_t *, unsigned int); void *multilist_sublist_head(multilist_sublist_t *); void *multilist_sublist_tail(multilist_sublist_t *); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 6b8c380e5ab4..5e596cf69e63 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1693,6 +1693,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) zio_t *zio; list_t *list; dbuf_dirty_record_t *dr; + int num_sublists; + multilist_t *ml; blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); *blkptr_copy = *os->os_rootbp; @@ -1781,10 +1783,13 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) } } - for (int i = 0; - i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) { + ml = os->os_dirty_dnodes[txgoff]; + num_sublists = multilist_get_num_sublists(ml); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(ml, i)) + continue; sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP); - sda->sda_list = os->os_dirty_dnodes[txgoff]; + sda->sda_list = ml; sda->sda_sublist_idx = i; sda->sda_tx = tx; (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq, @@ -2087,6 +2092,8 @@ userquota_updates_task(void *arg) void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) { + int num_sublists; + if (!dmu_objset_userused_enabled(os)) return; @@ -2119,8 +2126,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); } - for (int i = 0; - i < multilist_get_num_sublists(os->os_synced_dnodes); i++) { + num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i)) + continue; userquota_updates_arg_t *uua = kmem_alloc(sizeof (*uua), KM_SLEEP); uua->uua_os = os; diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index 2a594c56cbd5..b74ee0f0670a 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -363,6 +363,28 @@ multilist_sublist_remove(multilist_sublist_t *mls, void *obj) list_remove(&mls->mls_list, obj); } +int +multilist_sublist_is_empty(multilist_sublist_t *mls) +{ + ASSERT(MUTEX_HELD(&mls->mls_lock)); + return (list_is_empty(&mls->mls_list)); +} + +int +multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) +{ + multilist_sublist_t *mls; + int empty; + + ASSERT3U(sublist_idx, <, ml->ml_num_sublists); + mls = &ml->ml_sublists[sublist_idx]; + ASSERT(!MUTEX_HELD(&mls->mls_lock)); + mutex_enter(&mls->mls_lock); + empty = list_is_empty(&mls->mls_list); + mutex_exit(&mls->mls_lock); + return (empty); +} + void * multilist_sublist_head(multilist_sublist_t *mls) {