From 48b0a56b904b9e1cb1e4e8f6aca9367737813acd Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Fri, 11 Feb 2022 08:20:23 +0100 Subject: [PATCH] Avoid dirtying the final TXGs when exporting a pool There are two codepaths that can dirty final TXGs: 1) If calling spa_export_common()->spa_unload()-> spa_unload_log_sm_flush_all() after the spa_final_txg is set, then spa_sync()->spa_flush_metaslabs() may end up dirtying the final TXGs. Then we have the following panic: Call Trace: dump_stack_lvl+0x46/0x62 spl_panic+0xea/0x102 [spl] dbuf_dirty+0xcd6/0x11b0 [zfs] zap_lockdir_impl+0x321/0x590 [zfs] zap_lockdir+0xed/0x150 [zfs] zap_update+0x69/0x250 [zfs] feature_sync+0x5f/0x190 [zfs] space_map_alloc+0x83/0xc0 [zfs] spa_generate_syncing_log_sm+0x10b/0x2f0 [zfs] spa_flush_metaslabs+0xb2/0x350 [zfs] spa_sync_iterate_to_convergence+0x15a/0x320 [zfs] spa_sync+0x2e0/0x840 [zfs] txg_sync_thread+0x2b1/0x3f0 [zfs] thread_generic_wrapper+0x62/0xa0 [spl] kthread+0x127/0x150 ret_from_fork+0x22/0x30 2) Calling vdev_*_stop_all() for a second time in spa_unload() after spa_export_common() unnecessarily delays the final TXGs beyond what spa_final_txg is set at. Fix this by performing the check and call for spa_unload_log_sm_flush_all() before the spa_final_txg is set in spa_export_common(). Also check if the spa_final_txg has already been set in spa_unload() and skip those calls in this case. Signed-off-by: George Amanakis --- module/zfs/spa.c | 60 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7e18048af5e6..b2523e7a30d5 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1597,25 +1597,33 @@ spa_unload(spa_t *spa) spa_wake_waiters(spa); /* - * If the log space map feature is enabled and the pool is getting - * exported (but not destroyed), we want to spend some time flushing - * as many metaslabs as we can in an attempt to destroy log space - * maps and save import time. 
+ * If we have set the spa_final_txg, we have already performed the + * tasks below in spa_export_common(). We should not redo it here since + * we delay the final TXGs beyond what spa_final_txg is set at. */ - if (spa_should_flush_logs_on_unload(spa)) - spa_unload_log_sm_flush_all(spa); + if (spa->spa_final_txg == UINT64_MAX) { + /* + * If the log space map feature is enabled and the pool is + * getting exported (but not destroyed), we want to spend some + * time flushing as many metaslabs as we can in an attempt to + * destroy log space maps and save import time. + */ + if (spa_should_flush_logs_on_unload(spa)) + spa_unload_log_sm_flush_all(spa); - /* - * Stop async tasks. - */ - spa_async_suspend(spa); + /* + * Stop async tasks. + */ + spa_async_suspend(spa); - if (spa->spa_root_vdev) { - vdev_t *root_vdev = spa->spa_root_vdev; - vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE); - vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); - vdev_autotrim_stop_all(spa); - vdev_rebuild_stop_all(spa); + if (spa->spa_root_vdev) { + vdev_t *root_vdev = spa->spa_root_vdev; + vdev_initialize_stop_all(root_vdev, + VDEV_INITIALIZE_ACTIVE); + vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); + vdev_autotrim_stop_all(spa); + vdev_rebuild_stop_all(spa); + } } /* @@ -6427,9 +6435,27 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa->spa_state = new_state; + vdev_config_dirty(spa->spa_root_vdev); + spa_config_exit(spa, SCL_ALL, FTAG); + } + + /* + * If the log space map feature is enabled and the pool is + * getting exported (but not destroyed), we want to spend some + * time flushing as many metaslabs as we can in an attempt to + * destroy log space maps and save import time. This has to be + * done before we set the spa_final_txg, otherwise + * spa_sync() -> spa_flush_metaslabs() may dirty the final TXGs. 
+ * spa_should_flush_logs_on_unload() should be called after + * spa_state has been set to the new_state. + */ + if (spa_should_flush_logs_on_unload(spa)) + spa_unload_log_sm_flush_all(spa); + + if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa->spa_final_txg = spa_last_synced_txg(spa) + TXG_DEFER_SIZE + 1; - vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); } }