From 517fb733d8c145e6abfb8595185dc1227189e9cc Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Tue, 28 Nov 2023 22:28:48 +0000 Subject: [PATCH] Extend import_progress kstat with a notes field Detail the import progress of log spacemaps as they can take a very long time. Also grab the spa_note() messages to, as they provide insight into what is happening Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Co-authored-by: Don Brady Signed-off-by: Don Brady --- include/sys/spa.h | 3 + module/zfs/spa.c | 41 ++++++ module/zfs/spa_log_spacemap.c | 9 ++ module/zfs/spa_misc.c | 65 ++++++++- tests/runfiles/linux.run | 3 +- .../zpool_import/zpool_import_status.ksh | 133 ++++++++++++++++++ 6 files changed, 249 insertions(+), 5 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh diff --git a/include/sys/spa.h b/include/sys/spa.h index cef7933df441..ae447c2ba8e8 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -971,6 +971,9 @@ extern int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t max_txg); extern int spa_import_progress_set_state(uint64_t pool_guid, spa_load_state_t spa_load_state); +extern void spa_import_progress_set_notes(spa_t *spa, const char *notes); +extern void spa_import_progress_set_notes_nolog(spa_t *spa, + const char *notes); /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 20225640f8c5..bc9f60078db8 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3109,6 +3109,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_load_state = state; (void) spa_import_progress_set_state(spa_guid(spa), spa_load_state(spa)); + spa_import_progress_set_notes(spa, "spa_load()"); gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, type, &ereport); @@ -3375,6 +3376,12 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) import_expire = gethrtime() + import_delay; + char buf[128]; + (void) snprintf(buf, sizeof (buf), "checking MMP activity, waiting " + "%llu ms", (u_longlong_t)(import_delay/1000000)); + spa_import_progress_set_notes(spa, buf); + + int interations = 0; while (gethrtime() < import_expire) { (void) spa_import_progress_set_mmp_check(spa_guid(spa), NSEC2SEC(import_expire - gethrtime())); @@ -3408,6 +3415,14 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) break; } error = 0; + + if (interations++ % 30 == 0 && import_expire > gethrtime()) { + (void) snprintf(buf, sizeof (buf), "checking MMP " + "activity, %llu ms remaining", + (u_longlong_t) + ((import_expire - gethrtime())/1000000)); + spa_import_progress_set_notes(spa, buf); + } } out: @@ -4995,6 +5010,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve the checkpoint txg if the pool has a checkpoint. */ + spa_import_progress_set_notes(spa, "Loading checkpoint txg"); error = spa_ld_read_checkpoint_txg(spa); if (error != 0) return (error); @@ -5007,6 +5023,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * initiated. Otherwise we could be reading from indirect vdevs before * we have loaded their mappings. */ + spa_import_progress_set_notes(spa, "Loading indirect vdev metadata"); error = spa_ld_open_indirect_vdev_metadata(spa); if (error != 0) return (error); @@ -5015,6 +5032,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the full list of active features from the MOS and check if * they are all supported. */ + spa_import_progress_set_notes(spa, "Checking feature flags"); error = spa_ld_check_features(spa, &missing_feat_write); if (error != 0) return (error); @@ -5023,6 +5041,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load several special directories from the MOS needed by the dsl_pool * layer. */ + spa_import_progress_set_notes(spa, "Loading special MOS directories"); error = spa_ld_load_special_directories(spa); if (error != 0) return (error); @@ -5030,6 +5049,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve pool properties from the MOS. */ + spa_import_progress_set_notes(spa, "Loading properties"); error = spa_ld_get_props(spa); if (error != 0) return (error); @@ -5038,6 +5058,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the list of auxiliary devices - cache devices and spares - * and open them. */ + spa_import_progress_set_notes(spa, "Loading AUX vdevs"); error = spa_ld_open_aux_vdevs(spa, type); if (error != 0) return (error); @@ -5046,14 +5067,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load the metadata for all vdevs. Also check if unopenable devices * should be autoreplaced. */ + spa_import_progress_set_notes(spa, "Loading vdev metadata"); error = spa_ld_load_vdev_metadata(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Loading dedup tables"); error = spa_ld_load_dedup_tables(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa, "Loading BRT"); error = spa_ld_load_brt(spa); if (error != 0) return (error); @@ -5062,6 +5086,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Verify the logs now to make sure we don't have any unexpected errors * when we claim log blocks later. */ + spa_import_progress_set_notes(spa, "Verifying Log Devices"); error = spa_ld_verify_logs(spa, type, ereport); if (error != 0) return (error); @@ -5083,6 +5108,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * state. When performing an extreme rewind, we verify the whole pool, * which can take a very long time. */ + spa_import_progress_set_notes(spa, "Verifying pool data"); error = spa_ld_verify_pool_data(spa); if (error != 0) return (error); @@ -5092,6 +5118,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * we write anything to the pool because we'd need to update the space * accounting using the deflated sizes. */ + spa_import_progress_set_notes(spa, "Calculating deflated space"); spa_update_dspace(spa); /* @@ -5099,6 +5126,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * pool. If we are importing the pool in read-write mode, a few * additional steps must be performed to finish the import. */ + spa_import_progress_set_notes(spa, "Starting import"); if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || spa->spa_load_max_txg == UINT64_MAX)) { uint64_t config_cache_txg = spa->spa_config_txg; @@ -5122,6 +5150,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); } + spa_import_progress_set_notes(spa, "Claiming ZIL blocks"); /* * Traverse the ZIL and claim all blocks. */ @@ -5141,6 +5170,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * will have been set for us by ZIL traversal operations * performed above. */ + spa_import_progress_set_notes(spa, "Syncing ZIL claims"); txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* @@ -5148,6 +5178,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * next sync, we would update the config stored in vdev labels * and the cachefile (by default /etc/zfs/zpool.cache). */ + spa_import_progress_set_notes(spa, "Updating configs"); spa_ld_check_for_config_update(spa, config_cache_txg, update_config_cache); @@ -5156,6 +5187,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Then check all DTLs to see if anything needs resilvering. * The resilver will be deferred if a rebuild was started. */ + spa_import_progress_set_notes(spa, "Starting resilvers"); if (vdev_rebuild_active(spa->spa_root_vdev)) { vdev_rebuild_restart(spa); } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) && @@ -5169,6 +5201,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) */ spa_history_log_version(spa, "open", NULL); + spa_import_progress_set_notes(spa, + "Restarting device removals"); spa_restart_removal(spa); spa_spawn_aux_threads(spa); @@ -5181,19 +5215,26 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * auxiliary threads above (from which the livelist * deletion zthr is part of). */ + spa_import_progress_set_notes(spa, + "Cleaning up inconsistent objsets"); (void) dmu_objset_find(spa_name(spa), dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); /* * Clean up any stale temporary dataset userrefs. */ + spa_import_progress_set_notes(spa, + "Cleaning up temporary userrefs"); dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_import_progress_set_notes(spa, "Restarting Initialize"); vdev_initialize_restart(spa->spa_root_vdev); + spa_import_progress_set_notes(spa, "Restarting TRIM"); vdev_trim_restart(spa->spa_root_vdev); vdev_autotrim_restart(spa); spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_import_progress_set_notes(spa, "Finished importing"); } spa_import_progress_remove(spa_guid(spa)); diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index cf05158b63f8..d08d6ca6458a 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1153,6 +1153,7 @@ spa_ld_log_sm_data(spa_t *spa) uint_t pn = 0; uint64_t ps = 0; + uint64_t nsm = 0; psls = sls = avl_first(&spa->spa_sm_logs_by_txg); while (sls != NULL) { /* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */ @@ -1185,6 +1186,13 @@ spa_ld_log_sm_data(spa_t *spa) summary_add_data(spa, sls->sls_txg, sls->sls_mscount, 0, sls->sls_nblocks); + char buf[128]; + (void) snprintf(buf, sizeof (buf), + "read %llu of %llu log space maps", + (u_longlong_t)nsm, + (u_longlong_t)avl_numnodes(&spa->spa_sm_logs_by_txg)); + spa_import_progress_set_notes_nolog(spa, buf); + struct spa_ld_log_sm_arg vla = { .slls_spa = spa, .slls_txg = sls->sls_txg @@ -1200,6 +1208,7 @@ spa_ld_log_sm_data(spa_t *spa) pn--; ps -= space_map_length(sls->sls_sm); + nsm++; space_map_close(sls->sls_sm); sls->sls_sm = NULL; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3990af98c732..78c65d3905d9 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -426,6 +426,8 @@ spa_load_note(spa_t *spa, const char *fmt, ...) zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); + + spa_import_progress_set_notes_nolog(spa, buf); } /* @@ -2184,6 +2186,7 @@ typedef struct spa_import_progress { uint64_t pool_guid; /* unique id for updates */ char *pool_name; spa_load_state_t spa_load_state; + char *spa_load_notes; uint64_t mmp_sec_remaining; /* MMP activity check */ uint64_t spa_load_max_txg; /* rewind txg */ procfs_list_node_t smh_node; @@ -2194,9 +2197,9 @@ spa_history_list_t *spa_import_progress_list = NULL; static int spa_import_progress_show_header(struct seq_file *f) { - seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid", + seq_printf(f, "%-20s %-14s %-14s %-12s %s %s\n", "pool_guid", "load_state", "multihost_secs", "max_txg", - "pool_name"); + "pool_name", "notes"); return (0); } @@ -2205,11 +2208,12 @@ spa_import_progress_show(struct seq_file *f, void *data) { spa_import_progress_t *sip = (spa_import_progress_t *)data; - seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n", + seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s %s\n", (u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state, (u_longlong_t)sip->mmp_sec_remaining, (u_longlong_t)sip->spa_load_max_txg, - (sip->pool_name ? sip->pool_name : "-")); + (sip->pool_name ? sip->pool_name : "-"), + (sip->spa_load_notes ? sip->spa_load_notes : "-")); return (0); } @@ -2223,6 +2227,8 @@ spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size) sip = list_remove_head(&shl->procfs_list.pl_list); if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + spa_strfree(sip->spa_load_notes); kmem_free(sip, sizeof (spa_import_progress_t)); shl->size--; } @@ -2278,6 +2284,10 @@ spa_import_progress_set_state(uint64_t pool_guid, sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->spa_load_state = load_state; + if (sip->spa_load_notes != NULL) { + spa_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } error = 0; break; } @@ -2287,6 +2297,50 @@ spa_import_progress_set_state(uint64_t pool_guid, return (error); } +static void +spa_import_progress_set_notes_impl(spa_t *spa, const char *notes, + boolean_t log_dbgmsg) +{ + spa_history_list_t *shl = spa_import_progress_list; + spa_import_progress_t *sip; + uint64_t pool_guid = spa_guid(spa); + + if (shl->size == 0) + return; + + mutex_enter(&shl->procfs_list.pl_lock); + for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; + sip = list_prev(&shl->procfs_list.pl_list, sip)) { + if (sip->pool_guid == pool_guid) { + if (sip->spa_load_notes != NULL) { + spa_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } + if (notes != NULL) { + sip->spa_load_notes = spa_strdup(notes); + if (log_dbgmsg) { + zfs_dbgmsg("'%s' %s", sip->pool_name, + notes); + } + } + break; + } + } + mutex_exit(&shl->procfs_list.pl_lock); +} + +void +spa_import_progress_set_notes(spa_t *spa, const char *notes) +{ + spa_import_progress_set_notes_impl(spa, notes, B_TRUE); +} + +void +spa_import_progress_set_notes_nolog(spa_t *spa, const char *notes) +{ + spa_import_progress_set_notes_impl(spa, notes, B_FALSE); +} + int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg) { @@ -2355,6 +2409,7 @@ spa_import_progress_add(spa_t *spa) poolname = spa_name(spa); sip->pool_name = spa_strdup(poolname); sip->spa_load_state = spa_load_state(spa); + sip->spa_load_notes = NULL; mutex_enter(&shl->procfs_list.pl_lock); procfs_list_add(&shl->procfs_list, sip); @@ -2374,6 +2429,8 @@ spa_import_progress_remove(uint64_t pool_guid) if (sip->pool_guid == pool_guid) { if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + spa_strfree(sip->spa_load_notes); list_remove(&shl->procfs_list.pl_list, sip); shl->size--; kmem_free(sip, sizeof (spa_import_progress_t)); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 8bc55a1b4b47..a0f7ca4f5be2 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -86,7 +86,8 @@ tags = ['functional', 'cli_root', 'zpool_expand'] tests = ['zpool_import_hostid_changed', 'zpool_import_hostid_changed_unclean_export', 'zpool_import_hostid_changed_cachefile', - 'zpool_import_hostid_changed_cachefile_unclean_export'] + 'zpool_import_hostid_changed_cachefile_unclean_export', + 'zpool_import_status'] tags = ['functional', 'cli_root', 'zpool_import'] [tests/functional/cli_root/zpool_reopen:Linux] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh new file mode 100755 index 000000000000..7fb667e45df2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh @@ -0,0 +1,133 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2023 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg + +# +# DESCRIPTION: +# During a pool import, the 'import_progress' kstat contains details +# on the import progress. +# +# STRATEGY: +# 1. Create test pool with several devices +# 2. Generate some ZIL records and spacemap logs +# 3. Export the pool +# 4. Import the pool in the background and monitor the kstat content +# 5. Check the zfs debug messages for import progress +# + +verify_runnable "global" + +function cleanup +{ + log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0 + log_must set_tunable64 METASLAB_DEBUG_LOAD 0 + + destroy_pool $TESTPOOL1 +} + +log_assert "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress" + +log_onexit cleanup + +log_must zpool create $TESTPOOL1 $VDEV0 $VDEV1 $VDEV2 +typeset guid=$(zpool get -H -o value guid $TESTPOOL1) + +log_must zfs create -o recordsize=8k $TESTPOOL1/fs +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL1/fs/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# Overwrite some blocks to populate spacemap logs +# +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools + +# +# Freeze the pool to retain intent log records +# +log_must zpool freeze $TESTPOOL1 + +# fill_fs [destdir] [dirnum] [filenum] [bytes] [num_writes] [data] +log_must fill_fs /$TESTPOOL1/fs 1 2000 100 1024 R + +log_must zpool list -v $TESTPOOL1 + +# +# Unmount filesystem and export the pool +# +# At this stage the zfs intent log contains +# a set of records to replay. +# +log_must zfs unmount /$TESTPOOL1/fs + +log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1 +log_must zpool export $TESTPOOL1 + +log_must set_tunable64 METASLAB_DEBUG_LOAD 1 +log_note "Starting zpool import in background at" $(/usr/bin/date +'%H:%M:%S:%3N') +zpool import -d $DEVICE_DIR -f $guid & +pid=$! + +# +# capture progress until import is finished +# +log_note waiting for pid $pid to exit +cat /proc/spl/kstat/zfs/import_progress +while [[ -d /proc/"$pid" ]]; do + line=$(cat /proc/spl/kstat/zfs/import_progress | grep -v pool_guid) + if [[ -n $line ]]; then + echo $(/usr/bin/date +'%H:%M:%S:%3N') $line + fi + if [[ -f /$TESTPOOL1/fs/00 ]]; then + break; + fi + sleep 0.0001 +done +log_note "zpool import completed at" $(/usr/bin/date +'%H:%M:%S:%3N') + +entries=$(grep "spa_import_progress_set_notes_impl(): 'testpool1'" /proc/spl/kstat/zfs/dbgmsg | wc -l) +log_note "found $entries progress notes in dbgmsg" +log_must test $entries -gt 20 + +log_must zpool status $TESTPOOL1 + +log_pass "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress"