From 6f51903b381c93b5ff9370bdcf491622564ca042 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Thu, 9 May 2024 21:55:42 -0700 Subject: [PATCH] Update Signed-off-by: Paul Dagnelie --- cmd/zdb/zdb.c | 59 ++- cmd/zhack.c | 5 +- cmd/zpool/zpool_main.c | 108 ++++-- cmd/ztest.c | 12 +- include/libzfs.h | 1 + include/libzfs_core.h | 7 +- include/sys/fs/zfs.h | 16 +- include/sys/spa.h | 8 +- include/sys/spa_impl.h | 19 +- include/sys/zil.h | 2 + lib/libzfs/libzfs_pool.c | 146 ++++++-- lib/libzfs/libzfs_status.c | 4 + lib/libzfs_core/libzfs_core.c | 28 +- man/man7/zpoolconcepts.7 | 2 +- module/zfs/dsl_pool.c | 6 +- module/zfs/spa.c | 350 ++++++++++++------ module/zfs/spa_misc.c | 7 +- module/zfs/zap_leaf.c | 4 +- module/zfs/zfs_ioctl.c | 83 ++++- module/zfs/zil.c | 49 ++- tests/runfiles/common.run | 2 +- tests/zfs-tests/include/libtest.shlib | 4 +- tests/zfs-tests/tests/Makefile.am | 2 + .../functional/shared_log/shared_log.kshlib | 4 + .../shared_log/shared_log_001_pos.ksh | 2 + .../shared_log/shared_log_002_pos.ksh | 2 + .../shared_log/shared_log_003_pos.ksh | 2 + .../shared_log/shared_log_004_pos.ksh | 5 +- .../shared_log/shared_log_006_neg.ksh | 11 +- .../shared_log/shared_log_007_pos.ksh | 51 +++ .../shared_log/shared_log_008_pos.ksh | 81 ++++ 31 files changed, 878 insertions(+), 204 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh create mode 100755 tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 955892bade4f..9e390d502b93 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -5789,7 +5789,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, /* * Theoretically, we could try to track leaks here, but it would * require also importing the shared log pool and processing the - * chain map and space maps for it. The ZIL currently doesn't have + * chain map and space maps for it. 
ZDB currently doesn't have * much facility to support multiple pools at once, so we leave this * for future work. */ @@ -6827,7 +6827,8 @@ chain_map_count_blocks(spa_t *spa, zdb_cb_t *zbc) for (spa_chain_map_os_t *os_node = avl_first(os_t); os_node != NULL; os_node = AVL_NEXT(os_t, os_node)) { (void) zil_parse_raw(spa, &os_node->scmo_chain_head, - chain_map_count_blk_cb, chain_map_count_lr_cb, zbc); + chain_map_count_blk_cb, chain_map_count_lr_cb, + zbc); } } } @@ -8188,6 +8189,57 @@ dump_log_spacemap_obsolete_stats(spa_t *spa) (u_longlong_t)lsos.lsos_total_entries); } +static void print_blkptr(const blkptr_t *bp) +{ + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) + snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); + (void) printf("%s\n", blkbuf); + +} + +static int +chain_map_dump_blk_cb(spa_t *spa, const blkptr_t *bp, void *arg) +{ + (void) spa, (void) arg; + printf("\t\t\tBP: "); + print_blkptr(bp); + return (0); +} + +static int +chain_map_dump_lr_cb(spa_t *spa, const lr_t *lrc, void *arg) +{ + (void) spa, (void) arg; + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + printf("\t\t\tLR BP: "); + print_blkptr(bp); + return (0); +} + +static void +dump_chain_map(spa_t *spa) +{ + (void) printf("Chain map contents:\n"); + avl_tree_t *pool_t = &spa->spa_chain_map; + + for (spa_chain_map_pool_t *pool_node = avl_first(pool_t); + pool_node != NULL; pool_node = AVL_NEXT(pool_t, pool_node)) { + avl_tree_t *os_t = &pool_node->scmp_os_tree; + (void) printf("\tPool entry: %s\n", pool_node->scmp_name); + for (spa_chain_map_os_t *os_node = avl_first(os_t); + os_node != NULL; os_node = AVL_NEXT(os_t, os_node)) { + (void) printf("\t\tObjset entry: %"PRIu64"\n\t\t\t", + os_node->scmo_id); + print_blkptr(&os_node->scmo_chain_head); + (void) zil_parse_raw(spa, &os_node->scmo_chain_head, + chain_map_dump_blk_cb, chain_map_dump_lr_cb, NULL); + } + } +} + static 
void dump_zpool(spa_t *spa) { @@ -8269,6 +8321,9 @@ dump_zpool(spa_t *spa) (void) dmu_objset_find(spa_name(spa), dump_one_objset, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); + if (spa_is_shared_log(spa)) + dump_chain_map(spa); + if (rc == 0 && !dump_opt['L']) rc = dump_mos_leaks(spa); diff --git a/cmd/zhack.c b/cmd/zhack.c index f15a6ece538c..8f342de22ba1 100644 --- a/cmd/zhack.c +++ b/cmd/zhack.c @@ -104,7 +104,7 @@ fatal(spa_t *spa, const void *tag, const char *fmt, ...) if (spa != NULL) { spa_close(spa, tag); - (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE); + (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE, NULL); } va_start(ap, fmt); @@ -1015,7 +1015,8 @@ main(int argc, char **argv) usage(); } - if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) { + if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE, + NULL) != 0) { fatal(NULL, FTAG, "pool export failed; " "changes may not be committed to disk\n"); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 5cb8a0a350c4..d0b94876ec70 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -388,8 +388,8 @@ get_usage(zpool_help_t idx) "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... 
\n" - "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m -L pool] " - "[-N] [-R root] [-F [-n]]\n" + "\t [-d dir | -c cachefile] [-D] [-l] [-f] " + "[-m [-L pool]] [-N] [-R root] [-F [-n]]\n" "\t [--rewind-to-checkpoint] [newpool]\n")); case HELP_IOSTAT: return (gettext("\tiostat [[[-c [script1,script2,...]" @@ -3661,10 +3661,31 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, uint_t npools = 0; + int err = 0; + nvpair_t *elem = NULL, *next = NULL; + boolean_t first = B_TRUE; tpool_t *tp = NULL; if (import->do_all) { tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); + + elem = nvlist_next_nvpair(pools, NULL); + next = nvlist_next_nvpair(pools, elem); + + while (elem != NULL) { + verify(nvpair_value_nvlist(elem, &config) == 0); + if (fnvlist_lookup_boolean(config, + ZPOOL_CONFIG_IS_SHARED_LOG)) { + err = do_import(config, NULL, mntopts, props, + flags, mount_tp_nthr); + first = B_FALSE; + fnvlist_remove_nvpair(pools, elem); + } + elem = next; + next = nvlist_next_nvpair(pools, elem); + } + if (err != 0) + return (err); } /* @@ -3673,9 +3694,6 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, * post-process the list to deal with pool state and possible * duplicate names. 
*/ - int err = 0; - nvpair_t *elem = NULL; - boolean_t first = B_TRUE; if (!pool_specified && import->do_all) { while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) npools++; @@ -6741,8 +6759,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, if (!printed) { /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, depth + 2, "", cb->cb_namewidth, - class_name[n]); + (void) printf(dashes, depth + 2, "", + cb->cb_namewidth, class_name[n]); printed = B_TRUE; } vname = zpool_vdev_name(g_zfs, zhp, child[c], @@ -6756,7 +6774,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, uint64_t shared_log_guid; if (name == NULL && nvlist_lookup_uint64(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log_guid) == 0) { - (void) printf(dashes, depth + 2, "", cb->cb_namewidth, "shared log"); + (void) printf(dashes, depth + 2, "", cb->cb_namewidth, + "shared log"); zpool_handle_t *shared_log = find_by_guid(g_zfs, shared_log_guid); VERIFY(shared_log); @@ -8009,31 +8028,36 @@ struct recycle_data { boolean_t verbose; }; +static void +print_recycle_info(nvlist_t *nvl, boolean_t dryrun) +{ + printf("Cleaned up%s: [", dryrun ? " (dry run)" : ""); + nvpair_t *elem = NULL; + boolean_t first = B_TRUE; + while ((elem = nvlist_next_nvpair(nvl, elem))) { + printf("%s%s", first ? "" : ",\n\t", nvpair_name(elem)); + first = B_FALSE; + } + printf("]\n"); +} + static int recycle_callback(zpool_handle_t *zhp, void *data) { struct recycle_data *rd = data; nvlist_t *nvl; - int err = lzc_recycle(zpool_get_name(zhp), rd->dryrun, &nvl); + int err = lzc_recycle(zpool_get_name(zhp), NULL, rd->dryrun, &nvl); if (err) return (err); - if (rd->verbose) { - printf("Cleaned up%s: [", rd->dryrun ? " (dry run)" : ""); - nvpair_t *elem = NULL; - boolean_t first = B_TRUE; - while ((elem = nvlist_next_nvpair(nvl, elem))) { - printf("%s%s", first ? 
"" : ",\n\t", nvpair_name(elem)); - first = B_FALSE; - } - printf("]\n"); - } + if (rd->verbose) + print_recycle_info(nvl, rd->dryrun); nvlist_free(nvl); return (0); } /* - * zpool recycle ... + * zpool recycle [-a] [-n] [-v] [pool]... * * Cleans up chain maps for non-attached client pools */ @@ -8042,9 +8066,10 @@ zpool_do_recycle(int argc, char **argv) { int c; struct recycle_data rd = {0}; + boolean_t doall = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "nv")) != -1) { + while ((c = getopt(argc, argv, "nva")) != -1) { switch (c) { case 'n': rd.dryrun = B_TRUE; @@ -8052,6 +8077,9 @@ zpool_do_recycle(int argc, char **argv) case 'v': rd.verbose = B_TRUE; break; + case 'a': + doall = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -8065,10 +8093,44 @@ zpool_do_recycle(int argc, char **argv) if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); + } else if (argc == 1 && !doall) { + (void) fprintf(stderr, gettext("missing client pools\n")); + usage(B_FALSE); + } else if (argc > 1 && doall) { + (void) fprintf(stderr, gettext("specific client pools and " + "do_all\n")); + usage(B_FALSE); } - return (for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, - B_FALSE, recycle_callback, &rd)); + if (doall) { + return (for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, recycle_callback, &rd)); + } + + const char *pool = argv[0]; + argc--; + argv++; + + nvlist_t *clients = NULL; + if (argc > 0) + clients = fnvlist_alloc(); + while (argc > 0) { + fnvlist_add_boolean(clients, argv[0]); + argc--; + argv++; + } + + nvlist_t *nvl; + int err = lzc_recycle(pool, clients, rd.dryrun, &nvl); + if (clients) + nvlist_free(clients); + if (err) + return (err); + if (rd.verbose) + print_recycle_info(nvl, rd.dryrun); + nvlist_free(nvl); + + return (0); } /* diff --git a/cmd/ztest.c b/cmd/ztest.c index f77a37c21545..b6a617fa1f92 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c 
@@ -3045,7 +3045,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * an export concurrently. */ VERIFY0(spa_open(zo->zo_pool, &spa, FTAG)); - int error = spa_destroy(zo->zo_pool); + int error = spa_destroy(zo->zo_pool, NULL); if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) { fatal(B_FALSE, "spa_destroy(%s) returned unexpected value %d", spa->spa_name, error); @@ -3147,7 +3147,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) /* * Clean up from previous runs. */ - (void) spa_destroy(name); + (void) spa_destroy(name, NULL); raidz_children = ztest_get_raidz_children(ztest_spa); @@ -3601,7 +3601,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) } /* clean up the old pool, if any */ - (void) spa_destroy("splitp"); + (void) spa_destroy("splitp", NULL); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -7366,7 +7366,7 @@ ztest_spa_import_export(char *oldname, char *newname) /* * Clean up from previous runs. */ - (void) spa_destroy(newname); + (void) spa_destroy(newname, NULL); /* * Get the pool's configuration and guid. @@ -7387,7 +7387,7 @@ ztest_spa_import_export(char *oldname, char *newname) /* * Export it. */ - VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE)); + VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE, NULL)); ztest_walk_pool_directory("pools after export"); @@ -8531,7 +8531,7 @@ ztest_init(ztest_shared_t *zs) /* * Create the storage pool. 
*/ - (void) spa_destroy(ztest_opts.zo_pool); + (void) spa_destroy(ztest_opts.zo_pool, NULL); ztest_shared->zs_vdev_next_leaf = 0; zs->zs_splits = 0; zs->zs_mirrors = ztest_opts.zo_mirrors; diff --git a/include/libzfs.h b/include/libzfs.h index 1c744254a3b9..5dd93b72c9c7 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -346,6 +346,7 @@ _LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path); _LIBZFS_H const char *zpool_get_state_str(zpool_handle_t *); +_LIBZFS_H zpool_handle_t *zpool_get_shared_log(zpool_handle_t *); /* * Functions to manage pool properties diff --git a/include/libzfs_core.h b/include/libzfs_core.h index 98c1e4c60601..484cd9a22569 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -153,13 +153,18 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *); _LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *); _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **); -_LIBZFS_CORE_H int lzc_recycle(const char *, boolean_t, nvlist_t **); +_LIBZFS_CORE_H int lzc_recycle(const char *, nvlist_t *,boolean_t, + nvlist_t **); _LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **); _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **); _LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **); +_LIBZFS_CORE_H int lzc_pool_destroy(const char *, const char *, nvlist_t **); +_LIBZFS_CORE_H int lzc_pool_export(const char *, const char *, boolean_t, + boolean_t, nvlist_t **); + #ifdef __cplusplus } #endif diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index a0883d859475..f8c89a10f37b 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1515,6 +1515,8 @@ typedef enum zfs_ioc { ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */ ZFS_IOC_POOL_SCRUB, /* 0x5a57 */ ZFS_IOC_POOL_RECYCLE, /* 0x5a58 */ + ZFS_IOC_POOL_DESTROY_NEW, /* 0x5a59 */ + ZFS_IOC_POOL_EXPORT_NEW, /* 0x5a5a */ /* * Per-platform (Optional) - 
8/128 numbers reserved. @@ -1724,9 +1726,21 @@ typedef enum { #define ZFS_WAIT_WAITED "wait_waited" /* - * The following name is used when invoking ZFS_IOC_POOL_RECYCLE. + * The following names are used when invoking ZFS_IOC_POOL_RECYCLE. */ #define ZPOOL_RECYCLE_DRYRUN "dryrun" +#define ZPOOL_RECYCLE_CLIENTS "clients" + +/* + * The following are names used when invoking ZFS_IOC_POOL_EXPORT_NEW. + */ +#define ZPOOL_EXPORT_FORCE "force" +#define ZPOOL_EXPORT_HARDFORCE "hardforce" + +/* + * Name that is used to convey client information for shared log pools. + */ +#define ZPOOL_SHARED_LOG_CLIENTS "clients" /* * Flags for ZFS_IOC_VDEV_SET_STATE diff --git a/include/sys/spa.h b/include/sys/spa.h index c8c2e03c4c60..9e722839ea24 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -750,11 +750,11 @@ extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); -extern int spa_destroy(const char *pool); +extern int spa_destroy(const char *pool, nvlist_t *ounvl); extern int spa_checkpoint(const char *pool); extern int spa_checkpoint_discard(const char *pool); extern int spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force, - boolean_t hardforce); + boolean_t hardforce, nvlist_t *outnvl); extern int spa_reset(const char *pool); extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); @@ -1231,7 +1231,9 @@ extern void spa_zil_delete(spa_t *spa, objset_t *os); extern void spa_zil_header_convert(spa_t *spa, objset_t *os, blkptr_t *bp); extern void spa_zil_header_mask(spa_t *spa, blkptr_t *bp); extern spa_t *spa_get_shared_log_pool(spa_t *spa); -extern int spa_recycle(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl); +extern int spa_recycle_all(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl); +extern int spa_recycle_clients(spa_t *spa, nvlist_t *clients, + 
boolean_t dryrun, nvlist_t *outnvl); /* module param call functions */ int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 09059a832be6..108bf892579b 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -228,6 +228,8 @@ typedef struct spa_zil_update_head { uint64_t szuh_id; blkptr_t szuh_chain_head; boolean_t szuh_set; + // Only used for the special once-per-pool entry + boolean_t szuh_force; } spa_zil_update_head_t; typedef struct spa_zil_update { @@ -235,6 +237,11 @@ typedef struct spa_zil_update { blkptr_t szu_chain_head; } spa_zil_update_t; +typedef struct spa_zil_chain_map_value { + char szcmv_pool_name[ZFS_MAX_DATASET_NAME_LEN]; + blkptr_t szcmv_bp; +} spa_zil_chain_map_value_t; + typedef struct spa_chain_map_os { avl_node_t scmo_avl; uint64_t scmo_id; @@ -244,6 +251,7 @@ typedef struct spa_chain_map_os { typedef struct spa_chain_map_pool { avl_node_t scmp_avl; uint64_t scmp_guid; + char scmp_name[ZFS_MAX_DATASET_NAME_LEN]; avl_tree_t scmp_os_tree; } spa_chain_map_pool_t; @@ -256,7 +264,7 @@ struct spa { avl_node_t spa_avl; /* node in spa_namespace_avl */ avl_node_t spa_log_avl; /* node in spa_shared_log_avl */ /* node in spa_registered_clients */ - avl_node_t spa_client_avl; + list_node_t spa_client_node; nvlist_t *spa_config; /* last synced config */ nvlist_t *spa_config_syncing; /* currently syncing config */ nvlist_t *spa_config_splitting; /* config for splitting */ @@ -279,6 +287,11 @@ struct spa { kthread_t *spa_export_thread; /* valid during pool export */ /* true if pool's log device is shared log */ boolean_t spa_uses_shared_log; + /* + * true if pool was imported with MISSING_LOGS and couldn't find + * its shared log pool + */ + boolean_t spa_discarding_shared_log; kthread_t *spa_load_thread; /* loading, no namespace lock */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ @@ -338,7 +351,7 @@ 
struct spa { boolean_t spa_extreme_rewind; /* rewind past deferred frees */ kmutex_t spa_scrub_lock; /* resilver/scrub lock */ uint64_t spa_scrub_inflight; /* in-flight scrub bytes */ - boolean_t spa_pool_type; /* normal or object-based */ + spa_pool_type_t spa_pool_type; /* in-flight verification bytes */ uint64_t spa_load_verify_bytes; @@ -514,7 +527,7 @@ struct spa { /* Only used if type is shared log */ kmutex_t spa_chain_map_lock; avl_tree_t spa_chain_map; - avl_tree_t spa_registered_clients; + list_t spa_registered_clients; /* Only used during syncing context if using shared log */ kmutex_t spa_zil_map_lock; diff --git a/include/sys/zil.h b/include/sys/zil.h index 24fed4ab4863..89ff54143aba 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -581,6 +581,8 @@ extern void zil_commit_impl(zilog_t *zilog, uint64_t oid); extern void zil_remove_async(zilog_t *zilog, uint64_t oid); extern int zil_reset(const char *osname, void *txarg); +extern int zil_clear(struct dsl_pool *dp, + struct dsl_dataset *ds, void *txarg); extern int zil_claim(struct dsl_pool *dp, struct dsl_dataset *ds, void *txarg); extern int zil_check_log_chain(struct dsl_pool *dp, diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 979bbdd3809a..4f333f700419 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -239,6 +239,39 @@ zpool_pool_state_to_name(pool_state_t state) return (gettext("UNKNOWN")); } +struct shared_log_cbdata { + uint64_t guid; + zpool_handle_t *shared_log_pool; +}; + +static int +shared_log_cb(zpool_handle_t *hdl, void *arg) +{ + struct shared_log_cbdata *data = arg; + if (fnvlist_lookup_uint64(hdl->zpool_config, ZPOOL_CONFIG_POOL_GUID) == + data->guid) { + data->shared_log_pool = hdl; + } + return (0); +} + +zpool_handle_t * +zpool_get_shared_log(zpool_handle_t *zhp) +{ + uint64_t guid; + if (nvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_SHARED_LOG_POOL, &guid) != 0) { + return (NULL); + } + struct shared_log_cbdata data; + 
data.guid = guid; + int err = zpool_iter(zhp->zpool_hdl, shared_log_cb, &data); + if (err != 0) { + return (NULL); + } + return (data.shared_log_pool); +} + /* * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED", * "SUSPENDED", etc). @@ -265,6 +298,10 @@ zpool_get_state_str(zpool_handle_t *zhp) vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array( nvroot, ZPOOL_CONFIG_VDEV_STATS, &vsc); str = zpool_state_to_name(vs->vs_state, vs->vs_aux); + zpool_handle_t *shared_log = zpool_get_shared_log(zhp); + if (vs->vs_state == VDEV_STATE_HEALTHY && shared_log != NULL) { + str = zpool_get_state_str(shared_log); + } } return (str); } @@ -1645,23 +1682,48 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str) libzfs_handle_t *hdl = zhp->zpool_hdl; char errbuf[ERRBUFLEN]; - if (zhp->zpool_state == POOL_STATE_ACTIVE && - (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) - return (-1); + nvlist_t *outnvl; + int err = lzc_pool_destroy(zhp->zpool_name, log_str, &outnvl); + if (err == ZFS_ERR_IOC_CMD_UNAVAIL) { + if (zhp->zpool_state == POOL_STATE_ACTIVE && + (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) + == NULL) + return (-1); - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_history = (uint64_t)(uintptr_t)log_str; + (void) strlcpy(zc.zc_name, zhp->zpool_name, + sizeof (zc.zc_name)); + zc.zc_history = (uint64_t)(uintptr_t)log_str; + if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) + err = errno; + else + err = 0; + } - if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { + if (err != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zpool_name); - if (errno == EROFS) { + if (err == EROFS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is read only")); (void) zfs_error(hdl, EZFS_BADDEV, errbuf); + } else if (err == EBUSY && outnvl != NULL) { + nvlist_t *clients = fnvlist_lookup_nvlist(outnvl, + ZPOOL_SHARED_LOG_CLIENTS); + 
nvpair_t *elem = nvlist_next_nvpair(clients, NULL); + char buf[ERRBUFLEN]; + int idx = snprintf(buf, ERRBUFLEN, "%s", + nvpair_name(elem)); + while ((elem = nvlist_next_nvpair(clients, elem)) + != NULL && idx < ERRBUFLEN) { + idx += snprintf(buf + idx, ERRBUFLEN - idx, + ", %s", nvpair_name(elem)); + } + zfs_error_aux(hdl, "pool has active clients: %s", buf); + (void) zfs_error(hdl, EZFS_BUSY, errbuf); + fnvlist_free(outnvl); } else { - (void) zpool_standard_error(hdl, errno, errbuf); + (void) zpool_standard_error(hdl, err, errbuf); } if (zfp) @@ -1839,27 +1901,52 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, { zfs_cmd_t zc = {"\0"}; - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = force; - zc.zc_guid = hardforce; - zc.zc_history = (uint64_t)(uintptr_t)log_str; + nvlist_t *outnvl; + int err = lzc_pool_export(zhp->zpool_name, log_str, force, hardforce, + &outnvl); + if (err == ZFS_ERR_IOC_CMD_UNAVAIL) { - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { - switch (errno) { - case EXDEV: - zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, - "use '-f' to override the following errors:\n" - "'%s' has an active shared spare which could be" - " used by other pools once '%s' is exported."), - zhp->zpool_name, zhp->zpool_name); - return (zfs_error_fmt(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, - dgettext(TEXT_DOMAIN, "cannot export '%s'"), - zhp->zpool_name)); - default: - return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, - dgettext(TEXT_DOMAIN, "cannot export '%s'"), - zhp->zpool_name)); + (void) strlcpy(zc.zc_name, zhp->zpool_name, + sizeof (zc.zc_name)); + zc.zc_cookie = force; + zc.zc_guid = hardforce; + zc.zc_history = (uint64_t)(uintptr_t)log_str; + + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) + err = errno; + else + err = 0; + } + + if (err == EXDEV) { + zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, + "use '-f' to override the following errors:\n" + "'%s' 
has an active shared spare which could be" + " used by other pools once '%s' is exported."), + zhp->zpool_name, zhp->zpool_name); + return (zfs_error_fmt(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, + dgettext(TEXT_DOMAIN, "cannot export '%s'"), + zhp->zpool_name)); + } else if (err == EBUSY && outnvl != NULL) { + libzfs_handle_t *hdl = zhp->zpool_hdl; + nvlist_t *clients = fnvlist_lookup_nvlist(outnvl, + ZPOOL_SHARED_LOG_CLIENTS); + nvpair_t *elem = nvlist_next_nvpair(clients, NULL); + char buf[ERRBUFLEN]; + int idx = snprintf(buf, ERRBUFLEN, "%s", nvpair_name(elem)); + while ((elem = nvlist_next_nvpair(clients, elem)) != NULL && + idx < ERRBUFLEN) { + idx += snprintf(buf + idx, ERRBUFLEN - idx, ", %s", + nvpair_name(elem)); } + fnvlist_free(outnvl); + zfs_error_aux(hdl, "pool has active clients: %s", buf); + return (zfs_error_fmt(hdl, EZFS_BUSY, dgettext(TEXT_DOMAIN, + "cannot export '%s'"), zhp->zpool_name)); + } else if (err != 0) { + return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot export '%s'"), + zhp->zpool_name)); } return (0); @@ -2292,6 +2379,11 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, "the maximum allowable length")); (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc); break; + case ESRCH: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "shared log pool no longer contains this client")); + (void) zfs_error(hdl, EZFS_NOENT, desc); + break; default: (void) zpool_standard_error(hdl, error, desc); zpool_explain_recover(hdl, diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index a2259eee91ca..802c488ee3e3 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -510,6 +510,10 @@ zpool_get_status(zpool_handle_t *zhp, const char **msgid, zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata, compatibility); + if (ret == ZPOOL_STATUS_OK && zpool_get_shared_log(zhp)) { + ret = check_status(zpool_get_shared_log(zhp)->zpool_config, + B_FALSE, errata, 
compatibility); + } if (msgid != NULL) { if (ret >= NMSGID) diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 36993f2bc9c2..f20afc1f7f48 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -1909,11 +1909,37 @@ lzc_get_bootenv(const char *pool, nvlist_t **outnvl) } int -lzc_recycle(const char *pool, boolean_t dryrun, nvlist_t **outnvl) +lzc_recycle(const char *pool, nvlist_t *clients, boolean_t dryrun, + nvlist_t **outnvl) { nvlist_t *args = fnvlist_alloc(); fnvlist_add_boolean_value(args, ZPOOL_RECYCLE_DRYRUN, dryrun); + if (clients != NULL) + fnvlist_add_nvlist(args, ZPOOL_RECYCLE_CLIENTS, clients); int err = lzc_ioctl(ZFS_IOC_POOL_RECYCLE, pool, args, outnvl); fnvlist_free(args); return (err); } + +int +lzc_pool_destroy(const char *pool, const char *log_str, nvlist_t **outnvl) +{ + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, ZPOOL_HIST_CMD, log_str); + int err = lzc_ioctl(ZFS_IOC_POOL_DESTROY_NEW, pool, args, outnvl); + fnvlist_free(args); + return (err); +} + +int +lzc_pool_export(const char *pool, const char *log_str, boolean_t force, + boolean_t hardforce, nvlist_t **outnvl) +{ + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, ZPOOL_HIST_CMD, log_str); + fnvlist_add_boolean_value(args, ZPOOL_EXPORT_FORCE, force); + fnvlist_add_boolean_value(args, ZPOOL_EXPORT_HARDFORCE, hardforce); + int err = lzc_ioctl(ZFS_IOC_POOL_EXPORT_NEW, pool, args, outnvl); + fnvlist_free(args); + return (err); +} diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7 index ca3fb8379357..8cb3f41e38d5 100644 --- a/man/man7/zpoolconcepts.7 +++ b/man/man7/zpoolconcepts.7 @@ -180,7 +180,7 @@ For more information, see the section. .It Sy shared log A separate ZFS storage pool used as a shared intent log device. 
-Only one shared log can specified at pool creation or import, and a normal log +Only one shared log can be specified at pool creation or import, and a normal log device cannot also be specified. For more information, see the .Sx Intent Log diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 8db01f35bd6d..54987e2e46ab 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -170,6 +170,9 @@ static int zfs_zil_clean_taskq_nthr_pct = 100; static int zfs_zil_clean_taskq_minalloc = 1024; static int zfs_zil_clean_taskq_maxalloc = 1024 * 1024; +static unsigned int chain_map_zap_default_bs = 17; +static unsigned int chain_map_zap_default_ibs = 15; + int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { @@ -559,7 +562,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)), if (spa_is_shared_log(spa)) { dp->dp_chain_map_obj = zap_create_flags(dp->dp_meta_objset, 0, ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY | - ZAP_FLAG_PRE_HASHED_KEY, DMU_OTN_ZAP_METADATA, 0, 0, + ZAP_FLAG_PRE_HASHED_KEY, DMU_OTN_ZAP_METADATA, + chain_map_zap_default_bs, chain_map_zap_default_ibs, DMU_OT_NONE, 0, tx); VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHAIN_MAP_OBJ, sizeof (uint64_t), 1, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9daa9a37f25a..3e3052e6b64d 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1667,6 +1667,19 @@ get_shared_log_pool(nvlist_t *config, spa_t **out) mutex_enter(&result->spa_chain_map_lock); mutex_exit(&spa_shared_log_lock); *out = result; + + avl_tree_t *t = &result->spa_chain_map; + spa_chain_map_pool_t *search_scmp = kmem_zalloc(sizeof (*search_scmp), + KM_SLEEP); + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid)) + return (0); + + search_scmp->scmp_guid = guid; + spa_chain_map_pool_t *result_scmp = avl_find(t, search_scmp, NULL); + kmem_free(search_scmp, sizeof (*search_scmp)); + if (!result_scmp) { + return (ESRCH); + } return (0); } @@ 
-1676,22 +1689,29 @@ extern metaslab_ops_t *metaslab_allocator(spa_t *shared_log); * Activate an uninitialized pool. */ static int -spa_activate(spa_t *spa, nvlist_t *config, spa_mode_t mode) +spa_activate(spa_t *spa, nvlist_t *config, spa_mode_t mode, boolean_t creating) { metaslab_ops_t *msp = metaslab_allocator(spa); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); + boolean_t missing_logs = spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG; int error = 0; spa_t *shared_log = NULL; if (strcmp(spa->spa_name, TRYIMPORT_NAME) != 0 && (error = get_shared_log_pool(config, &shared_log)) != 0) { - // We handle this case in spa_check_for_missing_logs - if (error == ENOENT && - (spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { + // We handle the ENOENT case in spa_check_for_missing_logs + if (missing_logs && (error == ENOENT || error == ESRCH)) { + spa->spa_discarding_shared_log = B_TRUE; error = 0; - } else { - return (error); } + if (error == ESRCH) { + if (creating) + error = 0; + else + mutex_exit(&shared_log->spa_chain_map_lock); + } + if (error) + return (error); } spa->spa_state = POOL_STATE_ACTIVE; @@ -1700,7 +1720,7 @@ spa_activate(spa_t *spa, nvlist_t *config, spa_mode_t mode) spa->spa_normal_class = metaslab_class_create(spa, msp); if (shared_log != NULL) { - avl_add(&shared_log->spa_registered_clients, spa); + list_insert_tail(&shared_log->spa_registered_clients, spa); mutex_exit(&shared_log->spa_chain_map_lock); spa->spa_log_class = metaslab_class_create(spa, @@ -1782,6 +1802,12 @@ spa_activate(spa_t *spa, nvlist_t *config, spa_mode_t mode) avl_create(&spa->spa_zil_map, spa_zil_update_head_compare, sizeof (spa_zil_update_head_t), offsetof(spa_zil_update_head_t, szuh_avl)); + if (spa->spa_uses_shared_log) { + spa_zil_update_head_t *entry = kmem_zalloc(sizeof (*entry), + KM_SLEEP); + entry->szuh_force = B_TRUE; + avl_add(&spa->spa_zil_map, entry); + } spa_activate_os(spa); @@ -1902,7 +1928,7 @@ spa_deactivate(spa_t *spa) spa_t *shared_log; if ((shared_log = 
spa_get_shared_log_pool(spa)) != NULL) { mutex_enter(&shared_log->spa_chain_map_lock); - avl_remove(&shared_log->spa_registered_clients, spa); + list_remove(&shared_log->spa_registered_clients, spa); mutex_exit(&shared_log->spa_chain_map_lock); } metaslab_class_destroy(spa->spa_log_class); @@ -2597,7 +2623,6 @@ static int spa_check_for_missing_logs(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; - uint64_t guid; /* * If we're doing a normal import, then build up any additional @@ -2644,10 +2669,7 @@ spa_check_for_missing_logs(spa_t *spa) vdev_dbgmsg_print_tree(rvd, 2); return (SET_ERROR(ENXIO)); } - } else if (nvlist_lookup_uint64(spa->spa_config, - ZPOOL_CONFIG_SHARED_LOG_POOL, &guid)) { - if (spa_uses_shared_log(spa)) - return (0); + } else if (spa->spa_discarding_shared_log) { spa_set_log_state(spa, SPA_LOG_CLEAR); spa_load_note(spa, "shared log pool is " "missing, ZIL is dropped."); @@ -4048,6 +4070,7 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type) parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); + spa_set_pool_type(spa); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { @@ -4426,6 +4449,7 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); } + spa_set_pool_type(spa); /* * Vdev paths in the MOS may be obsolete. 
If the untrusted config was @@ -4760,7 +4784,7 @@ load_chain_map_cb(void *arg) kmem_free(lcmca, sizeof (*lcmca)); } -static int +noinline static int spa_load_chain_map(spa_t *spa) { int error = 0; @@ -4773,34 +4797,49 @@ spa_load_chain_map(spa_t *spa) zap_cursor_t zc; zap_attribute_t attr; objset_t *os = spa->spa_dsl_pool->dp_meta_objset; + spa_zil_chain_map_value_t *szcmv = kmem_alloc(sizeof (*szcmv), + KM_SLEEP); for (zap_cursor_init(&zc, os, chain_map_zap); zap_cursor_retrieve(&zc, &attr) == 0; zap_cursor_advance(&zc)) { uint64_t pool_guid = ((uint64_t *)attr.za_name)[0]; uint64_t os_guid = ((uint64_t *)attr.za_name)[1]; + error = zap_lookup_uint64(os, chain_map_zap, + (uint64_t *)attr.za_name, 2, sizeof (uint64_t), + sizeof (*szcmv) / sizeof (uint64_t), + szcmv); + if (error != 0) { + break; + } avl_index_t where; spa_chain_map_pool_t search; search.scmp_guid = pool_guid; - spa_chain_map_pool_t *pool_entry = avl_find(&spa->spa_chain_map, - &search, &where); + spa_chain_map_pool_t *pool_entry = + avl_find(&spa->spa_chain_map, &search, &where); if (pool_entry == NULL) { - pool_entry = kmem_alloc(sizeof (*pool_entry), KM_SLEEP); + pool_entry = kmem_alloc(sizeof (*pool_entry), + KM_SLEEP); pool_entry->scmp_guid = pool_guid; avl_create(&pool_entry->scmp_os_tree, spa_chain_map_os_compare, sizeof (spa_chain_map_os_t), offsetof(spa_chain_map_os_t, scmo_avl)); + strlcpy(pool_entry->scmp_name, + szcmv->szcmv_pool_name, ZFS_MAX_DATASET_NAME_LEN); avl_insert(&spa->spa_chain_map, pool_entry, where); } + + if (os_guid == 0) { + /* + * This is the dummy marker to make sure we know about + * the pool; no need to add an os-specific entry + */ + continue; + } + spa_chain_map_os_t *os_entry = kmem_alloc(sizeof (*os_entry), KM_SLEEP); os_entry->scmo_id = os_guid; - error = zap_lookup_uint64(os, chain_map_zap, - (uint64_t *)attr.za_name, 2, sizeof (uint64_t), - sizeof (blkptr_t) / sizeof (uint64_t), - &os_entry->scmo_chain_head); - if (error != 0) { - break; - } + 
os_entry->scmo_chain_head = szcmv->szcmv_bp; avl_add(&pool_entry->scmp_os_tree, os_entry); struct load_chain_map_arg *arg = kmem_alloc(sizeof (*arg), KM_SLEEP); @@ -4810,6 +4849,7 @@ spa_load_chain_map(spa_t *spa) (void) taskq_dispatch(spa->spa_chain_map_taskq, load_chain_map_cb, arg, TQ_SLEEP); } + kmem_free(szcmv, sizeof (*szcmv)); if (error != 0) { void *cookie = NULL; @@ -5239,6 +5279,13 @@ spa_ld_claim_log_blocks(spa_t *spa) (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, zil_claim, tx, DS_FIND_CHILDREN); dmu_tx_commit(tx); + } else if (spa_get_log_state(spa) == SPA_LOG_CLEAR) { + ASSERT(spa->spa_discarding_shared_log); + tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); + (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_clear, tx, DS_FIND_CHILDREN); + dmu_tx_commit(tx); + spa->spa_discarding_shared_log = B_FALSE; } spa->spa_claiming = B_FALSE; @@ -5286,7 +5333,7 @@ spa_ld_prepare_for_reload(spa_t *spa) spa_unload(spa); spa_deactivate(spa); - VERIFY0(spa_activate(spa, spa->spa_config, mode)); + VERIFY0(spa_activate(spa, spa->spa_config, mode, B_FALSE)); /* * We save the value of spa_async_suspended as it gets reset to 0 by @@ -5864,7 +5911,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state) spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; - VERIFY0(spa_activate(spa, spa->spa_config, mode)); + VERIFY0(spa_activate(spa, spa->spa_config, mode, B_FALSE)); spa_async_suspend(spa); spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", @@ -6031,7 +6078,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, if (policy.zlp_rewind & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; - error = spa_activate(spa, spa->spa_config, spa_mode_global); + error = spa_activate(spa, spa->spa_config, spa_mode_global, B_FALSE); if (error != 0) { spa_remove(spa); if (locked) @@ -6641,7 +6688,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); spa = spa_add(poolname, nvl, altroot); 
fnvlist_free(nvl); - error = spa_activate(spa, nvroot, spa_mode_global); + error = spa_activate(spa, nvroot, spa_mode_global, B_TRUE); if (error != 0) { spa_remove(spa); mutex_exit(&spa_namespace_lock); @@ -6683,15 +6730,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, } } - if (!has_shared_log && spa_uses_shared_log(spa)) { - spa_deactivate(spa); - spa_remove(spa); - mutex_exit(&spa_namespace_lock); - return (SET_ERROR(ENOTSUP)); - } - - if (!has_shared_log && fnvlist_lookup_boolean(nvroot, - ZPOOL_CONFIG_IS_SHARED_LOG)) { + if (!has_shared_log && (spa_uses_shared_log(spa) || + fnvlist_lookup_boolean(nvroot, ZPOOL_CONFIG_IS_SHARED_LOG))) { spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); @@ -6748,6 +6788,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); + if (error == 0) + spa_set_pool_type(spa); ASSERT(error != 0 || rvd != NULL); ASSERT(error != 0 || spa->spa_root_vdev == rvd); @@ -6988,7 +7030,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) return (0); } - error = spa_activate(spa, config, mode); + error = spa_activate(spa, config, mode, B_FALSE); if (error != 0) { spa_remove(spa); mutex_exit(&spa_namespace_lock); @@ -7149,7 +7191,17 @@ spa_tryimport(nvlist_t *tryconfig) mutex_enter(&spa_namespace_lock); spa = spa_add(name, tryconfig, NULL); kmem_free(name, MAXPATHLEN); - error = spa_activate(spa, tryconfig, SPA_MODE_READ); + + /* + * spa_import() relies on a pool config fetched by spa_try_import() + * for spare/cache devices. Import flags are not passed to + * spa_tryimport(), which makes it return early due to a missing log + * device and missing retrieving the cache device and spare eventually. + * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch + * the correct configuration regardless of the missing log device. 
+ */ + spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG; + error = spa_activate(spa, tryconfig, SPA_MODE_READ, B_FALSE); if (error != 0) { spa_remove(spa); mutex_exit(&spa_namespace_lock); @@ -7177,16 +7229,6 @@ spa_tryimport(nvlist_t *tryconfig) spa->spa_config_source = SPA_CONFIG_SRC_SCAN; } - /* - * spa_import() relies on a pool config fetched by spa_try_import() - * for spare/cache devices. Import flags are not passed to - * spa_tryimport(), which makes it return early due to a missing log - * device and missing retrieving the cache device and spare eventually. - * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch - * the correct configuration regardless of the missing log device. - */ - spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG; - error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); /* @@ -7207,7 +7249,6 @@ spa_tryimport(nvlist_t *tryconfig) uint64_t shared_log_guid; if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log_guid) == 0) { - zfs_dbgmsg("in tryimport: got %llu", (unsigned long long) shared_log_guid); fnvlist_add_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL, shared_log_guid); } @@ -7273,7 +7314,7 @@ spa_tryimport(nvlist_t *tryconfig) */ static int spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, - boolean_t force, boolean_t hardforce) + boolean_t force, boolean_t hardforce, nvlist_t *outnvl) { int error = 0; spa_t *spa; @@ -7293,7 +7334,19 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (spa_is_shared_log(spa)) { mutex_enter(&spa->spa_chain_map_lock); - if (avl_numnodes(&spa->spa_registered_clients) != 0) { + if (!list_is_empty(&spa->spa_registered_clients)) { + if (outnvl != NULL) { + spa_t *client; + list_t *l = &spa->spa_registered_clients; + nvlist_t *clients = fnvlist_alloc(); + for (client = list_head(l); client != NULL; + client = list_next(l, client)) { + fnvlist_add_boolean(clients, + spa_name(client)); + } + fnvlist_add_nvlist(outnvl, 
+ ZPOOL_SHARED_LOG_CLIENTS, clients); + } mutex_exit(&spa->spa_chain_map_lock); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EBUSY)); @@ -7477,10 +7530,10 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, * Destroy a storage pool. */ int -spa_destroy(const char *pool) +spa_destroy(const char *pool, nvlist_t *outnvl) { return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, - B_FALSE, B_FALSE)); + B_FALSE, B_FALSE, outnvl)); } /* @@ -7488,10 +7541,10 @@ spa_destroy(const char *pool) */ int spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force, - boolean_t hardforce) + boolean_t hardforce, nvlist_t *outnvl) { return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, - force, hardforce)); + force, hardforce, outnvl)); } /* @@ -7502,7 +7555,7 @@ int spa_reset(const char *pool) { return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, - B_FALSE, B_FALSE)); + B_FALSE, B_FALSE, NULL)); } /* @@ -8778,7 +8831,7 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config, if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 1); - VERIFY0(spa_activate(newspa, config, spa_mode_global)); + VERIFY0(spa_activate(newspa, config, spa_mode_global, B_TRUE)); spa_async_suspend(newspa); /* @@ -8897,7 +8950,7 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config, /* if we're not going to mount the filesystems in userland, export */ if (exp) error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, - B_FALSE, B_FALSE); + B_FALSE, B_FALSE, NULL); return (error); @@ -11399,6 +11452,7 @@ spa_chain_map_update(spa_t *spa) spa_chain_map_os_compare, sizeof (spa_chain_map_os_t), offsetof(spa_chain_map_os_t, scmo_avl)); + strcpy(pool_entry->scmp_name, spa_name(spa)); avl_insert(&target->spa_chain_map, pool_entry, where); } avl_tree_t *target_tree = &pool_entry->scmp_os_tree; @@ -11419,17 +11473,37 @@ spa_chain_map_update(spa_t *spa) list_create(&local_frees, sizeof 
(spa_zil_update_t), offsetof(spa_zil_update_t, szu_list)); spa_zil_update_head_t *node; - uint64_t buf[2]; - buf[0] = spa_guid(spa); - for (node = avl_first(t); node; node = AVL_NEXT(t, node)) { + uint64_t keybuf[2]; + keybuf[0] = spa_guid(spa); + spa_zil_chain_map_value_t szcmv = {0}; + strcpy(szcmv.szcmv_pool_name, spa_name(spa)); + for (node = avl_first(t); node; ) { uint64_t guid = node->szuh_id; + + if (node->szuh_force) { + ASSERT0(node->szuh_id); + ASSERT(BP_IS_HOLE(&node->szuh_chain_head)); + keybuf[1] = 0; + int res = zap_add_uint64(target_mos, chain_map_zap, + keybuf, sizeof (keybuf) / sizeof (uint64_t), + sizeof (uint64_t), sizeof (szcmv) / + sizeof (uint64_t), (uint64_t *)&szcmv, tx); + IMPLY(res != 0, res == EEXIST); + spa_zil_update_head_t *next = AVL_NEXT(t, node); + avl_remove(t, node); + kmem_free(node, sizeof (*node)); + node = next; + continue; + } + list_t *l = &node->szuh_list; spa_zil_update_t *szu = list_head(l); if (!node->szuh_set || szu == NULL || BP_IS_HOLE(&node->szuh_chain_head)) { + node = AVL_NEXT(t, node); continue; } - buf[1] = guid; + keybuf[1] = guid; spa_chain_map_os_t osearch; osearch.scmo_id = guid; spa_chain_map_os_t *os_entry = avl_find(target_tree, @@ -11459,18 +11533,21 @@ spa_chain_map_update(spa_t *spa) os_entry->scmo_id = guid; os_entry->scmo_chain_head = node->szuh_chain_head; avl_insert(&pool_entry->scmp_os_tree, os_entry, where); - blkptr_t *bp = &os_entry->scmo_chain_head; + szcmv.szcmv_bp = os_entry->scmo_chain_head; - zap_add_uint64(target_mos, chain_map_zap, buf, 2, - sizeof (uint64_t), sizeof (*bp) / sizeof (uint64_t), - bp, tx); + VERIFY0(zap_add_uint64(target_mos, chain_map_zap, + keybuf, sizeof (keybuf) / sizeof (uint64_t), + sizeof (uint64_t), sizeof (szcmv) / + sizeof (uint64_t), (uint64_t *)&szcmv, tx)); } else { os_entry->scmo_chain_head = node->szuh_chain_head; - blkptr_t *bp = &os_entry->scmo_chain_head; - zap_update_uint64(target_mos, chain_map_zap, buf, 2, - sizeof (uint64_t), sizeof (*bp) / sizeof 
(uint64_t), - bp, tx); + szcmv.szcmv_bp = os_entry->scmo_chain_head; + VERIFY0(zap_update_uint64(target_mos, chain_map_zap, + keybuf, sizeof (keybuf) / sizeof (uint64_t), + sizeof (uint64_t), sizeof (szcmv) / + sizeof (uint64_t), (uint64_t *)&szcmv, tx)); } + node = AVL_NEXT(t, node); } /* @@ -11496,9 +11573,10 @@ spa_chain_map_update(spa_t *spa) avl_remove(target_tree, tree_entry); kmem_free(tree_entry, sizeof (*tree_entry)); - buf[1] = entry->zde_guid; + keybuf[1] = entry->zde_guid; kmem_free(entry, sizeof (*entry)); - zap_remove_uint64(target_mos, chain_map_zap, buf, 2, tx); + VERIFY0(zap_remove_uint64(target_mos, chain_map_zap, keybuf, 2, + tx)); } mutex_exit(&target->spa_chain_map_lock); @@ -11588,55 +11666,111 @@ spa_zil_header_mask(spa_t *spa, blkptr_t *bp) *bp = masked; } +static int +spa_recycle_one(spa_t *spa, spa_chain_map_pool_t *entry, boolean_t dryrun, + nvlist_t *outnvl) +{ + int err = 0; + uint64_t guid = entry->scmp_guid; + spa_t *search = kmem_zalloc(sizeof (spa_t), KM_SLEEP); + search->spa_config_guid = guid; + + spa_t *client; + list_t *l = &spa->spa_registered_clients; + for (client = list_head(l); client != NULL; + client = list_next(l, client)) { + if (spa_const_guid(client) == entry->scmp_guid) + break; + } + if (!client) { + fnvlist_add_uint64(outnvl, entry->scmp_name, guid); + } + if (dryrun || client) { + return (err); + } + + uint64_t chain_map_zap = spa->spa_dsl_pool->dp_chain_map_obj; + dmu_tx_t *tx = dmu_tx_create_mos(spa->spa_dsl_pool); + dmu_tx_hold_zap(tx, chain_map_zap, B_TRUE, NULL); + dmu_tx_assign(tx, TXG_WAIT); + uint64_t keybuf[2]; + keybuf[0] = entry->scmp_guid; + + avl_tree_t *os_tree = &entry->scmp_os_tree; + spa_chain_map_os_t *os = NULL; + void *cookie = NULL; + while ((os = avl_destroy_nodes(os_tree, &cookie))) { + struct spa_chain_map_free_cb_arg arg; + arg.smcfca_end = NULL; + arg.smcfca_guid = os->scmo_id; + arg.smcfca_txg = spa->spa_syncing_txg; + (void) zil_parse_raw(spa, &os->scmo_chain_head, + 
spa_chain_map_free_blk_cb, spa_chain_map_free_lr_cb, &arg); + + keybuf[1] = os->scmo_id; + zap_remove_uint64(spa->spa_dsl_pool->dp_meta_objset, + chain_map_zap, keybuf, sizeof (keybuf) / sizeof (uint64_t), + tx); + kmem_free(os, sizeof (*os)); + } + dmu_tx_commit(tx); + avl_destroy(&entry->scmp_os_tree); + kmem_free(search, sizeof (*search)); + + avl_remove(&spa->spa_chain_map, entry); + kmem_free(entry, sizeof (*entry)); + return (err); +} + int -spa_recycle(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl) +spa_recycle_all(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl) { int err = 0; if (!spa_is_shared_log(spa)) { return (SET_ERROR(ENOTSUP)); } - - spa_t *search = kmem_zalloc(sizeof (spa_t), KM_SLEEP); mutex_enter(&spa->spa_chain_map_lock); avl_tree_t *t = &spa->spa_chain_map; spa_chain_map_pool_t *entry = avl_first(t); while (entry != NULL) { - uint64_t guid = entry->scmp_guid; - search->spa_config_guid = guid; - spa_t *client = avl_find(&spa->spa_registered_clients, search, - NULL); spa_chain_map_pool_t *next = AVL_NEXT(t, entry); - if (!client) { - char buf[64]; - snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)guid); - fnvlist_add_boolean(outnvl, buf); - } - if (dryrun || client) { - entry = next; - continue; - } - avl_tree_t *os_tree = &entry->scmp_os_tree; - spa_chain_map_os_t *os = NULL; - void *cookie = NULL; - while ((os = avl_destroy_nodes(os_tree, &cookie))) { - struct spa_chain_map_free_cb_arg arg; - arg.smcfca_end = NULL; - arg.smcfca_guid = os->scmo_id; - arg.smcfca_txg = spa->spa_syncing_txg; - int this_err = zil_parse_raw(spa, &os->scmo_chain_head, - spa_chain_map_free_blk_cb, - spa_chain_map_free_lr_cb, &arg); - if (this_err != 0 && err == 0) - err = this_err; - kmem_free(os, sizeof (*os)); - } - avl_remove(t, entry); - avl_destroy(&entry->scmp_os_tree); - kmem_free(entry, sizeof (*entry)); + + int this_err = spa_recycle_one(spa, entry, dryrun, outnvl); + if (this_err != 0 && err == 0) + err = this_err; + entry = next; } 
mutex_exit(&spa->spa_chain_map_lock); - kmem_free(search, sizeof (*search)); + return (err); +} + +int +spa_recycle_clients(spa_t *spa, nvlist_t *clients, boolean_t dryrun, + nvlist_t *outnvl) +{ + int err = 0; + if (!spa_is_shared_log(spa)) { + return (SET_ERROR(ENOTSUP)); + } + mutex_enter(&spa->spa_chain_map_lock); + for (nvpair_t *pair = nvlist_next_nvpair(clients, NULL); + pair != NULL; pair = nvlist_next_nvpair(clients, pair)) { + avl_tree_t *t = &spa->spa_chain_map; + spa_chain_map_pool_t *entry = avl_first(t); + while (entry != NULL) { + spa_chain_map_pool_t *next = AVL_NEXT(t, entry); + + if (strcmp(entry->scmp_name, nvpair_name(pair)) != 0) { + entry = next; + continue; + } + + err = spa_recycle_one(spa, entry, dryrun, outnvl); + break; + } + } + mutex_exit(&spa->spa_chain_map_lock); return (err); } diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index ce587513a073..912cf11e2b9c 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -482,6 +482,7 @@ spa_uses_shared_log(const spa_t *spa) return (spa->spa_uses_shared_log); } + /* * ========================================================================== * SPA config locking @@ -825,8 +826,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node)); avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg, sizeof (spa_log_sm_t), offsetof(spa_log_sm_t, sls_node)); - avl_create(&spa->spa_registered_clients, spa_guid_compare, - sizeof (spa_t), offsetof(spa_t, spa_client_avl)); + list_create(&spa->spa_registered_clients, sizeof (spa_t), + offsetof(spa_t, spa_client_node)); list_create(&spa->spa_log_summary, sizeof (log_summary_entry_t), offsetof(log_summary_entry_t, lse_node)); @@ -931,7 +932,7 @@ spa_remove(spa_t *spa) avl_destroy(&spa->spa_metaslabs_by_flushed); avl_destroy(&spa->spa_sm_logs_by_txg); - avl_destroy(&spa->spa_registered_clients); + list_destroy(&spa->spa_registered_clients); 
list_destroy(&spa->spa_log_summary); list_destroy(&spa->spa_config_list); list_destroy(&spa->spa_leaf_list); diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c index 032aca92695e..a5f15cf0a2ee 100644 --- a/module/zfs/zap_leaf.c +++ b/module/zfs/zap_leaf.c @@ -332,7 +332,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { uint64_t *thiskey = kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP); - ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); + ASSERT3S(zn->zn_key_intlen, ==, sizeof (*thiskey)); zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, sizeof (*thiskey), array_numints, thiskey); @@ -342,7 +342,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, return (match); } - ASSERT(zn->zn_key_intlen == 1); + ASSERT3S(zn->zn_key_intlen, ==, 1); if (zn->zn_matchtype & MT_NORMALIZE) { char *thisname = kmem_alloc(array_numints, KM_SLEEP); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index e5ca35cc8a9b..1d224c3434f2 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -353,19 +353,24 @@ zpl_earlier_version(const char *name, int version) } static void -zfs_log_history(zfs_cmd_t *zc) +zfs_log_history_string(const char *pool, const char *buf) { spa_t *spa; - char *buf; - - if ((buf = history_str_get(zc)) == NULL) - return; - - if (spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (spa_open(pool, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } +} + +static void +zfs_log_history(zfs_cmd_t *zc) +{ + char *buf; + + if ((buf = history_str_get(zc)) == NULL) + return; + zfs_log_history_string(zc->zc_name, buf); history_str_free(buf); } @@ -1502,7 +1507,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) */ if (!error && (error = zfs_set_prop_nvlist(spa_name, ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) { - (void) spa_destroy(spa_name); + (void) spa_destroy(spa_name, NULL); unload_wkey = B_FALSE; /* 
spa_destroy() unloads wrapping keys */ } @@ -1521,7 +1526,23 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc) { int error; zfs_log_history(zc); - error = spa_destroy(zc->zc_name); + error = spa_destroy(zc->zc_name, NULL); + + return (error); +} + +static const zfs_ioc_key_t zfs_keys_pool_destroy_new[] = { + {ZPOOL_HIST_CMD, DATA_TYPE_STRING, 0}, +}; + +static int +zfs_ioc_pool_destroy_new(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) +{ + int error; + + zfs_log_history_string(pool, fnvlist_lookup_string(innvl, + ZPOOL_HIST_CMD)); + error = spa_destroy(pool, outnvl); return (error); } @@ -1571,7 +1592,28 @@ zfs_ioc_pool_export(zfs_cmd_t *zc) boolean_t hardforce = (boolean_t)zc->zc_guid; zfs_log_history(zc); - error = spa_export(zc->zc_name, NULL, force, hardforce); + error = spa_export(zc->zc_name, NULL, force, hardforce, NULL); + + return (error); +} + +static const zfs_ioc_key_t zfs_keys_pool_export_new[] = { + {ZPOOL_HIST_CMD, DATA_TYPE_STRING, 0}, + {ZPOOL_EXPORT_FORCE, DATA_TYPE_BOOLEAN_VALUE, 0}, + {ZPOOL_EXPORT_HARDFORCE, DATA_TYPE_BOOLEAN_VALUE, 0}, +}; + +static int +zfs_ioc_pool_export_new(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) +{ + int error; + + zfs_log_history_string(pool, + fnvlist_lookup_string(innvl, ZPOOL_HIST_CMD)); + error = spa_export(pool, NULL, + fnvlist_lookup_boolean_value(innvl, ZPOOL_EXPORT_FORCE), + fnvlist_lookup_boolean_value(innvl, ZPOOL_EXPORT_HARDFORCE), + outnvl); return (error); } @@ -7040,6 +7082,7 @@ zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) static const zfs_ioc_key_t zfs_keys_pool_recycle[] = { {ZPOOL_RECYCLE_DRYRUN, DATA_TYPE_BOOLEAN_VALUE, 0}, + {ZPOOL_RECYCLE_CLIENTS, DATA_TYPE_NVLIST, ZK_OPTIONAL}, }; static int @@ -7048,6 +7091,7 @@ zfs_ioc_pool_recycle(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) int err; boolean_t rc, dryrun = B_FALSE; spa_t *spa; + nvlist_t *clients = NULL; if ((err = spa_open(pool, &spa, FTAG)) != 0) return (err); @@ -7057,9 +7101,14 @@ 
zfs_ioc_pool_recycle(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) &rc); if (err == 0) dryrun = rc; + nvlist_lookup_nvlist(innvl, ZPOOL_RECYCLE_CLIENTS, + &clients); + } + if (clients) { + err = spa_recycle_clients(spa, clients, dryrun, outnvl); + } else { + err = spa_recycle_all(spa, dryrun, outnvl); } - - err = spa_recycle(spa, dryrun, outnvl); spa_close(spa, FTAG); @@ -7361,6 +7410,16 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_pool_recycle, ARRAY_SIZE(zfs_keys_pool_recycle)); + zfs_ioctl_register("zpool_destroy_new", ZFS_IOC_POOL_DESTROY_NEW, + zfs_ioc_pool_destroy_new, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, + zfs_keys_pool_destroy_new, ARRAY_SIZE(zfs_keys_pool_destroy_new)); + + zfs_ioctl_register("zpool_export_new", ZFS_IOC_POOL_EXPORT_NEW, + zfs_ioc_pool_export_new, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, + zfs_keys_pool_export_new, ARRAY_SIZE(zfs_keys_pool_export_new)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/module/zfs/zil.c b/module/zfs/zil.c index f89d2629fb13..333112bb0287 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -487,13 +487,17 @@ zil_parse_raw_impl(spa_t *spa, const blkptr_t *bp, return (error); } +/* + * Because we don't have access to the zilog_t, we cannot know when the chain + * is supposed to end. As a result, all IOs need to be marked as speculative. 
+ */ int zil_parse_raw(spa_t *spa, const blkptr_t *bp, zil_parse_raw_blk_func_t *parse_blk_func, zil_parse_raw_lr_func_t *parse_lr_func, void *arg) { return (zil_parse_raw_impl(spa, bp, parse_blk_func, parse_lr_func, arg, - 0)); + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB)); } struct parse_arg { @@ -1197,6 +1201,49 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); } +/* + * This function's only job is to clear the zil chain for the given dataset. + * It is called when we're using a shared log pool and we import discarding + * logs. + */ +int +zil_clear(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) +{ + dmu_tx_t *tx = txarg; + zilog_t *zilog; + zil_header_t *zh; + objset_t *os; + int error; + + ASSERT3U(spa_get_log_state(dp->dp_spa), ==, SPA_LOG_CLEAR); + + error = dmu_objset_own_obj(dp, ds->ds_object, + DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os); + if (error != 0) { + /* + * EBUSY indicates that the objset is inconsistent, in which + * case it can not have a ZIL. 
+ */ + if (error != EBUSY) { + cmn_err(CE_WARN, "can't open objset for %llu, error %u", + (unsigned long long)ds->ds_object, error); + } + + return (0); + } + + zilog = dmu_objset_zil(os); + zh = zil_header_in_syncing_context(zilog); + ASSERT3U(tx->tx_txg, ==, spa_first_txg(zilog->zl_spa)); + + BP_ZERO(&zh->zh_log); + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; + dsl_dataset_dirty(dmu_objset_ds(os), tx); + dmu_objset_disown(os, B_FALSE, FTAG); + return (0); +} + int zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) { diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index e5bf9135da47..511f32066957 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -920,7 +920,7 @@ tags = ['functional', 'scrub_mirror'] [tests/functional/shared_log] tests = ['shared_log_001_pos', 'shared_log_002_pos', 'shared_log_003_pos', 'shared_log_004_pos', - 'shared_log_005_pos', 'shared_log_006_neg'] + 'shared_log_005_pos', 'shared_log_006_neg', 'shared_log_007_pos', 'shared_log_008_pos'] tags = ['functional', 'shared_log'] [tests/functional/slog] diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 1fa31761f4ce..7955aacf089b 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -1586,10 +1586,10 @@ function create_pool #pool devs_list if is_global_zone ; then [[ -d /$pool ]] && rm -rf /$pool - log_must zpool create -f $pool $@ + zpool create -f $pool $@ fi - return 0 + return $? } # Return 0 if destroy successfully or the pool exists; $? 
otherwise diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 7ed295e17444..dcc9e2917c50 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1946,6 +1946,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/shared_log/shared_log_004_pos.ksh \ functional/shared_log/shared_log_005_pos.ksh \ functional/shared_log/shared_log_006_neg.ksh \ + functional/shared_log/shared_log_007_pos.ksh \ + functional/shared_log/shared_log_008_pos.ksh \ functional/slog/cleanup.ksh \ functional/slog/setup.ksh \ functional/slog/slog_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib b/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib index 5aa9a9208a3c..db85c5b0f9e5 100644 --- a/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib @@ -27,6 +27,10 @@ function cleanup { + zpool import $LOGPOOL + zpool import ${LOGPOOL}2 + zpool import $TESTPOOL + zpool import $TESTPOOL2 poolexists $TESTPOOL && destroy_pool $TESTPOOL poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 poolexists $LOGPOOL && destroy_pool $LOGPOOL diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh index 2a5faf38ddd5..62df834a4be7 100755 --- a/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh @@ -44,5 +44,7 @@ log_onexit cleanup log_must create_pool $LOGPOOL -L "$DISK0" log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1" log_must verify_shared_log $TESTPOOL $LOGPOOL +verify_pool $LOGPOOL +verify_pool $TESTPOOL log_pass "Creating a pool with a shared log succeeds." 
diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh index 4bd519ab7b83..f178679cb601 100755 --- a/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh @@ -55,5 +55,7 @@ log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 log_must zpool export $TESTPOOL log_must zpool import $TESTPOOL log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 +verify_pool $LOGPOOL +verify_pool $TESTPOOL log_pass "Using a pool with a shared log device succeeds at basic operations." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh index 675512951f13..1b46544c58fb 100755 --- a/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh @@ -58,5 +58,7 @@ log_must zpool export $LOGPOOL log_must zpool import $LOGPOOL log_must zpool import $TESTPOOL log_must dd if=/dev/urandom of="$mntpnt/f2" bs=8k count=128 +verify_pool $LOGPOOL +verify_pool $TESTPOOL log_pass "Shared log pool can be exported and imported." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh index bbdb997d69c6..82f7a4fc4ab7 100755 --- a/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh @@ -36,7 +36,7 @@ # 3. Export client and provider # 4. Import client with -m # 5. Export client -# 6. Import client with -m and new pool +# 6. 
Import client with -m and new provider # verify_runnable "global" @@ -63,5 +63,8 @@ log_must zpool import $LOGPOOL log_must zpool import -m -L ${LOGPOOL}2 $TESTPOOL log_must verify_shared_log $TESTPOOL ${LOGPOOL}2 log_must dd if=/dev/urandom of="$mntpnt/f3" bs=8k count=128 +verify_pool $LOGPOOL +verify_pool $LOGPOOL2 +verify_pool $TESTPOOL log_pass "Client pools can be reimported without provider, with flag." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh index b2a32d4b44ad..1469d76bb49d 100755 --- a/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh @@ -31,9 +31,14 @@ # Negative shared log testing. # # STRATEGY: -# 1. Create shared log pool & client -# 2. Write some data to the client pool -# 3. Scrub client and provider pools +# 1. Attempt to create a client pool with a missing shared log pool +# 2. Attempt to create a client pool with mis-named shared log pool +# 3. Attempt to create a client pool with a shared log and a log device +# 4. Attempt to use a client pool after the shared log has been destroyed +# 5. Attempt to create a client pool when the feature is disabled +# 6. Attempt to export/destroy an active shared log +# 7. Attempt to reguid a client/log pool +# 8. Attempt to checkpoint a client/log pool # verify_runnable "global" diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh new file mode 100755 index 000000000000..06865be80db5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh @@ -0,0 +1,51 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Test fault behavior of shared log pool +# +# STRATEGY: +# 1. Create shared log pool & client +# 2. Fault the provider pool +# 3. Verify the client pool also faults +# + +verify_runnable "global" + +log_assert "Test fault behavior of shared log pools." +log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must create_pool $LOGPOOL -L "$DISK0" +log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1" +log_must zinject -d "$DISK0" -A degrade $LOGPOOL +log_must eval "zpool status -e $TESTPOOL | grep DEGRADED" + +log_pass "Test fault behavior of shared log pools." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh new file mode 100755 index 000000000000..223f9be6a096 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh @@ -0,0 +1,81 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib
+
+#
+# DESCRIPTION:
+# Test zpool recycle
+#
+# STRATEGY:
+# 1. Create shared log pool & two clients; write sync data to both
+# 2. Verify zpool recycle -a recycles nothing while clients are imported
+# 3. Export clients
+# 4. Verify zpool recycle -a recycles everything (plain import then fails)
+# 5. Re-add clients with import -m -L, write to one, and export both
+# 6. Verify zpool recycle of a single client works as expected
+# 7. Re-add that client, write to it, and export it
+# 8. Verify zpool recycle of multiple clients works as expected
+#
+
+verify_runnable "global"
+
+log_assert "Test zpool recycle."
+log_onexit cleanup
+
+typeset FS="$TESTPOOL/fs"
+# Steps 1-2: with both clients imported, recycle -a -v output must contain "[]".
+log_must create_pool $LOGPOOL -L "$DISK0"
+log_must create_pool $TESTPOOL -l $LOGPOOL "$DISK1"
+log_must create_pool ${TESTPOOL}2 -l $LOGPOOL "$DISK2"
+log_must zfs create -o sync=always ${TESTPOOL}/fs
+log_must zfs create -o sync=always ${TESTPOOL}2/fs
+log_must dd if=/dev/urandom of=/${TESTPOOL}/fs/f1 bs=128k count=128
+log_must dd if=/dev/urandom of=/${TESTPOOL}2/fs/f1 bs=128k count=128
+log_must eval "zpool recycle -a -v $LOGPOOL | grep '\\[\\]' >/dev/null"
+# Steps 3-4: after export + recycle -a, a plain import of either client fails.
+log_must zpool export $TESTPOOL
+log_must zpool export ${TESTPOOL}2
+log_must zpool recycle -a -v $LOGPOOL
+log_mustnot zpool import $TESTPOOL
+log_mustnot zpool import ${TESTPOOL}2
+# Steps 5-6: recycle only $TESTPOOL; its plain import is then expected to fail.
+log_must zpool import -m -L $LOGPOOL $TESTPOOL
+log_must zpool import -m -L $LOGPOOL ${TESTPOOL}2
+log_must dd if=/dev/urandom of=/${TESTPOOL}/fs/f1 bs=128k count=128
+log_must zpool export $TESTPOOL
+log_must zpool export ${TESTPOOL}2
+log_must zpool recycle $LOGPOOL $TESTPOOL
+log_mustnot zpool import $TESTPOOL
+# Steps 7-8: ${TESTPOOL}2 is still exported from above; recycle both at once.
+log_must zpool import -m -L $LOGPOOL $TESTPOOL
+log_must dd if=/dev/urandom of=/${TESTPOOL}/fs/f1 bs=128k count=128
+log_must zpool export $TESTPOOL
+log_must zpool recycle $LOGPOOL $TESTPOOL ${TESTPOOL}2
+log_mustnot zpool import $TESTPOOL
+log_mustnot zpool import ${TESTPOOL}2
+
+log_pass "Test zpool recycle."