From 42233706498852d9a3a212bbda81f1d8d59e5088 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Fri, 19 Aug 2022 20:52:36 +0500 Subject: [PATCH] Fix for hotplug issues ZED relies on udev to match vdev GUIDs when a device is removed. However, udev does not contain the correct blkid information for the vdev, so the vdev fails to match when it is detached and we have to rely on the fault handler to make the device unavailable. This PR allows a vdev to trigger a disk change event whenever a new vdev is added, in order to sync blkid information with udev. This PR also changes the device state to REMOVED instead of UNAVAIL whenever the device is unplugged. When a vdev is unplugged, zfs_retire_recv() is not able to match the detached vdev. Signed-off-by: Ameer Hamza --- cmd/zed/agents/zfs_retire.c | 37 ++++++++--- cmd/ztest.c | 2 +- include/libzfs.h | 1 + include/os/linux/Makefile.am | 1 + include/os/linux/spl/sys/misc_spl.h | 30 +++++++++ include/sys/spa.h | 2 +- include/sys/vdev.h | 3 + include/sys/vdev_impl.h | 3 + include/sys/zfs_context.h | 1 + lib/libzfs/libzfs.abi | 41 +++++++----- lib/libzfs/libzfs_pool.c | 33 ++++++++++ module/os/linux/spl/spl-generic.c | 24 +++++++ module/os/linux/zfs/vdev_disk.c | 22 ++++++- module/zfs/spa.c | 19 ++++-- module/zfs/spa_config.c | 14 ++++- module/zfs/spa_misc.c | 4 +- module/zfs/vdev.c | 62 +++++++++++++++++++ module/zfs/zfs_ioctl.c | 6 +- tests/zfs-tests/include/libtest.shlib | 2 +- .../functional/fault/auto_offline_001_pos.ksh | 27 ++++---- 20 files changed, 285 insertions(+), 49 deletions(-) create mode 100644 include/os/linux/spl/sys/misc_spl.h diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index a9e8baaa2c54..1f580750a6cc 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -323,6 +323,9 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, boolean_t is_disk; vdev_aux_t aux; uint64_t state = 0; + int l2arc = 0; + vdev_stat_t *vs; + unsigned int c; fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class); @@ 
-351,13 +354,33 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); - /* Can't replace l2arc with a spare: offline the device */ - if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, - &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) { - fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname); - zpool_vdev_offline(zhp, devname, B_TRUE); - } else if (!fmd_prop_get_int32(hdl, "spare_on_remove") || - replace_with_spare(hdl, zhp, vdev) == B_FALSE) { + nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c); + + /* + * If state removed is requested for already removed vdev, + * its a loopback event from spa_async_remove(). Just + * ignore it. + */ + if ((vs->vs_state == VDEV_STATE_REMOVED) && + (state == VDEV_STATE_REMOVED)) { + return; + } + + l2arc = (nvlist_lookup_string(nvl, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, &devtype) == 0 && + strcmp(devtype, VDEV_TYPE_L2CACHE) == 0); + + /* Remove the vdev since device is unplugged */ + if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { + int status = zpool_vdev_remove_wanted(zhp, devname); + fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" + ", ret:%d", devname, status); + } + + /* Replace the vdev with a spare if its not a l2arc */ + if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") || + replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { /* Could not handle with spare */ fmd_hdl_debug(hdl, "no spare for '%s'", devname); } diff --git a/cmd/ztest.c b/cmd/ztest.c index 31b9990a1fcf..e03e2714f57a 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -1165,7 +1165,7 @@ ztest_kill(ztest_shared_t *zs) * See comment above spa_write_cachefile(). 
*/ mutex_enter(&spa_namespace_lock); - spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); + spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE, B_FALSE); mutex_exit(&spa_namespace_lock); (void) raise(SIGKILL); diff --git a/include/libzfs.h b/include/libzfs.h index 96cf1e186521..39587d31fa6d 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -308,6 +308,7 @@ _LIBZFS_H int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); _LIBZFS_H int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); +_LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); _LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); _LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); diff --git a/include/os/linux/Makefile.am b/include/os/linux/Makefile.am index 4d6901c694c8..df248d82eb44 100644 --- a/include/os/linux/Makefile.am +++ b/include/os/linux/Makefile.am @@ -71,6 +71,7 @@ kernel_spl_sys_HEADERS = \ %D%/spl/sys/kmem_cache.h \ %D%/spl/sys/kstat.h \ %D%/spl/sys/list.h \ + %D%/spl/sys/misc_spl.h \ %D%/spl/sys/mod_os.h \ %D%/spl/sys/mutex.h \ %D%/spl/sys/param.h \ diff --git a/include/os/linux/spl/sys/misc_spl.h b/include/os/linux/spl/sys/misc_spl.h new file mode 100644 index 000000000000..887feec87225 --- /dev/null +++ b/include/os/linux/spl/sys/misc_spl.h @@ -0,0 +1,30 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _OS_LINUX_SPL_MISC_H +#define _OS_LINUX_SPL_MISC_H + +#include +#include + +extern void spl_signal_kobj_evt(struct block_device *bdev); + +#endif diff --git a/include/sys/spa.h b/include/sys/spa.h index e185ce6b1d8e..0cea113b83b8 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -838,7 +838,7 @@ extern kmutex_t spa_namespace_lock; #define SPA_CONFIG_UPDATE_POOL 0 #define SPA_CONFIG_UPDATE_VDEVS 1 -extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t); +extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t); extern void spa_config_load(void); extern nvlist_t *spa_all_configs(uint64_t *); extern void spa_config_set(spa_t *spa, nvlist_t *config); diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 5fec1d51a5f2..7a7c70dc1598 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -148,6 +148,7 @@ extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux); extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *); extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags); +extern int vdev_remove_wanted(spa_t *spa, uint64_t guid); extern void vdev_clear(spa_t *spa, vdev_t *vd); extern boolean_t vdev_is_dead(vdev_t *vd); @@ -190,6 +191,8 @@ typedef enum vdev_config_flag { VDEV_CONFIG_MISSING = 1 << 4 } vdev_config_flag_t; +extern void vdev_post_kobj_evt(vdev_t *vd); +extern void vdev_clear_kobj_evt(vdev_t *vd); extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config); extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vdev_config_flag_t flags); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index d22abfbc2598..a70cb8aa96e3 100644 --- a/include/sys/vdev_impl.h +++ 
b/include/sys/vdev_impl.h @@ -69,6 +69,7 @@ extern uint32_t zfs_vdev_async_write_max_active; * Virtual device operations */ typedef int vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd); +typedef void vdev_kobj_post_evt_func_t(vdev_t *vd); typedef void vdev_fini_func_t(vdev_t *vd); typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift, uint64_t *pshift); @@ -123,6 +124,7 @@ typedef const struct vdev_ops { vdev_config_generate_func_t *vdev_op_config_generate; vdev_nparity_func_t *vdev_op_nparity; vdev_ndisks_func_t *vdev_op_ndisks; + vdev_kobj_post_evt_func_t *vdev_op_kobj_evt_post; char vdev_op_type[16]; boolean_t vdev_op_leaf; } vdev_ops_t; @@ -436,6 +438,7 @@ struct vdev { boolean_t vdev_isl2cache; /* was a l2cache device */ boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */ boolean_t vdev_resilver_deferred; /* resilver deferred */ + boolean_t vdev_kobj_flag; /* kobj event record */ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ vdev_cache_t vdev_cache; /* physical block cache */ spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index aa4f78789631..7f57f8525137 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -50,6 +50,7 @@ extern "C" { #include #include #include +#include #include #include #include diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 0494aec208e5..b944fca6ffdc 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -548,6 +548,7 @@ + @@ -2302,6 +2303,7 @@ + @@ -3150,6 +3152,7 @@ + @@ -3485,6 +3488,11 @@ + + + + + @@ -3750,7 +3758,7 @@ - + @@ -3790,6 +3798,9 @@ + + + @@ -3903,16 +3914,17 @@ + - + - + @@ -4032,8 +4044,8 @@ - - + + @@ -4102,15 +4114,15 @@ - + - - + + @@ -4123,7 +4135,7 @@ - + @@ -4771,8 +4783,8 @@ - - + + @@ -4872,7 +4884,7 @@ - + @@ -4935,7 +4947,7 @@ - + @@ -5013,7 +5025,7 @@ - + @@ -5455,7 +5467,6 @@ - diff --git 
a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 928f8b4287ba..292c7f550a16 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3073,6 +3073,39 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) } } +/* + * Remove the specified vdev. Called from zed on udev remove event. + */ +int +zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path) +{ + zfs_cmd_t zc = {"\0"}; + char errbuf[ERRBUFLEN]; + nvlist_t *tgt; + boolean_t avail_spare, l2cache; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot remove %s"), path); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, + NULL)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, errbuf)); + + zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID); + + if (avail_spare) + return (zfs_error(hdl, EZFS_ISSPARE, errbuf)); + + zc.zc_cookie = VDEV_STATE_REMOVED; + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, errbuf)); +} + /* * Mark the given vdev faulted. */ diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 5179100d1665..f582f3215b13 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -45,8 +45,10 @@ #include #include #include +#include #include #include +#include unsigned long spl_hostid = 0; EXPORT_SYMBOL(spl_hostid); @@ -517,6 +519,28 @@ ddi_copyin(const void *from, void *to, size_t len, int flags) } EXPORT_SYMBOL(ddi_copyin); +/* + * Post a uevent to userspace whenever a new vdev adds to the pool. It is + * necessary to sync blkid information with udev, which zed daemon uses + * during device hotplug to identify the vdev. 
+ */ +void +spl_signal_kobj_evt(struct block_device *bdev) +{ +#if defined(_KERNEL) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + struct kobject *disk_kobj = bdev_kobj(bdev); +#else + struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj; +#endif + int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE); + if (ret) { + pr_warn("ZFS: Sending event '%d' to kobject: '%s' (%p): failed" + "(ret:%d)\n", KOBJ_CHANGE, kobject_name(disk_kobj), + disk_kobj, ret); + } +} +EXPORT_SYMBOL(spl_signal_kobj_evt); + int ddi_copyout(const void *from, void *to, size_t len, int flags) { diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index ba7adcc1b576..6fbb81904acd 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -179,6 +179,18 @@ vdev_disk_error(zio_t *zio) zio->io_flags); } +static void +vdev_disk_kobj_evt_post(vdev_t *v) +{ + vdev_disk_t *vd = v->vdev_tsd; + if (vd == NULL) { + vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n", + v->vdev_path); + } else { + spl_signal_kobj_evt(vd->vd_bdev); + } +} + static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *logical_ashift, uint64_t *physical_ashift) @@ -290,6 +302,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL, zfs_vdev_holder); if (unlikely(PTR_ERR(bdev) == -ENOENT)) { + /* + * There is no point of waiting since device is removed + * explicitly + */ + if (v->vdev_removed) + break; + schedule_timeout(MSEC_TO_TICK(10)); } else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) { timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10); @@ -957,7 +976,8 @@ vdev_ops_t vdev_disk_ops = { .vdev_op_nparity = NULL, .vdev_op_ndisks = NULL, .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ - .vdev_op_leaf = B_TRUE /* leaf vdev */ + .vdev_op_leaf = B_TRUE, /* leaf vdev */ + .vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post }; /* diff --git a/module/zfs/spa.c 
b/module/zfs/spa.c index b2b59af42947..52742f34b564 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -910,7 +910,16 @@ spa_change_guid(spa_t *spa) spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); if (error == 0) { - spa_write_cachefile(spa, B_FALSE, B_TRUE); + /* + * Clear the kobj flag from all the vdevs to allow + * vdev_cache_process_kobj_evt() to post events to all the + * vdevs since GUID is updated. + */ + vdev_clear_kobj_evt(spa->spa_root_vdev); + for (int i = 0; i < spa->spa_l2cache.sav_count; i++) + vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]); + + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); } @@ -5221,7 +5230,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, */ spa_unload(spa); spa_deactivate(spa); - spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE); spa_remove(spa); if (locked) mutex_exit(&spa_namespace_lock); @@ -6045,7 +6054,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_spawn_aux_threads(spa); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE); /* * Don't count references from objsets that are already closed @@ -6108,7 +6117,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) if (props != NULL) spa_configfile_set(spa, props, B_FALSE); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); zfs_dbgmsg("spa_import: verbatim import of %s", pool); mutex_exit(&spa_namespace_lock); @@ -6506,7 +6515,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) - spa_write_cachefile(spa, B_TRUE, B_TRUE); + spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE); spa_remove(spa); } else { /* diff --git a/module/zfs/spa_config.c 
b/module/zfs/spa_config.c index 91ac5c05e8af..5165c370403b 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -240,7 +240,8 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) * would be required. */ void -spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) +spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent, + boolean_t postblkidevent) { spa_config_dirent_t *dp, *tdp; nvlist_t *nvl; @@ -346,6 +347,16 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) if (postsysevent) spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC); + + /* + * Post udev event to sync blkid information if the pool is created + * or a new vdev is added to the pool. + */ + if ((target->spa_root_vdev) && postblkidevent) { + vdev_post_kobj_evt(target->spa_root_vdev); + for (int i = 0; i < target->spa_l2cache.sav_count; i++) + vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]); + } } /* @@ -600,6 +611,7 @@ spa_config_update(spa_t *spa, int what) */ if (!spa->spa_is_root) { spa_write_cachefile(spa, B_FALSE, + what != SPA_CONFIG_UPDATE_POOL, what != SPA_CONFIG_UPDATE_POOL); } diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index decf4ddae6af..b971bfc0cdc1 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1290,7 +1290,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, * If the config changed, update the config cache. 
*/ if (config_changed) - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); } /* @@ -1385,7 +1385,7 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) */ if (config_changed) { mutex_enter(&spa_namespace_lock); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); mutex_exit(&spa_namespace_lock); } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index ea0245610fb7..69921c09b68f 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1921,6 +1921,13 @@ vdev_open(vdev_t *vd) error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &logical_ashift, &physical_ashift); + + if (error == ENOENT && vd->vdev_removed) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED, + VDEV_AUX_NONE); + return (error); + } + /* * Physical volume size should never be larger than its max size, unless * the disk has shrunk while we were reading it or the device is buggy @@ -3139,6 +3146,38 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, mutex_exit(&vd->vdev_dtl_lock); } +/* + * Iterate over all the vdevs except l2arc and spare and + * post kobj events + */ +void +vdev_post_kobj_evt(vdev_t *vd) +{ + if (vd->vdev_ops->vdev_op_kobj_evt_post && + vd->vdev_kobj_flag == B_FALSE) { + vd->vdev_kobj_flag = B_TRUE; + vd->vdev_ops->vdev_op_kobj_evt_post(vd); + } + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_post_kobj_evt(vd->vdev_child[c]); + } +} + +/* + * Iterate over all the vdevs except l2arc and spare and + * clear kobj events + */ +void +vdev_clear_kobj_evt(vdev_t *vd) +{ + vd->vdev_kobj_flag = B_FALSE; + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_clear_kobj_evt(vd->vdev_child[c]); + } +} + int vdev_dtl_load(vdev_t *vd) { @@ -3920,6 +3959,29 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux) return (spa_vdev_state_exit(spa, vd, 0)); } +int +vdev_remove_wanted(spa_t *spa, uint64_t guid) +{ + vdev_t *vd; + + spa_vdev_state_enter(spa, SCL_NONE); + + 
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) + return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV))); + + /* + * If the vdev is already removed, then don't do anything. + */ + if (vd->vdev_removed) + return (spa_vdev_state_exit(spa, NULL, 0)); + + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(spa, SPA_ASYNC_REMOVE); + + return (spa_vdev_state_exit(spa, vd, 0)); +} + + /* * Online the given vdev. * diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 382975208b97..f52f1b9c7aba 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1912,6 +1912,10 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc) error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); break; + case VDEV_STATE_REMOVED: + error = vdev_remove_wanted(spa, zc->zc_guid); + break; + default: error = SET_ERROR(EINVAL); } @@ -2928,7 +2932,7 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc) mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); - spa_write_cachefile(spa, B_FALSE, B_TRUE); + spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE); } mutex_exit(&spa_namespace_lock); if (spa != NULL) { diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 435dcb81c3c3..3483fdbffb68 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -1884,7 +1884,7 @@ function wait_hotspare_state # pool disk state timeout # # Return 0 is pool/disk matches expected state, 1 otherwise # -function check_vdev_state # pool disk state{online,offline,unavail} +function check_vdev_state # pool disk state{online,offline,unavail,removed} { typeset pool=$1 typeset disk=${2#*$DEV_DSKDIR/} diff --git a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh index 17bde9a70636..0ab9317c0a06 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh +++ 
b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh @@ -24,29 +24,28 @@ # # DESCRIPTION: -# Testing Fault Management Agent ZED Logic - Physically removed device is -# made unavail and onlined when reattached +# Testing Fault Management Agent ZED Logic - Physically detached device is +# marked REMOVED and onlined when reattached # # STRATEGY: # 1. Create a pool # 2. Simulate physical removal of one device -# 3. Verify the device is unavailable +# 3. Verify the device is removed when detached # 4. Reattach the device # 5. Verify the device is onlined # 6. Repeat the same tests with a spare device: # zed will use the spare to handle the removed data device # 7. Repeat the same tests again with a faulted spare device: -# the removed data device should be unavailable +# the detached data device should be in the REMOVED state # # NOTE: the use of 'block_device_wait' throughout the test helps avoid race # conditions caused by mixing creation/removal events from partitioning the # disk (zpool create) and events from physically removing it (remove_disk). # -# NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a -# vdev to the unavailable state. The ZED does receive a removal notification -# but only relies on it to activate a hot spare. Additional work is planned -# to extend an existing ioctl interface to allow the ZED to transition the -# vdev in to a removed state. +# NOTE: the test relies on the ZED to transition the vdev to the REMOVED state +# when it receives a device removal event. On that event the ZED asks the +# kernel to mark the vdev removed (zpool_vdev_remove_wanted) and, unless the +# vdev is an l2arc device, tries to activate a hot spare. # verify_runnable "both" @@ -103,8 +102,8 @@ do log_must mkfile 1m $mntpnt/file sync_pool $TESTPOOL - # 3. Verify the device is unavailable. - log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL" + # 3. Verify the device is removed. 
+ log_must wait_vdev_state $TESTPOOL $removedev "REMOVED" # 4. Reattach the device insert_disk $removedev @@ -136,7 +135,7 @@ do # 3. Verify the device is handled by the spare. log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE" - log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL" + log_must wait_vdev_state $TESTPOOL $removedev "REMOVED" # 4. Reattach the device insert_disk $removedev @@ -170,8 +169,8 @@ do log_must mkfile 1m $mntpnt/file sync_pool $TESTPOOL - # 4. Verify the device is unavailable - log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL" + # 4. Verify the device is removed + log_must wait_vdev_state $TESTPOOL $removedev "REMOVED" # 5. Reattach the device insert_disk $removedev