Skip to content

Commit

Permalink
ZED: Match added disk by pool/vdev GUID if found (openzfs#12217)
Browse files Browse the repository at this point in the history
This enables ZED to auto-online vdevs that ZFS does not manage as whole
disks (for example, partitions).

Signed-off-by: Ryan Moeller <[email protected]>
Reviewed-by: Don Brady <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tony Hutter <[email protected]>
  • Loading branch information
Ryan Moeller authored and Ubuntu committed Aug 24, 2021
1 parent ec6a6e8 commit 4b6be24
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 10 deletions.
40 changes: 34 additions & 6 deletions cmd/zed/agents/zfs_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,27 @@ devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
return (data.dd_found);
}

/*
 * Look for a vdev by guid across every pool that zpool_iter() visits.
 *
 * The pool guid, vdev guid, new devid and slice flag are packed into a
 * dev_data_t search state, together with func (stored in dd_func), and
 * that state is handed to zfs_iter_pool for each pool.
 *
 * Returns the dd_found flag of the search state once iteration is done.
 */
static boolean_t
guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
    zfs_process_func_t func, boolean_t is_slice)
{
	/* Members not named here are zero-initialized. */
	dev_data_t search = {
		.dd_func = func,
		.dd_found = B_FALSE,
		.dd_pool_guid = pool_guid,
		.dd_vdev_guid = vdev_guid,
		.dd_islabeled = is_slice,
		.dd_new_devid = devid,
	};

	/* Only dd_found matters; the iterator's own result is discarded. */
	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);

	return (search.dd_found);
}

/*
* Handle an EC_DEV_ADD.ESC_DISK event.
*
Expand All @@ -663,15 +684,18 @@ static int
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
{
char *devpath = NULL, *devid;
uint64_t pool_guid = 0, vdev_guid = 0;
boolean_t is_slice;

/*
* Expecting a devid string and an optional physical location
* Expecting a devid string and an optional physical location and guid
*/
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
return (-1);

(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);

Expand All @@ -682,12 +706,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
* Iterate over all vdevs looking for a match in the following order:
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
*
* For disks, we only want to pay attention to vdevs marked as whole
* disks or are a multipath device.
* 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
*/
if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
if (devid_iter(devid, zfs_process_add, is_slice))
return (0);
if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
is_slice))
return (0);
if (vdev_guid != 0)
(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
is_slice);

return (0);
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/zed/zed_disk_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
/*
 * nvlist_lookup_boolean() returns 0 when the name is present and an
 * errno value otherwise, so presence is tested against 0 rather than
 * B_TRUE (which would never match a successful lookup).
 */
if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0)
	zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
Expand Down
9 changes: 5 additions & 4 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,11 @@ tests = ['fallocate_prealloc', 'fallocate_punch-hole']
tags = ['functional', 'fallocate']

[tests/functional/fault:Linux]
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple',
'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
'decompress_fault', 'scrub_after_resilver', 'zpool_status_-s']
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
'zpool_status_-s']
tags = ['functional', 'fault']

[tests/functional/features/large_dnode:Linux]
Expand Down
1 change: 1 addition & 0 deletions tests/test-runner/bin/zts-report.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ if os.environ.get('CI') == 'true':
'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
'fault/auto_offline_001_pos': ['SKIP', ci_reason],
'fault/auto_online_001_pos': ['SKIP', ci_reason],
'fault/auto_online_002_pos': ['SKIP', ci_reason],
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
'fault/auto_spare_ashift': ['SKIP', ci_reason],
'fault/auto_spare_shared': ['SKIP', ci_reason],
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/fault/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
auto_offline_001_pos.ksh \
auto_online_001_pos.ksh \
auto_online_002_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
auto_spare_002_pos.ksh \
Expand Down
94 changes: 94 additions & 0 deletions tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2019 by Delphix. All rights reserved.
# Portions Copyright 2021 iXsystems, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
# Now with partitioned vdevs.
#
# STRATEGY:
# 1. Partition a scsi_debug device for simulating removal
# 2. Create a pool
# 3. Offline disk
# 4. ZED polls for an event change so that the re-inserted disk is
#    automatically added back to the pool.
#
# NOTE(review): verify_runnable is not defined in this script (presumably it
# comes from the sourced test library); "both" looks like it names the
# environments this test may run in - confirm against the library.
verify_runnable "both"

# Exit handler: destroy the test pool if it still exists, then unload the
# scsi_debug device that the test loaded.
function cleanup
{
	if poolexists ${TESTPOOL}; then
		destroy_pool ${TESTPOOL}
	fi
	unload_scsi_debug
}

log_assert "Testing automated auto-online FMA test with partitioned vdev"

log_onexit cleanup

# Load a scsi_debug device, clear any label on it, and create the single
# partition that is used as the removable vdev.
load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
SDDEVICE=$(get_debug_device)
zpool labelclear -f ${SDDEVICE}
partition_disk ${SDSIZE} ${SDDEVICE} 1
part=${SDDEVICE}1
host=$(get_scsi_host ${SDDEVICE})

# Wait for the partition node to show up before building the pool on it.
block_device_wait /dev/${part}
log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}

# Write some data into the pool
log_must mkfile ${FSIZE} /${TESTPOOL}/data

# Remove the whole scsi_debug disk; the pool must report degraded.
remove_disk ${SDDEVICE}
if ! check_state ${TESTPOOL} "" "degraded"; then
	log_fail "${TESTPOOL} is not degraded"
fi

# Start from an empty zpool event log
log_must zpool events -c

# Re-insert the disk on its original SCSI host
insert_disk ${SDDEVICE} ${host}

# Poll once per second until the pool reports resilvered, failing the test
# when the counter reaches MAXTIMEOUT.
log_note "Delay for ZED auto-online"
typeset -i timeout=0
while ! is_pool_resilvered ${TESTPOOL}; do
	((timeout++ == MAXTIMEOUT)) && log_fail "Timeout occurred"
	sleep 1
done
log_note "Auto-online of ${SDDEVICE} is complete"

# Confirm the pool is back online after the auto-online
sleep 1
if ! check_state ${TESTPOOL} "" "online"; then
	log_fail "${TESTPOOL} is not back online"
fi

log_must zpool destroy ${TESTPOOL}

log_pass "Auto-online with partitioned vdev test successful"

0 comments on commit 4b6be24

Please sign in to comment.