Skip to content

Commit

Permalink
Various ZED fixes
Browse files Browse the repository at this point in the history
* Teach ZED to handle spares usingi the configured ashift: if the zpool
   'ashift' property is set then ZED should use its value when kicking
   in a hotspare; with this change 512e disks can be used as spares
   for VDEVs that were created with ashift=9, even if ZFS natively
   detects them as 4K block devices.

 * Introduce an additional auto_spare test case which verifies that in
   the face of multiple device failures an appropiate number of spares
   are kicked in.

 * Fix zed_stop() in "libtest.shlib" which did not correctly wait the
   target pid.

 * Fix ZED crashing on startup caused by a race condition in libzfs
   when used in multi-threaded context.

 * Convert ZED over to using the tpool library which is already present
   in the Illumos FMA code.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: loli10K <[email protected]>
Closes openzfs#2562 
Closes openzfs#6858
  • Loading branch information
loli10K authored and behlendorf committed Dec 9, 2017
1 parent 3ab3166 commit 4e9b156
Show file tree
Hide file tree
Showing 19 changed files with 368 additions and 101 deletions.
16 changes: 0 additions & 16 deletions cmd/zed/agents/zfs_agents.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,19 +350,3 @@ zfs_agent_fini(void)

g_zfs_hdl = NULL;
}

/*
* In ZED context, all the FMA agents run in the same thread
* and do not require a unique libzfs instance. Modules should
* use these stubs.
*/
libzfs_handle_t *
__libzfs_init(void)
{
return (g_zfs_hdl);
}

void
__libzfs_fini(libzfs_handle_t *hdl)
{
}
7 changes: 0 additions & 7 deletions cmd/zed/agents/zfs_agents.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,6 @@ extern int zfs_slm_init(void);
extern void zfs_slm_fini(void);
extern void zfs_slm_event(const char *, const char *, nvlist_t *);

/*
* In ZED context, all the FMA agents run in the same thread
* and do not require a unique libzfs instance.
*/
extern libzfs_handle_t *__libzfs_init(void);
extern void __libzfs_fini(libzfs_handle_t *);

#ifdef __cplusplus
}
#endif
Expand Down
10 changes: 5 additions & 5 deletions cmd/zed/agents/zfs_diagnosis.c
Original file line number Diff line number Diff line change
Expand Up @@ -919,27 +919,27 @@ _zfs_diagnosis_init(fmd_hdl_t *hdl)
{
libzfs_handle_t *zhdl;

if ((zhdl = __libzfs_init()) == NULL)
if ((zhdl = libzfs_init()) == NULL)
return;

if ((zfs_case_pool = uu_list_pool_create("zfs_case_pool",
sizeof (zfs_case_t), offsetof(zfs_case_t, zc_node),
NULL, UU_LIST_POOL_DEBUG)) == NULL) {
__libzfs_fini(zhdl);
libzfs_fini(zhdl);
return;
}

if ((zfs_cases = uu_list_create(zfs_case_pool, NULL,
UU_LIST_DEBUG)) == NULL) {
uu_list_pool_destroy(zfs_case_pool);
__libzfs_fini(zhdl);
libzfs_fini(zhdl);
return;
}

if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
uu_list_destroy(zfs_cases);
uu_list_pool_destroy(zfs_case_pool);
__libzfs_fini(zhdl);
libzfs_fini(zhdl);
return;
}

Expand Down Expand Up @@ -975,5 +975,5 @@ _zfs_diagnosis_fini(fmd_hdl_t *hdl)
uu_list_pool_destroy(zfs_case_pool);

zhdl = fmd_hdl_getspecific(hdl);
__libzfs_fini(zhdl);
libzfs_fini(zhdl);
}
46 changes: 17 additions & 29 deletions cmd/zed/agents/zfs_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
* trigger the FMA fault that we skipped earlier.
*
* ZFS on Linux porting notes:
* In lieu of a thread pool, just spawn a thread on demmand.
* Linux udev provides a disk insert for both the disk and the partition
*
*/
Expand All @@ -83,6 +82,7 @@
#include <sys/sunddi.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <thread_pool.h>
#include <pthread.h>
#include <unistd.h>
#include "zfs_agents.h"
Expand All @@ -97,12 +97,12 @@ typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
libzfs_handle_t *g_zfshdl;
list_t g_pool_list; /* list of unavailable pools at initialization */
list_t g_device_list; /* list of disks with asynchronous label request */
tpool_t *g_tpool;
boolean_t g_enumeration_done;
pthread_t g_zfs_tid;
pthread_t g_zfs_tid; /* zfs_enum_pools() thread */

typedef struct unavailpool {
zpool_handle_t *uap_zhp;
pthread_t uap_enable_tid; /* dataset enable thread if activated */
list_node_t uap_node;
} unavailpool_t;

Expand Down Expand Up @@ -135,7 +135,6 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data)
unavailpool_t *uap;
uap = malloc(sizeof (unavailpool_t));
uap->uap_zhp = zhp;
uap->uap_enable_tid = 0;
list_insert_tail((list_t *)data, uap);
} else {
zpool_close(zhp);
Expand Down Expand Up @@ -512,19 +511,14 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
(dp->dd_func)(zhp, nvl, dp->dd_islabeled);
}

static void *
void
zfs_enable_ds(void *arg)
{
unavailpool_t *pool = (unavailpool_t *)arg;

assert(pool->uap_enable_tid = pthread_self());

(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
zpool_close(pool->uap_zhp);
pool->uap_zhp = NULL;

/* Note: zfs_slm_fini() will cleanup this pool entry on exit */
return (NULL);
free(pool);
}

static int
Expand Down Expand Up @@ -559,15 +553,13 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data)
for (pool = list_head(&g_pool_list); pool != NULL;
pool = list_next(&g_pool_list, pool)) {

if (pool->uap_enable_tid != 0)
continue; /* entry already processed */
if (strcmp(zpool_get_name(zhp),
zpool_get_name(pool->uap_zhp)))
continue;
if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
/* send to a background thread; keep on list */
(void) pthread_create(&pool->uap_enable_tid,
NULL, zfs_enable_ds, pool);
list_remove(&g_pool_list, pool);
(void) tpool_dispatch(g_tpool, zfs_enable_ds,
pool);
break;
}
}
Expand Down Expand Up @@ -857,7 +849,7 @@ zfs_enum_pools(void *arg)
int
zfs_slm_init()
{
if ((g_zfshdl = __libzfs_init()) == NULL)
if ((g_zfshdl = libzfs_init()) == NULL)
return (-1);

/*
Expand All @@ -869,7 +861,7 @@ zfs_slm_init()

if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) != 0) {
list_destroy(&g_pool_list);
__libzfs_fini(g_zfshdl);
libzfs_fini(g_zfshdl);
return (-1);
}

Expand All @@ -887,19 +879,15 @@ zfs_slm_fini()

/* wait for zfs_enum_pools thread to complete */
(void) pthread_join(g_zfs_tid, NULL);
/* destroy the thread pool */
if (g_tpool != NULL) {
tpool_wait(g_tpool);
tpool_destroy(g_tpool);
}

while ((pool = (list_head(&g_pool_list))) != NULL) {
/*
* each pool entry has two possibilities
* 1. was made available (so wait for zfs_enable_ds thread)
* 2. still unavailable (just close the pool)
*/
if (pool->uap_enable_tid)
(void) pthread_join(pool->uap_enable_tid, NULL);
else if (pool->uap_zhp != NULL)
zpool_close(pool->uap_zhp);

list_remove(&g_pool_list, pool);
zpool_close(pool->uap_zhp);
free(pool);
}
list_destroy(&g_pool_list);
Expand All @@ -910,7 +898,7 @@ zfs_slm_fini()
}
list_destroy(&g_device_list);

__libzfs_fini(g_zfshdl);
libzfs_fini(g_zfshdl);
}

void
Expand Down
16 changes: 14 additions & 2 deletions cmd/zed/agents/zfs_retire.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
nvlist_t **spares;
uint_t s, nspares;
char *dev_name;
zprop_source_t source;
int ashift;

config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
Expand All @@ -189,6 +191,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
&spares, &nspares) != 0)
return;

/*
* lookup "ashift" pool property, we may need it for the replacement
*/
ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source);

replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);

(void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
Expand All @@ -207,6 +214,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
&spare_name) != 0)
continue;

/* if set, add the "ashift" pool property to the spare nvlist */
if (source != ZPROP_SRC_DEFAULT)
(void) nvlist_add_uint64(spares[s],
ZPOOL_CONFIG_ASHIFT, ashift);

(void) nvlist_add_nvlist_array(replacement,
ZPOOL_CONFIG_CHILDREN, &spares[s], 1);

Expand Down Expand Up @@ -483,7 +495,7 @@ _zfs_retire_init(fmd_hdl_t *hdl)
zfs_retire_data_t *zdp;
libzfs_handle_t *zhdl;

if ((zhdl = __libzfs_init()) == NULL)
if ((zhdl = libzfs_init()) == NULL)
return;

if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
Expand All @@ -504,7 +516,7 @@ _zfs_retire_fini(fmd_hdl_t *hdl)

if (zdp != NULL) {
zfs_retire_clear_data(hdl, zdp);
__libzfs_fini(zdp->zrd_hdl);
libzfs_fini(zdp->zrd_hdl);
fmd_hdl_free(hdl, zdp, sizeof (zfs_retire_data_t));
}
}
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ tags = ['functional', 'exec']

[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos.ksh']
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
tags = ['functional', 'fault']

[tests/functional/features/async_destroy]
Expand Down
36 changes: 33 additions & 3 deletions tests/zfs-tests/include/blkdev.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -353,16 +353,35 @@ function insert_disk #disk scsi_host

#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
typeset devsize=$1
typeset hosts=$2
typeset tgts=$3
typeset luns=$4
typeset blksz=$5

[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
[[ -z $luns ]] && log_fail "Arguments invalid or missing"
[[ -z $luns ]] || [[ -z $blksz ]] && \
log_fail "Arguments invalid or missing"

case "$5" in
'512b')
typeset sector=512
typeset blkexp=0
;;
'512e')
typeset sector=512
typeset blkexp=3
;;
'4Kn')
typeset sector=4096
typeset blkexp=0
;;
*) log_fail "Unsupported blksz value: $5" ;;
esac

if is_linux; then
modprobe -n scsi_debug
Expand All @@ -375,7 +394,8 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
log_fail "scsi_debug module already installed"
else
log_must modprobe scsi_debug dev_size_mb=$devsize \
add_host=$hosts num_tgts=$tgts max_luns=$luns
add_host=$hosts num_tgts=$tgts max_luns=$luns \
sector_size=$sector physblk_exp=$blkexp
block_device_wait
lsscsi | egrep scsi_debug > /dev/null
if (($? == 1)); then
Expand All @@ -385,6 +405,16 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
fi
}

#
# Unload scsi_debug module, if needed.
#
function unload_scsi_debug
{
if lsmod | grep scsi_debug >/dev/null; then
log_must modprobe -r scsi_debug
fi
}

#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
Expand Down
16 changes: 14 additions & 2 deletions tests/zfs-tests/include/libtest.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -3158,13 +3158,25 @@ function zed_stop
if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
zedpid=$(cat ${ZEDLET_DIR}/zed.pid)
kill $zedpid
wait $zedpid
while ps -p $zedpid > /dev/null; do
sleep 1
done
rm -f ${ZEDLET_DIR}/zed.pid
fi

return 0
}

#
# Drain all zevents
#
function zed_events_drain
{
while [ $(zpool events -H | wc -l) -ne 0 ]; do
sleep 1
zpool events -c >/dev/null
done
}

#
# Check is provided device is being active used as a swap device.
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if is_linux; then
for SDDEVICE in $(get_debug_device); do
unplug $SDDEVICE
done
modprobe -r scsi_debug
unload_scsi_debug
fi

log_pass
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ verify_runnable "global"

# Create scsi_debug devices for the reopen tests
if is_linux; then
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
else
log_unsupported "scsi debug module unsupported"
fi
Expand Down
4 changes: 3 additions & 1 deletion tests/zfs-tests/tests/functional/fault/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ dist_pkgdata_SCRIPTS = \
auto_online_001_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
auto_spare_002_pos.ksh
auto_spare_002_pos.ksh \
auto_spare_ashift.ksh \
auto_spare_multiple.ksh
Loading

0 comments on commit 4e9b156

Please sign in to comment.