Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend deadman logic #6999

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -4542,13 +4542,23 @@ main(int argc, char **argv)
}
} else {
error = open_objset(target, DMU_OST_ANY, FTAG, &os);
if (error == 0)
spa = dmu_objset_spa(os);
}
}
nvlist_free(policy);

if (error)
fatal("can't open '%s': %s", target, strerror(error));

/*
* Set the pool failure mode to panic in order to prevent the pool
* from suspending. A suspended I/O will have no way to resume and
* can prevent the zdb(8) command from terminating as expected.
*/
if (spa != NULL)
spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;

argv++;
argc--;
if (!dump_opt['R']) {
Expand Down
78 changes: 66 additions & 12 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {

extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
extern unsigned long zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern int zfs_abd_scatter_enabled;
Expand Down Expand Up @@ -447,6 +448,7 @@ static kmutex_t ztest_vdev_lock;
static rwlock_t ztest_name_lock;

static boolean_t ztest_dump_core = B_TRUE;
static boolean_t ztest_dump_debug_buffer = B_FALSE;
static boolean_t ztest_exiting;

/* Global commit callback list */
Expand Down Expand Up @@ -495,6 +497,16 @@ _umem_logging_init(void)
return ("fail,contents"); /* $UMEM_LOGGING setting */
}

/*
 * When ztest_dump_debug_buffer is set (the -G option), print a blank
 * separator line and then hand off to zfs_dbgmsg_print() with the
 * "ztest" tag.  When the flag is clear this does nothing.
 */
static void
dump_debug_buffer(void)
{
	if (ztest_dump_debug_buffer) {
		(void) printf("\n");
		zfs_dbgmsg_print("ztest");
	}
}

#define BACKTRACE_SZ 100

static void sig_handler(int signo)
Expand All @@ -507,6 +519,7 @@ static void sig_handler(int signo)
nptrs = backtrace(buffer, BACKTRACE_SZ);
backtrace_symbols_fd(buffer, nptrs, STDERR_FILENO);
#endif
dump_debug_buffer();

/*
* Restore default action and re-raise signal so SIGSEGV and
Expand Down Expand Up @@ -544,6 +557,9 @@ fatal(int do_perror, char *message, ...)
}
(void) fprintf(stderr, "%s\n", buf);
fatal_msg = buf; /* to ease debugging */

dump_debug_buffer();

if (ztest_dump_core)
abort();
exit(3);
Expand Down Expand Up @@ -641,6 +657,7 @@ usage(boolean_t requested)
"\t[-B alt_ztest (default: <none>)] alternate ztest path\n"
"\t[-o variable=value] ... set global variable to an unsigned\n"
"\t 32-bit integer value\n"
"\t[-G dump zfs_dbgmsg buffer before exiting due to an error\n"
"\t[-h] (print help)\n"
"",
zo->zo_pool,
Expand Down Expand Up @@ -676,7 +693,7 @@ process_options(int argc, char **argv)
bcopy(&ztest_opts_defaults, zo, sizeof (*zo));

while ((opt = getopt(argc, argv,
"v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:o:")) != EOF) {
"v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:o:G")) != EOF) {
value = 0;
switch (opt) {
case 'v':
Expand Down Expand Up @@ -771,6 +788,9 @@ process_options(int argc, char **argv)
if (set_global_var(optarg) != 0)
usage(B_FALSE);
break;
case 'G':
ztest_dump_debug_buffer = B_TRUE;
break;
case 'h':
usage(B_TRUE);
break;
Expand Down Expand Up @@ -6224,15 +6244,48 @@ ztest_resume_thread(void *arg)
thread_exit();
}

#define GRACE 300

#if 0
/*
 * Deadman watchdog for a ztest run.  arg is the ztest_shared_t for the run.
 *
 * Loops forever; on each pass it:
 *   1. sleeps in poll() for (zs_thread_stop - zs_thread_start) plus
 *      zfs_deadman_synctime_ms,
 *   2. calls fatal() if the pool is suspended or has no root vdev,
 *   3. calls vdev_deadman() on the root vdev,
 *   4. calls fatal() if gethrtime() is past zs_proc_stop plus
 *      zfs_deadman_synctime_ms, otherwise prints a progress message.
 *
 * NOTE(review): this span looks like a rendered diff with removed and added
 * lines interleaved.  The ztest_deadman_alarm(int sig) signature and the
 * fatal() call that uses GRACE appear to belong to the old SIGALRM handler
 * that this function replaces -- confirm against the real file.
 */
static void
ztest_deadman_alarm(int sig)
ztest_deadman_thread(void *arg)
{
fatal(0, "failed to complete within %d seconds of deadline", GRACE);
ztest_shared_t *zs = arg;
spa_t *spa = ztest_spa;
hrtime_t delta, overdue, total = 0;

for (;;) {
/* Sleep for the planned thread run time plus the deadman grace period. */
delta = zs->zs_thread_stop - zs->zs_thread_start +
MSEC2NSEC(zfs_deadman_synctime_ms);

(void) poll(NULL, 0, (int)NSEC2MSEC(delta));

/*
* If the pool is suspended then fail immediately. Otherwise,
* check to see if the pool is making any progress. If
* vdev_deadman() discovers that there haven't been any recent
* I/Os then it will end up aborting the tests.
*
* NOTE(review): zfs_deadman_synctime_ms is declared unsigned long
* but is formatted with %llu below -- confirm the argument width
* matches on every target.
*/
if (spa_suspended(spa) || spa->spa_root_vdev == NULL) {
fatal(0, "aborting test after %llu seconds because "
"pool has transitioned to a suspended state.",
zfs_deadman_synctime_ms / 1000);
}
vdev_deadman(spa->spa_root_vdev, FTAG);

/*
* If the process doesn't complete within a grace period of
* zfs_deadman_synctime_ms over the expected finish time,
* then it may be hung and is terminated.
*
* NOTE(review): total grows by only the grace period per pass even
* though the poll() above also waits for the thread run time, and
* it is printed with %llu here but %lld below -- confirm both the
* reported figure and the format specifiers are intended.
*/
overdue = zs->zs_proc_stop + MSEC2NSEC(zfs_deadman_synctime_ms);
total += zfs_deadman_synctime_ms / 1000;
if (gethrtime() > overdue) {
fatal(0, "aborting test after %llu seconds because "
"the process is overdue for termination.", total);
}

(void) printf("ztest has been running for %lld seconds\n",
total);
}
}
#endif

static void
ztest_execute(int test, ztest_info_t *zi, uint64_t id)
Expand Down Expand Up @@ -6491,13 +6544,13 @@ ztest_run(ztest_shared_t *zs)
resume_thread = thread_create(NULL, 0, ztest_resume_thread,
spa, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);

#if 0
/*
* Set a deadman alarm to abort() if we hang.
* Create a deadman thread and set to panic if we hang.
*/
signal(SIGALRM, ztest_deadman_alarm);
alarm((zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + GRACE);
#endif
(void) thread_create(NULL, 0, ztest_deadman_thread,
zs, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);

spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;

/*
* Verify that we can safely inquire about any object,
Expand Down Expand Up @@ -7047,6 +7100,7 @@ main(int argc, char **argv)
(void) setvbuf(stdout, NULL, _IOLBF, 0);

dprintf_setup(&argc, argv);
zfs_deadman_synctime_ms = 300000;

action.sa_handler = sig_handler;
sigemptyset(&action.sa_mask);
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile
tests/zfs-tests/tests/functional/compression/Makefile
tests/zfs-tests/tests/functional/ctime/Makefile
tests/zfs-tests/tests/functional/deadman/Makefile
tests/zfs-tests/tests/functional/delegate/Makefile
tests/zfs-tests/tests/functional/devices/Makefile
tests/zfs-tests/tests/functional/events/Makefile
Expand Down
1 change: 1 addition & 0 deletions include/sys/fm/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ extern "C" {
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
#define FM_EREPORT_ZFS_DEADMAN "deadman"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll want to add this to the zfs-events(5) man page and also probably beef up the existing documentation for the plain "delay" event. In particular, we'll want to mention its (the plain 'delay' event) interaction with zfs_deadman_ziotime_ms.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do. I also opted simply to expand on the deadman behavior in the existing modules man page rather than add a new one.

#define FM_EREPORT_ZFS_POOL "zpool"
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
Expand Down
9 changes: 8 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -955,12 +955,15 @@ extern void spa_evicting_os_deregister(spa_t *, objset_t *os);
extern void spa_evicting_os_wait(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_prev_software_version(spa_t *spa);
extern uint8_t spa_get_failmode(spa_t *spa);
extern uint64_t spa_get_failmode(spa_t *spa);
extern uint64_t spa_get_deadman_failmode(spa_t *spa);
extern void spa_set_deadman_failmode(spa_t *spa, const char *failmode);
extern boolean_t spa_suspended(spa_t *spa);
extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
extern uint64_t spa_deadman_synctime(spa_t *spa);
extern uint64_t spa_deadman_ziotime(spa_t *spa);

/* Miscellaneous support routines */
extern void spa_activate_mos_feature(spa_t *spa, const char *feature,
Expand Down Expand Up @@ -1074,6 +1077,10 @@ extern boolean_t spa_debug_enabled(spa_t *spa);
}

extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */
extern int zfs_deadman_enabled;
extern unsigned long zfs_deadman_synctime_ms;
extern unsigned long zfs_deadman_ziotime_ms;
extern unsigned long zfs_deadman_checktime_ms;

#ifdef __cplusplus
}
Expand Down
4 changes: 3 additions & 1 deletion include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ struct spa {
uint64_t spa_pool_props_object; /* object for properties */
uint64_t spa_bootfs; /* default boot filesystem */
uint64_t spa_failmode; /* failure mode for the pool */
uint64_t spa_deadman_failmode; /* failure mode for deadman */
uint64_t spa_delegation; /* delegation on/off */
list_t spa_config_list; /* previous cache file(s) */
/* per-CPU array of root of async I/O: */
Expand Down Expand Up @@ -270,7 +271,8 @@ struct spa {
taskqid_t spa_deadman_tqid; /* Task id */
uint64_t spa_deadman_calls; /* number of deadman calls */
hrtime_t spa_sync_starttime; /* starting time of spa_sync */
uint64_t spa_deadman_synctime; /* deadman expiration timer */
uint64_t spa_deadman_synctime; /* deadman sync expiration */
uint64_t spa_deadman_ziotime; /* deadman zio expiration */
uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */
Expand Down
2 changes: 1 addition & 1 deletion include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
extern void vdev_deadman(vdev_t *vd);
extern void vdev_deadman(vdev_t *vd, char *tag);

extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
Expand Down
1 change: 1 addition & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);

#define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at)
#define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at)
#define cv_wait_sig(cv, mp) cv_wait(cv, mp)
#define cv_wait_io(cv, mp) cv_wait(cv, mp)
Expand Down
1 change: 1 addition & 0 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
extern void zio_delay_init(zio_t *zio);
extern void zio_delay_interrupt(zio_t *zio);
extern void zio_deadman(zio_t *zio, char *tag);

extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
Expand Down
4 changes: 4 additions & 0 deletions man/man1/ztest.1
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ Total test run time.
.BI "\-z" " zil_failure_rate" " (default: fail every 2^5 allocs)
.IP
Injected failure rate.
.HP
.BI "\-G"
.IP
Dump zfs_dbgmsg buffer before exiting.
.SH "EXAMPLES"
.LP
To override /tmp as your location for block files, you can use the -f
Expand Down
19 changes: 16 additions & 3 deletions man/man5/zfs-events.5
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ part here.
\fBchecksum\fR
.ad
.RS 12n
Issued when a checksum error have been detected.
Issued when a checksum error has been detected.
.RE

.sp
Expand All @@ -76,14 +76,27 @@ Issued when there is an I/O error in a vdev in the pool.
Issued when there have been data errors in the pool.
.RE

.sp
.ne 2
.na
\fBdeadman\fR
.ad
.RS 12n
Issued when an I/O is determined to be "hung". This can be caused by lost
completion events due to flaky hardware or drivers. See the
\fBzfs_deadman_failmode\fR module option description for additional
information regarding "hung" I/O detection and configuration.
.RE

.sp
.ne 2
.na
\fBdelay\fR
.ad
.RS 12n
Issued when an I/O was slow to complete as defined by the zio_delay_max module
option.
Issued when a completed I/O exceeds the maximum allowed time specified
by the \fBzio_delay_max\fR module option. This can be an indicator of
problems with the underlying storage device.
.RE

.sp
Expand Down
Loading