From 0ac440de5259fcbeb6cf905d61bc0b911dbf1835 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Thu, 13 Jun 2019 22:03:15 -0600 Subject: [PATCH 1/2] OpenZFS 9425 - channel programs can be interrupted Problem Statement ================= ZFS Channel program scripts currently require a timeout, so that hung or long-running scripts return a timeout error instead of causing ZFS to get wedged. This limit can currently be set up to 100 million Lua instructions. Even with a limit in place, it would be desirable to have a sys admin (support engineer) be able to cancel a script that is taking a long time. Proposed Solution ================= Make it possible to abort a channel program by sending an interrupt signal.In the underlying txg_wait_sync function, switch the cv_wait to a cv_wait_sig to catch the signal. Once a signal is encountered, the dsl_sync_task function can install a Lua hook that will get called before the Lua interpreter executes a new line of code. The dsl_sync_task can resume with a standard txg_wait_sync call and wait for the txg to complete. Meanwhile, the hook will abort the script and indicate that the channel program was canceled. The kernel returns a EINTR to indicate that the channel program run was canceled. Porting notes: Added missing return value from cv_wait_sig() Authored by: Don Brady Reviewed by: Sebastien Roy Reviewed by: Serapheim Dimitropoulos Reviewed by: Matt Ahrens Approved by: Robert Mustacchi Ported-by: Don Brady OpenZFS-issue: https://www.illumos.org/issues/9425 OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/d0cb1fb926 --- include/spl/sys/condvar.h | 2 +- include/sys/dsl_synctask.h | 3 + include/sys/txg.h | 5 + include/sys/zcp.h | 31 +++ include/sys/zfs_context.h | 2 +- lib/libzpool/kernel.c | 7 + module/spl/spl-condvar.c | 10 +- module/zfs/dsl_synctask.c | 24 ++- module/zfs/txg.c | 36 +++- module/zfs/zcp.c | 183 ++++++++++-------- tests/runfiles/linux.run | 2 +- .../channel_program/synctask_core/Makefile.am | 3 +- .../synctask_core/tst.terminate_by_signal.ksh | 98 ++++++++++ 13 files changed, 309 insertions(+), 97 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh diff --git a/include/spl/sys/condvar.h b/include/spl/sys/condvar.h index 28caea57181c..98cf3e9a4922 100644 --- a/include/spl/sys/condvar.h +++ b/include/spl/sys/condvar.h @@ -54,7 +54,7 @@ extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *); extern void __cv_destroy(kcondvar_t *); extern void __cv_wait(kcondvar_t *, kmutex_t *); extern void __cv_wait_io(kcondvar_t *, kmutex_t *); -extern void __cv_wait_sig(kcondvar_t *, kmutex_t *); +extern int __cv_wait_sig(kcondvar_t *, kmutex_t *); extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t); extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t); extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t); diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h index da6c7a40daca..957963ffe553 100644 --- a/include/sys/dsl_synctask.h +++ b/include/sys/dsl_synctask.h @@ -37,6 +37,7 @@ struct dsl_pool; typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *); typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *); +typedef void (dsl_sigfunc_t)(void *, dmu_tx_t *); typedef enum zfs_space_check { /* @@ -116,6 +117,8 @@ int dsl_early_sync_task(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *, void *, int, zfs_space_check_t); void dsl_early_sync_task_nowait(struct dsl_pool *, dsl_syncfunc_t *, void *, int, zfs_space_check_t, dmu_tx_t *); +int dsl_sync_task_sig(const char *, dsl_checkfunc_t *, dsl_syncfunc_t *, + dsl_sigfunc_t *, void *, int, zfs_space_check_t); #ifdef __cplusplus } diff --git a/include/sys/txg.h b/include/sys/txg.h index 760d5208bf4a..260a3b43cfe8 100644 --- a/include/sys/txg.h +++ b/include/sys/txg.h @@ -87,6 +87,11 @@ extern void txg_kick(struct dsl_pool *dp); */ extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg); +/* + * Wait as above. Returns true if the thread was signaled while waiting. + */ +extern boolean_t txg_wait_synced_sig(struct dsl_pool *dp, uint64_t txg); + /* * Wait until the given transaction group, or one after it, is * the open transaction group. Try to make this happen as soon diff --git a/include/sys/zcp.h b/include/sys/zcp.h index b9c8ef0069f1..b720d863779c 100644 --- a/include/sys/zcp.h +++ b/include/sys/zcp.h @@ -52,6 +52,12 @@ typedef struct zcp_cleanup_handler { list_node_t zch_node; } zcp_cleanup_handler_t; +typedef struct zcp_alloc_arg { + boolean_t aa_must_succeed; + int64_t aa_alloc_remaining; + int64_t aa_alloc_limit; +} zcp_alloc_arg_t; + typedef struct zcp_run_info { dsl_pool_t *zri_pool; @@ -93,6 +99,11 @@ typedef struct zcp_run_info { */ boolean_t zri_timed_out; + /* + * Channel program was canceled by user + */ + boolean_t zri_canceled; + /* * Boolean indicating whether or not we are running in syncing * context. @@ -104,6 +115,26 @@ typedef struct zcp_run_info { * triggered in the event of a fatal error. */ list_t zri_cleanup_handlers; + + /* + * The Lua state context of our channel program. + */ + lua_State *zri_state; + + /* + * Lua memory allocator arguments. + */ + zcp_alloc_arg_t *zri_allocargs; + + /* + * Contains output values from zcp script or error string. + */ + nvlist_t *zri_outnvl; + + /* + * The errno number returned to caller of zcp_eval(). + */ + int zri_result; } zcp_run_info_t; zcp_run_info_t *zcp_run_info(lua_State *); diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index e3fa2e61bdc9..4101e7202e39 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -305,6 +305,7 @@ typedef pthread_cond_t kcondvar_t; extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); +extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp); extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag); @@ -313,7 +314,6 @@ extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at) -#define cv_wait_sig(cv, mp) cv_wait(cv, mp) #define cv_wait_io(cv, mp) cv_wait(cv, mp) #define cv_timedwait_sig_hires(cv, mp, t, r, f) \ cv_timedwait_hires(cv, mp, t, r, f) diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 0f39e0d72bca..da172449c73e 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -339,6 +339,13 @@ cv_wait(kcondvar_t *cv, kmutex_t *mp) mp->m_owner = pthread_self(); } +int +cv_wait_sig(kcondvar_t *cv, kmutex_t *mp) +{ + cv_wait(cv, mp); + return (1); +} + clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index a7a9d1db9a98..58d0d5d41bc1 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -29,6 +29,12 @@ #include #include +#include + +#ifdef HAVE_SCHED_SIGNAL_HEADER +#include +#endif + void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) { @@ -144,10 +150,12 @@ __cv_wait_io(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_io); -void +int __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp) { cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0); + + return (signal_pending(current) ? 0 : 1); } EXPORT_SYMBOL(__cv_wait_sig); diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index b63ce5cad90c..d2cd1a60fa48 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -41,7 +41,7 @@ dsl_null_checkfunc(void *arg, dmu_tx_t *tx) static int dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc, - dsl_syncfunc_t *syncfunc, void *arg, + dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, int blocks_modified, zfs_space_check_t space_check, boolean_t early) { spa_t *spa; @@ -85,6 +85,11 @@ dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc, dmu_tx_commit(tx); + if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) { + /* current contract is to call func once */ + sigfunc(arg, tx); + sigfunc = NULL; /* in case of an EAGAIN retry */ + } txg_wait_synced(dp, dst.dst_txg); if (dst.dst_error == EAGAIN) { @@ -124,7 +129,7 @@ dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check) { - return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg, + return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg, blocks_modified, space_check, B_FALSE)); } @@ -146,10 +151,23 @@ dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check) { - return (dsl_sync_task_common(pool, checkfunc, syncfunc, arg, + return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg, blocks_modified, space_check, B_TRUE)); } +/* + * A standard synctask that can be interrupted from a signal. The sigfunc + * is called once if a signal occurred while waiting for the task to sync. + */ +int +dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg, + int blocks_modified, zfs_space_check_t space_check) +{ + return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg, + blocks_modified, space_check, B_FALSE)); +} + static void dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, zfs_space_check_t space_check, dmu_tx_t *tx, diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 0fcd569e3b44..ce134e5a3964 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -675,8 +675,8 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) mutex_exit(&tx->tx_sync_lock); } -void -txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +static boolean_t +txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig) { tx_state_t *tx = &dp->dp_tx; @@ -695,9 +695,39 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg) "tx_synced=%llu waiting=%llu dp=%p\n", tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); cv_broadcast(&tx->tx_sync_more_cv); - cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + if (wait_sig) { + /* + * Condition wait here but stop if the thread receives a + * signal. The caller may call txg_wait_synced*() again + * to resume waiting for this txg. + */ + if (cv_wait_sig(&tx->tx_sync_done_cv, + &tx->tx_sync_lock) == 0) { + mutex_exit(&tx->tx_sync_lock); + return (B_TRUE); + } + } else { + cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock); + } } mutex_exit(&tx->tx_sync_lock); + return (B_FALSE); +} + +void +txg_wait_synced(dsl_pool_t *dp, uint64_t txg) +{ + VERIFY0(txg_wait_synced_impl(dp, txg, B_FALSE)); +} + +/* + * Similar to a txg_wait_synced but it can be interrupted from a signal. + * Returns B_TRUE if the thread was signaled while waiting. + */ +boolean_t +txg_wait_synced_sig(dsl_pool_t *dp, uint64_t txg) +{ + return (txg_wait_synced_impl(dp, txg, B_TRUE)); } /* diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 4894df11d5fb..2c7ae64f9cbc 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -118,21 +118,6 @@ static int zcp_nvpair_value_to_lua(lua_State *, nvpair_t *, char *, int); static int zcp_lua_to_nvlist_impl(lua_State *, int, nvlist_t *, const char *, int); -typedef struct zcp_alloc_arg { - boolean_t aa_must_succeed; - int64_t aa_alloc_remaining; - int64_t aa_alloc_limit; -} zcp_alloc_arg_t; - -typedef struct zcp_eval_arg { - lua_State *ea_state; - zcp_alloc_arg_t *ea_allocargs; - cred_t *ea_cred; - nvlist_t *ea_outnvl; - int ea_result; - uint64_t ea_instrlimit; -} zcp_eval_arg_t; - /* * The outer-most error callback handler for use with lua_pcall(). On * error Lua will call this callback with a single argument that @@ -452,7 +437,7 @@ zcp_lua_to_nvlist_helper(lua_State *state) static void zcp_convert_return_values(lua_State *state, nvlist_t *nvl, - const char *key, zcp_eval_arg_t *evalargs) + const char *key, int *result) { int err; VERIFY3U(1, ==, lua_gettop(state)); @@ -464,7 +449,7 @@ zcp_convert_return_values(lua_State *state, nvlist_t *nvl, err = lua_pcall(state, 3, 0, 0); /* zcp_lua_to_nvlist_helper */ if (err != 0) { zcp_lua_to_nvlist(state, 1, nvl, ZCP_RET_ERROR); - evalargs->ea_result = SET_ERROR(ECHRNG); + *result = SET_ERROR(ECHRNG); } } @@ -791,13 +776,24 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize) static void zcp_lua_counthook(lua_State *state, lua_Debug *ar) { - /* - * If we're called, check how many instructions the channel program has - * executed so far, and compare against the limit. - */ lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); zcp_run_info_t *ri = lua_touserdata(state, -1); + /* + * Check if we were canceled while waiting for the + * txg to sync or from our open context thread + */ + if (ri->zri_canceled || + (!ri->zri_sync && issig(JUSTLOOKING) && issig(FORREAL))) { + ri->zri_canceled = B_TRUE; + (void) lua_pushstring(state, "Channel program was canceled."); + (void) lua_error(state); + } + + /* + * Check how many instructions the channel program has + * executed so far, and compare against the limit. + */ ri->zri_curinstrs += zfs_lua_check_instrlimit_interval; if (ri->zri_maxinstrs != 0 && ri->zri_curinstrs > ri->zri_maxinstrs) { ri->zri_timed_out = B_TRUE; @@ -816,31 +812,25 @@ zcp_panic_cb(lua_State *state) } static void -zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) +zcp_eval_impl(dmu_tx_t *tx, zcp_run_info_t *ri) { int err; - zcp_run_info_t ri; - lua_State *state = evalargs->ea_state; + lua_State *state = ri->zri_state; VERIFY3U(3, ==, lua_gettop(state)); + /* finish initializing our runtime state */ + ri->zri_pool = dmu_tx_pool(tx); + ri->zri_tx = tx; + list_create(&ri->zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), + offsetof(zcp_cleanup_handler_t, zch_node)); + /* * Store the zcp_run_info_t struct for this run in the Lua registry. * Registry entries are not directly accessible by the Lua scripts but * can be accessed by our callbacks. */ - ri.zri_space_used = 0; - ri.zri_pool = dmu_tx_pool(tx); - ri.zri_cred = evalargs->ea_cred; - ri.zri_tx = tx; - ri.zri_timed_out = B_FALSE; - ri.zri_sync = sync; - list_create(&ri.zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), - offsetof(zcp_cleanup_handler_t, zch_node)); - ri.zri_curinstrs = 0; - ri.zri_maxinstrs = evalargs->ea_instrlimit; - - lua_pushlightuserdata(state, &ri); + lua_pushlightuserdata(state, ri); lua_setfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); VERIFY3U(3, ==, lua_gettop(state)); @@ -857,7 +847,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * off control to the channel program. Channel programs that use too * much memory should die with ENOSPC. */ - evalargs->ea_allocargs->aa_must_succeed = B_FALSE; + ri->zri_allocargs->aa_must_succeed = B_FALSE; /* * Call the Lua function that open-context passed us. This pops the @@ -869,14 +859,14 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) /* * Let Lua use KM_SLEEP while we interpret the return values. */ - evalargs->ea_allocargs->aa_must_succeed = B_TRUE; + ri->zri_allocargs->aa_must_succeed = B_TRUE; /* * Remove the error handler callback from the stack. At this point, * there shouldn't be any cleanup handler registered in the handler * list (zri_cleanup_handlers), regardless of whether it ran or not. */ - list_destroy(&ri.zri_cleanup_handlers); + list_destroy(&ri->zri_cleanup_handlers); lua_remove(state, 1); switch (err) { @@ -896,16 +886,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) int return_count = lua_gettop(state); if (return_count == 1) { - evalargs->ea_result = 0; - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_RETURN, evalargs); + ri->zri_result = 0; + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_RETURN, &ri->zri_result); } else if (return_count > 1) { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); lua_settop(state, 0); (void) lua_pushfstring(state, "Multiple return " "values not supported"); - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); } break; } @@ -919,19 +909,20 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * stack. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); - if (evalargs->ea_result == ETIME && - evalargs->ea_outnvl != NULL) { - (void) nvlist_add_uint64(evalargs->ea_outnvl, - ZCP_ARG_INSTRLIMIT, ri.zri_curinstrs); + if (ri->zri_result == ETIME && ri->zri_outnvl != NULL) { + (void) nvlist_add_uint64(ri->zri_outnvl, + ZCP_ARG_INSTRLIMIT, ri->zri_curinstrs); } break; } @@ -943,14 +934,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * return the error message. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); break; } case LUA_ERRMEM: @@ -958,7 +951,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * Lua ran out of memory while running the channel program. * There's not much we can do. */ - evalargs->ea_result = SET_ERROR(ENOSPC); + ri->zri_result = SET_ERROR(ENOSPC); break; default: VERIFY0(err); @@ -966,21 +959,35 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) } static void -zcp_pool_error(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_pool_error(zcp_run_info_t *ri, const char *poolname) { - evalargs->ea_result = SET_ERROR(ECHRNG); - lua_settop(evalargs->ea_state, 0); - (void) lua_pushfstring(evalargs->ea_state, "Could not open pool: %s", + ri->zri_result = SET_ERROR(ECHRNG); + lua_settop(ri->zri_state, 0); + (void) lua_pushfstring(ri->zri_state, "Could not open pool: %s", poolname); - zcp_convert_return_values(evalargs->ea_state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(ri->zri_state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); + +} + +/* + * This callback is called when txg_wait_synced_sig encountered a signal. + * The txg_wait_synced_sig will continue to wait for the txg to complete + * after calling this callback. + */ +/* ARGSUSED */ +static void +zcp_eval_sig(void *arg, dmu_tx_t *tx) +{ + zcp_run_info_t *ri = arg; + ri->zri_canceled = B_TRUE; } static void zcp_eval_sync(void *arg, dmu_tx_t *tx) { - zcp_eval_arg_t *evalargs = arg; + zcp_run_info_t *ri = arg; /* * Open context should have setup the stack to contain: @@ -988,15 +995,14 @@ zcp_eval_sync(void *arg, dmu_tx_t *tx) * 2: Script to run (converted to a Lua function) * 3: nvlist input to function (converted to Lua table or nil) */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); - zcp_eval_impl(tx, B_TRUE, evalargs); + zcp_eval_impl(tx, ri); } static void -zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_eval_open(zcp_run_info_t *ri, const char *poolname) { - int error; dsl_pool_t *dp; dmu_tx_t *tx; @@ -1004,11 +1010,11 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) /* * See comment from the same assertion in zcp_eval_sync(). */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); error = dsl_pool_hold(poolname, FTAG, &dp); if (error != 0) { - zcp_pool_error(evalargs, poolname); + zcp_pool_error(ri, poolname); return; } @@ -1023,7 +1029,7 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) */ tx = dmu_tx_create_dd(dp->dp_mos_dir); - zcp_eval_impl(tx, B_FALSE, evalargs); + zcp_eval_impl(tx, ri); dmu_tx_abort(tx); @@ -1036,7 +1042,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, { int err; lua_State *state; - zcp_eval_arg_t evalargs; + zcp_run_info_t runinfo; if (instrlimit > zfs_lua_max_instrlimit) return (SET_ERROR(EINVAL)); @@ -1136,24 +1142,29 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, } VERIFY3U(3, ==, lua_gettop(state)); - evalargs.ea_state = state; - evalargs.ea_allocargs = &allocargs; - evalargs.ea_instrlimit = instrlimit; - evalargs.ea_cred = CRED(); - evalargs.ea_outnvl = outnvl; - evalargs.ea_result = 0; + runinfo.zri_state = state; + runinfo.zri_allocargs = &allocargs; + runinfo.zri_outnvl = outnvl; + runinfo.zri_result = 0; + runinfo.zri_cred = CRED(); + runinfo.zri_timed_out = B_FALSE; + runinfo.zri_canceled = B_FALSE; + runinfo.zri_sync = sync; + runinfo.zri_space_used = 0; + runinfo.zri_curinstrs = 0; + runinfo.zri_maxinstrs = instrlimit; if (sync) { - err = dsl_sync_task(poolname, NULL, - zcp_eval_sync, &evalargs, 0, ZFS_SPACE_CHECK_ZCP_EVAL); + err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync, + zcp_eval_sig, &runinfo, 0, ZFS_SPACE_CHECK_ZCP_EVAL); if (err != 0) - zcp_pool_error(&evalargs, poolname); + zcp_pool_error(&runinfo, poolname); } else { - zcp_eval_open(&evalargs, poolname); + zcp_eval_open(&runinfo, poolname); } lua_close(state); - return (evalargs.ea_result); + return (runinfo.zri_result); } /* diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 22fc26212c0d..340145dc2bca 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -87,7 +87,7 @@ tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit', 'tst.list_user_props', 'tst.parse_args_neg','tst.promote_conflict', 'tst.promote_multiple', 'tst.promote_simple', 'tst.rollback_mult', 'tst.rollback_one', 'tst.snapshot_destroy', 'tst.snapshot_neg', - 'tst.snapshot_recursive', 'tst.snapshot_simple'] + 'tst.snapshot_recursive', 'tst.snapshot_simple', 'tst.terminate_by_signal'] tags = ['functional', 'channel_program', 'synctask_core'] [tests/functional/chattr] diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am b/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am index 7bdaf53de2fa..cc86a2db9193 100644 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/Makefile.am @@ -27,7 +27,8 @@ dist_pkgdata_SCRIPTS = \ tst.snapshot_destroy.ksh \ tst.snapshot_neg.ksh \ tst.snapshot_recursive.ksh \ - tst.snapshot_simple.ksh + tst.snapshot_simple.ksh \ + tst.terminate_by_signal.ksh dist_pkgdata_DATA = \ tst.get_index_props.out \ diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh new file mode 100755 index 000000000000..6f58cc1f4f8d --- /dev/null +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.terminate_by_signal.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# +. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib + +# +# DESCRIPTION: Execute a long-running zfs channel program and attempt to +# cancel it by sending a signal. +# + +verify_runnable "global" + +rootfs=$TESTPOOL/$TESTFS +snapname=snap +limit=50000000 + +function cleanup +{ + datasetexists $rootfs && log_must zfs destroy -R $rootfs +} + +log_onexit cleanup + +# +# Create a working set of 100 file systems +# +for i in {1..100}; do + log_must zfs create "$rootfs/child$i" +done + +# +# Attempt to create 100 snapshots with zfs.sync.snapshot() along with some +# time consuming efforts. We use loops of zfs.check.* (dry run operations) +# to consume instructions before the next zfs.sync.snapshot() occurs. +# +# Without a signal interruption this ZCP would take several minutes and +# generate over 30 million Lua instructions. +# +function chan_prog +{ +zfs program -t $limit $TESTPOOL - $rootfs $snapname <<-EOF + arg = ... + fs = arg["argv"][1] + snap = arg["argv"][2] + for child in zfs.list.children(fs) do + local snapname = child .. "@" .. snap + zfs.check.snapshot(snapname) + zfs.sync.snapshot(snapname) + for i=1,20000,1 do + zfs.check.snapshot(snapname) + zfs.check.destroy(snapname) + zfs.check.destroy(fs) + end + end + return "should not have reached here" +EOF +} + +log_note "Executing a long-running zfs program in the background" +chan_prog & +CHILD=$! + +# +# After waiting, send a kill signal to the channel program process. +# This should stop the ZCP near a million instructions but still have +# created some of the snapshots. Note that since the above zfs program +# command might get wrapped, we also issue a kill to the group. +# +sleep 10 +log_pos pkill -P $CHILD +log_pos kill $CHILD + +# +# Make sure the channel program did not fully complete by enforcing +# that not all of the snapshots were created. +# +snap_count=$(zfs list -t snapshot | grep $TESTPOOL | wc -l) +log_note "$snap_count snapshots created by ZCP" + +if [ "$snap_count" -eq 0 ]; then + log_fail "Channel progam failed to run." +elif [ "$snap_count" -gt 50 ]; then + log_fail "Too many snapshots after a cancel ($snap_count)." +else + log_pass "Canceling a long-running channel program works." +fi From d476a0f183968dd75b0f0211c46bd2b999270138 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Fri, 14 Jun 2019 17:32:37 -0600 Subject: [PATCH 2/2] Add cv_wait_io_sig Signed-off-by: Don Brady --- include/spl/sys/condvar.h | 2 ++ include/sys/zfs_context.h | 1 + module/spl/spl-condvar.c | 9 +++++++++ module/zfs/dsl_synctask.c | 2 +- module/zfs/txg.c | 2 +- module/zfs/zcp.c | 2 ++ 6 files changed, 16 insertions(+), 2 deletions(-) diff --git a/include/spl/sys/condvar.h b/include/spl/sys/condvar.h index 98cf3e9a4922..f1438c4e2455 100644 --- a/include/spl/sys/condvar.h +++ b/include/spl/sys/condvar.h @@ -54,6 +54,7 @@ extern void __cv_init(kcondvar_t *, char *, kcv_type_t, void *); extern void __cv_destroy(kcondvar_t *); extern void __cv_wait(kcondvar_t *, kmutex_t *); extern void __cv_wait_io(kcondvar_t *, kmutex_t *); +extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *); extern int __cv_wait_sig(kcondvar_t *, kmutex_t *); extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t); extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t); @@ -69,6 +70,7 @@ extern void __cv_broadcast(kcondvar_t *c); #define cv_destroy(cvp) __cv_destroy(cvp) #define cv_wait(cvp, mp) __cv_wait(cvp, mp) #define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp) +#define cv_wait_io_sig(cvp, mp) __cv_wait_io_sig(cvp, mp) #define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp) #define cv_wait_interruptible(cvp, mp) cv_wait_sig(cvp, mp) #define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t) diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 4101e7202e39..ba0ea571be0f 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -315,6 +315,7 @@ extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_io(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_wait_io(cv, mp) cv_wait(cv, mp) +#define cv_wait_io_sig(cv, mp) cv_wait_sig(cv, mp) #define cv_timedwait_sig_hires(cv, mp, t, r, f) \ cv_timedwait_hires(cv, mp, t, r, f) diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 58d0d5d41bc1..19c575f770b8 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -150,6 +150,15 @@ __cv_wait_io(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_io); +int +__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp) +{ + cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1); + + return (signal_pending(current) ? 0 : 1); +} +EXPORT_SYMBOL(__cv_wait_io_sig); + int __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp) { diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index d2cd1a60fa48..b225eed37d40 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -88,7 +88,7 @@ dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc, if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) { /* current contract is to call func once */ sigfunc(arg, tx); - sigfunc = NULL; /* in case of an EAGAIN retry */ + sigfunc = NULL; /* in case we're performing an EAGAIN retry */ } txg_wait_synced(dp, dst.dst_txg); diff --git a/module/zfs/txg.c b/module/zfs/txg.c index ce134e5a3964..d1fb50188e4d 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -701,7 +701,7 @@ txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig) * signal. The caller may call txg_wait_synced*() again * to resume waiting for this txg. */ - if (cv_wait_sig(&tx->tx_sync_done_cv, + if (cv_wait_io_sig(&tx->tx_sync_done_cv, &tx->tx_sync_lock) == 0) { mutex_exit(&tx->tx_sync_lock); return (B_TRUE); diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 2c7ae64f9cbc..1aeea131449e 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -788,6 +788,7 @@ zcp_lua_counthook(lua_State *state, lua_Debug *ar) ri->zri_canceled = B_TRUE; (void) lua_pushstring(state, "Channel program was canceled."); (void) lua_error(state); + /* Unreachable */ } /* @@ -800,6 +801,7 @@ zcp_lua_counthook(lua_State *state, lua_Debug *ar) (void) lua_pushstring(state, "Channel program timed out."); (void) lua_error(state); + /* Unreachable */ } }