Skip to content

Commit

Permalink
Add subcommand to wait for background zfs activity to complete
Browse files Browse the repository at this point in the history
Currently the best way to wait for the completion of a long-running
operation in a pool, like a scrub or device removal, is to poll 'zpool
status' and parse its output, which is neither efficient nor convenient.

This change adds a 'wait' subcommand to the zpool command. When invoked,
'zpool wait' will block until a specified type of background activity
completes. Currently, this subcommand can wait for any of the following:

 - Scrubs or resilvers to complete
 - Devices to be initialized
 - Devices to be replaced
 - Devices to be removed
 - Checkpoints to be discarded
 - Background freeing to complete

For example, a scrub that is in progress could be waited for by running

    zpool wait -t scrub <pool>

This also adds a -w flag to the attach, checkpoint, initialize, replace,
remove, and scrub subcommands. When used, this flag makes the operations
kicked off by these subcommands synchronous instead of asynchronous.

This functionality is implemented using a new ioctl. The type of
activity to wait for is provided as input to the ioctl, and the ioctl
blocks until all activity of that type has completed. An ioctl was used
over other methods of kernel-userspace communication primarily for the
sake of portability.

Porting Notes:
This is ported from Delphix OS change DLPX-44432. The following changes
were made while porting:

 - Added ZoL-style ioctl input declaration.
 - Reorganized error handling in zpool_initialize in libzfs to integrate
   better with changes made for TRIM support.
 - Fixed check for whether a checkpoint discard is in progress.
   Previously it also waited if the pool had a checkpoint, instead of
   just if a checkpoint was being discarded.
 - Exposed zfs_initialize_chunk_size as a ZoL-style tunable.
 - Updated more existing tests to make use of new 'zpool wait'
   functionality, tests that don't exist in Delphix OS.
 - Used existing ZoL tunable zfs_scan_suspend_progress, together with
   zinject, in place of a new tunable zfs_scan_max_blks_per_txg.
 - Added support for a non-integral interval argument to zpool wait.

Future work:
ZoL has support for trimming devices, which Delphix OS does not. In the
future, 'zpool wait' could be extended to add the ability to wait for
trim operations to complete.

Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: John Gallagher <[email protected]>
Closes #9162
  • Loading branch information
jgallag88 authored and behlendorf committed Sep 14, 2019
1 parent 7238cbd commit e60e158
Show file tree
Hide file tree
Showing 61 changed files with 2,662 additions and 144 deletions.
559 changes: 517 additions & 42 deletions cmd/zpool/zpool_main.c

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zpool_trim/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_upgrade/blockfiles/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_wait/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_wait/scan/Makefile
tests/zfs-tests/tests/functional/cli_user/Makefile
tests/zfs-tests/tests/functional/cli_user/misc/Makefile
tests/zfs-tests/tests/functional/cli_user/zfs_list/Makefile
Expand Down
7 changes: 7 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ typedef struct zfs_handle zfs_handle_t;
typedef struct zpool_handle zpool_handle_t;
typedef struct libzfs_handle libzfs_handle_t;

extern int zpool_wait(zpool_handle_t *, zpool_wait_activity_t);
extern int zpool_wait_status(zpool_handle_t *, zpool_wait_activity_t,
boolean_t *, boolean_t *);

/*
* Library initialization
*/
Expand Down Expand Up @@ -275,6 +279,8 @@ typedef struct trimflags {
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
nvlist_t *);
extern int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t,
nvlist_t *);
extern int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *,
trimflags_t *);

Expand Down Expand Up @@ -317,6 +323,7 @@ extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
size_t proplen, zprop_source_t *, boolean_t literal);
extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
zprop_source_t *);
extern int zpool_props_refresh(zpool_handle_t *);

extern const char *zpool_prop_to_name(zpool_prop_t);
extern const char *zpool_prop_values(zpool_prop_t);
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ int lzc_reopen(const char *, boolean_t);
int lzc_pool_checkpoint(const char *);
int lzc_pool_checkpoint_discard(const char *);

int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);

#ifdef __cplusplus
}
#endif
Expand Down
19 changes: 19 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,7 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
ZFS_IOC_REDACT, /* 0x5a51 */
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
ZFS_IOC_WAIT, /* 0x5a53 */

/*
* Linux - 3/64 numbers reserved.
Expand Down Expand Up @@ -1340,6 +1341,17 @@ typedef enum {
SPA_LOAD_CREATE /* creation in progress */
} spa_load_state_t;

/*
 * Kinds of background pool activity that a caller can wait on. The chosen
 * value is what gets sent as the ZPOOL_WAIT_ACTIVITY argument of the wait
 * ioctl.
 */
typedef enum {
ZPOOL_WAIT_CKPT_DISCARD,	/* a checkpoint being discarded */
ZPOOL_WAIT_FREE,	/* background freeing */
ZPOOL_WAIT_INITIALIZE,	/* device initialization */
ZPOOL_WAIT_REPLACE,	/* device replacement */
ZPOOL_WAIT_REMOVE,	/* device removal */
ZPOOL_WAIT_RESILVER,	/* resilver */
ZPOOL_WAIT_SCRUB,	/* scrub */
ZPOOL_WAIT_NUM_ACTIVITIES	/* number of activity kinds listed above */
} zpool_wait_activity_t;

/*
* Bookmark name values.
*/
Expand Down Expand Up @@ -1390,6 +1402,13 @@ typedef enum {
#define ZPOOL_TRIM_RATE "trim_rate"
#define ZPOOL_TRIM_SECURE "trim_secure"

/*
* The following are names used when invoking ZFS_IOC_WAIT.
*/
#define ZPOOL_WAIT_ACTIVITY "wait_activity"
#define ZPOOL_WAIT_TAG "wait_tag"
#define ZPOOL_WAIT_WAITED "wait_waited"

/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
Expand Down
8 changes: 8 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,14 @@ extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, nvlist_t *hist_nvl,
const char *name);

/* waiting for pool activities to complete */
extern int spa_wait(const char *pool, zpool_wait_activity_t activity,
boolean_t *waited);
extern int spa_wait_tag(const char *name, zpool_wait_activity_t activity,
uint64_t tag, boolean_t *waited);
extern void spa_notify_waiters(spa_t *spa);
extern void spa_wake_waiters(spa_t *spa);

#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
Expand Down
7 changes: 7 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,13 @@ struct spa {
uint64_t spa_leaf_list_gen; /* track leaf_list changes */
uint32_t spa_hostid; /* cached system hostid */

/* synchronization for threads in spa_wait */
kmutex_t spa_activities_lock;
kcondvar_t spa_activities_cv;
kcondvar_t spa_waiters_cv;
int spa_waiters; /* number of waiting threads */
boolean_t spa_waiters_cancel; /* waiters should return */

/*
* spa_refcount & spa_config_lock must be the last elements
* because zfs_refcount_t changes size based on compilation options.
Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ extern void vdev_indirect_mark_obsolete(vdev_t *vd, uint64_t offset,
uint64_t size);
extern void spa_vdev_indirect_mark_obsolete(spa_t *spa, uint64_t vdev,
uint64_t offset, uint64_t size, dmu_tx_t *tx);
extern boolean_t vdev_replace_in_progress(vdev_t *vdev);

extern void vdev_hold(vdev_t *);
extern void vdev_rele(vdev_t *);
Expand Down
120 changes: 100 additions & 20 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ zpool_get_all_props(zpool_handle_t *zhp)
return (0);
}

static int
int
zpool_props_refresh(zpool_handle_t *zhp)
{
nvlist_t *old_props;
Expand Down Expand Up @@ -2158,10 +2158,9 @@ xlate_init_err(int err)
* blocks) for the given vdevs in the given pool.
*/
int
zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
nvlist_t *vds)
zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
nvlist_t *vds, boolean_t wait)
{
char msg[1024];
int err;

nvlist_t *vdev_guids = fnvlist_alloc();
Expand All @@ -2173,26 +2172,46 @@ zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
guids_to_paths, &vd_errlist);

if (err == 0) {
err = lzc_initialize(zhp->zpool_name, cmd_type,
vdev_guids, &errlist);
if (err == 0) {
fnvlist_free(vdev_guids);
fnvlist_free(guids_to_paths);
return (0);
}
if (err != 0) {
verify(vd_errlist != NULL);
goto list_errors;
}

err = lzc_initialize(zhp->zpool_name, cmd_type,
vdev_guids, &errlist);

if (err != 0) {
if (errlist != NULL) {
vd_errlist = fnvlist_lookup_nvlist(errlist,
ZPOOL_INITIALIZE_VDEVS);
goto list_errors;
}

(void) snprintf(msg, sizeof (msg),
(void) zpool_standard_error(zhp->zpool_hdl, err,
dgettext(TEXT_DOMAIN, "operation failed"));
} else {
verify(vd_errlist != NULL);
goto out;
}

if (wait) {
for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL;
elem = nvlist_next_nvpair(vdev_guids, elem)) {

uint64_t guid = fnvpair_value_uint64(elem);

err = lzc_wait_tag(zhp->zpool_name,
ZPOOL_WAIT_INITIALIZE, guid, NULL);
if (err != 0) {
(void) zpool_standard_error_fmt(zhp->zpool_hdl,
err, dgettext(TEXT_DOMAIN, "error "
"waiting for '%s' to initialize"),
nvpair_name(elem));

goto out;
}
}
}
goto out;

list_errors:
for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
elem = nvlist_next_nvpair(vd_errlist, elem)) {
int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
Expand All @@ -2206,15 +2225,28 @@ zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
"cannot initialize '%s'", path);
}

out:
fnvlist_free(vdev_guids);
fnvlist_free(guids_to_paths);

if (vd_errlist != NULL) {
if (vd_errlist != NULL)
fnvlist_free(vd_errlist);
return (-1);
}

return (zpool_standard_error(zhp->zpool_hdl, err, msg));
return (err == 0 ? 0 : -1);
}

int
zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
    nvlist_t *vds)
{
	/* Non-waiting variant: hand off to the shared helper with wait off. */
	int rc = zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE);

	return (rc);
}

int
zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
    nvlist_t *vds)
{
	/* Waiting variant: hand off to the shared helper with wait on. */
	int rc = zpool_initialize_impl(zhp, cmd_type, vds, B_TRUE);

	return (rc);
}

static int
Expand Down Expand Up @@ -4782,3 +4814,51 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)

return (0);
}

/*
 * Block until the given activity is no longer in progress in the pool.
 *
 * Unlike zpool_wait_status(), a pool that cannot be found is treated as an
 * error here: a message is emitted and ENOENT is returned. Otherwise the
 * result of zpool_wait_status() is passed straight through.
 */
int
zpool_wait(zpool_handle_t *zhp, zpool_wait_activity_t activity)
{
	boolean_t pool_gone;
	int rc;

	rc = zpool_wait_status(zhp, activity, &pool_gone, NULL);
	if (!pool_gone)
		return (rc);

	(void) zpool_standard_error_fmt(zhp->zpool_hdl, ENOENT,
	    dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
	    zhp->zpool_name);
	return (ENOENT);
}

/*
 * Wait for 'activity' in the pool and report, through 'waited', whether any
 * waiting actually took place.
 *
 * A pool that does not exist is not treated as a failure: '*missing' is set
 * and 0 is returned without printing anything. Callers that poll over a long
 * period (zpool's wait command, for instance) may see the pool exported or
 * destroyed underneath them, which is a normal event rather than something
 * worth an error message. Any other failure is reported via the standard
 * error path and its code returned.
 */
int
zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
    boolean_t *missing, boolean_t *waited)
{
	int rc = lzc_wait(zhp->zpool_name, activity, waited);

	*missing = (rc == ENOENT);

	switch (rc) {
	case 0:
		break;
	case ENOENT:
		/* Pool vanished; signalled through '*missing' only. */
		return (0);
	default:
		(void) zpool_standard_error_fmt(zhp->zpool_hdl, rc,
		    dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
		    zhp->zpool_name);
		break;
	}

	return (rc);
}
36 changes: 36 additions & 0 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1579,3 +1579,39 @@ lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
fnvlist_free(args);
return (error);
}

/*
 * Shared implementation of lzc_wait() and lzc_wait_tag().
 *
 * Builds the ZFS_IOC_WAIT argument list (the activity, plus the tag when
 * 'use_tag' is set), issues the ioctl against 'pool', and on success copies
 * the ZPOOL_WAIT_WAITED result into '*waited' if the caller asked for it.
 * Returns the ioctl's error code.
 */
static int
wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
    uint64_t tag, boolean_t *waited)
{
	nvlist_t *outnvl = NULL;
	nvlist_t *innvl = fnvlist_alloc();
	int rc;

	fnvlist_add_int32(innvl, ZPOOL_WAIT_ACTIVITY, activity);
	if (use_tag)
		fnvlist_add_uint64(innvl, ZPOOL_WAIT_TAG, tag);

	rc = lzc_ioctl(ZFS_IOC_WAIT, pool, innvl, &outnvl);
	fnvlist_free(innvl);

	/* The result list is only consulted when the ioctl succeeded. */
	if (rc == 0 && waited != NULL) {
		*waited = fnvlist_lookup_boolean_value(outnvl,
		    ZPOOL_WAIT_WAITED);
	}
	fnvlist_free(outnvl);

	return (rc);
}

int
lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
{
	/* Untagged wait: no tag is added to the ioctl arguments. */
	int rc = wait_common(pool, activity, B_FALSE, 0, waited);

	return (rc);
}

int
lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
    boolean_t *waited)
{
	/* Tagged wait: 'tag' is forwarded as the ZPOOL_WAIT_TAG argument. */
	int rc = wait_common(pool, activity, B_TRUE, tag, waited);

	return (rc);
}
12 changes: 12 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -1968,6 +1968,18 @@ Pattern written to vdev free space by \fBzpool initialize\fR.
Default value: \fB16,045,690,984,833,335,022\fR (0xdeadbeefdeadbeee).
.RE

.sp
.ne 2
.na
\fBzfs_initialize_chunk_size\fR (ulong)
.ad
.RS 12n
Size of writes used by \fBzpool initialize\fR.
This option is used by the test suite to facilitate testing.
.sp
Default value: \fB1,048,576\fR
.RE

.sp
.ne 2
.na
Expand Down
Loading

0 comments on commit e60e158

Please sign in to comment.