Skip to content

Commit

Permalink
Add 'zfs wait' command
Browse files Browse the repository at this point in the history
Add a mechanism to wait for delete queue to drain.

When doing redacted send/recv, many workflows involve deleting files 
that contain sensitive data. Because of the way zfs handles file 
deletions, snapshots taken quickly after a rm operation can sometimes 
still contain the file in question, especially if the file is very 
large. This can result in issues for redacted send/recv users who 
expect the deleted files to be redacted in the send streams, and not 
appear in their clones.

This change duplicates much of the zpool wait related logic into a 
zfs wait command, which can be used to wait until the internal
deleteq has been drained.  Additional wait activities may be added 
in the future. 

Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: John Gallagher <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
Closes openzfs#9707
  • Loading branch information
pcd1193182 authored and jsai20 committed Mar 30, 2021
1 parent 301aef6 commit d8c59ad
Show file tree
Hide file tree
Showing 25 changed files with 679 additions and 11 deletions.
91 changes: 90 additions & 1 deletion cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ static int zfs_do_change_key(int argc, char **argv);
static int zfs_do_project(int argc, char **argv);
static int zfs_do_version(int argc, char **argv);
static int zfs_do_redact(int argc, char **argv);
static int zfs_do_wait(int argc, char **argv);

#ifdef __FreeBSD__
static int zfs_do_jail(int argc, char **argv);
Expand Down Expand Up @@ -183,7 +184,8 @@ typedef enum {
HELP_VERSION,
HELP_REDACT,
HELP_JAIL,
HELP_UNJAIL
HELP_UNJAIL,
HELP_WAIT,
} zfs_help_t;

typedef struct zfs_command {
Expand Down Expand Up @@ -248,6 +250,7 @@ static zfs_command_t command_table[] = {
{ "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
{ "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
{ "redact", zfs_do_redact, HELP_REDACT },
{ "wait", zfs_do_wait, HELP_WAIT },

#ifdef __FreeBSD__
{ "jail", zfs_do_jail, HELP_JAIL },
Expand Down Expand Up @@ -410,6 +413,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tjail <jailid|jailname> <filesystem>\n"));
case HELP_UNJAIL:
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
}

abort();
Expand Down Expand Up @@ -8317,6 +8322,90 @@ zfs_do_project(int argc, char **argv)
return (ret);
}

/*
 * zfs wait [-t <activity>[,<activity>]...] <filesystem>
 *
 * Block until every selected background activity has ceased on the given
 * filesystem.  With no -t option all ZFS_WAIT_NUM_ACTIVITIES activities are
 * selected; with -t only the comma-separated activities named are selected.
 *
 * Returns 1 if the filesystem cannot be opened.  Otherwise returns the last
 * error reported by zfs_wait_status(), which is 0 both on success and when
 * the filesystem is reported missing while waiting.
 */
static int
zfs_do_wait(int argc, char **argv)
{
	boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES];
	/*
	 * Must start at 0: if -t was given an empty list, no activity is
	 * enabled, zfs_wait_status() is never called, and this value is
	 * what gets returned.
	 */
	int error = 0;
	int i;
	/*
	 * getopt() returns an int.  Storing it in a plain char breaks the
	 * comparison against -1 on platforms where char is unsigned.
	 */
	int c;

	/* By default, wait for all types of activity. */
	for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++)
		enabled[i] = B_TRUE;

	while ((c = getopt(argc, argv, "t:")) != -1) {
		switch (c) {
		case 't':
		{
			/* Index in this table == zfs_wait_activity_t value. */
			static char *col_subopts[] = { "deleteq", NULL };
			char *value;

			/* Reset activities array */
			bzero(&enabled, sizeof (enabled));
			while (*optarg != '\0') {
				int activity = getsubopt(&optarg, col_subopts,
				    &value);

				if (activity < 0) {
					(void) fprintf(stderr,
					    gettext("invalid activity '%s'\n"),
					    value);
					/*
					 * NOTE(review): usage() is presumed
					 * not to return; confirm, since the
					 * store below would otherwise use a
					 * negative index.
					 */
					usage(B_FALSE);
				}

				enabled[activity] = B_TRUE;
			}
			break;
		}
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}

	argv += optind;
	argc -= optind;
	if (argc < 1) {
		(void) fprintf(stderr, gettext("missing 'filesystem' "
		    "argument\n"));
		usage(B_FALSE);
	}
	if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM);
	if (zhp == NULL)
		return (1);

	/*
	 * Keep sweeping over the enabled activities until one full pass
	 * completes without having had to wait for any of them, or until an
	 * error occurs or the filesystem is reported missing.
	 */
	for (;;) {
		boolean_t missing = B_FALSE;
		boolean_t any_waited = B_FALSE;

		for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) {
			boolean_t waited = B_FALSE;

			if (!enabled[i])
				continue;

			error = zfs_wait_status(zhp, i, &missing, &waited);
			if (error != 0 || missing)
				break;

			any_waited = (any_waited || waited);
		}

		if (error != 0 || missing || !any_waited)
			break;
	}

	zfs_close(zhp);

	return (error);
}

/*
* Display version message
*/
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile
tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,9 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);

/*
 * Wait for the given activity on the dataset behind the handle.  The two
 * boolean_t out-parameters are, in order, 'missing' and 'waited'.
 */
extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t,
boolean_t *, boolean_t *);

/*
* zfs encryption management
*/
Expand Down
1 change: 1 addition & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ int lzc_pool_checkpoint_discard(const char *);

int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);

#ifdef __cplusplus
}
Expand Down
8 changes: 8 additions & 0 deletions include/sys/dsl_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ struct dsl_dir {
bplist_t dd_pending_frees;
bplist_t dd_pending_allocs;

kmutex_t dd_activity_lock;
kcondvar_t dd_activity_cv;
boolean_t dd_activity_cancelled;
uint64_t dd_activity_waiters;

/* protected by dd_lock; keep at end of struct for better locality */
char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
};
Expand Down Expand Up @@ -192,6 +197,9 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
void dsl_dir_livelist_close(dsl_dir_t *dd);
void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total);
int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
boolean_t *waited);
void dsl_dir_cancel_waiters(dsl_dir_t *dd);

/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
Expand Down
12 changes: 12 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,7 @@ typedef enum zfs_ioc {
ZFS_IOC_REDACT, /* 0x5a51 */
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
ZFS_IOC_WAIT, /* 0x5a53 */
ZFS_IOC_WAIT_FS, /* 0x5a54 */

/*
* Per-platform (Optional) - 6/128 numbers reserved.
Expand Down Expand Up @@ -1358,6 +1359,11 @@ typedef enum {
ZPOOL_WAIT_NUM_ACTIVITIES
} zpool_wait_activity_t;

/*
 * Per-filesystem activities that can be waited on.  The final enumerator is
 * not an activity; it is the number of activities and is used to size arrays.
 */
typedef enum {
	ZFS_WAIT_DELETEQ = 0,		/* "deleteq" */
	ZFS_WAIT_NUM_ACTIVITIES		/* count; must stay last */
} zfs_wait_activity_t;

/*
* Bookmark name values.
*/
Expand Down Expand Up @@ -1415,6 +1421,12 @@ typedef enum {
#define ZPOOL_WAIT_TAG "wait_tag"
#define ZPOOL_WAIT_WAITED "wait_waited"

/*
* The following are names used when invoking ZFS_IOC_WAIT_FS.
*/
#define ZFS_WAIT_ACTIVITY "wait_activity"
#define ZFS_WAIT_WAITED "wait_waited"

/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
Expand Down
28 changes: 28 additions & 0 deletions lib/libzfs/libzfs_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -5599,3 +5599,31 @@ zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize,
volsize += numdb;
return (volsize);
}

/*
 * Wait for 'activity' on the filesystem named by 'zhp'.
 *
 * On return, '*missing' is B_TRUE if the underlying wait call failed with
 * ENOENT and B_FALSE otherwise.  A missing filesystem is not treated as an
 * error: nothing is printed and 0 is returned.  This suits callers that poll
 * in a loop for a long time (such as the zfs wait command), where the
 * filesystem going away part-way through is a normal event.
 *
 * 'waited' is passed straight through to lzc_wait_fs(), which fills it in
 * only when the wait succeeds.
 *
 * Any other failure is reported through zfs_standard_error_fmt() and its
 * error code is returned.
 */
int
zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity,
    boolean_t *missing, boolean_t *waited)
{
	int err = lzc_wait_fs(zhp->zfs_name, activity, waited);

	if (err == ENOENT) {
		/* The fs is gone: flag it, but do not report an error. */
		*missing = B_TRUE;
		return (0);
	}
	*missing = B_FALSE;

	if (err == 0)
		return (0);

	(void) zfs_standard_error_fmt(zhp->zfs_hdl, err,
	    dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"),
	    zhp->zfs_name);

	return (err);
}
20 changes: 20 additions & 0 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1621,3 +1621,23 @@ lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
{
return (wait_common(pool, activity, B_TRUE, tag, waited));
}

/*
 * Issue ZFS_IOC_WAIT_FS against 'fs' for the given activity.
 *
 * The activity is sent as the int32 ZFS_WAIT_ACTIVITY entry of the input
 * nvlist.  If the ioctl succeeds and 'waited' is non-NULL, '*waited' is set
 * from the ZFS_WAIT_WAITED boolean in the result nvlist; on failure
 * '*waited' is left untouched.
 *
 * Returns whatever lzc_ioctl() returned.
 */
int
lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *outnvl = NULL;
	int err;

	fnvlist_add_int32(innvl, ZFS_WAIT_ACTIVITY, activity);
	err = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, innvl, &outnvl);

	if (waited != NULL && err == 0) {
		*waited = fnvlist_lookup_boolean_value(outnvl,
		    ZFS_WAIT_WAITED);
	}

	fnvlist_free(innvl);
	fnvlist_free(outnvl);

	return (err);
}
1 change: 1 addition & 0 deletions man/man8/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ dist_man_MANS = \
zfs-unmount.8 \
zfs-upgrade.8 \
zfs-userspace.8 \
zfs-wait.8 \
zgenhostid.8 \
zinject.8 \
zpool.8 \
Expand Down
71 changes: 71 additions & 0 deletions man/man8/zfs-wait.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
.Dd August 9, 2019
.Dt ZFS-WAIT 8
.Os Linux
.Sh NAME
.Nm zfs Ns Pf - Cm wait
.Nd Wait for background activity to stop in a ZFS filesystem
.Sh SYNOPSIS
.Nm
.Cm wait
.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
.Ar fs
.Sh DESCRIPTION
.Bl -tag -width Ds
.It Xo
.Nm
.Cm wait
.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
.Ar fs
.Xc
Waits until all background activity of the given types has ceased in the given
filesystem.
The activity could cease because it has completed or because the filesystem has
been destroyed or unmounted.
If no activities are specified, the command waits until background activity of
every type listed below has ceased.
If there is no activity of the given types in progress, the command returns
immediately.
.Pp
These are the possible values for
.Ar activity ,
along with what each one waits for:
.Bd -literal
deleteq The filesystem's internal delete queue to empty
.Ed
.Pp
Note that the internal delete queue does not finish draining until
all large files have had time to be fully destroyed and all open file
handles to unlinked files are closed.
.El
.Sh SEE ALSO
.Xr lsof 8
5 changes: 5 additions & 0 deletions man/man8/zfs.8
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ Attaches a filesystem to a jail.
.It Xr zfs-unjail 8
Detaches a filesystem from a jail.
.El
.Ss Waiting
.Bl -tag -width ""
.It Xr zfs-wait 8
Wait for background activity in a filesystem to complete.
.El
.Sh EXIT STATUS
The
.Nm
Expand Down
11 changes: 11 additions & 0 deletions module/os/linux/zfs/zfs_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>

/*
* zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
Expand Down Expand Up @@ -739,6 +741,8 @@ zfs_rmnode(znode_t *zp)
zfs_unlinked_add(xzp, tx);
}

mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);

/*
* Remove this znode from the unlinked set. If a rollback has
* occurred while a file is open and unlinked, then when the file
Expand All @@ -749,6 +753,13 @@ zfs_rmnode(znode_t *zp)
zp->z_id, tx);
VERIFY(error == 0 || error == ENOENT);

uint64_t count;
if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
}

mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);

dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);

zfs_znode_delete(zp, tx);
Expand Down
Loading

0 comments on commit d8c59ad

Please sign in to comment.