Skip to content

Commit

Permalink
libzfs, libzfs_core: send: always write to pipe
Browse files Browse the repository at this point in the history
By introducing lzc_send_wrapper() and routing all ZFS_IOC_SEND*
users through it, we fix a Linux 5.10-introduced bug (see the comment
on lzc_send_wrapper()).

This is all /transparent/ to users API-, ABI-, and usage-wise;
it is disabled on FreeBSD and when the output is already a pipe,
and it is transparently nestable (e.g. zfs_send_one() is wrapped,
but so is the lzc_send_redacted() it calls ‒ this wouldn't be strictly
necessary if ZFS_IOC_SEND_PROGRESS weren't strictly tied to
the descriptor the send is happening on).

Supersedes openzfs#11992
Closes openzfs#11445
Co-authored-by: Rich Ercolani <[email protected]>
Signed-off-by: Ahelenia Ziemiańska <[email protected]>
  • Loading branch information
nabijaczleweli committed Mar 7, 2022
1 parent 6ccd507 commit 3dcbed0
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 10 deletions.
1 change: 1 addition & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ enum lzc_send_flags {
LZC_SEND_FLAG_SAVED = 1 << 4,
};

_LIBZFS_CORE_H int lzc_send_wrapper(int (*)(int, void *), int, void *);
_LIBZFS_CORE_H int lzc_send(const char *, const char *, int,
enum lzc_send_flags);
_LIBZFS_CORE_H int lzc_send_resume(const char *, const char *, int,
Expand Down
106 changes: 100 additions & 6 deletions lib/libzfs/libzfs_sendrecv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1682,7 +1682,7 @@ lzc_flags_from_resume_nvl(nvlist_t *resume_nvl)
}

static int
zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
nvlist_t *resume_nvl)
{
char errbuf[1024];
Expand Down Expand Up @@ -1893,6 +1893,32 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
return (error);
}

/*
 * Argument bundle for zfs_send_resume_impl_cb(): every parameter of
 * zfs_send_resume_impl() except the output descriptor, packed so it can be
 * passed through lzc_send_wrapper()'s opaque data pointer.
 */
struct zfs_send_resume_impl {
libzfs_handle_t *hdl;
sendflags_t *flags;
nvlist_t *resume_nvl;
};

static int
zfs_send_resume_impl_cb(int outfd, void *arg)
{
struct zfs_send_resume_impl *zsri = arg;
return (zfs_send_resume_impl_cb_impl(zsri->hdl, zsri->flags, outfd,
zsri->resume_nvl));
}

/*
 * Resume a send described by resume_nvl, writing to outfd.
 *
 * The work is done by zfs_send_resume_impl_cb_impl(); this entry point only
 * packs the arguments and routes the call through lzc_send_wrapper().
 */
static int
zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
    nvlist_t *resume_nvl)
{
	struct zfs_send_resume_impl args;

	args.hdl = hdl;
	args.flags = flags;
	args.resume_nvl = resume_nvl;

	return (lzc_send_wrapper(zfs_send_resume_impl_cb, outfd, &args));
}

int
zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
const char *resume_token)
Expand Down Expand Up @@ -2170,9 +2196,11 @@ send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
* if "replicate" is set. If "doall" is set, dump all the intermediate
* snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
* case too. If "props" is set, send properties.
*
* Pre-wrapped (cf. lzc_send_wrapper()).
*/
int
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
static int
zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
void *cb_arg, nvlist_t **debugnvp)
{
Expand Down Expand Up @@ -2374,6 +2402,42 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
return (err);
}

/*
 * Argument bundle for zfs_send_cb(): the parameters of zfs_send() other
 * than the output descriptor, packed so they can be passed through
 * lzc_send_wrapper()'s opaque data pointer.
 */
struct zfs_send {
zfs_handle_t *zhp;
const char *fromsnap;
const char *tosnap;
sendflags_t *flags;
snapfilter_cb_t *filter_func;
void *cb_arg;
nvlist_t **debugnvp;
};

static int
zfs_send_cb(int outfd, void *arg)
{
struct zfs_send *zs = arg;
return (zfs_send_cb_impl(zs->zhp, zs->fromsnap, zs->tosnap, zs->flags,
outfd, zs->filter_func, zs->cb_arg, zs->debugnvp));
}

/*
 * Public entry point for sending a stream to outfd.
 *
 * All arguments are handed unchanged to zfs_send_cb_impl(); the only thing
 * done here is to run that call through lzc_send_wrapper(), which decides
 * which descriptor the implementation actually writes to.
 */
int
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
    void *cb_arg, nvlist_t **debugnvp)
{
	struct zfs_send args;

	args.zhp = zhp;
	args.fromsnap = fromsnap;
	args.tosnap = tosnap;
	args.flags = flags;
	args.filter_func = filter_func;
	args.cb_arg = cb_arg;
	args.debugnvp = debugnvp;

	return (lzc_send_wrapper(zfs_send_cb, outfd, &args));
}


static zfs_handle_t *
name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
{
Expand Down Expand Up @@ -2450,10 +2514,12 @@ snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
* The "zhp" argument is the handle of the dataset to send (typically a
* snapshot). The "from" argument is the full name of the snapshot or
* bookmark that is the incremental source.
*
* Pre-wrapped (cf. lzc_send_wrapper()).
*/
int
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
const char *redactbook)
static int
zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
sendflags_t *flags, const char *redactbook)
{
int err;
libzfs_handle_t *hdl = zhp->zfs_hdl;
Expand Down Expand Up @@ -2642,6 +2708,34 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
return (err != 0);
}

/*
 * Argument bundle for zfs_send_one_cb(): the parameters of zfs_send_one()
 * other than the output descriptor, packed so they can be passed through
 * lzc_send_wrapper()'s opaque data pointer.
 */
struct zfs_send_one {
zfs_handle_t *zhp;
const char *from;
sendflags_t *flags;
const char *redactbook;
};

static int
zfs_send_one_cb(int fd, void *arg)
{
struct zfs_send_one *zso = arg;
return (zfs_send_one_cb_impl(zso->zhp, zso->from, fd, zso->flags,
zso->redactbook));
}

/*
 * Public entry point for sending a single dataset to fd.
 *
 * Packs its arguments and runs zfs_send_one_cb_impl() via
 * lzc_send_wrapper(); the result of the wrapper is returned as-is.
 */
int
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
    const char *redactbook)
{
	struct zfs_send_one args;

	args.zhp = zhp;
	args.from = from;
	args.flags = flags;
	args.redactbook = redactbook;

	return (lzc_send_wrapper(zfs_send_one_cb, fd, &args));
}

/*
* Routines specific to "zfs recv"
*/
Expand Down
163 changes: 159 additions & 4 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,84 @@ max_pipe_buffer(int infd)
#endif
}

#if __linux__
/*
 * Descriptor pair handed by lzc_send_wrapper() to its send_worker() thread:
 * the worker copies everything readable on "from" into "to".
 */
struct send_worker_ctx {
int from; /* read end of pipe, with send data; closed on exit */
int to; /* original arbitrary output fd; mustn't be a pipe */
};

/*
 * Thread body for lzc_send_wrapper(): move everything that arrives on the
 * read end of the pipe (ctx->from) to the original output descriptor
 * (ctx->to) with splice(2), until EOF or a hard error.
 *
 * arg is a struct send_worker_ctx *.  ctx->from is always closed before
 * returning.  The return value is 0 on EOF, otherwise the errno of the
 * failing splice() smuggled through uintptr_t, for pthread_join() to collect.
 */
static void *
send_worker(void *arg)
{
	struct send_worker_ctx *ctx = arg;
	unsigned int bufsiz = max_pipe_buffer(ctx->from);
	ssize_t rd;

	for (;;) {
		rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
		    SPLICE_F_MOVE | SPLICE_F_MORE);
		/*
		 * A signal delivered to this thread mid-splice is not a
		 * failure of the send: retry instead of tearing the pipe
		 * down and cutting the stream short.
		 */
		if (rd == -1 && errno == EINTR)
			continue;
		if (rd <= 0)
			break;
	}

	/* Capture errno before close() gets a chance to overwrite it. */
	int err = (rd == -1) ? errno : 0;
	close(ctx->from);
	return ((void *)(uintptr_t)err);
}
#endif

/*
 * Run func(fd, data) so that the descriptor it writes to is always a pipe.
 *
 * Background: since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf
 * ("fs: don't allow kernel reads and writes without iter ops"),
 * ZFS_IOC_SEND* fails with EINVAL when writing to /dev/null, /dev/zero, &c.
 *
 * On Linux, unless orig_fd is -1 or already a FIFO, a pipe is created,
 * func() is given its write end, and a background thread (send_worker())
 * copies the read end to orig_fd.  In every other case, and on non-Linux
 * platforms, func() is simply called with orig_fd; this makes it safe to
 * wrap, or call wrapped functions from, an already-wrapped context.
 *
 * Returns func()'s result if it is nonzero, otherwise the worker thread's
 * error (which is then also stored in errno), otherwise 0.  If the wrapper
 * cannot be set up, the errno of the failing fstat()/pipe2(), EBADF for a
 * read-only orig_fd, or the pthread_create() error is returned without
 * func() having been called.
 */
int
lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data)
{
#if __linux__
	/* Nothing to write to: nothing to wrap. */
	if (orig_fd == -1)
		return (func(orig_fd, data));

	struct stat st;
	if (fstat(orig_fd, &st) == -1)
		return (errno);

	/* Already a pipe: use it directly. */
	if (S_ISFIFO(st.st_mode))
		return (func(orig_fd, data));

	int open_flags = fcntl(orig_fd, F_GETFL);
	if ((open_flags & O_ACCMODE) == O_RDONLY) {
		errno = EBADF;
		return (EBADF);
	}

	int pipefd[2];
	if (pipe2(pipefd, O_CLOEXEC) == -1)
		return (errno);

	struct send_worker_ctx ctx = {.from = pipefd[0], .to = orig_fd};
	pthread_t worker;
	int err = pthread_create(&worker, NULL, send_worker, &ctx);
	if (err != 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		errno = err;
		return (err);
	}

	err = func(pipefd[1], data);

	/* Closing the write end lets the worker see EOF and finish. */
	close(pipefd[1]);

	void *worker_ret;
	pthread_join(worker, &worker_ret);
	if (err == 0 && worker_ret != NULL) {
		err = (uintptr_t)worker_ret;
		errno = err;
	}

	return (err);
#else
	return (func(orig_fd, data));
#endif
}

/*
* Generate a zfs send stream for the specified snapshot and write it to
* the specified file descriptor.
Expand Down Expand Up @@ -687,9 +765,11 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
* redactnv: nvlist of string -> boolean(ignored) containing the names of all
* the snapshots that we should redact with respect to.
* redactbook: Name of the redaction bookmark to create.
*
* Pre-wrapped.
*/
int
lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
static int
lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd,
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
const char *redactbook)
{
Expand Down Expand Up @@ -722,6 +802,40 @@ lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
return (err);
}

/*
 * Argument bundle for lzc_send_resume_redacted_cb(): the parameters of
 * lzc_send_resume_redacted() other than the output descriptor, packed so
 * they can be passed through lzc_send_wrapper()'s opaque data pointer.
 */
struct lzc_send_resume_redacted {
const char *snapname;
const char *from;
enum lzc_send_flags flags;
uint64_t resumeobj;
uint64_t resumeoff;
const char *redactbook;
};

static int
lzc_send_resume_redacted_cb(int fd, void *arg)
{
struct lzc_send_resume_redacted *zsrr = arg;
return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from,
fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff,
zsrr->redactbook));
}

int
lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
const char *redactbook)
{
struct lzc_send_resume_redacted zsrr = {
.snapname = snapname,
.from = from,
.flags = flags,
.resumeobj = resumeobj,
.resumeoff = resumeoff,
.redactbook = redactbook,
};
return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr));
}

/*
* "from" can be NULL, a snapshot, or a bookmark.
*
Expand All @@ -737,9 +851,11 @@ lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
* significantly more I/O and be less efficient than a send space estimation on
* an equivalent snapshot. This process is also used if redact_snaps is
* non-null.
*
* Pre-wrapped.
*/
int
lzc_send_space_resume_redacted(const char *snapname, const char *from,
static int
lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from,
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
{
Expand Down Expand Up @@ -776,6 +892,45 @@ lzc_send_space_resume_redacted(const char *snapname, const char *from,
return (err);
}

/*
 * Argument bundle for lzc_send_space_resume_redacted_cb(): the parameters of
 * lzc_send_space_resume_redacted() other than the descriptor, packed so they
 * can be passed through lzc_send_wrapper()'s opaque data pointer.
 */
struct lzc_send_space_resume_redacted {
const char *snapname;
const char *from;
enum lzc_send_flags flags;
uint64_t resumeobj;
uint64_t resumeoff;
uint64_t resume_bytes;
const char *redactbook;
uint64_t *spacep;
};

/*
 * lzc_send_wrapper() callback: unpack the
 * struct lzc_send_space_resume_redacted passed as arg and call
 * lzc_send_space_resume_redacted_cb_impl() with the wrapper-supplied fd.
 */
static int
lzc_send_space_resume_redacted_cb(int fd, void *arg)
{
	struct lzc_send_space_resume_redacted *args = arg;
	const char *snapname = args->snapname;
	const char *from = args->from;
	const char *redactbook = args->redactbook;
	uint64_t *spacep = args->spacep;

	return (lzc_send_space_resume_redacted_cb_impl(snapname, from,
	    args->flags, args->resumeobj, args->resumeoff,
	    args->resume_bytes, redactbook, fd, spacep));
}

int
lzc_send_space_resume_redacted(const char *snapname, const char *from,
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
{
struct lzc_send_space_resume_redacted zssrr = {
.snapname = snapname,
.from = from,
.flags = flags,
.resumeobj = resumeobj,
.resumeoff = resumeoff,
.resume_bytes = resume_bytes,
.redactbook = redactbook,
.spacep = spacep,
};
return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb,
fd, &zssrr));
}

int
lzc_send_space(const char *snapname, const char *from,
enum lzc_send_flags flags, uint64_t *spacep)
Expand Down

0 comments on commit 3dcbed0

Please sign in to comment.