Skip to content

Commit

Permalink
Fix Linux 4.1 compat regarding loop device on ZFS
Browse files Browse the repository at this point in the history
Starting with Linux 4.1, an iov_iter backed by bio_vec may be passed into
iter_read/iter_write. Notably, the loop device passes bio_vec to the backing
filesystem. However, the current ZFS code assumes iovec without any check, so
it always crashes when a loop device is used on top of ZFS.

With the restructured uio_t, we can safely pass a bio_vec in uio_t with UIO_BVEC
set. The uio* functions are modified to handle the bio_vec case separately.

The now-const uio_iov causes some warnings in the xuio-related code, so
explicitly cast it to non-const there.

Signed-off-by: Chunwei Chen <[email protected]>
  • Loading branch information
tuxoko committed Aug 3, 2015
1 parent 0b09ac3 commit e51f422
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 137 deletions.
205 changes: 108 additions & 97 deletions module/zcommon/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/*
* The uio support from OpenSolaris has been added as a short term
Expand All @@ -46,27 +49,25 @@

#include <sys/types.h>
#include <sys/uio_impl.h>
#include <linux/kmap_compat.h>

/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
* of the move, and the I/O parameters are provided in "uio", which is
* updated to reflect the data which was moved. Returns 0 on success or
* a non-zero errno on failure.
*/
/*
 * iovec worker for uiomove(): move up to "n" bytes between the kernel
 * buffer "p" and the iovec array described by "uio".
 *
 * The iovec entries themselves are never written.  Progress inside the
 * current entry is kept in uio->uio_skip; uio_iov and uio_iovcnt advance
 * only once an entry has been consumed completely.  uio_resid and
 * uio_loffset are adjusted by the number of bytes moved.
 *
 * rw == UIO_READ copies from "p" into the iovec, anything else copies
 * from the iovec into "p".  For UIO_USERSPACE/UIO_USERISPACE the copy
 * goes through copy_to_user()/copy_from_user(); for UIO_SYSSPACE it is
 * a plain bcopy().
 *
 * Returns 0 on success or EFAULT when a user-space copy fails; the uio
 * is not advanced for the chunk that failed.
 */
static int
uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	/*
	 * iov is deliberately not dereferenced before the loop: once the
	 * uio is exhausted the cursor has been advanced past the entries
	 * that were consumed, and nothing guarantees another one follows.
	 */
	while (n && uio->uio_resid) {
		/*
		 * skip == iov_len is fine (empty entry, consumed below);
		 * skip > iov_len would make the subtraction wrap.
		 */
		ASSERT3U(skip, <=, iov->iov_len);
		cnt = MIN(iov->iov_len - skip, n);

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
		case UIO_USERISPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base + skip, p, cnt))
					return (EFAULT);
			} else {
				if (copy_from_user(p, iov->iov_base + skip,
				    cnt))
					return (EFAULT);
			}
			break;
		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				bcopy(p, iov->iov_base + skip, cnt);
			else
				bcopy(iov->iov_base + skip, p, cnt);
			break;
		default:
			ASSERT(0);
		}

		/* Step to the next entry once this one is used up. */
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

/*
 * bio_vec worker for uiomove(): move up to "n" bytes between the kernel
 * buffer "p" and the pages described by uio->uio_bvec.
 *
 * Each page is mapped with zfs_kmap_atomic() only for the duration of
 * one bcopy().  rw == UIO_READ copies from "p" into the page, anything
 * else copies from the page into "p".
 *
 * Progress inside the current entry is kept in uio->uio_skip; uio_bvec
 * and uio_iovcnt advance once an entry has been consumed completely.
 * uio_resid and uio_loffset are adjusted by the number of bytes moved.
 *
 * Always returns 0.
 */
static int
uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	/*
	 * bv is deliberately not dereferenced before the loop: once the
	 * uio is exhausted the cursor has been advanced past the entries
	 * that were consumed, and nothing guarantees another one follows.
	 */
	while (n && uio->uio_resid) {
		void *paddr;

		/* skip > bv_len would make the subtraction below wrap. */
		ASSERT3U(skip, <=, bv->bv_len);
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
		if (rw == UIO_READ)
			bcopy(p, paddr + bv->bv_offset + skip, cnt);
		else
			bcopy(paddr + bv->bv_offset + skip, p, cnt);
		zfs_kunmap_atomic(paddr, KM_USER1);

		/* Step to the next entry once this one is used up. */
		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

/*
 * Public entry point: hand the transfer to the worker that matches the
 * uio's segment type and pass its return value straight back.
 */
int
uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
	/* Page-based (bio_vec) uios have their own copy loop. */
	if (uio->uio_segflg == UIO_BVEC)
		return (uiomove_bvec(p, n, rw, uio));

	/* Every other segment type is described by an iovec array. */
	return (uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(uiomove);

/*
 * Fetch one byte from the user address "uptr" into "*vptr" via
 * get_user(); evaluates to get_user()'s result.  Both arguments are
 * parenthesized so that arbitrary pointer expressions expand correctly.
 */
#define	fuword8(uptr, vptr)	get_user((*(vptr)), (uptr))
Expand All @@ -111,39 +163,39 @@ EXPORT_SYMBOL(uiomove);
/*
 * Touch the user pages behind the next "n" bytes of "uio" by reading
 * one byte from each page with fuword8().
 *
 * The uio itself is not modified: the walk uses local copies of the
 * iovec cursor, entry count and uio_skip.  The walk stops silently at
 * the first byte that cannot be read.
 *
 * UIO_SYSSPACE and UIO_BVEC uios return immediately, since they do not
 * reference user pages.
 */
void
uio_prefaultpages(ssize_t n, struct uio *uio)
{
	const struct iovec *iov;
	ulong_t cnt, incr;
	caddr_t p;
	uint8_t tmp;
	int iovcnt;
	size_t skip = uio->uio_skip;

	/* no need to fault in kernel pages */
	switch (uio->uio_segflg) {
	case UIO_SYSSPACE:
	case UIO_BVEC:
		return;
	case UIO_USERSPACE:
	case UIO_USERISPACE:
		break;
	default:
		ASSERT(0);
	}

	iov = uio->uio_iov;
	iovcnt = uio->uio_iovcnt;

	/* uio_skip only applies to the first entry; later ones start at 0. */
	for (; (n > 0) && (iovcnt > 0); iov++, iovcnt--, skip = 0) {
		/* skip > iov_len would make the subtraction below wrap. */
		ASSERT3U(skip, <=, iov->iov_len);
		cnt = MIN(iov->iov_len - skip, n);

		/*
		 * Empty iov entry: there is nothing to touch, and the
		 * "last byte" probe below would otherwise read the byte
		 * in front of the buffer.
		 */
		if (cnt == 0)
			continue;
		n -= cnt;

		/*
		 * touch each page in this segment.
		 */
		p = iov->iov_base + skip;
		while (cnt) {
			if (fuword8((uint8_t *)p, &tmp))
				return;
			incr = MIN(cnt, PAGESIZE);
			p += incr;
			cnt -= incr;
		}

		/*
		 * touch the last byte in case it straddles a page.
		 */
		p--;
		if (fuword8((uint8_t *)p, &tmp))
			return;
	}
}
EXPORT_SYMBOL(uio_prefaultpages);
Expand All @@ -175,49 +220,13 @@ EXPORT_SYMBOL(uio_prefaultpages);
/*
 * Like uiomove(), but leaves "uio" untouched: the transfer runs on a
 * stack copy of the uio, so the caller's cursor (uio_iov/uio_bvec,
 * uio_skip, uio_resid, uio_loffset) stays where it was.  Going through
 * uiomove() also means uio_skip and UIO_BVEC uios are honoured.
 *
 * *cbytes receives the number of bytes by which the copy's uio_resid
 * shrank, i.e. the bytes actually moved, including on failure.
 *
 * Returns whatever uiomove() returned.
 */
int
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
{
	struct uio uio_copy;
	int ret;

	bcopy(uio, &uio_copy, sizeof (struct uio));
	ret = uiomove(p, n, rw, &uio_copy);
	*cbytes = uio->uio_resid - uio_copy.uio_resid;
	return (ret);
}
EXPORT_SYMBOL(uiocopy);

Expand All @@ -229,21 +238,23 @@ uioskip(uio_t *uiop, size_t n)
{
	/*
	 * Advance the uio by "n" bytes without copying anything.  A
	 * request larger than uio_resid is ignored entirely.
	 *
	 * The skip is first added to uio_skip, then whole entries are
	 * dropped from the front of the vector while uio_skip covers
	 * them.  Finally uio_loffset and uio_resid are adjusted by "n".
	 */
	if (n > uiop->uio_resid)
		return;

	uiop->uio_skip += n;
	if (uiop->uio_segflg != UIO_BVEC) {
		/*
		 * Stop once no entries remain: skipping exactly to the
		 * end must not read the length of a non-existent entry.
		 */
		while (uiop->uio_iovcnt &&
		    uiop->uio_skip >= uiop->uio_iov->iov_len) {
			uiop->uio_skip -= uiop->uio_iov->iov_len;
			uiop->uio_iov++;
			uiop->uio_iovcnt--;
		}
	} else {
		/*
		 * Same walk over bio_vec entries.  The cursor advanced
		 * here must be uio_bvec, so that it moves in units of
		 * struct bio_vec rather than struct iovec.
		 */
		while (uiop->uio_iovcnt &&
		    uiop->uio_skip >= uiop->uio_bvec->bv_len) {
			uiop->uio_skip -= uiop->uio_bvec->bv_len;
			uiop->uio_bvec++;
			uiop->uio_iovcnt--;
		}
	}
	uiop->uio_loffset += n;
	uiop->uio_resid -= n;
}
EXPORT_SYMBOL(uioskip);
#endif /* _KERNEL */
5 changes: 3 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

#include <sys/dmu.h>
Expand Down Expand Up @@ -933,7 +934,7 @@ dmu_xuio_init(xuio_t *xuio, int nblk)
priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
priv->cnt = nblk;
priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
priv->iovp = uio->uio_iov;
priv->iovp = (iovec_t *)uio->uio_iov;
XUIO_XUZC_PRIV(xuio) = priv;

if (XUIO_XUZC_RW(xuio) == UIO_READ)
Expand Down Expand Up @@ -974,7 +975,7 @@ dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)

ASSERT(i < priv->cnt);
ASSERT(off + n <= arc_buf_size(abuf));
iov = uio->uio_iov + i;
iov = (iovec_t *)uio->uio_iov + i;
iov->iov_base = (char *)abuf->b_data + off;
iov->iov_len = n;
priv->bufs[i] = abuf;
Expand Down
6 changes: 4 additions & 2 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/* Portions Copyright 2007 Jeremy Teo */
Expand Down Expand Up @@ -591,10 +592,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
int max_blksz = zsb->z_max_blksz;
int error = 0;
arc_buf_t *abuf;
iovec_t *aiov = NULL;
const iovec_t *aiov = NULL;
xuio_t *xuio = NULL;
int i_iov = 0;
iovec_t *iovp = uio->uio_iov;
const iovec_t *iovp = uio->uio_iov;
int write_eof;
int count = 0;
sa_bulk_attr_t bulk[4];
Expand Down Expand Up @@ -714,6 +715,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)

if (xuio && abuf == NULL) {
ASSERT(i_iov < iovcnt);
ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
aiov = &iovp[i_iov];
abuf = dmu_xuio_arcbuf(xuio, i_iov);
dmu_xuio_clear(xuio, i_iov);
Expand Down
Loading

0 comments on commit e51f422

Please sign in to comment.