From 3ccebe9fe376eac5d4b74a8cc56771a20632ceec Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Mon, 30 Sep 2019 16:43:08 -0700 Subject: [PATCH] Factor out non-portable vnode_t usage On FreeBSD file offset state is maintained in struct file. A given vnode can be referenced from many different struct file *. As a consequence, FreeBSD's SPL doesn't support vn_rdwr with the FAPPEND flag. This change replaces the non-portable vnode_t with the portable file_t in the common code. Signed-off-by: Matt Macy --- cmd/ztest/ztest.c | 4 +- include/os/linux/spl/sys/Makefile.am | 1 - include/os/linux/spl/sys/kmem_cache.h | 2 + include/os/linux/spl/sys/kobj.h | 42 --- include/os/linux/spl/sys/vnode.h | 38 +-- include/sys/Makefile.am | 1 + include/sys/dmu.h | 2 +- include/sys/dmu_recv.h | 5 +- include/sys/vdev_file.h | 2 +- include/sys/vdev_impl.h | 2 - include/sys/zfs_context.h | 42 +-- include/sys/zfs_file.h | 33 +++ lib/libzpool/Makefile.am | 1 + lib/libzpool/kernel.c | 232 --------------- module/os/linux/spl/Makefile.in | 1 - module/os/linux/spl/spl-generic.c | 91 +++++- module/os/linux/spl/spl-kobj.c | 86 ------ module/os/linux/spl/spl-vnode.c | 371 ++--------------------- module/os/linux/zfs/Makefile.in | 1 + module/os/linux/zfs/vdev_disk.c | 3 - module/os/linux/zfs/vdev_file.c | 49 ++-- module/os/linux/zfs/zfs_file_os.c | 408 ++++++++++++++++++++++++++ module/zfs/dmu_diff.c | 32 +- module/zfs/dmu_recv.c | 15 +- module/zfs/fm.c | 1 - module/zfs/spa.c | 3 +- module/zfs/spa_config.c | 76 ++--- module/zfs/zfs_ioctl.c | 39 ++- 28 files changed, 678 insertions(+), 905 deletions(-) delete mode 100644 include/os/linux/spl/sys/kobj.h create mode 100644 include/sys/zfs_file.h delete mode 100644 module/os/linux/spl/spl-kobj.c create mode 100644 module/os/linux/zfs/zfs_file_os.c diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index b886f1e99b03..2b224a03c81f 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -5826,8 +5826,8 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) 
(long long)vd0->vdev_id, (int)maxfaults); if (vf != NULL && ztest_random(3) == 0) { - (void) close(vf->vf_vnode->v_fd); - vf->vf_vnode->v_fd = -1; + (void) close(vf->vf_file->f_fd); + vf->vf_file->f_fd = -1; } else if (ztest_random(2) == 0) { vd0->vdev_cant_read = B_TRUE; } else { diff --git a/include/os/linux/spl/sys/Makefile.am b/include/os/linux/spl/sys/Makefile.am index e3df4edaeee9..65ab776870bd 100644 --- a/include/os/linux/spl/sys/Makefile.am +++ b/include/os/linux/spl/sys/Makefile.am @@ -19,7 +19,6 @@ KERNEL_H = \ $(top_srcdir)/include/os/linux/spl/sys/isa_defs.h \ $(top_srcdir)/include/os/linux/spl/sys/kmem_cache.h \ $(top_srcdir)/include/os/linux/spl/sys/kmem.h \ - $(top_srcdir)/include/os/linux/spl/sys/kobj.h \ $(top_srcdir)/include/os/linux/spl/sys/kstat.h \ $(top_srcdir)/include/os/linux/spl/sys/list.h \ $(top_srcdir)/include/os/linux/spl/sys/mod_os.h \ diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h index 3584eefdfc9f..1639db5d4cce 100644 --- a/include/os/linux/spl/sys/kmem_cache.h +++ b/include/os/linux/spl/sys/kmem_cache.h @@ -231,6 +231,8 @@ extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache); spl_kmem_cache_reap_now(skc, skc->skc_reap) #define kmem_reap() spl_kmem_reap() +extern spl_kmem_cache_t *vn_file_cache; + /* * The following functions are only available for internal use. */ diff --git a/include/os/linux/spl/sys/kobj.h b/include/os/linux/spl/sys/kobj.h deleted file mode 100644 index 558ec39a808f..000000000000 --- a/include/os/linux/spl/sys/kobj.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . 
- * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - */ - -#ifndef _SPL_KOBJ_H -#define _SPL_KOBJ_H - -#include - -typedef struct _buf { - vnode_t *vp; -} _buf_t; - -typedef struct _buf buf_t; - -extern struct _buf *kobj_open_file(const char *name); -extern void kobj_close_file(struct _buf *file); -extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, - unsigned off); -extern int kobj_get_filesize(struct _buf *file, uint64_t *size); - -#endif /* SPL_KOBJ_H */ diff --git a/include/os/linux/spl/sys/vnode.h b/include/os/linux/spl/sys/vnode.h index 7bd278e4e13b..0a1558c4f3b0 100644 --- a/include/os/linux/spl/sys/vnode.h +++ b/include/os/linux/spl/sys/vnode.h @@ -137,19 +137,6 @@ typedef struct vattr { struct dentry *va_dentry; /* dentry to wire */ } vattr_t; -typedef struct vnode { - struct file *v_file; - kmutex_t v_lock; /* protects vnode fields */ - uint_t v_flag; /* vnode flags (see below) */ - uint_t v_count; /* reference count */ - void *v_data; /* private data for fs */ - struct vfs *v_vfsp; /* ptr to containing VFS */ - struct stdata *v_stream; /* associated stream */ - enum vtype v_type; /* vnode type */ - dev_t v_rdev; /* device (VCHR, VBLK) */ - gfp_t v_gfp_mask; /* original mapping gfp mask */ -} vnode_t; - typedef struct vn_file { int f_fd; /* linux fd for lookup */ struct task_struct *f_task; /* linux task this fd belongs to */ @@ -157,39 +144,24 @@ typedef struct vn_file { atomic_t 
f_ref; /* ref count */ kmutex_t f_lock; /* struct lock */ loff_t f_offset; /* offset */ - vnode_t *f_vnode; /* vnode */ struct list_head f_list; /* list referenced file_t's */ } file_t; -extern vnode_t *vn_alloc(int flag); -void vn_free(vnode_t *vp); extern vtype_t vn_mode_to_vtype(mode_t); extern mode_t vn_vtype_to_mode(vtype_t); -extern int vn_open(const char *path, uio_seg_t seg, int flags, int mode, - vnode_t **vpp, int x1, void *x2); -extern int vn_openat(const char *path, uio_seg_t seg, int flags, int mode, - vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd); -extern int vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, - offset_t off, uio_seg_t seg, int x1, rlim64_t x2, - void *x3, ssize_t *residp); -extern int vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4); -extern int vn_seek(vnode_t *vp, offset_t o, offset_t *op, void *ct); - -extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4); -extern int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4); -extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, +extern int vn_seek(offset_t o, offset_t *op, void *ct); + +extern int vn_space(file_t *fp, int cmd, struct flock *bfp, int flag, offset_t offset, void *x6, void *x7); extern file_t *vn_getf(int fd); extern void vn_releasef(int fd); extern void vn_areleasef(int fd, uf_info_t *fip); +extern void vn_file_add(file_t *fp); int spl_vn_init(void); void spl_vn_fini(void); -#define VOP_CLOSE vn_close #define VOP_SEEK vn_seek -#define VOP_GETATTR vn_getattr -#define VOP_FSYNC vn_fsync #define VOP_SPACE vn_space #define VOP_PUTPAGE(vp, o, s, f, x1, x2) ((void)0) #define vn_is_readonly(vp) 0 @@ -197,6 +169,4 @@ void spl_vn_fini(void); #define releasef vn_releasef #define areleasef vn_areleasef -extern vnode_t *rootdir; - #endif /* SPL_VNODE_H */ diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index 21e85431cb67..75f7d93e7ca8 100644 --- a/include/sys/Makefile.am +++ 
b/include/sys/Makefile.am @@ -104,6 +104,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/zfs_context.h \ $(top_srcdir)/include/sys/zfs_debug.h \ $(top_srcdir)/include/sys/zfs_delay.h \ + $(top_srcdir)/include/sys/zfs_file.h \ $(top_srcdir)/include/sys/zfs_fuid.h \ $(top_srcdir)/include/sys/zfs_project.h \ $(top_srcdir)/include/sys/zfs_ratelimit.h \ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 44889bb10b84..2c8ad4ed2714 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -1071,7 +1071,7 @@ void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); int dmu_diff(const char *tosnap_name, const char *fromsnap_name, - struct vnode *vp, offset_t *offp); + file_t *fp, offset_t *offp); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h index 1a7347d66e8f..6ef91f1d67b8 100644 --- a/include/sys/dmu_recv.h +++ b/include/sys/dmu_recv.h @@ -62,7 +62,7 @@ typedef struct dmu_recv_cookie { nvlist_t *drc_begin_nvl; objset_t *drc_os; - vnode_t *drc_vp; /* The vnode to read the stream from */ + file_t *drc_fp; /* The file to read the stream from */ uint64_t drc_voff; /* The current offset in the stream */ uint64_t drc_bytes_read; /* @@ -82,10 +82,11 @@ typedef struct dmu_recv_cookie { int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, boolean_t force, boolean_t resumable, nvlist_t *localprops, nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, - vnode_t *vp, offset_t *voffp); + file_t *fp, offset_t *voffp); int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd, uint64_t *action_handlep, offset_t *voffp); int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner); boolean_t dmu_objset_is_receiving(objset_t *os); + #endif /* _DMU_RECV_H */ diff --git a/include/sys/vdev_file.h b/include/sys/vdev_file.h index 9a398c58391e..d9ec6d10e938 100644 --- a/include/sys/vdev_file.h +++ b/include/sys/vdev_file.h @@ 
-34,7 +34,7 @@ extern "C" { #endif typedef struct vdev_file { - vnode_t *vf_vnode; + file_t *vf_file; } vdev_file_t; extern void vdev_file_init(void); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index ae82b75c0413..4f63e1ae5f8c 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -220,8 +220,6 @@ struct vdev { vdev_ops_t *vdev_ops; /* vdev operations */ spa_t *vdev_spa; /* spa for this vdev */ void *vdev_tsd; /* type-specific data */ - vnode_t *vdev_name_vp; /* vnode for pathname */ - vnode_t *vdev_devid_vp; /* vnode for devid */ vdev_t *vdev_top; /* top-level vdev */ vdev_t *vdev_parent; /* parent vdev */ vdev_t **vdev_child; /* array of children */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 59846b1c8da5..8e06ce48e73a 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -514,15 +513,11 @@ extern void system_taskq_fini(void); #define XVA_MAPSIZE 3 #define XVA_MAGIC 0x78766174 -/* - * vnodes - */ -typedef struct vnode { - uint64_t v_size; - int v_fd; - char *v_path; - int v_dump_fd; -} vnode_t; +typedef struct file { + int f_fd; + loff_t f_pos; + loff_t f_offset; +} file_t; extern char *vn_dumpdir; #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ @@ -593,40 +588,22 @@ typedef struct vsecattr { #define F_FREESP 11 -extern int fop_getattr(vnode_t *vp, vattr_t *vap); - -#define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp) #define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 -#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap)); - -#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) #if defined(HAVE_FILE_FALLOCATE) && \ defined(FALLOC_FL_PUNCH_HOLE) && \ defined(FALLOC_FL_KEEP_SIZE) -#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) \ - fallocate((vp)->v_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, \ +#define VOP_SPACE(fp, cmd, flck, fl, off, cr, ct) \ + fallocate((fp)->f_fd, 
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, \ (flck)->l_start, (flck)->l_len) #else #define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) (0) #endif -#define VN_RELE(vp) vn_close(vp) - -extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, - int x2, int x3); -extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, - int x2, int x3, vnode_t *vp, int fd); -extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, - offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); -extern void vn_close(vnode_t *vp); - #define vn_remove(path, x1, x2) remove(path) #define vn_rename(from, to, seg) rename((from), (to)) #define vn_is_readonly(vp) B_FALSE -extern vnode_t *rootdir; - #include /* for FREAD, FWRITE, etc */ /* @@ -759,11 +736,6 @@ typedef struct ace_object { #define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 #define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 -extern struct _buf *kobj_open_file(char *name); -extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, - unsigned off); -extern void kobj_close_file(struct _buf *file); -extern int kobj_get_filesize(struct _buf *file, uint64_t *size); extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); diff --git a/include/sys/zfs_file.h b/include/sys/zfs_file.h new file mode 100644 index 000000000000..f445a8924de5 --- /dev/null +++ b/include/sys/zfs_file.h @@ -0,0 +1,33 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SYS_ZFS_FILE_H +#define _SYS_ZFS_FILE_H + +int zfs_file_write(file_t *, const void *, size_t, loff_t *, ssize_t *); +int zfs_file_read(file_t *, void *, size_t, loff_t *, ssize_t *); +int zfs_file_seek(file_t *, loff_t *, int); +int zfs_file_open(const char *path, int flags, int mode, file_t **fp); +void zfs_file_close(file_t *fp); +int zfs_file_getattr(file_t *fp, vattr_t *vap); +int zfs_file_fsync(file_t *fp, int flags); + +#endif /* _SYS_ZFS_FILE_H */ diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 9c097ce79dd0..43214d73a7c6 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -144,6 +144,7 @@ KERNEL_C = \ zfeature.c \ zfs_byteswap.c \ zfs_debug.c \ + zfs_file_os.c \ zfs_fm.c \ zfs_fuid.c \ zfs_sa.c \ diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index c14468cb2510..3a96541e2983 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -49,7 +49,6 @@ int aok; uint64_t physmem; -vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; struct utsname hw_utsname; vmem_t *zio_arena = NULL; @@ -488,183 +487,6 @@ procfs_list_add(procfs_list_t *procfs_list, void *p) * vnode operations * ========================================================================= */ -/* - * Note: for the xxxat() versions of these functions, we assume that the - * starting vp is always rootdir (which is true for spa_directory.c, the only - * ZFS consumer of these interfaces). We assert this is true, and then emulate - * them by adding '/' in front of the path. 
- */ - -/*ARGSUSED*/ -int -vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) -{ - int fd = -1; - int dump_fd = -1; - vnode_t *vp; - int old_umask = 0; - struct stat64 st; - int err; - - if (!(flags & FCREAT) && stat64(path, &st) == -1) { - err = errno; - return (err); - } - - if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) - flags |= O_DIRECT; - - if (flags & FCREAT) - old_umask = umask(0); - - /* - * The construct 'flags - FREAD' conveniently maps combinations of - * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. - */ - fd = open64(path, flags - FREAD, mode); - if (fd == -1) { - err = errno; - return (err); - } - - if (flags & FCREAT) - (void) umask(old_umask); - - if (vn_dumpdir != NULL) { - char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); - (void) snprintf(dumppath, MAXPATHLEN, - "%s/%s", vn_dumpdir, basename(path)); - dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); - umem_free(dumppath, MAXPATHLEN); - if (dump_fd == -1) { - err = errno; - close(fd); - return (err); - } - } else { - dump_fd = -1; - } - - if (fstat64_blk(fd, &st) == -1) { - err = errno; - close(fd); - if (dump_fd != -1) - close(dump_fd); - return (err); - } - - (void) fcntl(fd, F_SETFD, FD_CLOEXEC); - - *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); - - vp->v_fd = fd; - vp->v_size = st.st_size; - vp->v_path = spa_strdup(path); - vp->v_dump_fd = dump_fd; - - return (0); -} - -/*ARGSUSED*/ -int -vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, - int x3, vnode_t *startvp, int fd) -{ - char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); - int ret; - - ASSERT(startvp == rootdir); - (void) sprintf(realpath, "/%s", path); - - /* fd ignored for now, need if want to simulate nbmand support */ - ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); - - umem_free(realpath, strlen(path) + 2); - - return (ret); -} - -/*ARGSUSED*/ -int -vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t 
offset, - int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) -{ - ssize_t rc, done = 0, split; - - if (uio == UIO_READ) { - rc = pread64(vp->v_fd, addr, len, offset); - if (vp->v_dump_fd != -1 && rc != -1) { - int status; - status = pwrite64(vp->v_dump_fd, addr, rc, offset); - ASSERT(status != -1); - } - } else { - /* - * To simulate partial disk writes, we split writes into two - * system calls so that the process can be killed in between. - */ - int sectors = len >> SPA_MINBLOCKSHIFT; - split = (sectors > 0 ? rand() % sectors : 0) << - SPA_MINBLOCKSHIFT; - rc = pwrite64(vp->v_fd, addr, split, offset); - if (rc != -1) { - done = rc; - rc = pwrite64(vp->v_fd, (char *)addr + split, - len - split, offset + split); - } - } - -#ifdef __linux__ - if (rc == -1 && errno == EINVAL) { - /* - * Under Linux, this most likely means an alignment issue - * (memory or disk) due to O_DIRECT, so we abort() in order to - * catch the offender. - */ - abort(); - } -#endif - if (rc == -1) - return (errno); - - done += rc; - - if (residp) - *residp = len - done; - else if (done != len) - return (EIO); - return (0); -} - -void -vn_close(vnode_t *vp) -{ - close(vp->v_fd); - if (vp->v_dump_fd != -1) - close(vp->v_dump_fd); - spa_strfree(vp->v_path); - umem_free(vp, sizeof (vnode_t)); -} - -/* - * At a minimum we need to update the size since vdev_reopen() - * will no longer call vn_openat(). - */ -int -fop_getattr(vnode_t *vp, vattr_t *vap) -{ - struct stat64 st; - int err; - - if (fstat64_blk(vp->v_fd, &st) == -1) { - err = errno; - close(vp->v_fd); - return (err); - } - - vap->va_size = st.st_size; - return (0); -} /* * ========================================================================= @@ -858,60 +680,6 @@ cmn_err(int ce, const char *fmt, ...) 
va_end(adx); } -/* - * ========================================================================= - * kobj interfaces - * ========================================================================= - */ -struct _buf * -kobj_open_file(char *name) -{ - struct _buf *file; - vnode_t *vp; - - /* set vp as the _fd field of the file */ - if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, - -1) != 0) - return ((void *)-1UL); - - file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); - file->_fd = (intptr_t)vp; - return (file); -} - -int -kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) -{ - ssize_t resid = 0; - - if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, - UIO_SYSSPACE, 0, 0, 0, &resid) != 0) - return (-1); - - return (size - resid); -} - -void -kobj_close_file(struct _buf *file) -{ - vn_close((vnode_t *)file->_fd); - umem_free(file, sizeof (struct _buf)); -} - -int -kobj_get_filesize(struct _buf *file, uint64_t *size) -{ - struct stat64 st; - vnode_t *vp = (vnode_t *)file->_fd; - - if (fstat64(vp->v_fd, &st) == -1) { - vn_close(vp); - return (errno); - } - *size = st.st_size; - return (0); -} - /* * ========================================================================= * misc routines diff --git a/module/os/linux/spl/Makefile.in b/module/os/linux/spl/Makefile.in index 94804bfed4c9..410e57999734 100644 --- a/module/os/linux/spl/Makefile.in +++ b/module/os/linux/spl/Makefile.in @@ -5,7 +5,6 @@ $(MODULE)-objs += ../os/linux/spl/spl-err.o $(MODULE)-objs += ../os/linux/spl/spl-generic.o $(MODULE)-objs += ../os/linux/spl/spl-kmem.o $(MODULE)-objs += ../os/linux/spl/spl-kmem-cache.o -$(MODULE)-objs += ../os/linux/spl/spl-kobj.o $(MODULE)-objs += ../os/linux/spl/spl-kstat.o $(MODULE)-objs += ../os/linux/spl/spl-proc.o $(MODULE)-objs += ../os/linux/spl/spl-procfs-list.o diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 01c8636e717f..a87738703f5a 100644 --- 
a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -47,6 +46,8 @@ #include #include "zfs_gitrev.h" #include +#include +#include char spl_gitrev[64] = ZFS_META_GITREV; @@ -520,6 +521,70 @@ ddi_copyout(const void *from, void *to, size_t len, int flags) } EXPORT_SYMBOL(ddi_copyout); +static ssize_t +spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) +{ +#if defined(HAVE_KERNEL_READ_PPOS) + return (kernel_read(file, buf, count, pos)); +#else + mm_segment_t saved_fs; + ssize_t ret; + + saved_fs = get_fs(); + set_fs(KERNEL_DS); + + ret = vfs_read(file, (void __user *)buf, count, pos); + + set_fs(saved_fs); + + return (ret); +#endif +} + +int +spl_getattr(struct file *filp, vattr_t *vap) +{ + int rc; + struct kstat stat; + + ASSERT(filp); + ASSERT(vap); + + +#if defined(HAVE_4ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); +#elif defined(HAVE_2ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat); +#else + rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); +#endif + if (rc) + return (-rc); + + vap->va_type = vn_mode_to_vtype(stat.mode); + vap->va_mode = stat.mode; + vap->va_uid = KUID_TO_SUID(stat.uid); + vap->va_gid = KGID_TO_SGID(stat.gid); + vap->va_fsid = 0; + vap->va_nodeid = stat.ino; + vap->va_nlink = stat.nlink; + vap->va_size = stat.size; + vap->va_blksize = stat.blksize; + vap->va_atime = stat.atime; + vap->va_mtime = stat.mtime; + vap->va_ctime = stat.ctime; + vap->va_rdev = stat.rdev; + vap->va_nblocks = stat.blocks; + + return (0); +} + +static void +spl_close(struct file *filp) +{ + filp_close(filp, 0); +} /* * Read the unique system identifier from the /etc/hostid file. 
* @@ -563,38 +628,42 @@ static int hostid_read(uint32_t *hostid) { uint64_t size; - struct _buf *file; uint32_t value = 0; int error; + loff_t off; + vattr_t vap; + struct file *filp; + + filp = filp_open(spl_hostid_path, O_RDONLY, 0); - file = kobj_open_file(spl_hostid_path); - if (file == (struct _buf *)-1) + if (IS_ERR(filp)) return (ENOENT); - error = kobj_get_filesize(file, &size); + error = spl_getattr(filp, &vap); if (error) { - kobj_close_file(file); + spl_close(filp); return (error); } - + size = vap.va_size; if (size < sizeof (HW_HOSTID_MASK)) { - kobj_close_file(file); + spl_close(filp); return (EINVAL); } + off = 0; /* * Read directly into the variable like eglibc does. * Short reads are okay; native behavior is preserved. */ - error = kobj_read_file(file, (char *)&value, sizeof (value), 0); + error = spl_kernel_read(filp, &value, sizeof (value), &off); if (error < 0) { - kobj_close_file(file); + spl_close(filp); return (EIO); } /* Mask down to 32 bits like coreutils does. */ *hostid = (value & HW_HOSTID_MASK); - kobj_close_file(file); + spl_close(filp); return (0); } diff --git a/module/os/linux/spl/spl-kobj.c b/module/os/linux/spl/spl-kobj.c deleted file mode 100644 index 7019369bd231..000000000000 --- a/module/os/linux/spl/spl-kobj.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Kobj Implementation. - */ - -#include - -struct _buf * -kobj_open_file(const char *name) -{ - struct _buf *file; - vnode_t *vp; - int rc; - - file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP)); - if (file == NULL) - return ((_buf_t *)-1UL); - - if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) { - kfree(file); - return ((_buf_t *)-1UL); - } - - file->vp = vp; - - return (file); -} /* kobj_open_file() */ -EXPORT_SYMBOL(kobj_open_file); - -void -kobj_close_file(struct _buf *file) -{ - VOP_CLOSE(file->vp, 0, 0, 0, 0, 0); - kfree(file); -} /* kobj_close_file() */ -EXPORT_SYMBOL(kobj_close_file); - -int -kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) -{ - ssize_t resid; - - if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off, - UIO_SYSSPACE, 0, 0, 0, &resid) != 0) - return (-1); - - return (size - resid); -} /* kobj_read_file() */ -EXPORT_SYMBOL(kobj_read_file); - -int -kobj_get_filesize(struct _buf *file, uint64_t *size) -{ - vattr_t vap; - int rc; - - rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL); - if (rc) - return (rc); - - *size = vap.va_size; - - return (rc); -} /* kobj_get_filesize() */ -EXPORT_SYMBOL(kobj_get_filesize); diff --git a/module/os/linux/spl/spl-vnode.c b/module/os/linux/spl/spl-vnode.c index d9056c964e5a..c8f2965f1928 100644 --- a/module/os/linux/spl/spl-vnode.c +++ b/module/os/linux/spl/spl-vnode.c @@ -34,11 +34,8 @@ #include #endif -vnode_t *rootdir = (vnode_t *)0xabcd1234; -EXPORT_SYMBOL(rootdir); - -static spl_kmem_cache_t *vn_cache; -static spl_kmem_cache_t *vn_file_cache; +spl_kmem_cache_t 
*vn_file_cache; +EXPORT_SYMBOL(vn_file_cache); static spinlock_t vn_file_lock; static LIST_HEAD(vn_file_list); @@ -63,56 +60,6 @@ spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len) return (error); } -static int -spl_filp_fsync(struct file *fp, int sync) -{ -#ifdef HAVE_2ARGS_VFS_FSYNC - return (vfs_fsync(fp, sync)); -#else - return (vfs_fsync(fp, (fp)->f_dentry, sync)); -#endif /* HAVE_2ARGS_VFS_FSYNC */ -} - -static ssize_t -spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) -{ -#if defined(HAVE_KERNEL_WRITE_PPOS) - return (kernel_write(file, buf, count, pos)); -#else - mm_segment_t saved_fs; - ssize_t ret; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - ret = vfs_write(file, (__force const char __user *)buf, count, pos); - - set_fs(saved_fs); - - return (ret); -#endif -} - -static ssize_t -spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) -{ -#if defined(HAVE_KERNEL_READ_PPOS) - return (kernel_read(file, buf, count, pos)); -#else - mm_segment_t saved_fs; - ssize_t ret; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - ret = vfs_read(file, (void __user *)buf, count, pos); - - set_fs(saved_fs); - - return (ret); -#endif -} - vtype_t vn_mode_to_vtype(mode_t mode) { @@ -169,256 +116,19 @@ vn_vtype_to_mode(vtype_t vtype) } /* vn_vtype_to_mode() */ EXPORT_SYMBOL(vn_vtype_to_mode); -vnode_t * -vn_alloc(int flag) -{ - vnode_t *vp; - - vp = kmem_cache_alloc(vn_cache, flag); - if (vp != NULL) { - vp->v_file = NULL; - vp->v_type = 0; - } - - return (vp); -} /* vn_alloc() */ -EXPORT_SYMBOL(vn_alloc); - -void -vn_free(vnode_t *vp) -{ - kmem_cache_free(vn_cache, vp); -} /* vn_free() */ -EXPORT_SYMBOL(vn_free); - -int -vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp, - int x1, void *x2) -{ - struct file *fp; - struct kstat stat; - int rc, saved_umask = 0; - gfp_t saved_gfp; - vnode_t *vp; - - ASSERT(flags & (FWRITE | FREAD)); - ASSERT(seg == UIO_SYSSPACE); - ASSERT(vpp); - 
*vpp = NULL; - - if (!(flags & FCREAT) && (flags & FWRITE)) - flags |= FEXCL; - - /* - * Note for filp_open() the two low bits must be remapped to mean: - * 01 - read-only -> 00 read-only - * 10 - write-only -> 01 write-only - * 11 - read-write -> 10 read-write - */ - flags--; - - if (flags & FCREAT) - saved_umask = xchg(&current->fs->umask, 0); - - fp = filp_open(path, flags, mode); - - if (flags & FCREAT) - (void) xchg(&current->fs->umask, saved_umask); - - if (IS_ERR(fp)) - return (-PTR_ERR(fp)); - -#if defined(HAVE_4ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat); -#else - rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat); -#endif - if (rc) { - filp_close(fp, 0); - return (-rc); - } - - vp = vn_alloc(KM_SLEEP); - if (!vp) { - filp_close(fp, 0); - return (ENOMEM); - } - - saved_gfp = mapping_gfp_mask(fp->f_mapping); - mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS)); - - mutex_enter(&vp->v_lock); - vp->v_type = vn_mode_to_vtype(stat.mode); - vp->v_file = fp; - vp->v_gfp_mask = saved_gfp; - *vpp = vp; - mutex_exit(&vp->v_lock); - - return (0); -} /* vn_open() */ -EXPORT_SYMBOL(vn_open); - -int -vn_openat(const char *path, uio_seg_t seg, int flags, int mode, - vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd) -{ - char *realpath; - int len, rc; - - ASSERT(vp == rootdir); - - len = strlen(path) + 2; - realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP)); - if (!realpath) - return (ENOMEM); - - (void) snprintf(realpath, len, "/%s", path); - rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2); - kfree(realpath); - - return (rc); -} /* vn_openat() */ -EXPORT_SYMBOL(vn_openat); - -int -vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off, - uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp) -{ - struct file *fp = vp->v_file; - loff_t offset = off; - int rc; - - ASSERT(uio == UIO_WRITE || uio 
== UIO_READ); - ASSERT(seg == UIO_SYSSPACE); - ASSERT((ioflag & ~FAPPEND) == 0); - - if (ioflag & FAPPEND) - offset = fp->f_pos; - - if (uio & UIO_WRITE) - rc = spl_kernel_write(fp, addr, len, &offset); - else - rc = spl_kernel_read(fp, addr, len, &offset); - - fp->f_pos = offset; - - if (rc < 0) - return (-rc); - - if (residp) { - *residp = len - rc; - } else { - if (rc != len) - return (EIO); - } - - return (0); -} /* vn_rdwr() */ -EXPORT_SYMBOL(vn_rdwr); - -int -vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4) -{ - int rc; - - ASSERT(vp); - ASSERT(vp->v_file); - - mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask); - rc = filp_close(vp->v_file, 0); - vn_free(vp); - - return (-rc); -} /* vn_close() */ -EXPORT_SYMBOL(vn_close); - /* * vn_seek() does not actually seek it only performs bounds checking on the * proposed seek. We perform minimal checking and allow vn_rdwr() to catch * anything more serious. */ int -vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct) +vn_seek(offset_t ooff, offset_t *noffp, void *ct) { return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? 
EINVAL : 0); } EXPORT_SYMBOL(vn_seek); -int -vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4) -{ - struct file *fp; - struct kstat stat; - int rc; - - ASSERT(vp); - ASSERT(vp->v_file); - ASSERT(vap); - - fp = vp->v_file; - -#if defined(HAVE_4ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS, - AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat); -#else - rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat); -#endif - if (rc) - return (-rc); - - vap->va_type = vn_mode_to_vtype(stat.mode); - vap->va_mode = stat.mode; - vap->va_uid = KUID_TO_SUID(stat.uid); - vap->va_gid = KGID_TO_SGID(stat.gid); - vap->va_fsid = 0; - vap->va_nodeid = stat.ino; - vap->va_nlink = stat.nlink; - vap->va_size = stat.size; - vap->va_blksize = stat.blksize; - vap->va_atime = stat.atime; - vap->va_mtime = stat.mtime; - vap->va_ctime = stat.ctime; - vap->va_rdev = stat.rdev; - vap->va_nblocks = stat.blocks; - - return (0); -} -EXPORT_SYMBOL(vn_getattr); - -int -vn_fsync(vnode_t *vp, int flags, void *x3, void *x4) -{ - int datasync = 0; - int error; - int fstrans; - - ASSERT(vp); - ASSERT(vp->v_file); - - if (flags & FDSYNC) - datasync = 1; - - /* - * May enter XFS which generates a warning when PF_FSTRANS is set. - * To avoid this the flag is cleared over vfs_sync() and then reset. 
- */ - fstrans = __spl_pf_fstrans_check(); - if (fstrans) - current->flags &= ~(__SPL_PF_FSTRANS); - - error = -spl_filp_fsync(vp->v_file, datasync); - if (fstrans) - current->flags |= __SPL_PF_FSTRANS; - - return (error); -} /* vn_fsync() */ -EXPORT_SYMBOL(vn_fsync); - -int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, +int vn_space(file_t *fp, int cmd, struct flock *bfp, int flag, offset_t offset, void *x6, void *x7) { int error = EOPNOTSUPP; @@ -429,8 +139,8 @@ int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, if (cmd != F_FREESP || bfp->l_whence != SEEK_SET) return (EOPNOTSUPP); - ASSERT(vp); - ASSERT(vp->v_file); + ASSERT(fp); + ASSERT(fp->f_file); ASSERT(bfp->l_start >= 0 && bfp->l_len > 0); #ifdef FALLOC_FL_PUNCH_HOLE @@ -446,7 +156,7 @@ int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, * When supported by the underlying file system preferentially * use the fallocate() callback to preallocate the space. */ - error = -spl_filp_fallocate(vp->v_file, + error = -spl_filp_fallocate(fp->f_file, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, bfp->l_start, bfp->l_len); @@ -458,9 +168,9 @@ int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, #endif #ifdef HAVE_INODE_TRUNCATE_RANGE - if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode && - vp->v_file->f_dentry->d_inode->i_op && - vp->v_file->f_dentry->d_inode->i_op->truncate_range) { + if (fp->f_file->f_dentry && fp->f_file->f_dentry->d_inode && + fp->f_file->f_dentry->d_inode->i_op && + fp->f_file->f_dentry->d_inode->i_op->truncate_range) { off_t end = bfp->l_start + bfp->l_len; /* * Judging from the code in shmem_truncate_range(), @@ -474,8 +184,8 @@ int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, } --end; - vp->v_file->f_dentry->d_inode->i_op->truncate_range( - vp->v_file->f_dentry->d_inode, bfp->l_start, end); + fp->f_file->f_dentry->d_inode->i_op->truncate_range( + fp->f_file->f_dentry->d_inode, bfp->l_start, end); return (0); } @@ -501,13 
+211,23 @@ file_find(int fd, struct task_struct *task) return (NULL); } /* file_find() */ + +void +vn_file_add(file_t *fp) +{ + /* Put it on the tracking list */ + spin_lock(&vn_file_lock); + list_add(&fp->f_list, &vn_file_list); + spin_unlock(&vn_file_lock); +} +EXPORT_SYMBOL(vn_file_add); + file_t * vn_getf(int fd) { struct kstat stat; struct file *lfp; file_t *fp; - vnode_t *vp; int rc = 0; if (fd < 0) @@ -529,7 +249,6 @@ vn_getf(int fd) */ if (lfp != fp->f_file) { fp->f_file = lfp; - fp->f_vnode->v_file = lfp; } atomic_inc(&fp->f_ref); spin_unlock(&vn_file_lock); @@ -554,10 +273,6 @@ vn_getf(int fd) if (lfp == NULL) goto out_mutex; - vp = vn_alloc(KM_SLEEP); - if (vp == NULL) - goto out_fget; - #if defined(HAVE_4ARGS_VFS_GETATTR) rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); @@ -567,26 +282,15 @@ vn_getf(int fd) rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat); #endif if (rc) - goto out_vnode; - - mutex_enter(&vp->v_lock); - vp->v_type = vn_mode_to_vtype(stat.mode); - vp->v_file = lfp; - mutex_exit(&vp->v_lock); + goto out_fget; - fp->f_vnode = vp; fp->f_file = lfp; - /* Put it on the tracking list */ - spin_lock(&vn_file_lock); - list_add(&fp->f_list, &vn_file_list); - spin_unlock(&vn_file_lock); + vn_file_add(fp); mutex_exit(&fp->f_lock); return (fp); -out_vnode: - vn_free(vp); out_fget: fput(lfp); out_mutex: @@ -600,11 +304,9 @@ EXPORT_SYMBOL(getf); static void releasef_locked(file_t *fp) { ASSERT(fp->f_file); - ASSERT(fp->f_vnode); /* Unlinked from list, no refs, safe to free outside mutex */ fput(fp->f_file); - vn_free(fp->f_vnode); kmem_cache_free(vn_file_cache, fp); } @@ -641,24 +343,6 @@ vn_areleasef(int fd, uf_info_t *fip) } /* releasef() */ EXPORT_SYMBOL(areleasef); -static int -vn_cache_constructor(void *buf, void *cdrarg, int kmflags) -{ - struct vnode *vp = buf; - - mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL); - - return (0); -} /* vn_cache_constructor() */ - -static void -vn_cache_destructor(void *buf, 
void *cdrarg) -{ - struct vnode *vp = buf; - - mutex_destroy(&vp->v_lock); -} /* vn_cache_destructor() */ - static int vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags) { @@ -684,10 +368,6 @@ spl_vn_init(void) { spin_lock_init(&vn_file_lock); - vn_cache = kmem_cache_create("spl_vn_cache", - sizeof (struct vnode), 64, vn_cache_constructor, - vn_cache_destructor, NULL, NULL, NULL, 0); - vn_file_cache = kmem_cache_create("spl_vn_file_cache", sizeof (file_t), 64, vn_file_cache_constructor, vn_file_cache_destructor, NULL, NULL, NULL, 0); @@ -715,5 +395,4 @@ spl_vn_fini(void) printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked); kmem_cache_destroy(vn_file_cache); - kmem_cache_destroy(vn_cache); } /* spl_vn_fini() */ diff --git a/module/os/linux/zfs/Makefile.in b/module/os/linux/zfs/Makefile.in index 1532773782ae..60d92182f388 100644 --- a/module/os/linux/zfs/Makefile.in +++ b/module/os/linux/zfs/Makefile.in @@ -26,6 +26,7 @@ $(MODULE)-objs += ../os/linux/zfs/zfs_acl.o $(MODULE)-objs += ../os/linux/zfs/zfs_ctldir.o $(MODULE)-objs += ../os/linux/zfs/zfs_debug.o $(MODULE)-objs += ../os/linux/zfs/zfs_dir.o +$(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_onexit_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 6b4c035f00a4..4ee41931ee67 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -889,9 +889,6 @@ vdev_disk_hold(vdev_t *vd) if (vd->vdev_tsd != NULL) return; - /* XXX: Implement me as a vnode lookup for the device */ - vd->vdev_name_vp = NULL; - vd->vdev_devid_vp = NULL; } static void diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c index b79017f3a610..395bfd6a5cdf 100644 --- a/module/os/linux/zfs/vdev_file.c +++ b/module/os/linux/zfs/vdev_file.c @@ -35,6 +35,7 @@ #include #include #include +#include /* * Virtual 
device vector for files. @@ -59,7 +60,7 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { vdev_file_t *vf; - vnode_t *vp; + file_t *fp; vattr_t vattr; int error; @@ -108,21 +109,24 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * to local zone users, so the underlying devices should be as well. */ ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); - error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, - spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); + error = zfs_file_open(vd->vdev_path, spa_mode(vd->vdev_spa) | FOFFMAX, + 0, &fp); if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } - vf->vf_vnode = vp; + vf->vf_file = fp; #ifdef _KERNEL /* * Make sure it's a regular file. */ - if (vp->v_type != VREG) { + if (zfs_file_getattr(fp, &vattr)) + return (SET_ERROR(ENODEV)); + + if (vattr.va_type != VREG) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (SET_ERROR(ENODEV)); } @@ -133,7 +137,8 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * Determine the physical size of the file. 
*/ vattr.va_mask = AT_SIZE; - error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); + + error = zfs_file_getattr(vf->vf_file, &vattr); if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); @@ -153,10 +158,10 @@ vdev_file_close(vdev_t *vd) if (vd->vdev_reopening || vf == NULL) return; - if (vf->vf_vnode != NULL) { - (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); - (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, - kcred, NULL); + if (vf->vf_file != NULL) { + /* no-op */ + (void) VOP_PUTPAGE(vf->vf_file, 0, 0, B_INVAL, kcred, NULL); + (void) zfs_file_close(vf->vf_file); } vd->vdev_delayed_close = B_FALSE; @@ -172,20 +177,26 @@ vdev_file_io_strategy(void *arg) vdev_file_t *vf = vd->vdev_tsd; ssize_t resid; void *buf; + loff_t off; + ssize_t size; + int err; if (zio->io_type == ZIO_TYPE_READ) buf = abd_borrow_buf(zio->io_abd, zio->io_size); else buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); - zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 
- UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size, - zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + off = zio->io_offset; + size = zio->io_size; + if (zio->io_type == ZIO_TYPE_READ) + err = zfs_file_read(vf->vf_file, buf, size, &off, &resid); + else + err = zfs_file_write(vf->vf_file, buf, size, &off, &resid); if (zio->io_type == ZIO_TYPE_READ) - abd_return_buf_copy(zio->io_abd, buf, zio->io_size); + abd_return_buf_copy(zio->io_abd, buf, size); else - abd_return_buf(zio->io_abd, buf, zio->io_size); + abd_return_buf(zio->io_abd, buf, size); if (resid != 0 && zio->io_error == 0) zio->io_error = SET_ERROR(ENOSPC); @@ -199,7 +210,7 @@ vdev_file_io_fsync(void *arg) zio_t *zio = (zio_t *)arg; vdev_file_t *vf = zio->io_vd->vdev_tsd; - zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL); + zio->io_error = zfs_file_fsync(vf->vf_file, FSYNC | FDSYNC); zio_interrupt(zio); } @@ -238,8 +249,8 @@ vdev_file_io_start(zio_t *zio) return; } - zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, - kcred, NULL); + zio->io_error = zfs_file_fsync(vf->vf_file, + FSYNC | FDSYNC); break; default: zio->io_error = SET_ERROR(ENOTSUP); @@ -257,7 +268,7 @@ vdev_file_io_start(zio_t *zio) flck.l_len = zio->io_size; flck.l_whence = SEEK_SET; - zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &flck, + zio->io_error = VOP_SPACE(vf->vf_file, F_FREESP, &flck, 0, 0, kcred, NULL); zio_execute(zio); diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c new file mode 100644 index 000000000000..d6be55881891 --- /dev/null +++ b/module/os/linux/zfs/zfs_file_os.c @@ -0,0 +1,408 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef _KERNEL +static ssize_t +spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) +{ +#if defined(HAVE_KERNEL_WRITE_PPOS) + return (kernel_write(file, buf, count, pos)); +#else + mm_segment_t saved_fs; + ssize_t ret; + + saved_fs = get_fs(); + set_fs(KERNEL_DS); + + ret = vfs_write(file, (__force const char __user *)buf, count, pos); + + set_fs(saved_fs); + + return (ret); +#endif +} + +static ssize_t +spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) +{ +#if defined(HAVE_KERNEL_READ_PPOS) + return (kernel_read(file, buf, count, pos)); +#else + mm_segment_t saved_fs; + ssize_t ret; + + saved_fs = get_fs(); + set_fs(KERNEL_DS); + + ret = vfs_read(file, (void __user *)buf, count, pos); + + set_fs(saved_fs); + + return (ret); +#endif +} + +#else + +static ssize_t +spl_kernel_write(file_t *fp, const void *buf, + size_t count, loff_t *pos) +{ + ssize_t rc; + + rc = pwrite64(fp->f_fd, buf, count, *pos); + if (rc < 0) + return (-errno); + *pos += rc; + fp->f_pos = fp->f_offset = *pos; + return (rc); +} + +static ssize_t +spl_kernel_read(file_t *fp, void *buf, + size_t count, loff_t *pos) + +{ + ssize_t rc; + + rc = pread64(fp->f_fd, buf, count, *pos); + if (rc < 0) + return (-errno); + *pos += rc; + fp->f_pos = fp->f_offset = *pos; + return (rc); 
+} + +static loff_t +vfs_llseek(file_t *fp, loff_t offset, int whence) +{ + loff_t off; + + off = lseek(fp->f_fd, offset, whence); + if (off < 0) + return (off); + fp->f_pos = fp->f_offset = off; + return (off); +} +#endif + +/* + * zfs_file_open -> filp_open + * zfs_file_close -> filp_close + * zfs_file_seek -> vfs_llseek + * zfs_file_sync -> spl_filp_fsync + * zfs_file_pwrite -> spl_kernel_write + * zfs_file_pread -> spl_kernel_read + * zfs_file_stat -> vfs_getattr + * zfs_file_unlink -> vfs_unlink + * zfs_file_get -> fget + * zfs_file_put -> fput + */ + +#ifdef _KERNEL +#include + +#define FILE2FP(file) ((file)->f_file) + +int +zfs_file_open(const char *path, int flags, int mode, file_t **fpp) +{ + struct file *filp; + file_t *fp; + struct kstat stat; + int rc; + + ASSERT(flags & (FWRITE | FREAD)); + + if (!(flags & FCREAT) && (flags & FWRITE)) + flags |= FEXCL; + + filp = filp_open(path, flags, mode); + + if (IS_ERR(filp)) + return (-PTR_ERR(filp)); + +#if defined(HAVE_4ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat, STATX_TYPE, + AT_STATX_SYNC_AS_STAT); +#elif defined(HAVE_2ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat); +#else + rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); +#endif + if (rc) { + filp_close(filp, 0); + return (-rc); + } + + /* File was not yet opened create the object and setup */ + fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP); + if (fp == NULL) + goto out; + + mutex_enter(&fp->f_lock); + + fp->f_fd = 0; + fp->f_task = current; + fp->f_offset = 0; + fp->f_file = filp; + atomic_inc(&fp->f_ref); + + vn_file_add(fp); + mutex_exit(&fp->f_lock); + *fpp = fp; + return (0); + +out: + filp_close(filp, 0); + return (ENOMEM); +} + +void +zfs_file_close(file_t *fp) +{ + filp_close(fp->f_file, 0); + kmem_cache_free(vn_file_cache, fp); +} + +int +zfs_file_getattr(file_t *fp, vattr_t *vap) +{ + struct file *filp; + struct kstat stat; + int rc; + + ASSERT(fp); + ASSERT(fp->f_file); + ASSERT(vap); + + filp = fp->f_file; + 
+#if defined(HAVE_4ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); +#elif defined(HAVE_2ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat); +#else + rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); +#endif + if (rc) + return (-rc); + + vap->va_type = vn_mode_to_vtype(stat.mode); + vap->va_mode = stat.mode; + vap->va_uid = KUID_TO_SUID(stat.uid); + vap->va_gid = KGID_TO_SGID(stat.gid); + vap->va_fsid = 0; + vap->va_nodeid = stat.ino; + vap->va_nlink = stat.nlink; + vap->va_size = stat.size; + vap->va_blksize = stat.blksize; + vap->va_atime = stat.atime; + vap->va_mtime = stat.mtime; + vap->va_ctime = stat.ctime; + vap->va_rdev = stat.rdev; + vap->va_nblocks = stat.blocks; + + return (0); +} + +int +zfs_file_fsync(file_t *fp, int flags) +{ + int datasync = 0; + int error; + int fstrans; + struct file *filp; + + ASSERT(fp); + ASSERT(fp->f_file); + + if (flags & FDSYNC) + datasync = 1; + + filp = fp->f_file; + /* + * May enter XFS which generates a warning when PF_FSTRANS is set. + * To avoid this the flag is cleared over vfs_sync() and then reset. 
+ */ + fstrans = __spl_pf_fstrans_check(); + if (fstrans) + current->flags &= ~(__SPL_PF_FSTRANS); +#ifdef HAVE_2ARGS_VFS_FSYNC + error = -vfs_fsync(filp, datasync); +#else + error = -vfs_fsync(filp, filp->f_dentry, datasync); +#endif + if (fstrans) + current->flags |= __SPL_PF_FSTRANS; + + return (error); +} /* vn_fsync() */ + +#else +#define FILE2FP(file) (file) + +int +zfs_file_open(const char *path, int flags, int mode, file_t **fpp) +{ + int fd = -1; + int err; + int old_umask = 0; + file_t *fp; + struct stat64 st; + + if (!(flags & FCREAT) && stat64(path, &st) == -1) { + err = errno; + return (err); + } + + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) + flags |= O_DIRECT; + + + if (flags & FCREAT) + old_umask = umask(0); + + /* + * The construct 'flags - FREAD' conveniently maps combinations of + * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. + */ + fd = open64(path, flags - FREAD, mode); + if (fd == -1) { + err = errno; + return (err); + } + + if (flags & FCREAT) + (void) umask(old_umask); + + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); + + fp = umem_zalloc(sizeof (file_t), UMEM_NOFAIL); + bzero(fp, sizeof (file_t)); + fp->f_fd = fd; + *fpp = fp; + return (0); +} + +void +zfs_file_close(file_t *fp) +{ + close(fp->f_fd); + umem_free(fp, sizeof (file_t)); +} + +/* + * At a minimum we need to update the size since vdev_reopen() + * will no longer call vn_openat(). 
+ */ +int +zfs_file_getattr(file_t *fp, vattr_t *vap) +{ + struct stat64 st; + int err; + + if (fstat64_blk(fp->f_fd, &st) == -1) { + err = errno; + close(fp->f_fd); + return (err); + } + + vap->va_size = st.st_size; + return (0); +} + +int +zfs_file_fsync(file_t *fp, int flags) +{ + return (fsync(fp->f_fd)); +} + +#endif + +int +zfs_file_write(file_t *file, const void *buf, size_t count, loff_t *offp, + ssize_t *resid) +{ + ssize_t rc; + struct file *fp; + + fp = FILE2FP(file); + rc = spl_kernel_write(fp, buf, count, offp); + if (rc < 0) + return ((int)-rc); + file->f_offset = fp->f_pos = *offp; + *resid = count - rc; + return (0); +} + +int +zfs_file_read(file_t *file, void *buf, size_t count, loff_t *offp, + ssize_t *resid) +{ + ssize_t rc; + struct file *fp; + + fp = FILE2FP(file); + rc = spl_kernel_read(fp, buf, count, offp); + if (rc < 0) + return ((int)-rc); + file->f_offset = fp->f_pos = *offp; + *resid = count - rc; + return (0); +} + +int +zfs_file_seek(file_t *file, loff_t *offp, int whence) +{ + struct file *fp; + loff_t rc; + + fp = FILE2FP(file); + rc = vfs_llseek(fp, *offp, whence); + if (rc < 0) + return (-rc); + file->f_offset = fp->f_pos; + *offp = rc; + return (0); +} diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index c40ed57f243d..a18faea2c4b3 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -40,17 +40,20 @@ #include #include #include +#include -struct diffarg { - struct vnode *da_vp; /* file to which we are reporting */ + +typedef struct dmu_diffarg { + file_t *da_fp; /* file to which we are reporting */ offset_t *da_offp; int da_err; /* error that stopped diff search */ dmu_diff_record_t da_ddr; -}; +} dmu_diffarg_t; -static int -write_record(struct diffarg *da) +int +write_record(dmu_diffarg_t *da) { + file_t *fp; ssize_t resid; /* have to get resid to get detailed errno */ if (da->da_ddr.ddr_type == DDR_NONE) { @@ -58,15 +61,14 @@ write_record(struct diffarg *da) return (0); } - da->da_err = vn_rdwr(UIO_WRITE, 
da->da_vp, (caddr_t)&da->da_ddr, - sizeof (da->da_ddr), 0, UIO_SYSSPACE, FAPPEND, - RLIM64_INFINITY, CRED(), &resid); - *da->da_offp += sizeof (da->da_ddr); + fp = da->da_fp; + da->da_err = zfs_file_write(fp, (caddr_t)&da->da_ddr, + sizeof (da->da_ddr), (loff_t *)da->da_offp, &resid); return (da->da_err); } static int -report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last) +report_free_dnode_range(dmu_diffarg_t *da, uint64_t first, uint64_t last) { ASSERT(first <= last); if (da->da_ddr.ddr_type != DDR_FREE || @@ -83,7 +85,7 @@ report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last) } static int -report_dnode(struct diffarg *da, uint64_t object, dnode_phys_t *dnp) +report_dnode(dmu_diffarg_t *da, uint64_t object, dnode_phys_t *dnp) { ASSERT(dnp != NULL); if (dnp->dn_type == DMU_OT_NONE) @@ -110,7 +112,7 @@ static int diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { - struct diffarg *da = arg; + dmu_diffarg_t *da = arg; int err = 0; if (issig(JUSTLOOKING) && issig(FORREAL)) @@ -162,9 +164,9 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int dmu_diff(const char *tosnap_name, const char *fromsnap_name, - struct vnode *vp, offset_t *offp) + file_t *fp, offset_t *offp) { - struct diffarg da; + dmu_diffarg_t da; dsl_dataset_t *fromsnap; dsl_dataset_t *tosnap; dsl_pool_t *dp; @@ -205,7 +207,7 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name, dsl_dataset_long_hold(tosnap, FTAG); dsl_pool_rele(dp, FTAG); - da.da_vp = vp; + da.da_fp = fp; da.da_offp = offp; da.da_ddr.ddr_type = DDR_NONE; da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0; diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 48c3705c65a6..d0b207f57b44 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -61,6 +61,7 @@ #ifdef _KERNEL #include #endif +#include int zfs_recv_queue_length = SPA_MAXBLOCKSIZE; int zfs_recv_queue_ff = 20; @@ -1103,7 +1104,7 @@ 
dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, boolean_t force, boolean_t resumable, nvlist_t *localprops, - nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, vnode_t *vp, + nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, file_t *fp, offset_t *voffp) { dmu_recv_begin_arg_t drba = { 0 }; @@ -1131,7 +1132,7 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, return (SET_ERROR(EINVAL)); } - drc->drc_vp = vp; + drc->drc_fp = fp; drc->drc_voff = *voffp; drc->drc_featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); @@ -1248,12 +1249,11 @@ receive_read(dmu_recv_cookie_t *drc, int len, void *buf) while (done < len) { ssize_t resid; + file_t *fp; - drc->drc_err = vn_rdwr(UIO_READ, drc->drc_vp, - (char *)buf + done, len - done, - drc->drc_voff, UIO_SYSSPACE, FAPPEND, - RLIM64_INFINITY, CRED(), &resid); - + fp = drc->drc_fp; + drc->drc_err = zfs_file_read(fp, (char *)buf + done, + len - done, (loff_t *)&drc->drc_voff, &resid); if (resid == len - done) { /* * Note: ECKSUM indicates that the receive @@ -1261,7 +1261,6 @@ receive_read(dmu_recv_cookie_t *drc, int len, void *buf) */ drc->drc_err = SET_ERROR(ECKSUM); } - drc->drc_voff += len - done - resid; done = len - resid; if (drc->drc_err != 0) return (drc->drc_err); diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 722855492d0b..99c04d874112 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9bfd24d98e68..c15aa39d1fdd 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -8073,8 +8073,7 @@ spa_async_dispatch(spa_t *spa) mutex_enter(&spa->spa_async_lock); if (spa_async_tasks_pending(spa) && !spa->spa_async_suspended && - spa->spa_async_thread == NULL && - rootdir != NULL) + spa->spa_async_thread == NULL) spa->spa_async_thread = thread_create(NULL, 0, 
spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index de1ad21da6de..fd8bd0c0d5d8 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -37,8 +37,8 @@ #include #include #include +#include #ifdef _KERNEL -#include #include #endif @@ -80,8 +80,11 @@ spa_config_load(void) nvlist_t *nvlist, *child; nvpair_t *nvpair; char *pathname; - struct _buf *file; - uint64_t fsize; + ssize_t resid; + loff_t off; + file_t *fp; + vattr_t vap; + int err; #ifdef _KERNEL if (zfs_autoimport_disable) @@ -95,28 +98,30 @@ spa_config_load(void) (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); - file = kobj_open_file(pathname); + err = zfs_file_open(pathname, FREAD, 0, &fp); kmem_free(pathname, MAXPATHLEN); - if (file == (struct _buf *)-1) + if (err) return; - if (kobj_get_filesize(file, &fsize) != 0) + if (zfs_file_getattr(fp, &vap)) goto out; - buf = kmem_alloc(fsize, KM_SLEEP); + buf = kmem_alloc(vap.va_size, KM_SLEEP); + + off = 0; /* * Read the nvlist from the file. */ - if (kobj_read_file(file, buf, fsize, 0) < 0) + if (zfs_file_read(fp, buf, vap.va_size, &off, &resid) < 0) goto out; /* * Unpack the nvlist. 
*/ - if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) + if (nvlist_unpack(buf, vap.va_size, &nvlist, KM_SLEEP) != 0) goto out; /* @@ -141,9 +146,9 @@ spa_config_load(void) out: if (buf != NULL) - kmem_free(buf, fsize); + kmem_free(buf, vap.va_size); - kobj_close_file(file); + zfs_file_close(fp); } static int @@ -151,13 +156,12 @@ spa_config_remove(spa_config_dirent_t *dp) { #if defined(__linux__) && defined(_KERNEL) int error, flags = FWRITE | FTRUNC; - uio_seg_t seg = UIO_SYSSPACE; - vnode_t *vp; + file_t *fp; - error = vn_open(dp->scd_path, seg, flags, 0644, &vp, 0, 0); + error = zfs_file_open(dp->scd_path, flags, 0644, &fp); if (error == 0) { - (void) VOP_FSYNC(vp, FSYNC, kcred, NULL); - (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL); + (void) zfs_file_fsync(fp, FSYNC); + (void) zfs_file_close(fp); } return (error); @@ -171,10 +175,10 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) { size_t buflen; char *buf; - vnode_t *vp; int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; char *temp; int err; + file_t *fp; /* * If the nvlist is empty (NULL), then remove the old cachefile. @@ -193,46 +197,26 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) buf = fnvlist_pack(nvl, &buflen); temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); -#if defined(__linux__) && defined(_KERNEL) /* * Write the configuration to disk. Due to the complexity involved * in performing a rename and remove from within the kernel the file * is instead truncated and overwritten in place. This way we always * have a consistent view of the data or a zero length file. 
*/ - err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0); + err = zfs_file_open(dp->scd_path, oflags, 0644, &fp); if (err == 0) { - err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, - UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL); + loff_t off; + ssize_t resid; + + off = 0; + err = zfs_file_write(fp, buf, buflen, &off, &resid); if (err == 0) - err = VOP_FSYNC(vp, FSYNC, kcred, NULL); + err = zfs_file_fsync(fp, FSYNC); - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); + zfs_file_close(fp); if (err) (void) spa_config_remove(dp); } -#else - /* - * Write the configuration to disk. We need to do the traditional - * 'write to temporary file, sync, move over original' to make sure we - * always have a consistent view of the data. - */ - (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - - err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); - if (err == 0) { - err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL); - if (err == 0) - err = VOP_FSYNC(vp, FSYNC, kcred, NULL); - if (err == 0) - err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE); - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); - } - - (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); -#endif - fnvlist_pack_free(buf, buflen); kmem_free(temp, MAXPATHLEN); return (err); @@ -258,7 +242,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) ASSERT(MUTEX_HELD(&spa_namespace_lock)); - if (rootdir == NULL || !(spa_mode_global & FWRITE)) + if (!(spa_mode_global & FWRITE)) return; /* diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 25863c8e434c..2b42bc1afb95 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -192,6 +192,7 @@ #include #include #include +#include #include #include @@ -4727,7 +4728,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, off = input_fp->f_offset; error = dmu_recv_begin(tofs, tosnap, begin_record, force, - resumable, localprops, 
hidden_args, origin, &drc, input_fp->f_vnode, + resumable, localprops, hidden_args, origin, &drc, input_fp, &off); if (error != 0) goto out; @@ -4903,7 +4904,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, } *read_bytes = off - input_fp->f_offset; - if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0) + if (VOP_SEEK(input_fp->f_offset, &off, NULL) == 0) input_fp->f_offset = off; #ifdef DEBUG @@ -5221,8 +5222,8 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) } typedef struct dump_bytes_io { - vnode_t *dbi_vp; - void *dbi_buf; + file_t *dbi_fp; + caddr_t dbi_buf; int dbi_len; int dbi_err; } dump_bytes_io_t; @@ -5231,11 +5232,19 @@ static void dump_bytes_cb(void *arg) { dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg; - ssize_t resid; /* have to get resid to get detailed errno */ + file_t *fp; + loff_t off; + size_t resid; + caddr_t buf; + + fp = dbi->dbi_fp; + buf = dbi->dbi_buf; + off = (loff_t)0; + dbi->dbi_err = zfs_file_seek(fp, &off, SEEK_END); + if (dbi->dbi_err && dbi->dbi_err != ESPIPE) + return; - dbi->dbi_err = vn_rdwr(UIO_WRITE, dbi->dbi_vp, - (caddr_t)dbi->dbi_buf, dbi->dbi_len, - 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); + dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, &off, &resid); } static int @@ -5243,7 +5252,7 @@ dump_bytes(objset_t *os, void *buf, int len, void *arg) { dump_bytes_io_t dbi; - dbi.dbi_vp = arg; + dbi.dbi_fp = arg; dbi.dbi_buf = buf; dbi.dbi_len = len; @@ -5353,13 +5362,13 @@ zfs_ioc_send(zfs_cmd_t *zc) off = fp->f_offset; dmu_send_outparams_t out = {0}; out.dso_outfunc = dump_bytes; - out.dso_arg = fp->f_vnode; + out.dso_arg = fp; out.dso_dryrun = B_FALSE; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, zc->zc_cookie, &off, &out); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + if (VOP_SEEK(fp->f_offset, &off, NULL) == 0) fp->f_offset = off; 
releasef(zc->zc_cookie); } @@ -5934,9 +5943,9 @@ zfs_ioc_diff(zfs_cmd_t *zc) off = fp->f_offset; - error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off); + error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + if (VOP_SEEK(fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); @@ -6307,12 +6316,12 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) off = fp->f_offset; dmu_send_outparams_t out = {0}; out.dso_outfunc = dump_bytes; - out.dso_arg = fp->f_vnode; + out.dso_arg = fp; out.dso_dryrun = B_FALSE; error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, rawok, resumeobj, resumeoff, redactbook, fd, &off, &out); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + if (VOP_SEEK(fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(fd);