From 4a1f22c0f0c5a996dd66fe1a6373ce3795f5a290 Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Mon, 30 Sep 2019 16:43:08 -0700 Subject: [PATCH] Factor out non-portable vnode_t usage On FreeBSD file offset state is maintained in struct file. A given vnode can be referenced from many different struct file *. As a consequence, FreeBSD's SPL doesn't support vn_rdwr with the FAPPEND flag. This change replaces the non-portable vnode_t with the portable file_t in the common code. Signed-off-by: Matt Macy --- cmd/raidz_test/raidz_test.c | 2 +- cmd/zdb/zdb.c | 2 +- cmd/zhack/zhack.c | 3 +- cmd/ztest/ztest.c | 14 +- include/os/linux/spl/sys/Makefile.am | 2 - include/os/linux/spl/sys/kobj.h | 42 -- include/os/linux/spl/sys/mode.h | 32 - include/os/linux/spl/sys/vnode.h | 95 --- include/os/linux/zfs/sys/policy.h | 2 +- include/os/linux/zfs/sys/zfs_vnops.h | 1 + include/sys/Makefile.am | 1 + include/sys/dmu.h | 3 +- include/sys/dmu_recv.h | 4 +- include/sys/spa.h | 12 +- include/sys/spa_impl.h | 2 +- include/sys/vdev_file.h | 2 +- include/sys/vdev_impl.h | 2 - include/sys/zfs_context.h | 53 +- include/sys/zfs_file.h | 61 ++ include/sys/zil.h | 2 +- lib/libspl/include/os/linux/sys/Makefile.am | 1 - lib/libspl/include/os/linux/sys/file.h | 49 -- lib/libzpool/kernel.c | 610 +++++++++++------- module/os/linux/spl/Makefile.in | 2 - module/os/linux/spl/spl-generic.c | 85 ++- module/os/linux/spl/spl-kobj.c | 86 --- module/os/linux/spl/spl-vnode.c | 681 -------------------- module/os/linux/zfs/Makefile.in | 1 + module/os/linux/zfs/mmp_os.c | 2 +- module/os/linux/zfs/policy.c | 2 +- module/os/linux/zfs/spa_misc_os.c | 4 +- module/os/linux/zfs/vdev_disk.c | 13 +- module/os/linux/zfs/vdev_file.c | 96 +-- module/os/linux/zfs/zfs_acl.c | 1 - module/os/linux/zfs/zfs_dir.c | 1 - module/os/linux/zfs/zfs_file_os.c | 427 ++++++++++++ module/os/linux/zfs/zfs_ioctl_os.c | 10 +- module/os/linux/zfs/zfs_onexit_os.c | 2 +- module/os/linux/zfs/zfs_vnops.c | 19 +- module/os/linux/zfs/zfs_znode.c | 1 - 
module/os/linux/zfs/zpl_file.c | 18 +- module/zfs/arc.c | 4 +- module/zfs/dmu_diff.c | 33 +- module/zfs/dmu_recv.c | 16 +- module/zfs/dsl_crypt.c | 2 +- module/zfs/fm.c | 3 +- module/zfs/spa.c | 21 +- module/zfs/spa_config.c | 84 +-- module/zfs/spa_misc.c | 14 +- module/zfs/vdev_label.c | 2 +- module/zfs/zfs_ioctl.c | 89 ++- module/zfs/zfs_log.c | 5 +- module/zfs/zfs_replay.c | 4 +- 53 files changed, 1170 insertions(+), 1555 deletions(-) delete mode 100644 include/os/linux/spl/sys/kobj.h delete mode 100644 include/os/linux/spl/sys/mode.h create mode 100644 include/sys/zfs_file.h delete mode 100644 lib/libspl/include/os/linux/sys/file.h delete mode 100644 module/os/linux/spl/spl-kobj.c delete mode 100644 module/os/linux/spl/spl-vnode.c create mode 100644 module/os/linux/zfs/zfs_file_os.c diff --git a/cmd/raidz_test/raidz_test.c b/cmd/raidz_test/raidz_test.c index a05070399cab..66f36b0d56ca 100644 --- a/cmd/raidz_test/raidz_test.c +++ b/cmd/raidz_test/raidz_test.c @@ -757,7 +757,7 @@ main(int argc, char **argv) process_options(argc, argv); - kernel_init(FREAD); + kernel_init(SPA_MODE_READ); /* setup random data because rand() is not reentrant */ rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 7c3ef3ef4b54..81c9eaf8c649 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -6781,7 +6781,7 @@ main(int argc, char **argv) */ spa_load_verify_dryrun = B_TRUE; - kernel_init(FREAD); + kernel_init(SPA_MODE_READ); if (dump_all) verbose = MAX(verbose, 1); diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index 57e497f62d13..bb974133d1d0 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -126,7 +126,8 @@ zhack_import(char *target, boolean_t readonly) nvlist_t *props; int error; - kernel_init(readonly ? FREAD : (FREAD | FWRITE)); + kernel_init(readonly ? 
SPA_MODE_READ : + (SPA_MODE_READ | SPA_MODE_WRITE)); dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index b886f1e99b03..dbc5084d07e1 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -5826,8 +5826,8 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) (long long)vd0->vdev_id, (int)maxfaults); if (vf != NULL && ztest_random(3) == 0) { - (void) close(vf->vf_vnode->v_fd); - vf->vf_vnode->v_fd = -1; + (void) close(vf->vf_file->f_fd); + vf->vf_file->f_fd = -1; } else if (ztest_random(2) == 0) { vd0->vdev_cant_read = B_TRUE; } else { @@ -6933,7 +6933,7 @@ ztest_run(ztest_shared_t *zs) /* * Open our pool. */ - kernel_init(FREAD | FWRITE); + kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); metaslab_preload_limit = ztest_random(20) + 1; ztest_spa = spa; @@ -7122,7 +7122,7 @@ ztest_freeze(void) if (ztest_opts.zo_verbose >= 3) (void) printf("testing spa_freeze()...\n"); - kernel_init(FREAD | FWRITE); + kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); VERIFY3U(0, ==, ztest_dataset_open(0)); ztest_spa = spa; @@ -7189,7 +7189,7 @@ ztest_freeze(void) /* * Open and close the pool and dataset to induce log replay. 
*/ - kernel_init(FREAD | FWRITE); + kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); ASSERT(spa_freeze_txg(spa) == UINT64_MAX); VERIFY3U(0, ==, ztest_dataset_open(0)); @@ -7262,7 +7262,7 @@ ztest_import(ztest_shared_t *zs) mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL); VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL)); - kernel_init(FREAD | FWRITE); + kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); searchdirs[0] = ztest_opts.zo_dir; args.paths = nsearch; @@ -7308,7 +7308,7 @@ ztest_init(ztest_shared_t *zs) mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL); VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL)); - kernel_init(FREAD | FWRITE); + kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); /* * Create the storage pool. diff --git a/include/os/linux/spl/sys/Makefile.am b/include/os/linux/spl/sys/Makefile.am index e3df4edaeee9..63b5ba71b983 100644 --- a/include/os/linux/spl/sys/Makefile.am +++ b/include/os/linux/spl/sys/Makefile.am @@ -19,11 +19,9 @@ KERNEL_H = \ $(top_srcdir)/include/os/linux/spl/sys/isa_defs.h \ $(top_srcdir)/include/os/linux/spl/sys/kmem_cache.h \ $(top_srcdir)/include/os/linux/spl/sys/kmem.h \ - $(top_srcdir)/include/os/linux/spl/sys/kobj.h \ $(top_srcdir)/include/os/linux/spl/sys/kstat.h \ $(top_srcdir)/include/os/linux/spl/sys/list.h \ $(top_srcdir)/include/os/linux/spl/sys/mod_os.h \ - $(top_srcdir)/include/os/linux/spl/sys/mode.h \ $(top_srcdir)/include/os/linux/spl/sys/mutex.h \ $(top_srcdir)/include/os/linux/spl/sys/param.h \ $(top_srcdir)/include/os/linux/spl/sys/processor.h \ diff --git a/include/os/linux/spl/sys/kobj.h b/include/os/linux/spl/sys/kobj.h deleted file mode 100644 index 558ec39a808f..000000000000 --- a/include/os/linux/spl/sys/kobj.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. 
- * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - */ - -#ifndef _SPL_KOBJ_H -#define _SPL_KOBJ_H - -#include - -typedef struct _buf { - vnode_t *vp; -} _buf_t; - -typedef struct _buf buf_t; - -extern struct _buf *kobj_open_file(const char *name); -extern void kobj_close_file(struct _buf *file); -extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, - unsigned off); -extern int kobj_get_filesize(struct _buf *file, uint64_t *size); - -#endif /* SPL_KOBJ_H */ diff --git a/include/os/linux/spl/sys/mode.h b/include/os/linux/spl/sys/mode.h deleted file mode 100644 index 02802d0d4cb3..000000000000 --- a/include/os/linux/spl/sys/mode.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . 
- * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - */ - -#ifndef _SPL_MODE_H -#define _SPL_MODE_H - -#define IFTOVT(mode) vn_mode_to_vtype(mode) -#define VTTOIF(vtype) vn_vtype_to_mode(vtype) -#define MAKEIMODE(T, M) (VTTOIF(T) | ((M) & ~S_IFMT)) - -#endif /* SPL_MODE_H */ diff --git a/include/os/linux/spl/sys/vnode.h b/include/os/linux/spl/sys/vnode.h index 7bd278e4e13b..07eac8e44173 100644 --- a/include/os/linux/spl/sys/vnode.h +++ b/include/os/linux/spl/sys/vnode.h @@ -51,23 +51,8 @@ #define O_DSYNC O_SYNC #endif -#define FREAD 1 -#define FWRITE 2 -#define FCREAT O_CREAT -#define FTRUNC O_TRUNC -#define FOFFMAX O_LARGEFILE -#define FSYNC O_SYNC -#define FDSYNC O_DSYNC -#define FEXCL O_EXCL -#define FDIRECT O_DIRECT -#define FAPPEND O_APPEND - -#define FNODSYNC 0x10000 /* fsync pseudo flag */ -#define FNOFOLLOW 0x20000 /* don't follow symlinks */ - #define F_FREESP 11 /* Free file space */ - /* * The vnode AT_ flags are mapped to the Linux ATTR_* flags. * This allows them to be used safely with an iattr structure. 
@@ -102,23 +87,7 @@ #define CREATE_XATTR_DIR 0x04 #define ATTR_NOACLCHECK 0x20 -typedef enum vtype { - VNON = 0, - VREG = 1, - VDIR = 2, - VBLK = 3, - VCHR = 4, - VLNK = 5, - VFIFO = 6, - VDOOR = 7, - VPROC = 8, - VSOCK = 9, - VPORT = 10, - VBAD = 11 -} vtype_t; - typedef struct vattr { - enum vtype va_type; /* vnode type */ uint32_t va_mask; /* attribute bit-mask */ ushort_t va_mode; /* acc mode */ uid_t va_uid; /* owner uid */ @@ -133,70 +102,6 @@ typedef struct vattr { dev_t va_rdev; /* dev */ uint64_t va_nblocks; /* space used */ uint32_t va_blksize; /* block size */ - uint32_t va_seq; /* sequence */ struct dentry *va_dentry; /* dentry to wire */ } vattr_t; - -typedef struct vnode { - struct file *v_file; - kmutex_t v_lock; /* protects vnode fields */ - uint_t v_flag; /* vnode flags (see below) */ - uint_t v_count; /* reference count */ - void *v_data; /* private data for fs */ - struct vfs *v_vfsp; /* ptr to containing VFS */ - struct stdata *v_stream; /* associated stream */ - enum vtype v_type; /* vnode type */ - dev_t v_rdev; /* device (VCHR, VBLK) */ - gfp_t v_gfp_mask; /* original mapping gfp mask */ -} vnode_t; - -typedef struct vn_file { - int f_fd; /* linux fd for lookup */ - struct task_struct *f_task; /* linux task this fd belongs to */ - struct file *f_file; /* linux file struct */ - atomic_t f_ref; /* ref count */ - kmutex_t f_lock; /* struct lock */ - loff_t f_offset; /* offset */ - vnode_t *f_vnode; /* vnode */ - struct list_head f_list; /* list referenced file_t's */ -} file_t; - -extern vnode_t *vn_alloc(int flag); -void vn_free(vnode_t *vp); -extern vtype_t vn_mode_to_vtype(mode_t); -extern mode_t vn_vtype_to_mode(vtype_t); -extern int vn_open(const char *path, uio_seg_t seg, int flags, int mode, - vnode_t **vpp, int x1, void *x2); -extern int vn_openat(const char *path, uio_seg_t seg, int flags, int mode, - vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd); -extern int vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, - offset_t 
off, uio_seg_t seg, int x1, rlim64_t x2, - void *x3, ssize_t *residp); -extern int vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4); -extern int vn_seek(vnode_t *vp, offset_t o, offset_t *op, void *ct); - -extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4); -extern int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4); -extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, - offset_t offset, void *x6, void *x7); -extern file_t *vn_getf(int fd); -extern void vn_releasef(int fd); -extern void vn_areleasef(int fd, uf_info_t *fip); - -int spl_vn_init(void); -void spl_vn_fini(void); - -#define VOP_CLOSE vn_close -#define VOP_SEEK vn_seek -#define VOP_GETATTR vn_getattr -#define VOP_FSYNC vn_fsync -#define VOP_SPACE vn_space -#define VOP_PUTPAGE(vp, o, s, f, x1, x2) ((void)0) -#define vn_is_readonly(vp) 0 -#define getf vn_getf -#define releasef vn_releasef -#define areleasef vn_areleasef - -extern vnode_t *rootdir; - #endif /* SPL_VNODE_H */ diff --git a/include/os/linux/zfs/sys/policy.h b/include/os/linux/zfs/sys/policy.h index 23d7d4db77f2..a8327e02fd2a 100644 --- a/include/os/linux/zfs/sys/policy.h +++ b/include/os/linux/zfs/sys/policy.h @@ -51,7 +51,7 @@ int secpolicy_zfs(const cred_t *); void secpolicy_setid_clear(vattr_t *, cred_t *); int secpolicy_setid_setsticky_clear(struct inode *, vattr_t *, const vattr_t *, cred_t *); -int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t); +int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, mode_t); int secpolicy_vnode_setattr(cred_t *, struct inode *, struct vattr *, const struct vattr *, int, int (void *, int, cred_t *), void *); int secpolicy_basic_link(const cred_t *); diff --git a/include/os/linux/zfs/sys/zfs_vnops.h b/include/os/linux/zfs/sys/zfs_vnops.h index 767cba10da2b..21b61c07fa2a 100644 --- a/include/os/linux/zfs/sys/zfs_vnops.h +++ b/include/os/linux/zfs/sys/zfs_vnops.h @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef 
__cplusplus extern "C" { diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index 21e85431cb67..75f7d93e7ca8 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -104,6 +104,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/zfs_context.h \ $(top_srcdir)/include/sys/zfs_debug.h \ $(top_srcdir)/include/sys/zfs_delay.h \ + $(top_srcdir)/include/sys/zfs_file.h \ $(top_srcdir)/include/sys/zfs_fuid.h \ $(top_srcdir)/include/sys/zfs_project.h \ $(top_srcdir)/include/sys/zfs_ratelimit.h \ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 44889bb10b84..24cbb2f7a693 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -49,6 +49,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -1071,7 +1072,7 @@ void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); int dmu_diff(const char *tosnap_name, const char *fromsnap_name, - struct vnode *vp, offset_t *offp); + zfs_file_t *fp, offset_t *offp); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h index 1a7347d66e8f..2cbe49c49dac 100644 --- a/include/sys/dmu_recv.h +++ b/include/sys/dmu_recv.h @@ -62,7 +62,7 @@ typedef struct dmu_recv_cookie { nvlist_t *drc_begin_nvl; objset_t *drc_os; - vnode_t *drc_vp; /* The vnode to read the stream from */ + zfs_file_t *drc_fp; /* The file to read the stream from */ uint64_t drc_voff; /* The current offset in the stream */ uint64_t drc_bytes_read; /* @@ -82,7 +82,7 @@ typedef struct dmu_recv_cookie { int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, boolean_t force, boolean_t resumable, nvlist_t *localprops, nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, - vnode_t *vp, offset_t *voffp); + zfs_file_t *fp, offset_t *voffp); int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd, uint64_t *action_handlep, offset_t *voffp); int dmu_recv_end(dmu_recv_cookie_t 
*drc, void *owner); diff --git a/include/sys/spa.h b/include/sys/spa.h index cb91577388b4..cae29a4f4a2d 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -723,6 +723,12 @@ typedef enum spa_import_type { SPA_IMPORT_ASSEMBLE } spa_import_type_t; +typedef enum spa_mode { + SPA_MODE_UNINIT = 0, + SPA_MODE_READ = 1, + SPA_MODE_WRITE = 2, +} spa_mode_t; + /* * Send TRIM commands in-line during normal pool operation while deleting. * OFF: no @@ -1099,7 +1105,7 @@ extern uint32_t spa_get_hostid(spa_t *spa); extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); extern boolean_t spa_livelist_delete_check(spa_t *spa); -extern int spa_mode(spa_t *spa); +extern spa_mode_t spa_mode(spa_t *spa); extern uint64_t zfs_strtonum(const char *str, char **nptr); extern char *spa_his_ievent_table[]; @@ -1149,7 +1155,7 @@ extern void vdev_mirror_stat_init(void); extern void vdev_mirror_stat_fini(void); /* Initialization and termination */ -extern void spa_init(int flags); +extern void spa_init(spa_mode_t mode); extern void spa_fini(void); extern void spa_boot_init(void); @@ -1189,7 +1195,7 @@ _NOTE(CONSTCOND) } while (0) #define dprintf_bp(bp, fmt, ...) #endif -extern int spa_mode_global; /* mode, e.g. 
FREAD | FWRITE */ +extern spa_mode_t spa_mode_global; extern int zfs_deadman_enabled; extern unsigned long zfs_deadman_synctime_ms; extern unsigned long zfs_deadman_ziotime_ms; diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index c83252bcafec..5a581214d0a2 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -362,7 +362,7 @@ struct spa { uint8_t spa_claiming; /* pool is doing zil_claim() */ boolean_t spa_is_root; /* pool is root */ int spa_minref; /* num refs when first opened */ - int spa_mode; /* FREAD | FWRITE */ + spa_mode_t spa_mode; /* SPA_MODE_{READ|WRITE} */ spa_log_state_t spa_log_state; /* log state */ uint64_t spa_autoexpand; /* lun expansion on/off */ ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */ diff --git a/include/sys/vdev_file.h b/include/sys/vdev_file.h index 9a398c58391e..1514a44fcabb 100644 --- a/include/sys/vdev_file.h +++ b/include/sys/vdev_file.h @@ -34,7 +34,7 @@ extern "C" { #endif typedef struct vdev_file { - vnode_t *vf_vnode; + zfs_file_t *vf_file; } vdev_file_t; extern void vdev_file_init(void); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index ae82b75c0413..4f63e1ae5f8c 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -220,8 +220,6 @@ struct vdev { vdev_ops_t *vdev_ops; /* vdev operations */ spa_t *vdev_spa; /* spa for this vdev */ void *vdev_tsd; /* type-specific data */ - vnode_t *vdev_name_vp; /* vnode for pathname */ - vnode_t *vdev_devid_vp; /* vnode for devid */ vdev_t *vdev_top; /* top-level vdev */ vdev_t *vdev_parent; /* parent vdev */ vdev_t **vdev_child; /* array of children */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index f221091d8934..c14d92a2cb8e 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -515,16 +514,6 @@ extern void system_taskq_fini(void); #define XVA_MAPSIZE 3 #define XVA_MAGIC 0x78766174 -/* - * 
vnodes - */ -typedef struct vnode { - uint64_t v_size; - int v_fd; - char *v_path; - int v_dump_fd; -} vnode_t; - extern char *vn_dumpdir; #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ @@ -593,40 +582,7 @@ typedef struct vsecattr { #define CRCREAT 0 #define F_FREESP 11 - -extern int fop_getattr(vnode_t *vp, vattr_t *vap); - -#define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp) -#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 -#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap)); - -#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) - -#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) -#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) \ - fallocate((vp)->v_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, \ - (flck)->l_start, (flck)->l_len) -#else -#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) (0) -#endif - -#define VN_RELE(vp) vn_close(vp) - -extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, - int x2, int x3); -extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, - int x2, int x3, vnode_t *vp, int fd); -extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, - offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); -extern void vn_close(vnode_t *vp); - -#define vn_remove(path, x1, x2) remove(path) -#define vn_rename(from, to, seg) rename((from), (to)) -#define vn_is_readonly(vp) B_FALSE - -extern vnode_t *rootdir; - -#include /* for FREAD, FWRITE, etc */ +#define FIGNORECASE 0x80000 /* request case-insensitive lookups */ /* * Random stuff @@ -681,7 +637,7 @@ extern int lowbit64(uint64_t i); extern int random_get_bytes(uint8_t *ptr, size_t len); extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); -extern void kernel_init(int); +extern void kernel_init(int mode); extern void kernel_fini(void); extern void random_init(void); extern void random_fini(void); @@ -758,11 +714,6 @@ typedef struct ace_object { #define 
ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 #define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 -extern struct _buf *kobj_open_file(char *name); -extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, - unsigned off); -extern void kobj_close_file(struct _buf *file); -extern int kobj_get_filesize(struct _buf *file, uint64_t *size); extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); diff --git a/include/sys/zfs_file.h b/include/sys/zfs_file.h new file mode 100644 index 000000000000..6b33420e375e --- /dev/null +++ b/include/sys/zfs_file.h @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SYS_ZFS_FILE_H +#define _SYS_ZFS_FILE_H + +#ifndef _KERNEL +typedef struct zfs_file { + int f_fd; + int f_dump_fd; +} zfs_file_t; +#elif defined(__linux__) || defined(__FreeBSD__) +typedef struct file zfs_file_t; +#else +#error "unknown OS" +#endif + +typedef struct zfs_file_attr { + uint64_t zfa_size; /* file size */ + mode_t zfa_mode; /* file type */ +} zfs_file_attr_t; + +int zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fp); +void zfs_file_close(zfs_file_t *fp); + +int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid); +int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off, + ssize_t *resid); +int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid); +int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off, + ssize_t *resid); + +int zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence); +int zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr); +int zfs_file_fsync(zfs_file_t *fp, int flags); +int zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len); +loff_t zfs_file_off(zfs_file_t *fp); +int zfs_file_unlink(const char *); + +int zfs_file_get(int fd, zfs_file_t **fp); +void zfs_file_put(int fd); + +#endif /* _SYS_ZFS_FILE_H */ diff --git a/include/sys/zil.h b/include/sys/zil.h index e84fb864abc3..716b19e56496 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -373,7 +373,7 @@ typedef struct { * - the write occupies only one block * WR_COPIED: * If we know we'll immediately be committing the - * transaction (FSYNC or FDSYNC), then we allocate a larger + * transaction (O_SYNC or O_DSYNC), then we allocate a larger * log record here for the data and copy the data in. 
* WR_NEED_COPY: * Otherwise we don't allocate a buffer, and *if* we need to diff --git a/lib/libspl/include/os/linux/sys/Makefile.am b/lib/libspl/include/os/linux/sys/Makefile.am index 6b170fa8c846..f8b6d9fae888 100644 --- a/lib/libspl/include/os/linux/sys/Makefile.am +++ b/lib/libspl/include/os/linux/sys/Makefile.am @@ -2,7 +2,6 @@ libspldir = $(includedir)/libspl/sys libspl_HEADERS = \ $(top_srcdir)/lib/libspl/include/os/linux/sys/byteorder.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/errno.h \ - $(top_srcdir)/lib/libspl/include/os/linux/sys/file.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/mnttab.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/mount.h \ $(top_srcdir)/lib/libspl/include/os/linux/sys/param.h \ diff --git a/lib/libspl/include/os/linux/sys/file.h b/lib/libspl/include/os/linux/sys/file.h deleted file mode 100644 index e0752ac25c2b..000000000000 --- a/lib/libspl/include/os/linux/sys/file.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _LIBSPL_SYS_FILE_H -#define _LIBSPL_SYS_FILE_H - -#include_next - -#include - -#define FREAD 1 -#define FWRITE 2 -// #define FAPPEND 8 - -#define FCREAT O_CREAT -#define FTRUNC O_TRUNC -#define FOFFMAX O_LARGEFILE -#define FSYNC O_SYNC -#define FDSYNC O_DSYNC -#define FEXCL O_EXCL - -#define FNODSYNC 0x10000 /* fsync pseudo flag */ -#define FNOFOLLOW 0x20000 /* don't follow symlinks */ -#define FIGNORECASE 0x80000 /* request case-insensitive lookups */ - -#endif diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index c14468cb2510..ef52ed3afa2a 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -49,7 +49,6 @@ int aok; uint64_t physmem; -vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; struct utsname hw_utsname; vmem_t *zio_arena = NULL; @@ -488,183 +487,6 @@ procfs_list_add(procfs_list_t *procfs_list, void *p) * vnode operations * ========================================================================= */ -/* - * Note: for the xxxat() versions of these functions, we assume that the - * starting vp is always rootdir (which is true for spa_directory.c, the only - * ZFS consumer of these interfaces). We assert this is true, and then emulate - * them by adding '/' in front of the path. - */ - -/*ARGSUSED*/ -int -vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) -{ - int fd = -1; - int dump_fd = -1; - vnode_t *vp; - int old_umask = 0; - struct stat64 st; - int err; - - if (!(flags & FCREAT) && stat64(path, &st) == -1) { - err = errno; - return (err); - } - - if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) - flags |= O_DIRECT; - - if (flags & FCREAT) - old_umask = umask(0); - - /* - * The construct 'flags - FREAD' conveniently maps combinations of - * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 
- */ - fd = open64(path, flags - FREAD, mode); - if (fd == -1) { - err = errno; - return (err); - } - - if (flags & FCREAT) - (void) umask(old_umask); - - if (vn_dumpdir != NULL) { - char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); - (void) snprintf(dumppath, MAXPATHLEN, - "%s/%s", vn_dumpdir, basename(path)); - dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); - umem_free(dumppath, MAXPATHLEN); - if (dump_fd == -1) { - err = errno; - close(fd); - return (err); - } - } else { - dump_fd = -1; - } - - if (fstat64_blk(fd, &st) == -1) { - err = errno; - close(fd); - if (dump_fd != -1) - close(dump_fd); - return (err); - } - - (void) fcntl(fd, F_SETFD, FD_CLOEXEC); - - *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); - - vp->v_fd = fd; - vp->v_size = st.st_size; - vp->v_path = spa_strdup(path); - vp->v_dump_fd = dump_fd; - - return (0); -} - -/*ARGSUSED*/ -int -vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, - int x3, vnode_t *startvp, int fd) -{ - char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); - int ret; - - ASSERT(startvp == rootdir); - (void) sprintf(realpath, "/%s", path); - - /* fd ignored for now, need if want to simulate nbmand support */ - ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); - - umem_free(realpath, strlen(path) + 2); - - return (ret); -} - -/*ARGSUSED*/ -int -vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, - int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) -{ - ssize_t rc, done = 0, split; - - if (uio == UIO_READ) { - rc = pread64(vp->v_fd, addr, len, offset); - if (vp->v_dump_fd != -1 && rc != -1) { - int status; - status = pwrite64(vp->v_dump_fd, addr, rc, offset); - ASSERT(status != -1); - } - } else { - /* - * To simulate partial disk writes, we split writes into two - * system calls so that the process can be killed in between. - */ - int sectors = len >> SPA_MINBLOCKSHIFT; - split = (sectors > 0 ? 
rand() % sectors : 0) << - SPA_MINBLOCKSHIFT; - rc = pwrite64(vp->v_fd, addr, split, offset); - if (rc != -1) { - done = rc; - rc = pwrite64(vp->v_fd, (char *)addr + split, - len - split, offset + split); - } - } - -#ifdef __linux__ - if (rc == -1 && errno == EINVAL) { - /* - * Under Linux, this most likely means an alignment issue - * (memory or disk) due to O_DIRECT, so we abort() in order to - * catch the offender. - */ - abort(); - } -#endif - if (rc == -1) - return (errno); - - done += rc; - - if (residp) - *residp = len - done; - else if (done != len) - return (EIO); - return (0); -} - -void -vn_close(vnode_t *vp) -{ - close(vp->v_fd); - if (vp->v_dump_fd != -1) - close(vp->v_dump_fd); - spa_strfree(vp->v_path); - umem_free(vp, sizeof (vnode_t)); -} - -/* - * At a minimum we need to update the size since vdev_reopen() - * will no longer call vn_openat(). - */ -int -fop_getattr(vnode_t *vp, vattr_t *vap) -{ - struct stat64 st; - int err; - - if (fstat64_blk(vp->v_fd, &st) == -1) { - err = errno; - close(vp->v_fd); - return (err); - } - - vap->va_size = st.st_size; - return (0); -} /* * ========================================================================= @@ -858,60 +680,6 @@ cmn_err(int ce, const char *fmt, ...) 
va_end(adx); } -/* - * ========================================================================= - * kobj interfaces - * ========================================================================= - */ -struct _buf * -kobj_open_file(char *name) -{ - struct _buf *file; - vnode_t *vp; - - /* set vp as the _fd field of the file */ - if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, - -1) != 0) - return ((void *)-1UL); - - file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); - file->_fd = (intptr_t)vp; - return (file); -} - -int -kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) -{ - ssize_t resid = 0; - - if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, - UIO_SYSSPACE, 0, 0, 0, &resid) != 0) - return (-1); - - return (size - resid); -} - -void -kobj_close_file(struct _buf *file) -{ - vn_close((vnode_t *)file->_fd); - umem_free(file, sizeof (struct _buf)); -} - -int -kobj_get_filesize(struct _buf *file, uint64_t *size) -{ - struct stat64 st; - vnode_t *vp = (vnode_t *)file->_fd; - - if (fstat64(vp->v_fd, &st) == -1) { - vn_close(vp); - return (errno); - } - *size = st.st_size; - return (0); -} - /* * ========================================================================= * misc routines @@ -1059,7 +827,7 @@ kernel_init(int mode) (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", - (mode & FWRITE) ? get_system_hostid() : 0); + (mode & SPA_MODE_WRITE) ? 
get_system_hostid() : 0); random_init(); @@ -1068,7 +836,7 @@ kernel_init(int mode) system_taskq_init(); icp_init(); - spa_init(mode); + spa_init((spa_mode_t)mode); fletcher_4_init(); @@ -1265,3 +1033,377 @@ zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname, boolean_t async) { } + +/* + * Open file + * + * path - fully qualified path to file + * flags - open(2) flags (O_RDONLY / O_WRONLY / O_RDWR, O_CREAT, O_EXCL) + * mode - permission bits for O_CREAT; used verbatim (umask is cleared) + * fpp - pointer to return file pointer + * Returns 0 on success, the underlying errno on failure. + */ +int +zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) +{ + int fd = -1; + int dump_fd = -1; + int err; + int old_umask = 0; + zfs_file_t *fp; + struct stat64 st; + + if (!(flags & O_CREAT) && stat64(path, &st) == -1) + return (errno); + + if (!(flags & O_CREAT) && S_ISBLK(st.st_mode)) + flags |= O_DIRECT; + + if (flags & O_CREAT) + old_umask = umask(0); + + fd = open64(path, flags, mode); + err = errno; /* save before any later call can overwrite it */ + if (flags & O_CREAT) + (void) umask(old_umask); /* restore even if open64() failed */ + if (fd == -1) + return (err); + + if (vn_dumpdir != NULL) { + char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); + char *inpath = basename((char *)(uintptr_t)path); + + (void) snprintf(dumppath, MAXPATHLEN, + "%s/%s", vn_dumpdir, inpath); + dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); + err = errno; /* read errno before umem_free() runs */ + umem_free(dumppath, MAXPATHLEN); + if (dump_fd == -1) { + close(fd); + return (err); + } + } else { + dump_fd = -1; + } + + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); + + fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL); + fp->f_fd = fd; + fp->f_dump_fd = dump_fd; + *fpp = fp; + + return (0); +} + +void +zfs_file_close(zfs_file_t *fp) +{ + close(fp->f_fd); + if (fp->f_dump_fd != -1) + close(fp->f_dump_fd); + + umem_free(fp, sizeof (zfs_file_t)); +} + +/* + * Stateful write - use os internal file pointer to determine where to + * write and update on successful completion.
+ * + * fp - pointer to file (pipe, socket, etc) to write to + * buf - buffer to write + * count - # of bytes to write + * resid - count of unwritten bytes; if NULL, a short write returns EIO + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) +{ + ssize_t rc; + + rc = write(fp->f_fd, buf, count); + if (rc < 0) + return (errno); + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * Stateless write - os internal file pointer is not updated. + * + * fp - pointer to file (pipe, socket, etc) to write to + * buf - buffer to write + * count - # of bytes to write + * pos - file offset to write to (only valid for seekable types) + * resid - count of unwritten bytes; if NULL, a short write returns EIO + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_pwrite(zfs_file_t *fp, const void *buf, + size_t count, loff_t pos, ssize_t *resid) +{ + ssize_t rc, split, done; + int sectors; + + /* + * To simulate partial disk writes, we split writes into two + * system calls so that the process can be killed in between. + * This is used by ztest to simulate realistic failure modes. + */ + sectors = count >> SPA_MINBLOCKSHIFT; + split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT; + rc = pwrite64(fp->f_fd, buf, split, pos); + if (rc != -1) { + done = rc; /* bytes written by the first call */ + rc = pwrite64(fp->f_fd, (char *)buf + split, + count - split, pos + split); + } +#ifdef __linux__ + if (rc == -1 && errno == EINVAL) { + /* + * Under Linux, this most likely means an alignment issue + * (memory or disk) due to O_DIRECT, so we abort() in order + * to catch the offender. + */ + abort(); + } +#endif + + if (rc < 0) + return (errno); + + done += rc; /* only reached when both calls succeeded */ + + if (resid) { + *resid = count - done; + } else if (done != count) { + return (EIO); + } + + return (0); +} + +/* + * Stateful read - use os internal file pointer to determine where to + * read and update on successful completion.
+ * + * fp - pointer to file (pipe, socket, etc) to read from + * buf - buffer to read into + * count - # of bytes to read + * resid - count of unread bytes; if NULL, a short read returns EIO + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid) +{ + ssize_t rc; /* read(2) returns ssize_t, not int */ + + rc = read(fp->f_fd, buf, count); + if (rc < 0) + return (errno); + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * Stateless read - os internal file pointer is not updated. + * + * fp - pointer to file (pipe, socket, etc) to read from + * buf - buffer to read into + * count - # of bytes to read + * off - file offset to read from (only valid for seekable types) + * resid - count of unread bytes; if NULL, a short read returns EIO + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off, + ssize_t *resid) +{ + ssize_t rc; + + rc = pread64(fp->f_fd, buf, count, off); + if (rc < 0) { +#ifdef __linux__ + /* + * Under Linux, this most likely means an alignment issue + * (memory or disk) due to O_DIRECT, so we abort() in order to + * catch the offender.
+ */ + if (errno == EINVAL) + abort(); +#endif + return (errno); + } + + if (fp->f_dump_fd != -1) { + int status; + + status = pwrite64(fp->f_dump_fd, buf, rc, off); /* mirror the bytes just read */ + ASSERT(status != -1); + } + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * lseek - set / get file pointer + * + * fp - pointer to file (pipe, socket, etc) to seek on + * offp - in: offset passed to lseek(); out: the offset lseek() returned + * whence - see man pages for standard lseek whence values + * + * Returns 0 on success errno on failure (ESPIPE for non seekable types) + */ +int +zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence) +{ + loff_t rc; + + rc = lseek(fp->f_fd, *offp, whence); + if (rc < 0) + return (errno); + + *offp = rc; + + return (0); +} + +/* + * Get file attributes + * + * fp - file pointer + * zfattr - pointer to file attr structure (zfa_size, zfa_mode are set) + * + * Currently only used for fetching size and file mode + * + * Returns 0 on success or error code of underlying getattr call on failure. + */ +int +zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr) +{ + struct stat64 st; + + if (fstat64_blk(fp->f_fd, &st) == -1) + return (errno); + + zfattr->zfa_size = st.st_size; + zfattr->zfa_mode = st.st_mode; + + return (0); +} + +/* + * Sync file to disk + * + * fp - file pointer + * flags - O_SYNC and or O_DSYNC (not consulted here; fsync() is always used) + * + * Returns 0 on success or error code of underlying sync call on failure.
+ */ +int +zfs_file_fsync(zfs_file_t *fp, int flags) +{ + int rc; + + rc = fsync(fp->f_fd); /* flags is not consulted */ + if (rc < 0) + return (errno); + + return (0); +} + +/* + * fallocate - allocate or free space on disk + * + * fp - file pointer + * mode (non-standard options for hole punching etc) + * offset - offset to start allocating or freeing from + * len - length to free / allocate + * + * OPTIONAL; returns fallocate(2)'s result: 0, or -1 with errno set. + */ +int +zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) +{ + return (fallocate(fp->f_fd, mode, offset, len)); +} + +/* + * Request current file pointer offset + * + * fp - pointer to file + * + * Returns current file offset (-1 with errno set if lseek() fails). + */ +loff_t +zfs_file_off(zfs_file_t *fp) +{ + return (lseek(fp->f_fd, 0, SEEK_CUR)); /* (fd, offset, whence) */ +} + +/* + * unlink file + * + * path - fully qualified file path + * + * Returns 0 on success, -1 with errno set on failure (remove(3) result). + * + * OPTIONAL + */ +int +zfs_file_unlink(const char *path) +{ + return (remove(path)); +} + +/* + * Get reference to file pointer + * + * fd - input file descriptor + * fpp - pointer to file pointer + * + * Returns 0 on success EBADF on failure. + * Unsupported in user space. + */ +int +zfs_file_get(int fd, zfs_file_t **fpp) +{ + abort(); + + return (EOPNOTSUPP); +} + +/* + * Drop reference to file pointer + * + * fd - input file descriptor + * + * Unsupported in user space.
+ */ +void +zfs_file_put(int fd) +{ + abort(); +} diff --git a/module/os/linux/spl/Makefile.in b/module/os/linux/spl/Makefile.in index 94804bfed4c9..b2325f91b4a7 100644 --- a/module/os/linux/spl/Makefile.in +++ b/module/os/linux/spl/Makefile.in @@ -5,7 +5,6 @@ $(MODULE)-objs += ../os/linux/spl/spl-err.o $(MODULE)-objs += ../os/linux/spl/spl-generic.o $(MODULE)-objs += ../os/linux/spl/spl-kmem.o $(MODULE)-objs += ../os/linux/spl/spl-kmem-cache.o -$(MODULE)-objs += ../os/linux/spl/spl-kobj.o $(MODULE)-objs += ../os/linux/spl/spl-kstat.o $(MODULE)-objs += ../os/linux/spl/spl-proc.o $(MODULE)-objs += ../os/linux/spl/spl-procfs-list.o @@ -14,6 +13,5 @@ $(MODULE)-objs += ../os/linux/spl/spl-thread.o $(MODULE)-objs += ../os/linux/spl/spl-trace.o $(MODULE)-objs += ../os/linux/spl/spl-tsd.o $(MODULE)-objs += ../os/linux/spl/spl-vmem.o -$(MODULE)-objs += ../os/linux/spl/spl-vnode.o $(MODULE)-objs += ../os/linux/spl/spl-xdr.o $(MODULE)-objs += ../os/linux/spl/spl-zlib.o diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 01c8636e717f..aa1051f5d69e 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -47,6 +46,8 @@ #include #include "zfs_gitrev.h" #include +#include +#include char spl_gitrev[64] = ZFS_META_GITREV; @@ -520,6 +521,48 @@ ddi_copyout(const void *from, void *to, size_t len, int flags) } EXPORT_SYMBOL(ddi_copyout); +static ssize_t +spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) +{ +#if defined(HAVE_KERNEL_READ_PPOS) + return (kernel_read(file, buf, count, pos)); +#else + mm_segment_t saved_fs; + ssize_t ret; + + saved_fs = get_fs(); + set_fs(KERNEL_DS); + + ret = vfs_read(file, (void __user *)buf, count, pos); + + set_fs(saved_fs); + + return (ret); +#endif +} + +int +spl_getattr(struct file *filp, struct kstat *stat) +{ + int rc; + + ASSERT(filp); + ASSERT(stat); + +#if 
defined(HAVE_4ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); +#elif defined(HAVE_2ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, stat); +#else + rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat); +#endif + if (rc) + return (-rc); + + return (0); +} + /* * Read the unique system identifier from the /etc/hostid file. * @@ -563,38 +606,42 @@ static int hostid_read(uint32_t *hostid) { uint64_t size; - struct _buf *file; uint32_t value = 0; int error; + loff_t off; + struct file *filp; + struct kstat stat; + + filp = filp_open(spl_hostid_path, 0, 0); - file = kobj_open_file(spl_hostid_path); - if (file == (struct _buf *)-1) + if (IS_ERR(filp)) return (ENOENT); - error = kobj_get_filesize(file, &size); + error = spl_getattr(filp, &stat); if (error) { - kobj_close_file(file); + filp_close(filp, 0); return (error); } - + size = stat.size; if (size < sizeof (HW_HOSTID_MASK)) { - kobj_close_file(file); + filp_close(filp, 0); return (EINVAL); } + off = 0; /* * Read directly into the variable like eglibc does. * Short reads are okay; native behavior is preserved. */ - error = kobj_read_file(file, (char *)&value, sizeof (value), 0); + error = spl_kernel_read(filp, &value, sizeof (value), &off); if (error < 0) { - kobj_close_file(file); + filp_close(filp, 0); return (EIO); } /* Mask down to 32 bits like coreutils does. 
*/ *hostid = (value & HW_HOSTID_MASK); - kobj_close_file(file); + filp_close(filp, 0); return (0); } @@ -704,26 +751,21 @@ spl_init(void) if ((rc = spl_kmem_cache_init())) goto out4; - if ((rc = spl_vn_init())) - goto out5; - if ((rc = spl_proc_init())) - goto out6; + goto out5; if ((rc = spl_kstat_init())) - goto out7; + goto out6; if ((rc = spl_zlib_init())) - goto out8; + goto out7; return (rc); -out8: - spl_kstat_fini(); out7: - spl_proc_fini(); + spl_kstat_fini(); out6: - spl_vn_fini(); + spl_proc_fini(); out5: spl_kmem_cache_fini(); out4: @@ -742,7 +784,6 @@ spl_fini(void) spl_zlib_fini(); spl_kstat_fini(); spl_proc_fini(); - spl_vn_fini(); spl_kmem_cache_fini(); spl_taskq_fini(); spl_tsd_fini(); diff --git a/module/os/linux/spl/spl-kobj.c b/module/os/linux/spl/spl-kobj.c deleted file mode 100644 index 7019369bd231..000000000000 --- a/module/os/linux/spl/spl-kobj.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Kobj Implementation. 
- */ - -#include - -struct _buf * -kobj_open_file(const char *name) -{ - struct _buf *file; - vnode_t *vp; - int rc; - - file = kmalloc(sizeof (_buf_t), kmem_flags_convert(KM_SLEEP)); - if (file == NULL) - return ((_buf_t *)-1UL); - - if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) { - kfree(file); - return ((_buf_t *)-1UL); - } - - file->vp = vp; - - return (file); -} /* kobj_open_file() */ -EXPORT_SYMBOL(kobj_open_file); - -void -kobj_close_file(struct _buf *file) -{ - VOP_CLOSE(file->vp, 0, 0, 0, 0, 0); - kfree(file); -} /* kobj_close_file() */ -EXPORT_SYMBOL(kobj_close_file); - -int -kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) -{ - ssize_t resid; - - if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off, - UIO_SYSSPACE, 0, 0, 0, &resid) != 0) - return (-1); - - return (size - resid); -} /* kobj_read_file() */ -EXPORT_SYMBOL(kobj_read_file); - -int -kobj_get_filesize(struct _buf *file, uint64_t *size) -{ - vattr_t vap; - int rc; - - rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL); - if (rc) - return (rc); - - *size = vap.va_size; - - return (rc); -} /* kobj_get_filesize() */ -EXPORT_SYMBOL(kobj_get_filesize); diff --git a/module/os/linux/spl/spl-vnode.c b/module/os/linux/spl/spl-vnode.c deleted file mode 100644 index 5de350f10534..000000000000 --- a/module/os/linux/spl/spl-vnode.c +++ /dev/null @@ -1,681 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Vnode Implementation. - */ - -#include -#include -#include -#include -#include -#include -#ifdef HAVE_FDTABLE_HEADER -#include -#endif - -vnode_t *rootdir = (vnode_t *)0xabcd1234; -EXPORT_SYMBOL(rootdir); - -static spl_kmem_cache_t *vn_cache; -static spl_kmem_cache_t *vn_file_cache; - -static spinlock_t vn_file_lock; -static LIST_HEAD(vn_file_list); - -static int -spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len) -{ - int error = -EOPNOTSUPP; - - if (fp->f_op->fallocate) - error = fp->f_op->fallocate(fp, mode, offset, len); - - return (error); -} - -static int -spl_filp_fsync(struct file *fp, int sync) -{ - return (vfs_fsync(fp, sync)); -} - -static ssize_t -spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) -{ -#if defined(HAVE_KERNEL_WRITE_PPOS) - return (kernel_write(file, buf, count, pos)); -#else - mm_segment_t saved_fs; - ssize_t ret; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - ret = vfs_write(file, (__force const char __user *)buf, count, pos); - - set_fs(saved_fs); - - return (ret); -#endif -} - -static ssize_t -spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) -{ -#if defined(HAVE_KERNEL_READ_PPOS) - return (kernel_read(file, buf, count, pos)); -#else - mm_segment_t saved_fs; - ssize_t ret; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - ret = vfs_read(file, (void __user *)buf, count, pos); - - set_fs(saved_fs); - - return (ret); -#endif -} - -vtype_t -vn_mode_to_vtype(mode_t mode) -{ - if (S_ISREG(mode)) - return (VREG); - - if (S_ISDIR(mode)) - return (VDIR); - - if (S_ISCHR(mode)) - 
return (VCHR); - - if (S_ISBLK(mode)) - return (VBLK); - - if (S_ISFIFO(mode)) - return (VFIFO); - - if (S_ISLNK(mode)) - return (VLNK); - - if (S_ISSOCK(mode)) - return (VSOCK); - - return (VNON); -} /* vn_mode_to_vtype() */ -EXPORT_SYMBOL(vn_mode_to_vtype); - -mode_t -vn_vtype_to_mode(vtype_t vtype) -{ - if (vtype == VREG) - return (S_IFREG); - - if (vtype == VDIR) - return (S_IFDIR); - - if (vtype == VCHR) - return (S_IFCHR); - - if (vtype == VBLK) - return (S_IFBLK); - - if (vtype == VFIFO) - return (S_IFIFO); - - if (vtype == VLNK) - return (S_IFLNK); - - if (vtype == VSOCK) - return (S_IFSOCK); - - return (VNON); -} /* vn_vtype_to_mode() */ -EXPORT_SYMBOL(vn_vtype_to_mode); - -vnode_t * -vn_alloc(int flag) -{ - vnode_t *vp; - - vp = kmem_cache_alloc(vn_cache, flag); - if (vp != NULL) { - vp->v_file = NULL; - vp->v_type = 0; - } - - return (vp); -} /* vn_alloc() */ -EXPORT_SYMBOL(vn_alloc); - -void -vn_free(vnode_t *vp) -{ - kmem_cache_free(vn_cache, vp); -} /* vn_free() */ -EXPORT_SYMBOL(vn_free); - -int -vn_open(const char *path, uio_seg_t seg, int flags, int mode, vnode_t **vpp, - int x1, void *x2) -{ - struct file *fp; - struct kstat stat; - int rc, saved_umask = 0; - gfp_t saved_gfp; - vnode_t *vp; - - ASSERT(flags & (FWRITE | FREAD)); - ASSERT(seg == UIO_SYSSPACE); - ASSERT(vpp); - *vpp = NULL; - - if (!(flags & FCREAT) && (flags & FWRITE)) - flags |= FEXCL; - - /* - * Note for filp_open() the two low bits must be remapped to mean: - * 01 - read-only -> 00 read-only - * 10 - write-only -> 01 write-only - * 11 - read-write -> 10 read-write - */ - flags--; - - if (flags & FCREAT) - saved_umask = xchg(¤t->fs->umask, 0); - - fp = filp_open(path, flags, mode); - - if (flags & FCREAT) - (void) xchg(¤t->fs->umask, saved_umask); - - if (IS_ERR(fp)) - return (-PTR_ERR(fp)); - -#if defined(HAVE_4ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat); 
-#else - rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat); -#endif - if (rc) { - filp_close(fp, 0); - return (-rc); - } - - vp = vn_alloc(KM_SLEEP); - if (!vp) { - filp_close(fp, 0); - return (ENOMEM); - } - - saved_gfp = mapping_gfp_mask(fp->f_mapping); - mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS)); - - mutex_enter(&vp->v_lock); - vp->v_type = vn_mode_to_vtype(stat.mode); - vp->v_file = fp; - vp->v_gfp_mask = saved_gfp; - *vpp = vp; - mutex_exit(&vp->v_lock); - - return (0); -} /* vn_open() */ -EXPORT_SYMBOL(vn_open); - -int -vn_openat(const char *path, uio_seg_t seg, int flags, int mode, - vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd) -{ - char *realpath; - int len, rc; - - ASSERT(vp == rootdir); - - len = strlen(path) + 2; - realpath = kmalloc(len, kmem_flags_convert(KM_SLEEP)); - if (!realpath) - return (ENOMEM); - - (void) snprintf(realpath, len, "/%s", path); - rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2); - kfree(realpath); - - return (rc); -} /* vn_openat() */ -EXPORT_SYMBOL(vn_openat); - -int -vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off, - uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp) -{ - struct file *fp = vp->v_file; - loff_t offset = off; - int rc; - - ASSERT(uio == UIO_WRITE || uio == UIO_READ); - ASSERT(seg == UIO_SYSSPACE); - ASSERT((ioflag & ~FAPPEND) == 0); - - if (ioflag & FAPPEND) - offset = fp->f_pos; - - if (uio & UIO_WRITE) - rc = spl_kernel_write(fp, addr, len, &offset); - else - rc = spl_kernel_read(fp, addr, len, &offset); - - fp->f_pos = offset; - - if (rc < 0) - return (-rc); - - if (residp) { - *residp = len - rc; - } else { - if (rc != len) - return (EIO); - } - - return (0); -} /* vn_rdwr() */ -EXPORT_SYMBOL(vn_rdwr); - -int -vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4) -{ - int rc; - - ASSERT(vp); - ASSERT(vp->v_file); - - mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask); - rc = filp_close(vp->v_file, 
0); - vn_free(vp); - - return (-rc); -} /* vn_close() */ -EXPORT_SYMBOL(vn_close); - -/* - * vn_seek() does not actually seek it only performs bounds checking on the - * proposed seek. We perform minimal checking and allow vn_rdwr() to catch - * anything more serious. - */ -int -vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct) -{ - return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); -} -EXPORT_SYMBOL(vn_seek); - -int -vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4) -{ - struct file *fp; - struct kstat stat; - int rc; - - ASSERT(vp); - ASSERT(vp->v_file); - ASSERT(vap); - - fp = vp->v_file; - -#if defined(HAVE_4ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat, STATX_BASIC_STATS, - AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&fp->f_path, &stat); -#else - rc = vfs_getattr(fp->f_path.mnt, fp->f_dentry, &stat); -#endif - if (rc) - return (-rc); - - vap->va_type = vn_mode_to_vtype(stat.mode); - vap->va_mode = stat.mode; - vap->va_uid = KUID_TO_SUID(stat.uid); - vap->va_gid = KGID_TO_SGID(stat.gid); - vap->va_fsid = 0; - vap->va_nodeid = stat.ino; - vap->va_nlink = stat.nlink; - vap->va_size = stat.size; - vap->va_blksize = stat.blksize; - vap->va_atime = stat.atime; - vap->va_mtime = stat.mtime; - vap->va_ctime = stat.ctime; - vap->va_rdev = stat.rdev; - vap->va_nblocks = stat.blocks; - - return (0); -} -EXPORT_SYMBOL(vn_getattr); - -int -vn_fsync(vnode_t *vp, int flags, void *x3, void *x4) -{ - int datasync = 0; - int error; - int fstrans; - - ASSERT(vp); - ASSERT(vp->v_file); - - if (flags & FDSYNC) - datasync = 1; - - /* - * May enter XFS which generates a warning when PF_FSTRANS is set. - * To avoid this the flag is cleared over vfs_sync() and then reset. 
- */ - fstrans = __spl_pf_fstrans_check(); - if (fstrans) - current->flags &= ~(__SPL_PF_FSTRANS); - - error = -spl_filp_fsync(vp->v_file, datasync); - if (fstrans) - current->flags |= __SPL_PF_FSTRANS; - - return (error); -} /* vn_fsync() */ -EXPORT_SYMBOL(vn_fsync); - -int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, - offset_t offset, void *x6, void *x7) -{ - int error = EOPNOTSUPP; -#ifdef FALLOC_FL_PUNCH_HOLE - int fstrans; -#endif - - if (cmd != F_FREESP || bfp->l_whence != SEEK_SET) - return (EOPNOTSUPP); - - ASSERT(vp); - ASSERT(vp->v_file); - ASSERT(bfp->l_start >= 0 && bfp->l_len > 0); - -#ifdef FALLOC_FL_PUNCH_HOLE - /* - * May enter XFS which generates a warning when PF_FSTRANS is set. - * To avoid this the flag is cleared over vfs_sync() and then reset. - */ - fstrans = __spl_pf_fstrans_check(); - if (fstrans) - current->flags &= ~(__SPL_PF_FSTRANS); - - /* - * When supported by the underlying file system preferentially - * use the fallocate() callback to preallocate the space. 
- */ - error = -spl_filp_fallocate(vp->v_file, - FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, - bfp->l_start, bfp->l_len); - - if (fstrans) - current->flags |= __SPL_PF_FSTRANS; - - if (error == 0) - return (0); -#endif - return (error); -} -EXPORT_SYMBOL(vn_space); - -/* Function must be called while holding the vn_file_lock */ -static file_t * -file_find(int fd, struct task_struct *task) -{ - file_t *fp; - - list_for_each_entry(fp, &vn_file_list, f_list) { - if (fd == fp->f_fd && fp->f_task == task) { - ASSERT(atomic_read(&fp->f_ref) != 0); - return (fp); - } - } - - return (NULL); -} /* file_find() */ - -file_t * -vn_getf(int fd) -{ - struct kstat stat; - struct file *lfp; - file_t *fp; - vnode_t *vp; - int rc = 0; - - if (fd < 0) - return (NULL); - - /* Already open just take an extra reference */ - spin_lock(&vn_file_lock); - - fp = file_find(fd, current); - if (fp) { - lfp = fget(fd); - fput(fp->f_file); - /* - * areleasef() can cause us to see a stale reference when - * userspace has reused a file descriptor before areleasef() - * has run. fput() the stale reference and replace it. We - * retain the original reference count such that the concurrent - * areleasef() will decrement its reference and terminate. 
- */ - if (lfp != fp->f_file) { - fp->f_file = lfp; - fp->f_vnode->v_file = lfp; - } - atomic_inc(&fp->f_ref); - spin_unlock(&vn_file_lock); - return (fp); - } - - spin_unlock(&vn_file_lock); - - /* File was not yet opened create the object and setup */ - fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP); - if (fp == NULL) - goto out; - - mutex_enter(&fp->f_lock); - - fp->f_fd = fd; - fp->f_task = current; - fp->f_offset = 0; - atomic_inc(&fp->f_ref); - - lfp = fget(fd); - if (lfp == NULL) - goto out_mutex; - - vp = vn_alloc(KM_SLEEP); - if (vp == NULL) - goto out_fget; - -#if defined(HAVE_4ARGS_VFS_GETATTR) - rc = vfs_getattr(&lfp->f_path, &stat, STATX_TYPE, - AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&lfp->f_path, &stat); -#else - rc = vfs_getattr(lfp->f_path.mnt, lfp->f_dentry, &stat); -#endif - if (rc) - goto out_vnode; - - mutex_enter(&vp->v_lock); - vp->v_type = vn_mode_to_vtype(stat.mode); - vp->v_file = lfp; - mutex_exit(&vp->v_lock); - - fp->f_vnode = vp; - fp->f_file = lfp; - - /* Put it on the tracking list */ - spin_lock(&vn_file_lock); - list_add(&fp->f_list, &vn_file_list); - spin_unlock(&vn_file_lock); - - mutex_exit(&fp->f_lock); - return (fp); - -out_vnode: - vn_free(vp); -out_fget: - fput(lfp); -out_mutex: - mutex_exit(&fp->f_lock); - kmem_cache_free(vn_file_cache, fp); -out: - return (NULL); -} /* getf() */ -EXPORT_SYMBOL(getf); - -static void releasef_locked(file_t *fp) -{ - ASSERT(fp->f_file); - ASSERT(fp->f_vnode); - - /* Unlinked from list, no refs, safe to free outside mutex */ - fput(fp->f_file); - vn_free(fp->f_vnode); - - kmem_cache_free(vn_file_cache, fp); -} - -void -vn_releasef(int fd) -{ - areleasef(fd, P_FINFO(current)); -} -EXPORT_SYMBOL(releasef); - -void -vn_areleasef(int fd, uf_info_t *fip) -{ - file_t *fp; - struct task_struct *task = (struct task_struct *)fip; - - if (fd < 0) - return; - - spin_lock(&vn_file_lock); - fp = file_find(fd, task); - if (fp) { - atomic_dec(&fp->f_ref); - if 
(atomic_read(&fp->f_ref) > 0) { - spin_unlock(&vn_file_lock); - return; - } - - list_del(&fp->f_list); - releasef_locked(fp); - } - spin_unlock(&vn_file_lock); -} /* releasef() */ -EXPORT_SYMBOL(areleasef); - -static int -vn_cache_constructor(void *buf, void *cdrarg, int kmflags) -{ - struct vnode *vp = buf; - - mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL); - - return (0); -} /* vn_cache_constructor() */ - -static void -vn_cache_destructor(void *buf, void *cdrarg) -{ - struct vnode *vp = buf; - - mutex_destroy(&vp->v_lock); -} /* vn_cache_destructor() */ - -static int -vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags) -{ - file_t *fp = buf; - - atomic_set(&fp->f_ref, 0); - mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL); - INIT_LIST_HEAD(&fp->f_list); - - return (0); -} /* vn_file_cache_constructor() */ - -static void -vn_file_cache_destructor(void *buf, void *cdrarg) -{ - file_t *fp = buf; - - mutex_destroy(&fp->f_lock); -} /* vn_file_cache_destructor() */ - -int -spl_vn_init(void) -{ - spin_lock_init(&vn_file_lock); - - vn_cache = kmem_cache_create("spl_vn_cache", - sizeof (struct vnode), 64, vn_cache_constructor, - vn_cache_destructor, NULL, NULL, NULL, 0); - - vn_file_cache = kmem_cache_create("spl_vn_file_cache", - sizeof (file_t), 64, vn_file_cache_constructor, - vn_file_cache_destructor, NULL, NULL, NULL, 0); - - return (0); -} /* spl_vn_init() */ - -void -spl_vn_fini(void) -{ - file_t *fp, *next_fp; - int leaked = 0; - - spin_lock(&vn_file_lock); - - list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) { - list_del(&fp->f_list); - releasef_locked(fp); - leaked++; - } - - spin_unlock(&vn_file_lock); - - if (leaked > 0) - printk(KERN_WARNING "WARNING: %d vnode files leaked\n", leaked); - - kmem_cache_destroy(vn_file_cache); - kmem_cache_destroy(vn_cache); -} /* spl_vn_fini() */ diff --git a/module/os/linux/zfs/Makefile.in b/module/os/linux/zfs/Makefile.in index 1532773782ae..60d92182f388 100644 --- 
a/module/os/linux/zfs/Makefile.in +++ b/module/os/linux/zfs/Makefile.in @@ -26,6 +26,7 @@ $(MODULE)-objs += ../os/linux/zfs/zfs_acl.o $(MODULE)-objs += ../os/linux/zfs/zfs_ctldir.o $(MODULE)-objs += ../os/linux/zfs/zfs_debug.o $(MODULE)-objs += ../os/linux/zfs/zfs_dir.o +$(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_onexit_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o diff --git a/module/os/linux/zfs/mmp_os.c b/module/os/linux/zfs/mmp_os.c index b63f164b6e01..9c8545f3c3d8 100644 --- a/module/os/linux/zfs/mmp_os.c +++ b/module/os/linux/zfs/mmp_os.c @@ -34,7 +34,7 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp) if (ret < 0) return (ret); - if (spa_mode_global != 0) + if (spa_mode_global != SPA_MODE_UNINIT) mmp_signal_all_threads(); return (ret); diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 8cfc6fdc91a4..5525302266c7 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -324,7 +324,7 @@ secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap, * Check privileges for setting xvattr attributes */ int -secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype) +secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, mode_t type) { return (secpolicy_vnode_chown(cr, owner)); } diff --git a/module/os/linux/zfs/spa_misc_os.c b/module/os/linux/zfs/spa_misc_os.c index 5c222d2fb3e6..97d91f9822ea 100644 --- a/module/os/linux/zfs/spa_misc_os.c +++ b/module/os/linux/zfs/spa_misc_os.c @@ -52,7 +52,7 @@ param_set_deadman_ziotime(const char *val, zfs_kernel_param_t *kp) if (error < 0) return (SET_ERROR(error)); - if (spa_mode_global != 0) { + if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) spa->spa_deadman_ziotime = @@ -73,7 +73,7 @@ param_set_deadman_synctime(const char *val, zfs_kernel_param_t *kp) if (error < 0) return 
(SET_ERROR(error)); - if (spa_mode_global != 0) { + if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) spa->spa_deadman_synctime = diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 328e47f522f6..2c7dda686f26 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -55,16 +55,14 @@ typedef struct dio_request { } dio_request_t; static fmode_t -vdev_bdev_mode(int smode) +vdev_bdev_mode(spa_mode_t spa_mode) { fmode_t mode = 0; - ASSERT3S(smode & (FREAD | FWRITE), !=, 0); - - if (smode & FREAD) + if (spa_mode & SPA_MODE_READ) mode |= FMODE_READ; - if (smode & FWRITE) + if (spa_mode & SPA_MODE_WRITE) mode |= FMODE_WRITE; return (mode); @@ -849,9 +847,6 @@ vdev_disk_hold(vdev_t *vd) if (vd->vdev_tsd != NULL) return; - /* XXX: Implement me as a vnode lookup for the device */ - vd->vdev_name_vp = NULL; - vd->vdev_devid_vp = NULL; } static void @@ -874,7 +869,7 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) if ((p = strchr(val, '\n')) != NULL) *p = '\0'; - if (spa_mode_global != 0) { + if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { if (spa_state(spa) != POOL_STATE_ACTIVE || diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c index b79017f3a610..fba5cdcedd9b 100644 --- a/module/os/linux/zfs/vdev_file.c +++ b/module/os/linux/zfs/vdev_file.c @@ -35,6 +35,9 @@ #include #include #include +#include + +#include /* * Virtual device vector for files. 
@@ -54,13 +57,29 @@ vdev_file_rele(vdev_t *vd) ASSERT(vd->vdev_path != NULL); } +static mode_t +vdev_file_open_mode(spa_mode_t spa_mode) +{ + mode_t mode = 0; + + if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) { + mode = O_RDWR; + } else if (spa_mode & SPA_MODE_READ) { + mode = O_RDONLY; + } else if (spa_mode & SPA_MODE_WRITE) { + mode = O_WRONLY; + } + + return (mode | O_LARGEFILE); +} + static int vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { vdev_file_t *vf; - vnode_t *vp; - vattr_t vattr; + zfs_file_t *fp; + zfs_file_attr_t zfa; int error; /* @@ -108,38 +127,38 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * to local zone users, so the underlying devices should be as well. */ ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); - error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, - spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); + error = zfs_file_open(vd->vdev_path, + vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp); if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } - vf->vf_vnode = vp; + vf->vf_file = fp; #ifdef _KERNEL /* * Make sure it's a regular file. */ - if (vp->v_type != VREG) { + if (zfs_file_getattr(fp, &zfa)) { + return (SET_ERROR(ENODEV)); + } + if (!S_ISREG(zfa.zfa_mode)) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (SET_ERROR(ENODEV)); } #endif skip_open: - /* - * Determine the physical size of the file. 
- */ - vattr.va_mask = AT_SIZE; - error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); + + error = zfs_file_getattr(vf->vf_file, &zfa); if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } - *max_psize = *psize = vattr.va_size; + *max_psize = *psize = zfa.zfa_size; *ashift = SPA_MINBLOCKSHIFT; return (0); @@ -153,10 +172,8 @@ vdev_file_close(vdev_t *vd) if (vd->vdev_reopening || vf == NULL) return; - if (vf->vf_vnode != NULL) { - (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); - (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, - kcred, NULL); + if (vf->vf_file != NULL) { + (void) zfs_file_close(vf->vf_file); } vd->vdev_delayed_close = B_FALSE; @@ -172,21 +189,24 @@ vdev_file_io_strategy(void *arg) vdev_file_t *vf = vd->vdev_tsd; ssize_t resid; void *buf; + loff_t off; + ssize_t size; + int err; + + off = zio->io_offset; + size = zio->io_size; + resid = 0; - if (zio->io_type == ZIO_TYPE_READ) + if (zio->io_type == ZIO_TYPE_READ) { buf = abd_borrow_buf(zio->io_abd, zio->io_size); - else + err = zfs_file_pread(vf->vf_file, buf, size, off, &resid); + abd_return_buf_copy(zio->io_abd, buf, size); + } else { buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); - - zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 
- UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size, - zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - - if (zio->io_type == ZIO_TYPE_READ) - abd_return_buf_copy(zio->io_abd, buf, zio->io_size); - else - abd_return_buf(zio->io_abd, buf, zio->io_size); - + err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid); + abd_return_buf(zio->io_abd, buf, size); + } + zio->io_error = err; if (resid != 0 && zio->io_error == 0) zio->io_error = SET_ERROR(ENOSPC); @@ -199,7 +219,7 @@ vdev_file_io_fsync(void *arg) zio_t *zio = (zio_t *)arg; vdev_file_t *vf = zio->io_vd->vdev_tsd; - zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL); + zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC); zio_interrupt(zio); } @@ -238,8 +258,8 @@ vdev_file_io_start(zio_t *zio) return; } - zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, - kcred, NULL); + zio->io_error = zfs_file_fsync(vf->vf_file, + O_SYNC | O_DSYNC); break; default: zio->io_error = SET_ERROR(ENOTSUP); @@ -248,18 +268,12 @@ vdev_file_io_start(zio_t *zio) zio_execute(zio); return; } else if (zio->io_type == ZIO_TYPE_TRIM) { - struct flock flck; + int mode; ASSERT3U(zio->io_size, !=, 0); - bzero(&flck, sizeof (flck)); - flck.l_type = F_FREESP; - flck.l_start = zio->io_offset; - flck.l_len = zio->io_size; - flck.l_whence = SEEK_SET; - - zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &flck, - 0, 0, kcred, NULL); - + mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + zio->io_error = zfs_file_fallocate(vf->vf_file, + mode, zio->io_offset, zio->io_size); zio_execute(zio); return; } diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c index 4c21350c0d63..1bb2b892d042 100644 --- a/module/os/linux/zfs/zfs_acl.c +++ b/module/os/linux/zfs/zfs_acl.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 6bdad737cd84..89704d0e4b88 100644 --- 
a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c new file mode 100644 index 000000000000..1c9b84d66f51 --- /dev/null +++ b/module/os/linux/zfs/zfs_file_os.c @@ -0,0 +1,427 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_FDTABLE_HEADER +#include +#endif + +/* + * Open file + * + * path - fully qualified path to file + * flags - open flags (O_RDONLY / O_WRONLY / O_RDWR, O_CREAT, O_EXCL, ...) + * mode - mode argument passed through to filp_open() + * fpp - pointer to return file pointer + * + * Returns 0 on success, underlying error on failure. 
+ */ +int +zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) +{ + struct file *filp; + int saved_umask; + + if (!(flags & O_CREAT) && (flags & O_WRONLY)) + flags |= O_EXCL; + + if (flags & O_CREAT) + saved_umask = xchg(¤t->fs->umask, 0); + + filp = filp_open(path, flags, mode); + + if (flags & O_CREAT) + (void) xchg(¤t->fs->umask, saved_umask); + + if (IS_ERR(filp)) + return (-PTR_ERR(filp)); + + *fpp = filp; + return (0); +} + +void +zfs_file_close(zfs_file_t *fp) +{ + filp_close(fp, 0); +} + +static ssize_t +zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *off) +{ +#if defined(HAVE_KERNEL_WRITE_PPOS) + return (kernel_write(fp, buf, count, off)); +#else + mm_segment_t saved_fs; + ssize_t rc; + + saved_fs = get_fs(); + set_fs(KERNEL_DS); + + rc = vfs_write(fp, (__force const char __user __user *)buf, count, off); + + set_fs(saved_fs); + + return (rc); +#endif +} + +/* + * Stateful write - use os internal file pointer to determine where to + * write and update on successful completion. + * + * fp - pointer to file (pipe, socket, etc) to write to + * buf - buffer to write + * count - # of bytes to write + * resid - pointer to count of unwritten bytes (if short write) + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) +{ + loff_t off = fp->f_pos; + ssize_t rc; + + rc = zfs_file_write_impl(fp, buf, count, &off); + if (rc < 0) + return (-rc); + + fp->f_pos = off; + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * Stateless write - os internal file pointer is not updated. + * + * fp - pointer to file (pipe, socket, etc) to write to + * buf - buffer to write + * count - # of bytes to write + * off - file offset to write to (only valid for seekable types) + * resid - pointer to count of unwritten bytes + * + * Returns 0 on success errno on failure. 
+ */ +int +zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, + ssize_t *resid) +{ + ssize_t rc; + + rc = zfs_file_write_impl(fp, buf, count, &off); + if (rc < 0) + return (-rc); + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * Common read helper: reads into the kernel buffer with kernel_read() or, + * when HAVE_KERNEL_READ_PPOS is not defined, with vfs_read() under KERNEL_DS. + * Returns the underlying call's result (negative on error). + */ +static ssize_t +zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off) +{ +#if defined(HAVE_KERNEL_READ_PPOS) + return (kernel_read(fp, buf, count, off)); +#else + mm_segment_t saved_fs; + ssize_t rc; + + saved_fs = get_fs(); + set_fs(KERNEL_DS); + + rc = vfs_read(fp, (void __user *)buf, count, off); + set_fs(saved_fs); + + return (rc); +#endif +} + +/* + * Stateful read - use os internal file pointer to determine where to + * read and update on successful completion. + * + * fp - pointer to file (pipe, socket, etc) to read from + * buf - buffer to read into + * count - # of bytes to read + * resid - pointer to count of unread bytes (if short read); when NULL a + * short read is reported as EIO + * + * Returns 0 on success errno on failure. + */ +int +zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid) +{ + loff_t off = fp->f_pos; + ssize_t rc; + + rc = zfs_file_read_impl(fp, buf, count, &off); + if (rc < 0) + return (-rc); + + fp->f_pos = off; + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * Stateless read - os internal file pointer is not updated. + * + * fp - pointer to file (pipe, socket, etc) to read from + * buf - buffer to read into + * count - # of bytes to read + * off - file offset to read from (only valid for seekable types) + * resid - pointer to count of unread bytes (if short read) + * + * Returns 0 on success errno on failure. 
+ */ +int +zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off, + ssize_t *resid) +{ + ssize_t rc; + + rc = zfs_file_read_impl(fp, buf, count, &off); + if (rc < 0) + return (-rc); + + if (resid) { + *resid = count - rc; + } else if (rc != count) { + return (EIO); + } + + return (0); +} + +/* + * lseek - set / get file pointer + * + * fp - pointer to file (pipe, socket, etc) to seek on + * offp - in: offset passed to vfs_llseek(); must lie in [0, MAXOFFSET_T] + * or EINVAL is returned. out: the offset vfs_llseek() returned + * whence - see man pages for standard lseek whence values + * + * Returns 0 on success errno on failure (ESPIPE for non seekable types) + */ +int +zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence) +{ + loff_t rc; + + if (*offp < 0 || *offp > MAXOFFSET_T) + return (EINVAL); + + rc = vfs_llseek(fp, *offp, whence); + if (rc < 0) + return (-rc); + + *offp = rc; + + return (0); +} + +/* + * Get file attributes + * + * filp - file pointer + * zfattr - pointer to file attr structure; zfa_size and zfa_mode are + * filled in from the kstat on success + * + * Currently only used for fetching size and file mode. + * + * Returns 0 on success or error code of underlying getattr call on failure. + */ +int +zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr) +{ + struct kstat stat; + int rc; + +#if defined(HAVE_4ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); +#elif defined(HAVE_2ARGS_VFS_GETATTR) + rc = vfs_getattr(&filp->f_path, &stat); +#else + rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); +#endif + if (rc) + return (-rc); + + zfattr->zfa_size = stat.size; + zfattr->zfa_mode = stat.mode; + + return (0); +} + +/* + * Sync file to disk + * + * filp - file pointer + * flags - O_SYNC and/or O_DSYNC; the O_DSYNC bit selects datasync = 1 + * + * NOTE(review): Linux defines O_SYNC so that it includes the O_DSYNC bit, + * which would make O_SYNC alone select datasync as well - confirm intended. + * + * Returns 0 on success or error code of underlying sync call on failure. + */ +int +zfs_file_fsync(zfs_file_t *filp, int flags) +{ + int datasync = 0; + int error; + int fstrans; + + if (flags & O_DSYNC) + datasync = 1; + + /* + * May enter XFS which generates a warning when PF_FSTRANS is set. 
+ * To avoid this the flag is cleared over vfs_sync() and then reset. + */ + fstrans = __spl_pf_fstrans_check(); + if (fstrans) + current->flags &= ~(__SPL_PF_FSTRANS); + + error = -vfs_fsync(filp, datasync); + + if (fstrans) + current->flags |= __SPL_PF_FSTRANS; + + return (error); +} + +/* + * fallocate - allocate or free space on disk + * + * fp - file pointer + * mode (non-standard options for hole punching etc) + * offset - offset to start allocating or freeing from + * len - length to free / allocate + * + * OPTIONAL + */ +int +zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) +{ + /* + * May enter XFS which generates a warning when PF_FSTRANS is set. + * To avoid this the flag is cleared over vfs_sync() and then reset. + */ + int fstrans = __spl_pf_fstrans_check(); + if (fstrans) + current->flags &= ~(__SPL_PF_FSTRANS); + + /* + * When supported by the underlying file system preferentially + * use the fallocate() callback to preallocate the space. + */ + int error = EOPNOTSUPP; + if (fp->f_op->fallocate) + error = fp->f_op->fallocate(fp, mode, offset, len); + + if (fstrans) + current->flags |= __SPL_PF_FSTRANS; + + return (error); +} + +/* + * Request current file pointer offset + * + * fp - pointer to file + * + * Returns current file offset. + */ +loff_t +zfs_file_off(zfs_file_t *fp) +{ + return (fp->f_pos); +} + +/* + * unlink file + * + * path - fully qualified file path + * + * Returns 0 on success. + * + * OPTIONAL + */ +int +zfs_file_unlink(const char *path) +{ + return (EOPNOTSUPP); +} + +/* + * Get reference to file pointer + * + * fd - input file descriptor + * fpp - pointer to file pointer + * + * Returns 0 on success EBADF on failure. 
+ */ +int +zfs_file_get(int fd, zfs_file_t **fpp) +{ + zfs_file_t *fp; + + fp = fget(fd); + if (fp == NULL) + return (EBADF); + + *fpp = fp; + + return (0); +} + +/* + * Drop reference to file pointer + * + * fd - input file descriptor + * + * The descriptor is looked up again with fget(), so fput() is called twice: + * once for that lookup and once, presumably, for the reference an earlier + * zfs_file_get() on the same fd took. Does nothing if fget() returns NULL. + */ +void +zfs_file_put(int fd) +{ + struct file *fp; + + if ((fp = fget(fd)) != NULL) { + fput(fp); + fput(fp); + } +} diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c index 4e69eecf3414..543748c14b04 100644 --- a/module/os/linux/zfs/zfs_ioctl_os.c +++ b/module/os/linux/zfs/zfs_ioctl_os.c @@ -178,15 +178,15 @@ int zfsdev_getminor(int fd, minor_t *minorp) { zfsdev_state_t *zs, *fpd; - file_t *fp; + struct file *fp; + int rc; ASSERT(!MUTEX_HELD(&zfsdev_state_lock)); - fp = getf(fd); - if (fp == NULL) - return (SET_ERROR(EBADF)); + if ((rc = zfs_file_get(fd, &fp))) + return (rc); - fpd = fp->f_file->private_data; + fpd = fp->private_data; if (fpd == NULL) return (SET_ERROR(EBADF)); diff --git a/module/os/linux/zfs/zfs_onexit_os.c b/module/os/linux/zfs/zfs_onexit_os.c index 95dbe8dbec5e..879ea28ec607 100644 --- a/module/os/linux/zfs/zfs_onexit_os.c +++ b/module/os/linux/zfs/zfs_onexit_os.c @@ -60,5 +60,5 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp) void zfs_onexit_fd_rele(int fd) { - releasef(fd); + zfs_file_put(fd); } diff --git a/module/os/linux/zfs/zfs_vnops.c b/module/os/linux/zfs/zfs_vnops.c index edbb2fc6410b..e7d0e8933f2f 100644 --- a/module/os/linux/zfs/zfs_vnops.c +++ b/module/os/linux/zfs/zfs_vnops.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -423,7 +422,7 @@ unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT; * IN: ip - inode of file to be read from. * uio - structure supplying read location, range info, * and return buffer. - * ioflag - FSYNC flags; used to provide FRSYNC semantics. + * ioflag - O_SYNC flags; used to provide FRSYNC semantics. * O_DIRECT flag; used to bypass page cache. * cr - credentials of caller. 
* @@ -473,7 +472,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * Only do this for non-snapshots. * * Some platforms do not support FRSYNC and instead map it - * to FSYNC, which results in unnecessary calls to zil_commit. We + * to O_SYNC, which results in unnecessary calls to zil_commit. We * only honor FRSYNC requests on platforms which support it. */ frsync = !!(ioflag & FRSYNC); @@ -570,7 +569,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * IN: ip - inode of file to be written to. * uio - structure supplying write location, range info, * and data buffer. - * ioflag - FAPPEND flag set if in append mode. + * ioflag - O_APPEND flag set if in append mode. * O_DIRECT flag; used to bypass page cache. * cr - credentials of caller. * @@ -629,7 +628,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * If immutable or not appending then return EPERM */ if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || - ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && + ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) && (uio->uio_loffset < zp->z_size))) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EPERM)); @@ -638,7 +637,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) /* * Validate file offset */ - offset_t woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; + offset_t woff = ioflag & O_APPEND ? zp->z_size : uio->uio_loffset; if (woff < 0) { ZFS_EXIT(zfsvfs); return (SET_ERROR(EINVAL)); @@ -667,7 +666,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) * If in append mode, set the io offset pointer to eof. */ zfs_locked_range_t *lr; - if (ioflag & FAPPEND) { + if (ioflag & O_APPEND) { /* * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. 
@@ -961,7 +960,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) return (error); } - if (ioflag & (FSYNC | FDSYNC) || + if (ioflag & (O_SYNC | O_DSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); @@ -1486,7 +1485,7 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); } else { - int aflags = (flag & FAPPEND) ? V_APPEND : 0; + int aflags = (flag & O_APPEND) ? V_APPEND : 0; if (have_acl) zfs_acl_ids_free(&acl_ids); @@ -2486,7 +2485,6 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) */ mutex_enter(&zp->z_lock); - vap->va_type = vn_mode_to_vtype(zp->z_mode); vap->va_mode = zp->z_mode; vap->va_fsid = ZTOI(zp)->i_sb->s_dev; vap->va_nodeid = zp->z_id; @@ -2497,7 +2495,6 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) vap->va_nlink = MIN(links, ZFS_LINK_MAX); vap->va_size = i_size_read(ip); vap->va_rdev = ip->i_rdev; - vap->va_seq = ip->i_generation; /* * Add in any requested optional attributes and the create time. 
diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index c623d61f7066..53ba1f63ed64 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index b098703c2c94..54e80e50eedd 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -190,19 +190,19 @@ zfs_io_flags(struct kiocb *kiocb) #if defined(IOCB_DSYNC) if (kiocb->ki_flags & IOCB_DSYNC) - flags |= FDSYNC; + flags |= O_DSYNC; #endif #if defined(IOCB_SYNC) if (kiocb->ki_flags & IOCB_SYNC) - flags |= FSYNC; + flags |= O_SYNC; #endif #if defined(IOCB_APPEND) if (kiocb->ki_flags & IOCB_APPEND) - flags |= FAPPEND; + flags |= O_APPEND; #endif #if defined(IOCB_DIRECT) if (kiocb->ki_flags & IOCB_DIRECT) - flags |= FDIRECT; + flags |= O_DIRECT; #endif return (flags); } @@ -728,16 +728,14 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) static long zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) { - int error = -EOPNOTSUPP; - -#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) cred_t *cr = CRED(); flock64_t bf; loff_t olen; fstrans_cookie_t cookie; + int error; if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return (error); + return (-EOPNOTSUPP); if (offset < 0 || len <= 0) return (-EINVAL); @@ -759,14 +757,12 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) crhold(cr); cookie = spl_fstrans_mark(); - error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr); + error = -zfs_space(ip, F_FREESP, &bf, O_RDWR, offset, cr); spl_fstrans_unmark(cookie); spl_inode_unlock(ip); crfree(cr); -#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */ - ASSERT3S(error, <=, 0); return (error); } diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 6ed2a220d9c7..87df07cc065e 100644 --- 
a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -8666,7 +8666,7 @@ l2arc_fini(void) void l2arc_start(void) { - if (!(spa_mode_global & FWRITE)) + if (!(spa_mode_global & SPA_MODE_WRITE)) return; (void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0, @@ -8676,7 +8676,7 @@ l2arc_start(void) void l2arc_stop(void) { - if (!(spa_mode_global & FWRITE)) + if (!(spa_mode_global & SPA_MODE_WRITE)) return; mutex_enter(&l2arc_feed_thr_lock); diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index c40ed57f243d..e08cf21abf00 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -40,33 +40,36 @@ #include #include #include +#include -struct diffarg { - struct vnode *da_vp; /* file to which we are reporting */ + +typedef struct dmu_diffarg { + zfs_file_t *da_fp; /* file to which we are reporting */ offset_t *da_offp; int da_err; /* error that stopped diff search */ dmu_diff_record_t da_ddr; -}; +} dmu_diffarg_t; -static int -write_record(struct diffarg *da) +int +write_record(dmu_diffarg_t *da) { - ssize_t resid; /* have to get resid to get detailed errno */ + zfs_file_t *fp; + ssize_t resid; if (da->da_ddr.ddr_type == DDR_NONE) { da->da_err = 0; return (0); } - da->da_err = vn_rdwr(UIO_WRITE, da->da_vp, (caddr_t)&da->da_ddr, - sizeof (da->da_ddr), 0, UIO_SYSSPACE, FAPPEND, - RLIM64_INFINITY, CRED(), &resid); + fp = da->da_fp; + da->da_err = zfs_file_write(fp, (caddr_t)&da->da_ddr, + sizeof (da->da_ddr), &resid); *da->da_offp += sizeof (da->da_ddr); return (da->da_err); } static int -report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last) +report_free_dnode_range(dmu_diffarg_t *da, uint64_t first, uint64_t last) { ASSERT(first <= last); if (da->da_ddr.ddr_type != DDR_FREE || @@ -83,7 +86,7 @@ report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last) } static int -report_dnode(struct diffarg *da, uint64_t object, dnode_phys_t *dnp) +report_dnode(dmu_diffarg_t *da, uint64_t object, dnode_phys_t *dnp) { ASSERT(dnp != 
NULL); if (dnp->dn_type == DMU_OT_NONE) @@ -110,7 +113,7 @@ static int diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { - struct diffarg *da = arg; + dmu_diffarg_t *da = arg; int err = 0; if (issig(JUSTLOOKING) && issig(FORREAL)) @@ -162,9 +165,9 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int dmu_diff(const char *tosnap_name, const char *fromsnap_name, - struct vnode *vp, offset_t *offp) + zfs_file_t *fp, offset_t *offp) { - struct diffarg da; + dmu_diffarg_t da; dsl_dataset_t *fromsnap; dsl_dataset_t *tosnap; dsl_pool_t *dp; @@ -205,7 +208,7 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name, dsl_dataset_long_hold(tosnap, FTAG); dsl_pool_rele(dp, FTAG); - da.da_vp = vp; + da.da_fp = fp; da.da_offp = offp; da.da_ddr.ddr_type = DDR_NONE; da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0; diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 48c3705c65a6..f68419bfae6d 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -61,6 +61,7 @@ #ifdef _KERNEL #include #endif +#include int zfs_recv_queue_length = SPA_MAXBLOCKSIZE; int zfs_recv_queue_ff = 20; @@ -1103,8 +1104,8 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, boolean_t force, boolean_t resumable, nvlist_t *localprops, - nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, vnode_t *vp, - offset_t *voffp) + nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, + zfs_file_t *fp, offset_t *voffp) { dmu_recv_begin_arg_t drba = { 0 }; int err; @@ -1131,7 +1132,7 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, return (SET_ERROR(EINVAL)); } - drc->drc_vp = vp; + drc->drc_fp = fp; drc->drc_voff = *voffp; drc->drc_featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); @@ -1248,12 +1249,11 @@ receive_read(dmu_recv_cookie_t *drc, int len, void *buf) while (done < len) 
{ ssize_t resid; + zfs_file_t *fp; - drc->drc_err = vn_rdwr(UIO_READ, drc->drc_vp, - (char *)buf + done, len - done, - drc->drc_voff, UIO_SYSSPACE, FAPPEND, - RLIM64_INFINITY, CRED(), &resid); - + fp = drc->drc_fp; + drc->drc_err = zfs_file_read(fp, (char *)buf + done, + len - done, &resid); if (resid == len - done) { /* * Note: ECKSUM indicates that the receive diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 162a3613c282..96a402ffa220 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -921,7 +921,7 @@ spa_keystore_unload_wkey(const char *dsname) * Wait for any outstanding txg IO to complete, releasing any * remaining references on the wkey. */ - if (spa_mode(spa) != FREAD) + if (spa_mode(spa) != SPA_MODE_READ) txg_wait_synced(spa->spa_dsl_pool, 0); spa_close(spa, FTAG); diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 722855492d0b..e3c3a1700f62 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -597,7 +596,7 @@ zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze) void zfs_zevent_fd_rele(int fd) { - releasef(fd); + zfs_file_put(fd); } /* diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9bfd24d98e68..0eb4910af843 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -319,7 +319,7 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, metaslab_class_expandable_space(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, - (spa_mode(spa) == FREAD), src); + (spa_mode(spa) == SPA_MODE_READ), src); cap = (size == 0) ? 0 : (alloc * 100 / size); spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); @@ -1196,7 +1196,7 @@ spa_thread(void *arg) * Activate an uninitialized pool. 
*/ static void -spa_activate(spa_t *spa, int mode) +spa_activate(spa_t *spa, spa_mode_t mode) { ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); @@ -3362,7 +3362,7 @@ spa_ld_open_vdevs(spa_t *spa) if (spa->spa_missing_tvds != 0) { spa_load_note(spa, "vdev tree has %lld missing top-level " "vdevs.", (u_longlong_t)spa->spa_missing_tvds); - if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) { + if (spa->spa_trust_config && (spa->spa_mode & SPA_MODE_WRITE)) { /* * Although theoretically we could allow users to open * incomplete pools in RW mode, we'd need to add a lot @@ -4358,7 +4358,7 @@ spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg, static void spa_ld_prepare_for_reload(spa_t *spa) { - int mode = spa->spa_mode; + spa_mode_t mode = spa->spa_mode; int async_suspended = spa->spa_async_suspended; spa_unload(spa); @@ -4868,7 +4868,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) static int spa_load_retry(spa_t *spa, spa_load_state_t state) { - int mode = spa->spa_mode; + spa_mode_t mode = spa->spa_mode; spa_unload(spa); spa_deactivate(spa); @@ -5915,7 +5915,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) char *altroot = NULL; spa_load_state_t state = SPA_LOAD_IMPORT; zpool_load_policy_t policy; - uint64_t mode = spa_mode_global; + spa_mode_t mode = spa_mode_global; uint64_t readonly = B_FALSE; int error; nvlist_t *nvroot; @@ -5939,7 +5939,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); if (readonly) - mode = FREAD; + mode = SPA_MODE_READ; spa = spa_add(pool, config, altroot); spa->spa_import_flags = flags; @@ -6109,7 +6109,7 @@ spa_tryimport(nvlist_t *tryconfig) */ mutex_enter(&spa_namespace_lock); spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); - spa_activate(spa, FREAD); + spa_activate(spa, SPA_MODE_READ); /* * Rewind pool if a max txg was provided. 
@@ -6219,7 +6219,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, if (oldconfig) *oldconfig = NULL; - if (!(spa_mode_global & FWRITE)) + if (!(spa_mode_global & SPA_MODE_WRITE)) return (SET_ERROR(EROFS)); mutex_enter(&spa_namespace_lock); @@ -8073,8 +8073,7 @@ spa_async_dispatch(spa_t *spa) mutex_enter(&spa->spa_async_lock); if (spa_async_tasks_pending(spa) && !spa->spa_async_suspended && - spa->spa_async_thread == NULL && - rootdir != NULL) + spa->spa_async_thread == NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index de1ad21da6de..3a65b0bb4321 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -37,8 +37,8 @@ #include #include #include +#include #ifdef _KERNEL -#include #include #endif @@ -80,8 +80,10 @@ spa_config_load(void) nvlist_t *nvlist, *child; nvpair_t *nvpair; char *pathname; - struct _buf *file; + zfs_file_t *fp; + zfs_file_attr_t zfa; uint64_t fsize; + int err; #ifdef _KERNEL if (zfs_autoimport_disable) @@ -95,22 +97,23 @@ spa_config_load(void) (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); - file = kobj_open_file(pathname); + err = zfs_file_open(pathname, O_RDONLY, 0, &fp); kmem_free(pathname, MAXPATHLEN); - if (file == (struct _buf *)-1) + if (err) return; - if (kobj_get_filesize(file, &fsize) != 0) + if (zfs_file_getattr(fp, &zfa)) goto out; + fsize = zfa.zfa_size; buf = kmem_alloc(fsize, KM_SLEEP); /* * Read the nvlist from the file. 
*/ - if (kobj_read_file(file, buf, fsize, 0) < 0) + if (zfs_file_read(fp, buf, fsize, NULL) < 0) goto out; /* @@ -143,27 +146,32 @@ spa_config_load(void) if (buf != NULL) kmem_free(buf, fsize); - kobj_close_file(file); + zfs_file_close(fp); } static int spa_config_remove(spa_config_dirent_t *dp) { -#if defined(__linux__) && defined(_KERNEL) - int error, flags = FWRITE | FTRUNC; - uio_seg_t seg = UIO_SYSSPACE; - vnode_t *vp; - - error = vn_open(dp->scd_path, seg, flags, 0644, &vp, 0, 0); - if (error == 0) { - (void) VOP_FSYNC(vp, FSYNC, kcred, NULL); - (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL); + int error = 0; + + /* + * Remove the cache file. If zfs_file_unlink() is not supported by the + * platform, fall back to truncating the file which is functionally + * equivalent. + */ + error = zfs_file_unlink(dp->scd_path); + if (error == EOPNOTSUPP) { + int flags = O_RDWR | O_TRUNC; + zfs_file_t *fp; + + error = zfs_file_open(dp->scd_path, flags, 0644, &fp); + if (error == 0) { + (void) zfs_file_fsync(fp, O_SYNC); + (void) zfs_file_close(fp); + } } return (error); -#else - return (vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE)); -#endif } static int @@ -171,10 +179,10 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) { size_t buflen; char *buf; - vnode_t *vp; - int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; + int oflags = O_RDWR | O_TRUNC | O_CREAT | O_LARGEFILE; char *temp; int err; + zfs_file_t *fp; /* * If the nvlist is empty (NULL), then remove the old cachefile. @@ -193,46 +201,22 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) buf = fnvlist_pack(nvl, &buflen); temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); -#if defined(__linux__) && defined(_KERNEL) /* * Write the configuration to disk. Due to the complexity involved * in performing a rename and remove from within the kernel the file * is instead truncated and overwritten in place. This way we always * have a consistent view of the data or a zero length file. 
*/ - err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0); + err = zfs_file_open(dp->scd_path, oflags, 0644, &fp); if (err == 0) { - err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, - UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL); + err = zfs_file_write(fp, buf, buflen, NULL); if (err == 0) - err = VOP_FSYNC(vp, FSYNC, kcred, NULL); + err = zfs_file_fsync(fp, O_SYNC); - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); + zfs_file_close(fp); if (err) (void) spa_config_remove(dp); } -#else - /* - * Write the configuration to disk. We need to do the traditional - * 'write to temporary file, sync, move over original' to make sure we - * always have a consistent view of the data. - */ - (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - - err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); - if (err == 0) { - err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL); - if (err == 0) - err = VOP_FSYNC(vp, FSYNC, kcred, NULL); - if (err == 0) - err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE); - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); - } - - (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); -#endif - fnvlist_pack_free(buf, buflen); kmem_free(temp, MAXPATHLEN); return (err); @@ -258,7 +242,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) ASSERT(MUTEX_HELD(&spa_namespace_lock)); - if (rootdir == NULL || !(spa_mode_global & FWRITE)) + if (!(spa_mode_global & SPA_MODE_WRITE)) return; /* diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 5dd11f6a3ba8..d49c92e20986 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -242,7 +242,7 @@ static kmutex_t spa_l2cache_lock; static avl_tree_t spa_l2cache_avl; kmem_cache_t *spa_buffer_pool; -int spa_mode_global; +spa_mode_t spa_mode_global = SPA_MODE_UNINIT; #ifdef ZFS_DEBUG /* @@ -2282,7 +2282,7 @@ spa_boot_init(void) } void -spa_init(int mode) +spa_init(spa_mode_t mode) { 
mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); @@ -2301,7 +2301,7 @@ spa_init(int mode) spa_mode_global = mode; #ifndef _KERNEL - if (spa_mode_global != FREAD && dprintf_find_string("watch")) { + if (spa_mode_global != SPA_MODE_READ && dprintf_find_string("watch")) { struct sigaction sa; sa.sa_flags = SA_SIGINFO; @@ -2406,7 +2406,7 @@ spa_is_root(spa_t *spa) boolean_t spa_writeable(spa_t *spa) { - return (!!(spa->spa_mode & FWRITE) && spa->spa_trust_config); + return (!!(spa->spa_mode & SPA_MODE_WRITE) && spa->spa_trust_config); } /* @@ -2420,7 +2420,7 @@ spa_has_pending_synctask(spa_t *spa) !txg_all_lists_empty(&spa->spa_dsl_pool->dp_early_sync_tasks)); } -int +spa_mode_t spa_mode(spa_t *spa) { return (spa->spa_mode); @@ -2670,7 +2670,7 @@ boolean_t spa_importing_readonly_checkpoint(spa_t *spa) { return ((spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT) && - spa->spa_mode == FREAD); + spa->spa_mode == SPA_MODE_READ); } uint64_t @@ -2724,7 +2724,7 @@ param_set_deadman_failmode(const char *val, zfs_kernel_param_t *kp) strcmp(val, "panic")) return (SET_ERROR(-EINVAL)); - if (spa_mode_global != 0) { + if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) spa_set_deadman_failmode(spa, val); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 6bb3c3c68072..aeee6499bfaa 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -932,7 +932,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, */ if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && (spa = spa_by_guid(pool_guid, device_guid)) != NULL && - spa_mode(spa) == FREAD) + spa_mode(spa) == SPA_MODE_READ) state = POOL_STATE_ACTIVE; /* diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 25863c8e434c..902c0b2b2a48 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -192,6 +192,7 @@ #include #include #include 
+#include #include #include @@ -4708,26 +4709,26 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, dmu_recv_cookie_t drc; int error = 0; int props_error = 0; - offset_t off; + offset_t off, noff; nvlist_t *local_delayprops = NULL; nvlist_t *recv_delayprops = NULL; nvlist_t *origprops = NULL; /* existing properties */ nvlist_t *origrecvd = NULL; /* existing received properties */ boolean_t first_recvd_props = B_FALSE; boolean_t tofs_was_redacted; - file_t *input_fp; + zfs_file_t *input_fp; *read_bytes = 0; *errflags = 0; *errors = fnvlist_alloc(); + off = 0; - input_fp = getf(input_fd); - if (input_fp == NULL) - return (SET_ERROR(EBADF)); + if ((error = zfs_file_get(input_fd, &input_fp))) + return (error); - off = input_fp->f_offset; + noff = off = zfs_file_off(input_fp); error = dmu_recv_begin(tofs, tosnap, begin_record, force, - resumable, localprops, hidden_args, origin, &drc, input_fp->f_vnode, + resumable, localprops, hidden_args, origin, &drc, input_fp, &off); if (error != 0) goto out; @@ -4901,10 +4902,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0); nvlist_free(local_delayprops); } - - *read_bytes = off - input_fp->f_offset; - if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0) - input_fp->f_offset = off; + *read_bytes = off - noff; #ifdef DEBUG if (zfs_ioc_recv_inject_err) { @@ -5006,7 +5004,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, nvlist_free(inheritprops); } out: - releasef(input_fd); + zfs_file_put(input_fd); nvlist_free(origrecvd); nvlist_free(origprops); @@ -5221,8 +5219,8 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) } typedef struct dump_bytes_io { - vnode_t *dbi_vp; - void *dbi_buf; + zfs_file_t *dbi_fp; + caddr_t dbi_buf; int dbi_len; int dbi_err; } dump_bytes_io_t; @@ -5231,11 +5229,13 @@ static void dump_bytes_cb(void *arg) { 
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg; - ssize_t resid; /* have to get resid to get detailed errno */ + zfs_file_t *fp; + caddr_t buf; + + fp = dbi->dbi_fp; + buf = dbi->dbi_buf; - dbi->dbi_err = vn_rdwr(UIO_WRITE, dbi->dbi_vp, - (caddr_t)dbi->dbi_buf, dbi->dbi_len, - 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); + dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL); } static int @@ -5243,7 +5243,7 @@ dump_bytes(objset_t *os, void *buf, int len, void *arg) { dump_bytes_io_t dbi; - dbi.dbi_vp = arg; + dbi.dbi_fp = arg; dbi.dbi_buf = buf; dbi.dbi_len = len; @@ -5346,22 +5346,21 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } else { - file_t *fp = getf(zc->zc_cookie); - if (fp == NULL) - return (SET_ERROR(EBADF)); - - off = fp->f_offset; + zfs_file_t *fp; dmu_send_outparams_t out = {0}; + + if ((error = zfs_file_get(zc->zc_cookie, &fp))) + return (error); + + off = zfs_file_off(fp); out.dso_outfunc = dump_bytes; - out.dso_arg = fp->f_vnode; + out.dso_arg = fp; out.dso_dryrun = B_FALSE; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, zc->zc_cookie, &off, &out); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; - releasef(zc->zc_cookie); + zfs_file_put(zc->zc_cookie); } return (error); } @@ -5924,21 +5923,17 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) static int zfs_ioc_diff(zfs_cmd_t *zc) { - file_t *fp; + zfs_file_t *fp; offset_t off; int error; - fp = getf(zc->zc_cookie); - if (fp == NULL) - return (SET_ERROR(EBADF)); - - off = fp->f_offset; + if ((error = zfs_file_get(zc->zc_cookie, &fp))) + return (error); - error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off); + off = zfs_file_off(fp); + error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; - releasef(zc->zc_cookie); + zfs_file_put(zc->zc_cookie); return (error); } 
@@ -6278,7 +6273,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) offset_t off; char *fromname = NULL; int fd; - file_t *fp; + zfs_file_t *fp; boolean_t largeblockok; boolean_t embedok; boolean_t compressok; @@ -6301,21 +6296,19 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) (void) nvlist_lookup_string(innvl, "redactbook", &redactbook); - if ((fp = getf(fd)) == NULL) - return (SET_ERROR(EBADF)); + if ((error = zfs_file_get(fd, &fp))) + return (error); + + off = zfs_file_off(fp); - off = fp->f_offset; dmu_send_outparams_t out = {0}; out.dso_outfunc = dump_bytes; - out.dso_arg = fp->f_vnode; + out.dso_arg = fp; out.dso_dryrun = B_FALSE; error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, rawok, resumeobj, resumeoff, redactbook, fd, &off, &out); - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; - - releasef(fd); + zfs_file_put(fd); return (error); } @@ -7438,7 +7431,7 @@ zfs_kmod_init(void) if ((error = zvol_init()) != 0) return (error); - spa_init(FREAD | FWRITE); + spa_init(SPA_MODE_READ | SPA_MODE_WRITE); zfs_init(); zfs_ioctl_init(); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 8b7c594f8882..9cf55c29b9f1 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -528,7 +527,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, else if (!spa_has_slogs(zilog->zl_spa) && resid >= zfs_immediate_write_sz) write_state = WR_INDIRECT; - else if (ioflag & (FSYNC | FDSYNC)) + else if (ioflag & (O_SYNC | O_DSYNC)) write_state = WR_COPIED; else write_state = WR_NEED_COPY; @@ -578,7 +577,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx->itx_private = ZTOZSB(zp); - if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) && + if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) && (fsync_cnt == 0)) itx->itx_sync = B_FALSE; diff --git 
a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 7dea85bb6614..59b3f3af163a 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -61,7 +60,6 @@ zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, { bzero(vap, sizeof (*vap)); vap->va_mask = (uint_t)mask; - vap->va_type = IFTOVT(mode); vap->va_mode = mode; vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid; @@ -796,7 +794,7 @@ zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) fl.l_start = lr->lr_offset; fl.l_len = lr->lr_length; - error = zfs_space(ZTOI(zp), F_FREESP, &fl, FWRITE | FOFFMAX, + error = zfs_space(ZTOI(zp), F_FREESP, &fl, O_RDWR | O_LARGEFILE, lr->lr_offset, kcred); iput(ZTOI(zp));