From 8840d44301c5ba315479ad8eee29cbe804a94160 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 28 Mar 2023 08:19:03 -0700 Subject: [PATCH] Additional limits on hole reporting Holding the zp->z_rangelock as a RL_READER over the range 0-UINT64_MAX is sufficient to prevent the dnode from being re-dirtied by concurrent writers. To avoid potentially looping multiple times for external callers which do not take the rangelock, holes are not reported after the first sync. While not optimal, this is always functionally correct. This change adds the missing rangelock calls on FreeBSD to zvol_cdev_ioctl(). Reviewed-by: Brian Atkinson Signed-off-by: Brian Behlendorf Closes #14512 Closes #14641 --- module/os/freebsd/zfs/zvol_os.c | 3 +++ module/zfs/dmu.c | 32 ++++++++++++++++++-------------- module/zfs/zfs_vnops.c | 2 +- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index 65981d7e13cb..eccf5e4d2622 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -1212,7 +1212,10 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, hole = (cmd == FIOSEEKHOLE); noff = *off; + lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, + RL_READER); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); + zfs_rangelock_exit(lr); *off = noff; break; } diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b6c75280eee8..9a6d98c8e347 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2123,18 +2123,18 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) } /* - * This function is only called from zfs_holey_common() for zpl_llseek() - * in order to determine the location of holes. In order to accurately - * report holes all dirty data must be synced to disk. This causes extremely - * poor performance when seeking for holes in a dirty file. As a compromise, - * only provide hole data when the dnode is clean. 
When a dnode is dirty - * report the dnode as having no holes which is always a safe thing to do. + * Reports the location of data and holes in an object. In order to + * accurately report holes all dirty data must be synced to disk. This + * causes extremely poor performance when seeking for holes in a dirty file. + * As a compromise, only provide hole data when the dnode is clean. When + * a dnode is dirty report the dnode as having no holes by returning EBUSY + * which is always safe to do. */ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int err; + int restarted = 0, err; restart: err = dnode_hold(os, object, FTAG, &dn); @@ -2146,19 +2146,23 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) if (dnode_is_dirty(dn)) { /* * If the zfs_dmu_offset_next_sync module option is enabled - * then strict hole reporting has been requested. Dirty - * dnodes must be synced to disk to accurately report all - * holes. When disabled dirty dnodes are reported to not - * have any holes which is always safe. + * then hole reporting has been requested. Dirty dnodes + * must be synced to disk to accurately report holes. * - * When called by zfs_holey_common() the zp->z_rangelock - * is held to prevent zfs_write() and mmap writeback from - * re-dirtying the dnode after txg_wait_synced(). + * Provided a RL_READER rangelock spanning 0-UINT64_MAX is + * held by the caller only a single restart will be required. + * We tolerate callers which do not hold the rangelock by + * returning EBUSY and not reporting holes after one restart. 
*/ if (zfs_dmu_offset_next_sync) { rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); + + if (restarted) + return (SET_ERROR(EBUSY)); + txg_wait_synced(dmu_objset_pool(os), 0); + restarted = 1; goto restart; } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index db80be783899..91b594e41cda 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -111,7 +111,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) if (zn_has_cached_data(zp, 0, file_sz - 1)) zn_flush_cached_data(zp, B_FALSE); - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER); + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); zfs_rangelock_exit(lr);