diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 5885d50cc489..35a29ed095dc 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -46,6 +46,7 @@ typedef enum vdev_dtl_type { extern int zfs_nocacheflush; extern int zfs_notrim; +extern int zfs_trim_zero; extern int vdev_open(vdev_t *); extern void vdev_open_children(vdev_t *); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 6b9aa733bcd8..3c399ae030df 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -64,6 +64,15 @@ static vdev_ops_t *vdev_ops_table[] = { /* maximum scrub/resilver I/O queue per leaf vdev */ int zfs_scrub_limit = 10; +/* + * When non-zero, make sure TRIM zeroes data (debugging aid; default off). + * + * On disk vdevs, skip DISCARD and write pages of zeroes in its place. + * + * On file vdevs, write zeroes only when hole punching fails. + */ +int zfs_trim_zero = 0; + /* * Given a vdev type, return the appropriate ops vector. */ @@ -3200,4 +3209,7 @@ EXPORT_SYMBOL(vdev_clear); module_param(zfs_scrub_limit, int, 0644); MODULE_PARM_DESC(zfs_scrub_limit, "Max scrub/resilver I/O per leaf vdev"); + +module_param(zfs_trim_zero, int, 0644); +MODULE_PARM_DESC(zfs_trim_zero, "Make sure TRIM zeroes data (only for debugging)"); #endif diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 6edabcd6c45d..dbab8992f613 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -494,15 +494,22 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, if (flags & REQ_DISCARD) { ASSERT(!kbuf_ptr); - if (!blk_queue_discard(q) || - !q->limits.max_discard_sectors) - return EOPNOTSUPP; - - max_discard_size = MIN(q->limits.max_discard_sectors << 9, - INT_MAX); - if (q->limits.discard_granularity) - max_discard_size &= ~(q->limits.discard_granularity - 1); - max_discard_size &= ~511; + if (zfs_trim_zero) + max_discard_size = PAGE_SIZE; /* zero-fill bios carry at most one page each */ + else { + if (!blk_queue_discard(q) || + !q->limits.max_discard_sectors) + return EOPNOTSUPP; + + max_discard_size = MIN( + q->limits.max_discard_sectors << 9, 
INT_MAX); + if (q->limits.discard_granularity) + max_discard_size &= + ~(q->limits.discard_granularity + - 1); + max_discard_size &= ~511; + } } ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size); @@ -563,8 +570,14 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, dr->dr_bio[i]->bi_private = dr; if (flags & REQ_DISCARD) { - dr->dr_bio[i]->bi_size = MIN(bio_size, - max_discard_size); + if (zfs_trim_zero) { + dr->dr_bio[i]->bi_rw &= ~REQ_DISCARD; /* clear the DISCARD flag; the bio gets the zero page as payload - NOTE(review): confirm the submit path does not set REQ_DISCARD again */ + bio_add_page(dr->dr_bio[i], + ZERO_PAGE(0), MIN(bio_size, + max_discard_size), 0); /* NOTE(review): return value of bio_add_page() is ignored - confirm it cannot fail here */ + } else + dr->dr_bio[i]->bi_size = MIN(bio_size, + max_discard_size); bio_size -= dr->dr_bio[i]->bi_size; } else { /* diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index 9ff6b6e80c1f..57ca54535b4e 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -30,6 +30,10 @@ #include #include +#ifndef _KERNEL +static char empty_zero_page[4096]; +#endif + /* * Virtual device vector for files. */ @@ -137,13 +141,55 @@ vdev_file_close(vdev_t *vd) vd->vdev_tsd = NULL; } +static void vdev_file_trim(zio_t *zio) /* serve a DKIOCTRIM zio on a file vdev; the result is left in zio->io_error */ +{ + struct flock fl; + uint64_t len; + ssize_t resid = 0; + vdev_t *vd = zio->io_vd; + vdev_file_t *vf = vd->vdev_tsd; + + if (vd->vdev_notrim) { /* trim already marked unsupported on this vdev */ + zio->io_error = EOPNOTSUPP; + return; + } + + bzero(&fl, sizeof(fl)); + fl.l_type = F_WRLCK; + fl.l_whence = 0; + fl.l_start = zio->io_offset; + fl.l_len = zio->io_size; + zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &fl, + FWRITE | FOFFMAX, zio->io_offset, kcred, NULL); /* try to punch a hole over the io_size bytes starting at io_offset */ + + if (zfs_trim_zero && zio->io_error) { /* hole punch failed: overwrite the range with zeroes instead */ + while (fl.l_len > 0) { + len = MIN(fl.l_len, sizeof(empty_zero_page)); + + zio->io_error = vn_rdwr(UIO_WRITE, vf->vf_vnode, + empty_zero_page, len, fl.l_start, + UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, + &resid); + if (resid != 0 && zio->io_error == 0) + zio->io_error = ENOSPC; /* leftover resid without an error code is reported as ENOSPC */ + if (zio->io_error) + return; + + fl.l_len -= len; + fl.l_start += len; + } + } + + if (zio->io_error == EOPNOTSUPP) + vd->vdev_notrim = 
B_TRUE; /* later trims on this vdev fail fast with EOPNOTSUPP */ +} + static int vdev_file_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; vdev_file_t *vf; ssize_t resid = 0; - struct flock fl; if (!vdev_readable(vd)) { zio->io_error = ENXIO; @@ -159,19 +205,7 @@ vdev_file_io_start(zio_t *zio) kcred, NULL); break; case DKIOCTRIM: - if (vd->vdev_notrim) - zio->io_error = EOPNOTSUPP; - else { - bzero(&fl, sizeof(fl)); - fl.l_type = F_WRLCK; - fl.l_whence = 0; - fl.l_start = zio->io_offset; - fl.l_len = zio->io_size; - zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &fl, - FWRITE | FOFFMAX, zio->io_offset, kcred, NULL); - if (zio->io_error == EOPNOTSUPP) - vd->vdev_notrim = B_TRUE; - } + vdev_file_trim(zio); break; default: zio->io_error = ENOTSUP;