Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TRIM/UNMAP/DISCARD support for vdevs #924

Closed
wants to merge 43 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
cc6cd40
Add TRIM support for disk vdevs.
dechamps Aug 31, 2012
248da0c
Fix "‘flags’ may be used uninitialized" GCC warning.
dechamps Sep 1, 2012
ce1e714
Fix null dereference bug in zio.
dechamps Sep 1, 2012
4e789e1
Add TRIM support for file vdevs.
dechamps Sep 3, 2012
8889058
Fix error handling in fop_space().
dechamps Sep 3, 2012
ce8d0bf
Use EOPNOTSUPP instead of ENOTSUP when DISCARD isn't supported.
dechamps Sep 5, 2012
95b1e94
Don't try to DISCARD if we know we can't.
dechamps Sep 6, 2012
6a32827
Don't ignore EOPNOTSUPP for file vdevs.
dechamps Sep 6, 2012
32d0e89
Don't allow live modification of zfs_notrim.
dechamps Sep 6, 2012
ac998d3
Fix assertion failure due to stale inflight writes.
dechamps Sep 6, 2012
0a616c2
Fix assertion failure due to inflight frees.
dechamps Sep 7, 2012
51c1a59
Don't reset notrim after low-level vdev open.
dechamps Sep 10, 2012
19afab1
Make sure free ZIO sizes are correct for gang blocks.
dechamps Sep 12, 2012
eefbe9b
Don't test for gang blocks when populating the trim map.
dechamps Sep 12, 2012
4f21d6f
Use correct I/O size for free gang data blocks.
dechamps Sep 12, 2012
5a53020
Don't TRIM after the SPA has been frozen.
dechamps Sep 14, 2012
a324087
Add trim_map.h to the list of header files.
dechamps Sep 14, 2012
65be9d3
Make sure zio->io_trim_node is empty on ZIO construction.
dechamps Sep 17, 2012
33194d4
Add ZIO TRIM statistics.
dechamps Sep 17, 2012
58f7dd8
Explain why we're not using BLKDISCARD in fop_space().
dechamps Sep 17, 2012
fbc1546
Merge vdev_disk_io_trim() into __vdev_disk_physio().
dechamps Sep 18, 2012
37806f0
Add zfs_trim_zero parameter.
dechamps Sep 18, 2012
4cb878d
Use zfs_trim_zero=1 in ztest.
dechamps Sep 18, 2012
2bcfaf6
Fix BIO memory issues when using TRIM on disk vdevs.
dechamps Sep 18, 2012
d9ee94e
Submit BIOs with the correct parameters.
dechamps Sep 18, 2012
06e385f
Fix TRIM with ashift=12.
dechamps Sep 19, 2012
906e314
Add conditional compilation for old kernels.
dechamps Sep 19, 2012
464818e
Remove useless configure checking.
dechamps Sep 20, 2012
dccbfaf
Fix an issue with a NULL zio buf free on raidz.
dechamps Sep 20, 2012
8cbb5cd
TRIM the whole vdev on create/add/attach.
dechamps Sep 20, 2012
a3adbc8
Take zfs_notrim into account when trimming whole vdevs.
dechamps Sep 21, 2012
b3dac52
Lock SCL_STATE while waiting for TRIM I/Os to complete.
dechamps Sep 21, 2012
dabccd1
Don't use ZIO_FLAG_CONFIG_WRITER for TRIM ZIOs.
dechamps Sep 21, 2012
deda532
Add TRIM support for L2ARC.
dechamps Sep 25, 2012
94d6d97
Switch KM_SLEEP to KM_PUSHPAGE
dechamps Sep 25, 2012
4c1f8e1
TRIM cache devices on export and remove.
dechamps Sep 25, 2012
1f06386
Trim whole cache vdev on pool destroy.
dechamps Sep 25, 2012
7be8b0c
Don't register repair writes in the trim map.
dechamps Oct 2, 2012
b967a38
Improve TXG handling in the TRIM module.
dechamps Oct 3, 2012
765351d
Silence the "task trim blocked" kernel message.
dechamps Oct 3, 2012
206dddd
Add TRIM TXG batching.
dechamps Oct 3, 2012
d08ba15
Simplify.
dechamps Oct 3, 2012
5445fb9
TRIM cache devices based on time instead of TXGs.
dechamps Oct 3, 2012
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -5683,6 +5683,9 @@ main(int argc, char **argv)
VERIFY(asprintf((char **)&spa_config_path, "%s/zpool.cache",
zopt_dir) != -1);

/* Make sure TRIM zeroes data so that we can test it */
zfs_trim_zero = 1;

/*
* Blow away any existing copy of zpool.cache
*/
Expand Down
20 changes: 20 additions & 0 deletions config/kernel-max-discard-sectors.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
dnl #
dnl # 2.6.32 API change
dnl # max_discard_sectors is available.
dnl #
dnl # Probes for a max_discard_sectors member in struct queue_limits by
dnl # handing ZFS_LINUX_TRY_COMPILE a snippet that includes
dnl # <linux/blkdev.h> and assigns to ql.max_discard_sectors.  On the
dnl # success branch HAVE_MAX_DISCARD_SECTORS is defined to 1; on the
dnl # failure branch only "no" is reported and nothing is defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_MAX_DISCARD_SECTORS], [
AC_MSG_CHECKING([whether ql->max_discard_sectors is available])
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct queue_limits ql __attribute__ ((unused));

ql.max_discard_sectors = 0;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_MAX_DISCARD_SECTORS, 1,
[ql->max_discard_sectors is available])
],[
AC_MSG_RESULT(no)
])
])
1 change: 1 addition & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_GET_GENDISK
ZFS_AC_KERNEL_RQ_IS_SYNC
ZFS_AC_KERNEL_RQ_FOR_EACH_SEGMENT
ZFS_AC_KERNEL_MAX_DISCARD_SECTORS
ZFS_AC_KERNEL_DISCARD_GRANULARITY
ZFS_AC_KERNEL_CONST_XATTR_HANDLER
ZFS_AC_KERNEL_XATTR_HANDLER_GET
Expand Down
37 changes: 37 additions & 0 deletions include/linux/blkdev_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,37 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
# define VDEV_REQ_DISCARD REQ_DISCARD
#endif

/*
 * 2.6.32 API change
 * Fallback for blk_queue_discard(), compiled only when REQ_DISCARD is
 * defined and HAVE_BLK_QUEUE_DISCARD is not (presumably set by configure
 * when the kernel already provides blk_queue_discard()).
 *
 * Returns 1 when the queue has a prepare_discard_fn hook installed and
 * 0 otherwise.
 */
#if defined(REQ_DISCARD) && !defined(HAVE_BLK_QUEUE_DISCARD)
static inline unsigned long
blk_queue_discard(struct request_queue *q)
{
	return (q->prepare_discard_fn ? 1UL : 0UL);
}
#endif /* REQ_DISCARD && !HAVE_BLK_QUEUE_DISCARD */

/*
 * 2.6.32 API change
 * Returns the maximum discard request size for a queue.  The value is
 * read from a different field depending on the kernel:
 *   - with HAVE_MAX_DISCARD_SECTORS (2.6.32 and later):
 *       request_queue.limits.max_discard_sectors
 *   - otherwise (before 2.6.32):
 *       request_queue.max_hw_sectors
 */
static inline unsigned int
blk_queue_max_discard_sectors_get(struct request_queue *q)
{
	unsigned int max_sectors;

#ifdef HAVE_MAX_DISCARD_SECTORS
	max_sectors = q->limits.max_discard_sectors;
#else
	max_sectors = q->max_hw_sectors;
#endif /* HAVE_MAX_DISCARD_SECTORS */

	return (max_sectors);
}

/*
* 2.6.33 API change
* Discard granularity and alignment restrictions may now be set. For
Expand All @@ -443,8 +474,14 @@ blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
{
q->limits.discard_granularity = dg;
}
/*
 * Read back the discard granularity stored in the queue limits; the
 * counterpart of the blk_queue_discard_granularity() setter.
 */
static inline unsigned int
blk_queue_discard_granularity_get(struct request_queue *q)
{
	unsigned int granularity = q->limits.discard_granularity;

	return (granularity);
}
#else
#define blk_queue_discard_granularity(x, dg) ((void)0)
#define blk_queue_discard_granularity_get(x) (0)
#endif /* HAVE_DISCARD_GRANULARITY */

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
$(top_srcdir)/include/sys/trim_map.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
$(top_srcdir)/include/sys/u8_textprep_data.h \
Expand Down
2 changes: 1 addition & 1 deletion include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void arc_fini(void);
*/

void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
void l2arc_remove_vdev(vdev_t *vd);
void l2arc_remove_vdev(vdev_t *vd, int permanent);
boolean_t l2arc_vdev_present(vdev_t *vd);
void l2arc_init(void);
void l2arc_fini(void);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ struct spa {
spa_proc_state_t spa_proc_state; /* see definition */
proc_t *spa_proc; /* "zpool-poolname" process */
uint64_t spa_did; /* if procp != p0, did of t1 */
kthread_t *spa_trim_thread; /* thread sending TRIM I/Os */
kmutex_t spa_trim_lock; /* protects spa_trim_cv */
kcondvar_t spa_trim_cv; /* used to notify TRIM thread */
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
Expand Down
51 changes: 51 additions & 0 deletions include/sys/trim_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 Pawel Jakub Dawidek <[email protected]>.
* All rights reserved.
*/

#ifndef _SYS_TRIM_MAP_H
#define _SYS_TRIM_MAP_H

#include <sys/avl.h>
#include <sys/list.h>
#include <sys/spa.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Trim map interface.  The create/destroy/free calls operate on a single
 * vdev; the write_start/write_done pair is keyed on a zio.
 * NOTE(review): the exact semantics (e.g. what a B_FALSE return from
 * trim_map_write_start() means for the caller) are defined by the
 * implementation, which is not visible from this header -- confirm
 * against trim_map.c before relying on them.
 */
extern void trim_map_create(vdev_t *vd);
extern void trim_map_destroy(vdev_t *vd);
extern void trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg);
extern boolean_t trim_map_write_start(zio_t *zio);
extern void trim_map_write_done(zio_t *zio);

/*
 * TRIM thread management; each call takes the pool (spa_t) it applies to.
 */
extern void trim_thread_create(spa_t *spa);
extern void trim_thread_destroy(spa_t *spa);
extern void trim_thread_wakeup(spa_t *spa);

#ifdef __cplusplus
}
#endif

#endif /* _SYS_TRIM_MAP_H */
2 changes: 2 additions & 0 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ typedef enum vdev_dtl_type {
} vdev_dtl_type_t;

extern int zfs_nocacheflush;
extern int zfs_notrim;
extern int zfs_trim_zero;

extern int vdev_open(vdev_t *);
extern void vdev_open_children(vdev_t *);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ struct vdev {
uint64_t vdev_unspare; /* unspare when resilvering done */
hrtime_t vdev_last_try; /* last reopen time */
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
boolean_t vdev_notrim; /* true if trim failed */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
boolean_t vdev_splitting; /* split or repair in progress */
Expand All @@ -201,6 +202,7 @@ struct vdev {
spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
struct trim_map *vdev_trimmap;

/*
* For DTrace to work in userland (libzpool) context, these fields must
Expand Down
6 changes: 6 additions & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,13 +490,19 @@ typedef struct vsecattr {

#define CRCREAT 0

#define F_FREESP 11 /* Free file space */

extern int fop_getattr(vnode_t *vp, vattr_t *vap);
extern int fop_space(vnode_t *vp, int cmd, struct flock *bfp,
int flag, off_t offset);

/*
 * Stand-ins for the vnode operations.  VOP_CLOSE and VOP_PUTPAGE expand
 * to the constant 0; the remaining macros forward to fop_getattr(),
 * fsync() and fop_space(), dropping the arguments those do not take.
 *
 * None of the expansions ends in a semicolon, so every macro can be used
 * as an expression (in a condition, or as the body of an unbraced
 * if/else) with the caller supplying the terminating semicolon.
 */
#define VOP_CLOSE(vp, f, c, o, cr, ct) 0
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap))

#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
#define VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct) \
	fop_space((vp), (cmd), (bfp), (flag), (offset))

#define VN_RELE(vp) vn_close(vp)

Expand Down
46 changes: 43 additions & 3 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ enum zio_compress {
#define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11])
#define ZIO_PRIORITY_TABLE_SIZE 12
#define ZIO_PRIORITY_TRIM (zio_priority_table[12])
#define ZIO_PRIORITY_TABLE_SIZE 13

#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
Expand Down Expand Up @@ -357,6 +358,39 @@ typedef struct zio_link {
list_node_t zl_child_node;
} zio_link_t;

/*
 * Counters backing the TRIM kstat.
 */
typedef struct zio_trim_stats {
	/* Bytes successfully TRIMmed. */
	kstat_named_t zio_trim_bytes;

	/* TRIM requests that succeeded. */
	kstat_named_t zio_trim_success;

	/* TRIM requests that failed because TRIM is not supported. */
	kstat_named_t zio_trim_unsupported;

	/* TRIM requests that failed for any other reason. */
	kstat_named_t zio_trim_failed;
} zio_trim_stats_t;

extern zio_trim_stats_t zio_trim_stats;

/*
 * Update a counter in zio_trim_stats.
 *
 * ZIO_TRIM_STAT_INCR(stat, val) adds val to the ui64 value of the named
 * member via atomic_add_64(); ZIO_TRIM_STAT_BUMP(stat) adds 1.
 *
 * The expansions deliberately carry no trailing semicolon: the caller
 * writes it.  A semicolon inside the macro would produce an extra empty
 * statement and break "if (...) ZIO_TRIM_STAT_BUMP(x); else ...".
 */
#define ZIO_TRIM_STAT_INCR(stat, val) \
	atomic_add_64(&zio_trim_stats.stat.value.ui64, (val))
#define ZIO_TRIM_STAT_BUMP(stat) \
	ZIO_TRIM_STAT_INCR(stat, 1)

struct zio {
/* Core information about this I/O */
zbookmark_t io_bookmark;
Expand Down Expand Up @@ -429,6 +463,9 @@ struct zio {

/* Taskq dispatching state */
taskq_ent_t io_tqent;

avl_node_t io_trim_node;
list_node_t io_trim_link;
};

extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
Expand Down Expand Up @@ -459,7 +496,8 @@ extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
zio_done_func_t *done, void *private, enum zio_flag flags);

extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, int priority, enum zio_flag flags);
uint64_t offset, uint64_t size, zio_done_func_t *done,
void *private, int priority, enum zio_flag flags);

extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
Expand All @@ -472,12 +510,14 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
boolean_t labels);

extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
const blkptr_t *bp, uint64_t size, enum zio_flag flags);

extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t size, boolean_t use_slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern zio_t *zio_trim(zio_t *zio, spa_t *spa, vdev_t *vd,
uint64_t offset, uint64_t size, enum zio_flag flags);
extern void zio_shrink(zio_t *zio, uint64_t size);

extern int zio_wait(zio_t *zio);
Expand Down
10 changes: 6 additions & 4 deletions include/sys/zio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ enum zio_stage {

ZIO_STAGE_READY = 1 << 15, /* RWFCI */

ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RW--I */
ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RWF-I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RWF-- */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RWF-I */

ZIO_STAGE_CHECKSUM_VERIFY = 1 << 19, /* R---- */

Expand Down Expand Up @@ -143,7 +143,9 @@ enum zio_stage {
#define ZIO_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_FREE_BP_INIT | \
ZIO_STAGE_DVA_FREE)
ZIO_STAGE_DVA_FREE | \
ZIO_STAGE_VDEV_IO_START | \
ZIO_STAGE_VDEV_IO_ASSESS)

#define ZIO_DDT_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
Expand Down
2 changes: 2 additions & 0 deletions lib/libspl/include/sys/dkio.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ struct dk_geom {
*/
#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */

#define DKIOCTRIM (DKIOC|35) /* TRIM a block */

struct dk_callback {
void (*dkc_callback)(void *dkc_cookie, int error);
void *dkc_cookie;
Expand Down
8 changes: 8 additions & 0 deletions lib/libspl/include/sys/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include_next <sys/time.h>
#include <sys/types.h>

#ifndef TIME_MAX
#define TIME_MAX LLONG_MAX
#endif

#ifndef SEC
#define SEC 1
#endif
Expand All @@ -50,6 +54,10 @@
#define NSEC_PER_USEC 1000L
#endif

#ifndef NSEC_PER_SEC
#define NSEC_PER_SEC 1000000000L
#endif

extern hrtime_t gethrtime(void);
extern void gethrestime(timestruc_t *);

Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/spa_history.c \
$(top_srcdir)/module/zfs/spa_misc.c \
$(top_srcdir)/module/zfs/space_map.c \
$(top_srcdir)/module/zfs/trim_map.c \
$(top_srcdir)/module/zfs/txg.c \
$(top_srcdir)/module/zfs/uberblock.c \
$(top_srcdir)/module/zfs/unique.c \
Expand Down
Loading