Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenZFS - 6363 Add UNMAP/TRIM functionality #5925

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
009fc57
Refresh dkio.h and add dkioc_free_util.h
dweeezil Jul 23, 2015
56e203e
6363 Add UNMAP/TRIM functionality to ZFS
Apr 20, 2015
6174b30
Trimming an offlined vdev asserts in zio_create.
skiselkov Apr 15, 2017
368d6fa
Want extended zpool iostat trim support
dweeezil Apr 15, 2017
0223319
Matt Ahrens' review comments, round 2. Brian Behlendorf's review comm…
skiselkov Apr 19, 2017
3e21bdf
Async TRIM, Extended Stats
behlendorf Apr 18, 2017
545b210
Review feedback
behlendorf Apr 27, 2017
cf31b3d
Fix abd_alloc_sametype() panic
behlendorf Apr 28, 2017
d51c91d
Matt Ahrens' review comments.
skiselkov Apr 13, 2017
4b4b390
Matt Ahrens' review comments, round 3.
skiselkov Apr 26, 2017
5fe7d91
Tim Chase's review comments, round 2.
behlendorf May 22, 2017
1c7a1e1
Matt Ahrens' review comments, round 4:
skiselkov May 12, 2017
d58a40d
Deadlockiness associated with doing postponing trimming on a metaslab…
skiselkov May 18, 2017
a9fcf1e
Matt Ahrens' review comments, round 5.
skiselkov May 22, 2017
65bacdf
Deadlockiness in autotrim due to recent changes.
skiselkov May 22, 2017
704e6ea
Want manual trim feature to skip never-allocated space
Apr 10, 2017
06956b6
Update and add additional TRIM test cases
behlendorf May 22, 2017
036345e
Review feedback
behlendorf May 24, 2017
db2744e
Remove vdev_raidz_map_alloc()
huangheintel May 24, 2017
a325309
Review feedback 2
behlendorf May 25, 2017
30bbcd2
Add trim manpage
davidchenntnx Mar 28, 2018
125070b
Fix wrong logical operator
davidchenntnx Apr 11, 2018
abaf267
Wait for 1 sec before check trim status
davidchenntnx Apr 11, 2018
6288187
Clean-ups following rebase to master
dweeezil Jul 9, 2018
f68bf9a
ZIO_PIPELINE_CONTINUE
dweeezil Dec 26, 2018
048794e
More fixups
dweeezil Dec 26, 2018
d5615fd
Add tags to trim test cases
dweeezil Dec 30, 2018
586bdd9
Preserve activation flags when sorting metaslabs
dweeezil Jan 2, 2019
6e6f243
Trim should skip removed devices
dweeezil Jan 2, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 216 additions & 10 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
Expand Down Expand Up @@ -99,6 +99,7 @@ static int zpool_do_split(int, char **);

static int zpool_do_scrub(int, char **);
static int zpool_do_resilver(int, char **);
static int zpool_do_trim(int, char **);

static int zpool_do_import(int, char **);
static int zpool_do_export(int, char **);
Expand Down Expand Up @@ -152,6 +153,7 @@ typedef enum {
HELP_REMOVE,
HELP_SCRUB,
HELP_RESILVER,
HELP_TRIM,
HELP_STATUS,
HELP_UPGRADE,
HELP_EVENTS,
Expand Down Expand Up @@ -191,7 +193,7 @@ enum iostat_type {
* of all the nvlists a flag requires. Also specifies the order in
* which data gets printed in zpool iostat.
*/
static const char *vsx_type_to_nvlist[IOS_COUNT][11] = {
static const char *vsx_type_to_nvlist[IOS_COUNT][13] = {
[IOS_L_HISTO] = {
ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
Expand All @@ -202,19 +204,26 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][11] = {
ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
ZPOOL_CONFIG_VDEV_AUTO_TRIM_LAT_HISTO,
ZPOOL_CONFIG_VDEV_MAN_TRIM_LAT_HISTO,
NULL},
[IOS_LATENCY] = {
ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
ZPOOL_CONFIG_VDEV_AUTO_TRIM_LAT_HISTO,
ZPOOL_CONFIG_VDEV_MAN_TRIM_LAT_HISTO,
NULL},
[IOS_QUEUES] = {
ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_AUTO_TRIM_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_MAN_TRIM_ACTIVE_QUEUE,
NULL},
[IOS_RQ_HISTO] = {
ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,
Expand All @@ -227,6 +236,8 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][11] = {
ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO,
ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO,
ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,
ZPOOL_CONFIG_VDEV_IND_AUTO_TRIM_HISTO,
ZPOOL_CONFIG_VDEV_IND_MAN_TRIM_HISTO,
NULL},
};

Expand Down Expand Up @@ -281,6 +292,8 @@ static zpool_command_t command_table[] = {
{ "scrub", zpool_do_scrub, HELP_SCRUB },
{ "resilver", zpool_do_resilver, HELP_RESILVER },
{ NULL },
{ "trim", zpool_do_trim, HELP_TRIM },
{ NULL },
{ "import", zpool_do_import, HELP_IMPORT },
{ "export", zpool_do_export, HELP_EXPORT },
{ "upgrade", zpool_do_upgrade, HELP_UPGRADE },
Expand Down Expand Up @@ -364,6 +377,8 @@ get_usage(zpool_help_t idx)
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
return (gettext("\ttrim [-s|-r <rate>] <pool> ...\n"));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HELP_SCRUB needs the new -p, something like "gettext("\ttrim [-s] | [-p] [-r <rate>] <pool> ...\n"));"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually HELP_TRIM.

case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] "
"[-gLpPsvxD] [-T d|u] [pool] ... \n"
Expand Down Expand Up @@ -3191,21 +3206,22 @@ typedef struct name_and_columns {
unsigned int columns; /* Center name to this number of columns */
} name_and_columns_t;

#define IOSTAT_MAX_LABELS 11 /* Max number of labels on one line */
#define IOSTAT_MAX_LABELS 15 /* Max number of labels on one line */

static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] =
{
[IOS_DEFAULT] = {{"capacity", 2}, {"operations", 2}, {"bandwidth", 2},
{NULL}},
[IOS_LATENCY] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2},
{"asyncq_wait", 2}, {"scrub"}},
{"asyncq_wait", 2}, {"scrub"}, {"atrim"}, {"mtrim"}},
[IOS_QUEUES] = {{"syncq_read", 2}, {"syncq_write", 2},
{"asyncq_read", 2}, {"asyncq_write", 2}, {"scrubq_read", 2},
{NULL}},
{"auto_trimq", 2}, {"man_trimq", 2}, {NULL}},
[IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2},
{"sync_queue", 2}, {"async_queue", 2}, {NULL}},
[IOS_RQ_HISTO] = {{"sync_read", 2}, {"sync_write", 2},
{"async_read", 2}, {"async_write", 2}, {"scrub", 2}, {NULL}},
{"async_read", 2}, {"async_write", 2}, {"scrub", 2},
{"trim", 2}, {NULL}},

};

Expand All @@ -3215,13 +3231,16 @@ static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] =
[IOS_DEFAULT] = {{"alloc"}, {"free"}, {"read"}, {"write"}, {"read"},
{"write"}, {NULL}},
[IOS_LATENCY] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
{"write"}, {"read"}, {"write"}, {"wait"}, {NULL}},
{"write"}, {"read"}, {"write"}, {"wait"}, {"wait"},
{"wait"}, {NULL}},
[IOS_QUEUES] = {{"pend"}, {"activ"}, {"pend"}, {"activ"}, {"pend"},
{"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}},
{"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"},
{"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}},
[IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
{"write"}, {"read"}, {"write"}, {"scrub"}, {NULL}},
{"write"}, {"read"}, {"write"}, {"scrub"}, {"atrim"},
{"mtrim"}, {NULL}},
[IOS_RQ_HISTO] = {{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"},
{"ind"}, {"agg"}, {"ind"}, {"agg"}, {NULL}},
{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"auto"}, {"man"}, {NULL}},
};

static const char *histo_to_title[] = {
Expand Down Expand Up @@ -3845,6 +3864,10 @@ print_iostat_queues(iostat_cbdata_t *cb, nvlist_t *oldnv,
ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,
ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_AUTO_TRIM_PEND_QUEUE,
ZPOOL_CONFIG_VDEV_AUTO_TRIM_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_MAN_TRIM_PEND_QUEUE,
ZPOOL_CONFIG_VDEV_MAN_TRIM_ACTIVE_QUEUE,
};

struct stat_array *nva;
Expand Down Expand Up @@ -3883,6 +3906,8 @@ print_iostat_latency(iostat_cbdata_t *cb, nvlist_t *oldnv,
ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
ZPOOL_CONFIG_VDEV_AUTO_TRIM_LAT_HISTO,
ZPOOL_CONFIG_VDEV_MAN_TRIM_LAT_HISTO,
};
struct stat_array *nva;

Expand Down Expand Up @@ -6392,6 +6417,32 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (err != 0);
}

/*
 * Per-invocation arguments for "zpool trim"; a pointer to this structure is
 * handed to trim_callback() for every pool named on the command line, and
 * the three members are forwarded unchanged to zpool_trim().
 */
typedef struct trim_cbdata {
	/* B_TRUE by default; the -s option clears it to request a stop */
	boolean_t	cb_start;
	/* value parsed from -r <rate>; 0 when no rate was given */
	uint64_t	cb_rate;
	/* B_TRUE by default; the -p (partial trim) option clears it */
	boolean_t	cb_fulltrim;
} trim_cbdata_t;

/*
 * Per-pool worker for "zpool trim".
 *
 * 'data' points at the trim_cbdata_t prepared by the caller.  Returns 0 when
 * zpool_trim() reports success, and 1 when it fails or when the pool is in
 * the POOL_STATE_UNAVAIL state (in which case a message is written to stderr
 * and zpool_trim() is not called).
 */
int
trim_callback(zpool_handle_t *zhp, void *data)
{
	trim_cbdata_t *args = data;

	if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
		return (zpool_trim(zhp, args->cb_start, args->cb_rate,
		    args->cb_fulltrim) != 0);
	}

	/* Unavailable pools are reported and skipped. */
	(void) fprintf(stderr, gettext("cannot trim '%s': pool is "
	    "currently unavailable\n"), zpool_get_name(zhp));
	return (1);
}

/*
* zpool scrub [-s | -p] <pool> ...
*
Expand Down Expand Up @@ -6480,6 +6531,53 @@ zpool_do_resilver(int argc, char **argv)
return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
}

/*
* zpool trim [-p] [-s|-r <rate>] <pool> ...
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing -p

zpool trim [-ps|-r <rate>] <pool> ...

You're also going to want to add it to HELP_TRIM above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

*
* -p Partial trim. Skips never-allocated space.
* -s Stop. Stops any in-progress trim.
* -r <rate> Sets the TRIM rate in bytes (per second). Supports
* adding a multiplier suffix such as 'k' or 'm'.
*/
int
zpool_do_trim(int argc, char **argv)
{
int c;
trim_cbdata_t cb;

cb.cb_start = B_TRUE;
cb.cb_rate = 0;
cb.cb_fulltrim = B_TRUE;

/* check options */
while ((c = getopt(argc, argv, "psr:")) != -1) {
switch (c) {
case 'p':
cb.cb_fulltrim = B_FALSE;
break;
case 's':
cb.cb_start = B_FALSE;
break;
case 'r':
if (zfs_nicestrtonum(NULL, optarg, &cb.cb_rate) == -1) {
(void) fprintf(stderr,
gettext("invalid value for rate\n"));
usage(B_FALSE);
}
break;
}
}

argc -= optind;
argv += optind;

if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
usage(B_FALSE);
}

return (for_each_pool(argc, argv, B_TRUE, NULL, trim_callback, &cb));
}

/*
* Print out detailed scrub status.
Expand Down Expand Up @@ -6803,6 +6901,58 @@ print_checkpoint_status(pool_checkpoint_stat_t *pcs)
space_buf);
}

/*
 * Render a timestamp with ctime(3), without the trailing newline.
 *
 * Returns a pointer to ctime()'s static buffer, or "-" when ctime() cannot
 * represent the value and returns NULL.  The result is only valid until the
 * next ctime() call.
 */
static const char *
trim_time_str(time_t when)
{
	char *str = ctime(&when);

	if (str == NULL)
		return ("-");
	/* strcspn() is safe even if the string is empty or has no newline */
	str[strcspn(str, "\n")] = '\0';
	return (str);
}

/*
 * Print the one-line "trim:" summary of a pool's trim state.
 *
 *	trim_prog	progress counter, compared against total_size
 *	total_size	value that trim_prog reaches when the trim is complete
 *	rate		rate limit; 0 is printed as "none"
 *	start_time_u64	start timestamp (passed to ctime); 0 if never started
 *	end_time_u64	end timestamp (passed to ctime); 0 if not completed
 *
 * A trim is reported as in progress when trim_prog is non-zero and differs
 * from total_size.  Otherwise the start/end times decide between
 * "completed", "interrupted" and "none requested".
 */
static void
print_trim_status(uint64_t trim_prog, uint64_t total_size, uint64_t rate,
    uint64_t start_time_u64, uint64_t end_time_u64)
{
	time_t start_time = start_time_u64, end_time = end_time_u64;

	if (trim_prog != 0 && trim_prog != total_size) {
		const char *started = trim_time_str(start_time);
		double pct;

		/* Cap at 100%; avoid dividing by a zero total. */
		if (total_size == 0)
			pct = 100;
		else
			pct = MIN((((double)trim_prog) / total_size) * 100,
			    100);

		if (rate != 0) {
			char rate_str[32];

			zfs_nicenum(rate, rate_str, sizeof (rate_str));
			(void) printf(gettext(" trim: %.02f%%\tstarted: %s\t"
			    "(rate limit: %s/s)\n"), pct, started, rate_str);
		} else {
			(void) printf(gettext(" trim: %.02f%%\tstarted: %s\t"
			    "(rate limit: none)\n"), pct, started);
		}
		return;
	}

	if (start_time == 0) {
		/* trim was never run */
		(void) printf(gettext(" trim: none requested\n"));
		return;
	}

	/*
	 * Non-zero start time means we were run at some point
	 * in the past.
	 */
	if (end_time != 0) {
		/* Non-zero end time means we completed */
		time_t diff = end_time - start_time;
		int hrs = diff / 3600;
		int mins = (diff % 3600) / 60;

		(void) printf(gettext(" trim: completed on %s "
		    "(after %dh%dm)\n"), trim_time_str(end_time), hrs, mins);
	} else {
		/* Zero end time means we were interrupted */
		(void) printf(gettext(" trim: interrupted\t"
		    "(started %s)\n"), trim_time_str(start_time));
	}
}

static void
print_error_log(zpool_handle_t *zhp)
{
Expand Down Expand Up @@ -6914,6 +7064,43 @@ print_dedup_stats(nvlist_t *config)
zpool_dump_ddt(dds, ddh);
}

/*
 * Sum the space of the log devices found among the direct children of
 * 'nvroot'.  The caller adds this to the root vdev's figures because, per
 * the original author's note, the root vdev's space stats do not count log
 * devices.
 *
 * Each child flagged ZPOOL_CONFIG_IS_LOG contributes its vs_space multiplied
 * by the number of its own children (1 when it has none).  Both the root's
 * child array and each log vdev's stats are required; their absence trips
 * verify().
 */
static uint64_t
zpool_slog_space(nvlist_t *nvroot)
{
	nvlist_t **top;
	uint_t idx, ntop;
	uint64_t total = 0;

	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &top, &ntop) == 0);

	for (idx = 0; idx < ntop; idx++) {
		nvlist_t *tvd = top[idx];
		uint64_t is_log = B_FALSE;

		/* A missing IS_LOG entry leaves is_log at B_FALSE. */
		(void) nvlist_lookup_uint64(tvd, ZPOOL_CONFIG_IS_LOG, &is_log);

		if (is_log) {
			vdev_stat_t *stats;
			uint_t nstats;
			nvlist_t **leaves;
			uint_t nleaves = 1;

			verify(nvlist_lookup_uint64_array(tvd,
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&stats,
			    &nstats) == 0);

			/*
			 * vdev can be non-leaf, so multiply by number of
			 * children; a failed lookup keeps the count at 1.
			 */
			(void) nvlist_lookup_nvlist_array(tvd,
			    ZPOOL_CONFIG_CHILDREN, &leaves, &nleaves);
			total += nleaves * stats->vs_space;
		}
	}

	return (total);
}

/*
* Display a summary of pool status. Displays a summary such as:
*
Expand Down Expand Up @@ -7231,6 +7418,7 @@ status_callback(zpool_handle_t *zhp, void *data)
pool_checkpoint_stat_t *pcs = NULL;
pool_scan_stat_t *ps = NULL;
pool_removal_stat_t *prs = NULL;
uint64_t trim_prog, trim_rate, trim_start_time, trim_stop_time;

(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c);
Expand All @@ -7249,6 +7437,24 @@ status_callback(zpool_handle_t *zhp, void *data)
if (cbp->cb_namewidth < 10)
cbp->cb_namewidth = 10;

/* Grab trim stats if the pool supports it */
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_PROG,
&trim_prog) == 0 &&
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_RATE,
&trim_rate) == 0 &&
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_START_TIME,
&trim_start_time) == 0 &&
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_STOP_TIME,
&trim_stop_time) == 0) {
/*
* For whatever reason, root vdev_stats_t don't
* include log devices.
*/
print_trim_status(trim_prog, (vs->vs_space -
vs->vs_alloc) + zpool_slog_space(nvroot),
trim_rate, trim_start_time, trim_stop_time);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add both new TRIM queues to the extended zpool iostat -qlrw output to complement the kstat. This way we'll be able to see in-flight/pending trims in the queues and request size and latency histograms of the TRIM request zios. That should provide some nice visibility into how specific devices are behaving. The existing stats are collected in vdev_config_generate_stats() and reported as part of ZFS_IOC_POOL_STATS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do. It'll be included in the next PR refresh.


(void) printf(gettext("config:\n\n"));
(void) printf(gettext("\t%-*s %-8s %5s %5s %5s"),
cbp->cb_namewidth, "NAME", "STATE", "READ", "WRITE",
Expand Down
Loading