-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add TRIM support - replaces #5925 and #7363 #8255
Closed
Closed
Changes from all commits
Commits
Show all changes
38 commits
Select commit
Hold shift + click to select a range
21306c0
Refresh dkio.h and add dkioc_free_util.h
dweeezil aec7542
6363 Add UNMAP/TRIM functionality to ZFS
1748965
Trimming an offlined vdev asserts in zio_create.
skiselkov 6f7dc83
Want extended zpool iostat trim support
dweeezil 0f0c103
Matt Ahrens' review comments, round 2. Brian Behlendorf's review comm…
skiselkov 3ccb6dd
Async TRIM, Extended Stats
behlendorf f05d5e0
Review feedback
behlendorf 957d56f
Fix abd_alloc_sametype() panic
behlendorf d27976d
Matt Ahrens' review comments.
skiselkov 8fb4ccf
Matt Ahrens' review comments, round 3.
skiselkov 76439d5
Tim Chase's review comments, round 2.
behlendorf f476747
Matt Ahrens' review comments, round 4:
skiselkov 5614f2b
Deadlockiness associated with doing postponing trimming on a metaslab…
skiselkov feae3c2
Matt Ahrens' review comments, round 5.
skiselkov 7d12663
Deadlockiness in autotrim due to recent changes.
skiselkov feb47a9
Want manual trim feature to skip never-allocated space
0e381de
Update and add additional TRIM test cases
behlendorf 46d31de
Review feedback
behlendorf d56cfce
Remove vdev_raidz_map_alloc()
huangheintel ae1457d
Review feedback 2
behlendorf 8b88b3e
Add trim manpage
davidchenntnx 977c20e
Fix wrong logical operator
davidchenntnx 3670596
Wait for 1 sec before check trim status
davidchenntnx f19d6f6
Clean-ups following rebase to master
dweeezil ec4e894
ZIO_PIPELINE_CONTINUE fix
dweeezil 0b8cc39
More fixups
dweeezil 26fdfdb
Add tags to trim test cases
dweeezil 3c5eb9c
Preserve activation flags when sorting metaslabs
dweeezil 014e259
Trim should skip removed devices
dweeezil af4a108
Don't dereference null vdev_ms
dweeezil c641ee6
Account for non-concrete vdevs in spa_num_auto_trimming
dweeezil f8e5760
Use proper tag for spa config refcounts
dweeezil c18f20a
Re-instate tracked spa config refcounts
dweeezil 562bb92
Skip non-concrete vdevs...
dweeezil 7806538
Re-work management of the auto trim taskq...
dweeezil e38d791
Move the auto trim taskq start-up on import
dweeezil 8d4a118
Set spa->spa_auto_trim on pool creation
dweeezil 3a184b8
Only call spa_man_trim_taskq_destroy() when needed
dweeezil File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,8 +21,8 @@ | |
|
||
/* | ||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | ||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. | ||
* Copyright (c) 2011, 2018 by Delphix. All rights reserved. | ||
* Copyright 2017 Nexenta Systems, Inc. All rights reserved. | ||
* Copyright (c) 2012 by Frederik Wessels. All rights reserved. | ||
* Copyright (c) 2012 by Cyril Plisko. All rights reserved. | ||
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. | ||
|
@@ -100,6 +100,7 @@ static int zpool_do_split(int, char **); | |
static int zpool_do_initialize(int, char **); | ||
static int zpool_do_scrub(int, char **); | ||
static int zpool_do_resilver(int, char **); | ||
static int zpool_do_trim(int, char **); | ||
|
||
static int zpool_do_import(int, char **); | ||
static int zpool_do_export(int, char **); | ||
|
@@ -154,6 +155,7 @@ typedef enum { | |
HELP_INITIALIZE, | ||
HELP_SCRUB, | ||
HELP_RESILVER, | ||
HELP_TRIM, | ||
HELP_STATUS, | ||
HELP_UPGRADE, | ||
HELP_EVENTS, | ||
|
@@ -193,7 +195,7 @@ enum iostat_type { | |
* of all the nvlists a flag requires. Also specifies the order in | ||
* which data gets printed in zpool iostat. | ||
*/ | ||
static const char *vsx_type_to_nvlist[IOS_COUNT][11] = { | ||
static const char *vsx_type_to_nvlist[IOS_COUNT][13] = { | ||
[IOS_L_HISTO] = { | ||
ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, | ||
|
@@ -204,19 +206,26 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][11] = { | |
ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_AUTO_TRIM_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_MAN_TRIM_LAT_HISTO, | ||
NULL}, | ||
[IOS_LATENCY] = { | ||
ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_AUTO_TRIM_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_MAN_TRIM_LAT_HISTO, | ||
NULL}, | ||
[IOS_QUEUES] = { | ||
ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_AUTO_TRIM_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_MAN_TRIM_ACTIVE_QUEUE, | ||
NULL}, | ||
[IOS_RQ_HISTO] = { | ||
ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, | ||
|
@@ -229,6 +238,8 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][11] = { | |
ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, | ||
ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, | ||
ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, | ||
ZPOOL_CONFIG_VDEV_IND_AUTO_TRIM_HISTO, | ||
ZPOOL_CONFIG_VDEV_IND_MAN_TRIM_HISTO, | ||
NULL}, | ||
}; | ||
|
||
|
@@ -284,6 +295,8 @@ static zpool_command_t command_table[] = { | |
{ "scrub", zpool_do_scrub, HELP_SCRUB }, | ||
{ "resilver", zpool_do_resilver, HELP_RESILVER }, | ||
{ NULL }, | ||
{ "trim", zpool_do_trim, HELP_TRIM }, | ||
{ NULL }, | ||
{ "import", zpool_do_import, HELP_IMPORT }, | ||
{ "export", zpool_do_export, HELP_EXPORT }, | ||
{ "upgrade", zpool_do_upgrade, HELP_UPGRADE }, | ||
|
@@ -370,6 +383,8 @@ get_usage(zpool_help_t idx) | |
return (gettext("\tscrub [-s | -p] <pool> ...\n")); | ||
case HELP_RESILVER: | ||
return (gettext("\tresilver <pool> ...\n")); | ||
case HELP_TRIM: | ||
return (gettext("\ttrim [-s|-r <rate>] <pool> ...\n")); | ||
case HELP_STATUS: | ||
return (gettext("\tstatus [-c [script1,script2,...]] " | ||
"[-igLpPsvxD] [-T d|u] [pool] ... \n" | ||
|
@@ -3360,21 +3375,22 @@ typedef struct name_and_columns { | |
unsigned int columns; /* Center name to this number of columns */ | ||
} name_and_columns_t; | ||
|
||
#define IOSTAT_MAX_LABELS 11 /* Max number of labels on one line */ | ||
#define IOSTAT_MAX_LABELS 15 /* Max number of labels on one line */ | ||
|
||
static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] = | ||
{ | ||
[IOS_DEFAULT] = {{"capacity", 2}, {"operations", 2}, {"bandwidth", 2}, | ||
{NULL}}, | ||
[IOS_LATENCY] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2}, | ||
{"asyncq_wait", 2}, {"scrub"}}, | ||
{"asyncq_wait", 2}, {"scrub"}, {"atrim"}, {"mtrim"}}, | ||
[IOS_QUEUES] = {{"syncq_read", 2}, {"syncq_write", 2}, | ||
{"asyncq_read", 2}, {"asyncq_write", 2}, {"scrubq_read", 2}, | ||
{NULL}}, | ||
{"auto_trimq", 2}, {"man_trimq", 2}, {NULL}}, | ||
[IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2}, | ||
{"sync_queue", 2}, {"async_queue", 2}, {NULL}}, | ||
[IOS_RQ_HISTO] = {{"sync_read", 2}, {"sync_write", 2}, | ||
{"async_read", 2}, {"async_write", 2}, {"scrub", 2}, {NULL}}, | ||
{"async_read", 2}, {"async_write", 2}, {"scrub", 2}, | ||
{"trim", 2}, {NULL}}, | ||
|
||
}; | ||
|
||
|
@@ -3384,13 +3400,16 @@ static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] = | |
[IOS_DEFAULT] = {{"alloc"}, {"free"}, {"read"}, {"write"}, {"read"}, | ||
{"write"}, {NULL}}, | ||
[IOS_LATENCY] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"}, | ||
{"write"}, {"read"}, {"write"}, {"wait"}, {NULL}}, | ||
{"write"}, {"read"}, {"write"}, {"wait"}, {"wait"}, | ||
{"wait"}, {NULL}}, | ||
[IOS_QUEUES] = {{"pend"}, {"activ"}, {"pend"}, {"activ"}, {"pend"}, | ||
{"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}}, | ||
{"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, | ||
{"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}}, | ||
[IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"}, | ||
{"write"}, {"read"}, {"write"}, {"scrub"}, {NULL}}, | ||
{"write"}, {"read"}, {"write"}, {"scrub"}, {"atrim"}, | ||
{"mtrim"}, {NULL}}, | ||
[IOS_RQ_HISTO] = {{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"}, | ||
{"ind"}, {"agg"}, {"ind"}, {"agg"}, {NULL}}, | ||
{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"auto"}, {"man"}, {NULL}}, | ||
}; | ||
|
||
static const char *histo_to_title[] = { | ||
|
@@ -4014,6 +4033,10 @@ print_iostat_queues(iostat_cbdata_t *cb, nvlist_t *oldnv, | |
ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, | ||
ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_AUTO_TRIM_PEND_QUEUE, | ||
ZPOOL_CONFIG_VDEV_AUTO_TRIM_ACTIVE_QUEUE, | ||
ZPOOL_CONFIG_VDEV_MAN_TRIM_PEND_QUEUE, | ||
ZPOOL_CONFIG_VDEV_MAN_TRIM_ACTIVE_QUEUE, | ||
}; | ||
|
||
struct stat_array *nva; | ||
|
@@ -4052,6 +4075,8 @@ print_iostat_latency(iostat_cbdata_t *cb, nvlist_t *oldnv, | |
ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_AUTO_TRIM_LAT_HISTO, | ||
ZPOOL_CONFIG_VDEV_MAN_TRIM_LAT_HISTO, | ||
}; | ||
struct stat_array *nva; | ||
|
||
|
@@ -6561,6 +6586,32 @@ scrub_callback(zpool_handle_t *zhp, void *data) | |
return (err != 0); | ||
} | ||
|
||
/*
 * Options gathered by zpool_do_trim() and handed to trim_callback() for
 * each pool; the three fields are passed straight through to zpool_trim().
 */
typedef struct trim_cbdata { | ||
/* B_TRUE by default; the -s (stop) option clears it. */
boolean_t cb_start; | ||
/* Value parsed from -r <rate> (bytes per second); 0 when -r is not given. */
uint64_t cb_rate; | ||
/* B_TRUE by default; the -p (partial trim) option clears it. */
boolean_t cb_fulltrim; | ||
} trim_cbdata_t; | ||
|
||
int | ||
trim_callback(zpool_handle_t *zhp, void *data) | ||
{ | ||
trim_cbdata_t *cb = data; | ||
int err; | ||
|
||
/* | ||
* Ignore faulted pools. | ||
*/ | ||
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { | ||
(void) fprintf(stderr, gettext("cannot trim '%s': pool is " | ||
"currently unavailable\n"), zpool_get_name(zhp)); | ||
return (1); | ||
} | ||
|
||
err = zpool_trim(zhp, cb->cb_start, cb->cb_rate, cb->cb_fulltrim); | ||
|
||
return (err != 0); | ||
} | ||
|
||
/* | ||
* zpool scrub [-s | -p] <pool> ... | ||
* | ||
|
@@ -6649,6 +6700,53 @@ zpool_do_resilver(int argc, char **argv) | |
return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); | ||
} | ||
|
||
/* | ||
* zpool trim [-s|-r <rate>] <pool> ... | ||
* | ||
* -p Partial trim. Skips never-allocated space. | ||
* -s Stop. Stops any in-progress trim. | ||
* -r <rate> Sets the TRIM rate in bytes (per second). Supports | ||
* adding a multiplier suffix such as 'k' or 'm'. | ||
*/ | ||
int | ||
zpool_do_trim(int argc, char **argv) | ||
{ | ||
int c; | ||
trim_cbdata_t cb; | ||
|
||
cb.cb_start = B_TRUE; | ||
cb.cb_rate = 0; | ||
cb.cb_fulltrim = B_TRUE; | ||
|
||
/* check options */ | ||
while ((c = getopt(argc, argv, "psr:")) != -1) { | ||
switch (c) { | ||
case 'p': | ||
cb.cb_fulltrim = B_FALSE; | ||
break; | ||
case 's': | ||
cb.cb_start = B_FALSE; | ||
break; | ||
case 'r': | ||
if (zfs_nicestrtonum(NULL, optarg, &cb.cb_rate) == -1) { | ||
(void) fprintf(stderr, | ||
gettext("invalid value for rate\n")); | ||
usage(B_FALSE); | ||
} | ||
break; | ||
} | ||
} | ||
|
||
argc -= optind; | ||
argv += optind; | ||
|
||
if (argc < 1) { | ||
(void) fprintf(stderr, gettext("missing pool name argument\n")); | ||
usage(B_FALSE); | ||
} | ||
|
||
return (for_each_pool(argc, argv, B_TRUE, NULL, trim_callback, &cb)); | ||
} | ||
|
||
/* | ||
* Print out detailed scrub status. | ||
|
@@ -6972,6 +7070,58 @@ print_checkpoint_status(pool_checkpoint_stat_t *pcs) | |
space_buf); | ||
} | ||
|
||
/*
 * Render a timestamp with ctime(3) and strip the trailing newline.
 * Returns ctime's static buffer, or "unknown" when ctime() cannot convert
 * the value and returns NULL.
 */
static const char *
trim_time_str(time_t when)
{
	char *buf = ctime(&when);
	size_t len;

	if (buf == NULL)
		return ("unknown");

	len = strlen(buf);
	if (len > 0 && buf[len - 1] == '\n')
		buf[len - 1] = '\0';

	return (buf);
}

/*
 * Print the one-line "trim:" status.
 *
 *	trim_prog	progress value, compared against total_size
 *	total_size	value that trim_prog reaches when the trim is done
 *	rate		rate limit; 0 prints "none"
 *	start_time_u64	start time, handed to ctime(); 0 if never started
 *	end_time_u64	end time, handed to ctime(); 0 if not completed
 *
 * A trim is shown as in progress when trim_prog is neither 0 nor equal to
 * total_size.  Otherwise the start and end times select between
 * "completed", "interrupted" and "none requested".
 */
static void
print_trim_status(uint64_t trim_prog, uint64_t total_size, uint64_t rate,
    uint64_t start_time_u64, uint64_t end_time_u64)
{
	time_t start_time = start_time_u64, end_time = end_time_u64;

	if (trim_prog != 0 && trim_prog != total_size) {
		/* Cap at 100%; this also avoids dividing by a zero total. */
		double pct = 100;

		if (total_size != 0) {
			pct = MIN((((double)trim_prog) / total_size) * 100,
			    100);
		}

		if (rate != 0) {
			char rate_str[32];

			zfs_nicenum(rate, rate_str, sizeof (rate_str));
			(void) printf(gettext(" trim: %.02f%%\tstarted: %s\t"
			    "(rate limit: %s/s)\n"), pct,
			    trim_time_str(start_time), rate_str);
		} else {
			(void) printf(gettext(" trim: %.02f%%\tstarted: %s\t"
			    "(rate limit: none)\n"), pct,
			    trim_time_str(start_time));
		}
		return;
	}

	if (start_time == 0) {
		/* trim was never run */
		(void) printf(gettext(" trim: none requested\n"));
		return;
	}

	/* Non-zero start time means we were run at some point in the past. */
	if (end_time != 0) {
		/* Non-zero end time means we completed */
		time_t diff = end_time - start_time;
		int hrs, mins;

		hrs = diff / 3600;
		mins = (diff % 3600) / 60;
		(void) printf(gettext(" trim: completed on %s "
		    "(after %dh%dm)\n"), trim_time_str(end_time), hrs, mins);
	} else {
		/* Zero end time means we were interrupted */
		(void) printf(gettext(" trim: interrupted\t"
		    "(started %s)\n"), trim_time_str(start_time));
	}
}
|
||
static void | ||
print_error_log(zpool_handle_t *zhp) | ||
{ | ||
|
@@ -7083,6 +7233,43 @@ print_dedup_stats(nvlist_t *config) | |
zpool_dump_ddt(dds, ddh); | ||
} | ||
|
||
/* | ||
* Calculates the total space available on log devices on the pool. | ||
* For whatever reason, this is not counted in the root vdev's space stats. | ||
*/ | ||
static uint64_t | ||
zpool_slog_space(nvlist_t *nvroot) | ||
{ | ||
nvlist_t **newchild; | ||
uint_t c, children; | ||
uint64_t space = 0; | ||
|
||
verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, | ||
&newchild, &children) == 0); | ||
|
||
for (c = 0; c < children; c++) { | ||
uint64_t islog = B_FALSE; | ||
vdev_stat_t *vs; | ||
uint_t n; | ||
uint_t n_subchildren = 1; | ||
nvlist_t **subchild; | ||
|
||
(void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG, | ||
&islog); | ||
if (!islog) | ||
continue; | ||
verify(nvlist_lookup_uint64_array(newchild[c], | ||
ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &n) == 0); | ||
|
||
/* vdev can be non-leaf, so multiply by number of children */ | ||
(void) nvlist_lookup_nvlist_array(newchild[c], | ||
ZPOOL_CONFIG_CHILDREN, &subchild, &n_subchildren); | ||
space += n_subchildren * vs->vs_space; | ||
} | ||
|
||
return (space); | ||
} | ||
|
||
/* | ||
* Display a summary of pool status. Displays a summary such as: | ||
* | ||
|
@@ -7400,6 +7587,7 @@ status_callback(zpool_handle_t *zhp, void *data) | |
pool_checkpoint_stat_t *pcs = NULL; | ||
pool_scan_stat_t *ps = NULL; | ||
pool_removal_stat_t *prs = NULL; | ||
uint64_t trim_prog, trim_rate, trim_start_time, trim_stop_time; | ||
|
||
(void) nvlist_lookup_uint64_array(nvroot, | ||
ZPOOL_CONFIG_CHECKPOINT_STATS, (uint64_t **)&pcs, &c); | ||
|
@@ -7418,6 +7606,24 @@ status_callback(zpool_handle_t *zhp, void *data) | |
if (cbp->cb_namewidth < 10) | ||
cbp->cb_namewidth = 10; | ||
|
||
/* Grab trim stats if the pool supports it */ | ||
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_PROG, | ||
&trim_prog) == 0 && | ||
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_RATE, | ||
&trim_rate) == 0 && | ||
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_START_TIME, | ||
&trim_start_time) == 0 && | ||
nvlist_lookup_uint64(config, ZPOOL_CONFIG_TRIM_STOP_TIME, | ||
&trim_stop_time) == 0) { | ||
/* | ||
* For whatever reason, root vdev_stats_t don't | ||
* include log devices. | ||
*/ | ||
print_trim_status(trim_prog, (vs->vs_space - | ||
vs->vs_alloc) + zpool_slog_space(nvroot), | ||
trim_rate, trim_start_time, trim_stop_time); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's model getting the trim stats off the way this is handled for checkpoint, scan, and removal stats. Specifically, with a new |
||
|
||
(void) printf(gettext("config:\n\n")); | ||
(void) printf(gettext("\t%-*s %-8s %5s %5s %5s"), | ||
cbp->cb_namewidth, "NAME", "STATE", "READ", "WRITE", | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: missing
-p
flag