diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 0603efdbae81..072404264b01 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -153,6 +153,7 @@ enum iostat_type { IOS_LATENCY = 1, IOS_QUEUES = 2, IOS_L_HISTO = 3, + IOS_RQ_HISTO = 4, IOS_COUNT, /* always last element */ }; @@ -161,6 +162,62 @@ enum iostat_type { #define IOS_LATENCY_M (1ULL << IOS_LATENCY) #define IOS_QUEUES_M (1ULL << IOS_QUEUES) #define IOS_L_HISTO_M (1ULL << IOS_L_HISTO) +#define IOS_RQ_HISTO_M (1ULL << IOS_RQ_HISTO) + +/* Mask of all the histo bits */ +#define IOS_ANYHISTO_M (IOS_L_HISTO_M | IOS_RQ_HISTO_M) + +/* + * Lookup table for iostat flags to nvlist names. Basically a list + * of all the nvlists a flag requires. Also specifies the order in + * which data gets printed in zpool iostat. + */ +static const char *vsx_type_to_nvlist[IOS_COUNT][11] = { + [IOS_L_HISTO] = { + ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, + NULL}, + [IOS_LATENCY] = { + ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + NULL}, + [IOS_QUEUES] = { + ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, + NULL}, + [IOS_RQ_HISTO] = { + ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, + ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, + ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, + NULL}, +}; + + +/* + * Given a cb->cb_flags with a histogram bit set, return the iostat_type. + * Right now, only one histo bit is ever set at one time, so we can + * just do a highbit64(a) + */ +#define IOS_HISTO_IDX(a) (highbit64(a & IOS_ANYHISTO_M) - 1) typedef struct zpool_command { const char *name; @@ -255,7 +312,8 @@ get_usage(zpool_help_t idx) { "[-R root] [-F [-n]]\n" "\t [newpool]\n")); case HELP_IOSTAT: - return (gettext("\tiostat [-T d | u] [-ghHLpPvy] [[-lq]|-w]\n" + return (gettext("\tiostat [-T d | u] [-ghHLpPvy] " + "[[-lq]|[-r|-w]]\n" "\t [[pool ...]|[pool vdev ...]|[vdev ...]] " "[interval [count]]\n")); case HELP_LABELCLEAR: @@ -2531,6 +2589,9 @@ static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] = {NULL}}, [IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2}, {"sync_queue", 2}, {"async_queue", 2}, {NULL}}, + [IOS_RQ_HISTO] = {{"sync_read", 2}, {"sync_write", 2}, + {"async_read", 2}, {"async_write", 2}, {"scrub", 2}, {NULL}}, + }; /* Shorthand - if "columns" field not set, default to 1 column */ @@ -2544,6 +2605,13 @@ static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] = {"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}}, [IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"}, {"write"}, {"read"}, {"write"}, {"scrub"}, {NULL}}, + [IOS_RQ_HISTO] = {{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"}, + {"ind"}, {"agg"}, {"ind"}, {"agg"}, {NULL}}, +}; + +static const char *histo_to_title[] = { + [IOS_L_HISTO] = "latency", + [IOS_RQ_HISTO] = "req_size", }; /* @@ -2562,6 +2630,25 @@ label_array_len(const name_and_columns_t *labels) return (i); } +/* + * Return the number of strings in a null-terminated string array. + * For example: + * + * const char foo[] = {"bar", "baz", NULL} + * + * returns 2 + */ +static uint64_t +str_array_len(const char *array[]) +{ + uint64_t i = 0; + while (array[i]) + i++; + + return (i); +} + + /* * Return a default column width for default/latency/queue columns. This does * not include histograms, which have their columns autosized. @@ -2673,14 +2760,22 @@ print_iostat_dashes(iostat_cbdata_t *cb, unsigned int force_column_width, uint64_t f; int idx; const name_and_columns_t *labels; + const char *title; + + + if (cb->cb_flags & IOS_ANYHISTO_M) { + title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; + } else if (cb->cb_vdev_names_count) { + title = "vdev"; + } else { + title = "pool"; + } + + namewidth = MAX(MAX(strlen(title), cb->cb_namewidth), + name ? strlen(name) : 0); - if (cb->cb_flags & IOS_L_HISTO_M) - namewidth = MAX(cb->cb_namewidth, strlen("latency")); - else - namewidth = cb->cb_namewidth; if (name) { - namewidth = MAX(cb->cb_namewidth, strlen(name)); printf("%-*s", namewidth, name); } else { for (i = 0; i < namewidth; i++) @@ -2727,22 +2822,28 @@ print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width, const char *histo_vdev_name) { unsigned int namewidth; - uint64_t flags = cb->cb_flags; + const char *title; - if (flags & IOS_L_HISTO_M) - namewidth = MAX(cb->cb_namewidth, strlen("latency")); - else - namewidth = cb->cb_namewidth; + if (cb->cb_flags & IOS_ANYHISTO_M) { + title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]; + } else if (cb->cb_vdev_names_count) { + title = "vdev"; + } else { + title = "pool"; + } - if (flags & IOS_L_HISTO_M) + namewidth = MAX(MAX(strlen(title), cb->cb_namewidth), + histo_vdev_name ? strlen(histo_vdev_name) : 0); + + if (histo_vdev_name) printf("%-*s", namewidth, histo_vdev_name); else printf("%*s", namewidth, ""); + print_iostat_labels(cb, force_column_width, iostat_top_labels); - printf("%-*s", namewidth, flags & IOS_L_HISTO_M ? "latency" : - cb->cb_vdev_names_count ? "vdev" : "pool"); + printf("%-*s", namewidth, title); print_iostat_labels(cb, force_column_width, iostat_bottom_labels); @@ -2918,6 +3019,7 @@ print_iostat_histo(struct stat_array *nva, unsigned int len, uint64_t val; enum zfs_nicenum_format format; unsigned int buckets; + unsigned int start_bucket; if (cb->cb_literal) format = ZFS_NICENUM_RAW; @@ -2927,12 +3029,25 @@ print_iostat_histo(struct stat_array *nva, unsigned int len, /* All these histos are the same size, so just use nva[0].count */ buckets = nva[0].count; - for (j = 0; j < buckets; j++) { - /* Ending range of this bucket */ - val = (1UL << (j + 1)) - 1; + if (cb->cb_flags & IOS_RQ_HISTO_M) { + /* Start at 512 - req size should never be lower than this */ + start_bucket = 9; + } else { + start_bucket = 0; + } + for (j = start_bucket; j < buckets; j++) { /* Print histogram bucket label */ - zfs_nicetime(val, buf, sizeof (buf)); + if (cb->cb_flags & IOS_L_HISTO_M) { + /* Ending range of this bucket */ + val = (1UL << (j + 1)) - 1; + zfs_nicetime(val, buf, sizeof (buf)); + } else { + /* Request size (starting range of bucket) */ + val = (1UL << j); + zfs_nicenum(val, buf, sizeof (buf)); + } + if (cb->cb_scripted) printf("%llu", (u_longlong_t) val); else @@ -2962,30 +3077,29 @@ print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv, unsigned int column_width; unsigned int namewidth; unsigned int entire_width; - - const char *names[] = { - ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, - }; + enum iostat_type type; struct stat_array *nva; - nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv); + const char **names; + unsigned int names_len; + + /* What type of histo are we? */ + type = IOS_HISTO_IDX(cb->cb_flags); + + /* Get NULL-terminated array of nvlist names for our histo */ + names = vsx_type_to_nvlist[type]; + names_len = str_array_len(names); /* num of names */ + + nva = calc_and_alloc_stats_ex(names, names_len, oldnv, newnv); if (cb->cb_literal) { column_width = MAX(5, - (unsigned int) log10(stat_histo_max(nva, - ARRAY_SIZE(names))) + 1); + (unsigned int) log10(stat_histo_max(nva, names_len)) + 1); } else { column_width = 5; } - namewidth = MAX(cb->cb_namewidth, strlen("latency")); + namewidth = MAX(cb->cb_namewidth, + strlen(histo_to_title[IOS_HISTO_IDX(cb->cb_flags)])); /* * Calculate the entire line width of what we're printing. The @@ -2998,17 +3112,17 @@ print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv, /* |__________| <--- entire_width */ /* */ entire_width = namewidth + (column_width + 2) * - label_array_len(iostat_bottom_labels[IOS_L_HISTO]); + label_array_len(iostat_bottom_labels[type]); if (cb->cb_scripted) printf("%s\n", name); else print_iostat_header_impl(cb, column_width, name); - print_iostat_histo(nva, ARRAY_SIZE(names), cb, column_width, + print_iostat_histo(nva, names_len, cb, column_width, namewidth, scale); - free_calc_stats(nva, ARRAY_SIZE(names)); + free_calc_stats(nva, names_len); if (!cb->cb_scripted) print_solid_separator(entire_width); } @@ -3219,7 +3333,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, * Print the vdev name unless it's is a histogram. Histograms * display the vdev name in the header itself. */ - if (!(cb->cb_flags & IOS_L_HISTO_M)) { + if (!(cb->cb_flags & IOS_ANYHISTO_M)) { if (cb->cb_scripted) { printf("%s", name); } else { @@ -3234,7 +3348,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, /* Calculate our scaling factor */ tdelta = newvs->vs_timestamp - oldvs->vs_timestamp; - if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_L_HISTO_M)) { + if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_ANYHISTO_M)) { /* * If we specify printing histograms with no time interval, then * print the histogram numbers over the entire lifetime of the @@ -3256,12 +3370,12 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, print_iostat_latency(cb, oldnv, newnv, scale); if (cb->cb_flags & IOS_QUEUES_M) print_iostat_queues(cb, oldnv, newnv, scale); - if (cb->cb_flags & IOS_L_HISTO_M) { + if (cb->cb_flags & IOS_ANYHISTO_M) { printf("\n"); print_iostat_histos(cb, oldnv, newnv, scale, name); } - if (!(cb->cb_flags & IOS_L_HISTO_M)) + if (!(cb->cb_flags & IOS_ANYHISTO_M)) printf("\n"); free(calcvs); @@ -3303,7 +3417,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, */ if (num_logs(newnv) > 0) { - if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted && + if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted && !cb->cb_vdev_names) { print_iostat_dashes(cb, 0, "logs"); } @@ -3337,7 +3451,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, return (ret); if (children > 0) { - if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted && + if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted && !cb->cb_vdev_names) { print_iostat_dashes(cb, 0, "cache"); } @@ -3399,9 +3513,10 @@ print_iostat(zpool_handle_t *zhp, void *data) ret = print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); - if ((ret != 0) && !(cb->cb_flags & IOS_L_HISTO_M) && !cb->cb_scripted && - cb->cb_verbose && !cb->cb_vdev_names_count) - print_iostat_separator(cb); + if ((ret != 0) && !(cb->cb_flags & IOS_ANYHISTO_M) && + !cb->cb_scripted && cb->cb_verbose && !cb->cb_vdev_names_count) { + print_iostat_separator(cb); + } return (ret); } @@ -3553,37 +3668,6 @@ get_stat_flags_cb(zpool_handle_t *zhp, void *data) uint64_t flags = 0; int i, j; - /* - * Lookup table for extended iostat flags to nvlist names. - * Basically a list of all the nvpairs a flag requires. - */ - static const char *vsx_type_to_nvlist[IOS_COUNT][10] = { - [IOS_L_HISTO] = { - ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, - NULL}, - [IOS_LATENCY] = { - ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, - ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, - ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, - NULL}, - [IOS_QUEUES] = { - ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, - ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, - ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, - ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, - ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, - NULL} - }; - config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); @@ -3818,7 +3902,7 @@ fsleep(float sec) { /* - * zpool iostat [-ghHLpPvy] [[-lq]-w] [-n name] [-T d|u] + * zpool iostat [-ghHLpPvy] [[-lq]|[-r|-w]] [-n name] [-T d|u] * [[ pool ...]|[pool vdev ...]|[vdev ...]] * [interval [count]] * @@ -3832,7 +3916,8 @@ fsleep(float sec) { * by a single tab. * -l Display average latency * -q Display queue depths - * -w Display histograms + * -w Display latency histograms + * -r Display request size histogram * -T Display a timestamp in date(1) or Unix format * * This command can be tricky because we want to be able to deal with pool @@ -3851,7 +3936,7 @@ zpool_do_iostat(int argc, char **argv) unsigned long count = 0; zpool_list_t *list; boolean_t verbose = B_FALSE; - boolean_t latency = B_FALSE, histo = B_FALSE; + boolean_t latency = B_FALSE, l_histo = B_FALSE, rq_histo = B_FALSE; boolean_t queues = B_FALSE, parsable = B_FALSE, scripted = B_FALSE; boolean_t omit_since_boot = B_FALSE; boolean_t guid = B_FALSE; @@ -3861,12 +3946,12 @@ zpool_do_iostat(int argc, char **argv) /* Used for printing error message */ const char flag_to_arg[] = {[IOS_LATENCY] = 'l', [IOS_QUEUES] = 'q', - [IOS_L_HISTO] = 'w'}; + [IOS_L_HISTO] = 'w', [IOS_RQ_HISTO] = 'r'}; uint64_t unsupported_flags; /* check options */ - while ((c = getopt(argc, argv, "gLPT:vyhplqwH")) != -1) { + while ((c = getopt(argc, argv, "gLPT:vyhplqrwH")) != -1) { switch (c) { case 'g': guid = B_TRUE; @@ -3896,7 +3981,10 @@ zpool_do_iostat(int argc, char **argv) scripted = B_TRUE; break; case 'w': - histo = B_TRUE; + l_histo = B_TRUE; + break; + case 'r': + rq_histo = B_TRUE; break; case 'y': omit_since_boot = B_TRUE; @@ -3997,10 +4085,18 @@ zpool_do_iostat(int argc, char **argv) return (1); } - if (histo && (queues || latency)) { + if ((l_histo || rq_histo) && (queues || latency)) { + pool_list_free(list); + (void) fprintf(stderr, + gettext("[-r|-w] isn't allowed with [-q|-l]\n")); + usage(B_FALSE); + return (1); + } + + if (l_histo && rq_histo) { pool_list_free(list); (void) fprintf(stderr, - gettext("-w isn't allowed with [-q|-l]\n")); + gettext("Only one of [-r|-w] can be passed at a time\n")); usage(B_FALSE); return (1); } @@ -4010,13 +4106,15 @@ zpool_do_iostat(int argc, char **argv) */ cb.cb_list = list; - if (histo) { + if (l_histo) { /* * Histograms tables look out of place when you try to display * them with the other stats, so make a rule that you can only * print histograms by themselves. */ cb.cb_flags = IOS_L_HISTO_M; + } else if (rq_histo) { + cb.cb_flags = IOS_RQ_HISTO_M; } else { cb.cb_flags = IOS_DEFAULT_M; if (latency) @@ -4088,7 +4186,7 @@ zpool_do_iostat(int argc, char **argv) */ if (((++cb.cb_iteration == 1 && !skip) || (skip != verbose)) && - (!(cb.cb_flags & IOS_L_HISTO_M)) && + (!(cb.cb_flags & IOS_ANYHISTO_M)) && !cb.cb_scripted) print_iostat_header(&cb); @@ -4108,8 +4206,8 @@ zpool_do_iostat(int argc, char **argv) * we also want an ending separator. */ if (((npools > 1 && !verbose && - !(cb.cb_flags & IOS_L_HISTO_M)) || - (!(cb.cb_flags & IOS_L_HISTO_M) && + !(cb.cb_flags & IOS_ANYHISTO_M)) || + (!(cb.cb_flags & IOS_ANYHISTO_M) && cb.cb_vdev_names_count)) && !cb.cb_scripted) { print_iostat_separator(&cb); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 65dba125c941..9de50e9a273f 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -558,7 +558,17 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO "vdev_async_w_lat_histo" #define ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO "vdev_scrub_histo" - +/* Request size histograms */ +#define ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO "vdev_sync_ind_r_histo" +#define ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO "vdev_sync_ind_w_histo" +#define ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO "vdev_async_ind_r_histo" +#define ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO "vdev_async_ind_w_histo" +#define ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO "vdev_ind_scrub_histo" +#define ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO "vdev_sync_agg_r_histo" +#define ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO "vdev_sync_agg_w_histo" +#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO "vdev_async_agg_r_histo" +#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo" +#define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo" #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" @@ -824,20 +834,33 @@ typedef struct vdev_stat_ex { * 2^37 nanoseconds = 134s. Timeouts will probably start kicking in * before this. */ -#define VDEV_HISTO_BUCKETS 37 +#define VDEV_L_HISTO_BUCKETS 37 /* Latency histo buckets */ +#define VDEV_RQ_HISTO_BUCKETS 25 /* Request size histo buckets */ + /* Amount of time in ZIO queue (ns) */ uint64_t vsx_queue_histo[ZIO_PRIORITY_NUM_QUEUEABLE] - [VDEV_HISTO_BUCKETS]; + [VDEV_L_HISTO_BUCKETS]; /* Total ZIO latency (ns). Includes queuing and disk access time */ - uint64_t vsx_total_histo[ZIO_TYPES][VDEV_HISTO_BUCKETS]; + uint64_t vsx_total_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS]; /* Amount of time to read/write the disk (ns) */ - uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_HISTO_BUCKETS]; + uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS]; + + /* "lookup the bucket for a value" histogram macros */ +#define HISTO(val, buckets) (val != 0 ? MIN(highbit64(val) - 1, \ + buckets - 1) : 0) +#define L_HISTO(a) HISTO(a, VDEV_L_HISTO_BUCKETS) +#define RQ_HISTO(a) HISTO(a, VDEV_RQ_HISTO_BUCKETS) + + /* Physical IO histogram */ + uint64_t vsx_ind_histo[ZIO_PRIORITY_NUM_QUEUEABLE] + [VDEV_RQ_HISTO_BUCKETS]; - /* "lookup the bucket for a value" macro */ -#define HISTO(a) (a != 0 ? MIN(highbit64(a) - 1, VDEV_HISTO_BUCKETS - 1) : 0) + /* Delegated (aggregated) physical IO histogram */ + uint64_t vsx_agg_histo[ZIO_PRIORITY_NUM_QUEUEABLE] + [VDEV_RQ_HISTO_BUCKETS]; } vdev_stat_ex_t; diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index eb35b74c02e0..80402c55ec63 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -95,7 +95,7 @@ zpool \- configures ZFS storage pools .LP .nf -\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-G\fR|[\fB-lq\fR]] +\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-lq\fR]|[\fB-r\fR|-\fBw\fR]] [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR .fi @@ -1510,7 +1510,7 @@ Scan using the default search path, the libblkid cache will not be consulted. A .sp .ne 2 .na -\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-w\fR|[\fB-lq\fR]] [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR +\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [[\fB-lq\fR]|[\fB-r\fR|\fB-w\fR]] [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR .ad .sp .6 @@ -1582,6 +1582,19 @@ Display numbers in parsable (exact) values. Time values are in nanoseconds. Display full paths for vdevs instead of only the last component of the path. This can be used in conjunction with the \fB-L\fR flag. .RE +.sp +.ne 2 +.na +\fB\fB-r\fR\fR +.ad +.RS 12n +Print request size histograms for the leaf ZIOs. This includes histograms of +individual ZIOs ("ind") and aggregate ZIOs ("agg"). These stats can be useful +for seeing how well the ZFS IO aggregator is working. Do not confuse these +request size stats with the block layer requests; it's possible ZIOs can +be broken up before being sent to the block device. +.RE + .sp .ne 2 .na diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 13739017382a..607be3ce3b81 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2784,21 +2784,30 @@ vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx) { int t, b; for (t = 0; t < ZIO_TYPES; t++) { - for (b = 0; b < VDEV_HISTO_BUCKETS; b++) { + for (b = 0; b < ARRAY_SIZE(vsx->vsx_disk_histo[0]); b++) vsx->vsx_disk_histo[t][b] += cvsx->vsx_disk_histo[t][b]; + + for (b = 0; b < ARRAY_SIZE(vsx->vsx_total_histo[0]); b++) { vsx->vsx_total_histo[t][b] += cvsx->vsx_total_histo[t][b]; } } for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) { - for (b = 0; b < VDEV_HISTO_BUCKETS; b++) { + for (b = 0; b < ARRAY_SIZE(vsx->vsx_queue_histo[0]); b++) { vsx->vsx_queue_histo[t][b] += cvsx->vsx_queue_histo[t][b]; } vsx->vsx_active_queue[t] += cvsx->vsx_active_queue[t]; vsx->vsx_pend_queue[t] += cvsx->vsx_pend_queue[t]; + + for (b = 0; b < ARRAY_SIZE(vsx->vsx_ind_histo[0]); b++) + vsx->vsx_ind_histo[t][b] += cvsx->vsx_ind_histo[t][b]; + + for (b = 0; b < ARRAY_SIZE(vsx->vsx_agg_histo[0]); b++) + vsx->vsx_agg_histo[t][b] += cvsx->vsx_agg_histo[t][b]; } + } /* @@ -2974,13 +2983,21 @@ vdev_stat_update(zio_t *zio, uint64_t psize) vs->vs_ops[type]++; vs->vs_bytes[type] += psize; + if (flags & ZIO_FLAG_DELEGATED) { + vsx->vsx_agg_histo[zio->io_priority] + [RQ_HISTO(zio->io_size)]++; + } else { + vsx->vsx_ind_histo[zio->io_priority] + [RQ_HISTO(zio->io_size)]++; + } + if (zio->io_delta && zio->io_delay) { vsx->vsx_queue_histo[zio->io_priority] - [HISTO(zio->io_delta - zio->io_delay)]++; + [L_HISTO(zio->io_delta - zio->io_delay)]++; vsx->vsx_disk_histo[type] - [HISTO(zio->io_delay)]++; + [L_HISTO(zio->io_delay)]++; vsx->vsx_total_histo[type] - [HISTO(zio->io_delta)]++; + [L_HISTO(zio->io_delta)]++; } } diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 1400aee7b757..59bce1370923 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -302,6 +302,47 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv) vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB], ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB])); + /* Request sizes */ + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, + vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ], + ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, + vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_WRITE], + ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, + vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_READ], + ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, + vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_WRITE], + ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, + vsx->vsx_ind_histo[ZIO_PRIORITY_SCRUB], + ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SCRUB])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, + vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ], + ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, + vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_WRITE], + ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, + vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_READ], + ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, + vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_WRITE], + ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, + vsx->vsx_agg_histo[ZIO_PRIORITY_SCRUB], + ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SCRUB])); + /* Add extended stats nvlist to main nvlist */ fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx); diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh index ae1e5a1523c2..097be9e7449e 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh @@ -52,7 +52,8 @@ fi set -A args "" "-?" "-f" "nonexistpool" "$TESTPOOL/$TESTFS" \ "$testpool 0" "$testpool -1" "$testpool 1 0" \ - "$testpool 0 0" "$testpool -wl" "$testpool -wq" + "$testpool 0 0" "$testpool -wl" "$testpool -wq" "$testpool -wr" \ + "$testpool -rq" "$testpool -lr" log_assert "Executing 'zpool iostat' with bad options fails" diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh index f8bd8e3db69c..0119a7061f32 100755 --- a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh @@ -36,7 +36,7 @@ # # DESCRIPTION: # Executing 'zpool iostat' command with various combinations of extended -# stats (-vqL), parsable/script options (-pH), and misc lists of pools +# stats (-lqwr), parsable/script options (-pH), and misc lists of pools # and vdevs. # # STRATEGY: @@ -59,7 +59,9 @@ set -A args "" "-v" "-q" "-l" "-lq $TESTPOOL" "-ql ${DISKS[0]} ${DISKS[1]}" \ "-wp $TESTPOOL" \ "-qlH $TESTPOOL ${DISKS[0]}" \ "-vpH ${DISKS[0]}" \ - "-wpH ${DISKS[0]}" + "-wpH ${DISKS[0]}" \ + "-r ${DISKS[0]}" \ + "-rpH ${DISKS[0]}" log_assert "Executing 'zpool iostat' with extended stat options succeeds" log_note "testpool: $TESTPOOL, disks $DISKS"