Skip to content

Commit

Permalink
OpenZFS 6531 - Provide mechanism to artificially limit disk performance
Browse files Browse the repository at this point in the history
Reviewed by: Paul Dagnelie <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Approved by: Dan McDonald <[email protected]>
Ported by: Tony Hutter <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>

OpenZFS-issue: https://www.illumos.org/issues/6531
OpenZFS-commit: openzfs/openzfs@97e8130

Porting notes:
- Added new IO delay tracepoints, and moved common ZIO tracepoint macros
  to a new trace_common.h file.
- Used zio_delay_taskq() in place of OpenZFS's timeout_generic() function.
- Updated zinject man page
- Updated zpool_scrub test files
  • Loading branch information
tonyhutter authored and behlendorf committed May 26, 2016
1 parent 7e94507 commit 26ef0cc
Show file tree
Hide file tree
Showing 18 changed files with 680 additions and 120 deletions.
108 changes: 105 additions & 3 deletions cmd/zinject/zinject.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/

/*
Expand Down Expand Up @@ -239,6 +239,38 @@ usage(void)
"\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
"\t\tPerform a specific action on a particular device.\n"
"\n"
"\tzinject -d device -D latency:lanes pool\n"
"\n"
"\t\tAdd an artificial delay to IO requests on a particular\n"
"\t\tdevice, such that the requests take a minimum of 'latency'\n"
"\t\tmilliseconds to complete. Each delay has an associated\n"
"\t\tnumber of 'lanes' which defines the number of concurrent\n"
"\t\tIO requests that can be processed.\n"
"\n"
"\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
"\t\tthe device will only be able to service a single IO request\n"
"\t\tat a time with each request taking 10 ms to complete. So,\n"
"\t\tif only a single request is submitted every 10 ms, the\n"
"\t\taverage latency will be 10 ms; but if more than one request\n"
"\t\tis submitted every 10 ms, the average latency will be more\n"
"\t\tthan 10 ms.\n"
"\n"
"\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
"\t\tlanes (-D 10:2), then the device will be able to service\n"
"\t\ttwo requests at a time, each with a minimum latency of\n"
"\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
"\t\tthe average latency will be 10 ms; but if more than two\n"
"\t\trequests are submitted every 10 ms, the average latency\n"
"\t\twill be more than 10 ms.\n"
"\n"
"\t\tAlso note, these delays are additive. So two invocations\n"
"\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
"\t\tof '-D 10:2'. This also means, one can specify multiple\n"
"\t\tlanes with differing target latencies. For example, an\n"
"\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
"\t\tcreate 3 lanes on the device; one lane with a latency\n"
"\t\tof 10 ms and two lanes with a 25 ms latency.\n"
"\n"
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
"\t\tCause the pool to stop writing blocks yet not\n"
"\t\treport errors for a duration. Simulates buggy hardware\n"
Expand Down Expand Up @@ -353,6 +385,9 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
return (0);

if (record->zi_cmd == ZINJECT_DELAY_IO)
return (0);

if (*count == 0) {
(void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
(void) printf("--- --------------- ----------------\n");
Expand All @@ -366,6 +401,35 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
return (0);
}

/*
 * Handler-iteration callback (it is passed to iter_handlers()) that prints
 * one table row for every ZINJECT_DELAY_IO record.
 *
 * A record is ignored, producing no output, when its zi_guid is zero, its
 * zi_func is a non-empty string, or its zi_cmd is anything other than
 * ZINJECT_DELAY_IO.
 *
 * 'data' must point to an int holding the number of rows printed so far.
 * The column headings are written immediately before the first row, and
 * the tally is incremented once per row.  The delay column shows zi_timer
 * after conversion with NSEC2MSEC().
 *
 * Always returns 0.
 */
static int
print_delay_handler(int id, const char *pool, zinject_record_t *record,
    void *data)
{
	int *nrows = data;
	int is_delay_record = record->zi_guid != 0 &&
	    record->zi_func[0] == '\0' &&
	    record->zi_cmd == ZINJECT_DELAY_IO;

	if (is_delay_record) {
		u_longlong_t delay_ms =
		    (u_longlong_t)NSEC2MSEC(record->zi_timer);
		u_longlong_t lanes = (u_longlong_t)record->zi_nlanes;
		u_longlong_t guid = (u_longlong_t)record->zi_guid;

		/* First matching record: emit the table header once. */
		if ((*nrows)++ == 0) {
			(void) printf("%3s %-15s %-15s %-15s %s\n",
			    "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
			(void) printf("--- --------------- --------------- "
			    "--------------- ----------------\n");
		}

		(void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool,
		    delay_ms, lanes, guid);
	}

	return (0);
}

static int
print_panic_handler(int id, const char *pool, zinject_record_t *record,
void *data)
Expand Down Expand Up @@ -403,6 +467,13 @@ print_all_handlers(void)
count = 0;
}

(void) iter_handlers(print_delay_handler, &count);
if (count > 0) {
total += count;
(void) printf("\n");
count = 0;
}

(void) iter_handlers(print_data_handler, &count);
if (count > 0) {
total += count;
Expand Down Expand Up @@ -545,6 +616,35 @@ perform_action(const char *pool, zinject_record_t *record, int cmd)
return (1);
}

/*
 * Parse a '-D' argument of the form "<latency>:<lanes>", where both parts
 * are unsigned decimal integers and <latency> is given in milliseconds.
 *
 * On success the latency, converted to nanoseconds, is stored in *delay,
 * the lane count is stored in *nlanes, and 0 is returned.
 *
 * Returns 1, leaving *delay and *nlanes untouched, when the string is
 * malformed (missing ':', missing digits, a sign, leading whitespace or
 * trailing characters), when either number is out of range, when the
 * latency is zero, or when the latency cannot be represented in
 * nanoseconds.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long long scan_delay;
	unsigned long long scan_nlanes;
	char *end;

	/*
	 * strtoull() skips leading whitespace and accepts a sign (a '-'
	 * silently negates the result), so insist that each number
	 * starts with a digit.
	 */
	if (str[0] < '0' || str[0] > '9')
		return (1);

	errno = 0;
	scan_delay = strtoull(str, &end, 10);
	if (errno != 0 || *end != ':')
		return (1);

	str = end + 1;
	if (str[0] < '0' || str[0] > '9')
		return (1);

	/* Anything following the lane count (e.g. "10:1x") is an error. */
	scan_nlanes = strtoull(str, &end, 10);
	if (errno != 0 || *end != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * Refuse latencies whose nanosecond equivalent would overflow.
	 * The bound is INT64_MAX rather than UINT64_MAX because
	 * MSEC2NSEC() may do its arithmetic in a signed 64-bit type.
	 */
	if (scan_delay > (unsigned long long)(INT64_MAX / MSEC2NSEC(1)))
		return (1);

	/*
	 * The units for the CLI delay parameter are milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -628,8 +728,10 @@ main(int argc, char **argv)
break;
case 'D':
errno = 0;
record.zi_timer = strtoull(optarg, &end, 10);
if (errno != 0 || *end != '\0') {
ret = parse_delay(optarg, &record.zi_timer,
&record.zi_nlanes);
if (ret != 0) {

(void) fprintf(stderr, "invalid i/o delay "
"value: '%s'\n", optarg);
usage();
Expand Down
2 changes: 2 additions & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@ COMMON_H = \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/trace_acl.h \
$(top_srcdir)/include/sys/trace_arc.h \
$(top_srcdir)/include/sys/trace_common.h \
$(top_srcdir)/include/sys/trace_dbgmsg.h \
$(top_srcdir)/include/sys/trace_dbuf.h \
$(top_srcdir)/include/sys/trace_dmu.h \
$(top_srcdir)/include/sys/trace_dnode.h \
$(top_srcdir)/include/sys/trace_multilist.h \
$(top_srcdir)/include/sys/trace_txg.h \
$(top_srcdir)/include/sys/trace_zil.h \
$(top_srcdir)/include/sys/trace_zio.h \
$(top_srcdir)/include/sys/trace_zrlock.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
Expand Down
81 changes: 1 addition & 80 deletions include/sys/trace_arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#include <linux/tracepoint.h>
#include <sys/types.h>
#include <sys/trace_common.h> /* For ZIO macros */

/*
* Generic support for one argument tracepoints of the form:
Expand Down Expand Up @@ -115,86 +116,6 @@ DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
* zio_t *, ...);
*/

/*
 * Declares, via __field(), the per-event record members that describe a
 * zio: the zio_* members hold zio->io_* values and the zp_* members hold
 * zio->io_prop values, as copied in by ZIO_TP_FAST_ASSIGN.
 * NOTE(review): presumably expanded inside TP_STRUCT__entry() of a
 * tracepoint definition -- confirm against the users of this macro.
 */
#define ZIO_TP_STRUCT_ENTRY \
__field(zio_type_t, zio_type) \
__field(int, zio_cmd) \
__field(zio_priority_t, zio_priority) \
__field(uint64_t, zio_size) \
__field(uint64_t, zio_orig_size) \
__field(uint64_t, zio_offset) \
__field(hrtime_t, zio_timestamp) \
__field(hrtime_t, zio_delta) \
__field(uint64_t, zio_delay) \
__field(enum zio_flag, zio_flags) \
__field(enum zio_stage, zio_stage) \
__field(enum zio_stage, zio_pipeline) \
__field(enum zio_flag, zio_orig_flags) \
__field(enum zio_stage, zio_orig_stage) \
__field(enum zio_stage, zio_orig_pipeline) \
__field(uint8_t, zio_reexecute) \
__field(uint64_t, zio_txg) \
__field(int, zio_error) \
__field(uint64_t, zio_ena) \
\
__field(enum zio_checksum, zp_checksum) \
__field(enum zio_compress, zp_compress) \
__field(dmu_object_type_t, zp_type) \
__field(uint8_t, zp_level) \
__field(uint8_t, zp_copies) \
__field(boolean_t, zp_dedup) \
__field(boolean_t, zp_dedup_verify) \
__field(boolean_t, zp_nopwrite)

/*
 * Fills in every member declared by ZIO_TP_STRUCT_ENTRY from a zio.
 * The expansion names the identifiers 'zio' and '__entry' directly, so
 * both must be in scope wherever it is used.  Every statement, including
 * the last, carries its own ';', so the user adds no trailing semicolon.
 */
#define ZIO_TP_FAST_ASSIGN \
__entry->zio_type = zio->io_type; \
__entry->zio_cmd = zio->io_cmd; \
__entry->zio_priority = zio->io_priority; \
__entry->zio_size = zio->io_size; \
__entry->zio_orig_size = zio->io_orig_size; \
__entry->zio_offset = zio->io_offset; \
__entry->zio_timestamp = zio->io_timestamp; \
__entry->zio_delta = zio->io_delta; \
__entry->zio_delay = zio->io_delay; \
__entry->zio_flags = zio->io_flags; \
__entry->zio_stage = zio->io_stage; \
__entry->zio_pipeline = zio->io_pipeline; \
__entry->zio_orig_flags = zio->io_orig_flags; \
__entry->zio_orig_stage = zio->io_orig_stage; \
__entry->zio_orig_pipeline = zio->io_orig_pipeline; \
__entry->zio_reexecute = zio->io_reexecute; \
__entry->zio_txg = zio->io_txg; \
__entry->zio_error = zio->io_error; \
__entry->zio_ena = zio->io_ena; \
\
__entry->zp_checksum = zio->io_prop.zp_checksum; \
__entry->zp_compress = zio->io_prop.zp_compress; \
__entry->zp_type = zio->io_prop.zp_type; \
__entry->zp_level = zio->io_prop.zp_level; \
__entry->zp_copies = zio->io_prop.zp_copies; \
__entry->zp_dedup = zio->io_prop.zp_dedup; \
__entry->zp_nopwrite = zio->io_prop.zp_nopwrite; \
__entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;

/*
 * Format-string fragment that renders the members declared by
 * ZIO_TP_STRUCT_ENTRY as "zio { ... prop { ... } }".  It contains 27
 * conversions, which pair up in order with the 27 expressions in
 * ZIO_TP_PRINTK_ARGS; keep the two in step when adding a field.
 * NOTE(review): zio_timestamp and zio_delta are declared hrtime_t but
 * printed with %llu -- confirm that matches hrtime_t's signedness.
 */
#define ZIO_TP_PRINTK_FMT \
"zio { type %u cmd %i prio %u size %llu orig_size %llu " \
"offset %llu timestamp %llu delta %llu delay %llu " \
"flags 0x%x stage 0x%x pipeline 0x%x orig_flags 0x%x " \
"orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
"txg %llu error %d ena %llu prop { checksum %u compress %u " \
"type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"

/*
 * Argument list for ZIO_TP_PRINTK_FMT: the 27 __entry members, in the
 * same order as the conversions in that format string.  Expands to a
 * bare comma-separated list with no leading or trailing comma.
 */
#define ZIO_TP_PRINTK_ARGS \
__entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
__entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
__entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
__entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
__entry->zio_orig_flags, __entry->zio_orig_stage, \
__entry->zio_orig_pipeline, __entry->zio_reexecute, \
__entry->zio_txg, __entry->zio_error, __entry->zio_ena, \
__entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \
__entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \
__entry->zp_dedup_verify, __entry->zp_nopwrite

DECLARE_EVENT_CLASS(zfs_l2arc_rw_class,
TP_PROTO(vdev_t *vd, zio_t *zio),
TP_ARGS(vd, zio),
Expand Down
112 changes: 112 additions & 0 deletions include/sys/trace_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* This file contains commonly used trace macros. Feel free to add and use
* them in your tracepoint headers.
*/

#ifndef _SYS_TRACE_COMMON_H
#define _SYS_TRACE_COMMON_H
#include <linux/tracepoint.h>

/* ZIO macros */
/*
 * Declares, via __field(), the per-event record members that describe a
 * zio: the zio_* members hold zio->io_* values and the zp_* members hold
 * zio->io_prop values, as copied in by ZIO_TP_FAST_ASSIGN.
 * NOTE(review): presumably expanded inside TP_STRUCT__entry() of a
 * tracepoint definition -- confirm against the users of this macro.
 */
#define ZIO_TP_STRUCT_ENTRY \
__field(zio_type_t, zio_type) \
__field(int, zio_cmd) \
__field(zio_priority_t, zio_priority) \
__field(uint64_t, zio_size) \
__field(uint64_t, zio_orig_size) \
__field(uint64_t, zio_offset) \
__field(hrtime_t, zio_timestamp) \
__field(hrtime_t, zio_delta) \
__field(uint64_t, zio_delay) \
__field(enum zio_flag, zio_flags) \
__field(enum zio_stage, zio_stage) \
__field(enum zio_stage, zio_pipeline) \
__field(enum zio_flag, zio_orig_flags) \
__field(enum zio_stage, zio_orig_stage) \
__field(enum zio_stage, zio_orig_pipeline) \
__field(uint8_t, zio_reexecute) \
__field(uint64_t, zio_txg) \
__field(int, zio_error) \
__field(uint64_t, zio_ena) \
\
__field(enum zio_checksum, zp_checksum) \
__field(enum zio_compress, zp_compress) \
__field(dmu_object_type_t, zp_type) \
__field(uint8_t, zp_level) \
__field(uint8_t, zp_copies) \
__field(boolean_t, zp_dedup) \
__field(boolean_t, zp_dedup_verify) \
__field(boolean_t, zp_nopwrite)

/*
 * Fills in every member declared by ZIO_TP_STRUCT_ENTRY from a zio.
 * The expansion names the identifiers 'zio' and '__entry' directly, so
 * both must be in scope wherever it is used.  Every statement, including
 * the last, carries its own ';', so the user adds no trailing semicolon.
 */
#define ZIO_TP_FAST_ASSIGN \
__entry->zio_type = zio->io_type; \
__entry->zio_cmd = zio->io_cmd; \
__entry->zio_priority = zio->io_priority; \
__entry->zio_size = zio->io_size; \
__entry->zio_orig_size = zio->io_orig_size; \
__entry->zio_offset = zio->io_offset; \
__entry->zio_timestamp = zio->io_timestamp; \
__entry->zio_delta = zio->io_delta; \
__entry->zio_delay = zio->io_delay; \
__entry->zio_flags = zio->io_flags; \
__entry->zio_stage = zio->io_stage; \
__entry->zio_pipeline = zio->io_pipeline; \
__entry->zio_orig_flags = zio->io_orig_flags; \
__entry->zio_orig_stage = zio->io_orig_stage; \
__entry->zio_orig_pipeline = zio->io_orig_pipeline; \
__entry->zio_reexecute = zio->io_reexecute; \
__entry->zio_txg = zio->io_txg; \
__entry->zio_error = zio->io_error; \
__entry->zio_ena = zio->io_ena; \
\
__entry->zp_checksum = zio->io_prop.zp_checksum; \
__entry->zp_compress = zio->io_prop.zp_compress; \
__entry->zp_type = zio->io_prop.zp_type; \
__entry->zp_level = zio->io_prop.zp_level; \
__entry->zp_copies = zio->io_prop.zp_copies; \
__entry->zp_dedup = zio->io_prop.zp_dedup; \
__entry->zp_nopwrite = zio->io_prop.zp_nopwrite; \
__entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;

/*
 * Format-string fragment that renders the members declared by
 * ZIO_TP_STRUCT_ENTRY as "zio { ... prop { ... } }".  It contains 27
 * conversions, which pair up in order with the 27 expressions in
 * ZIO_TP_PRINTK_ARGS; keep the two in step when adding a field.
 * NOTE(review): zio_timestamp and zio_delta are declared hrtime_t but
 * printed with %llu -- confirm that matches hrtime_t's signedness.
 */
#define ZIO_TP_PRINTK_FMT \
"zio { type %u cmd %i prio %u size %llu orig_size %llu " \
"offset %llu timestamp %llu delta %llu delay %llu " \
"flags 0x%x stage 0x%x pipeline 0x%x orig_flags 0x%x " \
"orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
"txg %llu error %d ena %llu prop { checksum %u compress %u " \
"type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"

/*
 * Argument list for ZIO_TP_PRINTK_FMT: the 27 __entry members, in the
 * same order as the conversions in that format string.  Expands to a
 * bare comma-separated list with no leading or trailing comma.
 */
#define ZIO_TP_PRINTK_ARGS \
__entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
__entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
__entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
__entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
__entry->zio_orig_flags, __entry->zio_orig_stage, \
__entry->zio_orig_pipeline, __entry->zio_reexecute, \
__entry->zio_txg, __entry->zio_error, __entry->zio_ena, \
__entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \
__entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \
__entry->zp_dedup_verify, __entry->zp_nopwrite

#endif /* _SYS_TRACE_COMMON_H */
Loading

0 comments on commit 26ef0cc

Please sign in to comment.