Skip to content

Commit

Permalink
3306 zdb should be able to issue reads in parallel
Browse files Browse the repository at this point in the history
3321 'zpool reopen' command should be documented in the man page and help message
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Matt Ahrens <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
  • Loading branch information
grwilson authored and Christopher Siden committed Nov 2, 2012
1 parent 5c68564 commit 31d7e8f
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 66 deletions.
106 changes: 77 additions & 29 deletions usr/src/cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
uint64_t max_inflight = 200;

/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
Expand All @@ -108,13 +109,14 @@ usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"poolname [object...]\n"
" %s [-divPA] [-e -p path...] dataset [object...]\n"
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] "
"[-U config] [-M inflight I/Os] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
"poolname [vdev [metaslab...]]\n"
" %s -R [-A] [-e [-p path...]] poolname "
"vdev:offset:size[:flags]\n"
" %s -S [-PA] [-e [-p path...]] poolname\n"
" %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
" %s -l [-uA] device\n"
" %s -C [-A] [-U config]\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
Expand Down Expand Up @@ -161,6 +163,8 @@ usage(void)
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, " -M <number of inflight I/Os> -- "
"specify the maximum number of checksumming I/Os [default is 200]");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
Expand Down Expand Up @@ -2028,6 +2032,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}

/*
 * Completion callback for the asynchronous checksum reads issued while
 * traversing the pool.
 *
 * Releases the data buffer attached to the zio, drops the pool's count of
 * in-flight scrub I/Os and wakes any thread waiting on spa_scrub_io_cv.
 * If the read failed and the zio was not flagged speculative, the failure
 * is tallied in the zdb_cb_t passed as io_private and a diagnostic line is
 * printed.  The error accounting is done while spa_scrub_lock is held.
 */
static void
zdb_blkptr_done(zio_t *zio)
{
	zdb_cb_t *cb = zio->io_private;
	zbookmark_t *bm = &zio->io_bookmark;
	blkptr_t *blk = zio->io_bp;
	spa_t *pool = zio->io_spa;
	int err = zio->io_error;

	/* The read buffer is no longer needed once the I/O has completed. */
	zio_data_buf_free(zio->io_data, zio->io_size);

	mutex_enter(&pool->spa_scrub_lock);

	/* One fewer read in flight; let any waiter re-check the count. */
	pool->spa_scrub_inflight--;
	cv_broadcast(&pool->spa_scrub_io_cv);

	if (err != 0 && (zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
		char blkbuf[BP_SPRINTF_LEN];

		cb->zcb_haderrors = 1;
		cb->zcb_errors[err]++;

		/* The block pointer is only rendered at -bb and above. */
		blkbuf[0] = '\0';
		if (dump_opt['b'] >= 2)
			sprintf_blkptr(blkbuf, blk);

		(void) printf("zdb_blkptr_cb: "
		    "Got error %d reading "
		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
		    err,
		    (u_longlong_t)bm->zb_objset,
		    (u_longlong_t)bm->zb_object,
		    (u_longlong_t)bm->zb_level,
		    (u_longlong_t)bm->zb_blkid,
		    blkbuf);
	}

	mutex_exit(&pool->spa_scrub_lock);
}

/* ARGSUSED */
static int
zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
Expand All @@ -2049,39 +2092,23 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));

if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
int ioerr;
size_t size = BP_GET_PSIZE(bp);
void *data = malloc(size);
void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;

/* If it's an intent log block, failure is expected. */
if (zb->zb_level == ZB_ZIL_LEVEL)
flags |= ZIO_FLAG_SPECULATIVE;

ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));

free(data);
mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight > max_inflight)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);

if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;
zio_nowait(zio_read(NULL, spa, bp, data, size,
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));

if (dump_opt['b'] >= 2)
sprintf_blkptr(blkbuf, bp);
else
blkbuf[0] = '\0';

(void) printf("zdb_blkptr_cb: "
"Got error %d reading "
"<%llu, %llu, %lld, %llx> %s -- skipping\n",
ioerr,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid,
blkbuf);
}
}

zcb->zcb_readfails = 0;
Expand Down Expand Up @@ -2283,6 +2310,18 @@ dump_block_stats(spa_t *spa)

zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);

/*
* If we've traversed the data blocks then we need to wait for those
* I/Os to complete. We leverage "The Godfather" zio to wait on
* all async I/Os to complete.
*/
if (dump_opt['c']) {
(void) zio_wait(spa->spa_async_zio_root);
spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_GODFATHER);
}

if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
Expand Down Expand Up @@ -3040,7 +3079,7 @@ main(int argc, char **argv)

dprintf_setup(&argc, argv);

while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
case 'b':
case 'c':
Expand Down Expand Up @@ -3069,6 +3108,15 @@ main(int argc, char **argv)
case 'v':
verbose++;
break;
case 'M':
max_inflight = strtoull(optarg, NULL, 0);
if (max_inflight == 0) {
(void) fprintf(stderr, "maximum number "
"of inflight I/Os must be greater "
"than 0\n");
usage();
}
break;
case 'p':
if (searchdirs == NULL) {
searchdirs = umem_alloc(sizeof (char *),
Expand Down
27 changes: 21 additions & 6 deletions usr/src/cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ get_usage(zpool_help_t idx) {
case HELP_REMOVE:
return (gettext("\tremove <pool> <device> ...\n"));
case HELP_REOPEN:
return (""); /* Undocumented command */
return (gettext("\treopen <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
Expand Down Expand Up @@ -3550,22 +3550,37 @@ zpool_do_reguid(int argc, char **argv)
* zpool reopen <pool>
*
* Reopen the pool so that the kernel can update the sizes of all vdevs.
*
* NOTE: This command is currently undocumented. If the command is ever
* exposed then the appropriate usage() messages will need to be made.
*/
int
zpool_do_reopen(int argc, char **argv)
{
int c;
int ret = 0;
zpool_handle_t *zhp;
char *pool;

/* check options */
while ((c = getopt(argc, argv, "")) != -1) {
switch (c) {
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
}
}

argc--;
argv++;

if (argc != 1)
return (2);
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name\n"));
usage(B_FALSE);
}

if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}

pool = argv[0];
if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
Expand Down
45 changes: 45 additions & 0 deletions usr/src/lib/libzpool/common/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1012,3 +1012,48 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
return (0);
}

/*
 * Initialize a buf by zero-filling the entire structure.
 */
void
bioinit(buf_t *bp)
{
	bzero(bp, sizeof (*bp));
}

/*
 * Mark the I/O described by bp as complete.
 *
 * If the buf has a b_iodone callback, completion is handed to that callback
 * and B_DONE is not touched here.  Otherwise B_DONE is set in b_flags; the
 * buf must not already be marked done.
 */
void
biodone(buf_t *bp)
{
	if (bp->b_iodone == NULL) {
		ASSERT((bp->b_flags & B_DONE) == 0);
		bp->b_flags |= B_DONE;
		return;
	}

	/* The callback's return value is not used. */
	(void) bp->b_iodone(bp);
}

/*
 * Record an error code in a buf.
 *
 * A non-zero error sets B_ERROR in b_flags; an error of zero clears it.
 * In both cases b_error is overwritten with the supplied value.  The buf
 * pointer must be non-NULL and the error must be non-negative.
 */
void
bioerror(buf_t *bp, int error)
{
	ASSERT(bp != NULL);
	ASSERT(error >= 0);

	if (error == 0)
		bp->b_flags &= ~B_ERROR;
	else
		bp->b_flags |= B_ERROR;

	bp->b_error = error;
}


/*
 * Return the error associated with a buf.
 *
 * Returns 0 when B_ERROR is not set in b_flags.  When it is set, returns
 * b_error, or EIO if b_error is zero.
 */
int
geterror(struct buf *bp)
{
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);

	/* B_ERROR without a specific code is reported as a generic I/O error. */
	return (bp->b_error != 0 ? bp->b_error : EIO);
}
30 changes: 30 additions & 0 deletions usr/src/lib/libzpool/common/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,36 @@ extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *);
extern void cyclic_remove(cyclic_id_t);
extern int cyclic_reprogram(cyclic_id_t, hrtime_t);

/*
 * Buf structure
 *
 * A buf_t plus the B_* bits stored in its b_flags field, together with the
 * bioinit()/biodone()/bioerror()/geterror() routines that operate on it.
 * NOTE(review): presumably a cut-down counterpart of the kernel's buf(9S)
 * for libzpool consumers -- confirm the flag values against <sys/buf.h>.
 */
#define B_BUSY 0x0001
/* B_DONE: set by biodone() when the buf has no b_iodone callback. */
#define B_DONE 0x0002
/* B_ERROR: set or cleared by bioerror(); tested by geterror(). */
#define B_ERROR 0x0004
#define B_READ 0x0040 /* read when I/O occurs */
#define B_WRITE 0x0100 /* non-read pseudo-flag */

typedef struct buf {
/* Bitmask of the B_* flags defined above. */
int b_flags;
/* NOTE(review): presumably the transfer size in bytes -- confirm. */
size_t b_bcount;
union {
/* NOTE(review): presumably the address of the data buffer -- confirm. */
caddr_t b_addr;
} b_un;

/* Block number storage; accessed through the b_lblkno alias below. */
lldaddr_t _b_blkno;
#define b_lblkno _b_blkno._f
/* NOTE(review): presumably bytes left untransferred -- confirm. */
size_t b_resid;
/* NOTE(review): presumably the size of the allocated buffer -- confirm. */
size_t b_bufsize;
/* Optional completion callback; biodone() invokes it when non-NULL. */
int (*b_iodone)(struct buf *);
/* Error code stored by bioerror() and reported by geterror(). */
int b_error;
/* Opaque pointer; not interpreted by the bio routines declared below. */
void *b_private;
} buf_t;

/* Zero-fills the buf. */
extern void bioinit(buf_t *);
/* Runs the b_iodone callback if one is set, otherwise sets B_DONE. */
extern void biodone(buf_t *);
/* Stores the error code and sets/clears B_ERROR accordingly. */
extern void bioerror(buf_t *, int);
/* Returns 0 unless B_ERROR is set; then b_error, or EIO if that is 0. */
extern int geterror(buf_t *);

#ifdef __cplusplus
}
#endif
Expand Down
30 changes: 22 additions & 8 deletions usr/src/man/man1m/zdb.1m
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
.\"
.\"
.\" Copyright 2012, Richard Lowe.
.\" Copyright (c) 2012 by Delphix. All rights reserved.
.\"
.TH "ZDB" "1M" "February 15, 2012" "" ""

Expand All @@ -19,21 +20,23 @@

.SH "SYNOPSIS"
\fBzdb\fR [-CumdibcsDvhLXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
\fIpoolname\fR [\fIobject\fR ...]
[-U \fIcache\fR] [-M \fIinflight I/Os\fR] [\fIpoolname\fR
[\fIobject\fR ...]]

.P
\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] \fIdataset\fR [\fIobject\fR ...]
\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] [-U \fIcache\fR]
\fIdataset\fR [\fIobject\fR ...]

.P
\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] \fIpoolname\fR
[\fIvdev\fR [\fImetaslab\fR ...]]
\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] [-U \fIcache\fR]
\fIpoolname\fR [\fIvdev\fR [\fImetaslab\fR ...]]

.P
\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] \fIpoolname\fR
\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR
\fIvdev\fR:\fIoffset\fR:\fIsize\fR[:\fIflags\fR]

.P
\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] \fIpoolname\fR
\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR

.P
\fBzdb\fR -l [-uA] \fIdevice\fR
Expand Down Expand Up @@ -354,6 +357,18 @@ Attempt to make an unreadable pool readable by trying progressively older
transactions.
.RE

.sp
.ne 2
.na
\fB-M \fIinflight I/Os\fR \fR
.ad
.sp .6
.RS 4n
Limit the number of outstanding checksum I/Os to the specified value. The
default value is 200. This option affects the performance of the \fB-c\fR
option.
.RE

.sp
.ne 2
.na
Expand Down Expand Up @@ -384,8 +399,7 @@ and their associated transaction numbers.
.ad
.sp .6
.RS 4n
Use a cache file other than \fB/etc/zfs/zpool.cache\fR. This option is only
valid with \fB-C\fR
Use a cache file other than \fB/etc/zfs/zpool.cache\fR.
.RE

.sp
Expand Down
Loading

0 comments on commit 31d7e8f

Please sign in to comment.