Skip to content

Commit

Permalink
=> openzfs#2598: dweeezil/illumos-4970-to-4974 - illumos 4970 through…
Browse files Browse the repository at this point in the history
… 4974 - extreme rewind enhancements.

illumos 4970 through 4974 - extreme rewind enhancements

4970 need controls on i/o issued by zpool import -XF
4971 zpool import -T should accept hex values
4972 zpool import -T implies extreme rewind, and thus a scrub
4973 spa_load_retry retries the same txg
4974 spa_load_verify() reads all data twice
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Reviewed by: George Wilson <[email protected]>
Approved by: Robert Mustacchi <[email protected]>

References:
    https://www.illumos.org/issues/4970
    https://www.illumos.org/issues/4971
    https://www.illumos.org/issues/4972
    https://www.illumos.org/issues/4973
    https://www.illumos.org/issues/4974

Notes:
    This set of patches adds a set of tunable parameters for the "extreme
    rewind" mode of pool import which allows control over the implicit
    scrub performed during such an import.

Ported by: Tim Chase <[email protected]>
  • Loading branch information
ahrens authored and FransUrbo committed Aug 19, 2014
1 parent c3a3b6f commit 4fe5a22
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 15 deletions.
4 changes: 2 additions & 2 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
*/
Expand Down Expand Up @@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv)

case 'T':
errno = 0;
txg = strtoull(optarg, &endptr, 10);
txg = strtoull(optarg, &endptr, 0);
if (errno != 0 || *endptr != '\0') {
(void) fprintf(stderr,
gettext("invalid txg value\n"));
Expand Down
47 changes: 47 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,53 @@ they operate close to quota or capacity limits.
Default value: 24
.RE

.sp
.ne 2
.na
\fBspa_load_verify_data\fR (int)
.ad
.RS 12n
Whether to traverse data blocks during an "extreme rewind" (\fB-FX\fR or
\fB-FXT <txg>\fR) pool import. Use 0 to disable and 1 to enable.

An extreme rewind import normally performs a full scrub of the pool.
If this parameter is set to 0, the scrub skips data (non-metadata) blocks.
It can be set to 0 prior to the import to completely disable scrubbing
data blocks or it can be set to 0 once the import has started to stop
scrubbing data blocks.
.sp
Default value: 1
.RE

.sp
.ne 2
.na
\fBspa_load_verify_metadata\fR (int)
.ad
.RS 12n
Whether to traverse metadata blocks during an "extreme rewind" (\fB-FX\fR
or \fB-FXT <txg>\fR) pool import. Use 0 to disable and 1 to enable.

An extreme rewind import normally performs a full scrub of the pool.
If this parameter is set to 0, the scrub is not performed. It can be
set to 0 prior to the import to completely disable the scrub or it can be
set to 0 once the import has started to stop a partially-completed scrub.
.sp
Default value: 1
.RE

.sp
.ne 2
.na
\fBspa_load_verify_maxinflight\fR (int)
.ad
.RS 12n
Maximum concurrent scrub I/Os during an "extreme rewind" (\fB-FX\fR or
\fB-FXT <txg>\fR) pool import.
.sp
Default value: 10000
.RE

.sp
.ne 2
.na
Expand Down
79 changes: 66 additions & 13 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio)
spa_load_error_t *sle = zio->io_private;
dmu_object_type_t type = BP_GET_TYPE(bp);
int error = zio->io_error;
spa_t *spa = zio->io_spa;

if (error) {
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
Expand All @@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio)
atomic_add_64(&sle->sle_data_count, 1);
}
zio_data_buf_free(zio->io_data, zio->io_size);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
cv_broadcast(&spa->spa_scrub_io_cv);
mutex_exit(&spa->spa_scrub_lock);
}

/*
* Tunables for the verification pass run by spa_load_verify() during import.
*
* spa_load_verify_maxinflight: maximum number of concurrent scrub i/os to
* create while verifying a pool while importing it. spa_load_verify_cb()
* waits while spa_scrub_inflight is at or above this value.
*/
int spa_load_verify_maxinflight = 10000;
/*
* When zero, spa_load_verify() skips the pool traversal and
* spa_load_verify_cb() issues no reads.
*/
int spa_load_verify_metadata = B_TRUE;
/*
* When zero, spa_load_verify_cb() issues no reads for blocks whose
* buffer type is ARC_BUFC_DATA.
*/
int spa_load_verify_data = B_TRUE;

/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
zio_t *rio = arg;
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
zio_t *rio;
size_t size;
void *data;

zio_nowait(zio_read(rio, spa, bp, data, size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
}
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
/*
* Note: normally this routine will not be called if
* spa_load_verify_metadata is not set. However, it may be useful
* to manually set the flag after the traversal has begun.
*/
if (!spa_load_verify_metadata)
return (0);
if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
return (0);

rio = arg;
size = BP_GET_PSIZE(bp);
data = zio_data_buf_alloc(size);

mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(rio, spa, bp, data, size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
return (0);
}

Expand All @@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa)
spa_load_error_t sle = { 0 };
zpool_rewind_policy_t policy;
boolean_t verify_ok = B_FALSE;
int error;
int error = 0;

zpool_get_rewind_policy(spa->spa_config, &policy);

Expand All @@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa)
rio = zio_root(spa, NULL, &sle,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
if (spa_load_verify_metadata) {
error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
spa_load_verify_cb, rio);
}

(void) zio_wait(rio);

Expand Down Expand Up @@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
spa_unload(spa);
spa_deactivate(spa);

spa->spa_load_max_txg--;
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;

spa_activate(spa, mode);
spa_async_suspend(spa);
Expand Down Expand Up @@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
spa->spa_load_max_txg = max_request;
if (max_request != UINT64_MAX)
spa->spa_extreme_rewind = B_TRUE;
}

load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
Expand Down Expand Up @@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify);
#endif

#if defined(_KERNEL) && defined(HAVE_SPL)
/*
* Register the spa_load_verify_* tunables as integer module parameters
* (permission mode 0644), each with its description string.
*/
module_param(spa_load_verify_maxinflight, int, 0644);
MODULE_PARM_DESC(spa_load_verify_maxinflight,
"Maximum concurrent scrub I/Os while verifying pool during import");

module_param(spa_load_verify_metadata, int, 0644);
MODULE_PARM_DESC(spa_load_verify_metadata,
"Set to traverse metadata on pool import");

module_param(spa_load_verify_data, int, 0644);
MODULE_PARM_DESC(spa_load_verify_data,
"Set to traverse data on pool import");
#endif

0 comments on commit 4fe5a22

Please sign in to comment.