Skip to content

Commit

Permalink
Write label 2,3 uberblocks when vdev expands
Browse files Browse the repository at this point in the history
When vdev_psize increases, the location of labels 2 and 3 changes
because their location is relative to the end of the device.

The configs for labels 2 and 3 are written during the next spa_sync()
because the vdev is added to the dirty config list.  However, the
uberblock rings are not re-written in their new location, which leaves
the device vulnerable if the beginning of the device is overwritten or
damaged.

This patch copies the uberblock ring from label 0 to labels 2 and 3,
in their new locations, at the next sync after vdev_psize increases.

Also, add a test zpool_expand_004_pos.ksh to confirm the uberblocks
are copied.

Reviewed-by: BearBabyLiu <[email protected]>
Reviewed-by: Andreas Dilger <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Olaf Faaland <[email protected]>
Closes openzfs#5108
  • Loading branch information
ofaaland authored and behlendorf committed May 2, 2017
1 parent e7fbeb6 commit 9d3f7b8
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 2 deletions.
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ struct vdev {
boolean_t vdev_cant_write; /* vdev is failing all writes */
boolean_t vdev_isspare; /* was a hot spare */
boolean_t vdev_isl2cache; /* was a l2cache device */
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
Expand Down
7 changes: 7 additions & 0 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,13 @@ vdev_open(vdev_t *vd)
max_asize = max_osize;
}

/*
* If the vdev was expanded, record this so that we can re-create the
* uberblock rings in labels {2,3}, during the next sync.
*/
if ((psize > vd->vdev_psize) && (vd->vdev_psize != 0))
vd->vdev_copy_uberblocks = B_TRUE;

vd->vdev_psize = psize;

/*
Expand Down
61 changes: 61 additions & 0 deletions module/zfs/vdev_label.c
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,60 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
spa_config_exit(spa, SCL_ALL, FTAG);
}

/*
 * For use when a leaf vdev is expanded.
 * The location of labels 2 and 3 changed, and at the new location the
 * uberblock rings are either empty or contain garbage.  The sync will write
 * new configs there because the vdev is dirty, but expansion also needs the
 * uberblock rings copied.  Read them from label 0 which did not move.
 *
 * Since the point is to populate labels {2,3} with valid uberblocks,
 * we zero uberblocks we fail to read or which are not valid.
 *
 * The caller must hold SCL_STATE as reader and 'vd' must be a leaf vdev;
 * both conditions are asserted.  Read and write failures are tolerated
 * (ZIO_FLAG_CANFAIL) and are not reported to the caller.
 */
static void
vdev_copy_uberblocks(vdev_t *vd)
{
	abd_t *ub_abd;
	zio_t *write_zio;
	int locks = (SCL_L2ARC | SCL_ZIO);
	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
	    ZIO_FLAG_SPECULATIVE;

	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) ==
	    SCL_STATE);
	ASSERT(vd->vdev_ops->vdev_op_leaf);

	spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);

	/*
	 * The buffer is inspected through abd_to_buf() below, which is only
	 * valid for a linear ABD, so request a linear allocation explicitly
	 * instead of relying on what abd_alloc() happens to return.
	 */
	ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);

	/* Parent for every label write issued below; waited on once. */
	write_zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
	for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
		const int src_label = 0;
		zio_t *zio;

		/* Read ring slot n from label 0 and wait for the result. */
		zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
		vdev_label_read(zio, vd, src_label, ub_abd,
		    VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
		    NULL, NULL, flags);

		/* An unreadable or invalid slot is propagated as zeros. */
		if (zio_wait(zio) || uberblock_verify(abd_to_buf(ub_abd)))
			abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));

		/*
		 * Write the slot to the same ring offset in labels 2 and up.
		 *
		 * NOTE(review): ub_abd is handed to these writes and then
		 * reused by the next iteration's read, while write_zio is
		 * only waited on after the loop -- confirm the writes have
		 * consumed the buffer before it is overwritten.
		 */
		for (int l = 2; l < VDEV_LABELS; l++)
			vdev_label_write(write_zio, vd, l, ub_abd,
			    VDEV_UBERBLOCK_OFFSET(vd, n),
			    VDEV_UBERBLOCK_SIZE(vd), NULL, NULL,
			    flags | ZIO_FLAG_DONT_PROPAGATE);
	}
	(void) zio_wait(write_zio);

	spa_config_exit(vd->vdev_spa, locks, FTAG);

	abd_free(ub_abd);
}

/*
* On success, increment root zio's count of good writes.
* We only get credit for writes to known-visible vdevs; see spa_vdev_add().
Expand Down Expand Up @@ -1163,6 +1217,13 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
if (!vdev_writeable(vd))
return;

/* If the vdev was expanded, need to copy uberblock rings. */
if (vd->vdev_state == VDEV_STATE_HEALTHY &&
vd->vdev_copy_uberblocks == B_TRUE) {
vdev_copy_uberblocks(vd);
vd->vdev_copy_uberblocks = B_FALSE;
}

n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);

/* Copy the uberblock_t into the ABD */
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ tests = ['zpool_detach_001_neg']
# zpool_expand_001_pos - https://github.com/zfsonlinux/zfs/issues/2437
# zpool_expand_003_pos - https://github.com/zfsonlinux/zfs/issues/2437
[tests/functional/cli_root/zpool_expand]
tests = ['zpool_expand_002_pos']
tests = ['zpool_expand_002_pos', 'zpool_expand_004_pos']

# DISABLED:
# zpool_export_004_pos - https://github.com/zfsonlinux/zfs/issues/3484
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
zpool_expand_001_pos.ksh \
zpool_expand_002_pos.ksh \
zpool_expand_003_neg.ksh
zpool_expand_003_neg.ksh \
zpool_expand_004_pos.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#! /bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg

#
# DESCRIPTION:
# After vdev expansion, all 4 labels have the same set of uberblocks.
#
#
# STRATEGY:
# 1) Create 3 files
# 2) Create a pool backed by the files
# 3) Expand the files' size with truncate
# 4) Use zpool online -e to expand the vdevs
# 5) Check that for all the devices, all 4 labels have the same uberblocks
#

# Declare the context this test may run in ("global").  verify_runnable is
# not defined in this file; presumably it comes from the sourced test library.
verify_runnable "global"

#
# Exit handler: destroy the test pool if it still exists and remove the
# three backing files.  Safe to call when any of them is already gone.
#
function cleanup
{
	# Keep the loop index local so it cannot clobber the caller's $i.
	typeset i

	if poolexists $TESTPOOL1; then
		log_must zpool destroy $TESTPOOL1
	fi

	for i in 1 2 3; do
		# Use an explicit 'if' rather than '[ -e f ] && cmd' so that a
		# missing file does not become the function's exit status.
		if [[ -e ${TEMPFILE}.$i ]]; then
			log_must rm "${TEMPFILE}.$i"
		fi
	done
}

log_onexit cleanup

log_assert "After vdev expansion, all 4 labels have the same set of uberblocks."

# $type is deliberately left unquoted below: the " " entry must expand to
# nothing so that the first pass creates a plain (striped) pool.
for type in " " mirror raidz raidz2; do
	# Start every pass from files of the original size.
	for i in 1 2 3; do
		log_must truncate -s $org_size ${TEMPFILE}.$i
	done

	log_must zpool create $TESTPOOL1 $type $TEMPFILE.1 \
	    $TEMPFILE.2 $TEMPFILE.3

	sync_pool $TESTPOOL1

	# Grow the backing files, then ask ZFS to expand onto the new space.
	for i in 1 2 3; do
		log_must truncate -s $exp_size ${TEMPFILE}.$i
	done

	for i in 1 2 3; do
		log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
	done

	# The uberblock rings are copied at the next sync after expansion.
	sync_pool $TESTPOOL1

	for i in 1 2 3; do
		# Every 'labels = ' line printed by zdb lists the labels that
		# share one config or uberblock; all must read '0 1 2 3'.
		label_lines=$(zdb -lu ${TEMPFILE}.$i | grep 'labels = ')

		# Without this check a failing or empty zdb run would yield a
		# non-uniform count of 0 and the test would pass vacuously.
		if [[ -z $label_lines ]]; then
			log_fail "zdb -lu reported no labels for ${TEMPFILE}.$i"
		fi

		non_uniform=$(echo "$label_lines" | \
		    grep -c -v 'labels = 0 1 2 3')

		log_note "non-uniform label count: $non_uniform"

		if [[ $non_uniform -ne 0 ]]; then
			log_fail "After vdev expansion, all labels contents are not identical"
		fi
	done

	log_must zpool destroy $TESTPOOL1
done

log_pass "After vdev expansion, all 4 labels have the same set of uberblocks."

0 comments on commit 9d3f7b8

Please sign in to comment.