From fe3eed4492c4e9af88bec4d309aa4729004e010c Mon Sep 17 00:00:00 2001 From: Don Brady Date: Tue, 16 Feb 2021 15:48:59 -0700 Subject: [PATCH 1/2] Checksum errors may not be counted Signed-off-by: Don Brady --- include/sys/spa.h | 3 +- module/zfs/spa.c | 8 +- module/zfs/vdev.c | 5 +- module/zfs/vdev_indirect.c | 10 +- module/zfs/vdev_raidz.c | 20 ++- module/zfs/zfs_fm.c | 46 +++++- module/zfs/zio.c | 11 +- tests/runfiles/common.run | 3 +- .../cli_root/zpool_events/Makefile.am | 3 +- .../zpool_events_clear_retained.ksh | 135 ++++++++++++++++++ .../zpool_events/zpool_events_duplicates.ksh | 11 -- 11 files changed, 213 insertions(+), 42 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh diff --git a/include/sys/spa.h b/include/sys/spa.h index 0762ae8a3e13..8391be8328b6 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. @@ -1150,6 +1150,7 @@ extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd, extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd, zio_t *zio); extern void zfs_ereport_taskq_fini(void); +extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd); extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, const char *name, nvlist_t *aux); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 56354a107e66..b3a2a7277eb9 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. @@ -9759,6 +9759,12 @@ void spa_event_notify(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name) { spa_event_post(spa_event_create(spa, vd, hist_nvl, name)); + + if (strcmp(name, ESC_ZFS_SCRUB_FINISH) == 0 || + strcmp(name, ESC_ZFS_RESILVER_FINISH) == 0) { + /* Clear recent events (i.e. duplicate events tracking) */ + zfs_ereport_clear(spa, vd == spa->spa_root_vdev ? NULL : vd); + } } /* state manipulation functions */ diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 36001e0a6626..e0b4c7d60e18 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome @@ -4170,6 +4170,9 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_parent->vdev_ops == &vdev_spare_ops && vd->vdev_parent->vdev_child[0] == vd) vd->vdev_unspare = B_TRUE; + + /* Clear recent events cache (i.e. duplicate events tracking) */ + zfs_ereport_clear(spa, vd == spa->spa_root_vdev ? NULL : vd); } boolean_t diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index b26d0993711a..416f4c54d8e8 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1485,14 +1485,12 @@ vdev_indirect_all_checksum_errors(zio_t *zio) vdev_t *vd = ic->ic_vdev; - int ret = zfs_ereport_post_checksum(zio->io_spa, vd, + (void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio, is->is_target_offset, is->is_size, NULL, NULL, NULL); - if (ret != EALREADY) { - mutex_enter(&vd->vdev_stat_lock); - vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&vd->vdev_stat_lock); - } + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&vd->vdev_stat_lock); } } } diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index f4812e61252c..57a594c80ce3 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -1852,14 +1852,12 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - int ret = zfs_ereport_post_checksum(zio->io_spa, vd, + (void) zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data, &zbc); - if (ret != EALREADY) { - mutex_enter(&vd->vdev_stat_lock); - vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&vd->vdev_stat_lock); - } + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&vd->vdev_stat_lock); } } @@ -2453,14 +2451,12 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - int ret = zfs_ereport_start_checksum(zio->io_spa, + (void) zfs_ereport_start_checksum(zio->io_spa, cvd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, (void *)(uintptr_t)c, &zbc); - if (ret != EALREADY) { - mutex_enter(&cvd->vdev_stat_lock); - cvd->vdev_stat.vs_checksum_errors++; - mutex_exit(&cvd->vdev_stat_lock); - } + mutex_enter(&cvd->vdev_stat_lock); + cvd->vdev_stat.vs_checksum_errors++; + mutex_exit(&cvd->vdev_stat_lock); } } } diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index ea71ef325c89..9e9f4a80ba1d 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012,2020 by Delphix. All rights reserved. + * Copyright (c) 2012,2021 by Delphix. All rights reserved. */ #include @@ -247,6 +247,44 @@ zfs_ereport_schedule_cleaner(void) ddi_get_lbolt() + NSEC_TO_TICK(timeout)); } +/* + * Clear entries for a given vdev or all vdevs in a pool when vdev == NULL + */ +void +zfs_ereport_clear(spa_t *spa, vdev_t *vd) +{ + uint64_t vdev_guid, pool_guid; + int cnt = 0; + + ASSERT(vd != NULL || spa != NULL); + if (vd == NULL) { + vdev_guid = 0; + pool_guid = spa_guid(spa); + } else { + vdev_guid = vd->vdev_guid; + pool_guid = 0; + } + + mutex_enter(&recent_events_lock); + + recent_events_node_t *next = list_head(&recent_events_list); + while (next != NULL) { + recent_events_node_t *entry = next; + + next = list_next(&recent_events_list, next); + + if (entry->re_vdev_guid == vdev_guid || + entry->re_pool_guid == pool_guid) { + avl_remove(&recent_events_tree, entry); + list_remove(&recent_events_list, entry); + kmem_free(entry, sizeof (*entry)); + cnt++; + } + } + + mutex_exit(&recent_events_lock); +} + /* * Check if an ereport would be a duplicate of one recently posted. * @@ -951,6 +989,12 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, } return (eip); } +#else +/*ARGSUSED*/ +void +zfs_ereport_clear(spa_t *spa, vdev_t *vd) +{ +} #endif /* diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 538a2a2cdd9b..74d1595a85d3 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -4255,15 +4255,12 @@ zio_checksum_verify(zio_t *zio) zio->io_error = error; if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { - int ret = zfs_ereport_start_checksum(zio->io_spa, + (void) zfs_ereport_start_checksum(zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, zio->io_offset, zio->io_size, NULL, &info); - - if (ret != EALREADY) { - mutex_enter(&zio->io_vd->vdev_stat_lock); - zio->io_vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&zio->io_vd->vdev_stat_lock); - } + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); } } diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 11960629e39e..836b9a606188 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -361,7 +361,8 @@ tags = ['functional', 'cli_root', 'zpool_detach'] [tests/functional/cli_root/zpool_events] tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow', - 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates'] + 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates', + 'zpool_events_clear_retained'] tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_export] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am index 99c46f0143c2..765df102229d 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am @@ -11,7 +11,8 @@ dist_pkgdata_SCRIPTS = \ zpool_events_follow.ksh \ zpool_events_poolname.ksh \ zpool_events_errors.ksh \ - zpool_events_duplicates.ksh + zpool_events_duplicates.ksh \ + zpool_events_clear_retained.ksh dist_pkgdata_DATA = \ zpool_events.cfg \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh new file mode 100755 index 000000000000..fdf56b2cf9a6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh @@ -0,0 +1,135 @@ +#!/bin/ksh -p +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2021 by Delphix. All rights reserved. +# + +# DESCRIPTION: +# Verify that new errors after a pool scrub are considered a duplicate +# +# STRATEGY: +# 1. Create a raidz pool with a file +# 2. Inject garbage into one of the vdevs +# 3. Scrub the pool +# 4. Observe the checksum error counts +# 5. Repeat inject and pool scrub +# 6. Verify that second pass also produces similar errors (i.e. not +# treated as a duplicate) +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +MOUNTDIR=$TEST_BASE_DIR/mount +FILEPATH=$MOUNTDIR/target +VDEV1=$TEST_BASE_DIR/vfile1 +VDEV2=$TEST_BASE_DIR/vfile2 +VDEV3=$TEST_BASE_DIR/vfile3 +SUPPLY=$TEST_BASE_DIR/supply +POOL=test_pool +FILESIZE="15M" +DAMAGEBLKS=10 + +OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX) +RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX) +OLD_CHECKSUMS=$(get_tunable CHECKSUM_EVENTS_PER_SECOND) + +EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports" + +function cleanup +{ + log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS + log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX + + zpool events -c + if poolexists $POOL ; then + zpool export $POOL + fi + log_must rm -f $VDEV1 $VDEV2 $VDEV3 +} + +function damage_and_repair +{ + log_must zpool clear $POOL $VDEV1 + log_must zpool events -c + + log_note injecting damage to $VDEV1 + log_must dd conv=notrunc if=$SUPPLY of=$VDEV1 bs=1M seek=4 count=$DAMAGEBLKS + log_must zpool scrub $POOL + log_must zpool wait -t scrub $POOL + log_note "pass $1 observed $($EREPORTS | grep -c checksum) checksum ereports" + + repaired=$(zpool status $POOL | grep "scan: scrub repaired" | awk '{print $4}') + if [ "$repaired" == "0B" ]; then + log_fail "INVALID TEST -- expected scrub to repair some blocks" + else + log_note "$repaired repaired during scrub" + fi +} + +function checksum_error_count +{ + zpool status -p $POOL | grep $VDEV1 | awk '{print $5}' +} + +assertion="Damage to recently repaired blocks should be reported/counted" +log_assert "$assertion" +log_note "zevent retain max setting: $RETAIN_MAX" + +log_onexit cleanup + +# Set our threshold high to avoid dropping events. +set_tunable64 ZEVENT_LEN_MAX 20000 +set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000 + +# Initialize resources for the test +log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3 +log_must dd if=/dev/urandom of=$SUPPLY bs=1M count=$DAMAGEBLKS +log_must mkdir -p $MOUNTDIR +log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL raidz $VDEV1 $VDEV2 $VDEV3 +log_must zfs set compression=off recordsize=16k $POOL +# create a file full of zeros +log_must mkfile -v $FILESIZE $FILEPATH +log_must zpool sync $POOL + +# run once and observe the checksum errors +damage_and_repair 1 +errcnt=$(checksum_error_count) +log_note "$errcnt errors observed" +# set expectaton of at least 75% of what we observed in first pass +(( expected = (errcnt * 75) / 100 )) + +# run again and we should observe new checksum errors +damage_and_repair 2 +errcnt=$(checksum_error_count) + +log_must zpool destroy $POOL + +if (( errcnt < expected )); then + log_fail "FAILED -- expecting at least $expected checksum errors but only observed $errcnt" +else + log_note observed $errcnt new checksum errors after a scrub + log_pass "$assertion" +fi + diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh index 1ba7b1b34496..d4194a5b8f5b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh @@ -114,21 +114,10 @@ function do_dup_test if [ "$RW" == "write" ] ; then log_must mkfile $FILESIZE $FILEPATH log_must zpool sync $POOL - else - # scrub twice to generate some duplicates - log_must zpool scrub $POOL - log_must zpool wait -t scrub $POOL - log_must zpool scrub $POOL - log_must zpool wait -t scrub $POOL fi log_must zinject -c all - # Wait for the pool to settle down and finish resilvering (if - # necessary). We want the errors to stop incrementing before we - # check for duplicates. - zpool wait -t resilver $POOL - ereports="$($EREPORTS | sort)" actual=$(echo "$ereports" | wc -l) unique=$(echo "$ereports" | uniq | wc -l) From 6a379cfb52aacc4b6e1946287c49b0a54e8fe3f0 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Wed, 17 Feb 2021 16:04:36 -0700 Subject: [PATCH 2/2] Use dsl_scan_done() & vdev_rebuild_complete_sync() Signed-off-by: Don Brady --- module/zfs/dsl_scan.c | 6 +++++- module/zfs/spa.c | 8 +------- module/zfs/vdev.c | 4 ++-- module/zfs/vdev_rebuild.c | 3 +++ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 40adfbcee4e1..a54cd6ca800e 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. @@ -987,6 +987,10 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) (u_longlong_t)spa_get_errlog_size(spa)); spa_async_request(spa, SPA_ASYNC_RESILVER); } + + /* Clear recent error events (i.e. duplicate events tracking) */ + if (complete) + zfs_ereport_clear(spa, NULL); } scn->scn_phys.scn_end_time = gethrestime_sec(); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index b3a2a7277eb9..56354a107e66 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2021 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2018, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. @@ -9759,12 +9759,6 @@ void spa_event_notify(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name) { spa_event_post(spa_event_create(spa, vd, hist_nvl, name)); - - if (strcmp(name, ESC_ZFS_SCRUB_FINISH) == 0 || - strcmp(name, ESC_ZFS_RESILVER_FINISH) == 0) { - /* Clear recent events (i.e. duplicate events tracking) */ - zfs_ereport_clear(spa, vd == spa->spa_root_vdev ? NULL : vd); - } } /* state manipulation functions */ diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index e0b4c7d60e18..64016311b9cf 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -4171,8 +4171,8 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_parent->vdev_child[0] == vd) vd->vdev_unspare = B_TRUE; - /* Clear recent events cache (i.e. duplicate events tracking) */ - zfs_ereport_clear(spa, vd == spa->spa_root_vdev ? NULL : vd); + /* Clear recent error events cache (i.e. duplicate events tracking) */ + zfs_ereport_clear(spa, vd); } boolean_t diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c index 784d1af15a81..037abc01f7a7 100644 --- a/module/zfs/vdev_rebuild.c +++ b/module/zfs/vdev_rebuild.c @@ -338,6 +338,9 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx) } cv_broadcast(&vd->vdev_rebuild_cv); + + /* Clear recent error events (i.e. duplicate events tracking) */ + zfs_ereport_clear(spa, NULL); } /*