From dd19821149cb7e3785249eb9be75dd9864c88d56 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 8 May 2023 11:17:41 -0700 Subject: [PATCH 1/3] zdb: consistent xattr output When using zdb to output the value of an xattr only interpret it as printable characters if the entire byte array is printable. Additionally, if the --parseable option is set always output the buffer contents as octal for easy parsing. Reviewed-by: Olaf Faaland Signed-off-by: Brian Behlendorf Closes #14830 --- cmd/zdb/zdb.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index ec5d1acacf85..cea80b690841 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -3322,13 +3322,22 @@ dump_znode_sa_xattr(sa_handle_t *hdl) (void) printf("\tSA xattrs: %d bytes, %d entries\n\n", sa_xattr_size, sa_xattr_entries); while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { + boolean_t can_print = !dump_opt['P']; uchar_t *value; uint_t cnt, idx; (void) printf("\t\t%s = ", nvpair_name(elem)); nvpair_value_byte_array(elem, &value, &cnt); + + for (idx = 0; idx < cnt; ++idx) { + if (!isprint(value[idx])) { + can_print = B_FALSE; + break; + } + } + for (idx = 0; idx < cnt; ++idx) { - if (isprint(value[idx])) + if (can_print) (void) putchar(value[idx]); else (void) printf("\\%3.3o", value[idx]); From 3095ca91c261756c509d0afb4422027753e68c90 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Mon, 8 May 2023 11:20:23 -0700 Subject: [PATCH 2/3] Verify block pointers before writing them out If a block pointer is corrupted (but the block containing it checksums correctly, e.g. due to a bug that overwrites random memory), we can often detect it before the block is read, with the `zfs_blkptr_verify()` function, which is used in `arc_read()`, `zio_free()`, etc. However, such corruption is not typically recoverable. To recover from it we would need to detect the memory error before the block pointer is written to disk. This PR verifies BP's that are contained in indirect blocks and dnodes before they are written to disk, in `dbuf_write_ready()`. This way, we'll get a panic before the on-disk data is corrupted. This will help us to diagnose what's causing the corruption, as well as being much easier to recover from. To minimize performance impact, only checks that can be done without holding the spa_config_lock are performed. Additionally, when corruption is detected, the raw words of the block pointer are logged. (Note that `dprintf_bp()` is a no-op by default, but if enabled it is not safe to use with invalid block pointers.) Reviewed-by: Rich Ercolani Reviewed-by: Brian Behlendorf Reviewed-by: Paul Zuchowski Reviewed-by: Alexander Motin Signed-off-by: Matthew Ahrens Closes #14817 --- cmd/zdb/zdb.c | 8 ++-- include/sys/zio.h | 8 +++- module/zfs/arc.c | 4 +- module/zfs/dbuf.c | 16 ++++++++ module/zfs/dsl_scan.c | 3 +- module/zfs/spa.c | 2 +- module/zfs/zio.c | 92 +++++++++++++++++++++++++++++++------------ 7 files changed, 98 insertions(+), 35 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index cea80b690841..5ab13b470dc0 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -8499,8 +8499,8 @@ zdb_read_block(char *thing, spa_t *spa) !(flags & ZDB_FLAG_DECOMPRESS)) { const blkptr_t *b = (const blkptr_t *)(void *) ((uintptr_t)buf + (uintptr_t)blkptr_offset); - if (zfs_blkptr_verify(spa, b, B_FALSE, BLK_VERIFY_ONLY) == - B_FALSE) { + if (zfs_blkptr_verify(spa, b, + BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) { abd_return_buf_copy(pabd, buf, lsize); borrowed = B_FALSE; buf = lbuf; @@ -8508,8 +8508,8 @@ zdb_read_block(char *thing, spa_t *spa) lbuf, lsize, psize, flags); b = (const blkptr_t *)(void *) ((uintptr_t)buf + (uintptr_t)blkptr_offset); - if (failed || zfs_blkptr_verify(spa, b, B_FALSE, - BLK_VERIFY_LOG) == B_FALSE) { + if (failed || zfs_blkptr_verify(spa, b, + BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) { printf("invalid block pointer at this DVA\n"); goto out; } diff --git a/include/sys/zio.h b/include/sys/zio.h index 3463682a1065..695bc09e6cb7 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -531,6 +531,12 @@ enum blk_verify_flag { BLK_VERIFY_HALT }; +enum blk_config_flag { + BLK_CONFIG_HELD, // SCL_VDEV held for writer + BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader + BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV +}; + extern int zio_bookmark_compare(const void *, const void *); extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, @@ -646,7 +652,7 @@ extern int zio_resume(spa_t *spa); extern void zio_resume_wait(spa_t *spa); extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, - boolean_t config_held, enum blk_verify_flag blk_verify); + enum blk_config_flag blk_config, enum blk_verify_flag blk_verify); /* * Initial setup and teardown. diff --git a/module/zfs/arc.c b/module/zfs/arc.c index c50228a2682f..bf8d99f94c39 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5696,8 +5696,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, * and treat it as a checksum error. This allows an alternate blkptr * to be tried when one is available (e.g. ditto blocks). */ - if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER, - BLK_VERIFY_LOG)) { + if (!zfs_blkptr_verify(spa, bp, (zio_flags & ZIO_FLAG_CONFIG_WRITER) ? + BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { rc = SET_ERROR(ECKSUM); goto done; } diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 8193fb244079..6a50f1927add 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -4636,6 +4636,20 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) i += DNODE_MIN_SIZE; if (dnp->dn_type != DMU_OT_NONE) { fill++; + for (int j = 0; j < dnp->dn_nblkptr; + j++) { + (void) zfs_blkptr_verify(spa, + &dnp->dn_blkptr[j], + BLK_CONFIG_SKIP, + BLK_VERIFY_HALT); + } + if (dnp->dn_flags & + DNODE_FLAG_SPILL_BLKPTR) { + (void) zfs_blkptr_verify(spa, + DN_SPILL_BLKPTR(dnp), + BLK_CONFIG_SKIP, + BLK_VERIFY_HALT); + } i += dnp->dn_extra_slots * DNODE_MIN_SIZE; } @@ -4653,6 +4667,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) { if (BP_IS_HOLE(ibp)) continue; + (void) zfs_blkptr_verify(spa, ibp, + BLK_CONFIG_SKIP, BLK_VERIFY_HALT); fill += BP_GET_FILL(ibp); } } diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index d6a9365df120..d398b6705551 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1970,7 +1970,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, DMU_USERUSED_OBJECT, tx); } arc_buf_destroy(buf, &buf); - } else if (!zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_LOG)) { + } else if (!zfs_blkptr_verify(spa, bp, + BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { /* * Sanity check the block pointer contents, this is handled * by arc_read() for the cases above. diff --git a/module/zfs/spa.c b/module/zfs/spa.c index c2a67fbc7c55..16396170273c 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -2387,7 +2387,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, * When damaged consider it to be a metadata error since we cannot * trust the BP_GET_TYPE and BP_GET_LEVEL values. */ - if (!zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_LOG)) { + if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { atomic_inc_64(&sle->sle_meta_count); return (0); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 0924fb6f40bc..365d34832c3a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -935,9 +935,35 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp, (void) vsnprintf(buf, sizeof (buf), fmt, adx); va_end(adx); + zfs_dbgmsg("bad blkptr at %px: " + "DVA[0]=%#llx/%#llx " + "DVA[1]=%#llx/%#llx " + "DVA[2]=%#llx/%#llx " + "prop=%#llx " + "pad=%#llx,%#llx " + "phys_birth=%#llx " + "birth=%#llx " + "fill=%#llx " + "cksum=%#llx/%#llx/%#llx/%#llx", + bp, + (long long)bp->blk_dva[0].dva_word[0], + (long long)bp->blk_dva[0].dva_word[1], + (long long)bp->blk_dva[1].dva_word[0], + (long long)bp->blk_dva[1].dva_word[1], + (long long)bp->blk_dva[2].dva_word[0], + (long long)bp->blk_dva[2].dva_word[1], + (long long)bp->blk_prop, + (long long)bp->blk_pad[0], + (long long)bp->blk_pad[1], + (long long)bp->blk_phys_birth, + (long long)bp->blk_birth, + (long long)bp->blk_fill, + (long long)bp->blk_cksum.zc_word[0], + (long long)bp->blk_cksum.zc_word[1], + (long long)bp->blk_cksum.zc_word[2], + (long long)bp->blk_cksum.zc_word[3]); switch (blk_verify) { case BLK_VERIFY_HALT: - dprintf_bp(bp, "blkptr at %p dprintf_bp():", bp); zfs_panic_recover("%s: %s", spa_name(spa), buf); break; case BLK_VERIFY_LOG: @@ -958,47 +984,54 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp, * If everything checks out B_TRUE is returned. The zfs_blkptr_verify * argument controls the behavior when an invalid field is detected. * - * Modes for zfs_blkptr_verify: - * 1) BLK_VERIFY_ONLY (evaluate the block) - * 2) BLK_VERIFY_LOG (evaluate the block and log problems) - * 3) BLK_VERIFY_HALT (call zfs_panic_recover on error) + * Values for blk_verify_flag: + * BLK_VERIFY_ONLY: evaluate the block + * BLK_VERIFY_LOG: evaluate the block and log problems + * BLK_VERIFY_HALT: call zfs_panic_recover on error + * + * Values for blk_config_flag: + * BLK_CONFIG_HELD: caller holds SCL_VDEV for writer + * BLK_CONFIG_NEEDED: caller holds no config lock, SCL_VDEV will be + * obtained for reader + * BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better + * performance */ boolean_t -zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held, - enum blk_verify_flag blk_verify) +zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, + enum blk_config_flag blk_config, enum blk_verify_flag blk_verify) { int errors = 0; if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p has invalid TYPE %llu", + "blkptr at %px has invalid TYPE %llu", bp, (longlong_t)BP_GET_TYPE(bp)); } if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p has invalid CHECKSUM %llu", + "blkptr at %px has invalid CHECKSUM %llu", bp, (longlong_t)BP_GET_CHECKSUM(bp)); } if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p has invalid COMPRESS %llu", + "blkptr at %px has invalid COMPRESS %llu", bp, (longlong_t)BP_GET_COMPRESS(bp)); } if (BP_GET_LSIZE(bp) > SPA_MAXBLOCKSIZE) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p has invalid LSIZE %llu", + "blkptr at %px has invalid LSIZE %llu", bp, (longlong_t)BP_GET_LSIZE(bp)); } if (BP_GET_PSIZE(bp) > SPA_MAXBLOCKSIZE) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p has invalid PSIZE %llu", + "blkptr at %px has invalid PSIZE %llu", bp, (longlong_t)BP_GET_PSIZE(bp)); } if (BP_IS_EMBEDDED(bp)) { if (BPE_GET_ETYPE(bp) >= NUM_BP_EMBEDDED_TYPES) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p has invalid ETYPE %llu", + "blkptr at %px has invalid ETYPE %llu", bp, (longlong_t)BPE_GET_ETYPE(bp)); } } @@ -1010,10 +1043,19 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held, if (!spa->spa_trust_config) return (errors == 0); - if (!config_held) - spa_config_enter(spa, SCL_VDEV, bp, RW_READER); - else + switch (blk_config) { + case BLK_CONFIG_HELD: ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER)); + break; + case BLK_CONFIG_NEEDED: + spa_config_enter(spa, SCL_VDEV, bp, RW_READER); + break; + case BLK_CONFIG_SKIP: + return (errors == 0); + default: + panic("invalid blk_config %u", blk_config); + } + /* * Pool-specific checks. * @@ -1028,20 +1070,20 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held, if (vdevid >= spa->spa_root_vdev->vdev_children) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p DVA %u has invalid VDEV %llu", + "blkptr at %px DVA %u has invalid VDEV %llu", bp, i, (longlong_t)vdevid); continue; } vdev_t *vd = spa->spa_root_vdev->vdev_child[vdevid]; if (vd == NULL) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p DVA %u has invalid VDEV %llu", + "blkptr at %px DVA %u has invalid VDEV %llu", bp, i, (longlong_t)vdevid); continue; } if (vd->vdev_ops == &vdev_hole_ops) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p DVA %u has hole VDEV %llu", + "blkptr at %px DVA %u has hole VDEV %llu", bp, i, (longlong_t)vdevid); continue; } @@ -1059,13 +1101,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held, asize = vdev_gang_header_asize(vd); if (offset + asize > vd->vdev_asize) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, - "blkptr at %p DVA %u has invalid OFFSET %llu", + "blkptr at %px DVA %u has invalid OFFSET %llu", bp, i, (longlong_t)offset); } } - if (errors > 0) - dprintf_bp(bp, "blkptr at %p dprintf_bp():", bp); - if (!config_held) + if (blk_config == BLK_CONFIG_NEEDED) spa_config_exit(spa, SCL_VDEV, bp); return (errors == 0); @@ -1203,7 +1243,7 @@ void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { - (void) zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_HALT); + (void) zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_HALT); /* * The check for EMBEDDED is a performance optimization. We @@ -1282,8 +1322,8 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, { zio_t *zio; - (void) zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER, - BLK_VERIFY_HALT); + (void) zfs_blkptr_verify(spa, bp, (flags & ZIO_FLAG_CONFIG_WRITER) ? + BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_HALT); if (BP_IS_EMBEDDED(bp)) return (zio_null(pio, spa, NULL, NULL, NULL, 0)); From 4eca03faaf6a1c05d739c738e3d5c0df2931da98 Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Mon, 8 May 2023 22:35:03 +0200 Subject: [PATCH 3/3] Fixes in head_errlog feature with encryption For the head_errlog feature use dsl_dataset_hold_obj_flags() instead of dsl_dataset_hold_obj() in order to enable access to the encryption keys (if loaded). This enables reporting of errors in encrypted filesystems which are not mounted but have their keys loaded. Reviewed-by: Brian Behlendorf Signed-off-by: George Amanakis Closes #14837 --- man/man7/zpool-features.7 | 7 +- module/zfs/spa_errlog.c | 76 ++++++++----------- .../zfs_receive/zfs_receive_corrective.ksh | 19 ++++- .../zpool_status/zpool_status_005_pos.ksh | 6 +- 4 files changed, 56 insertions(+), 52 deletions(-) diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index efe9e833996a..2b7dcb63829c 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -562,13 +562,12 @@ This feature enables the upgraded version of errlog, which required an on-disk error log format change. Now the error log of each head dataset is stored separately in the zap object and keyed by the head id. -In case of encrypted filesystems with unloaded keys or unmounted encrypted -filesystems we are unable to check their snapshots or clones for errors and -these will not be reported. -In this case no filenames will be reported either. With this feature enabled, every dataset affected by an error block is listed in the output of .Nm zpool Cm status . +In case of encrypted filesystems with unloaded keys we are unable to check +their snapshots or clones for errors and these will not be reported. +An "access denied" error will be reported. .Pp \*[instant-never] . diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index e0604c4a84af..44950a769d3b 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -163,15 +163,15 @@ name_to_object(char *buf, uint64_t *obj) static int get_head_ds(spa_t *spa, uint64_t dsobj, uint64_t *head_ds) { dsl_dataset_t *ds; - int error = dsl_dataset_hold_obj(spa->spa_dsl_pool, - dsobj, FTAG, &ds); + int error = dsl_dataset_hold_obj_flags(spa->spa_dsl_pool, + dsobj, DS_HOLD_FLAG_DECRYPT, FTAG, &ds); if (error != 0) return (error); ASSERT(head_ds); *head_ds = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); return (error); } @@ -297,7 +297,8 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, dsl_dataset_t *ds; dsl_pool_t *dp = spa->spa_dsl_pool; - int error = dsl_dataset_hold_obj(dp, head_ds, FTAG, &ds); + int error = dsl_dataset_hold_obj_flags(dp, head_ds, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds); if (error != 0) return (error); @@ -306,23 +307,6 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, boolean_t check_snapshot = B_TRUE; error = find_birth_txg(ds, zep, &latest_txg); - /* - * If the filesystem is encrypted and the key is not loaded - * or the encrypted filesystem is not mounted the error will be EACCES. - * In that case report an error in the head filesystem and return. - */ - if (error == EACCES) { - dsl_dataset_rele(ds, FTAG); - zbookmark_phys_t zb; - zep_to_zb(head_ds, zep, &zb); - error = copyout_entry(&zb, uaddr, count); - if (error != 0) { - dsl_dataset_rele(ds, FTAG); - return (error); - } - return (0); - } - /* * If find_birth_txg() errors out otherwise, let txg_to_consider be * equal to the spa's syncing txg: if check_filesystem() errors out @@ -334,7 +318,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, zep_to_zb(head_ds, zep, &zb); error = copyout_entry(&zb, uaddr, count); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); return (error); } check_snapshot = B_FALSE; @@ -352,14 +336,14 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); return (error); } } if (snap_count == 0) { /* Filesystem without snapshots. */ - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); return (0); } @@ -371,20 +355,21 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, uint64_t snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; uint64_t zap_clone = dsl_dir_phys(ds->ds_dir)->dd_clones; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); /* Check only snapshots created from this file system. */ while (snap_obj != 0 && zep->zb_birth < snap_obj_txg && snap_obj_txg <= txg_to_consider) { - error = dsl_dataset_hold_obj(dp, snap_obj, FTAG, &ds); + error = dsl_dataset_hold_obj_flags(dp, snap_obj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds); if (error != 0) goto out; if (dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj != head_ds) { snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); continue; } @@ -404,13 +389,14 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, zep_to_zb(snap_obj, zep, &zb); error = copyout_entry(&zb, uaddr, count); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, + FTAG); goto out; } } snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); } if (zap_clone == 0 || aff_snap_count == 0) @@ -428,8 +414,8 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, zap_cursor_advance(zc)) { dsl_dataset_t *clone; - error = dsl_dataset_hold_obj(dp, za->za_first_integer, - FTAG, &clone); + error = dsl_dataset_hold_obj_flags(dp, za->za_first_integer, + DS_HOLD_FLAG_DECRYPT, FTAG, &clone); if (error != 0) break; @@ -444,7 +430,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, == snap_obj_array[i]) found = B_TRUE; } - dsl_dataset_rele(clone, FTAG); + dsl_dataset_rele_flags(clone, DS_HOLD_FLAG_DECRYPT, FTAG); if (!found) continue; @@ -474,14 +460,14 @@ find_top_affected_fs(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep, return (error); dsl_dataset_t *ds; - error = dsl_dataset_hold_obj(spa->spa_dsl_pool, oldest_dsobj, - FTAG, &ds); + error = dsl_dataset_hold_obj_flags(spa->spa_dsl_pool, oldest_dsobj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds); if (error != 0) return (error); *top_affected_fs = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); return (0); } @@ -744,7 +730,8 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, dsl_dataset_t *ds; objset_t *os; - int error = dsl_dataset_hold_obj(dp, zb.zb_objset, FTAG, &ds); + int error = dsl_dataset_hold_obj_flags(dp, zb.zb_objset, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds); if (error != 0) continue; @@ -759,7 +746,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, * truly persistent, it should re-appear after a scan. */ if (dmu_objset_from_ds(ds, &os) != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); continue; } @@ -767,7 +754,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, blkptr_t bp; if (dnode_hold(os, zep.zb_object, FTAG, &dn) != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); continue; } @@ -781,7 +768,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj, rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); if (error != 0 || BP_IS_HOLE(&bp)) continue; @@ -1259,7 +1246,8 @@ find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head, dsl_dataset_t *ds; dsl_pool_t *dp = spa->spa_dsl_pool; - int error = dsl_dataset_hold_obj(dp, old_head, FTAG, &ds); + int error = dsl_dataset_hold_obj_flags(dp, old_head, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds); if (error != 0) return (error); @@ -1267,9 +1255,9 @@ find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head, uint64_t prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; while (prev_obj != 0) { - dsl_dataset_rele(ds, FTAG); - if ((error = dsl_dataset_hold_obj(dp, prev_obj, - FTAG, &ds)) == 0 && + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); + if ((error = dsl_dataset_hold_obj_flags(dp, prev_obj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds)) == 0 && dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj == new_head) break; @@ -1279,7 +1267,7 @@ find_txg_ancestor_snapshot(spa_t *spa, uint64_t new_head, uint64_t old_head, prev_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; prev_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); ASSERT(prev_obj != 0); *txg = prev_obj_txg; return (0); diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh index 9ebde1cd9d32..261fc5eed8cb 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh @@ -163,7 +163,24 @@ corrupt_blocks_at_level "/$TESTPOOL/testfs5/$TESTFILE0" 0 log_must zfs unmount $TESTPOOL/testfs5 log_must zfs unload-key $TESTPOOL/testfs5 # test healing recv (on an encrypted dataset) using a raw send file -test_corrective_recv "$TESTPOOL/testfs5@snap1" $raw_backup +# This is a special case since with unloaded keys we cannot report errors +# in the filesystem. +log_must zpool scrub -w $TESTPOOL +log_must zpool status -v $TESTPOOL +log_mustnot eval "zpool status -v $TESTPOOL | \ + grep \"permission denied\"" +# make sure we will read the corruption from disk by flushing the ARC +log_must zinject -a +log_must eval "zfs recv -c $TESTPOOL/testfs5@snap1 < $raw_backup" + +log_must zpool scrub -w $TESTPOOL +log_must zpool status -v $TESTPOOL +log_mustnot eval "zpool status -v $TESTPOOL | \ + grep \"Permanent errors have been detected\"" +typeset cksum=$(md5digest $file) +[[ "$cksum" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum != $checksum)" + # non raw send file healing an encrypted dataset with an unloaded key will fail log_mustnot eval "zfs recv -c $TESTPOOL/testfs5@snap1 < $backup" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh index 04cd1892380d..ec4c67fb42f5 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_005_pos.ksh @@ -29,7 +29,7 @@ # Verify correct output with 'zpool status -v' after corrupting a file # # STRATEGY: -# 1. Create a pool, an ancrypted filesystem and a file +# 1. Create a pool, an encrypted filesystem and a file # 2. zinject checksum errors # 3. Unmount the filesystem and unload the key # 4. Scrub the pool @@ -76,8 +76,8 @@ log_must zpool sync $TESTPOOL2 log_must zpool scrub $TESTPOOL2 log_must zpool wait -t scrub $TESTPOOL2 log_must zpool status -v $TESTPOOL2 -log_must eval "zpool status -v $TESTPOOL2 | \ - grep \"Permanent errors have been detected\"" +log_mustnot eval "zpool status -v $TESTPOOL2 | \ + grep \"permission denied\"" log_mustnot eval "zpool status -v $TESTPOOL2 | grep '$file'" log_must eval "cat /$TESTPOOL2/pwd | zfs load-key $TESTPOOL2/$TESTFS1"