From 89a6e7c974479d8d70d8ef0f037dffdf08b9187e Mon Sep 17 00:00:00 2001 From: Tim Chase Date: Mon, 3 Feb 2014 16:11:20 -0600 Subject: [PATCH 1/2] Fix generation of non-standard "scan" and "bptree_obj" in object directory. Commit 1421c89 expanded the size of a zbookmark_t from 24 to 25 64-bit values which similarly expands the size of the "scan" entry in the pool's object directory and causes the pool to become un-importable by other OpenZFS implementations. This commit renames "struct zbookmark" to "struct zbookmark_phys" since it is related to an on-disk format and adds a new "struct zbookmark" that contains the former as its first member. The effect is that the "struct zbookmark" items written to the object directory in both the "scan" and "bptree_obj" entries contain only the correct subset of the bookmark. Fixes #2094 --- cmd/zdb/zdb.c | 50 ++++++++++++------------ include/sys/zio.h | 33 ++++++++++------ lib/libzfs/libzfs_pool.c | 8 ++-- module/zfs/bptree.c | 4 +- module/zfs/dmu_diff.c | 10 ++--- module/zfs/dmu_send.c | 22 +++++------ module/zfs/dmu_traverse.c | 36 ++++++++--------- module/zfs/dsl_destroy.c | 2 +- module/zfs/dsl_scan.c | 82 +++++++++++++++++++-------------------- module/zfs/spa_errlog.c | 12 +++--- module/zfs/spa_stats.c | 8 ++-- module/zfs/zfs_fm.c | 8 ++-- module/zfs/zio.c | 26 ++++++------- module/zfs/zio_inject.c | 12 +++--- 14 files changed, 162 insertions(+), 151 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 8e60b9b1a879..d97a25697dc5 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -941,16 +941,16 @@ static uint64_t blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb) { if (dnp == NULL) { - ASSERT(zb->zb_level < 0); - if (zb->zb_object == 0) - return (zb->zb_blkid); - return (zb->zb_blkid * BP_GET_LSIZE(bp)); + ASSERT(zb->zb_phys.zb_level < 0); + if (zb->zb_phys.zb_object == 0) + return (zb->zb_phys.zb_blkid); + return (zb->zb_phys.zb_blkid * BP_GET_LSIZE(bp)); } - ASSERT(zb->zb_level >= 0); + 
ASSERT(zb->zb_phys.zb_level >= 0); - return ((zb->zb_blkid << - (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * + return ((zb->zb_phys.zb_blkid << + (zb->zb_phys.zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); } @@ -991,15 +991,15 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb, int l; ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_phys.zb_level); (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); - ASSERT(zb->zb_level >= 0); + ASSERT(zb->zb_phys.zb_level >= 0); for (l = dnp->dn_nlevels - 1; l >= -1; l--) { - if (l == zb->zb_level) { - (void) printf("L%llx", (u_longlong_t)zb->zb_level); + if (l == zb->zb_phys.zb_level) { + (void) printf("L%llx", (u_longlong_t)zb->zb_phys.zb_level); } else { (void) printf(" "); } @@ -1039,9 +1039,9 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, for (i = 0; i < epb; i++, cbp++) { zbookmark_t czb; - SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, - zb->zb_level - 1, - zb->zb_blkid * epb + i); + SET_BOOKMARK(&czb, zb->zb_phys.zb_objset, zb->zb_phys.zb_object, + zb->zb_phys.zb_level - 1, + zb->zb_phys.zb_blkid * epb + i); err = visit_indirect(spa, dnp, cbp, &czb); if (err) break; @@ -1068,7 +1068,7 @@ dump_indirect(dnode_t *dn) SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), dn->dn_object, dnp->dn_nlevels - 1, 0); for (j = 0; j < dnp->dn_nblkptr; j++) { - czb.zb_blkid = j; + czb.zb_phys.zb_blkid = j; (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, &dnp->dn_blkptr[j], &czb); } @@ -2166,10 +2166,10 @@ zdb_blkptr_done(zio_t *zio) "Got error %d reading " "<%llu, %llu, %lld, %llx> %s -- skipping\n", ioerr, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, + (u_longlong_t)zb->zb_phys.zb_objset, + (u_longlong_t)zb->zb_phys.zb_object, + (u_longlong_t)zb->zb_phys.zb_level, + 
(u_longlong_t)zb->zb_phys.zb_blkid, blkbuf); } mutex_exit(&spa->spa_scrub_lock); @@ -2200,7 +2200,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; /* If it's an intent log block, failure is expected. */ - if (zb->zb_level == ZB_ZIL_LEVEL) + if (zb->zb_phys.zb_level == ZB_ZIL_LEVEL) flags |= ZIO_FLAG_SPECULATIVE; mutex_enter(&spa->spa_scrub_lock); @@ -2219,9 +2219,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, sprintf_blkptr(blkbuf, bp); (void) printf("objset %llu object %llu " "level %lld offset 0x%llx %s\n", - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, + (u_longlong_t)zb->zb_phys.zb_objset, + (u_longlong_t)zb->zb_phys.zb_object, + (longlong_t)zb->zb_phys.zb_level, (u_longlong_t)blkid2offset(dnp, bp, zb), blkbuf); } @@ -2593,10 +2593,10 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (bp == NULL) return (0); - if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { + if (dump_opt['S'] > 1 && zb->zb_phys.zb_level == ZB_ROOT_LEVEL) { (void) printf("traversing objset %llu, %llu objects, " "%lu blocks so far\n", - (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_phys.zb_objset, (u_longlong_t)bp->blk_fill, avl_numnodes(t)); } diff --git a/include/sys/zio.h b/include/sys/zio.h index d4350badc100..3f39a4875002 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -255,21 +255,32 @@ extern const char *zio_type_name[ZIO_TYPES]; * Therefore it must not change size or alignment between 32/64 bit * compilation options. */ -struct zbookmark { +struct zbookmark_phys { uint64_t zb_objset; uint64_t zb_object; int64_t zb_level; uint64_t zb_blkid; +}; + +struct zbookmark { + /* + * The zbookmark_phys must be the first member. + */ + struct zbookmark_phys zb_phys; + + /* + * Add additional members here; they're not stored on-disk. 
+ */ char * zb_func; }; #define SET_BOOKMARK(zb, objset, object, level, blkid) \ { \ - (zb)->zb_objset = objset; \ - (zb)->zb_object = object; \ - (zb)->zb_level = level; \ - (zb)->zb_blkid = blkid; \ - (zb)->zb_func = FTAG; \ + (zb)->zb_phys.zb_objset = objset; \ + (zb)->zb_phys.zb_object = object; \ + (zb)->zb_phys.zb_level = level; \ + (zb)->zb_phys.zb_blkid = blkid; \ + (zb)->zb_func = FTAG; \ } #define ZB_DESTROYED_OBJSET (-1ULL) @@ -282,12 +293,12 @@ struct zbookmark { #define ZB_ZIL_LEVEL (-2LL) #define ZB_IS_ZERO(zb) \ - ((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \ - (zb)->zb_level == 0 && (zb)->zb_blkid == 0) + ((zb)->zb_phys.zb_objset == 0 && (zb)->zb_phys.zb_object == 0 && \ + (zb)->zb_phys.zb_level == 0 && (zb)->zb_phys.zb_blkid == 0) #define ZB_IS_ROOT(zb) \ - ((zb)->zb_object == ZB_ROOT_OBJECT && \ - (zb)->zb_level == ZB_ROOT_LEVEL && \ - (zb)->zb_blkid == ZB_ROOT_BLKID) + ((zb)->zb_phys.zb_object == ZB_ROOT_OBJECT && \ + (zb)->zb_phys.zb_level == ZB_ROOT_LEVEL && \ + (zb)->zb_phys.zb_blkid == ZB_ROOT_BLKID) typedef struct zio_prop { enum zio_checksum zp_checksum; diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index b822ace688c9..42846ed4b168 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3568,19 +3568,19 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) nvlist_t *nv; /* ignoring zb_blkid and zb_level for now */ - if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset && - zb[i-1].zb_object == zb[i].zb_object) + if (i > 0 && zb[i-1].zb_phys.zb_objset == zb[i].zb_phys.zb_objset && + zb[i-1].zb_phys.zb_object == zb[i].zb_phys.zb_object) continue; if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0) goto nomem; if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, - zb[i].zb_objset) != 0) { + zb[i].zb_phys.zb_objset) != 0) { nvlist_free(nv); goto nomem; } if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, - zb[i].zb_object) != 0) { + zb[i].zb_phys.zb_object) != 0) { nvlist_free(nv); goto nomem; } diff --git 
a/module/zfs/bptree.c b/module/zfs/bptree.c index c03cb1f840e8..282235cab580 100644 --- a/module/zfs/bptree.c +++ b/module/zfs/bptree.c @@ -197,9 +197,9 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, if (free) { if (err == ERESTART) { /* save bookmark for future resume */ - ASSERT3U(bte.be_zb.zb_objset, ==, + ASSERT3U(bte.be_zb.zb_phys.zb_objset, ==, ZB_DESTROYED_OBJSET); - ASSERT0(bte.be_zb.zb_level); + ASSERT0(bte.be_zb.zb_phys.zb_level); dmu_write(os, obj, i * sizeof (bte), sizeof (bte), &bte, tx); break; diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index a2cb2fc8d811..89d525959f34 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -115,18 +115,18 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (issig(JUSTLOOKING) && issig(FORREAL)) return (SET_ERROR(EINTR)); - if (zb->zb_object != DMU_META_DNODE_OBJECT) + if (zb->zb_phys.zb_object != DMU_META_DNODE_OBJECT) return (0); if (bp == NULL) { - uint64_t span = DBP_SPAN(dnp, zb->zb_level); - uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; + uint64_t span = DBP_SPAN(dnp, zb->zb_phys.zb_level); + uint64_t dnobj = (zb->zb_phys.zb_blkid * span) >> DNODE_SHIFT; err = report_free_dnode_range(da, dnobj, dnobj + (span >> DNODE_SHIFT) - 1); if (err) return (err); - } else if (zb->zb_level == 0) { + } else if (zb->zb_phys.zb_level == 0) { dnode_phys_t *blk; arc_buf_t *abuf; uint32_t aflags = ARC_WAIT; @@ -140,7 +140,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, blk = abuf->b_data; for (i = 0; i < blksz >> DNODE_SHIFT; i++) { - uint64_t dnobj = (zb->zb_blkid << + uint64_t dnobj = (zb->zb_phys.zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = report_dnode(da, dnobj, blk+i); if (err) diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 9264fbb27f79..396d0b001a8f 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -382,17 +382,17 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if 
(issig(JUSTLOOKING) && issig(FORREAL)) return (SET_ERROR(EINTR)); - if (zb->zb_object != DMU_META_DNODE_OBJECT && - DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { + if (zb->zb_phys.zb_object != DMU_META_DNODE_OBJECT && + DMU_OBJECT_IS_SPECIAL(zb->zb_phys.zb_object)) { return (0); - } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { - uint64_t span = BP_SPAN(dnp, zb->zb_level); - uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; + } else if (bp == NULL && zb->zb_phys.zb_object == DMU_META_DNODE_OBJECT) { + uint64_t span = BP_SPAN(dnp, zb->zb_phys.zb_level); + uint64_t dnobj = (zb->zb_phys.zb_blkid * span) >> DNODE_SHIFT; err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); } else if (bp == NULL) { - uint64_t span = BP_SPAN(dnp, zb->zb_level); - err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); - } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { + uint64_t span = BP_SPAN(dnp, zb->zb_phys.zb_level); + err = dump_free(dsp, zb->zb_phys.zb_object, zb->zb_phys.zb_blkid * span, span); + } else if (zb->zb_phys.zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { dnode_phys_t *blk; @@ -408,7 +408,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, blk = abuf->b_data; for (i = 0; i < blksz >> DNODE_SHIFT; i++) { - uint64_t dnobj = (zb->zb_blkid << + uint64_t dnobj = (zb->zb_phys.zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = dump_dnode(dsp, dnobj, blk+i); if (err != 0) @@ -425,7 +425,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, &aflags, zb) != 0) return (SET_ERROR(EIO)); - err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(dsp, zb->zb_phys.zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } else { /* it's a level-0 block of a regular object */ uint32_t aflags = ARC_WAIT; @@ -449,7 +449,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } } - err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, 
+ err = dump_data(dsp, type, zb->zb_phys.zb_object, zb->zb_phys.zb_blkid * blksz, blksz, bp, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index bd291c6a8963..1dbac996ef3f 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -164,7 +164,7 @@ resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp, * If we found the block we're trying to resume from, zero * the bookmark out to indicate that we have resumed. */ - ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object); + ASSERT3U(zb->zb_phys.zb_object, <=, td->td_resume->zb_phys.zb_object); if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) { bzero(td->td_resume, sizeof (*zb)); if (td->td_flags & TRAVERSE_POST) @@ -178,7 +178,7 @@ static void traverse_pause(traverse_data_t *td, const zbookmark_t *zb) { ASSERT(td->td_resume != NULL); - ASSERT0(zb->zb_level); + ASSERT0(zb->zb_phys.zb_level); bcopy(zb, td->td_resume, sizeof (*td->td_resume)); } @@ -271,18 +271,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, czb = kmem_alloc(sizeof (zbookmark_t), KM_PUSHPAGE); for (i = 0; i < epb; i++) { - SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object, - zb->zb_level - 1, - zb->zb_blkid * epb + i); + SET_BOOKMARK(czb, zb->zb_phys.zb_objset, zb->zb_phys.zb_object, + zb->zb_phys.zb_level - 1, + zb->zb_phys.zb_blkid * epb + i); traverse_prefetch_metadata(td, &((blkptr_t *)buf->b_data)[i], czb); } /* recursively visitbp() blocks below this */ for (i = 0; i < epb; i++) { - SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object, - zb->zb_level - 1, - zb->zb_blkid * epb + i); + SET_BOOKMARK(czb, zb->zb_phys.zb_objset, zb->zb_phys.zb_object, + zb->zb_phys.zb_level - 1, + zb->zb_phys.zb_blkid * epb + i); err = traverse_visitbp(td, dnp, &((blkptr_t *)buf->b_data)[i], czb); if (err != 0) { @@ -306,14 +306,14 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, dnp = buf->b_data; for (i = 0; i < epb; i++) { - 
prefetch_dnode_metadata(td, &dnp[i], zb->zb_objset, - zb->zb_blkid * epb + i); + prefetch_dnode_metadata(td, &dnp[i], zb->zb_phys.zb_objset, + zb->zb_phys.zb_blkid * epb + i); } /* recursively visitbp() blocks below this */ for (i = 0; i < epb; i++) { - err = traverse_dnode(td, &dnp[i], zb->zb_objset, - zb->zb_blkid * epb + i); + err = traverse_dnode(td, &dnp[i], zb->zb_phys.zb_objset, + zb->zb_phys.zb_blkid * epb + i); if (err != 0) { if (!TD_HARD(td)) break; @@ -332,16 +332,16 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, osp = buf->b_data; dnp = &osp->os_meta_dnode; - prefetch_dnode_metadata(td, dnp, zb->zb_objset, + prefetch_dnode_metadata(td, dnp, zb->zb_phys.zb_objset, DMU_META_DNODE_OBJECT); if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { prefetch_dnode_metadata(td, &osp->os_groupused_dnode, - zb->zb_objset, DMU_GROUPUSED_OBJECT); + zb->zb_phys.zb_objset, DMU_GROUPUSED_OBJECT); prefetch_dnode_metadata(td, &osp->os_userused_dnode, - zb->zb_objset, DMU_USERUSED_OBJECT); + zb->zb_phys.zb_objset, DMU_USERUSED_OBJECT); } - err = traverse_dnode(td, dnp, zb->zb_objset, + err = traverse_dnode(td, dnp, zb->zb_phys.zb_objset, DMU_META_DNODE_OBJECT); if (err && TD_HARD(td)) { lasterr = err; @@ -349,7 +349,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { dnp = &osp->os_groupused_dnode; - err = traverse_dnode(td, dnp, zb->zb_objset, + err = traverse_dnode(td, dnp, zb->zb_phys.zb_objset, DMU_GROUPUSED_OBJECT); } if (err && TD_HARD(td)) { @@ -358,7 +358,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { dnp = &osp->os_userused_dnode; - err = traverse_dnode(td, dnp, zb->zb_objset, + err = traverse_dnode(td, dnp, zb->zb_phys.zb_objset, DMU_USERUSED_OBJECT); } } diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 351165dbfb38..453a31e9f6dd 100644 --- 
a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -546,7 +546,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (bp == NULL) return (0); - if (zb->zb_level == ZB_ZIL_LEVEL) { + if (zb->zb_phys.zb_level == ZB_ZIL_LEVEL) { ASSERT(zilog != NULL); /* * It's a block in the intent log. It has no diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index ea04507813f7..29011ffdae6f 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -395,7 +395,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) int mintime; /* we never skip user/group accounting objects */ - if (zb && (int64_t)zb->zb_object < 0) + if (zb && (int64_t)zb->zb_phys.zb_object < 0) return (B_FALSE); if (scn->scn_pausing) @@ -405,7 +405,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) return (B_FALSE); /* we're resuming */ /* We only know how to resume from level-0 blocks. */ - if (zb && zb->zb_level != 0) + if (zb && zb->zb_phys.zb_level != 0) return (B_FALSE); mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? @@ -417,10 +417,10 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) spa_shutting_down(scn->scn_dp->dp_spa)) { if (zb) { dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n", - (longlong_t)zb->zb_objset, - (longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, - (longlong_t)zb->zb_blkid); + (longlong_t)zb->zb_phys.zb_objset, + (longlong_t)zb->zb_phys.zb_object, + (longlong_t)zb->zb_phys.zb_level, + (longlong_t)zb->zb_phys.zb_blkid); scn->scn_phys.scn_bookmark = *zb; } dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n", @@ -553,7 +553,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, * We never skip over user/group accounting objects (obj<0) */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) && - (int64_t)zb->zb_object >= 0) { + (int64_t)zb->zb_phys.zb_object >= 0) { /* * If we already visited this bp & everything below (in * a prior txg sync), don't bother doing it again. 
@@ -568,12 +568,12 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, * again. */ if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || - zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) { + zb->zb_phys.zb_object > scn->scn_phys.scn_bookmark.zb_phys.zb_object) { dprintf("resuming at %llx/%llx/%llx/%llx\n", - (longlong_t)zb->zb_objset, - (longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, - (longlong_t)zb->zb_blkid); + (longlong_t)zb->zb_phys.zb_objset, + (longlong_t)zb->zb_phys.zb_object, + (longlong_t)zb->zb_phys.zb_level, + (longlong_t)zb->zb_phys.zb_blkid); bzero(&scn->scn_phys.scn_bookmark, sizeof (*zb)); } } @@ -606,15 +606,15 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, return (err); } for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { - dsl_scan_prefetch(scn, *bufp, cbp, zb->zb_objset, - zb->zb_object, zb->zb_blkid * epb + i); + dsl_scan_prefetch(scn, *bufp, cbp, zb->zb_phys.zb_objset, + zb->zb_phys.zb_object, zb->zb_phys.zb_blkid * epb + i); } for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { zbookmark_t czb; - SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, - zb->zb_level - 1, - zb->zb_blkid * epb + i); + SET_BOOKMARK(&czb, zb->zb_phys.zb_objset, zb->zb_phys.zb_object, + zb->zb_phys.zb_level - 1, + zb->zb_phys.zb_blkid * epb + i); dsl_scan_visitbp(cbp, &czb, dnp, *bufp, ds, scn, ostype, tx); } @@ -643,12 +643,12 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, for (j = 0; j < cdnp->dn_nblkptr; j++) { blkptr_t *cbp = &cdnp->dn_blkptr[j]; dsl_scan_prefetch(scn, *bufp, cbp, - zb->zb_objset, zb->zb_blkid * epb + i, j); + zb->zb_phys.zb_objset, zb->zb_phys.zb_blkid * epb + i, j); } } for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { dsl_scan_visitdnode(scn, ds, ostype, - cdnp, *bufp, zb->zb_blkid * epb + i, tx); + cdnp, *bufp, zb->zb_phys.zb_blkid * epb + i, tx); } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { @@ -750,7 +750,7 @@ 
dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, * dprintf_bp(bp, * "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p", * ds, ds ? ds->ds_object : 0, - * zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, + * zb->zb_phys.zb_objset, zb->zb_phys.zb_object, zb->zb_phys.zb_level, zb->zb_phys.zb_blkid, * pbuf, bp); */ @@ -812,10 +812,10 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx) if (scn->scn_phys.scn_state != DSS_SCANNING) return; - if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { + if (scn->scn_phys.scn_bookmark.zb_phys.zb_objset == ds->ds_object) { if (dsl_dataset_is_snapshot(ds)) { /* Note, scn_cur_{min,max}_txg stays the same. */ - scn->scn_phys.scn_bookmark.zb_objset = + scn->scn_phys.scn_bookmark.zb_phys.zb_objset = ds->ds_phys->ds_next_snap_obj; zfs_dbgmsg("destroying ds %llu; currently traversing; " "reset zb_objset to %llu", @@ -875,8 +875,8 @@ dsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(ds->ds_phys->ds_prev_snap_obj != 0); - if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { - scn->scn_phys.scn_bookmark.zb_objset = + if (scn->scn_phys.scn_bookmark.zb_phys.zb_objset == ds->ds_object) { + scn->scn_phys.scn_bookmark.zb_phys.zb_objset = ds->ds_phys->ds_prev_snap_obj; zfs_dbgmsg("snapshotting ds %llu; currently traversing; " "reset zb_objset to %llu", @@ -907,14 +907,14 @@ dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) if (scn->scn_phys.scn_state != DSS_SCANNING) return; - if (scn->scn_phys.scn_bookmark.zb_objset == ds1->ds_object) { - scn->scn_phys.scn_bookmark.zb_objset = ds2->ds_object; + if (scn->scn_phys.scn_bookmark.zb_phys.zb_objset == ds1->ds_object) { + scn->scn_phys.scn_bookmark.zb_phys.zb_objset = ds2->ds_object; zfs_dbgmsg("clone_swap ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); - } else if (scn->scn_phys.scn_bookmark.zb_objset == ds2->ds_object) { - 
scn->scn_phys.scn_bookmark.zb_objset = ds1->ds_object; + } else if (scn->scn_phys.scn_bookmark.zb_phys.zb_objset == ds2->ds_object) { + scn->scn_phys.scn_bookmark.zb_phys.zb_objset = ds1->ds_object; zfs_dbgmsg("clone_swap ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds2->ds_object, @@ -1222,7 +1222,7 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, const ddt_key_t *ddk = &dde->dde_key; ddt_phys_t *ddp = dde->dde_phys; blkptr_t bp; - zbookmark_t zb = { 0 }; + zbookmark_t zb = { { 0 }, 0 }; int p; if (scn->scn_phys.scn_state != DSS_SCANNING) @@ -1255,7 +1255,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) return; } - if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) { + if (scn->scn_phys.scn_bookmark.zb_phys.zb_objset == DMU_META_OBJSET) { /* First do the MOS & ORIGIN */ scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; @@ -1274,7 +1274,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dp->dp_origin_snap->ds_object, tx); } ASSERT(!scn->scn_pausing); - } else if (scn->scn_phys.scn_bookmark.zb_objset != + } else if (scn->scn_phys.scn_bookmark.zb_phys.zb_objset != ZB_DESTROYED_OBJSET) { /* * If we were paused, continue from here. Note if the @@ -1282,7 +1282,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) * be -1, so we will skip this and find a new objset * below. 
*/ - dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx); + dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_phys.zb_objset, tx); if (scn->scn_pausing) return; } @@ -1500,17 +1500,17 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); - ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0); - ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0); - ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0); - ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_phys.zb_objset == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_phys.zb_object == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_phys.zb_level == 0); + ASSERT(scn->scn_phys.scn_bookmark.zb_phys.zb_blkid == 0); } else { zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu", (longlong_t)tx->tx_txg, - (longlong_t)scn->scn_phys.scn_bookmark.zb_objset, - (longlong_t)scn->scn_phys.scn_bookmark.zb_object, - (longlong_t)scn->scn_phys.scn_bookmark.zb_level, - (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid); + (longlong_t)scn->scn_phys.scn_bookmark.zb_phys.zb_objset, + (longlong_t)scn->scn_phys.scn_bookmark.zb_phys.zb_object, + (longlong_t)scn->scn_phys.scn_bookmark.zb_phys.zb_level, + (longlong_t)scn->scn_phys.scn_bookmark.zb_phys.zb_blkid); } scn->scn_zio_root = zio_root(dp->dp_spa, NULL, @@ -1674,7 +1674,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, } /* If it's an intent log block, failure is expected. 
*/ - if (zb->zb_level == ZB_ZIL_LEVEL) + if (zb->zb_phys.zb_level == ZB_ZIL_LEVEL) zio_flags |= ZIO_FLAG_SPECULATIVE; for (d = 0; d < BP_GET_NDVAS(bp); d++) { diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index 35853e282550..856d281422ec 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -62,8 +62,8 @@ static void bookmark_to_name(zbookmark_t *zb, char *buf, size_t len) { (void) snprintf(buf, len, "%llx:%llx:%llx:%llx", - (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid); + (u_longlong_t)zb->zb_phys.zb_objset, (u_longlong_t)zb->zb_phys.zb_object, + (u_longlong_t)zb->zb_phys.zb_level, (u_longlong_t)zb->zb_phys.zb_blkid); } /* @@ -73,13 +73,13 @@ bookmark_to_name(zbookmark_t *zb, char *buf, size_t len) static void name_to_bookmark(char *buf, zbookmark_t *zb) { - zb->zb_objset = strtonum(buf, &buf); + zb->zb_phys.zb_objset = strtonum(buf, &buf); ASSERT(*buf == ':'); - zb->zb_object = strtonum(buf + 1, &buf); + zb->zb_phys.zb_object = strtonum(buf + 1, &buf); ASSERT(*buf == ':'); - zb->zb_level = (int)strtonum(buf + 1, &buf); + zb->zb_phys.zb_level = (int)strtonum(buf + 1, &buf); ASSERT(*buf == ':'); - zb->zb_blkid = strtonum(buf + 1, &buf); + zb->zb_phys.zb_blkid = strtonum(buf + 1, &buf); ASSERT(*buf == '\0'); } #endif diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index a94fecfe87f8..e6c2be548284 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -210,10 +210,10 @@ spa_read_history_add(spa_t *spa, const zbookmark_t *zb, uint32_t aflags) strlcpy(srh->origin, zb->zb_func, sizeof (srh->origin)); strlcpy(srh->comm, getcomm(), sizeof (srh->comm)); srh->start = gethrtime(); - srh->objset = zb->zb_objset; - srh->object = zb->zb_object; - srh->level = zb->zb_level; - srh->blkid = zb->zb_blkid; + srh->objset = zb->zb_phys.zb_objset; + srh->object = zb->zb_phys.zb_object; + srh->level = zb->zb_phys.zb_level; + srh->blkid = zb->zb_phys.zb_blkid; 
srh->aflags = aflags; srh->pid = getpid(); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index df47d99cfafa..cfa7f3a9a297 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -346,16 +346,16 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_objset, + zio->io_logical->io_bookmark.zb_phys.zb_objset, FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_object, + zio->io_logical->io_bookmark.zb_phys.zb_object, FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, DATA_TYPE_INT64, - zio->io_logical->io_bookmark.zb_level, + zio->io_logical->io_bookmark.zb_phys.zb_level, FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_blkid, NULL); + zio->io_logical->io_bookmark.zb_phys.zb_blkid, NULL); } else if (vd != NULL) { /* * If we have a vdev but no zio, this is a device fault, and the diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 97f25494cd9b..f29a48cb4717 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -2576,7 +2576,7 @@ zio_vdev_io_start(zio_t *zio) */ if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL && vd == vd->vdev_top && !vd->vdev_islog && - zio->io_bookmark.zb_objset != DMU_META_OBJSET && + zio->io_bookmark.zb_phys.zb_objset != DMU_META_OBJSET && zio->io_txg != spa_syncing_txg(spa)) { uint64_t old = spa->spa_last_io; uint64_t new = ddi_get_lbolt64(); @@ -3301,39 +3301,39 @@ zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1, { uint64_t zb1nextL0, zb2thisobj; - ASSERT(zb1->zb_objset == zb2->zb_objset); - ASSERT(zb2->zb_level == 0); + ASSERT(zb1->zb_phys.zb_objset == zb2->zb_phys.zb_objset); + ASSERT(zb2->zb_phys.zb_level == 0); /* * A bookmark in the deadlist is considered to be after * everything else. 
*/ - if (zb2->zb_object == DMU_DEADLIST_OBJECT) + if (zb2->zb_phys.zb_object == DMU_DEADLIST_OBJECT) return (B_TRUE); /* The objset_phys_t isn't before anything. */ if (dnp == NULL) return (B_FALSE); - zb1nextL0 = (zb1->zb_blkid + 1) << - ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); + zb1nextL0 = (zb1->zb_phys.zb_blkid + 1) << + ((zb1->zb_phys.zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); - zb2thisobj = zb2->zb_object ? zb2->zb_object : - zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); + zb2thisobj = zb2->zb_phys.zb_object ? zb2->zb_phys.zb_object : + zb2->zb_phys.zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); - if (zb1->zb_object == DMU_META_DNODE_OBJECT) { + if (zb1->zb_phys.zb_object == DMU_META_DNODE_OBJECT) { uint64_t nextobj = zb1nextL0 * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT; return (nextobj <= zb2thisobj); } - if (zb1->zb_object < zb2thisobj) + if (zb1->zb_phys.zb_object < zb2thisobj) return (B_TRUE); - if (zb1->zb_object > zb2thisobj) + if (zb1->zb_phys.zb_object > zb2thisobj) return (B_FALSE); - if (zb2->zb_object == DMU_META_DNODE_OBJECT) + if (zb2->zb_phys.zb_object == DMU_META_DNODE_OBJECT) return (B_FALSE); - return (zb1nextL0 <= zb2->zb_blkid); + return (zb1nextL0 <= zb2->zb_phys.zb_blkid); } #if defined(_KERNEL) && defined(HAVE_SPL) diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 39ec590b5e7e..072fc517a519 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -70,7 +70,7 @@ zio_match_handler(zbookmark_t *zb, uint64_t type, /* * Check for a match against the MOS, which is based on type */ - if (zb->zb_objset == DMU_META_OBJSET && + if (zb->zb_phys.zb_objset == DMU_META_OBJSET && record->zi_objset == DMU_META_OBJSET && record->zi_object == DMU_META_DNODE_OBJECT) { if (record->zi_type == DMU_OT_NONE || @@ -84,11 +84,11 @@ zio_match_handler(zbookmark_t *zb, uint64_t type, /* * Check for an exact match. 
*/ - if (zb->zb_objset == record->zi_objset && - zb->zb_object == record->zi_object && - zb->zb_level == record->zi_level && - zb->zb_blkid >= record->zi_start && - zb->zb_blkid <= record->zi_end && + if (zb->zb_phys.zb_objset == record->zi_objset && + zb->zb_phys.zb_object == record->zi_object && + zb->zb_phys.zb_level == record->zi_level && + zb->zb_phys.zb_blkid >= record->zi_start && + zb->zb_phys.zb_blkid <= record->zi_end && error == record->zi_error) return (record->zi_freq == 0 || spa_get_random(100) < record->zi_freq); From f29cfc10c7ab162e9541be093e26e5864393f947 Mon Sep 17 00:00:00 2001 From: Tim Chase Date: Mon, 3 Feb 2014 16:49:40 -0600 Subject: [PATCH 2/2] Allow import of pools with corrupted "scan" entries in the object directory. Add a compatibility shim to dsl_scan_init() to support importing broken pools that have the larger-than-normal "scan" entry. --- module/zfs/dsl_scan.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 29011ffdae6f..49e8c17798eb 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -125,7 +125,26 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) &scn->scn_phys); if (err == ENOENT) return (0); - else if (err) + /* + * zfsonlinux compatibility hack - Some versions of zfsonlinux + * created "scan" with an extra 64-bit integer added to the + * zbookmark_t. Handle this case and copy scn_state into the + * dsl_scan_t for use below. + */ + if (err == EOVERFLOW) { + uint64_t zaptmp[SCAN_PHYS_NUMINTS + 1]; + + err = zap_lookup(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_SCAN, sizeof (uint64_t), + SCAN_PHYS_NUMINTS + 1, + &zaptmp); + if (err == 0) + scn->scn_phys.scn_state = + zaptmp[offsetof(dsl_scan_phys_t, + scn_state) / sizeof (uint64_t)]; + } + if (err) return (err); if (scn->scn_phys.scn_state == DSS_SCANNING &&