Skip to content

Commit

Permalink
Set proper birth epochs for Ln (n > 0) holes
Browse files Browse the repository at this point in the history
  • Loading branch information
bprotopopov committed Dec 29, 2015
1 parent 771ced2 commit d891e16
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 11 deletions.
82 changes: 76 additions & 6 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,13 +541,35 @@ dbuf_verify(dmu_buf_impl_t *db)
* If the blkptr isn't set but they have nonzero data,
* it had better be dirty, otherwise we'll lose that
* data when we evict this buffer.
*
* One exception is an Ln indirect block representing
* a hole with a non-zero birth epoch; such blocks can also
* be filled with Ln-1 hole block pointers.
*/
if (db->db_dirtycnt == 0) {
ASSERTV(uint64_t *buf = db->db.db_data);
int i;
if (db->db_level == 0) {
uint64_t *buf = db->db.db_data;
int i;

for (i = 0; i < db->db.db_size >> 3; i++) {
ASSERT(buf[i] == 0);
for (i = 0; i < db->db.db_size >> 3; i++) {
ASSERT(buf[i] == 0);
}
} else {
blkptr_t zero_bp, *bp;

ASSERT3U(1ULL << DB_DNODE(db)->dn_indblkshift,
==, db->db.db_size);
BP_ZERO(&zero_bp);
for (bp = (blkptr_t *)db->db.db_data;
bp < (blkptr_t *)((char *)db->db.db_data +
db->db.db_size);
bp++) {
ASSERT(BP_EQUAL(bp, &zero_bp) ||
(BP_IS_HOLE(bp) &&
(BP_GET_TYPE(bp) == dn->dn_type) &&
(BP_GET_LEVEL(bp) ==
(db->db_level - 1))));
}
}
}
}
Expand Down Expand Up @@ -691,11 +713,44 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
(db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) ||
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
dmu_object_type_t dtype = dn->dn_type;
uint32_t bshift = (db->db_level > 1) ?
dn->dn_indblkshift : dn->dn_datablkshift;

DB_DNODE_EXIT(db);
dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);

if (db->db_level > 0 && db->db_blkptr &&
BP_IS_HOLE(db->db_blkptr) && db->db_blkptr->blk_birth > 0) {
blkptr_t *bp;
/*
* If we are reading an Ln hole through a block pointer
* whose birth epoch is non-zero, the hole_birth feature
* was active when the block pointer was written, and we
* cannot simply zero the buffer out: if the block is
* subsequently partially overwritten, the remaining Ln-1
* holes would have birth epochs equal to zero, which can
* lead to an incorrectly formed incremental send stream.
* Instead, we pre-fill the initial contents of the Ln
* block with block pointers indicating Ln-1 holes with
* the same type and birth epoch as the Ln hole; the
* logical size is set based on the hole level and the
* dnode data or indirect block shift.
*/
for (bp = (blkptr_t *)db->db.db_data;
bp < (blkptr_t *)((char *)db->db.db_data +
db->db.db_size);
bp++) {
BP_SET_TYPE(bp, dtype);
BP_SET_LEVEL(bp, db->db_level-1);
BP_SET_LSIZE(bp, 1ULL << bshift);
BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
}
}

db->db_state = DB_CACHED;
*flags |= DB_RF_CACHED;
mutex_exit(&db->db_mtx);
Expand Down Expand Up @@ -1274,7 +1329,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
ASSERT(db->db.db_size != 0);

dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
dprintf_dbuf(db, "thread %p size=%llx txg=%llx db=%p\n",
curthread, (u_longlong_t)db->db.db_size, tx->tx_txg, db);

if (db->db_blkid != DMU_BONUS_BLKID) {
/*
Expand Down Expand Up @@ -1484,7 +1540,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);

dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
dprintf_dbuf(db, "thread %p size=%llx txg=%llx db=%p\n",
curthread, (u_longlong_t)db->db.db_size, tx->tx_txg, db);

ASSERT(db->db.db_size != 0);

Expand Down Expand Up @@ -2831,12 +2888,25 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
}
} else {
blkptr_t *ibp = db->db.db_data;
int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) {
if (BP_IS_HOLE(ibp))
continue;
fill += BP_GET_FILL(ibp);
}
/*
* Pre-filling Ln holes in dbuf_read_impl() can result in
* metadata block ids pointing beyond dn_maxblkid; fix
* dn_maxblkid up here, in a manner similar to the one
* above for level-0 blocks.
*/
mutex_enter(&dn->dn_mtx);
if (db->db_blkid >
dn->dn_phys->dn_maxblkid >> (db->db_level * epbs))
dn->dn_phys->dn_maxblkid = ((db->db_blkid + 1) <<
(db->db_level * epbs)) - 1;
mutex_exit(&dn->dn_mtx);
}
DB_DNODE_EXIT(db);

Expand Down
10 changes: 5 additions & 5 deletions module/zfs/dnode_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)

/* check for existing blkptrs in the dnode */
for (i = 0; i < nblkptr; i++)
if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]) ||
dn->dn_phys->dn_blkptr[i].blk_birth != 0)
break;
if (i != nblkptr) {
/* transfer dnode's block pointers to new indirect block */
Expand Down Expand Up @@ -710,10 +711,9 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
int i;
ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr);
/* the blkptrs we are losing better be unallocated */
for (i = 0; i < dnp->dn_nblkptr; i++) {
if (i >= dn->dn_next_nblkptr[txgoff])
ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i]));
}
for (i = dn->dn_next_nblkptr[txgoff];
i < dnp->dn_nblkptr; i++)
ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i]));
#endif
}
mutex_enter(&dn->dn_mtx);
Expand Down

0 comments on commit d891e16

Please sign in to comment.