Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Illumos 6267 - dn_bonus evicted too early #3874

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,25 @@ typedef struct dmu_buf_impl {
/* User callback information. */
dmu_buf_user_t *db_user;

uint8_t db_immediate_evict;
/*
* Evict user data as soon as the dirty and reference
* counts are equal.
*/
uint8_t db_user_immediate_evict;

/*
* This block was freed while a read or write was
* active.
*/
uint8_t db_freed_in_flight;

/*
* dnode_evict_dbufs() or dnode_evict_bonsu() tried to
* evict this dbuf, but couldn't due to outstanding
* references. Evict once the refcount drops to 0.
*/
uint8_t db_pending_evict;

uint8_t db_dirtycnt;
} dmu_buf_impl_t;

Expand Down
1 change: 0 additions & 1 deletion include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ struct objset {
uint8_t os_copies;
enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify;
boolean_t os_evicting;
zfs_logbias_op_t os_logbias;
zfs_cache_type_t os_primary_cache;
zfs_cache_type_t os_secondary_cache;
Expand Down
47 changes: 12 additions & 35 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
*/
ASSERT3U(holds, >=, db->db_dirtycnt);
} else {
if (db->db_immediate_evict == TRUE)
if (db->db_user_immediate_evict == TRUE)
ASSERT3U(holds, >=, db->db_dirtycnt);
else
ASSERT3U(holds, >, 0);
Expand Down Expand Up @@ -1880,8 +1880,9 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_blkptr = blkptr;

db->db_user = NULL;
db->db_immediate_evict = 0;
db->db_freed_in_flight = 0;
db->db_user_immediate_evict = FALSE;
db->db_freed_in_flight = FALSE;
db->db_pending_evict = FALSE;

if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
Expand Down Expand Up @@ -2318,12 +2319,13 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
arc_buf_freeze(db->db_buf);

if (holds == db->db_dirtycnt &&
db->db_level == 0 && db->db_immediate_evict)
db->db_level == 0 && db->db_user_immediate_evict)
dbuf_evict_user(db);

if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
dnode_t *dn;
boolean_t evict_dbuf = db->db_pending_evict;

/*
* If the dnode moves here, we cannot cross this
Expand All @@ -2338,7 +2340,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* Decrementing the dbuf count means that the bonus
* buffer's dnode hold is no longer discounted in
* dnode_move(). The dnode cannot move until after
* the dnode_rele_and_unlock() below.
* the dnode_rele() below.
*/
DB_DNODE_EXIT(db);

Expand All @@ -2348,35 +2350,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
*/
mutex_exit(&db->db_mtx);

/*
* If the dnode has been freed, evict the bonus
* buffer immediately. The data in the bonus
* buffer is no longer relevant and this prevents
* a stale bonus buffer from being associated
* with this dnode_t should the dnode_t be reused
* prior to being destroyed.
*/
mutex_enter(&dn->dn_mtx);
if (dn->dn_type == DMU_OT_NONE ||
dn->dn_free_txg != 0) {
/*
* Drop dn_mtx. It is a leaf lock and
* cannot be held when dnode_evict_bonus()
* acquires other locks in order to
* perform the eviction.
*
* Freed dnodes cannot be reused until the
* last hold is released. Since this bonus
* buffer has a hold, the dnode will remain
* in the free state, even without dn_mtx
* held, until the dnode_rele_and_unlock()
* below.
*/
mutex_exit(&dn->dn_mtx);
if (evict_dbuf)
dnode_evict_bonus(dn);
mutex_enter(&dn->dn_mtx);
}
dnode_rele_and_unlock(dn, db);

dnode_rele(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this
Expand Down Expand Up @@ -2423,7 +2400,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
} else {
dbuf_clear(db);
}
} else if (db->db_objset->os_evicting ||
} else if (db->db_pending_evict ||
arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
Expand Down Expand Up @@ -2471,7 +2448,7 @@ dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;

db->db_immediate_evict = TRUE;
db->db_user_immediate_evict = TRUE;
return (dmu_buf_set_user(db_fake, user));
}

Expand Down
1 change: 0 additions & 1 deletion module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,6 @@ dmu_objset_evict(objset_t *os)
if (os->os_sa)
sa_tear_down(os);

os->os_evicting = B_TRUE;
dmu_objset_evict_dbufs(os);

mutex_enter(&os->os_lock);
Expand Down
14 changes: 9 additions & 5 deletions module/zfs/dnode_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ dnode_evict_dbufs(dnode_t *dn)
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
avl_remove(&dn->dn_dbufs, db_marker);
} else {
db->db_pending_evict = TRUE;
mutex_exit(&db->db_mtx);
db_next = AVL_NEXT(&dn->dn_dbufs, db);
}
Expand All @@ -447,10 +448,14 @@ void
dnode_evict_bonus(dnode_t *dn)
{
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx);
dbuf_evict(dn->dn_bonus);
dn->dn_bonus = NULL;
if (dn->dn_bonus != NULL) {
if (refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx);
dbuf_evict(dn->dn_bonus);
dn->dn_bonus = NULL;
} else {
dn->dn_bonus->db_pending_evict = TRUE;
}
}
rw_exit(&dn->dn_struct_rwlock);
}
Expand Down Expand Up @@ -502,7 +507,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)

dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT(avl_is_empty(&dn->dn_dbufs));

/*
* XXX - It would be nice to assert this, but we may still
Expand Down