Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race in dnode_check_slots_free() #8005

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/dmu_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ extern "C" {
* dn_allocated_txg
* dn_free_txg
* dn_assigned_txg
* dn_dirty_txg
* dd_assigned_tx
* dn_notxholds
* dn_dirtyctx
Expand Down
4 changes: 4 additions & 0 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ struct dnode {
uint64_t dn_allocated_txg;
uint64_t dn_free_txg;
uint64_t dn_assigned_txg;
uint64_t dn_dirty_txg; /* txg dnode was last dirtied */
kcondvar_t dn_notxholds;
enum dnode_dirtycontext dn_dirtyctx;
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
Expand Down Expand Up @@ -362,6 +363,9 @@ void dnode_evict_dbufs(dnode_t *dn);
void dnode_evict_bonus(dnode_t *dn);
void dnode_free_interior_slots(dnode_t *dn);

#define DNODE_IS_DIRTY(_dn) \
((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))

#define DNODE_IS_CACHEABLE(_dn) \
((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
(DMU_OT_IS_METADATA((_dn)->dn_type) && \
Expand Down
3 changes: 3 additions & 0 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,9 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
FTAG);
}
}

if (tx->tx_txg > dn->dn_dirty_txg)
dn->dn_dirty_txg = tx->tx_txg;
mutex_exit(&dn->dn_mtx);

if (db->db_blkid == DMU_SPILL_BLKID)
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2044,7 +2044,7 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
* Check if dnode is dirty
*/
for (i = 0; i < TXG_SIZE; i++) {
if (list_link_active(&dn->dn_dirty_link[i])) {
if (multilist_link_active(&dn->dn_dirty_link[i])) {
clean = B_FALSE;
break;
}
Expand Down
15 changes: 15 additions & 0 deletions module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1213,10 +1213,23 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
multilist_sublist_remove(list, dn);

/*
* If we are not doing useraccounting (os_synced_dnodes == NULL)
* we are done with this dnode for this txg. Unset dn_dirty_txg
* if later txgs aren't dirtying it so that future holders do
* not get a stale value. Otherwise, we will do this in
* userquota_updates_task() when processing has completely
* finished for this txg.
*/
multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
if (newlist != NULL) {
(void) dnode_add_ref(dn, newlist);
multilist_insert(newlist, dn);
} else {
mutex_enter(&dn->dn_mtx);
if (dn->dn_dirty_txg == tx->tx_txg)
dn->dn_dirty_txg = 0;
mutex_exit(&dn->dn_mtx);
}

dnode_sync(dn, tx);
Expand Down Expand Up @@ -1621,6 +1634,8 @@ userquota_updates_task(void *arg)
dn->dn_id_flags |= DN_ID_CHKED_BONUS;
}
dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
dn->dn_dirty_txg = 0;
mutex_exit(&dn->dn_mtx);

multilist_sublist_remove(list, dn);
Expand Down
29 changes: 19 additions & 10 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));

for (i = 0; i < TXG_SIZE; i++) {
list_link_init(&dn->dn_dirty_link[i]);
multilist_link_init(&dn->dn_dirty_link[i]);
dn->dn_free_ranges[i] = NULL;
list_create(&dn->dn_dirty_records[i],
sizeof (dbuf_dirty_record_t),
Expand All @@ -147,6 +147,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
dn->dn_dirty_txg = 0;
dn->dn_dirtyctx = 0;
dn->dn_dirtyctx_firstset = NULL;
dn->dn_bonus = NULL;
Expand Down Expand Up @@ -184,7 +185,7 @@ dnode_dest(void *arg, void *unused)
ASSERT(!list_link_active(&dn->dn_link));

for (i = 0; i < TXG_SIZE; i++) {
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
list_destroy(&dn->dn_dirty_records[i]);
ASSERT0(dn->dn_next_nblkptr[i]);
Expand All @@ -199,6 +200,7 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_allocated_txg);
ASSERT0(dn->dn_free_txg);
ASSERT0(dn->dn_assigned_txg);
ASSERT0(dn->dn_dirty_txg);
ASSERT0(dn->dn_dirtyctx);
ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
ASSERT3P(dn->dn_bonus, ==, NULL);
Expand Down Expand Up @@ -523,6 +525,7 @@ dnode_destroy(dnode_t *dn)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
dn->dn_dirty_txg = 0;

dn->dn_dirtyctx = 0;
if (dn->dn_dirtyctx_firstset != NULL) {
Expand Down Expand Up @@ -592,6 +595,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_maxblkid);
ASSERT0(dn->dn_allocated_txg);
ASSERT0(dn->dn_assigned_txg);
ASSERT0(dn->dn_dirty_txg);
ASSERT(refcount_is_zero(&dn->dn_tx_holds));
ASSERT3U(refcount_count(&dn->dn_holds), <=, 1);
ASSERT(avl_is_empty(&dn->dn_dbufs));
Expand All @@ -604,7 +608,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_next_bonustype[i]);
ASSERT0(dn->dn_rm_spillblk[i]);
ASSERT0(dn->dn_next_blksz[i]);
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
}
Expand Down Expand Up @@ -779,6 +783,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_allocated_txg = odn->dn_allocated_txg;
ndn->dn_free_txg = odn->dn_free_txg;
ndn->dn_assigned_txg = odn->dn_assigned_txg;
ndn->dn_dirty_txg = odn->dn_dirty_txg;
ndn->dn_dirtyctx = odn->dn_dirtyctx;
ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
ASSERT(refcount_count(&odn->dn_tx_holds) == 0);
Expand Down Expand Up @@ -845,6 +850,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
odn->dn_allocated_txg = 0;
odn->dn_free_txg = 0;
odn->dn_assigned_txg = 0;
odn->dn_dirty_txg = 0;
odn->dn_dirtyctx = 0;
odn->dn_dirtyctx_firstset = NULL;
odn->dn_have_spill = B_FALSE;
Expand Down Expand Up @@ -1069,6 +1075,10 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
{
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);

/*
* If all dnode slots are either already free or
* evictable return B_TRUE.
*/
for (int i = idx; i < idx + slots; i++) {
dnode_handle_t *dnh = &children->dnc_children[i];
dnode_t *dn = dnh->dnh_dnode;
Expand All @@ -1077,18 +1087,17 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
continue;
} else if (DN_SLOT_IS_PTR(dn)) {
mutex_enter(&dn->dn_mtx);
dmu_object_type_t type = dn->dn_type;
boolean_t can_free = (dn->dn_type == DMU_OT_NONE &&
!DNODE_IS_DIRTY(dn));
mutex_exit(&dn->dn_mtx);

if (type != DMU_OT_NONE)
if (!can_free)
return (B_FALSE);

continue;
else
continue;
} else {
return (B_FALSE);
}

return (B_FALSE);
}

return (B_TRUE);
Expand Down Expand Up @@ -1594,7 +1603,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
/*
* If we are already marked dirty, we're done.
*/
if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
if (multilist_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
multilist_sublist_unlock(mls);
return;
}
Expand Down