diff --git a/include/sys/arc.h b/include/sys/arc.h index 8e0843418ac3..6328392bee23 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -136,7 +136,6 @@ void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index); int arc_buf_size(arc_buf_t *buf); void arc_release(arc_buf_t *buf, void *tag); int arc_released(arc_buf_t *buf); -int arc_has_callback(arc_buf_t *buf); void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused); void arc_buf_freeze(arc_buf_t *buf); void arc_buf_thaw(arc_buf_t *buf); @@ -159,7 +158,7 @@ void arc_remove_prune_callback(arc_prune_t *p); void arc_freed(spa_t *spa, const blkptr_t *bp); void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); -int arc_buf_evict(arc_buf_t *buf); +boolean_t arc_clear_callback(arc_buf_t *buf); void arc_flush(spa_t *spa); void arc_tempreserve_clear(uint64_t reserve); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 27b004132a6c..7c3bebabea5b 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -104,7 +104,7 @@ * with the buffer may be evicted prior to the callback. The callback * must be made with *no locks held* (to prevent deadlock). Additionally, * the users of callbacks must ensure that their private data is - * protected from simultaneous callbacks from arc_buf_evict() + * protected from simultaneous callbacks from arc_clear_callback() * and arc_do_user_evicts(). * * It as also possible to register a callback which is run when the @@ -1604,8 +1604,12 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t)) } } +/* + * Free up buf->b_data and if 'remove' is set, then pull the + * arc_buf_t off of the the arc_buf_hdr_t's list and free it. + */ static void -arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) +arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove) { arc_buf_t **bufp; @@ -1655,7 +1659,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) } /* only remove the buf if requested */ - if (!all) + if (!remove) return; /* remove the buf from the hdr list */ @@ -2265,7 +2269,7 @@ arc_do_user_evicts(void) mutex_exit(&arc_eviction_mtx); if (buf->b_efunc != NULL) - VERIFY(buf->b_efunc(buf) == 0); + VERIFY0(buf->b_efunc(buf->b_private)); buf->b_efunc = NULL; buf->b_private = NULL; @@ -3576,16 +3580,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp) } /* - * This is used by the DMU to let the ARC know that a buffer is - * being evicted, so the ARC should clean up. If this arc buf - * is not yet in the evicted state, it will be put there. + * Clear the user eviction callback set by arc_set_callback(), first calling + * it if it exists. Because the presence of a callback keeps an arc_buf cached + * clearing the callback may result in the arc_buf being destroyed. However, + * it will not result in the *last* arc_buf being destroyed, hence the data + * will remain cached in the ARC. We make a copy of the arc buffer here so + * that we can process the callback without holding any locks. + * + * It's possible that the callback is already in the process of being cleared + * by another thread. In this case we can not clear the callback. + * + * Returns B_TRUE if the callback was successfully called and cleared. */ -int -arc_buf_evict(arc_buf_t *buf) +boolean_t +arc_clear_callback(arc_buf_t *buf) { arc_buf_hdr_t *hdr; kmutex_t *hash_lock; - arc_buf_t **bufp; + arc_evict_func_t *efunc = buf->b_efunc; + void *private = buf->b_private; mutex_enter(&buf->b_evict_lock); hdr = buf->b_hdr; @@ -3595,17 +3608,16 @@ arc_buf_evict(arc_buf_t *buf) */ ASSERT(buf->b_data == NULL); mutex_exit(&buf->b_evict_lock); - return (0); + return (B_FALSE); } else if (buf->b_data == NULL) { - arc_buf_t copy = *buf; /* structure assignment */ /* * We are on the eviction list; process this buffer now * but let arc_do_user_evicts() do the reaping. */ buf->b_efunc = NULL; mutex_exit(&buf->b_evict_lock); - VERIFY(copy.b_efunc(©) == 0); - return (1); + VERIFY0(efunc(private)); + return (B_TRUE); } hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); @@ -3615,48 +3627,21 @@ arc_buf_evict(arc_buf_t *buf) ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt); ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); - /* - * Pull this buffer off of the hdr - */ - bufp = &hdr->b_buf; - while (*bufp != buf) - bufp = &(*bufp)->b_next; - *bufp = buf->b_next; - - ASSERT(buf->b_data != NULL); - arc_buf_destroy(buf, FALSE, FALSE); - - if (hdr->b_datacnt == 0) { - arc_state_t *old_state = hdr->b_state; - arc_state_t *evicted_state; - - ASSERT(hdr->b_buf == NULL); - ASSERT(refcount_is_zero(&hdr->b_refcnt)); - - evicted_state = - (old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; - - mutex_enter(&old_state->arcs_mtx); - mutex_enter(&evicted_state->arcs_mtx); - - arc_change_state(evicted_state, hdr, hash_lock); - ASSERT(HDR_IN_HASH_TABLE(hdr)); - hdr->b_flags |= ARC_IN_HASH_TABLE; - hdr->b_flags &= ~ARC_BUF_AVAILABLE; + buf->b_efunc = NULL; + buf->b_private = NULL; - mutex_exit(&evicted_state->arcs_mtx); - mutex_exit(&old_state->arcs_mtx); + if (hdr->b_datacnt > 1) { + mutex_exit(&buf->b_evict_lock); + arc_buf_destroy(buf, FALSE, TRUE); + } else { + ASSERT(buf == hdr->b_buf); + hdr->b_flags |= ARC_BUF_AVAILABLE; + mutex_exit(&buf->b_evict_lock); } - mutex_exit(hash_lock); - mutex_exit(&buf->b_evict_lock); - VERIFY(buf->b_efunc(buf) == 0); - buf->b_efunc = NULL; - buf->b_private = NULL; - buf->b_hdr = NULL; - buf->b_next = NULL; - kmem_cache_free(buf_cache, buf); - return (1); + mutex_exit(hash_lock); + VERIFY0(efunc(private)); + return (B_TRUE); } /* @@ -3813,17 +3798,6 @@ arc_released(arc_buf_t *buf) return (released); } -int -arc_has_callback(arc_buf_t *buf) -{ - int callback; - - mutex_enter(&buf->b_evict_lock); - callback = (buf->b_efunc != NULL); - mutex_exit(&buf->b_evict_lock); - return (callback); -} - #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 2b412f1a73c4..7f75e131f458 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -211,8 +211,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db) } /* - * Remove an entry from the hash table. This operation will - * fail if there are any existing holds on the db. + * Remove an entry from the hash table. It must be in the EVICTING state. */ static void dbuf_hash_remove(dmu_buf_impl_t *db) @@ -226,7 +225,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) idx = hv & h->hash_table_mask; /* - * We musn't hold db_mtx to maintin lock ordering: + * We musn't hold db_mtx to maintain lock ordering: * DBUF_HASH_MUTEX > db_mtx. */ ASSERT(refcount_is_zero(&db->db_holds)); @@ -487,7 +486,6 @@ static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); - ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf)); db->db_buf = buf; if (buf != NULL) { ASSERT(buf->b_data != NULL); @@ -1595,12 +1593,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) * when we are not holding the dn_dbufs_mtx, we can't clear the * entry in the dn_dbufs list. We have to wait until dbuf_destroy() * in this case. For callers from the DMU we will usually see: - * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy() + * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy() * For the arc callback, we will usually see: * dbuf_do_evict()->dbuf_clear();dbuf_destroy() * Sometimes, though, we will get a mix of these two: - * DMU: dbuf_clear()->arc_buf_evict() + * DMU: dbuf_clear()->arc_clear_callback() * ARC: dbuf_do_evict()->dbuf_destroy() + * + * This routine will dissociate the dbuf from the arc, by calling + * arc_clear_callback(), but will not evict the data from the ARC. */ void dbuf_clear(dmu_buf_impl_t *db) @@ -1608,7 +1609,7 @@ dbuf_clear(dmu_buf_impl_t *db) dnode_t *dn; dmu_buf_impl_t *parent = db->db_parent; dmu_buf_impl_t *dndb; - int dbuf_gone = FALSE; + boolean_t dbuf_gone = B_FALSE; ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(refcount_is_zero(&db->db_holds)); @@ -1654,7 +1655,7 @@ dbuf_clear(dmu_buf_impl_t *db) } if (db->db_buf) - dbuf_gone = arc_buf_evict(db->db_buf); + dbuf_gone = arc_clear_callback(db->db_buf); if (!dbuf_gone) mutex_exit(&db->db_mtx); @@ -1831,8 +1832,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, static int dbuf_do_evict(void *private) { - arc_buf_t *buf = private; - dmu_buf_impl_t *db = buf->b_private; + dmu_buf_impl_t *db = private; if (!MUTEX_HELD(&db->db_mtx)) mutex_enter(&db->db_mtx); @@ -2239,11 +2239,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) * block on-disk. If so, then we simply evict * ourselves. */ - if (!DBUF_IS_CACHEABLE(db) || - arc_buf_eviction_needed(db->db_buf)) + if (!DBUF_IS_CACHEABLE(db)) { + if (db->db_blkptr != NULL && + !BP_IS_HOLE(db->db_blkptr) && + !BP_IS_EMBEDDED(db->db_blkptr)) { + spa_t *spa = + dmu_objset_spa(db->db_objset); + blkptr_t bp = *db->db_blkptr; + dbuf_clear(db); + arc_freed(spa, &bp); + } else { + dbuf_clear(db); + } + } else if (arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); - else + } else { mutex_exit(&db->db_mtx); + } } } else { mutex_exit(&db->db_mtx);