From 033c788bd55d380b8f99b717e417493726522081 Mon Sep 17 00:00:00 2001 From: Matthew Macy Date: Wed, 12 Aug 2020 10:03:24 -0700 Subject: [PATCH] Restore ARC MFU/MRU pressure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arc_adapt() function tunes LRU/MLU balance according to 4 types of cache hits (which is passed as state agrument): ghost LRU, LRU, MRU, ghost MRU. If this function is called with wrong cache hit (state), adaptation will be sub-optimal and performance will suffer. Some time ago upstream received this commit: 6950 ARC should cache compressed data) in arc_read() do next sequence (access to ghost buffer) Before this commit, hit to any ghost list was passed arc_adapt() before call to arc_access() which revive element in cache and change state from ghost to real hit. After this commit, the order of calls was reverted and arc_adapt() is now called only with «real» hits even if hit was in one of two ghost lists, which renders ghost lists useless and breaks the ARC algorithm. FreeBSD fixed this problem locally in Change D19094 / Commit r348772. This change is an adaptation of the above commit to the current arc code. Reviewed-by: Brian Behlendorf Signed-off-by: Matt Macy Closes #10548 Closes #10618 --- module/zfs/arc.c | 64 +++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index ebbc89030bad..dd4b0f39d066 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1098,14 +1098,20 @@ static kmutex_t l2arc_feed_thr_lock; static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; -static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); +enum arc_hdr_alloc_flags { + ARC_HDR_ALLOC_RDATA = 0x1, + ARC_HDR_DO_ADAPT = 0x2, +}; + + +static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t); static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); -static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); +static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t); static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t); -static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t); +static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(void); static void arc_buf_watch(arc_buf_t *); @@ -1980,7 +1986,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb) ASSERT(HDR_EMPTY_OR_LOCKED(hdr)); ASSERT(HDR_ENCRYPTED(hdr)); - arc_hdr_alloc_abd(hdr, B_FALSE); + arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT); ret = spa_do_crypt_abd(B_FALSE, spa, zb, hdr->b_crypt_hdr.b_ot, B_FALSE, bswap, hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_iv, @@ -2007,7 +2013,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb) * and then loan a buffer from it, rather than allocating a * linear buffer and wrapping it in an abd later. */ - cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE); tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); ret = zio_decompress_data(HDR_GET_COMPRESS(hdr), @@ -3312,9 +3318,11 @@ arc_buf_destroy_impl(arc_buf_t *buf) } static void -arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) +arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags) { uint64_t size; + boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0); + boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0); ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); ASSERT(HDR_HAS_L1HDR(hdr)); @@ -3324,13 +3332,15 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) if (alloc_rdata) { size = HDR_GET_PSIZE(hdr); ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); - hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr); + hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr, + do_adapt); ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL); ARCSTAT_INCR(arcstat_raw_size, size); } else { size = arc_hdr_size(hdr); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr); + hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr, + do_adapt); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); } @@ -3382,6 +3392,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, arc_buf_contents_t type, boolean_t alloc_rdata) { arc_buf_hdr_t *hdr; + int flags = ARC_HDR_DO_ADAPT; VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA); if (protected) { @@ -3389,6 +3400,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, } else { hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); } + flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0; ASSERT(HDR_EMPTY(hdr)); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); @@ -3412,7 +3424,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, * the compressed or uncompressed data depending on the block * it references and compressed arc enablement. */ - arc_hdr_alloc_abd(hdr, alloc_rdata); + arc_hdr_alloc_abd(hdr, flags); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); return (hdr); @@ -5507,11 +5519,12 @@ arc_is_overflowing(void) } static abd_t * -arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag) +arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag, + boolean_t do_adapt) { arc_buf_contents_t type = arc_buf_type(hdr); - arc_get_data_impl(hdr, size, tag); + arc_get_data_impl(hdr, size, tag, do_adapt); if (type == ARC_BUFC_METADATA) { return (abd_alloc(size, B_TRUE)); } else { @@ -5525,7 +5538,7 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) { arc_buf_contents_t type = arc_buf_type(hdr); - arc_get_data_impl(hdr, size, tag); + arc_get_data_impl(hdr, size, tag, B_TRUE); if (type == ARC_BUFC_METADATA) { return (zio_buf_alloc(size)); } else { @@ -5541,12 +5554,14 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) * limit, we'll only signal the reclaim thread and continue on. */ static void -arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) +arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag, + boolean_t do_adapt) { arc_state_t *state = hdr->b_l1hdr.b_state; arc_buf_contents_t type = arc_buf_type(hdr); - arc_adapt(size, state); + if (do_adapt) + arc_adapt(size, state); /* * If arc_size is currently overflowing, and has grown past our @@ -6346,6 +6361,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, boolean_t devw = B_FALSE; uint64_t size; abd_t *hdr_abd; + int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0; /* * Gracefully handle a damaged logical block size as a @@ -6424,8 +6440,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, * do this after we've called arc_access() to * avoid hitting an assert in remove_reference(). */ + arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state); arc_access(hdr, hash_lock); - arc_hdr_alloc_abd(hdr, encrypted_read); + arc_hdr_alloc_abd(hdr, alloc_flags); } if (encrypted_read) { @@ -6869,7 +6886,7 @@ arc_release(arc_buf_t *buf, void *tag) if (arc_can_share(hdr, lastbuf)) { arc_share_buf(hdr, lastbuf); } else { - arc_hdr_alloc_abd(hdr, B_FALSE); + arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, psize); } @@ -7104,7 +7121,7 @@ arc_write_ready(zio_t *zio) if (ARC_BUF_ENCRYPTED(buf)) { ASSERT3U(psize, >, 0); ASSERT(ARC_BUF_COMPRESSED(buf)); - arc_hdr_alloc_abd(hdr, B_TRUE); + arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA); abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { /* @@ -7114,16 +7131,17 @@ arc_write_ready(zio_t *zio) */ if (BP_IS_ENCRYPTED(bp)) { ASSERT3U(psize, >, 0); - arc_hdr_alloc_abd(hdr, B_TRUE); + arc_hdr_alloc_abd(hdr, + ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA); abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); } else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && !ARC_BUF_COMPRESSED(buf)) { ASSERT3U(psize, >, 0); - arc_hdr_alloc_abd(hdr, B_FALSE); + arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT); abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize); } else { ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr)); - arc_hdr_alloc_abd(hdr, B_FALSE); + arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, arc_buf_size(buf)); } @@ -8418,7 +8436,8 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb) * until arc_read_done(). */ if (BP_IS_ENCRYPTED(bp)) { - abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, + B_TRUE); zio_crypt_decode_params_bp(bp, salt, iv); zio_crypt_decode_mac_bp(bp, mac); @@ -8454,7 +8473,8 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb) */ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) { - abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, + B_TRUE); void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),