From 5b4d109a50d3110473f236aa14931c5ef6b16983 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 2 Oct 2023 17:33:33 -0400 Subject: [PATCH 1/2] ARC: Remove b_cv from struct l1arc_buf_hdr Earlier as part of #14123 I've removed one use of b_cv. This patch reuses the same approach to remove the other one from much more rare code path. This saves 16 bytes of L1 ARC header on FreeBSD (reducing it from 200 to 184 bytes) and seems even more on Linux. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- include/sys/arc_impl.h | 2 -- module/zfs/arc.c | 34 ++++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 78774792f367..da07fd4f8fae 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -159,8 +159,6 @@ struct arc_write_callback { * these two allocation states. */ typedef struct l1arc_buf_hdr { - /* for waiting on reads to complete */ - kcondvar_t b_cv; uint8_t b_byteswap; /* protected by arc state mutex */ diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 22dc0ed5e3b6..919684a589d8 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1151,7 +1151,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) memset(hdr, 0, HDR_FULL_SIZE); hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); #ifdef ZFS_DEBUG mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1211,7 +1210,6 @@ hdr_full_dest(void *vbuf, void *unused) arc_buf_hdr_t *hdr = vbuf; ASSERT(HDR_EMPTY(hdr)); - cv_destroy(&hdr->b_l1hdr.b_cv); zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt); #ifdef ZFS_DEBUG mutex_destroy(&hdr->b_l1hdr.b_freeze_lock); @@ -5586,13 +5584,6 @@ arc_read_done(zio_t *zio) buf_hash_remove(hdr); } - /* - * Broadcast before we drop the hash_lock to avoid the possibility - * that the hdr (and hence the cv) might be freed before we get to - * the cv_broadcast(). - */ - cv_broadcast(&hdr->b_l1hdr.b_cv); - arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); (void) remove_reference(hdr, hdr); @@ -5787,8 +5778,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, } acb->acb_zio_head = head_zio; acb->acb_next = hdr->b_l1hdr.b_acb; - if (hdr->b_l1hdr.b_acb) - hdr->b_l1hdr.b_acb->acb_prev = acb; + hdr->b_l1hdr.b_acb->acb_prev = acb; hdr->b_l1hdr.b_acb = acb; } mutex_exit(hash_lock); @@ -5928,8 +5918,28 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, * and so the performance impact shouldn't * matter. */ - cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); + arc_callback_t *acb = kmem_zalloc( + sizeof (arc_callback_t), KM_SLEEP); + acb->acb_wait = B_TRUE; + mutex_init(&acb->acb_wait_lock, NULL, + MUTEX_DEFAULT, NULL); + cv_init(&acb->acb_wait_cv, NULL, CV_DEFAULT, + NULL); + acb->acb_zio_head = + hdr->b_l1hdr.b_acb->acb_zio_head; + acb->acb_next = hdr->b_l1hdr.b_acb; + hdr->b_l1hdr.b_acb->acb_prev = acb; + hdr->b_l1hdr.b_acb = acb; mutex_exit(hash_lock); + mutex_enter(&acb->acb_wait_lock); + while (acb->acb_wait) { + cv_wait(&acb->acb_wait_cv, + &acb->acb_wait_lock); + } + mutex_exit(&acb->acb_wait_lock); + mutex_destroy(&acb->acb_wait_lock); + cv_destroy(&acb->acb_wait_cv); + kmem_free(acb, sizeof (arc_callback_t)); goto top; } } From 7a5b5f1f12195e6ea1b3203b0d3e637227e41ff2 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 2 Oct 2023 21:01:49 -0400 Subject: [PATCH 2/2] ARC: Pack b_byteswap in struct arc_buf_hdr While b_byteswap rightfully belongs in struct l1arc_buf_hdr, it does not pack there efficiently. Move it to struct arc_buf_hdr allows to reduce L1 ARC header size by 8 bytes, down to 176 bytes on FreeBSD. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- include/sys/arc_impl.h | 4 +--- module/zfs/arc.c | 40 +++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index da07fd4f8fae..6708d1997299 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -159,8 +159,6 @@ struct arc_write_callback { * these two allocation states. */ typedef struct l1arc_buf_hdr { - uint8_t b_byteswap; - /* protected by arc state mutex */ arc_state_t *b_state; multilist_node_t b_arc_node; @@ -477,7 +475,7 @@ struct arc_buf_hdr { arc_buf_contents_t b_type; uint8_t b_complevel; - uint8_t b_reserved1; /* used for 4 byte alignment */ + uint8_t b_byteswap; /* could be in b_l1hdr, but fits */ uint16_t b_reserved2; /* used for 4 byte alignment */ arc_buf_hdr_t *b_hash_next; arc_flags_t b_flags; diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 919684a589d8..53a144f896d1 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1150,7 +1150,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) arc_buf_hdr_t *hdr = vbuf; memset(hdr, 0, HDR_FULL_SIZE); - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + hdr->b_byteswap = DMU_BSWAP_NUMFUNCS; zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); #ifdef ZFS_DEBUG mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1351,7 +1351,7 @@ arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, memcpy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); memcpy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); memcpy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); - *byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + *byteorder = (hdr->b_byteswap == DMU_BSWAP_NUMFUNCS) ? ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; } @@ -1829,7 +1829,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); ASSERT3U(lsize, ==, psize); ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, dsobj, abd, - psize, hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + psize, hdr->b_byteswap != DMU_BSWAP_NUMFUNCS); } else { ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, abd, psize, hdr->b_crypt_hdr.b_mac); @@ -1865,7 +1865,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb) abd_t *cabd = NULL; void *tmp = NULL; boolean_t no_crypt = B_FALSE; - boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + boolean_t bswap = (hdr->b_byteswap != DMU_BSWAP_NUMFUNCS); ASSERT(HDR_EMPTY_OR_LOCKED(hdr)); ASSERT(HDR_ENCRYPTED(hdr)); @@ -2019,7 +2019,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); boolean_t compressed = (flags & ARC_FILL_COMPRESSED) != 0; boolean_t encrypted = (flags & ARC_FILL_ENCRYPTED) != 0; - dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap; + dmu_object_byteswap_t bswap = hdr->b_byteswap; kmutex_t *hash_lock = (flags & ARC_FILL_LOCKED) ? NULL : HDR_LOCK(hdr); ASSERT3P(buf->b_data, !=, NULL); @@ -2739,7 +2739,7 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0; return (!ARC_BUF_ENCRYPTED(buf) && buf_compressed == hdr_compressed && - hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS && + hdr->b_byteswap == DMU_BSWAP_NUMFUNCS && !HDR_SHARED_DATA(hdr) && (ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf))); } @@ -3260,7 +3260,7 @@ arc_hdr_free_abd(arc_buf_hdr_t *hdr, boolean_t free_rdata) } if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + hdr->b_byteswap = DMU_BSWAP_NUMFUNCS; ARCSTAT_INCR(arcstat_compressed_size, -size); ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr)); @@ -3489,6 +3489,8 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) nhdr->b_dva = hdr->b_dva; nhdr->b_birth = hdr->b_birth; nhdr->b_type = hdr->b_type; + nhdr->b_complevel = hdr->b_complevel; + nhdr->b_byteswap = hdr->b_byteswap; nhdr->b_flags = hdr->b_flags; nhdr->b_psize = hdr->b_psize; nhdr->b_lsize = hdr->b_lsize; @@ -3497,7 +3499,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum; #endif nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt; - nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap; nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state; nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access; nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits; @@ -3531,6 +3532,8 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) memset(&hdr->b_dva, 0, sizeof (dva_t)); hdr->b_birth = 0; hdr->b_type = 0; + hdr->b_complevel = 0; + hdr->b_byteswap = 0; hdr->b_flags = 0; hdr->b_psize = 0; hdr->b_lsize = 0; @@ -3540,7 +3543,6 @@ arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) #endif hdr->b_l1hdr.b_buf = NULL; hdr->b_l1hdr.b_bufcnt = 0; - hdr->b_l1hdr.b_byteswap = 0; hdr->b_l1hdr.b_state = NULL; hdr->b_l1hdr.b_arc_access = 0; hdr->b_l1hdr.b_mru_hits = 0; @@ -3589,7 +3591,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, hdr = arc_hdr_realloc_crypt(hdr, B_TRUE); hdr->b_crypt_hdr.b_dsobj = dsobj; hdr->b_crypt_hdr.b_ot = ot; - hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + hdr->b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); if (!arc_hdr_has_uncompressed_buf(hdr)) arc_cksum_free(hdr); @@ -3673,7 +3675,7 @@ arc_alloc_raw_buf(spa_t *spa, const void *tag, uint64_t dsobj, hdr->b_crypt_hdr.b_dsobj = dsobj; hdr->b_crypt_hdr.b_ot = ot; - hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + hdr->b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); memcpy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN); memcpy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN); @@ -5477,13 +5479,13 @@ arc_read_done(zio_t *zio) /* byteswap if necessary */ if (BP_SHOULD_BYTESWAP(zio->io_bp)) { if (BP_GET_LEVEL(zio->io_bp) > 0) { - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64; + hdr->b_byteswap = DMU_BSWAP_UINT64; } else { - hdr->b_l1hdr.b_byteswap = + hdr->b_byteswap = DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp)); } } else { - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + hdr->b_byteswap = DMU_BSWAP_NUMFUNCS; } if (!HDR_L2_READING(hdr)) { hdr->b_complevel = zio->io_prop.zp_complevel; @@ -6572,13 +6574,13 @@ arc_write_ready(zio_t *zio) if (BP_SHOULD_BYTESWAP(bp)) { if (BP_GET_LEVEL(bp) > 0) { - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64; + hdr->b_byteswap = DMU_BSWAP_UINT64; } else { - hdr->b_l1hdr.b_byteswap = + hdr->b_byteswap = DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); } } else { - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + hdr->b_byteswap = DMU_BSWAP_NUMFUNCS; } hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); @@ -6800,7 +6802,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, localprop.zp_compress = HDR_GET_COMPRESS(hdr); localprop.zp_complevel = hdr->b_complevel; localprop.zp_byteorder = - (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + (hdr->b_byteswap == DMU_BSWAP_NUMFUNCS) ? ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; memcpy(localprop.zp_salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); @@ -9057,7 +9059,7 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t size = arc_hdr_size(hdr); boolean_t ismd = HDR_ISTYPE_METADATA(hdr); - boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + boolean_t bswap = (hdr->b_byteswap != DMU_BSWAP_NUMFUNCS); dsl_crypto_key_t *dck = NULL; uint8_t mac[ZIO_DATA_MAC_LEN] = { 0 }; boolean_t no_crypt = B_FALSE;