Skip to content

Commit

Permalink
Introduce a tunable to exclude special class buffers from L2ARC
Browse files Browse the repository at this point in the history
Special allocation class or dedup vdevs may have roughly the same
performance as L2ARC vdevs. Introduce a new tunable to exclude those
buffers from being cacheable on L2ARC.

Reviewed-by: Don Brady <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: George Amanakis <[email protected]>
Closes openzfs#11761
Closes openzfs#12285
  • Loading branch information
gamanakis authored and tonyhutter committed Sep 14, 2022
1 parent c8f795b commit 8bd3dca
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 21 deletions.
1 change: 1 addition & 0 deletions include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ typedef void arc_prune_func_t(int64_t bytes, void *priv);

/* Shared module parameters */
extern int zfs_arc_average_blocksize;
extern int l2arc_exclude_special;

/* generic arc_done_func_t's which you can use */
arc_read_done_func_t arc_bcopy_func;
Expand Down
11 changes: 1 addition & 10 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,16 +441,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))

/*
 * Evaluates to true when the objset owning _db has os_secondary_cache set to
 * ZFS_CACHE_ALL, or when dbuf_is_metadata(_db) is true and os_secondary_cache
 * is ZFS_CACHE_METADATA.  The block pointer of the dbuf is not consulted.
 * Note that _db is expanded more than once, so it should not have side
 * effects.
 */
#define DBUF_IS_L2CACHEABLE(_db) \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))

/*
 * Evaluates to true when the objset owning _dn has os_secondary_cache set to
 * ZFS_CACHE_ALL, or when os_secondary_cache is ZFS_CACHE_METADATA and either
 * _level is greater than 0 or the dnode's dn_type satisfies
 * DMU_OT_IS_METADATA().  Note that _dn is expanded more than once, so it
 * should not have side effects.
 */
#define DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level) \
((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(((_level) > 0 || \
DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) && \
((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db);

#ifdef ZFS_DEBUG

Expand Down
4 changes: 0 additions & 4 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,6 @@ struct objset {
#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode)
#define DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)

/*
 * Evaluates to true when the objset's os_secondary_cache is either
 * ZFS_CACHE_ALL or ZFS_CACHE_METADATA.  Note that os is expanded twice, so
 * it should not have side effects.
 */
#define DMU_OS_IS_L2CACHEABLE(os) \
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
(os)->os_secondary_cache == ZFS_CACHE_METADATA)

/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
Expand Down
5 changes: 5 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ A value of
.Sy 100
disables this feature.
.
.It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int
Controls whether buffers present on special vdevs are eligible for caching
into L2ARC.
If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
.
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int
Controls whether only MFU metadata and data are cached from ARC into L2ARC.
This may be desired to avoid wasting space on L2ARC when reading/writing large
Expand Down
12 changes: 12 additions & 0 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
#define l2arc_hdr_arcstats_decrement_state(hdr) \
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)

/*
* l2arc_exclude_special : A zfs module parameter that controls whether buffers
* present on special vdevs are eligible for caching in L2ARC. If
* set to 1, exclude dbufs on special vdevs from being cached to
* L2ARC.
*/
int l2arc_exclude_special = 0;

/*
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
* metadata and data are cached from ARC into L2ARC.
Expand Down Expand Up @@ -11136,6 +11144,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
"Cache only MFU data from ARC into L2ARC");

ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
"If set to 1 exclude dbufs on special vdevs from being cached to "
"L2ARC.");

ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");

Expand Down
71 changes: 67 additions & 4 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include <cityhash.h>
#include <sys/spa_impl.h>
#include <sys/wmsum.h>
#include <sys/vdev_impl.h>

kstat_t *dbuf_ksp;

Expand Down Expand Up @@ -594,6 +595,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
}
}

/*
 * Decide whether the buffer backing this dbuf may be flagged for L2ARC.
 *
 * The objset's os_secondary_cache policy is checked first: the dbuf is
 * eligible when the policy is ZFS_CACHE_ALL, or when it is
 * ZFS_CACHE_METADATA and dbuf_is_metadata(db) is true.
 *
 * When l2arc_exclude_special is 0 the policy alone decides, so a dbuf whose
 * block pointer is still NULL or a hole is treated exactly like any other
 * buffer.  Only when l2arc_exclude_special is set do we look at the block
 * pointer: the vdev id is read from bp->blk_dva, and buffers whose top-level
 * vdev has a special or dedup allocation bias are excluded.  With the
 * tunable set, a NULL or hole block pointer is reported as not cacheable
 * because its vdev cannot be determined.
 *
 * Returns B_TRUE if the buffer is L2ARC eligible, B_FALSE otherwise.
 */
boolean_t
dbuf_is_l2cacheable(dmu_buf_impl_t *db)
{
	zfs_cache_type_t cache = db->db_objset->os_secondary_cache;

	/* The secondarycache policy has to allow this buffer at all. */
	if (cache != ZFS_CACHE_ALL &&
	    !(cache == ZFS_CACHE_METADATA && dbuf_is_metadata(db)))
		return (B_FALSE);

	/* Nothing is excluded by vdev class: the policy is the answer. */
	if (l2arc_exclude_special == 0)
		return (B_TRUE);

	blkptr_t *bp = db->db_blkptr;

	if (bp == NULL || BP_IS_HOLE(bp))
		return (B_FALSE);

	uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
	vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev;
	vdev_t *vd = NULL;

	if (vdev < rvd->vdev_children)
		vd = rvd->vdev_child[vdev];

	/* An unresolvable vdev cannot be classified; keep it cacheable. */
	if (vd == NULL)
		return (B_TRUE);

	if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
	    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP)
		return (B_TRUE);

	return (B_FALSE);
}

/*
 * Decide whether a block of the given dnode at the given indirection level
 * may be flagged for L2ARC.
 *
 * bp    - block pointer of the block in question; may be NULL or a hole.
 * dn    - dnode the block belongs to.
 * level - indirection level of the block.
 *
 * The objset's os_secondary_cache policy is checked first: the block is
 * eligible when the policy is ZFS_CACHE_ALL, or when it is
 * ZFS_CACHE_METADATA and either level > 0 or the dnode's type satisfies
 * DMU_OT_IS_METADATA().
 *
 * When l2arc_exclude_special is 0 the policy alone decides and bp is not
 * examined.  When the tunable is set, the vdev id is read from bp->blk_dva
 * and blocks whose top-level vdev has a special or dedup allocation bias are
 * excluded; a NULL or hole bp is then reported as not cacheable.
 *
 * Returns B_TRUE if the block is L2ARC eligible, B_FALSE otherwise.
 */
static inline boolean_t
dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
{
	zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache;

	/* The secondarycache policy has to allow this block at all. */
	if (cache != ZFS_CACHE_ALL &&
	    !(cache == ZFS_CACHE_METADATA && (level > 0 ||
	    DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type))))
		return (B_FALSE);

	/* Nothing is excluded by vdev class: the policy is the answer. */
	if (l2arc_exclude_special == 0)
		return (B_TRUE);

	if (bp == NULL || BP_IS_HOLE(bp))
		return (B_FALSE);

	uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
	vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev;
	vdev_t *vd = NULL;

	if (vdev < rvd->vdev_children)
		vd = rvd->vdev_child[vdev];

	/* An unresolvable vdev cannot be classified; keep it cacheable. */
	if (vd == NULL)
		return (B_TRUE);

	if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
	    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP)
		return (B_TRUE);

	return (B_FALSE);
}


/*
* This function *must* return indices evenly distributed between all
Expand Down Expand Up @@ -1523,7 +1586,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
DTRACE_SET_STATE(db, "read issued");
mutex_exit(&db->db_mtx);

if (DBUF_IS_L2CACHEABLE(db))
if (dbuf_is_l2cacheable(db))
aflags |= ARC_FLAG_L2CACHE;

dbuf_add_ref(db, NULL);
Expand Down Expand Up @@ -3372,7 +3435,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
dpa->dpa_arg = arg;

/* flag if L2ARC eligible, l2arc_noprefetch then decides */
if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
if (dnode_level_is_l2cacheable(&bp, dn, level))
dpa->dpa_aflags |= ARC_FLAG_L2CACHE;

/*
Expand All @@ -3390,7 +3453,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
zbookmark_phys_t zb;

/* flag if L2ARC eligible, l2arc_noprefetch then decides */
if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
if (dnode_level_is_l2cacheable(&bp, dn, level))
iter_aflags |= ARC_FLAG_L2CACHE;

SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
Expand Down Expand Up @@ -4989,7 +5052,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
children_ready_cb = dbuf_write_children_ready;

dr->dr_zio = arc_write(pio, os->os_spa, txg,
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
&dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
&zp, dbuf_write_ready,
children_ready_cb, dbuf_write_physdone,
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1846,7 +1846,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_tx = NULL;

zio_nowait(arc_write(pio, os->os_spa, txg,
zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));

Expand Down
34 changes: 32 additions & 2 deletions module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@
#include <sys/dmu_recv.h>
#include <sys/zfs_project.h>
#include "zfs_namecheck.h"
#include <sys/vdev_impl.h>
#include <sys/arc.h>

/*
* Needed to close a window in dnode_move() that allows the objset to be freed
Expand Down Expand Up @@ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
multilist_get_num_sublists(ml));
}

/*
 * Decide whether the objset's root block may be flagged for L2ARC.
 *
 * The objset is eligible when os_secondary_cache is ZFS_CACHE_ALL or
 * ZFS_CACHE_METADATA.
 *
 * When l2arc_exclude_special is 0 the policy alone decides and os_rootbp is
 * not examined.  When the tunable is set, the vdev id is read from
 * os_rootbp->blk_dva and the objset is excluded if that top-level vdev has a
 * special or dedup allocation bias; a NULL or hole os_rootbp is then
 * reported as not cacheable.
 *
 * Returns B_TRUE if the root block is L2ARC eligible, B_FALSE otherwise.
 */
static inline boolean_t
dmu_os_is_l2cacheable(objset_t *os)
{
	zfs_cache_type_t cache = os->os_secondary_cache;

	/* The secondarycache policy has to allow caching at all. */
	if (cache != ZFS_CACHE_ALL && cache != ZFS_CACHE_METADATA)
		return (B_FALSE);

	/* Nothing is excluded by vdev class: the policy is the answer. */
	if (l2arc_exclude_special == 0)
		return (B_TRUE);

	blkptr_t *bp = os->os_rootbp;

	if (bp == NULL || BP_IS_HOLE(bp))
		return (B_FALSE);

	uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
	vdev_t *rvd = os->os_spa->spa_root_vdev;
	vdev_t *vd = NULL;

	if (vdev < rvd->vdev_children)
		vd = rvd->vdev_child[vdev];

	/* An unresolvable vdev cannot be classified; keep it cacheable. */
	if (vd == NULL)
		return (B_TRUE);

	if (vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
	    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP)
		return (B_TRUE);

	return (B_FALSE);
}

/*
* Instantiates the objset_t in-memory structure corresponding to the
* objset_phys_t that's pointed to by the specified blkptr_t.
Expand Down Expand Up @@ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

if (DMU_OS_IS_L2CACHEABLE(os))
if (dmu_os_is_l2cacheable(os))
aflags |= ARC_FLAG_L2CACHE;

if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
Expand Down Expand Up @@ -1661,7 +1691,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
}

zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

Expand Down

0 comments on commit 8bd3dca

Please sign in to comment.