Skip to content

Commit

Permalink
Unify arc_prune_async() code, fix excessive ARC pruning
Browse files Browse the repository at this point in the history
There is no sense in having separate implementations for FreeBSD and Linux.
Make the Linux code shared, as it is more functional, and just register a
FreeBSD-specific prune callback with the arc_add_prune_callback() API.

Aside from the code cleanup, this fixes excessive pruning on FreeBSD.

[olce: This code comes from the OpenZFS pull request:
openzfs/zfs#16083, vendor-merged into our tree.  Its
commit message has been slightly adapted to the present context.  The upstream
pull request has been reviewed and merged into 'zfs-2.1.16-staging' as
5b81b1bf5e6d6aeb8a87175dcb12b529185cac2f, which should come into our tree at the
next vendor import.  This is the same code that was merged into stable/14 and
main as part of vendor merges, and released as an EN (FreeBSD-EN-23:18.openzfs)
over releng/14.0 by markj@.]

PR:             275594, 274698
Reported by:    Seigo Tanimura <[email protected]>, markj, and others
Tested by:      olce
Approved by:    emaste (mentor)
Approved by:	so
Obtained from:  OpenZFS
Sponsored by:   iXsystems, Inc.
Sponsored by:   The FreeBSD Foundation
Signed-off-by:  Alexander Motin <[email protected]>

(cherry picked from commit 330954b)
  • Loading branch information
amotin authored and rkojedzinszky committed Jul 12, 2024
1 parent 0e722e7 commit 8b42131
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 117 deletions.
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;

/* zpl_super.c */
extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);

extern const struct super_operations zpl_super_operations;
extern const struct export_operations zpl_export_operations;
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t;
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
const blkptr_t *bp, arc_buf_t *buf, void *priv);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
typedef void arc_prune_func_t(int64_t bytes, void *priv);
typedef void arc_prune_func_t(uint64_t bytes, void *priv);

/* Shared module parameters */
extern int zfs_arc_average_blocksize;
Expand Down
1 change: 0 additions & 1 deletion sys/contrib/openzfs/include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);

extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
extern void arc_prune_async(int64_t);
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
extern uint64_t arc_free_memory(void);
extern int64_t arc_available_memory(void);
Expand Down
62 changes: 0 additions & 62 deletions sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@
#include <sys/vm.h>
#include <sys/vmmeter.h>

#if __FreeBSD_version >= 1300139
static struct sx arc_vnlru_lock;
static struct vnode *arc_vnlru_marker;
#endif

extern struct vfsops zfs_vfsops;

uint_t zfs_arc_free_target = 0;
Expand Down Expand Up @@ -151,53 +146,6 @@ arc_default_max(uint64_t min, uint64_t allmem)
return (MAX(allmem * 5 / 8, size));
}

/*
 * Taskq callback dispatched by arc_prune_async().  The requested count
 * arrives encoded in the task argument pointer; it is decoded here and
 * passed, together with zfs_vfsops, to vnlru_free_vfsops() (or to
 * vnlru_free() on older FreeBSD versions).
 */
static void
arc_prune_task(void *arg)
{
	int64_t count = (intptr_t)arg;

#ifndef __ILP32__
	/* Cap the request at INT_MAX before handing it on. */
	count = (count > INT_MAX) ? INT_MAX : count;
#endif

#if __FreeBSD_version >= 1300139
	/* arc_vnlru_marker is only passed along while arc_vnlru_lock is held. */
	sx_xlock(&arc_vnlru_lock);
	vnlru_free_vfsops(count, &zfs_vfsops, arc_vnlru_marker);
	sx_xunlock(&arc_vnlru_lock);
#else
	vnlru_free(count, &zfs_vfsops);
#endif
}

/*
 * Ask for a portion of the ARC buffers held by consumers to be released,
 * so that the ARC can honor the arc_meta_limit and reclaim otherwise
 * pinned buffers.
 *
 * The work is done asynchronously: every call queues one arc_prune_task()
 * on arc_prune_taskq, with the adjustment encoded in the task argument,
 * and bumps arcstat_prune.  The result of taskq_dispatch() is not checked.
 * Being asynchronous, this may be safely called in the context of the
 * arc_reclaim_thread().
 */
void
arc_prune_async(int64_t adjust)
{
	void *task_arg;

#ifndef __LP64__
	/* The value must fit in a pointer-sized integer to be passed along. */
	if (adjust > INTPTR_MAX)
		adjust = INTPTR_MAX;
#endif
	task_arg = (void *)(intptr_t)adjust;

	taskq_dispatch(arc_prune_taskq, arc_prune_task, task_arg, TQ_SLEEP);
	ARCSTAT_BUMP(arcstat_prune);
}

uint64_t
arc_all_memory(void)
{
Expand Down Expand Up @@ -248,23 +196,13 @@ arc_lowmem_init(void)
{
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
EVENTHANDLER_PRI_FIRST);
#if __FreeBSD_version >= 1300139
arc_vnlru_marker = vnlru_alloc_marker();
sx_init(&arc_vnlru_lock, "arc vnlru lock");
#endif
}

/*
 * Tear down the low-memory hooks: deregister the vm_lowmem event handler
 * and, on FreeBSD versions that use it, free the vnlru marker and destroy
 * its lock.
 */
void
arc_lowmem_fini(void)
{
/* Only deregister if a handler was actually registered. */
if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
#if __FreeBSD_version >= 1300139
/* The lock is destroyed only when a marker exists. */
if (arc_vnlru_marker != NULL) {
vnlru_free_marker(arc_vnlru_marker);
sx_destroy(&arc_vnlru_lock);
}
#endif
}

void
Expand Down
32 changes: 32 additions & 0 deletions sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -2097,6 +2097,26 @@ zfs_vnodes_adjust_back(void)
#endif
}

#if __FreeBSD_version >= 1300139
static struct sx zfs_vnlru_lock;
static struct vnode *zfs_vnlru_marker;
#endif
static arc_prune_t *zfs_prune;

/*
 * FreeBSD ARC prune callback (registered with arc_add_prune_callback()).
 * Passes the requested count, clamped to INT_MAX, together with zfs_vfsops
 * to vnlru_free_vfsops(), or to vnlru_free() on older FreeBSD versions.
 * The private argument is unused.
 */
static void
zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
{
	/* Clamp the request to INT_MAX before handing it to the vnlru code. */
	const uint64_t count = (nr_to_scan > INT_MAX) ? INT_MAX : nr_to_scan;

#if __FreeBSD_version >= 1300139
	/* zfs_vnlru_marker is only passed along while zfs_vnlru_lock is held. */
	sx_xlock(&zfs_vnlru_lock);
	vnlru_free_vfsops(count, &zfs_vfsops, zfs_vnlru_marker);
	sx_xunlock(&zfs_vnlru_lock);
#else
	vnlru_free(count, &zfs_vfsops);
#endif
}

void
zfs_init(void)
{
Expand All @@ -2123,11 +2143,23 @@ zfs_init(void)
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);

#if __FreeBSD_version >= 1300139
zfs_vnlru_marker = vnlru_alloc_marker();
sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
#endif
zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
}

void
zfs_fini(void)
{
arc_remove_prune_callback(zfs_prune);
#if __FreeBSD_version >= 1300139
vnlru_free_marker(zfs_vnlru_marker);
sx_destroy(&zfs_vnlru_lock);
#endif

taskq_destroy(zfsvfs_taskq);
zfsctl_fini();
zfs_znode_fini();
Expand Down
51 changes: 0 additions & 51 deletions sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -488,57 +488,6 @@ arc_unregister_hotplug(void)
}
#endif /* _KERNEL */

/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *ptr)
{
arc_prune_t *ap = (arc_prune_t *)ptr;
arc_prune_func_t *func = ap->p_pfunc;

if (func != NULL)
func(ap->p_adjust, ap->p_private);

zfs_refcount_remove(&ap->p_refcnt, func);
}

/*
 * Notify registered consumers that they must drop holds on a portion of
 * the ARC buffers they reference.  This provides a mechanism to ensure the
 * ARC can honor the arc_meta_limit and reclaim otherwise pinned ARC
 * buffers.  It is analogous to dnlc_reduce_cache() but more generic.
 *
 * The work is performed asynchronously, so this may be safely called in
 * the context of the arc_reclaim_thread().  A reference is taken here for
 * each arc_prune_t that gets a task dispatched, and arc_prune_task()
 * releases it once the registered arc_prune_func_t has completed.
 */
void
arc_prune_async(int64_t adjust)
{
	arc_prune_t *ap;

	mutex_enter(&arc_prune_mtx);
	for (ap = list_head(&arc_prune_list); ap != NULL;
	    ap = list_next(&arc_prune_list, ap)) {
		/*
		 * Entries whose reference count is already 2 or more are
		 * skipped (presumably a task for them is still pending --
		 * confirm against arc_add_prune_callback()).
		 */
		if (zfs_refcount_count(&ap->p_refcnt) < 2) {
			zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
			ap->p_adjust = adjust;
			if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
			    ap, TQ_SLEEP) != TASKQID_INVALID) {
				ARCSTAT_BUMP(arcstat_prune);
			} else {
				/* Dispatch failed: give the reference back. */
				zfs_refcount_remove(&ap->p_refcnt,
				    ap->p_pfunc);
			}
		}
	}
	mutex_exit(&arc_prune_mtx);
}

/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
"Limit on number of pages that ARC shrinker can reclaim at once");
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ zpl_kill_sb(struct super_block *sb)
}

void
zpl_prune_sb(int64_t nr_to_scan, void *arg)
zpl_prune_sb(uint64_t nr_to_scan, void *arg)
{
struct super_block *sb = (struct super_block *)arg;
int objects = 0;
Expand Down
52 changes: 52 additions & 0 deletions sys/contrib/openzfs/module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,8 @@ static void l2arc_do_free_on_write(void);
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
boolean_t state_only);

static void arc_prune_async(uint64_t adjust);

#define l2arc_hdr_arcstats_increment(hdr) \
l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
#define l2arc_hdr_arcstats_decrement(hdr) \
Expand Down Expand Up @@ -6521,6 +6523,56 @@ arc_remove_prune_callback(arc_prune_t *p)
kmem_free(p, sizeof (*p));
}

/*
* Helper function for arc_prune_async() it is responsible for safely
* handling the execution of a registered arc_prune_func_t.
*/
static void
arc_prune_task(void *ptr)
{
arc_prune_t *ap = (arc_prune_t *)ptr;
arc_prune_func_t *func = ap->p_pfunc;

if (func != NULL)
func(ap->p_adjust, ap->p_private);

zfs_refcount_remove(&ap->p_refcnt, func);
}

/*
 * Notify registered consumers that they must drop holds on a portion of
 * the ARC buffers they reference.  This provides a mechanism to ensure the
 * ARC can honor the metadata limit and reclaim otherwise pinned ARC
 * buffers.
 *
 * Each entry on arc_prune_list is visited under arc_prune_mtx.  For every
 * entry that is not skipped, a reference is taken, the adjustment is
 * recorded in the entry and arc_prune_task() is dispatched on
 * arc_prune_taskq; arcstat_prune is bumped once per successful dispatch.
 * Because the callbacks run asynchronously, this may be safely called in
 * the context of the arc_reclaim_thread().  arc_prune_task() releases the
 * reference once the registered arc_prune_func_t has completed.
 */
static void
arc_prune_async(uint64_t adjust)
{
	arc_prune_t *cb;

	mutex_enter(&arc_prune_mtx);

	cb = list_head(&arc_prune_list);
	while (cb != NULL) {
		/*
		 * Only dispatch for entries whose reference count is below
		 * 2 (presumably a higher count means an earlier prune task
		 * has not finished yet -- confirm against
		 * arc_add_prune_callback()).
		 */
		if (zfs_refcount_count(&cb->p_refcnt) < 2) {
			zfs_refcount_add(&cb->p_refcnt, cb->p_pfunc);
			cb->p_adjust = adjust;

			if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
			    cb, TQ_SLEEP) == TASKQID_INVALID) {
				/* Nothing was queued: undo the reference. */
				zfs_refcount_remove(&cb->p_refcnt,
				    cb->p_pfunc);
			} else {
				ARCSTAT_BUMP(arcstat_prune);
			}
		}

		cb = list_next(&arc_prune_list, cb);
	}

	mutex_exit(&arc_prune_mtx);
}

/*
* Notify the arc that a block was freed, and thus will never be used again.
*/
Expand Down

0 comments on commit 8b42131

Please sign in to comment.