Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update arc_available_memory() to check freemem #3639

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -388,23 +388,24 @@ Default value: \fB5\fR.
.sp
.ne 2
.na
\fBzfs_arc_max\fR (ulong)
\fBzfs_arc_lotsfree_percent\fR (int)
.ad
.RS 12n
Max arc size
Throttle I/O when free system memory drops below this percentage of total
system memory. Setting this value to 0 will disable the throttle.
.sp
Default value: \fB0\fR.
Default value: \fB10\fR.
.RE

.sp
.ne 2
.na
\fBzfs_arc_memory_throttle_disable\fR (int)
\fBzfs_arc_max\fR (ulong)
.ad
.RS 12n
Disable memory throttle
Max arc size
.sp
Use \fB1\fR for yes (default) and \fB0\fR to disable.
Default value: \fB0\fR.
.RE

.sp
Expand Down Expand Up @@ -566,6 +567,19 @@ log2(fraction of arc to reclaim)
Default value: \fB5\fR.
.RE

.sp
.ne 2
.na
\fBzfs_arc_sys_free\fR (ulong)
.ad
.RS 12n
The target number of bytes the ARC should leave as free memory on the system.
Defaults to the larger of 1/64 of physical memory or 512K. Setting this
option to a non-zero value will override the default.
.sp
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
Expand Down
108 changes: 70 additions & 38 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,14 @@ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
/*
* These tunables are Linux specific
*/
int zfs_arc_memory_throttle_disable = 1;
unsigned long zfs_arc_sys_free = 0;
int zfs_arc_min_prefetch_lifespan = 0;
int zfs_arc_p_aggressive_disable = 1;
int zfs_arc_p_dampener_disable = 1;
int zfs_arc_meta_prune = 10000;
int zfs_arc_meta_strategy = ARC_STRATEGY_META_BALANCED;
int zfs_arc_meta_adjust_restarts = 4096;
int zfs_arc_lotsfree_percent = 10;

/* The 6 states: */
static arc_state_t ARC_anon;
Expand Down Expand Up @@ -473,6 +474,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
} arc_stats_t;

static arc_stats_t arc_stats = {
Expand Down Expand Up @@ -564,7 +567,9 @@ static arc_stats_t arc_stats = {
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 }
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
{ "arc_sys_free", KSTAT_DATA_UINT64 }
};

#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
Expand Down Expand Up @@ -633,6 +638,8 @@ static arc_state_t *arc_l2c_only;
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */

#define L2ARC_IS_VALID_COMPRESS(_c_) \
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
Expand Down Expand Up @@ -3222,12 +3229,6 @@ int64_t last_free_memory;
free_memory_reason_t last_free_reason;

#ifdef _KERNEL
#ifdef __linux__
/*
* expiration time for arc_no_grow set by direct memory reclaim.
*/
static clock_t arc_grow_time = 0;
#else
/*
* Additional reserve of pages for pp_reserve.
*/
Expand All @@ -3237,7 +3238,6 @@ int64_t arc_pages_pp_reserve = 64;
* Additional reserve of pages for swapfs.
*/
int64_t arc_swapfs_reserve = 64;
#endif
#endif /* _KERNEL */

/*
Expand All @@ -3250,26 +3250,14 @@ arc_available_memory(void)
{
int64_t lowest = INT64_MAX;
free_memory_reason_t r = FMR_UNKNOWN;

#ifdef _KERNEL
#ifdef __linux__
/*
* Under Linux we are not allowed to directly interrogate the global
* memory state. Instead rely on observing that direct reclaim has
* recently occurred therefore the system must be low on memory. The
* exact values returned are not critical but should be small.
*/
if (ddi_time_after_eq(ddi_get_lbolt(), arc_grow_time))
lowest = PAGE_SIZE;
else
lowest = -PAGE_SIZE;
#else
int64_t n;
#ifdef __linux__
pgcnt_t needfree = btop(arc_need_free);
pgcnt_t lotsfree = btop(arc_sys_free);
pgcnt_t desfree = 0;
#endif

/*
* Platforms like illumos have greater visibility in to the memory
* subsystem and can return a more detailed analysis of memory.
*/
if (needfree > 0) {
n = PAGESIZE * (-needfree);
if (n < lowest) {
Expand All @@ -3291,6 +3279,7 @@ arc_available_memory(void)
r = FMR_LOTSFREE;
}

#ifndef __linux__
/*
* check to make sure that swapfs has enough space so that anon
* reservations can still succeed. anon_resvmem() checks that the
Expand Down Expand Up @@ -3319,6 +3308,7 @@ arc_available_memory(void)
lowest = n;
r = FMR_PAGES_PP_MAXIMUM;
}
#endif

#if defined(__i386)
/*
Expand Down Expand Up @@ -3357,12 +3347,11 @@ arc_available_memory(void)
r = FMR_ZIO_ARENA;
}
}
#endif /* __linux__ */
#else
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
lowest = -1024;
#endif
#endif /* _KERNEL */

last_free_memory = lowest;
last_free_reason = r;
Expand Down Expand Up @@ -3480,7 +3469,7 @@ arc_reclaim_thread(void)
to_free = (arc_c >> arc_shrink_shift) - free_memory;
if (to_free > 0) {
#ifdef _KERNEL
to_free = MAX(to_free, ptob(needfree));
to_free = MAX(to_free, arc_need_free);
#endif
arc_shrink(to_free);
}
Expand All @@ -3507,9 +3496,11 @@ arc_reclaim_thread(void)
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
* up any threads before we go to sleep.
* up any threads before we go to sleep and clear
* arc_need_free since nothing more can be done.
*/
cv_broadcast(&arc_reclaim_waiters_cv);
arc_need_free = 0;

/*
* Block until signaled, or after one second (we
Expand Down Expand Up @@ -3713,7 +3704,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
ARCSTAT_BUMP(arcstat_memory_indirect_count);
} else {
arc_no_grow = B_TRUE;
arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
arc_need_free = ptob(sc->nr_to_scan);
ARCSTAT_BUMP(arcstat_memory_direct_count);
}

Expand Down Expand Up @@ -5073,18 +5064,41 @@ static int
arc_memory_throttle(uint64_t reserve, uint64_t txg)
{
#ifdef _KERNEL
if (zfs_arc_memory_throttle_disable)
return (0);
uint64_t available_memory = ptob(freemem);
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
#ifdef __linux__
pgcnt_t minfree = btop(arc_sys_free / 4);
#endif

if (freemem > physmem * arc_lotsfree_percent / 100)
return (0);

if (arc_reclaim_needed()) {
if (txg > last_txg) {
last_txg = txg;
page_load = 0;
}

/*
* If we are in pageout, we know that memory is already tight,
* the arc is already going to be evicting, so we just want to
* continue to let page writes occur as quickly as possible.
*/
if (current_is_kswapd()) {
if (page_load > MAX(ptob(minfree), available_memory) / 4) {
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
return (SET_ERROR(ERESTART));
}
/* Note: reserve is inflated, so we deflate */
page_load += reserve / 8;
return (0);
} else if (page_load > 0 && arc_reclaim_needed()) {
/* memory is low, delay before restarting */
ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
return (SET_ERROR(EAGAIN));
}
page_load = 0;
#endif
return (0);
}
Expand Down Expand Up @@ -5288,6 +5302,16 @@ arc_tuning_update(void)
/* Valid range: 1 - N ticks */
if (zfs_arc_min_prefetch_lifespan)
arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;

/* Valid range: 0 - 100 */
if ((zfs_arc_lotsfree_percent >= 0) &&
(zfs_arc_lotsfree_percent <= 100))
arc_lotsfree_percent = zfs_arc_lotsfree_percent;

/* Valid range: 0 - <all physical memory> */
if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem));

}

void
Expand Down Expand Up @@ -5329,6 +5353,10 @@ arc_init(void)
* swapping out pages when it is preferable to shrink the arc.
*/
spl_register_shrinker(&arc_shrinker);

/* Set to 1/64 of all memory or a minimum of 512K */
arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024));
arc_need_free = 0;
#endif

/* Set min cache to allow safe operation of arc_adapt() */
Expand Down Expand Up @@ -7024,9 +7052,6 @@ MODULE_PARM_DESC(zfs_disable_dup_eviction, "disable duplicate buffer eviction");
module_param(zfs_arc_average_blocksize, int, 0444);
MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size");

module_param(zfs_arc_memory_throttle_disable, int, 0644);
MODULE_PARM_DESC(zfs_arc_memory_throttle_disable, "disable memory throttle");

module_param(zfs_arc_min_prefetch_lifespan, int, 0644);
MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block");

Expand Down Expand Up @@ -7064,4 +7089,11 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
module_param(l2arc_norw, int, 0644);
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");

module_param(zfs_arc_lotsfree_percent, int, 0644);
MODULE_PARM_DESC(zfs_arc_lotsfree_percent,
"System free memory I/O throttle in bytes");

module_param(zfs_arc_sys_free, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");

#endif