Skip to content

Commit

Permalink
Update arc_available_memory() to check freemem
Browse files Browse the repository at this point in the history
While Linux doesn't provide detailed information about the state of
the VM, it does provide us with the total number of free pages.  This
information should be incorporated into the arc_available_memory()
calculation rather than solely relying on a signal from direct reclaim.
Conceptually this brings arc_available_memory() back in sync with illumos.

It is also desirable that the target amount of free memory be tunable
on a system.  While the default values are expected to work well
for most workloads there may be cases where custom values are needed.
The zfs_arc_sys_free module option was added for this purpose.

zfs_arc_sys_free - The target number of bytes the ARC should leave
                   as free memory on the system.  This value can be
                   checked in /proc/spl/kstat/zfs/arcstats and
                   setting this module option will override the
                   default value.

Signed-off-by: Brian Behlendorf <[email protected]>
Closes #3637
  • Loading branch information
behlendorf committed Jul 30, 2015
1 parent 6339c1b commit 11f552f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 31 deletions.
13 changes: 13 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,19 @@ log2(fraction of arc to reclaim)
Default value: \fB5\fR.
.RE

.sp
.ne 2
.na
\fBzfs_arc_sys_free\fR (ulong)
.ad
.RS 12n
The target number of bytes the ARC should leave as free memory on the system.
Defaults to the larger of 1/64 of physical memory or 512K. Setting this
option to a non-zero value will override the default.
.sp
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
Expand Down
64 changes: 33 additions & 31 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
/*
* These tunables are Linux specific
*/
/*
* Target number of bytes the ARC should leave as free memory on the
* system.  Zero (the default) leaves the built-in target in place; a
* non-zero value overrides it.
*/
unsigned long zfs_arc_sys_free = 0;
int zfs_arc_memory_throttle_disable = 1;
int zfs_arc_min_prefetch_lifespan = 0;
int zfs_arc_p_aggressive_disable = 1;
Expand Down Expand Up @@ -473,6 +474,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
} arc_stats_t;

static arc_stats_t arc_stats = {
Expand Down Expand Up @@ -564,7 +567,9 @@ static arc_stats_t arc_stats = {
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 }
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
{ "arc_sys_free", KSTAT_DATA_UINT64 }
};

#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
Expand Down Expand Up @@ -633,6 +638,8 @@ static arc_state_t *arc_l2c_only;
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */

#define L2ARC_IS_VALID_COMPRESS(_c_) \
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
Expand Down Expand Up @@ -3222,12 +3229,6 @@ int64_t last_free_memory;
free_memory_reason_t last_free_reason;

#ifdef _KERNEL
#ifdef __linux__
/*
* expiration time for arc_no_grow set by direct memory reclaim.
*/
static clock_t arc_grow_time = 0;
#else
/*
* Additional reserve of pages for pp_reserve.
*/
Expand All @@ -3237,7 +3238,6 @@ int64_t arc_pages_pp_reserve = 64;
* Additional reserve of pages for swapfs.
*/
int64_t arc_swapfs_reserve = 64;
#endif
#endif /* _KERNEL */

/*
Expand All @@ -3250,26 +3250,14 @@ arc_available_memory(void)
{
int64_t lowest = INT64_MAX;
free_memory_reason_t r = FMR_UNKNOWN;

#ifdef _KERNEL
#ifdef __linux__
/*
* Under Linux we are not allowed to directly interrogate the global
* memory state. Instead rely on observing that direct reclaim has
* recently occurred therefore the system must be low on memory. The
* exact values returned are not critical but should be small.
*/
if (ddi_time_after_eq(ddi_get_lbolt(), arc_grow_time))
lowest = PAGE_SIZE;
else
lowest = -PAGE_SIZE;
#else
int64_t n;
#ifdef __linux__
pgcnt_t needfree = btop(arc_need_free);
pgcnt_t lotsfree = btop(arc_sys_free);
pgcnt_t desfree = 0;
#endif

/*
* Platforms like illumos have greater visibility in to the memory
* subsystem and can return a more detailed analysis of memory.
*/
if (needfree > 0) {
n = PAGESIZE * (-needfree);
if (n < lowest) {
Expand All @@ -3291,6 +3279,7 @@ arc_available_memory(void)
r = FMR_LOTSFREE;
}

#ifndef __linux__
/*
* check to make sure that swapfs has enough space so that anon
* reservations can still succeed. anon_resvmem() checks that the
Expand Down Expand Up @@ -3319,6 +3308,7 @@ arc_available_memory(void)
lowest = n;
r = FMR_PAGES_PP_MAXIMUM;
}
#endif

#if defined(__i386)
/*
Expand Down Expand Up @@ -3357,12 +3347,11 @@ arc_available_memory(void)
r = FMR_ZIO_ARENA;
}
}
#endif /* __linux__ */
#else
#else /* _KERNEL */
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
lowest = -1024;
#endif
#endif /* _KERNEL */

last_free_memory = lowest;
last_free_reason = r;
Expand Down Expand Up @@ -3480,7 +3469,7 @@ arc_reclaim_thread(void)
to_free = (arc_c >> arc_shrink_shift) - free_memory;
if (to_free > 0) {
#ifdef _KERNEL
to_free = MAX(to_free, ptob(needfree));
to_free = MAX(to_free, arc_need_free);
#endif
arc_shrink(to_free);
}
Expand All @@ -3507,9 +3496,11 @@ arc_reclaim_thread(void)
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
* up any threads before we go to sleep.
* up any threads before we go to sleep and clear
* arc_need_free since nothing more can be done.
*/
cv_broadcast(&arc_reclaim_waiters_cv);
arc_need_free = 0;

/*
* Block until signaled, or after one second (we
Expand Down Expand Up @@ -3713,7 +3704,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
ARCSTAT_BUMP(arcstat_memory_indirect_count);
} else {
arc_no_grow = B_TRUE;
arc_grow_time = ddi_get_lbolt() + (zfs_arc_grow_retry * hz);
arc_need_free = ptob(sc->nr_to_scan);
ARCSTAT_BUMP(arcstat_memory_direct_count);
}

Expand Down Expand Up @@ -5288,6 +5279,10 @@ arc_tuning_update(void)
/* Valid range: 1 - N ticks */
if (zfs_arc_min_prefetch_lifespan)
arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;

/* Valid range: 0 - <all physical memory> */
if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem));
}

void
Expand Down Expand Up @@ -5329,6 +5324,10 @@ arc_init(void)
* swapping out pages when it is preferable to shrink the arc.
*/
spl_register_shrinker(&arc_shrinker);

/* Set to 1/64 of all memory or a minimum of 512K */
arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024));
arc_need_free = 0;
#endif

/* Set min cache to allow safe operation of arc_adapt() */
Expand Down Expand Up @@ -7064,4 +7063,7 @@ MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
module_param(l2arc_norw, int, 0644);
MODULE_PARM_DESC(l2arc_norw, "No reads during writes");

module_param(zfs_arc_sys_free, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");

#endif

0 comments on commit 11f552f

Please sign in to comment.