Skip to content

Commit

Permalink
Use smaller default slack/delta value for schedule_hrtimeout_range()
Browse files Browse the repository at this point in the history
For interrupt coalescing, cv_timedwait_hires() uses a 100us slack/delta
for calls to schedule_hrtimeout_range(). This 100us slack can be costly
for small writes.

This change improves small write performance by passing resolution `res`
parameter to schedule_hrtimeout_range() to be used as delta/slack. A new
tunable `spl_schedule_hrtimeout_slack_us` is added to preserve old
behavior when desired.

Performance observations on 8K recordsize filesystem:
- 8K random writes at 1-64 threads, up to 60% improvement for one thread
  and smaller gains as thread count increases. At >64 threads, 2-5%
  decrease in performance was observed.
- 8K sequential writes, similar 60% improvement for one thread and
  leveling out around 64 threads. At >64 threads, 5-10% decrease in
  performance was observed.
- 128K sequential write sees 1-5 for the 128K. No observed regression at
  high thread count.

Testing done on Ubuntu 18.04 with 4.15 kernel, 8vCPUs and SSD storage on
VMware ESX.

Reviewed-by: Richard Elling <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Closes #9217
  • Loading branch information
tonynguien authored and behlendorf committed Aug 28, 2019
1 parent 07a328d commit 8d04284
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 18 deletions.
54 changes: 37 additions & 17 deletions module/spl/spl-condvar.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,45 @@

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define MAX_HRTIMEOUT_SLACK_US 1000
unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
unsigned long val;
int error;

error = kstrtoul(buf, 0, &val);
if (error)
return (error);

if (val > MAX_HRTIMEOUT_SLACK_US)
return (-EINVAL);

error = param_set_uint(buf, kp);
if (error < 0)
return (error);

return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
Expand Down Expand Up @@ -304,12 +334,13 @@ EXPORT_SYMBOL(__cv_timedwait_sig);
*/
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
int state)
hrtime_t res, int state)
{
DEFINE_WAIT(wait);
kmutex_t *m;
hrtime_t time_left;
ktime_t ktime_left;
u64 slack = 0;

ASSERT(cvp);
ASSERT(mp);
Expand All @@ -336,13 +367,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
* race where 'cvp->cv_waiters > 0' but the list is empty.
*/
mutex_exit(mp);
/*
* Allow a 100 us range to give kernel an opportunity to coalesce
* interrupts
*/

ktime_left = ktime_set(0, time_left);
schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC,
HRTIMER_MODE_REL);
slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
Expand All @@ -369,19 +398,10 @@ static clock_t
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag, int state)
{
if (res > 1) {
/*
* Align expiration to the specified resolution.
*/
if (flag & CALLOUT_FLAG_ROUNDUP)
tim += res - 1;
tim = (tim / res) * res;
}

if (!(flag & CALLOUT_FLAG_ABSOLUTE))
tim += gethrtime();

return (__cv_timedwait_hires(cvp, mp, tim, state));
return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

clock_t
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/mmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ mmp_thread(void *arg)

CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv,
&mmp->mmp_thread_lock, next_time, USEC2NSEC(1),
&mmp->mmp_thread_lock, next_time, USEC2NSEC(100),
CALLOUT_FLAG_ABSOLUTE);
CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
}
Expand Down

0 comments on commit 8d04284

Please sign in to comment.