Skip to content

Commit

Permalink
Disambiguate condvar API contract
Browse files Browse the repository at this point in the history
On Illumos, callers of cv_timedwait and cv_timedwait_hires
can't distinguish between the cv having been signaled and
the call having timed out. Illumos handles this (for some definition
of handles) by calling cv_signal in the return path if we were
signaled but the return value indicates instead that we timed
out. This would make sense if it were possible to query the
cv for its net signal disposition. However, this isn't possible
and, in spite of the fact that there are places in the code that
clearly take a different and incompatible path if a timeout value
is indicated, this distinction appears to be rather subtle to most
developers. This problem is further compounded by the fact that on
Linux, calling cv_signal in the return path wouldn't even do the
right thing unless there are other waiters.

Since it is possible for the caller to independently determine how
much time is remaining but it is not possible to query if the cv
was in fact signaled, prioritizing signalling over timeout seems
like a cleaner solution. In addition, judging from usage patterns
within the code itself, it is also less error prone.

Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Alexander Motin <[email protected]>
Signed-off-by: Matt Macy <[email protected]>
Closes #10471
  • Loading branch information
mattmacy authored Jun 18, 2020
1 parent 7564073 commit 8056a75
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 36 deletions.
36 changes: 29 additions & 7 deletions include/os/freebsd/spl/sys/condvar.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,31 @@
#include <sys/time.h>
#include <sys/kmem.h>

/*
* cv_timedwait() is similar to cv_wait() except that it additionally expects
* a timeout value specified in ticks. When woken by cv_signal() or
* cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is
* returned.
*
* cv_timedwait_sig() behaves the same as cv_timedwait() but blocks
* interruptibly and can be woken by a signal (EINTR, ERESTART). When
* this occurs 0 is returned.
*
* cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait()
* and cv_timedwait_sig() which should be used when waiting for outstanding
* IO to complete. They are responsible for updating the iowait accounting
* when this is supported by the platform.
*
* cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution
* versions of cv_timedwait() and cv_timedwait_sig(). They expect the timeout
* to be specified as a hrtime_t allowing for timeouts of less than a tick.
*
* N.B. The return values differ slightly from the illumos implementation
* which returns the time remaining, instead of 1, when woken. They both
* return -1 on timeout. Consumers which need to know the time remaining
* are responsible for tracking it themselves.
*/

static __inline sbintime_t
zfs_nstosbt(int64_t _ns)
{
Expand Down Expand Up @@ -120,7 +145,7 @@ cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t timo)
#define cv_timedwait_io cv_timedwait
#define cv_timedwait_sig_io cv_timedwait_sig

static inline clock_t
static inline int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
int flag)
{
Expand All @@ -135,19 +160,17 @@ cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,

if (hrtime >= tim)
return (-1);

rc = cv_timedwait_sbt(cvp, mp, zfs_nstosbt(tim),
zfs_nstosbt(res), C_ABSOLUTE);

if (rc == EWOULDBLOCK)
return (-1);

KASSERT(rc == 0, ("unexpected rc value %d", rc));
hrtime = tim - gethrtime();
return ((hrtime > 0) ? hrtime : -1);
return (1);
}

static inline clock_t
static inline int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag)
{
Expand Down Expand Up @@ -175,8 +198,7 @@ cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
return (0);
default:
KASSERT(rc == 0, ("unexpected rc value %d", rc));
hrtime = tim - gethrtime();
return ((hrtime > 0) ? hrtime : -1);
return (1);
}
}

Expand Down
48 changes: 39 additions & 9 deletions include/os/linux/spl/sys/condvar.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,32 @@
#include <sys/wait.h>
#include <sys/time.h>

/*
* cv_timedwait() is similar to cv_wait() except that it additionally expects
* a timeout value specified in ticks. When woken by cv_signal() or
* cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is
* returned.
*
* cv_timedwait_sig() behaves the same as cv_timedwait() but blocks
* interruptibly and can be woken by a signal (EINTR, ERESTART). When
* this occurs 0 is returned.
*
* cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait()
* and cv_timedwait_sig() which should be used when waiting for outstanding
* IO to complete. They are responsible for updating the iowait accounting
* when this is supported by the platform.
*
* cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution
* versions of cv_timedwait() and cv_timedwait_sig(). They expect the timeout
* to be specified as a hrtime_t allowing for timeouts of less than a tick.
*
* N.B. The return values differ slightly from the illumos implementation
* which returns the time remaining, instead of 1, when woken. They both
* return -1 on timeout. Consumers which need to know the time remaining
* are responsible for tracking it themselves.
*/


/*
* The kcondvar_t struct is protected by a mutex taken externally before
* calling any of the wait/signal functions, and passed into the wait
* functions.
Expand All @@ -56,12 +82,12 @@ extern void __cv_wait(kcondvar_t *, kmutex_t *);
extern void __cv_wait_io(kcondvar_t *, kmutex_t *);
extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *);
extern int __cv_wait_sig(kcondvar_t *, kmutex_t *);
extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
extern int __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
extern int __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t);
extern int __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
extern int cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
extern clock_t cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
extern int cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
extern void __cv_signal(kcondvar_t *);
extern void __cv_broadcast(kcondvar_t *c);
Expand All @@ -72,12 +98,16 @@ extern void __cv_broadcast(kcondvar_t *c);
#define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp)
#define cv_wait_io_sig(cvp, mp) __cv_wait_io_sig(cvp, mp)
#define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp)
#define cv_wait_interruptible(cvp, mp) cv_wait_sig(cvp, mp)
#define cv_signal(cvp) __cv_signal(cvp)
#define cv_broadcast(cvp) __cv_broadcast(cvp)

/*
* NB: There is no way to reliably distinguish between having been signalled
* and having timed out on Linux. If the client code needs to reliably
* distinguish between the two it should use the hires variant.
*/
#define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t)
#define cv_timedwait_io(cvp, mp, t) __cv_timedwait_io(cvp, mp, t)
#define cv_timedwait_sig(cvp, mp, t) __cv_timedwait_sig(cvp, mp, t)
#define cv_timedwait_interruptible(cvp, mp, t) cv_timedwait_sig(cvp, mp, t)
#define cv_signal(cvp) __cv_signal(cvp)
#define cv_broadcast(cvp) __cv_broadcast(cvp)

#endif /* _SPL_CONDVAR_H */
4 changes: 2 additions & 2 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
extern void cv_destroy(kcondvar_t *cv);
extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp);
extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
extern int cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
extern int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag);
extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);
Expand Down
4 changes: 2 additions & 2 deletions lib/libzpool/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
return (1);
}

clock_t
int
cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
{
int error;
Expand Down Expand Up @@ -378,7 +378,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
}

/*ARGSUSED*/
clock_t
int
cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
int flag)
{
Expand Down
33 changes: 19 additions & 14 deletions module/os/linux/spl/spl-condvar.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,30 +301,32 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
* with a thread holding the mutex and call cv_destroy.
*/
mutex_enter(mp);
return (time_left > 0 ? time_left : -1);
return (time_left > 0 ? 1 : -1);
}

clock_t
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
return (__cv_timedwait_common(cvp, mp, exp_time,
TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

clock_t
int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
return (__cv_timedwait_common(cvp, mp, exp_time,
TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

clock_t
int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
return (__cv_timedwait_common(cvp, mp, exp_time,
TASK_INTERRUPTIBLE, 0));
int rc;

rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

Expand All @@ -341,6 +343,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
hrtime_t time_left;
ktime_t ktime_left;
u64 slack = 0;
int rc;

ASSERT(cvp);
ASSERT(mp);
Expand Down Expand Up @@ -371,7 +374,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
ktime_left = ktime_set(0, time_left);
slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

/* No more waiters, so a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
Expand All @@ -387,14 +390,13 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
atomic_dec(&cvp->cv_refs);

mutex_enter(mp);
time_left = expire_time - gethrtime();
return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1);
return (rc == -EINTR ? 1 : -1);
}

/*
* Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
*/
static clock_t
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag, int state)
{
Expand All @@ -404,7 +406,7 @@ cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

clock_t
int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
int flag)
{
Expand All @@ -413,12 +415,15 @@ cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
}
EXPORT_SYMBOL(cv_timedwait_hires);

clock_t
int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag)
{
return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
TASK_INTERRUPTIBLE));
int rc;

rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
TASK_INTERRUPTIBLE);
return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

Expand Down
4 changes: 2 additions & 2 deletions module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -2687,11 +2687,11 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw)
* timeout is reached; responsibility (2) from
* the comment above this function.
*/
clock_t timeleft = cv_timedwait_hires(&zcw->zcw_cv,
int rc = cv_timedwait_hires(&zcw->zcw_cv,
&zcw->zcw_lock, wakeup, USEC2NSEC(1),
CALLOUT_FLAG_ABSOLUTE);

if (timeleft != -1 || zcw->zcw_done)
if (rc != -1 || zcw->zcw_done)
continue;

timedout = B_TRUE;
Expand Down

0 comments on commit 8056a75

Please sign in to comment.