Skip to content

Commit

Permalink
Cleanup: Use OpenSolaris functions to call scheduler
Browse files Browse the repository at this point in the history
In our codebase, `cond_resched() and `schedule()` are Linux kernel
functions that have replaced the OpenSolaris `kpreempt()` functions in
the codebase to such an extent that `kpreempt()` in zfs_context.h was
broken. Nobody noticed because we did not actually use it. The header
had defined `kpreempt()` as `yield()`, which works on OpenSolaris and
Illumos where `sched_yield()` is a wrapper for `yield()`, but that does
not work on any other platform.

The FreeBSD platform specific code implemented shims for these, but the
shim for `schedule()` forced us to wait, which is different than merely
rescheduling to another thread as the original Linux code does, while
the shim for `cond_resched()` had the same definition as its kernel
kpreempt() shim.

After studying this, I have concluded that we should reintroduce the
kpreempt() function in platform independent code with the following
definitions:

	- In the Linux kernel:
		kpreempt(unused)	-> cond_resched()

	- In the FreeBSD kernel:
		kpreempt(unused)	-> kern_yield(PRI_USER)

	- In userspace:
		kpreempt(unused)	-> sched_yield()

In userspace, nothing changes from this cleanup. In the kernels, the
function `fm_fini()` will now call `kern_yield(PRI_USER)` on FreeBSD and
`cond_resched()` on Linux.  This is instead of `pause("schedule", 1)` on
FreeBSD and `schedule()` on Linux. This makes our behavior consistent
across platforms.

Note that Linux's SPL continues to use `cond_resched()` and
`schedule()`.  However, those functions have been removed from both the
FreeBSD code and userspace code.

This should have the benefit of making it slightly easier to port the
code to new platforms by making how things should be mapped less
confusing.

Reviewed-by: Alexander Motin <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Neal Gompa <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Closes openzfs#13845
  • Loading branch information
ryao authored and beren12 committed Sep 19, 2022
1 parent a021e44 commit df1be95
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 14 deletions.
2 changes: 2 additions & 0 deletions include/os/freebsd/spl/sys/disp.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

#include <sys/proc.h>

#define KPREEMPT_SYNC (-1)

#define kpreempt(x) kern_yield(PRI_USER)

#endif /* _OPENSOLARIS_SYS_DISP_H_ */
2 changes: 0 additions & 2 deletions include/os/freebsd/spl/sys/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,4 @@
#define usleep_range(wakeup, wakeupepsilon) \
pause_sbt("usleep_range", ustosbt(wakeup), \
ustosbt(wakeupepsilon - wakeup), 0)

#define schedule() pause("schedule", 1)
#endif
2 changes: 0 additions & 2 deletions include/os/freebsd/zfs/sys/zfs_context_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
#define HAVE_LARGE_STACKS 1
#endif

#define cond_resched() kern_yield(PRI_USER)

#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
((void) sizeof (dc), taskq_create(a, b, maxclsyspri, d, e, f))

Expand Down
4 changes: 3 additions & 1 deletion include/os/linux/spl/sys/disp.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@

#include <linux/preempt.h>

#define kpreempt(unused) schedule()
#define KPREEMPT_SYNC (-1)

#define kpreempt(unused) cond_resched()
#define kpreempt_disable() preempt_disable()
#define kpreempt_enable() preempt_enable()

Expand Down
5 changes: 3 additions & 2 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,6 @@ typedef pthread_t kthread_t;
#define TS_JOINABLE 0x00000004

#define curthread ((void *)(uintptr_t)pthread_self())
#define kpreempt(x) yield()
#define getcomm() "unknown"

#define thread_create_named(name, stk, stksize, func, arg, len, \
Expand Down Expand Up @@ -248,9 +247,11 @@ extern kthread_t *zk_thread_create(void (*func)(void *), void *arg,
#define issig(why) (FALSE)
#define ISSIG(thr, why) (FALSE)

#define KPREEMPT_SYNC (-1)

#define kpreempt(x) sched_yield()
#define kpreempt_disable() ((void)0)
#define kpreempt_enable() ((void)0)
#define cond_resched() sched_yield()

/*
* Mutexes
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -4165,7 +4165,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* this CPU are able to make progress, make a voluntary preemption
* call here.
*/
cond_resched();
kpreempt(KPREEMPT_SYNC);

return (bytes_evicted);
}
Expand Down Expand Up @@ -10335,7 +10335,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
!dev->l2ad_first)
goto out;

cond_resched();
kpreempt(KPREEMPT_SYNC);
for (;;) {
mutex_enter(&l2arc_rebuild_thr_lock);
if (dev->l2ad_rebuild_cancel) {
Expand Down
6 changes: 3 additions & 3 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1142,7 +1142,7 @@ dnode_free_interior_slots(dnode_t *dn)

while (!dnode_slots_tryenter(children, idx, slots)) {
DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
cond_resched();
kpreempt(KPREEMPT_SYNC);
}

dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
Expand Down Expand Up @@ -1423,7 +1423,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
dnode_slots_rele(dnc, idx, slots);
while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry);
cond_resched();
kpreempt(KPREEMPT_SYNC);
}

/*
Expand Down Expand Up @@ -1478,7 +1478,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
dnode_slots_rele(dnc, idx, slots);
while (!dnode_slots_tryenter(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_lock_retry);
cond_resched();
kpreempt(KPREEMPT_SYNC);
}

if (!dnode_check_slots_free(dnc, idx, slots)) {
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1354,7 +1354,7 @@ fm_fini(void)
zevent_flags |= ZEVENT_SHUTDOWN;
while (zevent_waiters > 0) {
mutex_exit(&zevent_lock);
schedule();
kpreempt(KPREEMPT_SYNC);
mutex_enter(&zevent_lock);
}
mutex_exit(&zevent_lock);
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/spa_log_spacemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,7 @@ spa_ld_log_sm_data(spa_t *spa)
}

/* Load TXG log spacemap into ms_unflushed_allocs/frees. */
cond_resched();
kpreempt(KPREEMPT_SYNC);
ASSERT0(sls->sls_nblocks);
sls->sls_nblocks = space_map_nblocks(sls->sls_sm);
spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;
Expand Down

0 comments on commit df1be95

Please sign in to comment.