diff --git a/tools/sched_ext/scx_common.bpf.h b/tools/sched_ext/scx_common.bpf.h index 81bfe3d041c9a2..38168981fd0b7c 100644 --- a/tools/sched_ext/scx_common.bpf.h +++ b/tools/sched_ext/scx_common.bpf.h @@ -235,108 +235,4 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; -/* BPF core iterators from tools/testing/selftests/bpf/progs/bpf_misc.h */ -struct bpf_iter_num; - -extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym; -extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym; -extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym; - -#ifndef bpf_for_each -/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for - * using BPF open-coded iterators without having to write mundane explicit - * low-level loop logic. Instead, it provides for()-like generic construct - * that can be used pretty naturally. E.g., for some hypothetical cgroup - * iterator, you'd write: - * - * struct cgroup *cg, *parent_cg = <...>; - * - * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) { - * bpf_printk("Child cgroup id = %d", cg->cgroup_id); - * if (cg->cgroup_id == 123) - * break; - * } - * - * I.e., it looks almost like high-level for each loop in other languages, - * supports continue/break, and is verifiable by BPF verifier. - * - * For iterating integers, the difference betwen bpf_for_each(num, i, N, M) - * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to - * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int - * *`, not just `int`. So for integers bpf_for() is more convenient. - * - * Note: this macro relies on C99 feature of allowing to declare variables - * inside for() loop, bound to for() loop lifetime. It also utilizes GCC - * extension: __attribute__((cleanup())), supported by both GCC and - * Clang. - */ -#define bpf_for_each(type, cur, args...) for ( \ - /* initialize and define destructor */ \ - struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \ - cleanup(bpf_iter_##type##_destroy))), \ - /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \ - *___p __attribute__((unused)) = ( \ - bpf_iter_##type##_new(&___it, ##args), \ - /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ - /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \ - (void)bpf_iter_##type##_destroy, (void *)0); \ - /* iteration and termination check */ \ - (((cur) = bpf_iter_##type##_next(&___it))); \ -) -#endif /* bpf_for_each */ - -#ifndef bpf_for -/* bpf_for(i, start, end) implements a for()-like looping construct that sets - * provided integer variable *i* to values starting from *start* through, - * but not including, *end*. It also proves to BPF verifier that *i* belongs - * to range [start, end), so this can be used for accessing arrays without - * extra checks. - * - * Note: *start* and *end* are assumed to be expressions with no side effects - * and whose values do not change throughout bpf_for() loop execution. They do - * not have to be statically known or constant, though. - * - * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() - * loop bound variables and cleanup attribute, supported by GCC and Clang. 
- */ -#define bpf_for(i, start, end) for ( \ - /* initialize and define destructor */ \ - struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ - cleanup(bpf_iter_num_destroy))), \ - /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ - *___p __attribute__((unused)) = ( \ - bpf_iter_num_new(&___it, (start), (end)), \ - /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ - /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ - (void)bpf_iter_num_destroy, (void *)0); \ - ({ \ - /* iteration step */ \ - int *___t = bpf_iter_num_next(&___it); \ - /* termination and bounds check */ \ - (___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \ - }); \ -) -#endif /* bpf_for */ - -#ifndef bpf_repeat -/* bpf_repeat(N) performs N iterations without exposing iteration number - * - * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() - * loop bound variables and cleanup attribute, supported by GCC and Clang. - */ -#define bpf_repeat(N) for ( \ - /* initialize and define destructor */ \ - struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ - cleanup(bpf_iter_num_destroy))), \ - /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ - *___p __attribute__((unused)) = ( \ - bpf_iter_num_new(&___it, 0, (N)), \ - /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ - /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ - (void)bpf_iter_num_destroy, (void *)0); \ - bpf_iter_num_next(&___it); \ - /* nothing here */ \ -) -#endif /* bpf_repeat */ - #endif /* __SCHED_EXT_COMMON_BPF_H */ diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 6d8c6f396577a2..2db3d8d45e6837 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -510,7 +510,15 @@ void BPF_STRUCT_OPS(fcg_stopping, struct task_struct *p, bool runnable) struct cgroup *cgrp; struct fcg_cgrp_ctx *cgc; - /* scale the execution time by the inverse of the weight and charge */ + /* + * Scale the execution time by the inverse of the weight and charge. + * + * Note that the default yield implementation yields by setting + * @p->scx.slice to zero and the following would treat the yielding task + * as if it has consumed all its slice. If this penalizes yielding tasks + * too much, determine the execution time by taking explicit timestamps + * instead of depending on @p->scx.slice. 
+ */ if (!fifo_sched) p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight; diff --git a/tools/sched_ext/scx_rusty/src/bpf/rusty.bpf.c b/tools/sched_ext/scx_rusty/src/bpf/rusty.bpf.c index 436297e6dcac92..5d3af556919131 100644 --- a/tools/sched_ext/scx_rusty/src/bpf/rusty.bpf.c +++ b/tools/sched_ext/scx_rusty/src/bpf/rusty.bpf.c @@ -55,18 +55,18 @@ char _license[] SEC("license") = "GPL"; /* * Domains and cpus */ -const volatile __u32 nr_doms = 32; /* !0 for veristat, set during init */ -const volatile __u32 nr_cpus = 64; /* !0 for veristat, set during init */ -const volatile __u32 cpu_dom_id_map[MAX_CPUS]; -const volatile __u64 dom_cpumasks[MAX_DOMS][MAX_CPUS / 64]; +const volatile u32 nr_doms = 32; /* !0 for veristat, set during init */ +const volatile u32 nr_cpus = 64; /* !0 for veristat, set during init */ +const volatile u32 cpu_dom_id_map[MAX_CPUS]; +const volatile u64 dom_cpumasks[MAX_DOMS][MAX_CPUS / 64]; const volatile bool kthreads_local; const volatile bool fifo_sched; const volatile bool switch_partial; -const volatile __u32 greedy_threshold; +const volatile u32 greedy_threshold; /* base slice duration */ -const volatile __u64 slice_ns = SCX_SLICE_DFL; +const volatile u64 slice_ns = SCX_SLICE_DFL; /* * Exit info @@ -78,10 +78,10 @@ char exit_msg[SCX_EXIT_MSG_LEN]; * Per-CPU context */ struct pcpu_ctx { - __u32 dom_rr_cur; /* used when scanning other doms */ + u32 dom_rr_cur; /* used when scanning other doms */ /* libbpf-rs does not respect the alignment, so pad out the struct explicitly */ - __u8 _padding[CACHELINE_SIZE - sizeof(u32)]; + u8 _padding[CACHELINE_SIZE - sizeof(u32)]; } __attribute__((aligned(CACHELINE_SIZE))); struct pcpu_ctx pcpu_ctx[MAX_CPUS]; @@ -89,12 +89,6 @@ struct pcpu_ctx pcpu_ctx[MAX_CPUS]; /* * Domain context */ -struct dom_ctx { - struct bpf_cpumask __kptr *cpumask; - struct bpf_cpumask __kptr *direct_greedy_cpumask; - u64 vtime_now; -}; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); @@ -131,6 +125,19 @@ struct { __uint(map_flags, 0); } task_data SEC(".maps"); +struct task_ctx *lookup_task_ctx(struct task_struct *p) +{ + struct task_ctx *taskc; + s32 pid = p->pid; + + if ((taskc = bpf_map_lookup_elem(&task_data, &pid))) { + return taskc; + } else { + scx_bpf_error("task_ctx lookup failed for pid %d", p->pid); + return NULL; + } +} + /* * This is populated from userspace to indicate which pids should be reassigned * to new doms. @@ -149,12 +156,12 @@ struct { * that can be used directly in the scheduling paths. 
*/ struct tune_input{ - __u64 gen; - __u64 direct_greedy_cpumask[MAX_CPUS / 64]; - __u64 kick_greedy_cpumask[MAX_CPUS / 64]; + u64 gen; + u64 direct_greedy_cpumask[MAX_CPUS / 64]; + u64 kick_greedy_cpumask[MAX_CPUS / 64]; } tune_input; -__u64 tune_params_gen; +u64 tune_params_gen; private(A) struct bpf_cpumask __kptr *all_cpumask; private(A) struct bpf_cpumask __kptr *direct_greedy_cpumask; private(A) struct bpf_cpumask __kptr *kick_greedy_cpumask; @@ -275,16 +282,14 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { const struct cpumask *idle_smtmask = scx_bpf_get_idle_smtmask(); - struct task_ctx *task_ctx; + struct task_ctx *taskc; struct bpf_cpumask *p_cpumask; - pid_t pid = p->pid; bool prev_domestic, has_idle_cores; s32 cpu; refresh_tune_params(); - if (!(task_ctx = bpf_map_lookup_elem(&task_data, &pid)) || - !(p_cpumask = task_ctx->cpumask)) + if (!(taskc = lookup_task_ctx(p)) || !(p_cpumask = taskc->cpumask)) goto enoent; if (kthreads_local && @@ -302,22 +307,21 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, struct task_struct *current = (void *)bpf_get_current_task(); if (!(BPF_CORE_READ(current, flags) & PF_EXITING) && - task_ctx->dom_id < MAX_DOMS) { + taskc->dom_id < MAX_DOMS) { struct dom_ctx *domc; struct bpf_cpumask *d_cpumask; const struct cpumask *idle_cpumask; bool has_idle; - domc = bpf_map_lookup_elem(&dom_ctx, &task_ctx->dom_id); + domc = bpf_map_lookup_elem(&dom_ctx, &taskc->dom_id); if (!domc) { - scx_bpf_error("Failed to find dom%u", - task_ctx->dom_id); + scx_bpf_error("Failed to find dom%u", taskc->dom_id); goto enoent; } d_cpumask = domc->cpumask; if (!d_cpumask) { scx_bpf_error("Failed to acquire dom%u cpumask kptr", - task_ctx->dom_id); + taskc->dom_id); goto enoent; } @@ -418,7 +422,7 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, * under-utilized, ignore domain boundaries and push the task there. Try * to find an idle core first. */ - if (task_ctx->all_cpus && direct_greedy_cpumask && + if (taskc->all_cpus && direct_greedy_cpumask && !bpf_cpumask_empty((const struct cpumask *)direct_greedy_cpumask)) { u32 dom_id = cpu_to_dom_id(prev_cpu); struct dom_ctx *domc; @@ -488,7 +492,7 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, return cpu; direct: - task_ctx->dispatch_local = true; + taskc->dispatch_local = true; scx_bpf_put_idle_cpumask(idle_smtmask); return cpu; @@ -499,15 +503,16 @@ s32 BPF_STRUCT_OPS(rusty_select_cpu, struct task_struct *p, s32 prev_cpu, void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) { - struct task_ctx *task_ctx; + struct task_ctx *taskc; struct bpf_cpumask *p_cpumask; pid_t pid = p->pid; u32 *new_dom; s32 cpu; - if (!(task_ctx = bpf_map_lookup_elem(&task_data, &pid)) || - !(p_cpumask = task_ctx->cpumask)) { - scx_bpf_error("Failed to lookup task_ctx or cpumask"); + if (!(taskc = lookup_task_ctx(p))) + return; + if (!(p_cpumask = taskc->cpumask)) { + scx_bpf_error("NULL cpumask"); + return; } @@ -515,18 +520,18 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) * Migrate @p to a new domain if requested by userland through lb_data. 
*/ new_dom = bpf_map_lookup_elem(&lb_data, &pid); - if (new_dom && *new_dom != task_ctx->dom_id && - task_set_domain(task_ctx, p, *new_dom, false)) { + if (new_dom && *new_dom != taskc->dom_id && + task_set_domain(taskc, p, *new_dom, false)) { stat_add(RUSTY_STAT_LOAD_BALANCE, 1); - task_ctx->dispatch_local = false; + taskc->dispatch_local = false; cpu = scx_bpf_pick_any_cpu((const struct cpumask *)p_cpumask, 0); if (cpu >= 0) scx_bpf_kick_cpu(cpu, 0); goto dom_queue; } - if (task_ctx->dispatch_local) { - task_ctx->dispatch_local = false; + if (taskc->dispatch_local) { + taskc->dispatch_local = false; scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); return; } @@ -547,11 +552,10 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) dom_queue: if (fifo_sched) { - scx_bpf_dispatch(p, task_ctx->dom_id, slice_ns, - enq_flags); + scx_bpf_dispatch(p, taskc->dom_id, slice_ns, enq_flags); } else { u64 vtime = p->scx.dsq_vtime; - u32 dom_id = task_ctx->dom_id; + u32 dom_id = taskc->dom_id; struct dom_ctx *domc; domc = bpf_map_lookup_elem(&dom_ctx, &dom_id); @@ -567,8 +571,7 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) if (vtime_before(vtime, domc->vtime_now - slice_ns)) vtime = domc->vtime_now - slice_ns; - scx_bpf_dispatch_vtime(p, task_ctx->dom_id, slice_ns, vtime, - enq_flags); + scx_bpf_dispatch_vtime(p, taskc->dom_id, slice_ns, vtime, enq_flags); } /* @@ -586,7 +589,7 @@ void BPF_STRUCT_OPS(rusty_enqueue, struct task_struct *p, u64 enq_flags) * CPUs are highly loaded while KICK_GREEDY doesn't. Even under fairly * high utilization, KICK_GREEDY can slightly improve work-conservation. */ - if (task_ctx->all_cpus && kick_greedy_cpumask) { + if (taskc->all_cpus && kick_greedy_cpumask) { cpu = scx_bpf_pick_idle_cpu((const struct cpumask *) kick_greedy_cpumask, 0); if (cpu >= 0) { @@ -654,35 +657,30 @@ void BPF_STRUCT_OPS(rusty_dispatch, s32 cpu, struct task_struct *prev) void BPF_STRUCT_OPS(rusty_runnable, struct task_struct *p, u64 enq_flags) { - struct task_ctx *task_ctx; - pid_t pid = p->pid; + struct task_ctx *taskc; - if (!(task_ctx = bpf_map_lookup_elem(&task_data, &pid))) { - scx_bpf_error("Failed to lookup task_ctx"); + if (!(taskc = lookup_task_ctx(p))) return; - } - task_ctx->runnable_at = bpf_ktime_get_ns(); - task_ctx->is_kworker = p->flags & PF_WQ_WORKER; + taskc->runnable_at = bpf_ktime_get_ns(); + taskc->is_kworker = p->flags & PF_WQ_WORKER; } void BPF_STRUCT_OPS(rusty_running, struct task_struct *p) { struct task_ctx *taskc; struct dom_ctx *domc; - pid_t pid = p->pid; u32 dom_id; if (fifo_sched) return; - taskc = bpf_map_lookup_elem(&task_data, &pid); - if (!taskc) { - scx_bpf_error("Failed to lookup task_ctx"); + if (!(taskc = lookup_task_ctx(p))) return; - } - dom_id = taskc->dom_id; + taskc->running_at = bpf_ktime_get_ns(); + + dom_id = taskc->dom_id; domc = bpf_map_lookup_elem(&dom_ctx, &dom_id); if (!domc) { scx_bpf_error("Failed to lookup dom[%u]", dom_id); @@ -701,41 +699,41 @@ void BPF_STRUCT_OPS(rusty_running, struct task_struct *p) void BPF_STRUCT_OPS(rusty_stopping, struct task_struct *p, bool runnable) { + struct task_ctx *taskc; + if (fifo_sched) return; + if (!(taskc = lookup_task_ctx(p))) + return; + /* scale the execution time by the inverse of the weight and charge */ - p->scx.dsq_vtime += (slice_ns - p->scx.slice) * 100 / p->scx.weight; + p->scx.dsq_vtime += + (bpf_ktime_get_ns() - taskc->running_at) * 100 / p->scx.weight; } void BPF_STRUCT_OPS(rusty_quiescent, struct task_struct *p, u64 deq_flags) { - 
struct task_ctx *task_ctx; - pid_t pid = p->pid; + struct task_ctx *taskc; - if (!(task_ctx = bpf_map_lookup_elem(&task_data, &pid))) { - scx_bpf_error("Failed to lookup task_ctx"); + if (!(taskc = lookup_task_ctx(p))) return; - } - task_ctx->runnable_for += bpf_ktime_get_ns() - task_ctx->runnable_at; - task_ctx->runnable_at = 0; + taskc->runnable_for += bpf_ktime_get_ns() - taskc->runnable_at; + taskc->runnable_at = 0; } void BPF_STRUCT_OPS(rusty_set_weight, struct task_struct *p, u32 weight) { - struct task_ctx *task_ctx; - pid_t pid = p->pid; + struct task_ctx *taskc; - if (!(task_ctx = bpf_map_lookup_elem(&task_data, &pid))) { - scx_bpf_error("Failed to lookup task_ctx"); + if (!(taskc = lookup_task_ctx(p))) return; - } - task_ctx->weight = weight; + taskc->weight = weight; } -static u32 task_pick_domain(struct task_ctx *task_ctx, struct task_struct *p, +static u32 task_pick_domain(struct task_ctx *taskc, struct task_struct *p, const struct cpumask *cpumask) { s32 cpu = bpf_get_smp_processor_id(); @@ -744,13 +742,13 @@ static u32 task_pick_domain(struct task_ctx *task_ctx, struct task_struct *p, if (cpu < 0 || cpu >= MAX_CPUS) return MAX_DOMS; - task_ctx->dom_mask = 0; + taskc->dom_mask = 0; dom = pcpu_ctx[cpu].dom_rr_cur++; bpf_repeat(nr_doms) { dom = (dom + 1) % nr_doms; if (cpumask_intersects_domain(cpumask, dom)) { - task_ctx->dom_mask |= 1LLU << dom; + taskc->dom_mask |= 1LLU << dom; /* * AsThe starting point is round-robin'd and the first * match should be spread across all the domains. @@ -763,7 +761,7 @@ static u32 task_pick_domain(struct task_ctx *task_ctx, struct task_struct *p, return first_dom; } -static void task_pick_and_set_domain(struct task_ctx *task_ctx, +static void task_pick_and_set_domain(struct task_ctx *taskc, struct task_struct *p, const struct cpumask *cpumask, bool init_dsq_vtime) @@ -771,9 +769,9 @@ static void task_pick_and_set_domain(struct task_ctx *task_ctx, u32 dom_id = 0; if (nr_doms > 1) - dom_id = task_pick_domain(task_ctx, p, cpumask); + dom_id = task_pick_domain(taskc, p, cpumask); - if (!task_set_domain(task_ctx, p, dom_id, init_dsq_vtime)) + if (!task_set_domain(taskc, p, dom_id, init_dsq_vtime)) scx_bpf_error("Failed to set dom%d for %s[%d]", dom_id, p->comm, p->pid); } @@ -781,32 +779,29 @@ static void task_pick_and_set_domain(struct task_ctx *task_ctx, void BPF_STRUCT_OPS(rusty_set_cpumask, struct task_struct *p, const struct cpumask *cpumask) { - struct task_ctx *task_ctx; - pid_t pid = p->pid; + struct task_ctx *taskc; - if (!(task_ctx = bpf_map_lookup_elem(&task_data, &pid))) { - scx_bpf_error("Failed to lookup task_ctx for %s[%d]", - p->comm, pid); + if (!(taskc = lookup_task_ctx(p))) return; - } - task_pick_and_set_domain(task_ctx, p, cpumask, false); + task_pick_and_set_domain(taskc, p, cpumask, false); if (all_cpumask) - task_ctx->all_cpus = bpf_cpumask_subset(all_cpumask, cpumask); + taskc->all_cpus = + bpf_cpumask_subset((const struct cpumask *)all_cpumask, cpumask); } s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p, struct scx_enable_args *args) { struct bpf_cpumask *cpumask; - struct task_ctx task_ctx, *map_value; + struct task_ctx taskc, *map_value; long ret; pid_t pid; - memset(&task_ctx, 0, sizeof(task_ctx)); + memset(&taskc, 0, sizeof(taskc)); pid = p->pid; - ret = bpf_map_update_elem(&task_data, &pid, &task_ctx, BPF_NOEXIST); + ret = bpf_map_update_elem(&task_data, &pid, &taskc, BPF_NOEXIST); if (ret) { stat_add(RUSTY_STAT_TASK_GET_ERR, 1); return ret; @@ -883,7 +878,7 @@ static s32 create_dom(u32 dom_id) } for (cpu 
= 0; cpu < MAX_CPUS; cpu++) { - const volatile __u64 *dmask; + const volatile u64 *dmask; dmask = MEMBER_VPTR(dom_cpumasks, [dom_id][cpu / 64]); if (!dmask) { diff --git a/tools/sched_ext/scx_rusty/src/bpf/rusty.h b/tools/sched_ext/scx_rusty/src/bpf/rusty.h index 28eed277fd8af7..5a48c78fe91748 100644 --- a/tools/sched_ext/scx_rusty/src/bpf/rusty.h +++ b/tools/sched_ext/scx_rusty/src/bpf/rusty.h @@ -13,6 +13,10 @@ #define __kptr #endif +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + #define MAX_CPUS 512 #define MAX_DOMS 64 /* limited to avoid complex bitmask ops */ #define CACHELINE_SIZE 64 @@ -43,13 +47,14 @@ enum stat_idx { struct task_ctx { /* The domains this task can run on */ - unsigned long long dom_mask; + u64 dom_mask; struct bpf_cpumask __kptr *cpumask; - unsigned int dom_id; - unsigned int weight; - unsigned long long runnable_at; - unsigned long long runnable_for; + u32 dom_id; + u32 weight; + u64 runnable_at; + u64 running_at; + u64 runnable_for; /* The task is a workqueue worker thread */ bool is_kworker; @@ -61,4 +66,10 @@ struct task_ctx { bool dispatch_local; }; +struct dom_ctx { + struct bpf_cpumask __kptr *cpumask; + struct bpf_cpumask __kptr *direct_greedy_cpumask; + u64 vtime_now; +}; + #endif /* __RUSTY_H */ diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c index d4528c7da45009..56b589d7f6630e 100644 --- a/tools/sched_ext/scx_simple.bpf.c +++ b/tools/sched_ext/scx_simple.bpf.c @@ -101,7 +101,15 @@ void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable) if (fifo_sched) return; - /* scale the execution time by the inverse of the weight and charge */ + /* + * Scale the execution time by the inverse of the weight and charge. + * + * Note that the default yield implementation yields by setting + * @p->scx.slice to zero and the following would treat the yielding task + * as if it has consumed all its slice. If this penalizes yielding tasks + * too much, determine the execution time by taking explicit timestamps + * instead of depending on @p->scx.slice. + */ p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight; }
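
Not part of the patch, for reference only: the comments added to scx_simple and scx_flatcg above note that slice-based charging treats a yielding task (whose slice is forced to zero) as if it had consumed its whole slice, and point to explicit timestamps as the alternative. The sketch below shows that alternative in the same shape as the scx_rusty changes in this patch (record a timestamp in .running(), charge the measured run time in .stopping()). The ops names example_running/example_stopping are made up for illustration, and lookup_task_ctx() plus the running_at field are assumed to exist as in the rusty code above.

/*
 * Illustrative sketch only -- assumes a per-task context with a
 * running_at field and a lookup_task_ctx() helper like the ones the
 * scx_rusty changes above add.
 */
void BPF_STRUCT_OPS(example_running, struct task_struct *p)
{
	struct task_ctx *taskc;

	if (!(taskc = lookup_task_ctx(p)))
		return;

	/* remember when the task last got on a CPU */
	taskc->running_at = bpf_ktime_get_ns();
}

void BPF_STRUCT_OPS(example_stopping, struct task_struct *p, bool runnable)
{
	struct task_ctx *taskc;

	if (!(taskc = lookup_task_ctx(p)))
		return;

	/*
	 * Charge the time actually spent running, scaled by the inverse of
	 * the weight, so a task which yields early is not billed for the
	 * unused part of its slice.
	 */
	p->scx.dsq_vtime +=
		(bpf_ktime_get_ns() - taskc->running_at) * 100 / p->scx.weight;
}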