diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index d0846ba818eea..7102e4b674a0a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2001,6 +2001,11 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +bool bpf_jit_supports_far_kfunc_call(void) +{ + return true; +} + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2c6095bd7d69f..88845aadc47da 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2295,6 +2295,9 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, + u16 btf_fd_idx, u8 **func_addr); + struct bpf_core_ctx { struct bpf_verifier_log *log; const struct btf *btf; @@ -2545,6 +2548,13 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, return NULL; } +static inline int +bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, + u16 btf_fd_idx, u8 **func_addr) +{ + return -ENOTSUPP; +} + static inline bool unprivileged_ebpf_enabled(void) { return false; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5364b0c52c1d4..bbce89937fdeb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -920,6 +920,7 @@ void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); +bool bpf_jit_supports_far_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e2d256c820723..7421487422d48 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1187,6 +1187,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, s16 off = insn->off; s32 imm = insn->imm; u8 *addr; + int err; *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL; if (!*func_addr_fixed) { @@ -1201,6 +1202,11 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, addr = (u8 *)prog->aux->func[off]->bpf_func; else return -EINVAL; + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && + bpf_jit_supports_far_kfunc_call()) { + err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr); + if (err) + return err; } else { /* Address of a BPF helper call. Since part of the core * kernel, it's always at a fixed location. __bpf_call_base @@ -2732,6 +2738,11 @@ bool __weak bpf_jit_supports_kfunc_call(void) return false; } +bool __weak bpf_jit_supports_far_kfunc_call(void) +{ + return false; +} + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e3d5a7a2f4280..4aa6d715e655e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -195,6 +195,8 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); +static void specialize_kfunc(struct bpf_verifier_env *env, + u32 func_id, u16 offset, unsigned long *addr); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { @@ -2374,6 +2376,7 @@ struct bpf_kfunc_desc { u32 func_id; s32 imm; u16 offset; + unsigned long addr; }; struct bpf_kfunc_btf { @@ -2383,6 +2386,11 @@ struct bpf_kfunc_btf { }; struct bpf_kfunc_desc_tab { + /* Sorted by func_id (BTF ID) and offset (fd_array offset) during + * verification. JITs do lookups by bpf_insn, where func_id may not be + * available, therefore at the end of verification do_misc_fixups() + * sorts this by imm and offset. + */ struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; @@ -2423,6 +2431,19 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); } +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, + u16 btf_fd_idx, u8 **func_addr) +{ + const struct bpf_kfunc_desc *desc; + + desc = find_kfunc_desc(prog, func_id, btf_fd_idx); + if (!desc) + return -EFAULT; + + *func_addr = (u8 *)desc->addr; + return 0; +} + static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) { @@ -2602,13 +2623,18 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) func_name); return -EINVAL; } + specialize_kfunc(env, func_id, offset, &addr); - call_imm = BPF_CALL_IMM(addr); - /* Check whether or not the relative offset overflows desc->imm */ - if ((unsigned long)(s32)call_imm != call_imm) { - verbose(env, "address of kernel function %s is out of range\n", - func_name); - return -EINVAL; + if (bpf_jit_supports_far_kfunc_call()) { + call_imm = func_id; + } else { + call_imm = BPF_CALL_IMM(addr); + /* Check whether the relative offset overflows desc->imm */ + if ((unsigned long)(s32)call_imm != call_imm) { + verbose(env, "address of kernel function %s is out of range\n", + func_name); + return -EINVAL; + } } if (bpf_dev_bound_kfunc_id(func_id)) { @@ -2621,6 +2647,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) desc->func_id = func_id; desc->imm = call_imm; desc->offset = offset; + desc->addr = addr; err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, &desc->func_model); @@ -2630,19 +2657,19 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return err; } -static int kfunc_desc_cmp_by_imm(const void *a, const void *b) +static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; - if (d0->imm > d1->imm) - return 1; - else if (d0->imm < d1->imm) - return -1; + if (d0->imm != d1->imm) + return d0->imm < d1->imm ? -1 : 1; + if (d0->offset != d1->offset) + return d0->offset < d1->offset ? -1 : 1; return 0; } -static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) +static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog) { struct bpf_kfunc_desc_tab *tab; @@ -2651,7 +2678,7 @@ static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) return; sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_imm, NULL); + kfunc_desc_cmp_by_imm_off, NULL); } bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) @@ -2665,13 +2692,14 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, { const struct bpf_kfunc_desc desc = { .imm = insn->imm, + .offset = insn->off, }; const struct bpf_kfunc_desc *res; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; res = bsearch(&desc, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off); return res ? &res->func_model : NULL; } @@ -17293,11 +17321,45 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } +/* replace a generic kfunc with a specialized version if necessary */ +static void specialize_kfunc(struct bpf_verifier_env *env, + u32 func_id, u16 offset, unsigned long *addr) +{ + struct bpf_prog *prog = env->prog; + bool seen_direct_write; + void *xdp_kfunc; + bool is_rdonly; + + if (bpf_dev_bound_kfunc_id(func_id)) { + xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); + if (xdp_kfunc) { + *addr = (unsigned long)xdp_kfunc; + return; + } + /* fallback to default kfunc when not supported by netdev */ + } + + if (offset) + return; + + if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { + seen_direct_write = env->seen_direct_write; + is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); + + if (is_rdonly) + *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; + + /* restore env->seen_direct_write to its original value, since + * may_access_direct_pkt_data mutates it + */ + env->seen_direct_write = seen_direct_write; + } +} + static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn *insn_buf, int insn_idx, int *cnt) { const struct bpf_kfunc_desc *desc; - void *xdp_kfunc; if (!insn->imm) { verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); @@ -17306,18 +17368,9 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, *cnt = 0; - if (bpf_dev_bound_kfunc_id(insn->imm)) { - xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm); - if (xdp_kfunc) { - insn->imm = BPF_CALL_IMM(xdp_kfunc); - return 0; - } - - /* fallback to default kfunc when not supported by netdev */ - } - - /* insn->imm has the btf func_id. Replace it with - * an address (relative to __bpf_call_base). + /* insn->imm has the btf func_id. Replace it with an offset relative to + * __bpf_call_base, unless the JIT needs to call functions that are + * further than 32 bits away (bpf_jit_supports_far_kfunc_call()). */ desc = find_kfunc_desc(env->prog, insn->imm, insn->off); if (!desc) { @@ -17326,7 +17379,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EFAULT; } - insn->imm = desc->imm; + if (!bpf_jit_supports_far_kfunc_call()) + insn->imm = BPF_CALL_IMM(desc->addr); if (insn->off) return 0; if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { @@ -17351,17 +17405,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; - } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { - bool seen_direct_write = env->seen_direct_write; - bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); - - if (is_rdonly) - insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly); - - /* restore env->seen_direct_write to its original value, since - * may_access_direct_pkt_data mutates it - */ - env->seen_direct_write = seen_direct_write; } return 0; } @@ -17891,7 +17934,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } } - sort_kfunc_descs_by_imm(env->prog); + sort_kfunc_descs_by_imm_off(env->prog); return 0; }