Merge branch 'bpf-stack-tracker'
Alexei Starovoitov says:

====================
bpf: stack depth tracking

Introduce tracking of bpf program stack depth in the verifier and use that
info to reduce bpf program stack consumption in the interpreter and x64 JIT.
Other JITs can take advantage of it as well in the future.
Most programs consume very little stack, so it's a good optimization
in general, and it's the first step toward bpf-to-bpf function calls.

Also use an internal opcode for the bpf_tail_call() marking to make clear
that the jmp|call|x opcode is not uapi and may be used for an actual
indirect call opcode in the future.
====================

Signed-off-by: David S. Miller <[email protected]>
davem330 committed May 31, 2017
2 parents d2e0ef4 + 2960ae4 commit 6c21a2a
Showing 12 changed files with 147 additions and 71 deletions.
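
As a back-of-the-envelope illustration of what the series buys, consider a program whose deepest stack access is FP-16. The following is a standalone userspace sketch, not kernel code; the constants mirror the diffs below, and round_up() is reimplemented here only for the example.

#include <stdio.h>

#define MAX_BPF_STACK	512	/* fixed allocation before this series */
#define AUX_STACK_SPACE	40	/* 32 for rbx/r13/r14/r15 + 8 scratch (x64 JIT) */
#define round_up(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int stack_depth = 16;	/* deepest FP-relative access */

	/* x64 JIT frame: fixed 552 bytes before, rounded depth + aux after */
	printf("x64 frame:   %u -> %u bytes\n",
	       MAX_BPF_STACK + AUX_STACK_SPACE,
	       round_up(stack_depth, 8) + AUX_STACK_SPACE);	/* 552 -> 56 */

	/* interpreter: one 512-byte stack before, 32-byte buckets after */
	printf("interpreter: %u -> %u bytes\n",
	       MAX_BPF_STACK, round_up(stack_depth, 32));	/* 512 -> 32 */
	return 0;
}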
2 changes: 1 addition & 1 deletion arch/arm64/net/bpf_jit_comp.c
@@ -586,7 +586,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
}
/* tail call */
-	case BPF_JMP | BPF_CALL | BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
if (emit_bpf_tail_call(ctx))
return -EFAULT;
break;
2 changes: 1 addition & 1 deletion arch/powerpc/net/bpf_jit_comp64.c
@@ -938,7 +938,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
/*
* Tail call
*/
-	case BPF_JMP | BPF_CALL | BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
ctx->seen |= SEEN_TAILCALL;
bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
break;
2 changes: 1 addition & 1 deletion arch/s390/net/bpf_jit_comp.c
@@ -991,7 +991,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
}
break;
}
-	case BPF_JMP | BPF_CALL | BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
/*
* Implicit input:
* B1: pointer to ctx
2 changes: 1 addition & 1 deletion arch/sparc/net/bpf_jit_comp_64.c
@@ -1217,7 +1217,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
}

/* tail call */
-	case BPF_JMP | BPF_CALL |BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
emit_tail_call(ctx);
break;

20 changes: 9 additions & 11 deletions arch/x86/net/bpf_jit.S
@@ -19,9 +19,6 @@
*/
#define SKBDATA %r10
#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
-#define MAX_BPF_STACK (512 /* from filter.h */ + \
-	32 /* space for rbx,r13,r14,r15 */ + \
-	8 /* space for skb_copy_bits */)

#define FUNC(name) \
.globl name; \
@@ -66,7 +63,7 @@ FUNC(sk_load_byte_positive_offset)

/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
-	lea	-MAX_BPF_STACK + 32(%rbp), %rdx;\
+	lea	32(%rbp), %rdx;\
FRAME_BEGIN; \
mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
@@ -83,22 +80,22 @@ FUNC(sk_load_byte_positive_offset)
bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
-	mov - MAX_BPF_STACK + 32(%rbp),%eax
+	mov 32(%rbp),%eax
bswap %eax
ret

bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
-	mov - MAX_BPF_STACK + 32(%rbp),%ax
+	mov 32(%rbp),%ax
rol $8,%ax
movzwl %ax,%eax
ret

bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
-	movzbl - MAX_BPF_STACK + 32(%rbp),%eax
+	movzbl 32(%rbp),%eax
ret

#define sk_negative_common(SIZE) \
@@ -148,9 +145,10 @@ FUNC(sk_load_byte_negative_offset)
bpf_error:
# force a return 0 from jit handler
xor %eax,%eax
-	mov - MAX_BPF_STACK(%rbp),%rbx
-	mov - MAX_BPF_STACK + 8(%rbp),%r13
-	mov - MAX_BPF_STACK + 16(%rbp),%r14
-	mov - MAX_BPF_STACK + 24(%rbp),%r15
+	mov (%rbp),%rbx
+	mov 8(%rbp),%r13
+	mov 16(%rbp),%r14
+	mov 24(%rbp),%r15
+	add $40, %rbp
leaveq
ret
65 changes: 35 additions & 30 deletions arch/x86/net/bpf_jit_comp.c
@@ -197,31 +197,34 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64

-#define STACKSIZE \
-	(MAX_BPF_STACK + \
-	 32 /* space for rbx, r13, r14, r15 */ + \
+#define AUX_STACK_SPACE \
+	(32 /* space for rbx, r13, r14, r15 */ + \
8 /* space for skb_copy_bits() buffer */)

-#define PROLOGUE_SIZE 48
+#define PROLOGUE_SIZE 37

/* emit x64 prologue code for BPF program and check it's size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog)
+static void emit_prologue(u8 **pprog, u32 stack_depth)
{
u8 *prog = *pprog;
int cnt = 0;

EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */

-	/* sub rsp, STACKSIZE */
-	EMIT3_off32(0x48, 0x81, 0xEC, STACKSIZE);
+	/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
+	EMIT3_off32(0x48, 0x81, 0xEC,
+		    round_up(stack_depth, 8) + AUX_STACK_SPACE);
+
+	/* sub rbp, AUX_STACK_SPACE */
+	EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);

/* all classic BPF filters use R6(rbx) save it */

-	/* mov qword ptr [rbp-X],rbx */
-	EMIT3_off32(0x48, 0x89, 0x9D, -STACKSIZE);
+	/* mov qword ptr [rbp+0],rbx */
+	EMIT4(0x48, 0x89, 0x5D, 0);

/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and
@@ -231,12 +234,12 @@ static void emit_prologue(u8 **pprog)
* than synthetic ones. Therefore not worth adding complexity.
*/

-	/* mov qword ptr [rbp-X],r13 */
-	EMIT3_off32(0x4C, 0x89, 0xAD, -STACKSIZE + 8);
-	/* mov qword ptr [rbp-X],r14 */
-	EMIT3_off32(0x4C, 0x89, 0xB5, -STACKSIZE + 16);
-	/* mov qword ptr [rbp-X],r15 */
-	EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24);
+	/* mov qword ptr [rbp+8],r13 */
+	EMIT4(0x4C, 0x89, 0x6D, 8);
+	/* mov qword ptr [rbp+16],r14 */
+	EMIT4(0x4C, 0x89, 0x75, 16);
+	/* mov qword ptr [rbp+24],r15 */
+	EMIT4(0x4C, 0x89, 0x7D, 24);

/* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
* we need to reset the counter to 0. It's done in two instructions,
Expand All @@ -246,8 +249,8 @@ static void emit_prologue(u8 **pprog)

/* xor eax, eax */
EMIT2(0x31, 0xc0);
-	/* mov qword ptr [rbp-X], rax */
-	EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32);
+	/* mov qword ptr [rbp+32], rax */
+	EMIT4(0x48, 0x89, 0x45, 32);

BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
@@ -289,13 +292,13 @@ static void emit_bpf_tail_call(u8 **pprog)
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
-	EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
+	EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 36
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
-	EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
+	EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */

/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
@@ -361,7 +364,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;

-	emit_prologue(&prog);
+	emit_prologue(&prog, bpf_prog->aux->stack_depth);

if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
@@ -877,7 +880,7 @@ xadd: if (is_imm8(insn->off))
}
break;

-	case BPF_JMP | BPF_CALL | BPF_X:
+	case BPF_JMP | BPF_TAIL_CALL:
emit_bpf_tail_call(&prog);
break;

@@ -1036,15 +1039,17 @@ xadd: if (is_imm8(insn->off))
seen_exit = true;
/* update cleanup_addr */
ctx->cleanup_addr = proglen;
-	/* mov rbx, qword ptr [rbp-X] */
-	EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE);
-	/* mov r13, qword ptr [rbp-X] */
-	EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8);
-	/* mov r14, qword ptr [rbp-X] */
-	EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16);
-	/* mov r15, qword ptr [rbp-X] */
-	EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24);
+
+	/* mov rbx, qword ptr [rbp+0] */
+	EMIT4(0x48, 0x8B, 0x5D, 0);
+	/* mov r13, qword ptr [rbp+8] */
+	EMIT4(0x4C, 0x8B, 0x6D, 8);
+	/* mov r14, qword ptr [rbp+16] */
+	EMIT4(0x4C, 0x8B, 0x75, 16);
+	/* mov r15, qword ptr [rbp+24] */
+	EMIT4(0x4C, 0x8B, 0x7D, 24);
+
+	/* add rbp, AUX_STACK_SPACE */
+	EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
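The arithmetic behind PROLOGUE_SIZE dropping from 48 to 37 falls out of x86 encoding: once rbp is lowered by AUX_STACK_SPACE, every spill lands at rbp+0..+32 and fits an 8-bit displacement (EMIT4, 4 bytes each) instead of a 32-bit one (EMIT3_off32, 7 bytes each); five stores save 15 bytes, and the added 4-byte "sub rbp" costs 4 back. A sketch of the displacement rule, mirroring the is_imm8() predicate visible in the xadd hunk above (its definition here is assumed to be the usual signed-byte test):

#include <stdio.h>

/* the predicate the JIT uses to pick between disp8 and disp32 */
static int is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

int main(void)
{
	int old_off = -(512 + 40);	/* rbx spill at rbp-STACKSIZE before */
	int new_off = 0;		/* rbx spill at rbp+0 after */

	printf("disp %5d needs %d displacement byte(s)\n",
	       old_off, is_imm8(old_off) ? 1 : 4);
	printf("disp %5d needs %d displacement byte(s)\n",
	       new_off, is_imm8(new_off) ? 1 : 4);
	return 0;
}

The resulting frame keeps the program stack below rbp and the auxiliary area above it: saved registers at rbp+0, +8, +16 and +24, the skb_copy_bits() scratch at rbp+32, and the tail-call counter dword at rbp+36, matching the offsets in bpf_jit.S above.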
1 change: 1 addition & 0 deletions include/linux/bpf.h
@@ -171,6 +171,7 @@ struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
u32 max_ctx_offset;
+	u32 stack_depth;
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
const struct bpf_verifier_ops *ops;
3 changes: 3 additions & 0 deletions include/linux/filter.h
@@ -57,6 +57,9 @@ struct bpf_prog_aux;
#define BPF_REG_AX MAX_BPF_REG
#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1)

+/* unused opcode to mark special call to bpf_tail_call() helper */
+#define BPF_TAIL_CALL	0xf0

/* As per nm, we expose JITed images as text (code) section for
* kallsyms. That way, tools like perf can find it to match
* addresses.
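The choice of 0xf0 is easy to sanity-check from the uapi encoding, where an opcode byte is the OR of an instruction class, a source flag, and an operation. A small sketch with the relevant constants copied from the uapi headers:

#include <stdio.h>

#define BPF_JMP		0x05	/* instruction class */
#define BPF_X		0x08	/* source operand flag */
#define BPF_CALL	0x80	/* uapi call operation */
#define BPF_TAIL_CALL	0xf0	/* internal only, not uapi */

int main(void)
{
	/* old internal marker sat on a plausible future uapi encoding */
	printf("BPF_JMP|BPF_CALL|BPF_X = 0x%02x\n",
	       BPF_JMP | BPF_CALL | BPF_X);	/* 0x8d */
	/* new marker lives in unused opcode space */
	printf("BPF_JMP|BPF_TAIL_CALL  = 0x%02x\n",
	       BPF_JMP | BPF_TAIL_CALL);	/* 0xf5 */
	return 0;
}

0xf5 falls in unused opcode space, while the old 0x8d marker occupied an encoding that may later be wanted for a real indirect call, which is exactly the concern raised in the commit message.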
47 changes: 38 additions & 9 deletions kernel/bpf/core.c
@@ -763,10 +763,10 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
*
* Decode and execute eBPF instructions.
*/
-static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
+static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
+				    u64 *stack)
{
-	u64 stack[MAX_BPF_STACK / sizeof(u64)];
-	u64 regs[MAX_BPF_REG], tmp;
+	u64 tmp;
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
@@ -824,7 +824,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
/* Call instruction */
[BPF_JMP | BPF_CALL] = &&JMP_CALL,
-		[BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
+		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
/* Jumps */
[BPF_JMP | BPF_JA] = &&JMP_JA,
[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
@@ -874,9 +874,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
#define CONT ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

-	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
-	ARG1 = (u64) (unsigned long) ctx;
-
select_insn:
goto *jumptable[insn->code];

@@ -1219,7 +1216,39 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
return 0;
}
-STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
+STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */

+#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
+#define DEFINE_BPF_PROG_RUN(stack_size) \
+static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
+{ \
+	u64 stack[stack_size / sizeof(u64)]; \
+	u64 regs[MAX_BPF_REG]; \
+\
+	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
+	ARG1 = (u64) (unsigned long) ctx; \
+	return ___bpf_prog_run(regs, insn, stack); \
+}
+
+#define EVAL1(FN, X) FN(X)
+#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
+#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
+#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
+#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
+#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
+
+EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
+EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
+EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
+
+#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
+
+static unsigned int (*interpreters[])(const void *ctx,
+				      const struct bpf_insn *insn) = {
+EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
+EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
+EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
+};

bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
@@ -1268,7 +1297,7 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
-	fp->bpf_func = (void *) __bpf_prog_run;
+	fp->bpf_func = interpreters[round_down(fp->aux->stack_depth, 32) / 32];

/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
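The EVAL chain above is plain preprocessor iteration: EVAL6(DEFINE_BPF_PROG_RUN, ...) stamps out one interpreter per 32-byte stack bucket, each with a compile-time-sized stack array, and bpf_prog_select_runtime() then indexes the matching variant. A standalone model of the trick with three buckets instead of sixteen (GNU named variadic macros, as in the kernel; names here are made up for the demo):

#include <stdio.h>

#define PROG_NAME(sz)		run_##sz
#define DEFINE_RUN(sz)					\
static unsigned int PROG_NAME(sz)(void)		\
{							\
	unsigned char stack[sz];	/* fixed size */	\
	stack[0] = 0;					\
	return sz + stack[0];				\
}

#define EVAL1(FN, X)	   FN(X)
#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)

EVAL3(DEFINE_RUN, 32, 64, 96)	/* defines run_32, run_64, run_96 */

#define PROG_NAME_LIST(sz)	PROG_NAME(sz),
static unsigned int (*interp[])(void) = { EVAL3(PROG_NAME_LIST, 32, 64, 96) };

int main(void)
{
	unsigned int depth = 40;		/* as reported by the verifier */
	unsigned int idx = (depth & ~31u) / 32;	/* round_down(depth, 32) / 32 */

	printf("depth %u -> %u-byte stack variant\n", depth, interp[idx]());
	return 0;
}

Selection rounds the verifier-reported depth down to a 32-byte bucket; a depth that is an exact multiple lands in the next bucket up, which errs on the conservative side.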
13 changes: 11 additions & 2 deletions kernel/bpf/verifier.c
@@ -926,6 +926,10 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
verbose("invalid stack off=%d size=%d\n", off, size);
return -EACCES;
}

+	if (env->prog->aux->stack_depth < -off)
+		env->prog->aux->stack_depth = -off;

if (t == BPF_WRITE) {
if (!env->allow_ptr_leaks &&
state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
@@ -1032,6 +1036,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
return -EACCES;
}

+	if (env->prog->aux->stack_depth < -off)
+		env->prog->aux->stack_depth = -off;

if (meta && meta->raw_mode) {
meta->access_size = access_size;
meta->regno = regno;
@@ -3167,7 +3174,8 @@ static int do_check(struct bpf_verifier_env *env)
insn_idx++;
}

verbose("processed %d insns\n", insn_processed);
verbose("processed %d insns, stack depth %d\n",
insn_processed, env->prog->aux->stack_depth);
return 0;
}

@@ -3462,14 +3470,15 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* the program array.
*/
prog->cb_access = 1;
+	env->prog->aux->stack_depth = MAX_BPF_STACK;

/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
* call and to prevent accidental JITing by JIT compiler
* that doesn't support bpf_tail_call yet
*/
insn->imm = 0;
-	insn->code |= BPF_X;
+	insn->code = BPF_JMP | BPF_TAIL_CALL;
continue;
}

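Both verifier hunks apply one pattern: stack_depth is a running maximum of -off over every direct access (check_mem_access) and every helper-mediated access (check_stack_boundary), while fixup_bpf_calls() pins it to MAX_BPF_STACK whenever bpf_tail_call() appears, since the replacement program's stack needs are unknown at verification time. The pattern in isolation, as a sketch with the kernel context stripped away:

#include <stdio.h>

#define MAX_BPF_STACK 512

struct prog_aux { unsigned int stack_depth; };

/* off is the (negative) FP-relative offset of a verified stack access */
static int track_stack_access(struct prog_aux *aux, int off)
{
	if (off >= 0 || off < -MAX_BPF_STACK)
		return -1;		/* rejected earlier by the verifier */
	if (aux->stack_depth < (unsigned int)-off)
		aux->stack_depth = -off;
	return 0;
}

int main(void)
{
	struct prog_aux aux = { 0 };

	track_stack_access(&aux, -8);	/* e.g. *(u64 *)(FP - 8) = r1 */
	track_stack_access(&aux, -40);	/* e.g. helper filling a 40-byte buffer */
	printf("stack depth %u\n", aux.stack_depth);	/* 40 */
	return 0;
}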