Skip to content

Commit

Permalink
ebpf: apply extra padding for tracepoints arguments (5.14-el9 kernels)
Browse files Browse the repository at this point in the history
  • Loading branch information
apetruhin committed Nov 26, 2024
1 parent 5430687 commit a87bc34
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 60 deletions.
23 changes: 14 additions & 9 deletions ebpftracer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,30 @@ RUN clang -g -O2 -target bpf -D__KERNEL_FROM=416 -D__TARGET_ARCH_x86 -c ebpf.c -
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=420 -D__TARGET_ARCH_x86 -c ebpf.c -o ebpf420x86.o && llvm-strip --strip-debug ebpf420x86.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=506 -D__TARGET_ARCH_x86 -c ebpf.c -o ebpf506x86.o && llvm-strip --strip-debug ebpf506x86.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=512 -D__TARGET_ARCH_x86 -c ebpf.c -o ebpf512x86.o && llvm-strip --strip-debug ebpf512x86.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=512 -D__TARGET_ARCH_x86 -D__CTX_EXTRA_PADDING -c ebpf.c -o ebpf512x86cep.o && llvm-strip --strip-debug ebpf512x86cep.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=416 -D__TARGET_ARCH_arm64 -c ebpf.c -o ebpf416arm64.o && llvm-strip --strip-debug ebpf416arm64.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=420 -D__TARGET_ARCH_arm64 -c ebpf.c -o ebpf420arm64.o && llvm-strip --strip-debug ebpf420arm64.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=506 -D__TARGET_ARCH_arm64 -c ebpf.c -o ebpf506arm64.o && llvm-strip --strip-debug ebpf506arm64.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=512 -D__TARGET_ARCH_arm64 -c ebpf.c -o ebpf512arm64.o && llvm-strip --strip-debug ebpf512arm64.o
RUN clang -g -O2 -target bpf -D__KERNEL_FROM=512 -D__TARGET_ARCH_arm64 -D__CTX_EXTRA_PADDING -c ebpf.c -o ebpf512arm64cep.o && llvm-strip --strip-debug ebpf512arm64cep.o

RUN echo -en '// generated - do not edit\npackage ebpftracer\n\nvar ebpfProgs = map[string][]struct {\n' > ebpf.go \
&& echo -en '\tversion string\n' >> ebpf.go \
&& echo -en '\tprog []byte\n' >> ebpf.go \
&& echo -en '\tflags string\n' >> ebpf.go \
&& echo -en '\tprog []byte\n' >> ebpf.go \
&& echo -en '}{\n' >> ebpf.go \
&& echo -en '\t"amd64": {\n' >> ebpf.go \
&& echo -en '\t\t{"5.12", []byte("' >> ebpf.go && gzip -c ebpf512x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.6", []byte("' >> ebpf.go && gzip -c ebpf506x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.20", []byte("' >> ebpf.go && gzip -c ebpf420x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.16", []byte("' >> ebpf.go && gzip -c ebpf416x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.12", "ctx-extra-padding", []byte("' >> ebpf.go && gzip -c ebpf512x86cep.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.12", "", []byte("' >> ebpf.go && gzip -c ebpf512x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.6", "", []byte("' >> ebpf.go && gzip -c ebpf506x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.20", "", []byte("' >> ebpf.go && gzip -c ebpf420x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.16", "", []byte("' >> ebpf.go && gzip -c ebpf416x86.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t},\n'>> ebpf.go \
&& echo -en '\t"arm64": {\n' >> ebpf.go \
&& echo -en '\t\t{"5.12", []byte("' >> ebpf.go && gzip -c ebpf512arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.6", []byte("' >> ebpf.go && gzip -c ebpf506arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.20", []byte("' >> ebpf.go && gzip -c ebpf420arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.16", []byte("' >> ebpf.go && gzip -c ebpf416arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.12", "ctx-extra-padding", []byte("' >> ebpf.go && gzip -c ebpf512arm64cep.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.12", "", []byte("' >> ebpf.go && gzip -c ebpf512arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"5.6", "", []byte("' >> ebpf.go && gzip -c ebpf506arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.20", "", []byte("' >> ebpf.go && gzip -c ebpf420arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t\t{"4.16", "", []byte("' >> ebpf.go && gzip -c ebpf416arm64.o | base64 -w0 >> ebpf.go && echo '")},' >> ebpf.go \
&& echo -en '\t},\n'>> ebpf.go \
&& echo -en '}\n'>> ebpf.go
19 changes: 11 additions & 8 deletions ebpftracer/ebpf.go

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions ebpftracer/ebpf/ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
})

// Minimal layout stub for the context passed to syscalls/sys_exit_* tracepoint
// programs. The two leading 8-byte slots are never read; they only place `ret`
// at byte offset 16.
// NOTE(review): the skipped slots presumably cover the common tracepoint
// header and the syscall number - confirm against the tracepoint format file.
struct trace_event_raw_sys_exit__stub {
__u64 unused;
__u64 unused2;
long int ret; // read by the handlers as ctx->ret (the syscall's return value)
};

#include "proc.c"
#include "file.c"
#include "tcp/state.c"
Expand Down
34 changes: 18 additions & 16 deletions ebpftracer/ebpf/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,29 @@ struct {
__uint(max_entries, 10240);
} open_file_info SEC(".maps");

struct trace_event_raw_sys_enter__stub {
struct trace_event_raw_sys_enter_open__stub {
__u64 unused;
long int id;
long unsigned int args[6];
__u64 unused2;
char *filename;
long int flags;
};

struct trace_event_raw_sys_exit__stub {
struct trace_event_raw_sys_enter_openat__stub {
__u64 unused;
long int id;
long int ret;
__u64 unused2;
__u64 unused3;
char *filename;
long int flags;
};

static __always_inline
int trace_enter(struct trace_event_raw_sys_enter__stub* ctx, int at)
int trace_enter_open(long int flags, char *filename)
{
int flags = (int)ctx->args[at+1];
if (!(flags & O_ACCMODE & (O_WRONLY | O_RDWR))) {
return 0;
}
char p[7];
long res = bpf_probe_read_str(&p, sizeof(p), (void *)ctx->args[at]);
long res = bpf_probe_read_str(&p, sizeof(p), (void *)filename);
if (p[0]=='/' && p[1]=='p' && p[2]=='r' && p[3]=='o' && p[4]=='c' && p[5]=='/') {
return 0;
}
Expand All @@ -56,7 +58,7 @@ int trace_enter(struct trace_event_raw_sys_enter__stub* ctx, int at)
}

static __always_inline
int trace_exit(struct trace_event_raw_sys_exit__stub* ctx)
int trace_exit_open(struct trace_event_raw_sys_exit__stub* ctx)
{
__u64 id = bpf_get_current_pid_tgid();
if (!bpf_map_lookup_elem(&open_file_info, &id)) {
Expand All @@ -77,26 +79,26 @@ int trace_exit(struct trace_event_raw_sys_exit__stub* ctx)

#if defined(__TARGET_ARCH_x86)
SEC("tracepoint/syscalls/sys_enter_open")
int sys_enter_open(struct trace_event_raw_sys_enter__stub* ctx)
int sys_enter_open(struct trace_event_raw_sys_enter_open__stub* ctx)
{
return trace_enter(ctx, 0);
return trace_enter_open(ctx->flags, ctx->filename);
}

SEC("tracepoint/syscalls/sys_exit_open")
int sys_exit_open(struct trace_event_raw_sys_exit__stub* ctx)
{
return trace_exit(ctx);
return trace_exit_open(ctx);
}
#endif

SEC("tracepoint/syscalls/sys_enter_openat")
int sys_enter_openat(struct trace_event_raw_sys_enter__stub* ctx)
int sys_enter_openat(struct trace_event_raw_sys_enter_openat__stub* ctx)
{
return trace_enter(ctx, 1);
return trace_enter_open(ctx->flags, ctx->filename);
}

SEC("tracepoint/syscalls/sys_exit_openat")
int sys_exit_openat(struct trace_event_raw_sys_exit__stub* ctx)
{
return trace_exit(ctx);
return trace_exit_open(ctx);
}
16 changes: 5 additions & 11 deletions ebpftracer/ebpf/l7/l7.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,18 +135,12 @@ struct {

struct trace_event_raw_sys_enter_rw__stub {
__u64 unused;
long int id;
__u64 unused2;
__u64 fd;
char* buf;
__u64 size;
};

struct trace_event_raw_sys_exit_rw__stub {
__u64 unused;
long int id;
long int ret;
};

struct iovec {
char* buf;
__u64 size;
Expand Down Expand Up @@ -597,28 +591,28 @@ int sys_enter_recvfrom(struct trace_event_raw_sys_enter_rw__stub* ctx) {
}

SEC("tracepoint/syscalls/sys_exit_read")
int sys_exit_read(struct trace_event_raw_sys_exit_rw__stub* ctx) {
int sys_exit_read(struct trace_event_raw_sys_exit__stub* ctx) {
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = pid_tgid >> 32;
return trace_exit_read(ctx, pid_tgid, pid, 0, ctx->ret);
}

SEC("tracepoint/syscalls/sys_exit_readv")
int sys_exit_readv(struct trace_event_raw_sys_exit_rw__stub* ctx) {
int sys_exit_readv(struct trace_event_raw_sys_exit__stub* ctx) {
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = pid_tgid >> 32;
return trace_exit_read(ctx, pid_tgid, pid, 0, ctx->ret);
}

SEC("tracepoint/syscalls/sys_exit_recvmsg")
int sys_exit_recvmsg(struct trace_event_raw_sys_exit_rw__stub* ctx) {
int sys_exit_recvmsg(struct trace_event_raw_sys_exit__stub* ctx) {
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = pid_tgid >> 32;
return trace_exit_read(ctx, pid_tgid, pid, 0, ctx->ret);
}

SEC("tracepoint/syscalls/sys_exit_recvfrom")
int sys_exit_recvfrom(struct trace_event_raw_sys_exit_rw__stub* ctx) {
int sys_exit_recvfrom(struct trace_event_raw_sys_exit__stub* ctx) {
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = pid_tgid >> 32;
return trace_exit_read(ctx, pid_tgid, pid, 0, ctx->ret);
Expand Down
9 changes: 9 additions & 0 deletions ebpftracer/ebpf/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ struct {

struct trace_event_raw_task_newtask__stub {
__u64 unused;
#if defined(__CTX_EXTRA_PADDING)
__u32 unused2;
#endif
__u32 pid;
char comm[TASK_COMM_LEN];
long unsigned int clone_flags;
Expand All @@ -43,6 +46,9 @@ int task_newtask(struct trace_event_raw_task_newtask__stub *args)

// Layout stub for the tracepoint arguments consumed by sched_process_exit.
// The first 8 bytes are skipped. Building with -D__CTX_EXTRA_PADDING skips
// 4 more bytes, shifting `comm` and `pid` accordingly.
// NOTE(review): the extra padding presumably matches kernels whose common
// tracepoint header is longer (e.g. 5.14-el9) - confirm against the
// tracepoint's format file.
struct trace_event_raw_sched_process_template__stub {
__u64 unused;
#if defined(__CTX_EXTRA_PADDING)
__u32 unused2;
#endif
char comm[TASK_COMM_LEN];
__u32 pid;
};
Expand All @@ -68,6 +74,9 @@ int sched_process_exit(struct trace_event_raw_sched_process_template__stub *args

// Layout stub for the arguments of the mark_victim tracepoint (going by the
// struct name - TODO confirm which program attaches to it).
// The first 8 bytes are skipped. Building with -D__CTX_EXTRA_PADDING skips
// 4 more bytes before `pid`.
// NOTE(review): the extra padding presumably matches kernels whose common
// tracepoint header is longer - confirm against the tracepoint's format file.
struct trace_event_raw_mark_victim__stub {
__u64 unused;
#if defined(__CTX_EXTRA_PADDING)
__u32 unused2;
#endif
int pid;
};

Expand Down
3 changes: 3 additions & 0 deletions ebpftracer/ebpf/tcp/retransmit.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ struct {

struct trace_event_raw_tcp_event_sk_skb__stub {
__u64 unused;
#if defined(__CTX_EXTRA_PADDING)
__u64 unused2;
#endif
void *sbkaddr;
void *skaddr;
#if __KERNEL_FROM >= 420
Expand Down
5 changes: 4 additions & 1 deletion ebpftracer/ebpf/tcp/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ struct {

struct trace_event_raw_inet_sock_set_state__stub {
__u64 unused;
#if defined(__CTX_EXTRA_PADDING)
__u64 unused2;
#endif
void *skaddr;
int oldstate;
int newstate;
Expand Down Expand Up @@ -171,7 +174,7 @@ int inet_sock_set_state(void *ctx)

struct trace_event_raw_args_with_fd__stub {
__u64 unused;
long int id;
__u64 unused2;
__u64 fd;
};

Expand Down
69 changes: 54 additions & 15 deletions ebpftracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"io"
"os"
"path"
"runtime"
"strconv"
"strings"
Expand Down Expand Up @@ -194,20 +195,38 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
if _, ok := ebpfProgs[runtime.GOARCH]; !ok {
return fmt.Errorf("unsupported architecture: %s", runtime.GOARCH)
}
kv := common.GetKernelVersion()
var prg []byte
for _, p := range ebpfProg[runtime.GOARCH] {
pv, _ := common.VersionFromString(p.v)
if kv.GreaterOrEqual(pv) {
prg = p.p

var traceFsPath string
for _, p := range []string{"/sys/kernel/debug/tracing", "/sys/kernel/tracing"} {
if _, err := os.Stat(p); err == nil {
traceFsPath = p
break
}
}
if len(prg) == 0 {
return fmt.Errorf("unsupported kernel version: %s", kv)
if traceFsPath == "" {
return fmt.Errorf("kernel tracing is not available: debugfs or tracefs must be mounted")
}

var flags string
if isCtxExtraPaddingRequired(traceFsPath) {
flags = "ctx-extra-padding"
}
kv := common.GetKernelVersion()
var prog []byte
for _, p := range ebpfProgs[runtime.GOARCH] {
pv, _ := common.VersionFromString(p.version)
if !kv.GreaterOrEqual(pv) {
continue
}
if flags != p.flags {
continue
}
prog = p.prog
break
}
if len(prog) == 0 {
return fmt.Errorf("unsupported kernel version: %s %s", kv, flags)
}
_, debugFsErr := os.Stat("/sys/kernel/debug/tracing")
_, traceFsErr := os.Stat("/sys/kernel/tracing")

reader, err := gzip.NewReader(base64.NewDecoder(base64.StdEncoding, bytes.NewReader(prog)))
if err != nil {
Expand All @@ -226,9 +245,9 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
//Programs: ebpf.ProgramOptions{LogLevel: 2, LogSize: 20 * 1024 * 1024},
})
if err != nil {
var verr *ebpf.VerifierError
if errors.As(err, &verr) {
klog.Errorf("%+v", verr)
var vErr *ebpf.VerifierError
if errors.As(err, &vErr) {
klog.Errorf("%+v", vErr)
}
return fmt.Errorf("failed to load collection: %w", err)
}
Expand All @@ -244,7 +263,7 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
}

if !t.disableL7Tracing {
perfMaps = append(perfMaps, perfMap{name: "l7_events", typ: perfMapTypeL7Events, perCPUBufferSizePages: 64})
perfMaps = append(perfMaps, perfMap{name: "l7_events", typ: perfMapTypeL7Events, perCPUBufferSizePages: 32})
}

for _, pm := range perfMaps {
Expand Down Expand Up @@ -283,7 +302,7 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
}
if err != nil {
t.Close()
return fmt.Errorf("failed to link program: %w", err)
return fmt.Errorf("failed to link program '%s': %w", programSpec.Name, err)
}
t.links = append(t.links, l)
}
Expand Down Expand Up @@ -469,3 +488,23 @@ func ipPort(ip [16]byte, port uint16) netaddr.IPPort {
i, _ := netaddr.FromStdIP(ip[:])
return netaddr.IPPortFrom(i, port)
}

// isCtxExtraPaddingRequired reports whether the task_newtask tracepoint format
// exposed under traceFsPath mentions the "common_preempt_lazy_count" field.
// When it does, the caller selects the eBPF programs built with the
// "ctx-extra-padding" flag.
//
// traceFsPath is the tracing directory (for example /sys/kernel/tracing); the
// file events/task/task_newtask/format beneath it is read in full. If the file
// cannot be read, the error is logged and false is returned, so the caller
// falls back to the programs without extra padding.
func isCtxExtraPaddingRequired(traceFsPath string) bool {
	// os.ReadFile opens, reads and closes the file in a single call.
	data, err := os.ReadFile(path.Join(traceFsPath, "events/task/task_newtask/format"))
	if err != nil {
		klog.Errorln(err)
		return false
	}
	// The field name contains no newline, so searching the whole buffer is
	// equivalent to checking the file line by line.
	return strings.Contains(string(data), "common_preempt_lazy_count")
}

0 comments on commit a87bc34

Please sign in to comment.