From 460e28a5e25537846f4bb10cb8ac5a85c2394112 Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Sun, 19 May 2024 12:12:29 +0100 Subject: [PATCH] unwinder: Fix assumption on thread's initial registers The x86_64 ABI mandates that the bottom frame of processes must have their frame pointer (rbp) set to zero. We check for this when we detect and end of stacktrace condition, which is correcto for processes but it's not for threads as their initial frame pointer value is not specified. Test Plan ========= Ran for a while without issues. Unwinding Node.js applications cause the log to show up, this needs more investigation, but in general we should always use frame pointers to unwind Node. We will revisit this once we add proper support for this runtime. --- src/bpf/profiler.bpf.c | 25 ++++++++++++++----------- src/bpf/profiler.h | 2 +- src/bpf/profiler_bindings.rs | 3 ++- src/bpf/shared_maps.h | 2 +- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/bpf/profiler.bpf.c b/src/bpf/profiler.bpf.c index 66422b3..15ab298 100644 --- a/src/bpf/profiler.bpf.c +++ b/src/bpf/profiler.bpf.c @@ -302,6 +302,7 @@ SEC("perf_event") int dwarf_unwind(struct bpf_perf_event_data *ctx) { u64 pid_tgid = bpf_get_current_pid_tgid(); int per_process_id = pid_tgid >> 32; + int per_thread_id = pid_tgid; bool reached_bottom_of_stack = false; u64 zero = 0; @@ -530,22 +531,24 @@ int dwarf_unwind(struct bpf_perf_event_data *ctx) { // // From 3.4.1 Initial Stack and Register State // > %rbp The content of this register is unspecified at process - // > initialization time, > but the user code should mark the deepest - // > stack frame by setting the frame > pointer to zero. + // > initialization time, but the user code should mark the deepest + // > stack frame by setting the frame pointer to zero. + // + // Note: the initial register state only applies to processes not to threads. // // https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf - if (unwind_state->bp == 0) { - LOG("======= reached main! ======="); - add_stack(ctx, pid_tgid, unwind_state); - bump_unwind_success_dwarf(); - } else { - LOG("[error] Could not find unwind table and rbp != 0 (%llx), pc: %llx. Bug / Node.js?", - unwind_state->bp, unwind_state->ip); - // request_refresh_process_info(ctx, user_pid); - bump_unwind_error_pc_not_covered(); + bool main_thread = per_process_id == per_thread_id; + if (main_thread && unwind_state->bp != 0) { + LOG("[error] Expected rbp to be 0 but found %llx, pc: %llx (Node.js is not well supported yet)", unwind_state->bp, unwind_state->ip); + bump_unwind_error_bp_should_be_zero_for_bottom_frame(); } + + LOG("======= reached bottom frame! ======="); + add_stack(ctx, pid_tgid, unwind_state); + bump_unwind_success_dwarf(); return 0; + } else if (unwind_state->stack.len < MAX_STACK_DEPTH && unwind_state->tail_calls < MAX_TAIL_CALLS) { LOG("Continuing walking the stack in a tail call, current tail %d", diff --git a/src/bpf/profiler.h b/src/bpf/profiler.h index bfe4ac7..6d78ebf 100644 --- a/src/bpf/profiler.h +++ b/src/bpf/profiler.h @@ -78,7 +78,7 @@ struct unwinder_stats_t { u64 error_unsupported_cfa_register; u64 error_catchall; u64 error_should_never_happen; - u64 error_pc_not_covered; + u64 error_bp_should_be_zero_for_bottom_frame; u64 error_mapping_not_found; u64 error_mapping_does_not_contain_pc; u64 error_chunk_not_found; diff --git a/src/bpf/profiler_bindings.rs b/src/bpf/profiler_bindings.rs index c9c7661..215798d 100644 --- a/src/bpf/profiler_bindings.rs +++ b/src/bpf/profiler_bindings.rs @@ -50,7 +50,8 @@ impl Add for unwinder_stats_t { error_catchall: self.error_catchall + other.error_catchall, error_should_never_happen: self.error_should_never_happen + other.error_should_never_happen, - error_pc_not_covered: self.error_pc_not_covered + other.error_pc_not_covered, + error_bp_should_be_zero_for_bottom_frame: self.error_bp_should_be_zero_for_bottom_frame + + other.error_bp_should_be_zero_for_bottom_frame, error_binary_search_exausted_iterations: self.error_binary_search_exausted_iterations + other.error_binary_search_exausted_iterations, error_chunk_not_found: self.error_chunk_not_found + other.error_chunk_not_found, diff --git a/src/bpf/shared_maps.h b/src/bpf/shared_maps.h index c213d32..62e9873 100644 --- a/src/bpf/shared_maps.h +++ b/src/bpf/shared_maps.h @@ -37,7 +37,7 @@ DEFINE_COUNTER(error_unsupported_frame_pointer_action); DEFINE_COUNTER(error_unsupported_cfa_register); DEFINE_COUNTER(error_catchall); DEFINE_COUNTER(error_should_never_happen); -DEFINE_COUNTER(error_pc_not_covered); +DEFINE_COUNTER(error_bp_should_be_zero_for_bottom_frame); DEFINE_COUNTER(error_mapping_not_found); DEFINE_COUNTER(error_mapping_does_not_contain_pc); DEFINE_COUNTER(error_chunk_not_found);