From 4429694ea751deb394f6d86426c71b207f6e9ab9 Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Mon, 2 Dec 2024 10:37:38 +0000 Subject: [PATCH 1/7] Fix counter typo (exausted -> exhausted) --- src/bpf/profiler.bpf.c | 2 +- src/bpf/profiler.h | 2 +- src/bpf/profiler_bindings.rs | 4 ++-- src/bpf/shared_maps.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/bpf/profiler.bpf.c b/src/bpf/profiler.bpf.c index e344778..5216344 100644 --- a/src/bpf/profiler.bpf.c +++ b/src/bpf/profiler.bpf.c @@ -422,7 +422,7 @@ int dwarf_unwind(struct bpf_perf_event_data *ctx) { if (!in_previous_page) { LOG("[error] binary search failed with %llx, pc: %llx", table_idx, unwind_state->ip); if (table_idx == BINARY_SEARCH_EXHAUSTED_ITERATIONS) { - bump_unwind_error_binary_search_exausted_iterations(); + bump_unwind_error_binary_search_exhausted_iterations(); } return 1; } diff --git a/src/bpf/profiler.h b/src/bpf/profiler.h index c90893e..c2d0bc7 100644 --- a/src/bpf/profiler.h +++ b/src/bpf/profiler.h @@ -103,7 +103,7 @@ struct unwinder_stats_t { u64 error_mapping_not_found; u64 error_mapping_does_not_contain_pc; u64 error_chunk_not_found; - u64 error_binary_search_exausted_iterations; + u64 error_binary_search_exhausted_iterations; u64 error_sending_new_process_event; u64 error_cfa_offset_did_not_fit; u64 error_rbp_offset_did_not_fit; diff --git a/src/bpf/profiler_bindings.rs b/src/bpf/profiler_bindings.rs index 45075c6..38a756b 100644 --- a/src/bpf/profiler_bindings.rs +++ b/src/bpf/profiler_bindings.rs @@ -55,8 +55,8 @@ impl Add for unwinder_stats_t { error_previous_rbp_zero: self.error_previous_rbp_zero + other.error_previous_rbp_zero, error_should_never_happen: self.error_should_never_happen + other.error_should_never_happen, - error_binary_search_exausted_iterations: self.error_binary_search_exausted_iterations - + other.error_binary_search_exausted_iterations, + error_binary_search_exhausted_iterations: 
self.error_binary_search_exhausted_iterations + + other.error_binary_search_exhausted_iterations, error_chunk_not_found: self.error_chunk_not_found + other.error_chunk_not_found, error_mapping_does_not_contain_pc: self.error_mapping_does_not_contain_pc + other.error_mapping_does_not_contain_pc, diff --git a/src/bpf/shared_maps.h b/src/bpf/shared_maps.h index ec1345d..19c0485 100644 --- a/src/bpf/shared_maps.h +++ b/src/bpf/shared_maps.h @@ -41,7 +41,7 @@ DEFINE_COUNTER(error_should_never_happen); DEFINE_COUNTER(error_mapping_not_found); DEFINE_COUNTER(error_mapping_does_not_contain_pc); DEFINE_COUNTER(error_chunk_not_found); -DEFINE_COUNTER(error_binary_search_exausted_iterations); +DEFINE_COUNTER(error_binary_search_exhausted_iterations); DEFINE_COUNTER(error_sending_new_process_event); DEFINE_COUNTER(error_cfa_offset_did_not_fit); DEFINE_COUNTER(error_rbp_offset_did_not_fit); From bbde82dcd541a23313cd7d06ec6dc938b4180c72 Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Sun, 1 Dec 2024 11:16:16 +0000 Subject: [PATCH 2/7] Initial implementation of debug information manager Currently, executables are opened as soon as possible to have a hold of them to be able to symbolize later on. This approach is not ideal because the number of opened file descriptors can balloon, and this is done without first checking if the executables contain debug information at all. In the future all the local and remote use-cases will transition to using the debug information manager. This first iteration is quite simple and lacks a lot of features needed to make this really work. In this commit it's still disabled by default, and just adds some basic features to upload debug information to a server, if needed. Left various TODOs on the planned changes. Test Plan ========= CI + manual checks. 
--- Cargo.lock | 2 +- lightswitch-object/Cargo.toml | 2 +- lightswitch-object/src/object.rs | 5 + src/cli/args.rs | 10 ++ src/cli/main.rs | 51 ++++++---- src/collector.rs | 2 +- src/debug_info.rs | 165 +++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/profiler.rs | 34 ++++++- 9 files changed, 249 insertions(+), 23 deletions(-) create mode 100644 src/debug_info.rs diff --git a/Cargo.lock b/Cargo.lock index 73bda20..23939a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1416,7 +1416,7 @@ dependencies = [ [[package]] name = "lightswitch-object" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "data-encoding", diff --git a/lightswitch-object/Cargo.toml b/lightswitch-object/Cargo.toml index 67c938d..3ce2673 100644 --- a/lightswitch-object/Cargo.toml +++ b/lightswitch-object/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lightswitch-object" -version = "0.1.0" +version = "0.1.1" edition = "2021" description = "Deals with object files" license = "MIT" diff --git a/lightswitch-object/src/object.rs b/lightswitch-object/src/object.rs index 0be6d61..4146762 100644 --- a/lightswitch-object/src/object.rs +++ b/lightswitch-object/src/object.rs @@ -94,6 +94,11 @@ impl ObjectFile { Ok(BuildId::sha256_from_digest(&self.code_hash)) } + /// Returns whether the object has debug symbols. 
+ pub fn has_debug_info(&self) -> bool { + self.object.has_debug_symbols() + } + pub fn is_dynamic(&self) -> bool { self.object.kind() == ObjectKind::Dynamic } diff --git a/src/cli/args.rs b/src/cli/args.rs index a3af4de..0980f04 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -42,6 +42,14 @@ pub(crate) enum Symbolizer { None, } +#[derive(PartialEq, clap::ValueEnum, Debug, Clone, Default)] +pub(crate) enum DebugInfoBackend { + #[default] + None, + Copy, + Remote, +} + #[derive(Parser, Debug)] pub(crate) struct CliArgs { /// Specific PIDs to profile @@ -126,4 +134,6 @@ pub(crate) struct CliArgs { pub(crate) exclude_self: bool, #[arg(long, default_value_t, value_enum)] pub(crate) symbolizer: Symbolizer, + #[arg(long, default_value_t, value_enum)] + pub(crate) debug_info_backend: DebugInfoBackend, } diff --git a/src/cli/main.rs b/src/cli/main.rs index a196d62..b58082b 100644 --- a/src/cli/main.rs +++ b/src/cli/main.rs @@ -6,11 +6,13 @@ use std::io::Write; use std::panic; use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use std::time::Duration; use clap::Parser; use crossbeam_channel::bounded; use inferno::flamegraph; use lightswitch::collector::{AggregatorCollector, Collector, NullCollector, StreamingCollector}; +use lightswitch::debug_info::DebugInfoManager; use lightswitch_metadata::metadata_provider::GlobalMetadataProvider; use nix::unistd::Uid; use prost::Message; @@ -21,6 +23,9 @@ use tracing_subscriber::FmtSubscriber; use lightswitch_capabilities::system_info::SystemInfo; use lightswitch_metadata::metadata_provider::ThreadSafeGlobalMetadataProvider; +use lightswitch::debug_info::{ + DebugInfoBackendFilesystem, DebugInfoBackendNull, DebugInfoBackendRemote, +}; use lightswitch::profile::symbolize_profile; use lightswitch::profile::{fold_profile, to_pprof}; use lightswitch::profiler::{Profiler, ProfilerConfig}; @@ -32,12 +37,13 @@ mod args; mod validators; use crate::args::CliArgs; +use crate::args::DebugInfoBackend; use crate::args::LoggingLevel; use 
crate::args::ProfileFormat; use crate::args::ProfileSender; use crate::args::Symbolizer; -const DEFAULT_PPROF_INGEST_URL: &str = "http://localhost:4567/pprof/new"; +const DEFAULT_SERVER_URL: &str = "http://localhost:4567"; /// Exit the main thread if any thread panics. We prefer this behaviour because pretty much every /// thread is load bearing for the correct functioning. @@ -98,24 +104,34 @@ fn main() -> Result<(), Box> { } } + let server_url = args.server_url.unwrap_or(DEFAULT_SERVER_URL.into()); + let metadata_provider: ThreadSafeGlobalMetadataProvider = Arc::new(Mutex::new(GlobalMetadataProvider::default())); - let collector = Arc::new(Mutex::new(match args.sender { - ProfileSender::None => Box::new(NullCollector::new()) as Box, - ProfileSender::LocalDisk => { - Box::new(AggregatorCollector::new()) as Box - } - ProfileSender::Remote => Box::new(StreamingCollector::new( - args.symbolizer == Symbolizer::Local, - args.server_url - .as_ref() - .map_or(DEFAULT_PPROF_INGEST_URL, |v| v), - ProfilerConfig::default().session_duration, - args.sample_freq, - metadata_provider.clone(), - )) as Box, - })); + let collector: Arc>> = + Arc::new(Mutex::new(match args.sender { + ProfileSender::None => Box::new(NullCollector::new()), + ProfileSender::LocalDisk => Box::new(AggregatorCollector::new()), + ProfileSender::Remote => Box::new(StreamingCollector::new( + args.symbolizer == Symbolizer::Local, + &server_url, + ProfilerConfig::default().session_duration, + args.sample_freq, + metadata_provider.clone(), + )), + })); + + let debug_info_manager: Box = match args.debug_info_backend { + DebugInfoBackend::None => Box::new(DebugInfoBackendNull {}), + DebugInfoBackend::Copy => Box::new(DebugInfoBackendFilesystem { + path: PathBuf::from("/tmp"), + }), + DebugInfoBackend::Remote => Box::new(DebugInfoBackendRemote { + http_client_timeout: Duration::from_millis(500), + server_url, + }), + }; let profiler_config = ProfilerConfig { libbpf_debug: args.libbpf_debug, @@ -128,6 +144,7 @@ fn 
main() -> Result<(), Box> { mapsize_aggregated_stacks: args.mapsize_aggregated_stacks, mapsize_rate_limits: args.mapsize_rate_limits, exclude_self: args.exclude_self, + debug_info_manager, ..Default::default() }; @@ -260,7 +277,7 @@ mod tests { cmd.arg("--help"); cmd.assert().success(); let actual = String::from_utf8(cmd.unwrap().stdout).unwrap(); - insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids \n Specific PIDs to profile\n\n --tids \n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info \n Show unwind info for given binary\n\n --show-info \n Show build ID for given binary\n\n -D, --duration \n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging \n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq \n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format \n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path \n Path for the generated profile\n\n --profile-name \n Name for the generated profile\n\n --sender \n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --server-url \n \n\n --perf-buffer-bytes \n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks \n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks \n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits \n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer \n [default: local]\n [possible values: local, none]\n\n -h, --help\n Print help (see a summary with '-h')\n""#); + insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids \n Specific PIDs to profile\n\n --tids \n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info \n Show unwind info for given binary\n\n --show-info \n Show build ID for given binary\n\n -D, --duration \n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging \n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq \n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format \n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path \n Path for the generated profile\n\n --profile-name \n Name for the generated profile\n\n --sender \n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --server-url \n \n\n --perf-buffer-bytes \n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks \n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks \n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits \n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer \n [default: local]\n [possible values: local, none]\n\n --debug-info-backend \n [default: none]\n [possible values: none, copy, remote]\n\n -h, --help\n Print help (see a summary with '-h')\n""#); } #[rstest] diff --git a/src/collector.rs b/src/collector.rs index c04f62d..db8d206 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -88,7 +88,7 @@ impl StreamingCollector { ) -> Self { Self { local_symbolizer, - pprof_ingest_url: pprof_ingest_url.into(), + pprof_ingest_url: format!("{}/pprof/new", pprof_ingest_url), http_client_timeout: Duration::from_secs(30), profile_duration, profile_frequency_hz, diff --git a/src/debug_info.rs b/src/debug_info.rs new file mode 100644 index 0000000..ad972b5 --- /dev/null +++ b/src/debug_info.rs @@ -0,0 +1,165 @@ +use std::io::Read; +use std::path::PathBuf; +use std::time::Duration; + +use reqwest::StatusCode; +use tracing::instrument; + +use lightswitch_object::BuildId; +use lightswitch_object::ExecutableId; + +/// Handles with debug information. +/// +/// This currently experimental, not feature-complete and not used yet during +/// symbolization. The end goal would be to keep track of every debug info +/// that's either present locally or remotely (depending on configuration), while +/// minimizing the number of open FDs, file copies, and race condition windows. 
+pub trait DebugInfoManager { + fn add_if_not_present( + &self, + name: &str, + build_id: &BuildId, + executable_id: ExecutableId, + file: &mut std::fs::File, + ) -> anyhow::Result<()>; + fn debug_info_path(&self) -> Option; +} + +pub struct DebugInfoBackendNull {} +impl DebugInfoManager for DebugInfoBackendNull { + fn add_if_not_present( + &self, + _name: &str, + _build_id: &BuildId, + _executable_id: ExecutableId, + _file: &mut std::fs::File, + ) -> anyhow::Result<()> { + Ok(()) + } + + fn debug_info_path(&self) -> Option { + None + } +} + +#[derive(Debug)] +pub struct DebugInfoBackendFilesystem { + pub path: PathBuf, +} +impl DebugInfoManager for DebugInfoBackendFilesystem { + #[instrument] + fn add_if_not_present( + &self, + _name: &str, + build_id: &BuildId, + executable_id: ExecutableId, + file: &mut std::fs::File, + ) -> anyhow::Result<()> { + // try to find, else extract + if self.find_in_fs(build_id) { + return Ok(()); + } + + self.add_to_fs(build_id, executable_id, file) + } + + fn debug_info_path(&self) -> Option { + todo!() + } +} + +impl DebugInfoBackendFilesystem { + fn find_in_fs(&self, build_id: &BuildId) -> bool { + self.path.join(build_id.to_string()).exists() + } + + fn add_to_fs( + &self, + build_id: &BuildId, + _executable_id: ExecutableId, + file: &mut std::fs::File, + ) -> anyhow::Result<()> { + // TODO: add support for other methods beyond copying. For example + // hardlinks could be used and only fall back to copying if the src + // and dst filesystems differ. 
+ let mut writer = std::fs::File::create(self.path.join(build_id.to_string()))?; + std::io::copy(file, &mut writer)?; + Ok(()) + } +} + +#[derive(Debug)] +pub struct DebugInfoBackendRemote { + pub http_client_timeout: Duration, + pub server_url: String, +} +impl DebugInfoManager for DebugInfoBackendRemote { + #[instrument(level = "debug")] + fn add_if_not_present( + &self, + name: &str, + build_id: &BuildId, + executable_id: ExecutableId, + file: &mut std::fs::File, + ) -> anyhow::Result<()> { + // TODO: add a local cache to not have to reach to the backend + // unnecessarily. + if self.find_in_backend(build_id)? { + return Ok(()); + } + + // TODO: do this in another thread. + self.upload_to_backend(name, build_id, executable_id, file)?; + Ok(()) + } + + fn debug_info_path(&self) -> Option { + None + } +} + +impl DebugInfoBackendRemote { + /// Whether the backend knows about some debug information. + #[instrument(level = "debug")] + fn find_in_backend(&self, build_id: &BuildId) -> anyhow::Result { + let client_builder = reqwest::blocking::Client::builder().timeout(self.http_client_timeout); + let client = client_builder.build()?; + let response = client + .get(format!( + "{}/debuginfo/{}", + self.server_url.clone(), + build_id + )) + .send(); + + Ok(response?.status() == StatusCode::OK) + } + + /// Send the debug information to the backend. 
+ #[instrument] + fn upload_to_backend( + &self, + name: &str, + build_id: &BuildId, + executable_id: ExecutableId, + file: &mut std::fs::File, + ) -> anyhow::Result<()> { + let client_builder = reqwest::blocking::Client::builder().timeout(self.http_client_timeout); + let client = client_builder.build()?; + let mut debug_info = Vec::new(); + file.read_to_end(&mut debug_info)?; + + let response = client + .post(format!( + "{}/debuginfo/new/{}/{}/{}", + self.server_url.clone(), + name, + build_id, + executable_id + )) + .body(debug_info) + .send()?; + println!("wrote debug info to server {:?}", response); + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 9fc43c6..59d1bb0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod bpf; pub mod collector; +pub mod debug_info; pub mod ksym; pub mod perf_events; pub mod process; diff --git a/src/profiler.rs b/src/profiler.rs index 80f46e9..ab5833f 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -30,6 +30,8 @@ use crate::bpf::profiler_skel::{OpenProfilerSkel, ProfilerSkel, ProfilerSkelBuil use crate::bpf::tracers_bindings::*; use crate::bpf::tracers_skel::{TracersSkel, TracersSkelBuilder}; use crate::collector::*; +use crate::debug_info::DebugInfoBackendNull; +use crate::debug_info::DebugInfoManager; use crate::perf_events::setup_perf_event; use crate::process::{ ExecutableMapping, ExecutableMappingType, ExecutableMappings, ObjectFileInfo, Pid, ProcessInfo, @@ -101,6 +103,8 @@ pub struct Profiler { exclude_self: bool, /// Sizes for the unwind information buckets. 
native_unwind_info_bucket_sizes: Vec, + /// Deals with debug information + debug_info_manager: Box, } pub struct ProfilerConfig { @@ -116,6 +120,7 @@ pub struct ProfilerConfig { pub mapsize_rate_limits: u32, pub exclude_self: bool, pub native_unwind_info_bucket_sizes: Vec, + pub debug_info_manager: Box, } impl Default for ProfilerConfig { @@ -136,6 +141,7 @@ impl Default for ProfilerConfig { 1_000, 10_000, 20_000, 40_000, 80_000, 160_000, 320_000, 640_000, 1_280_000, 2_560_000, 3_840_000, 5_120_000, 7_680_000, ], + debug_info_manager: Box::new(DebugInfoBackendNull {}), } } } @@ -359,6 +365,7 @@ impl Profiler { session_duration: profiler_config.session_duration, exclude_self: profiler_config.exclude_self, native_unwind_info_bucket_sizes: profiler_config.native_unwind_info_bucket_sizes, + debug_info_manager: profiler_config.debug_info_manager, } } @@ -1117,7 +1124,6 @@ impl Profiler { }, }); - // This is not released (see note "deadlock") let object_files = self.object_files.read().unwrap(); let executable = object_files.get(&mapping.executable_id).unwrap(); let executable_path = executable.open_file_path(); @@ -1293,7 +1299,7 @@ impl Profiler { // We want to open the file as quickly as possible to minimise the chances of races // if the file is deleted. - let file = match fs::File::open(&abs_path) { + let mut file = match fs::File::open(&abs_path) { Ok(f) => f, Err(e) => { debug!("failed to open file {} due to {:?}", abs_path, e); @@ -1356,11 +1362,33 @@ impl Profiler { soft_delete: false, }); + let abs_path = PathBuf::from(abs_path); + + // If the object file has debug info, add it to our store. 
+ if object_file.has_debug_info() { + let name = match abs_path.file_name() { + Some(os_name) => os_name.to_string_lossy().to_string(), + None => "error".to_string(), + }; + let res = self.debug_info_manager.add_if_not_present( + &name, + &build_id, + executable_id, + &mut file, + ); + debug!("debug info manager add result {:?}", res); + } else { + debug!( + "could not find debug information for {}", + abs_path.display() + ); + } + match object_files.entry(executable_id) { Entry::Vacant(entry) => match object_file.elf_load_segments() { Ok(elf_loads) => { entry.insert(ObjectFileInfo { - path: PathBuf::from(abs_path), + path: abs_path, file, elf_load_segments: elf_loads, is_dyn: object_file.is_dynamic(), From 1398a5d08f0fe8312ce38d62c81bd20f6a6f1f72 Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Mon, 30 Dec 2024 11:52:18 +0000 Subject: [PATCH 3/7] Move common native unwinder logic deletion to its own function --- src/profiler.rs | 86 +++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/src/profiler.rs b/src/profiler.rs index ab5833f..4da224f 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -1,5 +1,6 @@ use libbpf_rs::OpenObject; use std::collections::hash_map::Entry; +use std::collections::hash_map::OccupiedEntry; use std::collections::HashMap; use std::fs; use std::mem::size_of; @@ -532,31 +533,13 @@ impl Profiler { .known_executables .entry(mapping.executable_id) { - Self::delete_bpf_pages( - &self.native_unwinder, - mapping.start_addr, - mapping.end_addr, - mapping.executable_id, - ); - Self::delete_bpf_mappings( - &self.native_unwinder, + Self::delete_bpf_native_unwind_all( pid, - mapping.start_addr, - mapping.end_addr, - ); - let res = Self::delete_bpf_unwind_info_map( &mut self.native_unwinder, - entry.get().bucket_id, - mapping.executable_id, + mapping, + entry, &mut self.native_unwind_state.unwind_info_bucket_usage, ); - if res.is_err() { - info!("deleting the BPF unwind 
info array failed with {:?}", res); - } - - // The object file (`object_files`) is not removed here as we still need it for - // normalization before sending the profiles. - entry.remove_entry(); } } } @@ -582,35 +565,13 @@ impl Profiler { .known_executables .entry(mapping.executable_id) { - // Delete unwind info. - Self::delete_bpf_pages( - &self.native_unwinder, - mapping.start_addr, - mapping.end_addr, - mapping.executable_id, - ); - Self::delete_bpf_mappings( - &self.native_unwinder, + Self::delete_bpf_native_unwind_all( pid, - mapping.start_addr, - mapping.end_addr, - ); - let res = Self::delete_bpf_unwind_info_map( &mut self.native_unwinder, - entry.get().bucket_id, - mapping.executable_id, + mapping, + entry, &mut self.native_unwind_state.unwind_info_bucket_usage, ); - if res.is_err() { - info!( - "deleting the BPF unwind info array failed with {:?}", - res - ); - } - - // The object file (`object_files`) is not removed here as we still need it for - // normalization before sending the profiles. - entry.remove_entry(); } } } @@ -967,6 +928,39 @@ impl Profiler { res } + /// Called when a process exits or a mapping gets unmapped. Removing the + /// process entry is the responsibility of the caller. + fn delete_bpf_native_unwind_all( + pid: Pid, + native_unwinder: &mut ProfilerSkel, + mapping: &ExecutableMapping, + entry: OccupiedEntry, + unwind_info_bucket_usage: &mut [usize], + ) { + Self::delete_bpf_mappings(native_unwinder, pid, mapping.start_addr, mapping.end_addr); + + Self::delete_bpf_pages( + native_unwinder, + mapping.start_addr, + mapping.end_addr, + mapping.executable_id, + ); + + let res = Self::delete_bpf_unwind_info_map( + native_unwinder, + entry.get().bucket_id, + mapping.executable_id, + unwind_info_bucket_usage, + ); + if res.is_err() { + info!("deleting the BPF unwind info array failed with {:?}", res); + } + + // The object file (`object_files`) is not removed here as we still need it for + // normalization before sending the profiles. 
+ entry.remove_entry(); + } + fn is_bucket_full(unwind_info_bucket_usage: &[usize], bucket_id: usize) -> bool { unwind_info_bucket_usage[bucket_id] >= MAX_OUTER_UNWIND_MAP_ENTRIES as usize } From 0c2e12200a5a334ed2b21bd5d08da688699cedce Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Fri, 3 Jan 2025 11:04:59 +0000 Subject: [PATCH 4/7] Add unwind information eviction This rather large commit adds unwind information eviction. This can be triggered in two ways: if an unwind information bucket is full, the object file that last appeared in the profiles will be evicted to leave space for the new unwind info. There's also a new flag (`--max-native-unwind-info-size-mb`) that will be used as a rough limit for the size of BPF maps used to store unwind information. By default there's no limit on the unwind info stored in BPF maps. Additionally, there's a new event to notify of addresses that don't have unwind information as now this can happen after an eviction. Several other changes were made too, such as a bugfix on how the unwind info pages were being deleted, and the addition of `--enable-deadlock-detector` to start a thread that checks for deadlocks in `parking_lot`'s locking facilities. Future changes ============== Something that stood out during the development of this feature is that the generation of compact unwind information from .eh_frame data might be called more frequently. This is perhaps something worth caching on disk, but that'll happen in another PR. Test Plan ========= Ran for a while without issues, also with lower memory limits set. 
--- Cargo.lock | 24 +++ Cargo.toml | 1 + src/bpf/profiler.bpf.c | 32 +-- src/bpf/profiler.h | 3 +- src/cli/args.rs | 8 + src/cli/main.rs | 22 +- src/profiler.rs | 476 ++++++++++++++++++++++++++++++----------- 7 files changed, 420 insertions(+), 146 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23939a7..22f720c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1372,6 +1372,7 @@ dependencies = [ "nix", "object", "page_size", + "parking_lot", "perf-event-open-sys", "plain", "primal", @@ -1701,16 +1702,29 @@ dependencies = [ "winapi", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + [[package]] name = "parking_lot_core" version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ + "backtrace", "cfg-if", "libc", + "petgraph", "redox_syscall", "smallvec", + "thread-id", "windows-targets", ] @@ -2576,6 +2590,16 @@ dependencies = [ "syn", ] +[[package]] +name = "thread-id" +version = "4.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe8f25bbdd100db7e1d34acf7fd2dc59c4bf8f7483f505eaa7d4f12f76cc0ea" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "thread_local" version = "1.1.8" diff --git a/Cargo.toml b/Cargo.toml index 29b4096..6503fd9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,6 +61,7 @@ thiserror = { workspace = true } errno = { workspace = true } procfs = { workspace = true } nix = { workspace = true, features = ["user"] } +parking_lot = { version = "0.12.3", features = ["deadlock_detection"] } [dev-dependencies] assert_cmd = { version = "2.0.16" } diff --git a/src/bpf/profiler.bpf.c b/src/bpf/profiler.bpf.c index 5216344..7d9c28f 100644 --- 
a/src/bpf/profiler.bpf.c +++ b/src/bpf/profiler.bpf.c @@ -192,26 +192,21 @@ find_page(mapping_t *mapping, u64 object_relative_pc, u64 *left, u64 *right) { return NULL; } - -static __always_inline void event_new_process(struct bpf_perf_event_data *ctx, int per_process_id) { - Event event = { - .type = EVENT_NEW_PROCESS, - .pid = per_process_id, - }; - - bool *is_rate_limited = bpf_map_lookup_elem(&rate_limits, &event); +static __always_inline void send_event(Event *event, struct bpf_perf_event_data *ctx) { + bool *is_rate_limited = bpf_map_lookup_elem(&rate_limits, event); if (is_rate_limited != NULL && *is_rate_limited) { - LOG("[debug] event_new_process was rate limited"); + LOG("[debug] send_event was rate limited"); return; } - if (bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(Event)) < 0) { + if (bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event, sizeof(Event)) < 0) { bump_unwind_error_sending_new_process_event(); } - LOG("[debug] event_new_process event sent"); + LOG("[debug] event type %d sent", event->type); bool rate_limited = true; - bpf_map_update_elem(&rate_limits, &event, &rate_limited, BPF_ANY); + + bpf_map_update_elem(&rate_limits, event, &rate_limited, BPF_ANY); } // Kernel addresses have the top bits set. 
@@ -398,7 +393,12 @@ int dwarf_unwind(struct bpf_perf_event_data *ctx) { u64 right = 0; void *inner = find_page(mapping, object_relative_pc_high, &left, &right); if (inner == NULL) { - // TODO: add counter + Event event = { + .type = EVENT_NEED_UNWIND_INFO, + .pid = per_process_id, + .address = unwind_state->ip, + }; + send_event(&event, ctx); return 1; } @@ -672,7 +672,11 @@ int on_event(struct bpf_perf_event_data *ctx) { return 0; } - event_new_process(ctx, per_process_id); + Event event = { + .type = EVENT_NEW_PROCESS, + .pid = per_process_id, + }; + send_event(&event, ctx); return 0; } diff --git a/src/bpf/profiler.h b/src/bpf/profiler.h index c2d0bc7..4963b7b 100644 --- a/src/bpf/profiler.h +++ b/src/bpf/profiler.h @@ -184,12 +184,13 @@ typedef struct { enum event_type { EVENT_NEW_PROCESS = 1, - // EVENT_NEED_UNWIND_INFO = 2, need a way to signal of new loaded mappings + EVENT_NEED_UNWIND_INFO = 2, }; typedef struct { enum event_type type; int pid; // use right name here (tgid?) + u64 address; } Event; enum program { diff --git a/src/cli/args.rs b/src/cli/args.rs index 0980f04..a4bba0c 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -136,4 +136,12 @@ pub(crate) struct CliArgs { pub(crate) symbolizer: Symbolizer, #[arg(long, default_value_t, value_enum)] pub(crate) debug_info_backend: DebugInfoBackend, + #[arg( + long, + default_value_t = ProfilerConfig::default().max_native_unwind_info_size_mb, + help = "approximate max size in megabytes used for the BPF maps that hold unwind information" + )] + pub(crate) max_native_unwind_info_size_mb: i32, + #[arg(long, help = "enable parking_lot's deadlock detector")] + pub(crate) enable_deadlock_detector: bool, } diff --git a/src/cli/main.rs b/src/cli/main.rs index b58082b..051cff8 100644 --- a/src/cli/main.rs +++ b/src/cli/main.rs @@ -55,10 +55,28 @@ fn panic_thread_hook() { })); } +/// Starts `parking_lot`'s deadlock detector. 
+fn start_deadlock_detector() { + std::thread::spawn(move || loop { + std::thread::sleep(std::time::Duration::from_secs(1)); + for deadlock in parking_lot::deadlock::check_deadlock() { + for deadlock in deadlock { + eprintln!( + "Found a deadlock! {}: {:?}", + deadlock.thread_id(), + deadlock.backtrace() + ); + } + } + }); +} + fn main() -> Result<(), Box> { panic_thread_hook(); - let args = CliArgs::parse(); + if args.enable_deadlock_detector { + start_deadlock_detector(); + } if let Some(path) = args.show_unwind_info { show_unwind_info(&path); @@ -277,7 +295,7 @@ mod tests { cmd.arg("--help"); cmd.assert().success(); let actual = String::from_utf8(cmd.unwrap().stdout).unwrap(); - insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids \n Specific PIDs to profile\n\n --tids \n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info \n Show unwind info for given binary\n\n --show-info \n Show build ID for given binary\n\n -D, --duration \n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging \n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq \n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format \n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path \n Path for the generated profile\n\n --profile-name \n Name for the generated profile\n\n --sender \n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --server-url \n \n\n --perf-buffer-bytes \n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks \n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks \n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits \n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer \n [default: local]\n [possible values: local, none]\n\n --debug-info-backend \n [default: none]\n [possible values: none, copy, remote]\n\n -h, --help\n Print help (see a summary with '-h')\n""#); + insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids \n Specific PIDs to profile\n\n --tids \n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info \n Show unwind info for given binary\n\n --show-info \n Show build ID for given binary\n\n -D, --duration \n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging \n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq \n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format \n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path \n Path for the generated profile\n\n --profile-name \n Name for the generated profile\n\n --sender \n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --server-url \n \n\n --perf-buffer-bytes \n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks \n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks \n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits \n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer \n [default: local]\n [possible values: local, none]\n\n --debug-info-backend \n [default: none]\n [possible values: none, copy, remote]\n\n --max-native-unwind-info-size-mb \n approximate max size in megabytes used for the BPF maps that hold unwind information\n \n [default: 2147483647]\n\n --enable-deadlock-detector\n enable parking_lot's deadlock detector\n\n -h, --help\n Print help (see a summary with '-h')\n""#); } #[rstest] diff --git a/src/profiler.rs b/src/profiler.rs index 4da224f..a0e098f 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -1,4 +1,5 @@ use libbpf_rs::OpenObject; +use parking_lot::RwLock; use std::collections::hash_map::Entry; use std::collections::hash_map::OccupiedEntry; use std::collections::HashMap; @@ -9,7 +10,7 @@ use std::mem::MaybeUninit; use std::os::fd::{AsFd, AsRawFd}; use std::os::unix::fs::FileExt; use std::path::PathBuf; -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use std::thread; use std::time::{Duration, Instant}; @@ -52,11 +53,15 @@ pub enum TracerEvent { pub struct KnownExecutableInfo { bucket_id: u32, + unwind_info_start_address: u64, + unwind_info_end_address: u64, + last_used: Instant, } pub struct NativeUnwindState { known_executables: HashMap, unwind_info_bucket_usage: Vec, + last_eviction: Instant, } impl NativeUnwindState { @@ -64,6 +69,7 @@ impl NativeUnwindState { NativeUnwindState { known_executables: 
HashMap::new(), unwind_info_bucket_usage: vec![0; len], + last_eviction: Instant::now(), } } } @@ -106,6 +112,10 @@ pub struct Profiler { native_unwind_info_bucket_sizes: Vec, /// Deals with debug information debug_info_manager: Box, + /// Maximum size of BPF unwind information maps. A higher value will result in + /// evictions which might reduce the quality of the profiles and in more work + /// for the profiler. + max_native_unwind_info_size_mb: i32, } pub struct ProfilerConfig { @@ -122,6 +132,7 @@ pub struct ProfilerConfig { pub exclude_self: bool, pub native_unwind_info_bucket_sizes: Vec, pub debug_info_manager: Box, + pub max_native_unwind_info_size_mb: i32, } impl Default for ProfilerConfig { @@ -143,6 +154,7 @@ impl Default for ProfilerConfig { 2_560_000, 3_840_000, 5_120_000, 7_680_000, ], debug_info_manager: Box::new(DebugInfoBackendNull {}), + max_native_unwind_info_size_mb: i32::MAX, } } } @@ -367,6 +379,7 @@ impl Profiler { exclude_self: profiler_config.exclude_self, native_unwind_info_bucket_sizes: profiler_config.native_unwind_info_bucket_sizes, debug_info_manager: profiler_config.debug_info_manager, + max_native_unwind_info_size_mb: profiler_config.max_native_unwind_info_size_mb, } } @@ -445,11 +458,10 @@ impl Profiler { thread::spawn(move || loop { match profile_receive.recv() { Ok(profile) => { - collector.lock().unwrap().collect( - profile, - &procs.read().unwrap(), - &object_files.read().unwrap(), - ); + collector + .lock() + .unwrap() + .collect(profile, &procs.read(), &object_files.read()); } Err(_e) => { // println!("failed to receive event {:?}", e); @@ -502,6 +514,8 @@ impl Profiler { // .maps() // .rate_limits() // .delete(unsafe { plain::as_bytes(&event) }); + } else if event.type_ == event_type_EVENT_NEED_UNWIND_INFO { + self.event_need_unwind_info(event.pid, event.address); } else { error!("unknown event type {}", event.type_); } @@ -516,7 +530,7 @@ impl Profiler { pub fn handle_process_exit(&mut self, pid: Pid) { // TODO: remove 
ratelimits for this process. - let mut procs = self.procs.write().expect("lock"); + let mut procs = self.procs.write(); match procs.get_mut(&pid) { Some(proc_info) => { debug!("marking process {} as exited", pid); @@ -526,7 +540,7 @@ impl Profiler { let _ = Self::delete_bpf_process(&self.native_unwinder, pid); for mapping in &mut proc_info.mappings.0 { - let mut object_files = self.object_files.write().expect("lock"); + let mut object_files = self.object_files.write(); if mapping.mark_as_deleted(&mut object_files) { if let Entry::Occupied(entry) = self .native_unwind_state @@ -551,14 +565,14 @@ impl Profiler { } pub fn handle_munmap(&mut self, pid: Pid, start_address: u64) { - let mut procs = self.procs.write().expect("lock"); + let mut procs = self.procs.write(); match procs.get_mut(&pid) { Some(proc_info) => { for mapping in &mut proc_info.mappings.0 { if mapping.start_addr <= start_address && start_address <= mapping.end_addr { debug!("found memory mapping starting at {:x} for pid {} while handling munmap", start_address, pid); - let mut object_files = self.object_files.write().expect("lock"); + let mut object_files = self.object_files.write(); if mapping.mark_as_deleted(&mut object_files) { if let Entry::Occupied(entry) = self .native_unwind_state @@ -624,6 +638,63 @@ impl Profiler { ); } + /// Accounts what executables got used last. This is needed know what unwind information + /// to evict. 
+ pub fn bump_executable_stats(&mut self, raw_samples: &[RawAggregatedSample]) { + for raw_sample in raw_samples { + let pid = raw_sample.pid; + let ustack = raw_sample.ustack; + let Some(ustack) = ustack else { + continue; + }; + + for (i, addr) in ustack.addresses.into_iter().enumerate() { + if ustack.len <= i.try_into().unwrap() { + break; + } + + let mapping = self + .procs + .read() + .get(&pid) + .unwrap() + .mappings + .for_address(addr); + if let Some(mapping) = mapping { + if let Some(executable) = self + .native_unwind_state + .known_executables + .get_mut(&mapping.executable_id) + { + executable.last_used = Instant::now(); + } + } + } + } + } + + /// Returns the executables, optionally filtered by a bucket, and sorted by when they + /// were used last. + pub fn last_used_executables( + &self, + bucket_id: Option, + ) -> Vec<(ExecutableId, &KnownExecutableInfo)> { + let mut last_used_executable_ids = Vec::new(); + + for (executable_id, executable_info) in &self.native_unwind_state.known_executables { + if let Some(bucket_id) = bucket_id { + if bucket_id != executable_info.bucket_id { + continue; + } + } + + last_used_executable_ids.push((*executable_id, executable_info)); + } + + last_used_executable_ids.sort_by(|a, b| a.1.last_used.cmp(&b.1.last_used)); + last_used_executable_ids + } + /// Collect the BPF unwinder statistics and aggregate the per CPU values. 
pub fn collect_unwinder_stats(&self) { for key in self.native_unwinder.maps.percpu_stats.keys() { @@ -743,6 +814,7 @@ impl Profiler { debug!("===== got {} unique stacks", all_stacks_bytes.len()); + self.bump_executable_stats(&result); self.collect_unwinder_stats(); self.clear_maps(); self.setup_perf_events(); @@ -750,7 +822,7 @@ impl Profiler { } fn process_is_known(&self, pid: Pid) -> bool { - self.procs.read().expect("lock").get(&pid).is_some() + self.procs.read().get(&pid).is_some() } fn add_bpf_unwind_info(inner: &MapHandle, unwind_info: &[CompactUnwindRow]) { @@ -837,10 +909,13 @@ impl Profiler { // TODO: ensure that at least one entry can be removed. Some might fail as // we prefer to not have to re-read the unwind information and we might attempt // deleting entries that are not present. - let _ = bpf + let ret = bpf .maps .executable_to_page .delete(unsafe { plain::as_bytes(&key) }); + if ret.is_err() { + error!("failed removing BPF pages"); + } } } @@ -941,8 +1016,8 @@ impl Profiler { Self::delete_bpf_pages( native_unwinder, - mapping.start_addr, - mapping.end_addr, + entry.get().unwind_info_start_address, + entry.get().unwind_info_end_address, mapping.executable_id, ); @@ -953,7 +1028,7 @@ impl Profiler { unwind_info_bucket_usage, ); if res.is_err() { - info!("deleting the BPF unwind info array failed with {:?}", res); + error!("deleting the BPF unwind info array failed with {:?}", res); } // The object file (`object_files`) is not removed here as we still need it for @@ -961,10 +1036,12 @@ impl Profiler { entry.remove_entry(); } - fn is_bucket_full(unwind_info_bucket_usage: &[usize], bucket_id: usize) -> bool { - unwind_info_bucket_usage[bucket_id] >= MAX_OUTER_UNWIND_MAP_ENTRIES as usize + /// Returns whether an unwind information bucket is full. 
+ fn is_bucket_full(unwind_info_bucket_usage: &[usize], bucket_id: u32) -> bool { + unwind_info_bucket_usage[bucket_id as usize] >= MAX_OUTER_UNWIND_MAP_ENTRIES as usize } + /// Returns the bucket_id and bucket size for a some unwind information. fn bucket_for_unwind_info( unwind_info_len: usize, native_unwind_info_bucket_sizes: &[u32], @@ -979,7 +1056,24 @@ impl Profiler { None } - fn insert_unwind_info_map( + /// Returns the approximate size in megabytes of the BPF unwind maps. + fn unwind_info_memory_usage( + native_unwind_info_bucket_sizes: &[u32], + unwind_info_bucket_usage: &[usize], + ) -> u32 { + let mut total_mb = 0; + + for (bucket_size, bucket_usage) in native_unwind_info_bucket_sizes + .iter() + .zip(unwind_info_bucket_usage) + { + total_mb += Self::unwind_info_size_mb(*bucket_size) * *bucket_usage as u32; + } + + total_mb + } + + fn create_and_insert_unwind_info_map( bpf: &mut ProfilerSkel, executable_id: u64, unwind_info_len: usize, @@ -1021,7 +1115,7 @@ impl Profiler { } } - fn add_unwind_info(&mut self, pid: Pid) { + fn add_unwind_info_for_process(&mut self, pid: Pid) { if !self.process_is_known(pid) { panic!("add_unwind_info -- expected process to be known"); } @@ -1033,7 +1127,6 @@ impl Profiler { .procs .clone() .read() - .expect("lock") .get(&pid) .unwrap() .mappings @@ -1057,7 +1150,7 @@ impl Profiler { panic!("build id should be present for file backed mappings"); } - let object_file = self.object_files.read().unwrap(); + let object_file = self.object_files.read(); // We might know about a mapping that failed to open for some reason. let object_file_info = object_file.get(&mapping.executable_id); if object_file_info.is_none() { @@ -1078,6 +1171,7 @@ impl Profiler { } else { load_address = mapping.load_address; } + std::mem::drop(object_file); match self .native_unwind_state @@ -1085,7 +1179,7 @@ impl Profiler { .get(&mapping.executable_id) { Some(_) => { - // == Add mapping + // Add mapping. 
bpf_mappings.push(mapping_t { executable_id: mapping.executable_id, load_address, @@ -1105,7 +1199,7 @@ impl Profiler { } } - // == Add mapping + // Add mapping. bpf_mappings.push(mapping_t { load_address, begin: mapping.start_addr, @@ -1118,116 +1212,223 @@ impl Profiler { }, }); - let object_files = self.object_files.read().unwrap(); - let executable = object_files.get(&mapping.executable_id).unwrap(); - let executable_path = executable.open_file_path(); - - // == Fetch unwind info, so far, this is in mem - // todo, pass file handle - let span = span!( - Level::DEBUG, - "calling in_memory_unwind_info", - "{}", - executable.path.to_string_lossy() - ) - .entered(); - - let found_unwind_info: Vec = - match compact_unwind_info(&executable_path.to_string_lossy()) { - Ok(unwind_info) => unwind_info, - Err(e) => { - let executable_path_str = executable.path.to_string_lossy(); - let known_naughty = executable_path_str.contains("libicudata"); - - // tracing doesn't support a level chosen at runtime: https://github.com/tokio-rs/tracing/issues/2730 - if known_naughty { - debug!( - "failed to get unwind information for {} with {}", - executable_path_str, e - ); - } else { - info!( - "failed to get unwind information for {} with {}", - executable_path_str, e - ); + // Fetch unwind info and store it in in BPF maps. + self.add_unwind_information_for_executable(mapping.executable_id); + } - if let Err(e) = log_unwind_info_sections(&executable_path) { - warn!("log_unwind_info_sections failed with {}", e); - } + // Store all mappings in BPF maps. + if let Err(e) = Self::add_bpf_mappings(&self.native_unwinder, pid, &bpf_mappings) { + warn!("failed to add BPF mappings due to {:?}", e); + } + // Add entry just with the pid to signal processes that we already know about. 
+ if let Err(e) = Self::add_bpf_process(&self.native_unwinder, pid) { + warn!("failed to add BPF process due to {:?}", e); + } + } + + /// Returns the approximate size in megabytes of _n_ rows of unwind information + /// in a BPF map. + fn unwind_info_size_mb(unwind_info_len: u32) -> u32 { + let overhead = 1.02; // Account for internal overhead of the BPF maps + ((unwind_info_len * 8 * 8) as f64 * overhead / 1e+6) as u32 + } + + fn add_unwind_information_for_executable(&mut self, executable_id: ExecutableId) { + let object_files = self.object_files.read(); + let executable_info = object_files.get(&executable_id).unwrap(); + let executable_path_open = executable_info.open_file_path(); + let executable_path = executable_info.path.to_string_lossy().to_string(); + std::mem::drop(object_files); + + let span = span!( + Level::DEBUG, + "calling in_memory_unwind_info", + "{}", + executable_path + ) + .entered(); + + let unwind_info: Vec = + match compact_unwind_info(&executable_path_open.to_string_lossy()) { + Ok(unwind_info) => unwind_info, + Err(e) => { + let executable_path_str = executable_path; + let known_naughty = executable_path_str.contains("libicudata"); + + // tracing doesn't support a level chosen at runtime: https://github.com/tokio-rs/tracing/issues/2730 + if known_naughty { + debug!( + "failed to get unwind information for {} with {}", + executable_path_str, e + ); + } else { + info!( + "failed to get unwind information for {} with {}", + executable_path_str, e + ); + + if let Err(e) = log_unwind_info_sections(&executable_path_open) { + warn!("log_unwind_info_sections failed with {}", e); } - continue; } - }; - span.exit(); - - // Evicting object files can get complicated real quick... this can be implemented once - // we add support for on-demand unwind info generation when we spot a code area that - // we don't know about yet. 
- if let Some((bucket_id, _)) = Self::bucket_for_unwind_info( - found_unwind_info.len(), - &self.native_unwind_info_bucket_sizes, - ) { - if Self::is_bucket_full( - &self.native_unwind_state.unwind_info_bucket_usage, - bucket_id as usize, - ) { - warn!( - "unwind info bucket for {} is full, pid {} won't be profiled properly", - executable.path.to_string_lossy(), - pid - ); - // Here we could undo all work done so far. return; } - } + }; + span.exit(); + + let bucket = + Self::bucket_for_unwind_info(unwind_info.len(), &self.native_unwind_info_bucket_sizes); - let inner_map_and_id = Self::insert_unwind_info_map( - &mut self.native_unwinder, - mapping.executable_id, - found_unwind_info.len(), - &self.native_unwind_info_bucket_sizes, - &mut self.native_unwind_state.unwind_info_bucket_usage, + let Some((bucket_id, _)) = bucket else { + warn!( + "unwind information too big for executable {} ({} unwind rows)", + executable_path, + unwind_info.len() ); + return; + }; - // Add all the unwind information. - match inner_map_and_id { - Some((inner, bucket_id)) => { - Self::add_bpf_unwind_info(&inner, &found_unwind_info); - Self::add_bpf_pages( - &self.native_unwinder, - &found_unwind_info, - mapping.executable_id, + if !self.maybe_evict_executables(bucket_id, self.max_native_unwind_info_size_mb) { + return; + } + + let inner_map_and_id = Self::create_and_insert_unwind_info_map( + &mut self.native_unwinder, + executable_id, + unwind_info.len(), + &self.native_unwind_info_bucket_sizes, + &mut self.native_unwind_state.unwind_info_bucket_usage, + ); + + // Add all unwind information and its pages. 
+ match inner_map_and_id { + Some((inner, bucket_id)) => { + Self::add_bpf_unwind_info(&inner, &unwind_info); + Self::add_bpf_pages( + &self.native_unwinder, + &unwind_info, + executable_id, + bucket_id, + ); + let unwind_info_start_address = unwind_info.first().unwrap().pc; + let unwind_info_end_address = unwind_info.last().unwrap().pc; + self.native_unwind_state.known_executables.insert( + executable_id, + KnownExecutableInfo { bucket_id, - ); - self.native_unwind_state - .known_executables - .insert(mapping.executable_id, KnownExecutableInfo { bucket_id }); - } - None => { - warn!( - "unwind information too big for executable {} ({} unwind rows)", - obj_path.display(), - found_unwind_info.len() - ); - } + unwind_info_start_address, + unwind_info_end_address, + last_used: Instant::now(), + }, + ); + } + None => { + warn!( + "unwind information too big for executable {} ({} unwind rows)", + executable_path, + unwind_info.len() + ); } + } + + debug!( + "Unwind rows for executable {}: {}", + executable_path, + &unwind_info.len(), + ); + } + + /// Evict executables if a bucket is full or if the max memory is exceeded. Note that + /// the memory accounting is approximate. If returns whether the unwind information can + /// be added to added BPF maps. + /// + /// * `bucket_id`: The unwind information bucket where the unwind information will be added. + /// * `max_memory_mb`: The maximum memory that all unwind information should account for in BPF maps. + fn maybe_evict_executables(&mut self, bucket_id: u32, max_memory_mb: i32) -> bool { + let mut executables_to_evict = Vec::new(); + + // Check if bucket is full. 
+ if Self::is_bucket_full( + &self.native_unwind_state.unwind_info_bucket_usage, + bucket_id, + ) { + debug!("unwind info bucket for is full",); + let last_used = self.last_used_executables(Some(bucket_id)); + let last_used_ids: Vec<_> = last_used.iter().map(|el| el.0).collect(); + let last_used_id = last_used_ids + .first() + .expect("should contain at least one element"); + + executables_to_evict.push(*last_used_id); + } + + // Check if this executable unwind info would exceed the approximate memory limit. + let total_memory_used_mb = Self::unwind_info_memory_usage( + &self.native_unwind_info_bucket_sizes, + &self.native_unwind_state.unwind_info_bucket_usage, + ); + + let this_unwind_info_mb = + Self::unwind_info_size_mb(self.native_unwind_info_bucket_sizes[bucket_id as usize]); + let total_memory_used_after_mb = total_memory_used_mb + this_unwind_info_mb; + let to_free_mb = std::cmp::max(0, total_memory_used_after_mb as i32 - max_memory_mb) as u32; + + let should_evict = !executables_to_evict.is_empty() || to_free_mb != 0; + let cant_evict = + self.native_unwind_state.last_eviction.elapsed() < std::time::Duration::from_secs(5); - debug!( - "======== Unwind rows for executable {}: {} with id {}", - obj_path.display(), - &found_unwind_info.len(), - self.native_unwind_state.known_executables.len(), + // Do not evict unwind information too often. + if should_evict && cant_evict { + return false; + } + + // Figure out what are the unwind info we should evict to stay below the memory limit. + let mut could_be_freed_mb = 0; + for (executable_id, executable_info) in self.last_used_executables(None) { + let unwind_size_mb = Self::unwind_info_size_mb( + self.native_unwind_info_bucket_sizes[executable_info.bucket_id as usize], ); - } // Added all mappings + if could_be_freed_mb >= to_free_mb { + break; + } - // Add mappings to BPF maps. 
- if let Err(e) = Self::add_bpf_mappings(&self.native_unwinder, pid, &bpf_mappings) { - warn!("failed to add BPF mappings due to {:?}", e); + could_be_freed_mb += unwind_size_mb; + executables_to_evict.push(executable_id); } - // Add entry just with the pid to signal processes that we already know about. - if let Err(e) = Self::add_bpf_process(&self.native_unwinder, pid) { - warn!("failed to add BPF process due to {:?}", e); + + debug!( + "evicting unwind info for {} executables", + executables_to_evict.len() + ); + for executable_id in executables_to_evict { + let entry = self + .native_unwind_state + .known_executables + .entry(executable_id); + if let Entry::Occupied(entry) = entry { + Self::delete_bpf_pages( + &self.native_unwinder, + entry.get().unwind_info_start_address, + entry.get().unwind_info_end_address, + executable_id, + ); + + let ret = Self::delete_bpf_unwind_info_map( + &mut self.native_unwinder, + entry.get().bucket_id, + executable_id, + &mut self.native_unwind_state.unwind_info_bucket_usage, + ); + if ret.is_err() { + debug!("failed to evict unwind info map with {:?}", ret); + } + entry.remove_entry(); + } + + self.native_unwind_state.last_eviction = Instant::now(); } + + true } fn should_profile(&self, pid: Pid) -> bool { @@ -1249,13 +1450,13 @@ impl Profiler { if self.process_is_known(pid) { // We hit this when we had to reset the state of the BPF maps but we know about this process. 
- self.add_unwind_info(pid); + self.add_unwind_info_for_process(pid); return; } match self.add_proc(pid) { Ok(()) => { - self.add_unwind_info(pid); + self.add_unwind_info_for_process(pid); } Err(_e) => { // probabaly a procfs race @@ -1263,6 +1464,26 @@ impl Profiler { } } + fn event_need_unwind_info(&mut self, pid: Pid, address: u64) { + let procs = self.procs.read(); + let proc_info = procs.get(&pid); + let Some(proc_info) = proc_info else { + return; + }; + + let executable_id = if let Some(mapping) = proc_info.mappings.for_address(address) { + Some(mapping.executable_id) + } else { + info!("event_need_unwind_info, mapping not known"); + None + }; + std::mem::drop(procs); + + if let Some(executable_id) = executable_id { + self.add_unwind_information_for_executable(executable_id); + } + } + pub fn add_proc(&mut self, pid: Pid) -> anyhow::Result<()> { let proc = procfs::process::Process::new(pid)?; let maps = proc.maps()?; @@ -1341,8 +1562,8 @@ impl Profiler { map.address.0 }; - let mut object_files = object_files_clone.write().expect("lock object_files"); let main_exec = mappings.is_empty(); + let mut object_files = object_files_clone.write(); mappings.push(ExecutableMapping { executable_id, @@ -1420,7 +1641,7 @@ impl Profiler { if let Ok((vdso_path, object_file)) = fetch_vdso_info(pid, map.address.0, map.address.1, map.offset) { - let mut object_files = object_files_clone.write().expect("lock"); + let mut object_files = object_files_clone.write(); let Ok(executable_id) = object_file.id() else { debug!("vDSO object file id failed"); continue; @@ -1472,18 +1693,15 @@ impl Profiler { status: ProcessStatus::Running, mappings: ExecutableMappings(mappings), }; - self.procs - .clone() - .write() - .expect("lock") - .insert(pid, proc_info); + self.procs.clone().write().insert(pid, proc_info); Ok(()) } fn handle_event(sender: &Arc>, data: &[u8]) { - let event = plain::from_bytes(data).expect("handle event serde"); - sender.send(*event).expect("handle event send"); + let 
mut event = Event::default(); + plain::copy_from_bytes(&mut event, data).expect("handle event serde"); + sender.send(event).expect("handle event send"); } fn handle_lost_events(cpu: i32, count: u64) { From 985c193c265f7a940c64c12e0d0a29d1034e2b4e Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Mon, 6 Jan 2025 10:09:24 +0000 Subject: [PATCH 5/7] Do not crash while being debugged Make lightswitch debuggable by `ptrace(2)` based tools by retrying `EINTR` during poll rather than panicking. GDB and strace can now be used without crashes. --- src/profiler.rs | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/profiler.rs b/src/profiler.rs index a0e098f..0344586 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -20,6 +20,7 @@ use itertools::Itertools; use libbpf_rs::num_possible_cpus; use libbpf_rs::skel::SkelBuilder; use libbpf_rs::skel::{OpenSkel, Skel}; +use libbpf_rs::ErrorKind; use libbpf_rs::MapCore; use libbpf_rs::MapHandle; use libbpf_rs::MapType; @@ -408,7 +409,7 @@ impl Profiler { self.tracers.attach().expect("attach tracers"); - // New process events. + // Unwinder events. let chan_send = self.new_proc_chan_send.clone(); let perf_buffer = PerfBufferBuilder::new(&self.native_unwinder.maps.events) .pages(self.perf_buffer_bytes / page_size::get()) @@ -417,15 +418,21 @@ impl Profiler { }) .lost_cb(Self::handle_lost_events) .build() - // TODO: Instead of unwrap, consume and emit any error, with - // .expect() perhaps?
- .unwrap(); - - let _poll_thread = thread::spawn(move || loop { - perf_buffer.poll(Duration::from_millis(100)).expect("poll"); + .expect("set up perf buffer for unwinder events"); + + let _unwinder_poll_thread = thread::spawn(move || loop { + match perf_buffer.poll(Duration::from_millis(100)) { + Ok(_) => {} + Err(err) => { + if err.kind() != ErrorKind::Interrupted { + error!("polling events perf buffer failed with {:?}", err); + break; + } + } + } }); - // Trace events are received here, such as memory unmaps. + // Tracer events. let tracers_send = self.tracers_chan_send.clone(); let tracers_events_perf_buffer = PerfBufferBuilder::new(&self.tracers.maps.tracer_events) .pages(self.perf_buffer_bytes / page_size::get()) @@ -440,14 +447,18 @@ impl Profiler { warn!("lost {} events from the tracers", lost_count); }) .build() - // TODO: Instead of unwrap, consume and emit any error, with - // .expect() perhaps? - .unwrap(); + .expect("set up perf buffer for tracer events"); let _tracers_poll_thread = thread::spawn(move || loop { - tracers_events_perf_buffer - .poll(Duration::from_millis(100)) - .expect("poll"); + match tracers_events_perf_buffer.poll(Duration::from_millis(100)) { + Ok(_) => {} + Err(err) => { + if err.kind() != ErrorKind::Interrupted { + error!("polling tracers perf buffer failed with {:?}", err); + break; + } + } + } }); let profile_receive = self.profile_receive.clone(); From 5926f46cfcf2e959aeca2b5ea611204baf1ec7fa Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Tue, 7 Jan 2025 11:18:57 +0000 Subject: [PATCH 6/7] Always check if the unwind information has already been loaded in BPF maps This was only done in one of the code paths --- src/profiler.rs | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/src/profiler.rs b/src/profiler.rs index 0344586..ad1969d 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -73,6 +73,11 @@ impl NativeUnwindState { last_eviction: 
Instant::now(), } } + + /// Checks whether the given `executable_id` is loaded in the BPF maps. + fn is_known(&self, executable_id: ExecutableId) -> bool { + self.known_executables.contains_key(&executable_id) + } } pub struct Profiler { @@ -1169,7 +1174,6 @@ impl Profiler { continue; } let object_file_info = object_file_info.unwrap(); - let obj_path = object_file_info.path.clone(); // TODO: rework this logic as it's quite kludgy at the moment and this is broken with // some loaders. Particularly, Rust statically linked with musl does not work. We must @@ -1184,32 +1188,6 @@ impl Profiler { } std::mem::drop(object_file); - match self - .native_unwind_state - .known_executables - .get(&mapping.executable_id) - { - Some(_) => { - // Add mapping. - bpf_mappings.push(mapping_t { - executable_id: mapping.executable_id, - load_address, - begin: mapping.start_addr, - end: mapping.end_addr, - type_: if mapping.kind == ExecutableMappingType::Vdso { - MAPPING_TYPE_VDSO - } else { - MAPPING_TYPE_FILE - }, - }); - debug!("unwind info CACHED for executable {:?}", obj_path); - continue; - } - None => { - debug!("unwind info not found for executable {:?}", obj_path); - } - } - // Add mapping. 
bpf_mappings.push(mapping_t { load_address, @@ -1245,6 +1223,16 @@ impl Profiler { } fn add_unwind_information_for_executable(&mut self, executable_id: ExecutableId) { + if self.native_unwind_state.is_known(executable_id) { + debug!("unwind info CACHED for executable id: {:x}", executable_id); + return; + } else { + debug!( + "unwind info not found for executable id: {:x}", + executable_id + ); + } + let object_files = self.object_files.read(); let executable_info = object_files.get(&executable_id).unwrap(); let executable_path_open = executable_info.open_file_path(); From 1cd2739b137dcd6937213226e3fc673f35c24d05 Mon Sep 17 00:00:00 2001 From: Javier Honduvilla Coto Date: Wed, 8 Jan 2025 15:48:45 +0000 Subject: [PATCH 7/7] Implement unwind information persistance (#128) * Implement unwind information persistance This is not used anywhere yet, see commits after this one. Test Plan ========= Added tests. * feedback --- Cargo.lock | 1 + Cargo.toml | 2 + lightswitch-object/Cargo.toml | 2 +- src/unwind_info/mod.rs | 1 + src/unwind_info/pages.rs | 3 +- src/unwind_info/persist.rs | 282 ++++++++++++++++++++++++++++++++++ src/unwind_info/types.rs | 4 + 7 files changed, 293 insertions(+), 2 deletions(-) create mode 100644 src/unwind_info/persist.rs diff --git a/Cargo.lock b/Cargo.lock index 22f720c..ba47d70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1380,6 +1380,7 @@ dependencies = [ "prost", "rand", "reqwest", + "ring", "rstest", "tempfile", "thiserror 2.0.3", diff --git a/Cargo.toml b/Cargo.toml index 6503fd9..297efe1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ rand = "0.8.5" # workspace build dependencies have to be defined here libbpf-cargo = { version = "0.25.0-beta.0" } glob = "0.3.1" +ring = "0.17.8" [dependencies] gimli = "0.31.1" @@ -62,6 +63,7 @@ errno = { workspace = true } procfs = { workspace = true } nix = { workspace = true, features = ["user"] } parking_lot = { version = "0.12.3", features = ["deadlock_detection"] } +ring = { workspace = true 
} [dev-dependencies] assert_cmd = { version = "2.0.16" } diff --git a/lightswitch-object/Cargo.toml b/lightswitch-object/Cargo.toml index 3ce2673..9bbc92b 100644 --- a/lightswitch-object/Cargo.toml +++ b/lightswitch-object/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/javierhonduco/lightswitch" [dependencies] data-encoding = "2.6.0" -ring = "0.17.8" +ring = { workspace = true } memmap2 = { workspace = true } object = { workspace = true } anyhow = { workspace = true } diff --git a/src/unwind_info/mod.rs b/src/unwind_info/mod.rs index ae0a9e9..a7085f8 100644 --- a/src/unwind_info/mod.rs +++ b/src/unwind_info/mod.rs @@ -1,6 +1,7 @@ mod convert; mod optimize; pub mod pages; +pub mod persist; pub mod types; pub use convert::compact_unwind_info; diff --git a/src/unwind_info/pages.rs b/src/unwind_info/pages.rs index 881aef1..03d50b0 100644 --- a/src/unwind_info/pages.rs +++ b/src/unwind_info/pages.rs @@ -130,7 +130,8 @@ mod tests { let search_here = &unwind_info[(found.index as usize)..(found.len as usize)]; let found_row = search_here.iter().find(|el| el.pc == pc).unwrap(); // And that the high and low bits were done ok - assert_eq!((found_row.pc & low_bits_mask) + pc_high, found_row.pc); + let pc = found_row.pc; + assert_eq!((pc & low_bits_mask) + pc_high, pc); } } } diff --git a/src/unwind_info/persist.rs b/src/unwind_info/persist.rs new file mode 100644 index 0000000..98dec09 --- /dev/null +++ b/src/unwind_info/persist.rs @@ -0,0 +1,282 @@ +#![allow(dead_code)] +use plain::Plain; +use ring::digest::{Context, SHA256}; +use std::io::Read; +use std::io::Seek; +use std::io::SeekFrom; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; + +use crate::unwind_info::compact_unwind_info; +use crate::unwind_info::types::CompactUnwindRow; + +// To identify this binary file type. +const MAGIC_NUMBER: u32 = 0x1357531; +// Any changes to the ABI / digest must bump the version. 
+const VERSION: u32 = 1; + +type UnwindInformationDigest = u64; + +#[derive(Debug, Default)] +#[repr(C, packed)] +struct Header { + magic: u32, + version: u32, + // To ensure that the unwind information we are reading is not + // corrupted in any way we compute a hash of the unwind information + // that is checked on the read path. + unwind_info_digest: UnwindInformationDigest, + unwind_info_len: u64, +} + +/// SAFETY: Using packed C representation, which plain needs, and there is +/// the extra safety layer of the unwind information digest checked in the +/// read path, in case the data is corrupted. +unsafe impl Plain for Header {} +/// SAFETY: Using packed C representation, which plain needs, and there is +/// the extra safety layer of the unwind information digest checked in the +/// read path, in case the data is corrupted. +unsafe impl Plain for CompactUnwindRow {} + +/// Writes compact information to a given writer. +struct Writer { + executable_path: PathBuf, +} + +impl Writer { + fn new(executable_path: &Path) -> Self { + Writer { + executable_path: executable_path.to_path_buf(), + } + } + + fn write(self, writer: &mut W) -> anyhow::Result<()> { + let unwind_info = self.read_unwind_info()?; + // Write dummy header. + self.write_header(writer, 0, None)?; + let digest = self.write_unwind_info(writer, &unwind_info)?; + // Write real header. 
+ writer.seek(SeekFrom::Start(0))?; + self.write_header(writer, unwind_info.len(), Some(digest))?; + Ok(()) + } + + fn read_unwind_info(&self) -> anyhow::Result> { + compact_unwind_info(&self.executable_path.to_string_lossy()) + } + + fn write_header( + &self, + writer: &mut impl Write, + unwind_info_len: usize, + digest: Option, + ) -> anyhow::Result<()> { + let header = Header { + magic: MAGIC_NUMBER, + version: VERSION, + unwind_info_digest: digest.unwrap_or(0), + unwind_info_len: unwind_info_len.try_into()?, + }; + writer.write_all(unsafe { plain::as_bytes(&header) })?; + Ok(()) + } + + fn write_unwind_info( + &self, + writer: &mut impl Write, + unwind_info: &[CompactUnwindRow], + ) -> anyhow::Result { + let mut context = Context::new(&SHA256); + + for unwind_row in unwind_info { + let unwind_row_data = unsafe { plain::as_bytes(unwind_row) }; + context.update(unwind_row_data); + writer.write_all(unwind_row_data)?; + } + + let mut buffer = [0; 8]; + let _ = context.finish().as_ref().read(&mut buffer)?; + + Ok(u64::from_ne_bytes(buffer)) + } +} + +#[derive(Debug, thiserror::Error, PartialEq, Eq)] +pub enum ReaderError { + #[error("magic number does not match")] + MagicNumber, + #[error("version is not compatible")] + Version, + #[error("generic error: {0}")] + Generic(String), + #[error("index out of range")] + OutOfRange, + #[error("could not convert between types")] + SizeConversion, + #[error("digest does not match")] + Digest, +} + +/// Reads compact information of a bytes slice. +struct Reader<'a> { + header: Header, + data: &'a [u8], +} + +impl<'a> Reader<'a> { + pub fn new(data: &'a [u8]) -> Result { + let header = Self::parse_header(data)?; + Ok(Reader { header, data }) + } + + fn parse_header(data: &[u8]) -> Result { + let header_size = std::mem::size_of::
(); + let mut header = Header::default(); + let header_data = data.get(0..header_size).ok_or(ReaderError::OutOfRange)?; + plain::copy_from_bytes(&mut header, header_data) + .map_err(|e| ReaderError::Generic(format!("{:?}", e)))?; + + if header.magic != MAGIC_NUMBER { + return Err(ReaderError::MagicNumber); + } + + if header.version != VERSION { + return Err(ReaderError::Version); + } + + Ok(header) + } + + pub fn unwind_info(self) -> Result, ReaderError> { + let header_size = std::mem::size_of::
(); + let unwind_row_size = std::mem::size_of::(); + let unwind_info_len: usize = self + .header + .unwind_info_len + .try_into() + .map_err(|_| ReaderError::SizeConversion)?; + + let mut unwind_info = Vec::with_capacity(unwind_info_len); + let mut unwind_row = CompactUnwindRow::default(); + + let unwind_info_data = &self.data[header_size..]; + let mut context = Context::new(&SHA256); + for i in 0..unwind_info_len { + let step = i * unwind_row_size; + let unwind_row_data = unwind_info_data + .get(step..step + unwind_row_size) + .ok_or(ReaderError::OutOfRange)?; + context.update(unwind_row_data); + plain::copy_from_bytes(&mut unwind_row, unwind_row_data) + .map_err(|e| ReaderError::Generic(format!("{:?}", e)))?; + unwind_info.push(unwind_row); + } + + let mut buffer = [0; 8]; + let _ = context + .finish() + .as_ref() + .read(&mut buffer) + .map_err(|e| ReaderError::Generic(e.to_string())); + let digest = u64::from_ne_bytes(buffer); + + if self.header.unwind_info_digest != digest { + return Err(ReaderError::Digest); + } + + Ok(unwind_info) + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + use std::path::PathBuf; + + use super::*; + + #[test] + fn test_write_and_read_unwind_info() { + let mut buffer = Cursor::new(Vec::new()); + let path = PathBuf::from("/proc/self/exe"); + let writer = Writer::new(&path); + assert!(writer.write(&mut buffer).is_ok()); + + let reader = Reader::new(&buffer.get_ref()[..]); + let unwind_info = reader.unwrap().unwind_info(); + assert!(unwind_info.is_ok()); + let unwind_info = unwind_info.unwrap(); + assert_eq!(unwind_info, compact_unwind_info("/proc/self/exe").unwrap()); + } + + #[test] + fn test_bad_magic() { + let mut buffer = Vec::new(); + let header = Header { + magic: 0xBAD, + ..Default::default() + }; + buffer + .write_all(unsafe { plain::as_bytes(&header) }) + .unwrap(); + assert!(matches!( + Reader::new(&buffer), + Err(ReaderError::MagicNumber) + )); + } + + #[test] + fn test_version_mismatch() { + let mut buffer = 
Vec::new(); + let header = Header { + version: VERSION + 1, + magic: MAGIC_NUMBER, + ..Default::default() + }; + buffer + .write_all(unsafe { plain::as_bytes(&header) }) + .unwrap(); + assert!(matches!(Reader::new(&buffer), Err(ReaderError::Version))); + } + + #[test] + fn test_corrupt_unwind_info() { + let mut buffer: Cursor> = Cursor::new(Vec::new()); + let path = PathBuf::from("/proc/self/exe"); + let writer = Writer::new(&path); + assert!(writer.write(&mut buffer).is_ok()); + + // Corrupt unwind info. + buffer.seek(SeekFrom::End(-10)).unwrap(); + buffer.write_all(&[0, 0, 0, 0, 0, 0, 0]).unwrap(); + + let reader = Reader::new(&buffer.get_ref()[..]); + let unwind_info = reader.unwrap().unwind_info(); + assert!(matches!(unwind_info, Err(ReaderError::Digest))); + } + + #[test] + fn test_header_too_small() { + let buffer = Vec::new(); + assert!(matches!(Reader::new(&buffer), Err(ReaderError::OutOfRange))); + } + + #[test] + fn test_unwind_info_too_small() { + let mut buffer = Vec::new(); + let header = Header { + version: VERSION, + magic: MAGIC_NUMBER, + unwind_info_len: 4, + unwind_info_digest: 0x0, + }; + buffer + .write_all(unsafe { plain::as_bytes(&header) }) + .unwrap(); + assert!(matches!( + Reader::new(&buffer).unwrap().unwind_info(), + Err(ReaderError::OutOfRange) + )); + } +} diff --git a/src/unwind_info/types.rs b/src/unwind_info/types.rs index b39807d..f27d42a 100644 --- a/src/unwind_info/types.rs +++ b/src/unwind_info/types.rs @@ -1,5 +1,8 @@ use lazy_static::lazy_static; +// Important: Any changes to the structures below must bump the file +// version in unwind_info/persist.rs + #[repr(u8)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub enum CfaType { @@ -33,6 +36,7 @@ pub enum PltType { } #[derive(Debug, Default, Copy, Clone, PartialEq)] +#[repr(C, packed)] pub struct CompactUnwindRow { pub pc: u64, pub cfa_type: CfaType,