diff --git a/build.rs b/build.rs index 2a3cae2..c4fc7f2 100644 --- a/build.rs +++ b/build.rs @@ -3,6 +3,7 @@ extern crate bindgen; use std::env; use std::path::PathBuf; +use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; use glob::glob; use libbpf_cargo::SkeletonBuilder; use std::path::Path; @@ -15,6 +16,19 @@ const TRACERS_BPF_HEADER: &str = "./src/bpf/tracers.h"; const TRACERS_BPF_SOURCE: &str = "./src/bpf/tracers.bpf.c"; const TRACERS_SKELETON: &str = "./src/bpf/tracers_skel.rs"; +#[derive(Debug)] +struct CustomParseCallbacks; + +impl ParseCallbacks for CustomParseCallbacks { + fn add_derives(&self, derive_info: &DeriveInfo) -> Vec { + if derive_info.name == "native_stack_t" { + vec!["Hash".into(), "Eq".into(), "PartialEq".into()] + } else { + vec![] + } + } +} + fn main() { // Inform cargo of when to re build for path in glob("src/bpf/*[hc]").unwrap().flatten() { @@ -24,6 +38,7 @@ fn main() { // Main native profiler. let bindings = bindgen::Builder::default() .derive_default(true) + .parse_callbacks(Box::new(CustomParseCallbacks)) .header(PROFILER_BPF_HEADER) .generate() .expect("Unable to generate bindings"); diff --git a/src/collector.rs b/src/collector.rs index c2d5d0d..c8edc29 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -3,11 +3,9 @@ use std::sync::{Arc, Mutex}; use tracing::{debug, span, Level}; use crate::object::ExecutableId; -use crate::profile::symbolize_profile; -use crate::profiler::ObjectFileInfo; use crate::profiler::ProcessInfo; use crate::profiler::RawAggregatedProfile; -use crate::profiler::SymbolizedAggregatedProfile; +use crate::profiler::{ObjectFileInfo, RawAggregatedSample}; pub struct Collector { profiles: Vec, @@ -53,14 +51,35 @@ impl Collector { } } - pub fn finish(&self) -> Vec { + pub fn finish( + &self, + ) -> ( + RawAggregatedProfile, + &HashMap, + &HashMap, + ) { let _span: span::EnteredSpan = span!(Level::DEBUG, "symbolize_profiles").entered(); - debug!("Collector::finish {}", self.profiles.len()); - let mut r = Vec::new(); + let mut samples_count = HashMap::new(); for profile in &self.profiles { - r.push(symbolize_profile(profile, &self.procs, &self.objs)); + for sample in profile { + let sample_without_count = RawAggregatedSample { + count: 0, + ..*sample + }; + *samples_count.entry(sample_without_count).or_insert(0) += sample.count + } } - r + + debug!("found {} unique samples", samples_count.len()); + let profile = samples_count + .iter() + .map(|(sample, count)| RawAggregatedSample { + count: *count, + ..*sample + }) + .collect(); + + (profile, &self.procs, &self.objs) } } diff --git a/src/main.rs b/src/main.rs index 77ffa0a..6dd8971 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,7 +17,8 @@ use tracing_subscriber::FmtSubscriber; use lightswitch::collector::Collector; use lightswitch::object::ObjectFile; -use lightswitch::profile::fold_profiles; +use lightswitch::profile::fold_profile; +use lightswitch::profile::symbolize_profile; use lightswitch::profiler::Profiler; use lightswitch::unwind_info::in_memory_unwind_info; use lightswitch::unwind_info::remove_redundant; @@ -213,11 +214,13 @@ fn main() -> Result<(), Box> { p.profile_pids(args.pids); p.run(collector.clone()); - let profiles = collector.lock().unwrap().finish(); + let collector = collector.lock().unwrap(); + let (raw_profile, procs, objs) = collector.finish(); + let symbolized_profile = symbolize_profile(&raw_profile, procs, objs); match args.profile_format { ProfileFormat::FlameGraph => { - let folded = fold_profiles(profiles); + let folded = fold_profile(symbolized_profile); let mut options: flamegraph::Options<'_> = flamegraph::Options::default(); let data = folded.as_bytes(); let f = File::create(args.profile_name).unwrap(); diff --git a/src/profile.rs b/src/profile.rs index e227eea..1afb5c1 100644 --- a/src/profile.rs +++ b/src/profile.rs @@ -15,7 +15,7 @@ use crate::profiler::SymbolizedAggregatedProfile; use crate::profiler::SymbolizedAggregatedSample; use crate::usym::symbolize_native_stack_blaze; -/// Converts a collection of symbolized aggregated profiles to their folded representation that most flamegraph renderers use. +/// Converts a symbolized aggregated profiles to their folded representation that most flamegraph renderers use. /// Folded stacks look like this: /// /// > base_frame;other_frame;top_frame 100 @@ -23,102 +23,100 @@ use crate::usym::symbolize_native_stack_blaze; /// /// The frame names are separated by semicolons and the count is at the end separated with a space. We insert some synthetic /// frames to quickly identify the thread and process names and other pieces of metadata. -pub fn fold_profiles(profiles: Vec) -> String { +pub fn fold_profile(profile: SymbolizedAggregatedProfile) -> String { let mut folded = String::new(); - for profile in profiles { - for sample in profile { - let ustack = sample - .ustack - .clone() - .into_iter() - .rev() - .map(|e| e.to_string()) - .collect::>(); - let ustack = ustack.join(";"); - let kstack = sample - .kstack - .clone() - .into_iter() - .rev() - .map(|e| format!("kernel: {}", e)) - .collect::>(); - let kstack = kstack.join(";"); - let count: String = sample.count.to_string(); - - // Getting the meatadata for the stack. This will be abstracted in the future in a common module. - let (process_name, thread_name) = match procfs::process::Process::new(sample.pid) { - // We successfully looked up the PID in procfs (we don't yet - // know if it's a PID/PGID/main thread or a TID/non-main thread) - Ok(p) => match p.stat() { - // Successfully got the pid/tid stat info - Ok(stat) => { - // Differentiate between PID/PGID/main thread or TID/non-main thread - if stat.pid == stat.pgrp { - // NOTE: - // This is the main thread for the PID/PGID - // If stat.pid() == stat.pgrp() for this process, - // this is a stack for the main thread - // of the pid, and stat.comm is the name of the - // process binary file, so use: - // process_name = stat.comm, and thread_name = "main_thread" - (stat.comm, "main_thread".to_string()) - } else { - // NOTE: - // This is a non-main thread (TID) of a PID, so we - // have to look up the actual PID/PGID to get the - // process binary name - // As in, stat.comm is the name of the thread, and - // you have to look up the process binary name, so - // use: - // process_name = , and thread_name = stat.comm - // - let process_name = match procfs::process::Process::new(stat.pgrp) { - // We successfully looked up the PID/PGID of the TID in procfs - Ok(p) => match p.stat() { - // We successfully looked up the PID binary name from stat - Ok(stat2) => stat2.comm, - // We were unable to get the PID's binary name from stat - Err(_) => "".to_string(), - }, - // We failed to look up the PID/PGID of the TID in procfs + for sample in profile { + let ustack = sample + .ustack + .clone() + .into_iter() + .rev() + .map(|e| e.to_string()) + .collect::>(); + let ustack = ustack.join(";"); + let kstack = sample + .kstack + .clone() + .into_iter() + .rev() + .map(|e| format!("kernel: {}", e)) + .collect::>(); + let kstack = kstack.join(";"); + let count: String = sample.count.to_string(); + + // Getting the meatadata for the stack. This will be abstracted in the future in a common module. + let (process_name, thread_name) = match procfs::process::Process::new(sample.pid) { + // We successfully looked up the PID in procfs (we don't yet + // know if it's a PID/PGID/main thread or a TID/non-main thread) + Ok(p) => match p.stat() { + // Successfully got the pid/tid stat info + Ok(stat) => { + // Differentiate between PID/PGID/main thread or TID/non-main thread + if stat.pid == stat.pgrp { + // NOTE: + // This is the main thread for the PID/PGID + // If stat.pid() == stat.pgrp() for this process, + // this is a stack for the main thread + // of the pid, and stat.comm is the name of the + // process binary file, so use: + // process_name = stat.comm, and thread_name = "main_thread" + (stat.comm, "main_thread".to_string()) + } else { + // NOTE: + // This is a non-main thread (TID) of a PID, so we + // have to look up the actual PID/PGID to get the + // process binary name + // As in, stat.comm is the name of the thread, and + // you have to look up the process binary name, so + // use: + // process_name = , and thread_name = stat.comm + // + let process_name = match procfs::process::Process::new(stat.pgrp) { + // We successfully looked up the PID/PGID of the TID in procfs + Ok(p) => match p.stat() { + // We successfully looked up the PID binary name from stat + Ok(stat2) => stat2.comm, + // We were unable to get the PID's binary name from stat Err(_) => "".to_string(), - }; - (process_name, stat.comm) - } + }, + // We failed to look up the PID/PGID of the TID in procfs + Err(_) => "".to_string(), + }; + (process_name, stat.comm) } - // Was unable to lookup the PID binary or thread name from stat - Err(_) => ( - "".to_string(), - "".to_string(), - ), - }, - // Completely failed to look up the PID/TID in procfs + } + // Was unable to lookup the PID binary or thread name from stat Err(_) => ( "".to_string(), "".to_string(), ), - }; + }, + // Completely failed to look up the PID/TID in procfs + Err(_) => ( + "".to_string(), + "".to_string(), + ), + }; - writeln!( - folded, - "{};{}{}{} {}", - process_name, - thread_name, - if ustack.trim().is_empty() { - "".to_string() - } else { - format!(";{}", ustack) - }, - if kstack.trim().is_empty() { - "".to_string() - } else { - format!(";{}", kstack) - }, - count - ) - .unwrap(); - } + writeln!( + folded, + "{};{}{}{} {}", + process_name, + thread_name, + if ustack.trim().is_empty() { + "".to_string() + } else { + format!(";{}", ustack) + }, + if kstack.trim().is_empty() { + "".to_string() + } else { + format!(";{}", kstack) + }, + count + ) + .unwrap(); } folded diff --git a/src/profiler.rs b/src/profiler.rs index 1291f7c..e3eb530 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -160,7 +160,7 @@ const MAX_CHUNKS: usize = MAX_UNWIND_TABLE_CHUNKS as usize; // TODO: should make this configurable via a command line argument in future const PERF_BUFFER_BYTES: usize = 512 * 1024; -#[derive(Debug)] +#[derive(Debug, Hash, Eq, PartialEq)] pub struct RawAggregatedSample { pub pid: i32, pub tid: i32,