From 7a1247040224a9f37c6134ca820346f121f1cf13 Mon Sep 17 00:00:00 2001 From: Javier Honduvilla Coto Date: Tue, 14 May 2024 12:37:01 +0100 Subject: [PATCH] Improve logging (#40) Fix the log verbosity for the userspace program to let the user select the level they want to filter by. This also replaces the previous description which wasn't clear enough. Add two other flags to set logging of the libbpf library which also contains verifier errors, as well as for the BPF programs themselves. By default they are turned off to reduce noise and performance overhead. Test Plan ========= Ensured the logging was off by default and could be enabled passing the right flags. --- src/main.rs | 60 ++++++++++++++++++++++++++++--------------------- src/profiler.rs | 23 ++++++++++++++----- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/src/main.rs b/src/main.rs index 4aec28c..6de0f46 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,6 @@ use std::panic; use std::path::PathBuf; use std::time::Duration; -use clap::ArgAction; use clap::Parser; use inferno::flamegraph; @@ -74,6 +73,16 @@ fn primes_before_after(non_prime: usize) -> Result<(usize, usize), String> { Ok((before, after)) } +#[derive(clap::ValueEnum, Debug, Clone, Default)] +enum LoggingLevel { + Trace, + Debug, + #[default] + Info, + Warn, + Error, +} + #[derive(Parser, Debug)] struct Cli { /// Specific PIDs to profile @@ -84,13 +93,12 @@ struct Cli { tids: Vec, /// Show unwind info for given binary #[arg(long, value_name = "PATH_TO_BINARY", - conflicts_with_all = ["pids", "tids", "show_info", "duration", - "filter_logs", "sample_freq", "flamegraph_file"] + conflicts_with_all = ["pids", "tids", "show_info", "duration", "sample_freq", "flamegraph_file"] )] show_unwind_info: Option, /// Show build ID for given binary #[arg(long, value_name = "PATH_TO_BINARY", - conflicts_with_all = ["pids", "tids", "duration", "filter_logs", + conflicts_with_all = ["pids", "tids", "duration", "sample_freq", "flamegraph_file"] )] show_info: Option, @@ -98,9 +106,15 @@ struct Cli { #[arg(short='D', long, default_value = Duration::MAX.as_secs().to_string(), value_parser = parse_duration)] duration: Duration, - /// Enable TRACE (max) level logging - defaults to INFO level otherwise - #[arg(long, action=ArgAction::SetFalse)] - filter_logs: bool, + /// Enable libbpf logs. This includes the BPF verifier output + #[arg(long)] + libbpf_logs: bool, + /// Enable BPF programs logging + #[arg(long)] + bpf_logging: bool, + /// Set lightswitch's logging level + #[arg(long, default_value_t, value_enum)] + logging: LoggingLevel, // Verification for this option guarantees the only possible selections // are prime numbers up to and including 1001 /// Per-CPU Sampling Frequency in Hz @@ -148,10 +162,12 @@ fn main() -> Result<(), Box> { } let subscriber = FmtSubscriber::builder() - .with_max_level(if args.filter_logs { - Level::TRACE - } else { - Level::INFO + .with_max_level(match args.logging { + LoggingLevel::Trace => Level::TRACE, + LoggingLevel::Debug => Level::DEBUG, + LoggingLevel::Info => Level::INFO, + LoggingLevel::Warn => Level::WARN, + LoggingLevel::Error => Level::ERROR, }) .with_span_events(FmtSpan::ENTER | FmtSpan::CLOSE) .with_ansi(std::io::stdout().is_terminal()) @@ -176,7 +192,12 @@ fn main() -> Result<(), Box> { let collector = Collector::new(); - let mut p: Profiler<'_> = Profiler::new(true, args.duration, args.sample_freq); + let mut p: Profiler<'_> = Profiler::new( + args.libbpf_logs, + args.bpf_logging, + args.duration, + args.sample_freq, + ); p.profile_pids(args.pids); p.run(collector.clone()); @@ -268,7 +289,7 @@ mod tests { let actual = String::from_utf8(cmd.unwrap().stdout).unwrap(); insta::assert_yaml_snapshot!(actual, @r###" --- - "Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids \n Specific PIDs to profile\n --tids \n Specific TIDs to profile (these can be outside the PIDs selected above)\n --show-unwind-info \n Show unwind info for given binary\n --show-info \n Show build ID for given binary\n -D, --duration \n How long this agent will run in seconds [default: 18446744073709551615]\n --filter-logs\n Enable TRACE (max) level logging - defaults to INFO level otherwise\n --sample-freq \n Per-CPU Sampling Frequency in Hz [default: 19]\n --flamegraph-file \n Output file for Flame Graph in SVG format [default: flame.svg]\n -h, --help\n Print help\n" + "Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids \n Specific PIDs to profile\n --tids \n Specific TIDs to profile (these can be outside the PIDs selected above)\n --show-unwind-info \n Show unwind info for given binary\n --show-info \n Show build ID for given binary\n -D, --duration \n How long this agent will run in seconds [default: 18446744073709551615]\n --libbpf-logs\n Enable libbpf logs. This includes the BPF verifier output\n --bpf-logging\n Enable BPF programs logging\n --logging \n Set lightswitch's logging level [default: info] [possible values: trace, debug, info, warn, error]\n --sample-freq \n Per-CPU Sampling Frequency in Hz [default: 19]\n --flamegraph-file \n Output file for Flame Graph in SVG format [default: flame.svg]\n -h, --help\n Print help\n" "###); } @@ -340,17 +361,4 @@ mod tests { } } } - - // Only usable on the nightly toolchain for now... - // use test::{black_box, Bencher}; - - // #[bench] - // fn bench_primes_before_after(b: &mut Bencher) { - // b.iter(|| { - // // Inner closure, the actual test - // for i in 1..1009 { - // black_box(primes_before_after(i)); - // } - // }); - // } } diff --git a/src/profiler.rs b/src/profiler.rs index f8e7cca..2873a2a 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -147,20 +147,27 @@ pub type SymbolizedAggregatedProfile = Vec; impl Default for Profiler<'_> { fn default() -> Self { - Self::new(false, Duration::MAX, 19) + Self::new(false, false, Duration::MAX, 19) } } impl Profiler<'_> { - pub fn new(bpf_debug: bool, duration: Duration, sample_freq: u16) -> Self { + pub fn new( + libbpf_debug: bool, + bpf_logging: bool, + duration: Duration, + sample_freq: u16, + ) -> Self { let mut skel_builder: ProfilerSkelBuilder = ProfilerSkelBuilder::default(); - skel_builder.obj_builder.debug(bpf_debug); - let open_skel = skel_builder.open().expect("open skel"); + skel_builder.obj_builder.debug(libbpf_debug); + let mut open_skel = skel_builder.open().expect("open skel"); + open_skel.rodata_mut().lightswitch_config.verbose_logging = bpf_logging; let bpf = open_skel.load().expect("load skel"); + info!("native unwinder BPF program loaded"); let exec_mappings_fd = bpf.maps().exec_mappings().as_fd().as_raw_fd(); let mut tracers_builder = TracersSkelBuilder::default(); - tracers_builder.obj_builder.debug(bpf_debug); + tracers_builder.obj_builder.debug(libbpf_debug); let open_tracers = tracers_builder.open().expect("open skel"); open_tracers .maps() @@ -169,6 +176,7 @@ impl Profiler<'_> { .expect("reuse exec_mappings"); let tracers = open_tracers.load().expect("load skel"); + info!("munmap and process exit tracing BPF programs loaded"); let procs = Arc::new(Mutex::new(HashMap::new())); let object_files = Arc::new(Mutex::new(HashMap::new())); @@ -822,7 +830,10 @@ impl Profiler<'_> { ); if available_space == 0 { - info!("no space in live shard, allocating a new one"); + info!( + "live shard is full, allocating a new one [{}/{}]", + self.native_unwind_state.shard_index, MAX_SHARDS + ); if self.persist_unwind_info(&self.native_unwind_state.live_shard) { self.native_unwind_state.dirty = false;