Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow eBPF map sizes and general buffer size for profiler to be changed via CLI options #54

Merged
merged 10 commits into from
Aug 13, 2024
2 changes: 1 addition & 1 deletion src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl AggregatorCollector {
}
}

/// Aggregagates the samples in memory, which might be acceptable when profiling for short amounts of time.
/// Aggregates the samples in memory, which might be acceptable when profiling for short amounts of time.
gmarler marked this conversation as resolved.
Show resolved Hide resolved
impl Collector for AggregatorCollector {
fn collect(
&mut self,
Expand Down
125 changes: 117 additions & 8 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use core::str;
use std::error::Error;
use std::fs::File;
use std::io::IsTerminal;
Expand All @@ -21,7 +22,7 @@ use tracing_subscriber::FmtSubscriber;
use lightswitch::object::ObjectFile;
use lightswitch::profile::symbolize_profile;
use lightswitch::profile::{fold_profile, to_pprof};
use lightswitch::profiler::Profiler;
use lightswitch::profiler::{Profiler, ProfilerConfig};
use lightswitch::unwind_info::in_memory_unwind_info;
use lightswitch::unwind_info::remove_redundant;
use lightswitch::unwind_info::remove_unnecesary_markers;
Expand Down Expand Up @@ -61,6 +62,20 @@ fn sample_freq_in_range(s: &str) -> Result<u16, String> {
Ok(sample_freq as u16)
}

/// Parses `s` as a `usize` and returns it only when it is a power of two greater than 1.
///
/// Intended for use as a clap `value_parser`, so failures are reported as a
/// human-readable `String`.
///
/// # Errors
///
/// Returns an error message when `s` is not a valid `usize`, or when the parsed
/// value is 0, 1, or not a power of two.
fn value_is_power_of_two(s: &str) -> Result<usize, String> {
    let value: usize = s
        .parse()
        .map_err(|_| format!("`{s}' isn't a valid usize"))?;
    // `is_power_of_two` already rejects 0. Mathematically 1 (2^0) is a power of
    // two, but it is deliberately rejected here as well, so the smallest
    // accepted value is 2.
    if value > 1 && value.is_power_of_two() {
        Ok(value)
    } else {
        Err(format!("{} is not a power of 2", value))
    }
}

/// Given a non-prime unsigned int, return the prime number that precedes it
/// as well as the prime that succeeds it
fn primes_before_after(non_prime: usize) -> Result<(usize, usize), String> {
Expand Down Expand Up @@ -152,6 +167,51 @@ struct Cli {
/// Where to write the profile.
#[arg(long, default_value_t, value_enum)]
sender: ProfileSender,
// Buffer Sizes with defaults
#[arg(long, default_value_t = 512 * 1024, value_name = "PERF_BUFFER_BYTES",
help="Size of each profiler perf buffer, in bytes (must be a power of 2)",
value_parser = value_is_power_of_two)]
perf_buffer_bytes: usize,
// Print out info on eBPF map sizes
#[arg(long, help = "Print eBPF map sizes after creation")]
mapsize_info: bool,
// eBPF map stacks
#[arg(
long,
default_value_t = 100000,
help = "max number of individual \
stacks to capture before aggregation"
)]
mapsize_stacks: u32,
// eBPF map aggregated_stacks
#[arg(
long,
default_value_t = 10000,
help = "Derived from constant MAX_AGGREGATED_STACKS_ENTRIES - max \
number of unique stacks after aggregation"
)]
mapsize_aggregated_stacks: u32,
// eBPF map unwind_info_chunks
#[arg(
long,
default_value_t = 5000,
help = "max number of chunks allowed inside a shard"
)]
mapsize_unwind_info_chunks: u32,
// eBPF map unwind_tables
#[arg(
long,
default_value_t = 65,
help = "Derived from constant MAX_UNWIND_INFO_SHARDS"
)]
mapsize_unwind_tables: u32,
// eBPF map rate_limits
#[arg(
long,
default_value_t = 5000,
help = "Derived from constant MAX_PROCESSES"
)]
mapsize_rate_limits: u32,
}

/// Exit the main thread if any thread panics. We prefer this behaviour because pretty much every
Expand Down Expand Up @@ -227,12 +287,21 @@ fn main() -> Result<(), Box<dyn Error>> {
}
}));

let mut p: Profiler<'_> = Profiler::new(
args.libbpf_logs,
args.bpf_logging,
args.duration,
args.sample_freq,
);
let profiler_config = ProfilerConfig {
// NOTE the difference in this arg name from the actual config name
libbpf_debug: args.libbpf_logs,
bpf_logging: args.bpf_logging,
duration: args.duration,
sample_freq: args.sample_freq,
perf_buffer_bytes: args.perf_buffer_bytes,
mapsize_info: args.mapsize_info,
mapsize_stacks: args.mapsize_stacks,
mapsize_aggregated_stacks: args.mapsize_aggregated_stacks,
mapsize_unwind_info_chunks: args.mapsize_unwind_info_chunks,
mapsize_unwind_tables: args.mapsize_unwind_tables,
mapsize_rate_limits: args.mapsize_rate_limits,
};
let mut p: Profiler<'_> = Profiler::new(profiler_config);
p.profile_pids(args.pids);
p.run(collector.clone());

Expand Down Expand Up @@ -295,7 +364,9 @@ mod tests {
use super::*;
use assert_cmd::Command;
use clap::Parser;
use rand::distributions::{Distribution, Uniform};
use rstest::rstest;
use std::collections::HashSet;

#[test]
fn verify_cli() {
Expand All @@ -312,7 +383,7 @@ mod tests {
let actual = String::from_utf8(cmd.unwrap().stdout).unwrap();
insta::assert_yaml_snapshot!(actual, @r###"
---
"Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids <PIDS>\n Specific PIDs to profile\n\n --tids <TIDS>\n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info <PATH_TO_BINARY>\n Show unwind info for given binary\n\n --show-info <PATH_TO_BINARY>\n Show build ID for given binary\n\n -D, --duration <DURATION>\n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-logs\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging <LOGGING>\n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq <SAMPLE_FREQ_IN_HZ>\n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format <PROFILE_FORMAT>\n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-name <PROFILE_NAME>\n Name for the generated profile\n\n --sender <SENDER>\n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. Used for kernel tests\n - local-disk\n - remote\n\n -h, --help\n Print help (see a summary with '-h')\n"
"Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids <PIDS>\n Specific PIDs to profile\n\n --tids <TIDS>\n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info <PATH_TO_BINARY>\n Show unwind info for given binary\n\n --show-info <PATH_TO_BINARY>\n Show build ID for given binary\n\n -D, --duration <DURATION>\n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-logs\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging <LOGGING>\n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq <SAMPLE_FREQ_IN_HZ>\n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format <PROFILE_FORMAT>\n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-name <PROFILE_NAME>\n Name for the generated profile\n\n --sender <SENDER>\n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --perf-buffer-bytes <PERF_BUFFER_BYTES>\n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks <MAPSIZE_STACKS>\n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks <MAPSIZE_AGGREGATED_STACKS>\n Derived from constant MAX_AGGREGATED_STACKS_ENTRIES - max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-unwind-info-chunks <MAPSIZE_UNWIND_INFO_CHUNKS>\n max number of chunks allowed inside a shard\n \n [default: 5000]\n\n --mapsize-unwind-tables <MAPSIZE_UNWIND_TABLES>\n Derived from constant MAX_UNWIND_INFO_SHARDS\n \n [default: 65]\n\n --mapsize-rate-limits <MAPSIZE_RATE_LIMITS>\n Derived from constant MAX_PROCESSES\n \n [default: 5000]\n\n -h, --help\n Print help (see a summary with '-h')\n"
"###);
}

Expand Down Expand Up @@ -384,4 +455,42 @@ mod tests {
}
}
}

#[rstest]
fn should_be_powers_of_two() {
let mut test_uint_strings: Vec<String> = vec![];
gmarler marked this conversation as resolved.
Show resolved Hide resolved
for shift in 1..63 {
let val: usize = 2 << shift;
let val_str = val.to_string();
test_uint_strings.push(val_str);
}
for val_string in test_uint_strings {
assert!(value_is_power_of_two(val_string.as_str()).is_ok())
}
}

#[rstest]
fn should_not_be_powers_of_two() {
    // Boundary values that random sampling is practically never going to hit:
    // both must be rejected by the parser.
    assert!(value_is_power_of_two("0").is_err());
    assert!(value_is_power_of_two("1").is_err());

    // usizes that ARE powers of two, for later exclusion. The set is built
    // independently of the function under test, and the shift bound comes
    // from usize::BITS so it stays in range on every target.
    let powers_of_two: HashSet<usize> = (1..usize::BITS).map(|shift| 1usize << shift).collect();

    // Now, for a random sampling of 500000 integers in the range of usize,
    // excluding any that are known to be powers of 2
    let between = Uniform::from(0..=usize::MAX);
    let mut rng = rand::thread_rng();
    for _ in 0..500000 {
        let candidate: usize = between.sample(&mut rng);
        if powers_of_two.contains(&candidate) {
            continue;
        }
        assert!(
            value_is_power_of_two(&candidate.to_string()).is_err(),
            "{candidate} should be rejected as not a power of 2"
        );
    }
}
}
120 changes: 100 additions & 20 deletions src/profiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ pub struct Profiler<'bpf> {
duration: Duration,
// Per-CPU Sampling Frequency of this profile in Hz
sample_freq: u16,
// Size of each perf buffer, in bytes
gmarler marked this conversation as resolved.
Show resolved Hide resolved
perf_buffer_bytes: usize,
session_duration: Duration,
}

Expand All @@ -171,10 +173,6 @@ const MAX_SHARDS: u64 = MAX_UNWIND_INFO_SHARDS as u64;
const SHARD_CAPACITY: usize = MAX_UNWIND_TABLE_SIZE as usize;
const MAX_CHUNKS: usize = MAX_UNWIND_TABLE_CHUNKS as usize;

// Make each perf buffer 512 KB
// TODO: should make this configurable via a command line argument in future
const PERF_BUFFER_BYTES: usize = 512 * 1024;

#[derive(Debug, Hash, Eq, PartialEq)]
pub struct RawAggregatedSample {
pub pid: i32,
Expand Down Expand Up @@ -285,34 +283,111 @@ impl fmt::Display for SymbolizedAggregatedSample {
pub type RawAggregatedProfile = Vec<RawAggregatedSample>;
pub type SymbolizedAggregatedProfile = Vec<SymbolizedAggregatedSample>;

impl Default for Profiler<'_> {
fn default() -> Self {
Self::new(false, false, Duration::MAX, 19)
}
/// Settings consumed by `Profiler::new`.
///
/// The `mapsize_*` fields are applied as `max_entries` on the corresponding
/// eBPF maps before the BPF object is loaded.
#[derive(Debug, Clone)]
pub struct ProfilerConfig {
    /// Enable libbpf logs (exposed on the command line as `--libbpf-logs`).
    pub libbpf_debug: bool,
    /// Enable logging from the BPF programs.
    pub bpf_logging: bool,
    /// How long the profiler will run.
    pub duration: Duration,
    /// Per-CPU sampling frequency in Hz.
    pub sample_freq: u16,
    /// Size of each perf buffer, in bytes (must be a power of 2).
    pub perf_buffer_bytes: usize,
    /// Log the actual eBPF map sizes after the maps are created.
    pub mapsize_info: bool,
    /// Max number of individual stacks to capture before aggregation.
    pub mapsize_stacks: u32,
    /// Max number of unique stacks after aggregation.
    pub mapsize_aggregated_stacks: u32,
    /// Max number of chunks allowed inside a shard.
    pub mapsize_unwind_info_chunks: u32,
    /// `max_entries` for the `unwind_tables` map.
    pub mapsize_unwind_tables: u32,
    /// `max_entries` for the `rate_limits` map.
    pub mapsize_rate_limits: u32,
}

impl Default for ProfilerConfig {
    /// Mirrors the command line defaults, with `duration` set to
    /// `Duration::MAX` and a 19 Hz sample frequency as in the previous
    /// `Profiler::default`.
    fn default() -> Self {
        Self {
            libbpf_debug: false,
            bpf_logging: false,
            duration: Duration::MAX,
            sample_freq: 19,
            perf_buffer_bytes: 512 * 1024,
            mapsize_info: false,
            mapsize_stacks: 100000,
            mapsize_aggregated_stacks: 10000,
            mapsize_unwind_info_chunks: 5000,
            mapsize_unwind_tables: 65,
            mapsize_rate_limits: 5000,
        }
    }
}

// impl Default for Profiler<'_> {
gmarler marked this conversation as resolved.
Show resolved Hide resolved
// fn default() -> Self {
// Self::new(false, false, Duration::MAX, 19)
// }
// }

impl Profiler<'_> {
pub fn new(
libbpf_debug: bool,
bpf_logging: bool,
duration: Duration,
sample_freq: u16,
) -> Self {
pub fn new(profiler_config: ProfilerConfig) -> Self {
let duration = profiler_config.duration;
let sample_freq = profiler_config.sample_freq;
let perf_buffer_bytes = profiler_config.perf_buffer_bytes;
let mut skel_builder: ProfilerSkelBuilder = ProfilerSkelBuilder::default();
skel_builder.obj_builder.debug(libbpf_debug);
skel_builder.obj_builder.debug(profiler_config.libbpf_debug);
let mut open_skel = skel_builder.open().expect("open skel");
// mapsize modifications can only be made before the maps are actually loaded
// Initialize map sizes with defaults or modifications
open_skel
.maps_mut()
.stacks()
.set_max_entries(profiler_config.mapsize_stacks)
.expect("Unable to set stacks map max_entries");
open_skel
.maps_mut()
.aggregated_stacks()
.set_max_entries(profiler_config.mapsize_aggregated_stacks)
.expect("Unable to set aggregated_stacks map max_entries");
open_skel
.maps_mut()
.unwind_info_chunks()
.set_max_entries(profiler_config.mapsize_unwind_info_chunks)
.expect("Unable to set unwind_info_chunks map max_entries");
open_skel
.maps_mut()
.unwind_tables()
.set_max_entries(profiler_config.mapsize_unwind_tables)
.expect("Unable to set unwind_tables map max_entries");
open_skel
.maps_mut()
.rate_limits()
.set_max_entries(profiler_config.mapsize_rate_limits)
.expect("Unable to set rate_limits map max_entries");
open_skel
.rodata_mut()
.lightswitch_config
.verbose_logging
.write(bpf_logging);
.write(profiler_config.bpf_logging);
let bpf = open_skel.load().expect("load skel");
info!("native unwinder BPF program loaded");
let native_unwinder_maps = bpf.maps();
let exec_mappings_fd = native_unwinder_maps.exec_mappings().as_fd();

// If mapsize_info requested, pull the max_entries from each map of
// interest and print out
if profiler_config.mapsize_info {
info!("eBPF ACTUAL map size Configuration:");
info!(
"stacks: {}",
bpf.maps().stacks().info().unwrap().info.max_entries
);
info!(
"aggregated_stacks: {}",
bpf.maps()
.aggregated_stacks()
.info()
.unwrap()
.info
.max_entries
);
info!(
"unwind_info_chunks: {}",
bpf.maps()
.unwind_info_chunks()
.info()
.unwrap()
.info
.max_entries
);
info!(
"unwind_tables: {}",
bpf.maps().unwind_tables().info().unwrap().info.max_entries
);
info!(
"rate_limits: {}",
bpf.maps().rate_limits().info().unwrap().info.max_entries
);
}

let mut tracers_builder = TracersSkelBuilder::default();
tracers_builder.obj_builder.debug(libbpf_debug);
tracers_builder
.obj_builder
.debug(profiler_config.libbpf_debug);
let open_tracers = tracers_builder.open().expect("open skel");
open_tracers
.maps()
Expand Down Expand Up @@ -358,6 +433,7 @@ impl Profiler<'_> {
profile_receive,
duration,
sample_freq,
perf_buffer_bytes,
session_duration: Duration::from_secs(5),
}
}
Expand Down Expand Up @@ -395,23 +471,25 @@ impl Profiler<'_> {
// New process events.
let chan_send = self.chan_send.clone();
let perf_buffer = PerfBufferBuilder::new(self.bpf.maps().events())
.pages(PERF_BUFFER_BYTES / page_size::get())
.pages(self.perf_buffer_bytes / page_size::get())
.sample_cb(move |_cpu: i32, data: &[u8]| {
Self::handle_event(&chan_send, data);
})
.lost_cb(Self::handle_lost_events)
.build()
// TODO: Instead of unwrap, consume and emit any error - usually
javierhonduco marked this conversation as resolved.
Show resolved Hide resolved
// about buffer bytes not being a power of 2
.unwrap();

let _poll_thread = thread::spawn(move || loop {
perf_buffer.poll(Duration::from_millis(100)).expect("poll");
});

// Trace events are received here, such memory unmaps.
// Trace events are received here, such as memory unmaps.
let tracers_send = self.tracers_chan_send.clone();
let tracers_events_perf_buffer =
PerfBufferBuilder::new(self.tracers.maps().tracer_events())
.pages(PERF_BUFFER_BYTES / page_size::get())
.pages(self.perf_buffer_bytes / page_size::get())
.sample_cb(move |_cpu: i32, data: &[u8]| {
let mut event = tracer_event_t::default();
plain::copy_from_bytes(&mut event, data).expect("serde tracers event");
Expand All @@ -425,6 +503,8 @@ impl Profiler<'_> {
warn!("lost {} events from the tracers", lost_count);
})
.build()
// TODO: Instead of unwrap, consume and emit any error - usually
gmarler marked this conversation as resolved.
Show resolved Hide resolved
// about buffer bytes not being a power of 2
.unwrap();

let _tracers_poll_thread = thread::spawn(move || loop {
Expand Down
Loading