Skip to content

Commit

Permalink
Merge branch 'main' of github.com:patnebe/lightswitch into refactor-l…
Browse files Browse the repository at this point in the history
…abel-provider-interface
  • Loading branch information
patnebe committed Jan 8, 2025
2 parents 5bdc12c + 1cd2739 commit d962434
Show file tree
Hide file tree
Showing 18 changed files with 1,042 additions and 258 deletions.
27 changes: 26 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ rand = "0.8.5"
# workspace build dependencies have to be defined here
libbpf-cargo = { version = "0.25.0-beta.0" }
glob = "0.3.1"
ring = "0.17.8"

[dependencies]
gimli = "0.31.1"
Expand Down Expand Up @@ -61,6 +62,8 @@ thiserror = { workspace = true }
errno = { workspace = true }
procfs = { workspace = true }
nix = { workspace = true, features = ["user"] }
parking_lot = { version = "0.12.3", features = ["deadlock_detection"] }
ring = { workspace = true }

[dev-dependencies]
assert_cmd = { version = "2.0.16" }
Expand Down
4 changes: 2 additions & 2 deletions lightswitch-object/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[package]
name = "lightswitch-object"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
description = "Deals with object files"
license = "MIT"
repository = "https://github.com/javierhonduco/lightswitch"

[dependencies]
data-encoding = "2.6.0"
ring = "0.17.8"
ring = { workspace = true }
memmap2 = { workspace = true }
object = { workspace = true }
anyhow = { workspace = true }
5 changes: 5 additions & 0 deletions lightswitch-object/src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ impl ObjectFile {
Ok(BuildId::sha256_from_digest(&self.code_hash))
}

/// Returns whether the object has debug symbols.
///
/// This is a thin wrapper that forwards to `has_debug_symbols()` on the
/// wrapped `object` field and returns its answer unchanged.
///
/// NOTE(review): presumably this is the `object` crate's check for DWARF
/// debug information sections -- confirm against the type of `self.object`.
pub fn has_debug_info(&self) -> bool {
self.object.has_debug_symbols()
}

/// Returns `true` when the wrapped object's kind is `ObjectKind::Dynamic`,
/// and `false` for every other kind.
pub fn is_dynamic(&self) -> bool {
self.object.kind() == ObjectKind::Dynamic
}
Expand Down
34 changes: 19 additions & 15 deletions src/bpf/profiler.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,26 +192,21 @@ find_page(mapping_t *mapping, u64 object_relative_pc, u64 *left, u64 *right) {
return NULL;
}


static __always_inline void event_new_process(struct bpf_perf_event_data *ctx, int per_process_id) {
Event event = {
.type = EVENT_NEW_PROCESS,
.pid = per_process_id,
};

bool *is_rate_limited = bpf_map_lookup_elem(&rate_limits, &event);
static __always_inline void send_event(Event *event, struct bpf_perf_event_data *ctx) {
bool *is_rate_limited = bpf_map_lookup_elem(&rate_limits, event);
if (is_rate_limited != NULL && *is_rate_limited) {
LOG("[debug] event_new_process was rate limited");
LOG("[debug] send_event was rate limited");
return;
}

if (bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(Event)) < 0) {
if (bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event, sizeof(Event)) < 0) {
bump_unwind_error_sending_new_process_event();
}

LOG("[debug] event_new_process event sent");
LOG("[debug] event type %d sent", event->type);
bool rate_limited = true;
bpf_map_update_elem(&rate_limits, &event, &rate_limited, BPF_ANY);

bpf_map_update_elem(&rate_limits, event, &rate_limited, BPF_ANY);
}

// Kernel addresses have the top bits set.
Expand Down Expand Up @@ -398,7 +393,12 @@ int dwarf_unwind(struct bpf_perf_event_data *ctx) {
u64 right = 0;
void *inner = find_page(mapping, object_relative_pc_high, &left, &right);
if (inner == NULL) {
// TODO: add counter
Event event = {
.type = EVENT_NEED_UNWIND_INFO,
.pid = per_process_id,
.address = unwind_state->ip,
};
send_event(&event, ctx);
return 1;
}

Expand All @@ -422,7 +422,7 @@ int dwarf_unwind(struct bpf_perf_event_data *ctx) {
if (!in_previous_page) {
LOG("[error] binary search failed with %llx, pc: %llx", table_idx, unwind_state->ip);
if (table_idx == BINARY_SEARCH_EXHAUSTED_ITERATIONS) {
bump_unwind_error_binary_search_exausted_iterations();
bump_unwind_error_binary_search_exhausted_iterations();
}
return 1;
}
Expand Down Expand Up @@ -672,7 +672,11 @@ int on_event(struct bpf_perf_event_data *ctx) {
return 0;
}

event_new_process(ctx, per_process_id);
Event event = {
.type = EVENT_NEW_PROCESS,
.pid = per_process_id,
};
send_event(&event, ctx);
return 0;
}

Expand Down
5 changes: 3 additions & 2 deletions src/bpf/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ struct unwinder_stats_t {
u64 error_mapping_not_found;
u64 error_mapping_does_not_contain_pc;
u64 error_chunk_not_found;
u64 error_binary_search_exausted_iterations;
u64 error_binary_search_exhausted_iterations;
u64 error_sending_new_process_event;
u64 error_cfa_offset_did_not_fit;
u64 error_rbp_offset_did_not_fit;
Expand Down Expand Up @@ -184,12 +184,13 @@ typedef struct {

enum event_type {
EVENT_NEW_PROCESS = 1,
// EVENT_NEED_UNWIND_INFO = 2, need a way to signal of new loaded mappings
EVENT_NEED_UNWIND_INFO = 2,
};

// Event emitted by the BPF programs via `bpf_perf_event_output` on the
// `events` map. The whole struct is also used as the key of the `rate_limits`
// map, so every field takes part in rate limiting: two events are only
// considered the same if `type`, `pid` and `address` all match.
typedef struct {
// Which kind of event this is (see `enum event_type`).
enum event_type type;
int pid; // use right name here (tgid?)
// Set to the instruction pointer being unwound for EVENT_NEED_UNWIND_INFO.
// Left out of the initializer (hence zero) for EVENT_NEW_PROCESS.
u64 address;
} Event;

enum program {
Expand Down
4 changes: 2 additions & 2 deletions src/bpf/profiler_bindings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ impl Add for unwinder_stats_t {
error_previous_rbp_zero: self.error_previous_rbp_zero + other.error_previous_rbp_zero,
error_should_never_happen: self.error_should_never_happen
+ other.error_should_never_happen,
error_binary_search_exausted_iterations: self.error_binary_search_exausted_iterations
+ other.error_binary_search_exausted_iterations,
error_binary_search_exhausted_iterations: self.error_binary_search_exhausted_iterations
+ other.error_binary_search_exhausted_iterations,
error_chunk_not_found: self.error_chunk_not_found + other.error_chunk_not_found,
error_mapping_does_not_contain_pc: self.error_mapping_does_not_contain_pc
+ other.error_mapping_does_not_contain_pc,
Expand Down
2 changes: 1 addition & 1 deletion src/bpf/shared_maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ DEFINE_COUNTER(error_should_never_happen);
DEFINE_COUNTER(error_mapping_not_found);
DEFINE_COUNTER(error_mapping_does_not_contain_pc);
DEFINE_COUNTER(error_chunk_not_found);
DEFINE_COUNTER(error_binary_search_exausted_iterations);
DEFINE_COUNTER(error_binary_search_exhausted_iterations);
DEFINE_COUNTER(error_sending_new_process_event);
DEFINE_COUNTER(error_cfa_offset_did_not_fit);
DEFINE_COUNTER(error_rbp_offset_did_not_fit);
Expand Down
18 changes: 18 additions & 0 deletions src/cli/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ pub(crate) enum Symbolizer {
None,
}

// Selects which debug info backend the CLI hands to the profiler; exposed as
// the `--debug-info-backend` flag.
//
// NOTE: these are intentionally plain `//` comments rather than `///` doc
// comments. clap uses doc comments on `ValueEnum` variants as help text for the
// possible values, which would change the `--help` output that the CLI
// snapshot test asserts on.
#[derive(PartialEq, clap::ValueEnum, Debug, Clone, Default)]
pub(crate) enum DebugInfoBackend {
// Default value; `main` maps it to `DebugInfoBackendNull`.
#[default]
None,
// `main` maps it to `DebugInfoBackendFilesystem` with `/tmp` as its path.
Copy,
// `main` maps it to `DebugInfoBackendRemote`, configured with the server URL
// and an HTTP client timeout.
Remote,
}

#[derive(Parser, Debug)]
pub(crate) struct CliArgs {
/// Specific PIDs to profile
Expand Down Expand Up @@ -126,4 +134,14 @@ pub(crate) struct CliArgs {
pub(crate) exclude_self: bool,
#[arg(long, default_value_t, value_enum)]
pub(crate) symbolizer: Symbolizer,
#[arg(long, default_value_t, value_enum)]
pub(crate) debug_info_backend: DebugInfoBackend,
#[arg(
long,
default_value_t = ProfilerConfig::default().max_native_unwind_info_size_mb,
help = "approximate max size in megabytes used for the BPF maps that hold unwind information"
)]
pub(crate) max_native_unwind_info_size_mb: i32,
#[arg(long, help = "enable parking_lot's deadlock detector")]
pub(crate) enable_deadlock_detector: bool,
}
71 changes: 53 additions & 18 deletions src/cli/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ use std::io::Write;
use std::panic;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::Duration;

use clap::Parser;
use crossbeam_channel::bounded;
use inferno::flamegraph;
use lightswitch::collector::{AggregatorCollector, Collector, NullCollector, StreamingCollector};
use lightswitch::debug_info::DebugInfoManager;
use lightswitch_metadata::metadata_provider::GlobalMetadataProvider;
use nix::unistd::Uid;
use prost::Message;
Expand All @@ -21,6 +23,9 @@ use tracing_subscriber::FmtSubscriber;
use lightswitch_capabilities::system_info::SystemInfo;
use lightswitch_metadata::metadata_provider::ThreadSafeGlobalMetadataProvider;

use lightswitch::debug_info::{
DebugInfoBackendFilesystem, DebugInfoBackendNull, DebugInfoBackendRemote,
};
use lightswitch::profile::symbolize_profile;
use lightswitch::profile::{fold_profile, to_pprof};
use lightswitch::profiler::{Profiler, ProfilerConfig};
Expand All @@ -32,12 +37,13 @@ mod args;
mod validators;

use crate::args::CliArgs;
use crate::args::DebugInfoBackend;
use crate::args::LoggingLevel;
use crate::args::ProfileFormat;
use crate::args::ProfileSender;
use crate::args::Symbolizer;

const DEFAULT_PPROF_INGEST_URL: &str = "http://localhost:4567/pprof/new";
const DEFAULT_SERVER_URL: &str = "http://localhost:4567";

/// Exit the main thread if any thread panics. We prefer this behaviour because pretty much every
/// thread is load bearing for the correct functioning.
Expand All @@ -49,10 +55,28 @@ fn panic_thread_hook() {
}));
}

/// Spawns a detached background thread that runs `parking_lot`'s deadlock
/// detector.
///
/// Once per second the thread asks `parking_lot` for the current set of
/// deadlocks and prints the thread id and backtrace of every thread involved
/// to stderr. The thread never exits and its join handle is dropped, so it
/// lives for the remainder of the process.
fn start_deadlock_detector() {
    std::thread::spawn(|| {
        let poll_interval = std::time::Duration::from_secs(1);
        loop {
            std::thread::sleep(poll_interval);
            // `check_deadlock` yields one group of threads per detected
            // deadlock; every thread is reported the same way, so the groups
            // are flattened into a single stream.
            let stuck_threads = parking_lot::deadlock::check_deadlock()
                .into_iter()
                .flatten();
            for stuck in stuck_threads {
                eprintln!(
                    "Found a deadlock! {}: {:?}",
                    stuck.thread_id(),
                    stuck.backtrace()
                );
            }
        }
    });
}

fn main() -> Result<(), Box<dyn Error>> {
panic_thread_hook();

let args = CliArgs::parse();
if args.enable_deadlock_detector {
start_deadlock_detector();
}

if let Some(path) = args.show_unwind_info {
show_unwind_info(&path);
Expand Down Expand Up @@ -98,24 +122,34 @@ fn main() -> Result<(), Box<dyn Error>> {
}
}

let server_url = args.server_url.unwrap_or(DEFAULT_SERVER_URL.into());

let metadata_provider: ThreadSafeGlobalMetadataProvider =
Arc::new(Mutex::new(GlobalMetadataProvider::default()));

let collector = Arc::new(Mutex::new(match args.sender {
ProfileSender::None => Box::new(NullCollector::new()) as Box<dyn Collector + Send>,
ProfileSender::LocalDisk => {
Box::new(AggregatorCollector::new()) as Box<dyn Collector + Send>
}
ProfileSender::Remote => Box::new(StreamingCollector::new(
args.symbolizer == Symbolizer::Local,
args.server_url
.as_ref()
.map_or(DEFAULT_PPROF_INGEST_URL, |v| v),
ProfilerConfig::default().session_duration,
args.sample_freq,
metadata_provider.clone(),
)) as Box<dyn Collector + Send>,
}));
let collector: Arc<Mutex<Box<dyn Collector + Send>>> =
Arc::new(Mutex::new(match args.sender {
ProfileSender::None => Box::new(NullCollector::new()),
ProfileSender::LocalDisk => Box::new(AggregatorCollector::new()),
ProfileSender::Remote => Box::new(StreamingCollector::new(
args.symbolizer == Symbolizer::Local,
&server_url,
ProfilerConfig::default().session_duration,
args.sample_freq,
metadata_provider.clone(),
)),
}));

let debug_info_manager: Box<dyn DebugInfoManager> = match args.debug_info_backend {
DebugInfoBackend::None => Box::new(DebugInfoBackendNull {}),
DebugInfoBackend::Copy => Box::new(DebugInfoBackendFilesystem {
path: PathBuf::from("/tmp"),
}),
DebugInfoBackend::Remote => Box::new(DebugInfoBackendRemote {
http_client_timeout: Duration::from_millis(500),
server_url,
}),
};

let profiler_config = ProfilerConfig {
libbpf_debug: args.libbpf_debug,
Expand All @@ -128,6 +162,7 @@ fn main() -> Result<(), Box<dyn Error>> {
mapsize_aggregated_stacks: args.mapsize_aggregated_stacks,
mapsize_rate_limits: args.mapsize_rate_limits,
exclude_self: args.exclude_self,
debug_info_manager,
..Default::default()
};

Expand Down Expand Up @@ -260,7 +295,7 @@ mod tests {
cmd.arg("--help");
cmd.assert().success();
let actual = String::from_utf8(cmd.unwrap().stdout).unwrap();
insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids <PIDS>\n Specific PIDs to profile\n\n --tids <TIDS>\n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info <PATH_TO_BINARY>\n Show unwind info for given binary\n\n --show-info <PATH_TO_BINARY>\n Show build ID for given binary\n\n -D, --duration <DURATION>\n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging <LOGGING>\n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq <SAMPLE_FREQ_IN_HZ>\n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format <PROFILE_FORMAT>\n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path <PROFILE_PATH>\n Path for the generated profile\n\n --profile-name <PROFILE_NAME>\n Name for the generated profile\n\n --sender <SENDER>\n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --server-url <SERVER_URL>\n \n\n --perf-buffer-bytes <PERF_BUFFER_BYTES>\n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks <MAPSIZE_STACKS>\n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks <MAPSIZE_AGGREGATED_STACKS>\n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits <MAPSIZE_RATE_LIMITS>\n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer <SYMBOLIZER>\n [default: local]\n [possible values: local, none]\n\n -h, --help\n Print help (see a summary with '-h')\n""#);
insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids <PIDS>\n Specific PIDs to profile\n\n --tids <TIDS>\n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info <PATH_TO_BINARY>\n Show unwind info for given binary\n\n --show-info <PATH_TO_BINARY>\n Show build ID for given binary\n\n -D, --duration <DURATION>\n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging <LOGGING>\n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq <SAMPLE_FREQ_IN_HZ>\n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format <PROFILE_FORMAT>\n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path <PROFILE_PATH>\n Path for the generated profile\n\n --profile-name <PROFILE_NAME>\n Name for the generated profile\n\n --sender <SENDER>\n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. 
Used for kernel tests\n - local-disk\n - remote\n\n --server-url <SERVER_URL>\n \n\n --perf-buffer-bytes <PERF_BUFFER_BYTES>\n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks <MAPSIZE_STACKS>\n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks <MAPSIZE_AGGREGATED_STACKS>\n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits <MAPSIZE_RATE_LIMITS>\n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer <SYMBOLIZER>\n [default: local]\n [possible values: local, none]\n\n --debug-info-backend <DEBUG_INFO_BACKEND>\n [default: none]\n [possible values: none, copy, remote]\n\n --max-native-unwind-info-size-mb <MAX_NATIVE_UNWIND_INFO_SIZE_MB>\n approximate max size in megabytes used for the BPF maps that hold unwind information\n \n [default: 2147483647]\n\n --enable-deadlock-detector\n enable parking_lot's deadlock detector\n\n -h, --help\n Print help (see a summary with '-h')\n""#);
}

#[rstest]
Expand Down
Loading

0 comments on commit d962434

Please sign in to comment.