Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of debug information manager #123

Merged
merged 1 commit into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lightswitch-object/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lightswitch-object"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
description = "Deals with object files"
license = "MIT"
Expand Down
5 changes: 5 additions & 0 deletions lightswitch-object/src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ impl ObjectFile {
Ok(BuildId::sha256_from_digest(&self.code_hash))
}

/// Returns whether the object has debug symbols.
pub fn has_debug_info(&self) -> bool {
self.object.has_debug_symbols()
}

pub fn is_dynamic(&self) -> bool {
self.object.kind() == ObjectKind::Dynamic
}
Expand Down
10 changes: 10 additions & 0 deletions src/cli/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ pub(crate) enum Symbolizer {
None,
}

#[derive(PartialEq, clap::ValueEnum, Debug, Clone, Default)]
pub(crate) enum DebugInfoBackend {
#[default]
None,
Copy,
Remote,
}

#[derive(Parser, Debug)]
pub(crate) struct CliArgs {
/// Specific PIDs to profile
Expand Down Expand Up @@ -126,4 +134,6 @@ pub(crate) struct CliArgs {
pub(crate) exclude_self: bool,
#[arg(long, default_value_t, value_enum)]
pub(crate) symbolizer: Symbolizer,
#[arg(long, default_value_t, value_enum)]
pub(crate) debug_info_backend: DebugInfoBackend,
}
51 changes: 34 additions & 17 deletions src/cli/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ use std::io::Write;
use std::panic;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::Duration;

use clap::Parser;
use crossbeam_channel::bounded;
use inferno::flamegraph;
use lightswitch::collector::{AggregatorCollector, Collector, NullCollector, StreamingCollector};
use lightswitch::debug_info::DebugInfoManager;
use lightswitch_metadata::metadata_provider::GlobalMetadataProvider;
use nix::unistd::Uid;
use prost::Message;
Expand All @@ -21,6 +23,9 @@ use tracing_subscriber::FmtSubscriber;
use lightswitch_capabilities::system_info::SystemInfo;
use lightswitch_metadata::metadata_provider::ThreadSafeGlobalMetadataProvider;

use lightswitch::debug_info::{
DebugInfoBackendFilesystem, DebugInfoBackendNull, DebugInfoBackendRemote,
};
use lightswitch::profile::symbolize_profile;
use lightswitch::profile::{fold_profile, to_pprof};
use lightswitch::profiler::{Profiler, ProfilerConfig};
Expand All @@ -32,12 +37,13 @@ mod args;
mod validators;

use crate::args::CliArgs;
use crate::args::DebugInfoBackend;
use crate::args::LoggingLevel;
use crate::args::ProfileFormat;
use crate::args::ProfileSender;
use crate::args::Symbolizer;

const DEFAULT_PPROF_INGEST_URL: &str = "http://localhost:4567/pprof/new";
const DEFAULT_SERVER_URL: &str = "http://localhost:4567";

/// Exit the main thread if any thread panics. We prefer this behaviour because pretty much every
/// thread is load bearing for the correct functioning.
Expand Down Expand Up @@ -98,24 +104,34 @@ fn main() -> Result<(), Box<dyn Error>> {
}
}

let server_url = args.server_url.unwrap_or(DEFAULT_SERVER_URL.into());

let metadata_provider: ThreadSafeGlobalMetadataProvider =
Arc::new(Mutex::new(GlobalMetadataProvider::default()));

let collector = Arc::new(Mutex::new(match args.sender {
ProfileSender::None => Box::new(NullCollector::new()) as Box<dyn Collector + Send>,
ProfileSender::LocalDisk => {
Box::new(AggregatorCollector::new()) as Box<dyn Collector + Send>
}
ProfileSender::Remote => Box::new(StreamingCollector::new(
args.symbolizer == Symbolizer::Local,
args.server_url
.as_ref()
.map_or(DEFAULT_PPROF_INGEST_URL, |v| v),
ProfilerConfig::default().session_duration,
args.sample_freq,
metadata_provider.clone(),
)) as Box<dyn Collector + Send>,
}));
let collector: Arc<Mutex<Box<dyn Collector + Send>>> =
Arc::new(Mutex::new(match args.sender {
ProfileSender::None => Box::new(NullCollector::new()),
ProfileSender::LocalDisk => Box::new(AggregatorCollector::new()),
ProfileSender::Remote => Box::new(StreamingCollector::new(
args.symbolizer == Symbolizer::Local,
&server_url,
ProfilerConfig::default().session_duration,
args.sample_freq,
metadata_provider.clone(),
)),
}));

let debug_info_manager: Box<dyn DebugInfoManager> = match args.debug_info_backend {
DebugInfoBackend::None => Box::new(DebugInfoBackendNull {}),
DebugInfoBackend::Copy => Box::new(DebugInfoBackendFilesystem {
path: PathBuf::from("/tmp"),
}),
DebugInfoBackend::Remote => Box::new(DebugInfoBackendRemote {
http_client_timeout: Duration::from_millis(500),
server_url,
}),
};

let profiler_config = ProfilerConfig {
libbpf_debug: args.libbpf_debug,
Expand All @@ -128,6 +144,7 @@ fn main() -> Result<(), Box<dyn Error>> {
mapsize_aggregated_stacks: args.mapsize_aggregated_stacks,
mapsize_rate_limits: args.mapsize_rate_limits,
exclude_self: args.exclude_self,
debug_info_manager,
..Default::default()
};

Expand Down Expand Up @@ -260,7 +277,7 @@ mod tests {
cmd.arg("--help");
cmd.assert().success();
let actual = String::from_utf8(cmd.unwrap().stdout).unwrap();
insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids <PIDS>\n Specific PIDs to profile\n\n --tids <TIDS>\n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info <PATH_TO_BINARY>\n Show unwind info for given binary\n\n --show-info <PATH_TO_BINARY>\n Show build ID for given binary\n\n -D, --duration <DURATION>\n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging <LOGGING>\n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq <SAMPLE_FREQ_IN_HZ>\n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format <PROFILE_FORMAT>\n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path <PROFILE_PATH>\n Path for the generated profile\n\n --profile-name <PROFILE_NAME>\n Name for the generated profile\n\n --sender <SENDER>\n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. Used for kernel tests\n - local-disk\n - remote\n\n --server-url <SERVER_URL>\n \n\n --perf-buffer-bytes <PERF_BUFFER_BYTES>\n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks <MAPSIZE_STACKS>\n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks <MAPSIZE_AGGREGATED_STACKS>\n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits <MAPSIZE_RATE_LIMITS>\n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer <SYMBOLIZER>\n [default: local]\n [possible values: local, none]\n\n -h, --help\n Print help (see a summary with '-h')\n""#);
insta::assert_yaml_snapshot!(actual, @r#""Usage: lightswitch [OPTIONS]\n\nOptions:\n --pids <PIDS>\n Specific PIDs to profile\n\n --tids <TIDS>\n Specific TIDs to profile (these can be outside the PIDs selected above)\n\n --show-unwind-info <PATH_TO_BINARY>\n Show unwind info for given binary\n\n --show-info <PATH_TO_BINARY>\n Show build ID for given binary\n\n -D, --duration <DURATION>\n How long this agent will run in seconds\n \n [default: 18446744073709551615]\n\n --libbpf-debug\n Enable libbpf logs. This includes the BPF verifier output\n\n --bpf-logging\n Enable BPF programs logging\n\n --logging <LOGGING>\n Set lightswitch's logging level\n \n [default: info]\n [possible values: trace, debug, info, warn, error]\n\n --sample-freq <SAMPLE_FREQ_IN_HZ>\n Per-CPU Sampling Frequency in Hz\n \n [default: 19]\n\n --profile-format <PROFILE_FORMAT>\n Output file for Flame Graph in SVG format\n \n [default: flame-graph]\n [possible values: none, flame-graph, pprof]\n\n --profile-path <PROFILE_PATH>\n Path for the generated profile\n\n --profile-name <PROFILE_NAME>\n Name for the generated profile\n\n --sender <SENDER>\n Where to write the profile\n \n [default: local-disk]\n\n Possible values:\n - none: Discard the profile. Used for kernel tests\n - local-disk\n - remote\n\n --server-url <SERVER_URL>\n \n\n --perf-buffer-bytes <PERF_BUFFER_BYTES>\n Size of each profiler perf buffer, in bytes (must be a power of 2)\n \n [default: 524288]\n\n --mapsize-info\n Print eBPF map sizes after creation\n\n --mapsize-stacks <MAPSIZE_STACKS>\n max number of individual stacks to capture before aggregation\n \n [default: 100000]\n\n --mapsize-aggregated-stacks <MAPSIZE_AGGREGATED_STACKS>\n max number of unique stacks after aggregation\n \n [default: 10000]\n\n --mapsize-rate-limits <MAPSIZE_RATE_LIMITS>\n max number of rate limit entries\n \n [default: 5000]\n\n --exclude-self\n Do not profile the profiler (myself)\n\n --symbolizer <SYMBOLIZER>\n [default: local]\n [possible values: local, none]\n\n --debug-info-backend <DEBUG_INFO_BACKEND>\n [default: none]\n [possible values: none, copy, remote]\n\n -h, --help\n Print help (see a summary with '-h')\n""#);
}

#[rstest]
Expand Down
2 changes: 1 addition & 1 deletion src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl StreamingCollector {
) -> Self {
Self {
local_symbolizer,
pprof_ingest_url: pprof_ingest_url.into(),
pprof_ingest_url: format!("{}/pprof/new", pprof_ingest_url),
http_client_timeout: Duration::from_secs(30),
profile_duration,
profile_frequency_hz,
Expand Down
165 changes: 165 additions & 0 deletions src/debug_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
use std::io::Read;
use std::path::PathBuf;
use std::time::Duration;

use reqwest::StatusCode;
use tracing::instrument;

use lightswitch_object::BuildId;
use lightswitch_object::ExecutableId;

/// Handles with debug information.
///
/// This currently experimental, not feature-complete and not used yet during
/// symbolization. The end goal would be to keep track of every debug info
/// that's either present locally or remotely (depending on configuration), while
/// minimizing the number of open FDs, file copies, and race condition windows.
pub trait DebugInfoManager {
fn add_if_not_present(
&self,
name: &str,
build_id: &BuildId,
executable_id: ExecutableId,
file: &mut std::fs::File,
) -> anyhow::Result<()>;
fn debug_info_path(&self) -> Option<PathBuf>;
}

pub struct DebugInfoBackendNull {}
impl DebugInfoManager for DebugInfoBackendNull {
fn add_if_not_present(
&self,
_name: &str,
_build_id: &BuildId,
_executable_id: ExecutableId,
_file: &mut std::fs::File,
) -> anyhow::Result<()> {
Ok(())
}

fn debug_info_path(&self) -> Option<PathBuf> {
None
}
}

#[derive(Debug)]
pub struct DebugInfoBackendFilesystem {
pub path: PathBuf,
}
impl DebugInfoManager for DebugInfoBackendFilesystem {
#[instrument]
fn add_if_not_present(
&self,
_name: &str,
build_id: &BuildId,
executable_id: ExecutableId,
file: &mut std::fs::File,
) -> anyhow::Result<()> {
// try to find, else extract
if self.find_in_fs(build_id) {
return Ok(());
}

self.add_to_fs(build_id, executable_id, file)
}

fn debug_info_path(&self) -> Option<PathBuf> {
todo!()
}
}

impl DebugInfoBackendFilesystem {
fn find_in_fs(&self, build_id: &BuildId) -> bool {
self.path.join(build_id.to_string()).exists()
}

fn add_to_fs(
&self,
build_id: &BuildId,
_executable_id: ExecutableId,
file: &mut std::fs::File,
) -> anyhow::Result<()> {
// TODO: add support for other methods beyond copying. For example
// hardlinks could be used and only fall back to copying if the src
// and dst filesystems differ.
let mut writer = std::fs::File::create(self.path.join(build_id.to_string()))?;
std::io::copy(file, &mut writer)?;
Ok(())
}
}

#[derive(Debug)]
pub struct DebugInfoBackendRemote {
pub http_client_timeout: Duration,
pub server_url: String,
}
impl DebugInfoManager for DebugInfoBackendRemote {
#[instrument(level = "debug")]
fn add_if_not_present(
&self,
name: &str,
build_id: &BuildId,
executable_id: ExecutableId,
file: &mut std::fs::File,
) -> anyhow::Result<()> {
// TODO: add a local cache to not have to reach to the backend
// unnecessarily.
if self.find_in_backend(build_id)? {
return Ok(());
}

// TODO: do this in another thread.
self.upload_to_backend(name, build_id, executable_id, file)?;
Ok(())
}

fn debug_info_path(&self) -> Option<PathBuf> {
None
}
}

impl DebugInfoBackendRemote {
/// Whether the backend knows about some debug information.
#[instrument(level = "debug")]
fn find_in_backend(&self, build_id: &BuildId) -> anyhow::Result<bool> {
let client_builder = reqwest::blocking::Client::builder().timeout(self.http_client_timeout);
let client = client_builder.build()?;
let response = client
.get(format!(
"{}/debuginfo/{}",
self.server_url.clone(),
build_id
))
.send();

Ok(response?.status() == StatusCode::OK)
}

/// Send the debug information to the backend.
#[instrument]
fn upload_to_backend(
&self,
name: &str,
build_id: &BuildId,
executable_id: ExecutableId,
file: &mut std::fs::File,
) -> anyhow::Result<()> {
let client_builder = reqwest::blocking::Client::builder().timeout(self.http_client_timeout);
let client = client_builder.build()?;
let mut debug_info = Vec::new();
file.read_to_end(&mut debug_info)?;

let response = client
.post(format!(
"{}/debuginfo/new/{}/{}/{}",
self.server_url.clone(),
name,
build_id,
executable_id
))
.body(debug_info)
.send()?;
println!("wrote debug info to server {:?}", response);
Ok(())
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod bpf;
pub mod collector;
pub mod debug_info;
pub mod ksym;
pub mod perf_events;
pub mod process;
Expand Down
Loading