diff --git a/src/profiler.rs b/src/profiler.rs index d45e484..5907b91 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -3,7 +3,9 @@ use parking_lot::RwLock; use std::collections::hash_map::Entry; use std::collections::hash_map::OccupiedEntry; use std::collections::HashMap; +use std::env::temp_dir; use std::fs; +use std::io::ErrorKind; use std::mem::size_of; use std::mem::ManuallyDrop; use std::mem::MaybeUninit; @@ -20,7 +22,6 @@ use itertools::Itertools; use libbpf_rs::num_possible_cpus; use libbpf_rs::skel::SkelBuilder; use libbpf_rs::skel::{OpenSkel, Skel}; -use libbpf_rs::ErrorKind; use libbpf_rs::MapCore; use libbpf_rs::MapHandle; use libbpf_rs::MapType; @@ -41,8 +42,8 @@ use crate::process::{ ProcessStatus, }; use crate::profile::*; -use crate::unwind_info::compact_unwind_info; use crate::unwind_info::log_unwind_info_sections; +use crate::unwind_info::manager::UnwindInfoManager; use crate::unwind_info::types::CompactUnwindRow; use crate::util::{get_online_cpus, summarize_address_range}; use lightswitch_object::{ExecutableId, ObjectFile}; @@ -121,9 +122,11 @@ pub struct Profiler { /// evictions which might reduce the quality of the profiles and in more work /// for the profiler. max_native_unwind_info_size_mb: i32, + unwind_info_manager: UnwindInfoManager, } pub struct ProfilerConfig { + pub cache_dir: PathBuf, pub libbpf_debug: bool, pub bpf_logging: bool, pub duration: Duration, @@ -142,7 +145,9 @@ pub struct ProfilerConfig { impl Default for ProfilerConfig { fn default() -> Self { + let cache_dir = temp_dir().join("lightswitch"); Self { + cache_dir, libbpf_debug: false, bpf_logging: false, duration: Duration::MAX, @@ -288,6 +293,25 @@ impl Profiler { } pub fn new(profiler_config: ProfilerConfig, stop_signal_receive: Receiver<()>) -> Self { + debug!("Cache directory {}", profiler_config.cache_dir.display()); + if let Err(e) = fs::create_dir(&profiler_config.cache_dir) { + if e.kind() != ErrorKind::AlreadyExists { + error!( + "could not create cache dir at {}", + profiler_config.cache_dir.display() + ); + } + } + let unwind_cache_dir = profiler_config.cache_dir.join("unwind-info").to_path_buf(); + if let Err(e) = fs::create_dir(&unwind_cache_dir) { + if e.kind() != ErrorKind::AlreadyExists { + error!( + "could not create cache dir at {}", + unwind_cache_dir.display() + ); + } + } + let mut native_unwinder_open_object = ManuallyDrop::new(Box::new(MaybeUninit::uninit())); let mut tracers_open_object = ManuallyDrop::new(Box::new(MaybeUninit::uninit())); @@ -387,6 +411,7 @@ impl Profiler { native_unwind_info_bucket_sizes: profiler_config.native_unwind_info_bucket_sizes, debug_info_manager: profiler_config.debug_info_manager, max_native_unwind_info_size_mb: profiler_config.max_native_unwind_info_size_mb, + unwind_info_manager: UnwindInfoManager::new(&unwind_cache_dir, None), } } @@ -430,7 +455,7 @@ impl Profiler { match perf_buffer.poll(Duration::from_millis(100)) { Ok(_) => {} Err(err) => { - if err.kind() != ErrorKind::Interrupted { + if err.kind() != libbpf_rs::ErrorKind::Interrupted { error!("polling events perf buffer failed with {:?}", err); break; } @@ -459,7 +484,7 @@ impl Profiler { match tracers_events_perf_buffer.poll(Duration::from_millis(100)) { Ok(_) => {} Err(err) => { - if err.kind() != ErrorKind::Interrupted { + if err.kind() != libbpf_rs::ErrorKind::Interrupted { error!("polling tracers perf buffer failed with {:?}", err); break; } @@ -1257,32 +1282,34 @@ impl Profiler { ) .entered(); - let unwind_info: Vec = - match compact_unwind_info(&executable_path_open.to_string_lossy()) { - Ok(unwind_info) => unwind_info, - Err(e) => { - let executable_path_str = executable_path; - let known_naughty = executable_path_str.contains("libicudata"); - - // tracing doesn't support a level chosen at runtime: https://github.com/tokio-rs/tracing/issues/2730 - if known_naughty { - debug!( - "failed to get unwind information for {} with {}", - executable_path_str, e - ); - } else { - info!( - "failed to get unwind information for {} with {}", - executable_path_str, e - ); + let unwind_info = self + .unwind_info_manager + .fetch_unwind_info(&executable_path_open, executable_id); + let unwind_info: Vec = match unwind_info { + Ok(unwind_info) => unwind_info, + Err(e) => { + let executable_path_str = executable_path; + let known_naughty = executable_path_str.contains("libicudata"); + + // tracing doesn't support a level chosen at runtime: https://github.com/tokio-rs/tracing/issues/2730 + if known_naughty { + debug!( + "failed to get unwind information for {} with {}", + executable_path_str, e + ); + } else { + info!( + "failed to get unwind information for {} with {}", + executable_path_str, e + ); - if let Err(e) = log_unwind_info_sections(&executable_path_open) { - warn!("log_unwind_info_sections failed with {}", e); - } + if let Err(e) = log_unwind_info_sections(&executable_path_open) { + warn!("log_unwind_info_sections failed with {}", e); } - return; } - }; + return; + } + }; span.exit(); let bucket = @@ -1536,7 +1563,7 @@ impl Profiler { let object_file = match ObjectFile::new(&PathBuf::from(abs_path.clone())) { Ok(f) => f, Err(e) => { - warn!("object_file {} failed with {:?}", abs_path, e); + warn!("object_file {} failed with {}", abs_path, e); // Rather than returning here, we prefer to be able to profile some // parts of the binary continue; diff --git a/src/unwind_info/manager.rs b/src/unwind_info/manager.rs new file mode 100644 index 0000000..235f362 --- /dev/null +++ b/src/unwind_info/manager.rs @@ -0,0 +1,239 @@ +use lightswitch_object::ExecutableId; +use std::collections::BinaryHeap; +use std::fs; +use std::io::BufWriter; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; +use std::time::Instant; +use std::{fs::File, io::BufReader}; + +use tracing::debug; + +use super::persist::{Reader, Writer}; +use crate::unwind_info::types::CompactUnwindRow; + +const DEFAULT_MAX_CACHED_FILES: usize = 1_000; + +#[derive(Debug, PartialEq, Eq)] +struct Usage { + executable_id: ExecutableId, + instant: Instant, +} + +// `BinaryHeap::pop()` returns the biggest element, so reverse it +// to get the smallest one AKA oldest for both `PartialOrd` and `Ord`. +impl PartialOrd for Usage { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Usage { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + other.instant.cmp(&self.instant) + } +} + +/// Provides unwind information with caching on the file system, expiring +/// older files if there are more than `max_cached_files`. +pub struct UnwindInfoManager { + cache_dir: PathBuf, + usage_tracking: BinaryHeap, + max_cached_files: usize, +} + +impl UnwindInfoManager { + pub fn new(cache_dir: &Path, max_cached_files: Option) -> Self { + let max_cached_files = max_cached_files.unwrap_or(DEFAULT_MAX_CACHED_FILES); + debug!( + "Storing unwind information cache in {}", + cache_dir.display() + ); + let mut manager = UnwindInfoManager { + cache_dir: cache_dir.to_path_buf(), + usage_tracking: BinaryHeap::with_capacity(max_cached_files), + max_cached_files, + }; + let _ = manager.bump_already_present(); + manager + } + + pub fn fetch_unwind_info( + &mut self, + executable_path: &Path, + executable_id: ExecutableId, + ) -> anyhow::Result> { + match self.read_from_cache(executable_id) { + Ok(unwind_info) => Ok(unwind_info), + Err(e) => { + debug!( + "error fetch_unwind_info: {:?}, unwind information will be regenerated", + e + ); + // No matter the error, regenerate the unwind information. + let unwind_info = self.write_to_cache(executable_path, executable_id); + if unwind_info.is_ok() { + self.bump(executable_id, None); + } + unwind_info + } + } + } + + fn read_from_cache( + &self, + executable_id: ExecutableId, + ) -> anyhow::Result> { + let unwind_info_path = self.path_for(executable_id); + let file = File::open(unwind_info_path)?; + + let mut buffer = BufReader::new(file); + let mut data = Vec::new(); + buffer.read_to_end(&mut data)?; + let reader = Reader::new(&data)?; + + Ok(reader.unwind_info()?) + } + + fn write_to_cache( + &self, + executable_path: &Path, + executable_id: ExecutableId, + ) -> anyhow::Result> { + let unwind_info_path = self.path_for(executable_id); + let unwind_info_writer = Writer::new(executable_path); + // `File::create()` will truncate an existing file to the size it needs. + let mut file = BufWriter::new(File::create(unwind_info_path)?); + unwind_info_writer.write(&mut file) + } + + fn path_for(&self, executable_id: ExecutableId) -> PathBuf { + self.cache_dir.join(format!("{:x}", executable_id)) + } + + pub fn bump_already_present(&mut self) -> anyhow::Result<()> { + for direntry in fs::read_dir(&self.cache_dir)?.flatten() { + let name = direntry.file_name(); + let Some(name) = name.to_str() else { continue }; + let executable_id = ExecutableId::from_str_radix(name, 16)?; + + let metadata = fs::metadata(direntry.path())?; + let modified = metadata.created()?; + + self.bump(executable_id, Some(Instant::now() - modified.elapsed()?)); + } + + Ok(()) + } + + fn bump(&mut self, executable_id: ExecutableId, instant: Option) { + let instant = instant.unwrap_or(Instant::now()); + + self.usage_tracking.push(Usage { + executable_id, + instant, + }); + + self.maybe_evict() + } + + fn maybe_evict(&mut self) { + if self.usage_tracking.len() > self.max_cached_files { + if let Some(evict) = self.usage_tracking.pop() { + let _ = fs::remove_file(self.path_for(evict.executable_id)); + } + } + } +} + +#[cfg(test)] +mod tests { + use std::{ + io::{Seek, SeekFrom, Write}, + path::PathBuf, + time::Duration, + }; + + use fs::OpenOptions; + + use super::*; + use crate::unwind_info::compact_unwind_info; + + #[test] + fn test_custom_usage_ordering() { + let now = Instant::now(); + let before = Usage { + executable_id: 0xBAD, + instant: now, + }; + let after = Usage { + executable_id: 0xFAD, + instant: now + Duration::from_secs(10), + }; + + // `BinaryHeap::pop()` returns the max element so the ordering is switched. + assert_eq!([&before, &after].iter().max().unwrap().executable_id, 0xBAD); + // Ensure that `Ord` and `PartialOrd` agree. + assert_eq!(before.cmp(&after), before.partial_cmp(&after).unwrap()); + } + + #[test] + fn test_unwind_info_manager_unwind_info() { + let unwind_info = compact_unwind_info("/proc/self/exe").unwrap(); + let tmpdir = tempfile::TempDir::new().unwrap(); + let mut manager = UnwindInfoManager::new(tmpdir.path(), None); + + // The unwind info fetched with the manager should be correct + // both when it's a cache miss and a cache hit. + for _ in 0..2 { + let manager_unwind_info = + manager.fetch_unwind_info(&PathBuf::from("/proc/self/exe"), 0xFABADA); + let manager_unwind_info = manager_unwind_info.unwrap(); + assert_eq!(unwind_info, manager_unwind_info); + } + } + + #[test] + fn test_unwind_info_manager_corrupt() { + let unwind_info = compact_unwind_info("/proc/self/exe").unwrap(); + let tmpdir = tempfile::TempDir::new().unwrap(); + let mut manager = UnwindInfoManager::new(tmpdir.path(), None); + + // Cache unwind info. + let manager_unwind_info = + manager.fetch_unwind_info(&PathBuf::from("/proc/self/exe"), 0xFABADA); + assert!(manager_unwind_info.is_ok()); + let manager_unwind_info = manager_unwind_info.unwrap(); + assert_eq!(unwind_info, manager_unwind_info); + + // Corrupt it. + let mut file = OpenOptions::new() + .write(true) + .open(tmpdir.path().join(format!("{:x}", 0xFABADA))) + .unwrap(); + file.seek(SeekFrom::End(-20)).unwrap(); + file.write_all(&[0; 20]).unwrap(); + + // Make sure the corrupted one gets replaced and things work. + let manager_unwind_info = + manager.fetch_unwind_info(&PathBuf::from("/proc/self/exe"), 0xFABADA); + let manager_unwind_info = manager_unwind_info.unwrap(); + assert_eq!(unwind_info, manager_unwind_info); + } + + #[test] + fn test_unwind_info_manager_cache_eviction() { + let tmpdir = tempfile::TempDir::new().unwrap(); + let path = tmpdir.path(); + + // Creaty dummy cache entries. + for i in 0..20 { + File::create(path.join(format!("{:x}", i))).unwrap(); + } + + assert_eq!(fs::read_dir(path).unwrap().collect::>().len(), 20); + UnwindInfoManager::new(path, Some(4)); + assert_eq!(fs::read_dir(path).unwrap().collect::>().len(), 4); + } +} diff --git a/src/unwind_info/mod.rs b/src/unwind_info/mod.rs index a7085f8..455d0b8 100644 --- a/src/unwind_info/mod.rs +++ b/src/unwind_info/mod.rs @@ -1,4 +1,5 @@ mod convert; +pub mod manager; mod optimize; pub mod pages; pub mod persist; diff --git a/src/unwind_info/persist.rs b/src/unwind_info/persist.rs index 98dec09..5f9b623 100644 --- a/src/unwind_info/persist.rs +++ b/src/unwind_info/persist.rs @@ -1,4 +1,3 @@ -#![allow(dead_code)] use plain::Plain; use ring::digest::{Context, SHA256}; use std::io::Read; @@ -40,18 +39,18 @@ unsafe impl Plain for Header {} unsafe impl Plain for CompactUnwindRow {} /// Writes compact information to a given writer. -struct Writer { +pub struct Writer { executable_path: PathBuf, } impl Writer { - fn new(executable_path: &Path) -> Self { + pub fn new(executable_path: &Path) -> Self { Writer { executable_path: executable_path.to_path_buf(), } } - fn write(self, writer: &mut W) -> anyhow::Result<()> { + pub fn write(self, writer: &mut W) -> anyhow::Result> { let unwind_info = self.read_unwind_info()?; // Write dummy header. self.write_header(writer, 0, None)?; @@ -59,7 +58,7 @@ impl Writer { // Write real header. writer.seek(SeekFrom::Start(0))?; self.write_header(writer, unwind_info.len(), Some(digest))?; - Ok(()) + Ok(unwind_info) } fn read_unwind_info(&self) -> anyhow::Result> { @@ -119,7 +118,7 @@ pub enum ReaderError { } /// Reads compact information of a bytes slice. -struct Reader<'a> { +pub struct Reader<'a> { header: Header, data: &'a [u8], }