Skip to content

Commit

Permalink
symbolizer: Handle addresses with many symbols
Browse files Browse the repository at this point in the history
Symbolization was completely broken if inline functions were present
because the code assumed that every program counter maps to exactly
one symbol, but that's not true.

We didn't see this often because some applications we ran had no inlined
functions, but it was very apparent with Ruby and Fortran code, especially
with O2 and O3, as the compiler will start inlining at these levels more.

We didn't see this often because some applications we ran had no inlined
functions, but it was very apparent with Ruby and Fortran code.

Test Plan
=========

See PR.
  • Loading branch information
javierhonduco committed Apr 23, 2024
1 parent 05bd9fe commit ea8cfe3
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 42 deletions.
65 changes: 32 additions & 33 deletions src/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ pub fn symbolize_profile(
objs: &HashMap<BuildId, ObjectFileInfo>,
) -> SymbolizedAggregatedProfile {
let mut r = SymbolizedAggregatedProfile::new();
let mut addresses_per_sample = HashMap::new();

let _ = fetch_symbols_for_profile(&mut addresses_per_sample, profile, procs, objs); // best effort
let addresses_per_sample = fetch_symbols_for_profile(profile, procs, objs);

let ksyms: Vec<crate::ksym::Ksym> = KsymIter::from_kallsyms().collect();

Expand All @@ -36,7 +34,7 @@ pub fn symbolize_profile(

if let Some(ustack) = sample.ustack {
symbolized_sample.ustack =
symbolize_native_stack(&mut addresses_per_sample, procs, objs, sample.pid, &ustack);
symbolize_native_stack(&addresses_per_sample, procs, objs, sample.pid, &ustack);
};

if let Some(kstack) = sample.kstack {
Expand Down Expand Up @@ -79,24 +77,18 @@ fn find_mapping(mappings: &[ExecutableMapping], addr: u64) -> Option<ExecutableM
}

fn fetch_symbols_for_profile(
addresses_per_sample: &mut HashMap<PathBuf, HashMap<u64, String>>,
profile: &RawAggregatedProfile,
procs: &HashMap<i32, ProcessInfo>,
objs: &HashMap<BuildId, ObjectFileInfo>,
) -> anyhow::Result<()> {
) -> HashMap<PathBuf, HashMap<u64, Vec<String>>> {
let mut addresses_per_sample: HashMap<PathBuf, HashMap<u64, Vec<String>>> = HashMap::new();

for sample in profile {
let Some(native_stack) = sample.ustack else {
continue;
};
let task_id = sample.pid;

// We should really continue
// Also it would be ideal not to have to query procfs again...
let p = procfs::process::Process::new(task_id)?;
let status = p.status()?;
let tgid = status.tgid;

let Some(info) = procs.get(&tgid) else {
let Some(info) = procs.get(&sample.pid) else {
continue;
};

Expand All @@ -121,7 +113,8 @@ fn fetch_symbols_for_profile(

let key = obj.path.clone();
let addrs = addresses_per_sample.entry(key).or_default();
addrs.insert(normalized_addr, "".to_string()); // <- default value is a bit janky
addrs.insert(normalized_addr, vec!["<default>".to_string()]);
// <- default value is a bit janky
}
None => {
println!("\t\t - [no build id found]");
Expand All @@ -137,21 +130,21 @@ fn fetch_symbols_for_profile(

// second pass, symbolize
for (path, addr_to_symbol_mapping) in addresses_per_sample.iter_mut() {
let addresses = addr_to_symbol_mapping.iter().map(|a| *a.0 - 1).collect();
let symbols: Vec<String> = symbolize_native_stack_blaze(addresses, path);
for (addr, symbol) in addr_to_symbol_mapping.clone().iter_mut().zip(symbols) {
addr_to_symbol_mapping.insert(*addr.0, symbol.to_string());
let addresses = addr_to_symbol_mapping.iter().map(|a| *a.0).collect();
let symbols = symbolize_native_stack_blaze(addresses, path);
for (a, symbol) in addr_to_symbol_mapping.clone().iter_mut().zip(symbols) {
addr_to_symbol_mapping.insert(*a.0, symbol);
}
}

Ok(())
addresses_per_sample
}

fn symbolize_native_stack(
addresses_per_sample: &mut HashMap<PathBuf, HashMap<u64, String>>,
addresses_per_sample: &HashMap<PathBuf, HashMap<u64, Vec<String>>>,
procs: &HashMap<i32, ProcessInfo>,
objs: &HashMap<BuildId, ObjectFileInfo>,
task_id: i32,
pid: i32,
native_stack: &native_stack_t,
) -> Vec<String> {
let mut r = Vec::new();
Expand All @@ -161,33 +154,39 @@ fn symbolize_native_stack(
break;
}

let Some(info) = procs.get(&task_id) else {
return r;
//return Err(anyhow!("process not found"));
let Some(info) = procs.get(&pid) else {
r.push("<could not find process>".to_string());
continue;
};

let Some(mapping) = find_mapping(&info.mappings, addr) else {
return r;
//return Err(anyhow!("could not find mapping"));
r.push("<could not find mapping>".to_string());
continue;
};

// finally
match &mapping.build_id {
Some(build_id) => match objs.get(build_id) {
Some(obj) => {
let failed_to_fetch_symbol =
vec!["<failed to fetch symbol for addr>".to_string()];
let failed_to_symbolize = vec!["<failed to symbolize>".to_string()];

let normalized_addr = addr - mapping.start_addr + mapping.offset
- obj.load_offset
+ obj.load_vaddr;

let func_name = match addresses_per_sample.get(&obj.path) {
let func_names = match addresses_per_sample.get(&obj.path) {
Some(value) => match value.get(&normalized_addr) {
Some(v) => v.to_string(),
None => "<failed to fetch symbol for addr>".to_string(),
Some(v) => v,
None => &failed_to_fetch_symbol,
},
None => "<failed to symbolize>".to_string(),
None => &failed_to_symbolize,
};
//println!("\t\t - {:?}", name);
r.push(func_name.to_string());

for func_name in func_names {
r.push(func_name.clone());
}
}
None => {
debug!("\t\t - [no build id found]");
Expand Down
29 changes: 20 additions & 9 deletions src/usym.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,26 @@ use blazesym::symbolize::Symbolizer;

const ADDR2LINE_BIN: &str = "/usr/bin/addr2line";

pub fn symbolize_native_stack_blaze(addrs: Vec<u64>, object_path: &PathBuf) -> Vec<String> {
pub fn symbolize_native_stack_blaze(addrs: Vec<u64>, object_path: &PathBuf) -> Vec<Vec<String>> {
let mut res = Vec::new();

let src = Source::Elf(Elf::new(object_path));
let symbolizer = Symbolizer::new();
let syms = symbolizer
.symbolize(&src, Input::VirtOffset(&addrs))
.unwrap(); // <----
let syms = match symbolizer.symbolize(&src, Input::VirtOffset(&addrs)) {
Ok(symbolized) => symbolized,
Err(e) => {
res.resize(
addrs.len(),
vec![format!("<blazesym: failed to symbolize due to {}", e)],
);
return res;
}
};

for symbol in syms {
let mut symbols = Vec::new();

for sym in syms.iter() {
match sym {
match symbol {
Symbolized::Sym(Sym {
name,
addr: _,
Expand All @@ -29,16 +38,18 @@ pub fn symbolize_native_stack_blaze(addrs: Vec<u64>, object_path: &PathBuf) -> V
inlined,
..
}) => {
res.push(name.to_string());
symbols.push(name.to_string());

for frame in inlined.iter() {
res.push(format!("{} (inlined)", frame.name));
symbols.push(format!("{} (inlined)", frame.name));
}
}
Symbolized::Unknown(r) => {
res.push(format!("<unknown {}>", r));
symbols.push(format!("<blazesym: unknown symbol due to {}>", r));
}
}

res.push(symbols);
}
res
}
Expand Down

0 comments on commit ea8cfe3

Please sign in to comment.