From 0270afb56db466a3840281b2e8a937e34b80da45 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Sun, 1 Aug 2021 00:01:38 +0300 Subject: [PATCH] Allow attaching co_firstlineno to frame name Before this PR, we had 2 modes: * --function NOT passed: attach the line number from frame->f_lasti to the frame name * --function passed: do not attach any line number The main reason to avoid attaching f_lasti is to have frames aggregated by the function name only; so different "lines" of the same function are aggregated as a single frame. However, line numbers are useful as identifiers for functions, especially in large files / for common function names such as "__init__", which are likely to appear multiple times in the same file. This PR allows attaching code->co_firstlineno instead, which can serve to help in identification, while not preventing frames of the same function from being aggregated together. After this PR, we have these options: * --function, --nolineno NOT passed: same as before - attach f_lasti to the frame name * --function passed: attach co_firstlineno to the frame name * --nolineno: do not attach any line number (also, do not spend time on retrieving the line number from the remote process). Closes: #424 --- src/config.rs | 23 ++++++++++++++++++++--- src/python_spy.rs | 6 +++--- src/stack_trace.rs | 29 +++++++++++++++++------------ 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/config.rs b/src/config.rs index 00d4e584..f539f0f9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -50,6 +50,8 @@ pub struct Config { pub dump_locals: u64, #[doc(hidden)] pub full_filenames: bool, + #[doc(hidden)] + pub lineno: LineNo, } arg_enum!{ @@ -76,6 +78,13 @@ pub enum RecordDuration { Seconds(u64) } +#[derive(Debug, Clone, Eq, PartialEq, Copy)] +pub enum LineNo { + NoLine, + FirstLineNo, + LastInstruction +} + impl Default for Config { /// Initializes a new Config object with default parameters #[allow(dead_code)] @@ -86,7 +95,7 @@ impl Default for Config { duration: RecordDuration::Unlimited, native: false, gil_only: false, include_idle: false, include_thread_ids: false, hide_progress: false, capture_output: true, dump_json: false, dump_locals: 0, subprocesses: false, - full_filenames: false} + full_filenames: false, lineno: LineNo::LastInstruction } } } @@ -182,7 +191,10 @@ impl Config { .arg(Arg::with_name("function") .short("F") .long("function") - .help("Aggregate samples by function name instead of by line number")) + .help("Aggregate samples by function's first line number, instead of current line number")) + .arg(Arg::with_name("nolineno") + .long("nolineno") + .help("Do not show line numbers")) .arg(Arg::with_name("threads") .short("t") .long("threads") @@ -278,7 +290,7 @@ impl Config { config.python_program = matches.values_of("python_program").map(|vals| { vals.map(|v| v.to_owned()).collect() }); - config.show_line_numbers = matches.occurrences_of("function") == 0; + config.show_line_numbers = matches.occurrences_of("nolineno") == 0; config.include_idle = matches.occurrences_of("idle") > 0; config.gil_only = matches.occurrences_of("gil") > 0; config.include_thread_ids = matches.occurrences_of("threads") > 0; @@ -289,6 +301,11 @@ impl Config { config.dump_locals = matches.occurrences_of("locals"); config.subprocesses = matches.occurrences_of("subprocesses") > 0; config.full_filenames = matches.occurrences_of("full_filenames") > 0; + config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { LineNo::FirstLineNo } else { LineNo::LastInstruction }; + if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 { + eprintln!("--function & --nolinenos can't be used together"); + std::process::exit(1); + } config.capture_output = config.command != "record" || matches.occurrences_of("capture") > 0; if !config.capture_output { diff --git a/src/python_spy.rs b/src/python_spy.rs index e8af5749..5c349145 100644 --- a/src/python_spy.rs +++ b/src/python_spy.rs @@ -18,7 +18,7 @@ use proc_maps::{get_process_maps, MapRange}; use crate::binary_parser::{parse_binary, BinaryInfo}; -use crate::config::{Config, LockingStrategy}; +use crate::config::{Config, LockingStrategy, LineNo}; #[cfg(unwind)] use crate::native_stack_trace::NativeStack; use crate::python_bindings::{pyruntime, v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5}; @@ -223,7 +223,7 @@ impl PythonSpy { while !threads.is_null() { // Get the stack trace of the python thread let thread = self.process.copy_pointer(threads).context("Failed to copy PyThreadState")?; - let mut trace = get_stack_trace(&thread, &self.process, self.config.dump_locals > 0)?; + let mut trace = get_stack_trace(&thread, &self.process, self.config.dump_locals > 0, self.config.lineno)?; // Try getting the native thread id let python_thread_id = thread.thread_id(); @@ -678,7 +678,7 @@ fn check_interpreter_addresses(addrs: &[usize], }; // as a final sanity check, try getting the stack_traces, and only return if this works - if thread.interp() as usize == addr && get_stack_traces(&interp, process).is_ok() { + if thread.interp() as usize == addr && get_stack_traces(&interp, process, LineNo::NoLine).is_ok() { return Ok(addr); } } diff --git a/src/stack_trace.rs b/src/stack_trace.rs index 00ff7b7d..16f87fe9 100644 --- a/src/stack_trace.rs +++ b/src/stack_trace.rs @@ -8,6 +8,7 @@ use serde_derive::Serialize; use crate::python_interpreters::{InterpreterState, ThreadState, FrameObject, CodeObject, TupleObject}; use crate::python_data_access::{copy_string, copy_bytes}; +use crate::config::LineNo; /// Call stack for a single python thread #[derive(Debug, Clone, Serialize)] @@ -63,14 +64,14 @@ pub struct ProcessInfo { } /// Given an InterpreterState, this function returns a vector of stack traces for each thread -pub fn get_stack_traces(interpreter: &I, process: &Process) -> Result, Error> +pub fn get_stack_traces(interpreter: &I, process: &Process, lineno: LineNo) -> Result, Error> where I: InterpreterState { // TODO: deprecate this method let mut ret = Vec::new(); let mut threads = interpreter.head(); while !threads.is_null() { let thread = process.copy_pointer(threads).context("Failed to copy PyThreadState")?; - ret.push(get_stack_trace(&thread, process, false)?); + ret.push(get_stack_trace(&thread, process, false, lineno)?); // This seems to happen occasionally when scanning BSS addresses for valid interpeters if ret.len() > 4096 { return Err(format_err!("Max thread recursion depth reached")); @@ -81,7 +82,7 @@ pub fn get_stack_traces(interpreter: &I, process: &Process) -> Result(thread: &T, process: &Process, copy_locals: bool) -> Result +pub fn get_stack_trace(thread: &T, process: &Process, copy_locals: bool, lineno: LineNo) -> Result where T: ThreadState { // TODO: just return frames here? everything else probably should be returned out of scope let mut frames = Vec::new(); @@ -93,15 +94,19 @@ pub fn get_stack_trace(thread: &T, process: &Process, copy_locals: bool) -> R let filename = copy_string(code.filename(), process).context("Failed to copy filename")?; let name = copy_string(code.name(), process).context("Failed to copy function name")?; - let line = match get_line_number(&code, frame.lasti(), process) { - Ok(line) => line, - Err(e) => { - // Failling to get the line number really shouldn't be fatal here, but - // can happen in extreme cases (https://github.com/benfred/py-spy/issues/164) - // Rather than fail set the linenumber to 0. This is used by the native extensions - // to indicate that we can't load a line number and it should be handled gracefully - warn!("Failed to get line number from {}.{}: {}", filename, name, e); - 0 + let line = match lineno { + LineNo::NoLine => 0, + LineNo::FirstLineNo => code.first_lineno(), + LineNo::LastInstruction => match get_line_number(&code, frame.lasti(), process) { + Ok(line) => line, + Err(e) => { + // Failling to get the line number really shouldn't be fatal here, but + // can happen in extreme cases (https://github.com/benfred/py-spy/issues/164) + // Rather than fail set the linenumber to 0. This is used by the native extensions + // to indicate that we can't load a line number and it should be handled gracefully + warn!("Failed to get line number from {}.{}: {}", filename, name, e); + 0 + } } };