diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 517fe01e..878ea945 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,8 +13,16 @@ env: CARGO_TERM_COLOR: always jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - uses: pre-commit/action@v3.0.0 + build-linux-armv7: runs-on: [self-hosted, linux, arm] + needs: [lint] steps: - name: Setup python run: | @@ -28,6 +36,7 @@ jobs: build: runs-on: ${{ matrix.os }} + needs: [lint] strategy: fail-fast: false matrix: @@ -82,6 +91,7 @@ jobs: build-linux-cross: runs-on: ubuntu-latest + needs: [lint] strategy: fail-fast: false matrix: @@ -109,6 +119,7 @@ jobs: build-freebsd: runs-on: macos-latest + needs: [lint] timeout-minutes: 30 strategy: matrix: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..e7e6d301 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ +repos: + - repo: https://github.com/codespell-project/codespell + rev: v2.2.4 + hooks: + - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] + exclude: (?x)^(ci/testdata.*|images.*)$ + - repo: https://github.com/doublify/pre-commit-rust + rev: v1.0 + hooks: + - id: fmt + - id: cargo-check + - repo: local + hooks: + - id: cargo-clippy + name: cargo clippy + entry: cargo clippy -- -D warnings + language: system + files: \.rs$ + pass_filenames: false diff --git a/build.rs b/build.rs index 3d8e7bdd..cec961ca 100644 --- a/build.rs +++ b/build.rs @@ -8,6 +8,6 @@ fn main() { match env::var("CARGO_CFG_TARGET_OS").unwrap().as_ref() { "windows" => println!("cargo:rustc-cfg=unwind"), "linux" => println!("cargo:rustc-cfg=unwind"), - _ => { } + _ => {} } } diff --git a/ci/testdata/cython_test.pyx b/ci/testdata/cython_test.pyx index 0d226182..8f93b45e 100644 --- a/ci/testdata/cython_test.pyx +++ b/ci/testdata/cython_test.pyx @@ -5,7 +5,7 @@ from cython cimport floating cpdef sqrt(floating value): # solve for the square root of value by finding the zeros of - # 'x * x - value = 0' using newtons meethod + # 'x * x - value = 0' using newtons method cdef double x = value / 2 for _ in range(8): x -= (x * x - value) / (2 * x) diff --git a/generate_bindings.py b/generate_bindings.py index 48617318..6323a440 100644 --- a/generate_bindings.py +++ b/generate_bindings.py @@ -153,6 +153,7 @@ def extract_bindings(cpython_path, version, configure=False): o.write("#![allow(clippy::default_trait_access)]\n") o.write("#![allow(clippy::cast_lossless)]\n") o.write("#![allow(clippy::trivially_copy_pass_by_ref)]\n\n") + o.write("#![allow(clippy::upper_case_acronyms)]\n\n") o.write(open(os.path.join(cpython_path, "bindgen_output.rs")).read()) diff --git a/pyproject.toml b/pyproject.toml index d10b9d6c..10aba3db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,3 +16,7 @@ classifiers = [ [tool.maturin] bindings = "bin" + +[tool.codespell] +ignore-words-list = "crate" +skip = "./.git,./.github,./target,./ci/testdata,./images/" diff --git a/src/binary_parser.rs b/src/binary_parser.rs index 4d97a52b..a8d977a0 100644 --- a/src/binary_parser.rs +++ b/src/binary_parser.rs @@ -1,10 +1,8 @@ - use std::collections::HashMap; use std::fs::File; use std::path::Path; use anyhow::Error; -use goblin; use goblin::Object; use memmap::Mmap; @@ -15,7 +13,7 @@ pub struct BinaryInfo { pub bss_size: u64, pub offset: u64, pub addr: u64, - pub size: u64 + pub size: u64, } impl BinaryInfo { @@ -42,12 +40,18 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result mach, goblin::mach::Mach::Fat(fat) => { - let arch = fat.iter_arches().find(|arch| - match arch { + let arch = fat + .iter_arches() + .find(|arch| match arch { Ok(arch) => arch.is_64(), - Err(_) => false - } - ).ok_or_else(|| format_err!("Failed to find 64 bit arch in FAT archive in {}", filename.display()))??; + Err(_) => false, + }) + .ok_or_else(|| { + format_err!( + "Failed to find 64 bit arch in FAT archive in {}", + filename.display() + ) + })??; let bytes = &buffer[arch.offset as usize..][..arch.size as usize]; goblin::mach::MachO::parse(bytes, 0)? } @@ -69,31 +73,51 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result { - let bss_header = elf.section_headers + let bss_header = elf + .section_headers .iter() - .find(|ref header| header.sh_type == goblin::elf::section_header::SHT_NOBITS) - .ok_or_else(|| format_err!("Failed to find BSS section header in {}", filename.display()))?; - - let program_header = elf.program_headers + .find(|header| header.sh_type == goblin::elf::section_header::SHT_NOBITS) + .ok_or_else(|| { + format_err!( + "Failed to find BSS section header in {}", + filename.display() + ) + })?; + + let program_header = elf + .program_headers .iter() - .find(|ref header| - header.p_type == goblin::elf::program_header::PT_LOAD && - header.p_flags & goblin::elf::program_header::PF_X != 0) - .ok_or_else(|| format_err!("Failed to find executable PT_LOAD program header in {}", filename.display()))?; + .find(|header| { + header.p_type == goblin::elf::program_header::PT_LOAD + && header.p_flags & goblin::elf::program_header::PF_X != 0 + }) + .ok_or_else(|| { + format_err!( + "Failed to find executable PT_LOAD program header in {}", + filename.display() + ) + })?; // p_vaddr may be larger than the map address in case when the header has an offset and // the map address is relatively small. In this case we can default to 0. - let offset = offset.checked_sub(program_header.p_vaddr).unwrap_or(0); + let offset = offset.saturating_sub(program_header.p_vaddr); for sym in elf.syms.iter() { let name = elf.strtab[sym.st_name].to_string(); @@ -103,36 +127,49 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result { for export in pe.exports { if let Some(name) = export.name { if let Some(export_offset) = export.offset { - symbols.insert(name.to_string(), export_offset as u64 + offset as u64); + symbols.insert(name.to_string(), export_offset as u64 + offset); } } } pe.sections .iter() - .find(|ref section| section.name.starts_with(b".data")) - .ok_or_else(|| format_err!("Failed to find .data section in PE binary of {}", filename.display())) + .find(|section| section.name.starts_with(b".data")) + .ok_or_else(|| { + format_err!( + "Failed to find .data section in PE binary of {}", + filename.display() + ) + }) .map(|data_section| { let bss_addr = u64::from(data_section.virtual_address) + offset; let bss_size = u64::from(data_section.virtual_size); - BinaryInfo{filename: filename.to_owned(), symbols, bss_addr, bss_size, offset, addr, size} + BinaryInfo { + filename: filename.to_owned(), + symbols, + bss_addr, + bss_size, + offset, + addr, + size, + } }) - }, - _ => { - Err(format_err!("Unhandled binary type")) } + _ => Err(format_err!("Unhandled binary type")), } } diff --git a/src/config.rs b/src/config.rs index a9e80118..55c2151b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,7 @@ -use clap::{ArgEnum, Arg, Command, crate_description, crate_name, crate_version, PossibleValue, value_parser}; +use clap::{ + crate_description, crate_name, crate_version, value_parser, Arg, ArgEnum, Command, + PossibleValue, +}; use remoteprocess::Pid; /// Options on how to collect samples from a python process @@ -63,7 +66,7 @@ pub struct Config { pub enum FileFormat { flamegraph, raw, - speedscope + speedscope, } impl FileFormat { @@ -87,41 +90,55 @@ impl std::str::FromStr for FileFormat { } } - - #[derive(Debug, Clone, Eq, PartialEq)] pub enum LockingStrategy { NonBlocking, #[allow(dead_code)] AlreadyLocked, - Lock + Lock, } #[derive(Debug, Clone, Eq, PartialEq)] pub enum RecordDuration { Unlimited, - Seconds(u64) + Seconds(u64), } #[derive(Debug, Clone, Eq, PartialEq, Copy)] pub enum LineNo { NoLine, - FirstLineNo, - LastInstruction + First, + LastInstruction, } impl Default for Config { /// Initializes a new Config object with default parameters #[allow(dead_code)] fn default() -> Config { - Config{pid: None, python_program: None, filename: None, format: None, - command: String::from("top"), - blocking: LockingStrategy::Lock, show_line_numbers: false, sampling_rate: 100, - duration: RecordDuration::Unlimited, native: false, - gil_only: false, include_idle: false, include_thread_ids: false, - hide_progress: false, capture_output: true, dump_json: false, dump_locals: 0, subprocesses: false, - full_filenames: false, lineno: LineNo::LastInstruction, - refresh_seconds: 1.0, core_filename: None } + Config { + pid: None, + python_program: None, + filename: None, + format: None, + command: String::from("top"), + blocking: LockingStrategy::Lock, + show_line_numbers: false, + sampling_rate: 100, + duration: RecordDuration::Unlimited, + native: false, + gil_only: false, + include_idle: false, + include_thread_ids: false, + hide_progress: false, + capture_output: true, + dump_json: false, + dump_locals: 0, + subprocesses: false, + full_filenames: false, + lineno: LineNo::LastInstruction, + refresh_seconds: 1.0, + core_filename: None, + } } } @@ -129,24 +146,24 @@ impl Config { /// Uses clap to set config options from commandline arguments pub fn from_commandline() -> Config { let args: Vec = std::env::args().collect(); - Config::from_args(&args).unwrap_or_else( |e| e.exit() ) + Config::from_args(&args).unwrap_or_else(|e| e.exit()) } pub fn from_args(args: &[String]) -> clap::Result { // pid/native/nonblocking/rate/python_program/subprocesses/full_filenames arguments can be // used across various subcommand - define once here let pid = Arg::new("pid") - .short('p') - .long("pid") - .value_name("pid") - .help("PID of a running python program to spy on") - .takes_value(true); + .short('p') + .long("pid") + .value_name("pid") + .help("PID of a running python program to spy on") + .takes_value(true); #[cfg(unwind)] let native = Arg::new("native") - .short('n') - .long("native") - .help("Collect stack traces from native extensions written in Cython, C or C++"); + .short('n') + .long("native") + .help("Collect stack traces from native extensions written in Cython, C or C++"); #[cfg(not(target_os="freebsd"))] let nonblocking = Arg::new("nonblocking") @@ -155,94 +172,107 @@ impl Config { the performance impact of sampling, but may lead to inaccurate results"); let rate = Arg::new("rate") - .short('r') - .long("rate") - .value_name("rate") - .help("The number of samples to collect per second") - .default_value("100") - .takes_value(true); + .short('r') + .long("rate") + .value_name("rate") + .help("The number of samples to collect per second") + .default_value("100") + .takes_value(true); let subprocesses = Arg::new("subprocesses") - .short('s') - .long("subprocesses") - .help("Profile subprocesses of the original process"); + .short('s') + .long("subprocesses") + .help("Profile subprocesses of the original process"); - let full_filenames = Arg::new("full_filenames") - .long("full-filenames") - .help("Show full Python filenames, instead of shortening to show only the package part"); + let full_filenames = Arg::new("full_filenames").long("full-filenames").help( + "Show full Python filenames, instead of shortening to show only the package part", + ); let program = Arg::new("python_program") - .help("commandline of a python program to run") - .multiple_values(true); + .help("commandline of a python program to run") + .multiple_values(true); let idle = Arg::new("idle") - .short('i') - .long("idle") - .help("Include stack traces for idle threads"); + .short('i') + .long("idle") + .help("Include stack traces for idle threads"); let gil = Arg::new("gil") - .short('g') - .long("gil") - .help("Only include traces that are holding on to the GIL"); + .short('g') + .long("gil") + .help("Only include traces that are holding on to the GIL"); let top_delay = Arg::new("delay") - .long("delay") - .value_name("seconds") - .help("Delay between 'top' refreshes.") - .default_value("1.0") - .value_parser(clap::value_parser!(f64)) - .takes_value(true); + .long("delay") + .value_name("seconds") + .help("Delay between 'top' refreshes.") + .default_value("1.0") + .value_parser(clap::value_parser!(f64)) + .takes_value(true); let record = Command::new("record") .about("Records stack trace information to a flamegraph, speedscope or raw file") .arg(program.clone()) .arg(pid.clone().required_unless_present("python_program")) .arg(full_filenames.clone()) - .arg(Arg::new("output") - .short('o') - .long("output") - .value_name("filename") - .help("Output filename") - .takes_value(true) - .required(false)) - .arg(Arg::new("format") - .short('f') - .long("format") - .value_name("format") - .help("Output file format") - .takes_value(true) - .possible_values(FileFormat::possible_values()) - .ignore_case(true) - .default_value("flamegraph")) - .arg(Arg::new("duration") - .short('d') - .long("duration") - .value_name("duration") - .help("The number of seconds to sample for") - .default_value("unlimited") - .takes_value(true)) + .arg( + Arg::new("output") + .short('o') + .long("output") + .value_name("filename") + .help("Output filename") + .takes_value(true) + .required(false), + ) + .arg( + Arg::new("format") + .short('f') + .long("format") + .value_name("format") + .help("Output file format") + .takes_value(true) + .possible_values(FileFormat::possible_values()) + .ignore_case(true) + .default_value("flamegraph"), + ) + .arg( + Arg::new("duration") + .short('d') + .long("duration") + .value_name("duration") + .help("The number of seconds to sample for") + .default_value("unlimited") + .takes_value(true), + ) .arg(rate.clone()) .arg(subprocesses.clone()) - .arg(Arg::new("function") - .short('F') - .long("function") - .help("Aggregate samples by function's first line number, instead of current line number")) - .arg(Arg::new("nolineno") - .long("nolineno") - .help("Do not show line numbers")) - .arg(Arg::new("threads") - .short('t') - .long("threads") - .help("Show thread ids in the output")) + .arg(Arg::new("function").short('F').long("function").help( + "Aggregate samples by function's first line number, instead of current line number", + )) + .arg( + Arg::new("nolineno") + .long("nolineno") + .help("Do not show line numbers"), + ) + .arg( + Arg::new("threads") + .short('t') + .long("threads") + .help("Show thread ids in the output"), + ) .arg(gil.clone()) .arg(idle.clone()) - .arg(Arg::new("capture") - .long("capture") - .hide(true) - .help("Captures output from child process")) - .arg(Arg::new("hideprogress") - .long("hideprogress") - .hide(true) - .help("Hides progress bar (useful for showing error output on record)")); + .arg( + Arg::new("capture") + .long("capture") + .hide(true) + .help("Captures output from child process"), + ) + .arg( + Arg::new("hideprogress") + .long("hideprogress") + .hide(true) + .help("Hides progress bar (useful for showing error output on record)"), + ); let top = Command::new("top") .about("Displays a top like view of functions consuming CPU") @@ -255,23 +285,25 @@ impl Config { .arg(idle.clone()) .arg(top_delay.clone()); - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let dump_pid = pid.clone().required_unless_present("core"); - #[cfg(not(target_os="linux"))] + #[cfg(not(target_os = "linux"))] let dump_pid = pid.clone().required(true); let dump = Command::new("dump") .about("Dumps stack traces for a target program to stdout") .arg(dump_pid); - #[cfg(target_os="linux")] - let dump = dump.arg(Arg::new("core") - .short('c') - .long("core") - .help("Filename of coredump to display python stack traces from") - .value_name("core") - .takes_value(true)); + #[cfg(target_os = "linux")] + let dump = dump.arg( + Arg::new("core") + .short('c') + .long("core") + .help("Filename of coredump to display python stack traces from") + .value_name("core") + .takes_value(true), + ); let dump = dump.arg(full_filenames.clone()) .arg(Arg::new("locals") @@ -288,9 +320,11 @@ impl Config { let completions = Command::new("completions") .about("Generate shell completions") .hide(true) - .arg(Arg::new("shell") - .value_parser(value_parser!(clap_complete::Shell)) - .help("Shell type")); + .arg( + Arg::new("shell") + .value_parser(value_parser!(clap_complete::Shell)) + .help("Shell type"), + ); // add native unwinding if appropriate #[cfg(unwind)] @@ -301,11 +335,11 @@ impl Config { let dump = dump.arg(native.clone()); // Nonblocking isn't an option for freebsd, remove - #[cfg(not(target_os="freebsd"))] + #[cfg(not(target_os = "freebsd"))] let record = record.arg(nonblocking.clone()); - #[cfg(not(target_os="freebsd"))] + #[cfg(not(target_os = "freebsd"))] let top = top.arg(nonblocking.clone()); - #[cfg(not(target_os="freebsd"))] + #[cfg(not(target_os = "freebsd"))] let dump = dump.arg(nonblocking.clone()); let mut app = Command::new(crate_name!()) @@ -331,32 +365,41 @@ impl Config { config.sampling_rate = matches.value_of_t("rate")?; config.duration = match matches.value_of("duration") { Some("unlimited") | None => RecordDuration::Unlimited, - Some(seconds) => RecordDuration::Seconds(seconds.parse().expect("invalid duration")) + Some(seconds) => { + RecordDuration::Seconds(seconds.parse().expect("invalid duration")) + } }; config.format = Some(matches.value_of_t("format")?); config.filename = matches.value_of("output").map(|f| f.to_owned()); config.show_line_numbers = matches.occurrences_of("nolineno") == 0; - config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { LineNo::FirstLineNo } else { LineNo::LastInstruction }; + config.lineno = if matches.occurrences_of("nolineno") > 0 { + LineNo::NoLine + } else if matches.occurrences_of("function") > 0 { + LineNo::First + } else { + LineNo::LastInstruction + }; config.include_thread_ids = matches.occurrences_of("threads") > 0; - if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 { + if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 + { eprintln!("--function & --nolinenos can't be used together"); std::process::exit(1); } config.hide_progress = matches.occurrences_of("hideprogress") > 0; - }, + } "top" => { config.sampling_rate = matches.value_of_t("rate")?; config.refresh_seconds = *matches.get_one::("delay").unwrap(); - }, + } "dump" => { config.dump_json = matches.occurrences_of("json") > 0; config.dump_locals = matches.occurrences_of("locals"); - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] { - config.core_filename = matches.value_of("core").map(|f| f.to_owned()); + config.core_filename = matches.value_of("core").map(|f| f.to_owned()); } - }, + } "completions" => { let shell = matches.get_one::("shell").unwrap(); let app_name = app.get_name().to_string(); @@ -368,12 +411,12 @@ impl Config { match subcommand { "record" | "top" => { - config.python_program = matches.values_of("python_program").map(|vals| { - vals.map(|v| v.to_owned()).collect() - }); + config.python_program = matches + .values_of("python_program") + .map(|vals| vals.map(|v| v.to_owned()).collect()); config.gil_only = matches.occurrences_of("gil") > 0; config.include_idle = matches.occurrences_of("idle") > 0; - }, + } _ => {} } @@ -381,7 +424,9 @@ impl Config { config.command = subcommand.to_owned(); // options that can be shared between subcommands - config.pid = matches.value_of("pid").map(|p| p.parse().expect("invalid pid")); + config.pid = matches + .value_of("pid") + .map(|p| p.parse().expect("invalid pid")); config.full_filenames = matches.occurrences_of("full_filenames") > 0; if cfg!(unwind) { config.native = matches.occurrences_of("native") > 0; @@ -394,7 +439,7 @@ impl Config { if matches.occurrences_of("nonblocking") > 0 { // disable native profiling if invalidly asked for - if config.native { + if config.native { eprintln!("Can't get native stack traces with the --nonblocking option."); std::process::exit(1); } @@ -406,22 +451,26 @@ impl Config { if config.native && config.subprocesses { // the native extension profiling code relies on dbghelp library, which doesn't // seem to work when connecting to multiple processes. disallow - eprintln!("Can't get native stack traces with the ---subprocesses option on windows."); + eprintln!( + "Can't get native stack traces with the ---subprocesses option on windows." + ); std::process::exit(1); } } - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] { - if config.pid.is_some() { - if std::env::var("PYSPY_ALLOW_FREEBSD_ATTACH").is_err() { + if config.pid.is_some() { + if std::env::var("PYSPY_ALLOW_FREEBSD_ATTACH").is_err() { eprintln!("On FreeBSD, running py-spy can cause an exception in the profiled process if the process \ is calling 'socket.connect'."); eprintln!("While this is fixed in recent versions of python, you need to acknowledge the risk here by \ setting an environment variable PYSPY_ALLOW_FREEBSD_ATTACH to run this command."); - eprintln!("\nSee https://github.com/benfred/py-spy/issues/147 for more information"); + eprintln!( + "\nSee https://github.com/benfred/py-spy/issues/147 for more information" + ); std::process::exit(-1); - } + } } } Ok(config) @@ -432,7 +481,7 @@ impl Config { mod tests { use super::*; fn get_config(cmd: &str) -> clap::Result { - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] std::env::set_var("PYSPY_ALLOW_FREEBSD_ATTACH", "1"); let args: Vec = cmd.split_whitespace().map(|x| x.to_owned()).collect(); Config::from_args(&args) @@ -452,17 +501,26 @@ mod tests { assert_eq!(config, short_config); // missing the --pid argument should fail - assert_eq!(get_config("py-spy record -o foo").unwrap_err().kind, - clap::ErrorKind::MissingRequiredArgument); + assert_eq!( + get_config("py-spy record -o foo").unwrap_err().kind, + clap::ErrorKind::MissingRequiredArgument + ); // but should work when passed a python program let program_config = get_config("py-spy r -o foo -- python test.py").unwrap(); - assert_eq!(program_config.python_program, Some(vec![String::from("python"), String::from("test.py")])); + assert_eq!( + program_config.python_program, + Some(vec![String::from("python"), String::from("test.py")]) + ); assert_eq!(program_config.pid, None); // passing an invalid file format should fail - assert_eq!(get_config("py-spy r -p 1234 -o foo -f unknown").unwrap_err().kind, - clap::ErrorKind::InvalidValue); + assert_eq!( + get_config("py-spy r -p 1234 -o foo -f unknown") + .unwrap_err() + .kind, + clap::ErrorKind::InvalidValue + ); // test out overriding these params by setting flags assert_eq!(config.include_idle, false); @@ -487,8 +545,10 @@ mod tests { assert_eq!(config, short_config); // missing the --pid argument should fail - assert_eq!(get_config("py-spy dump").unwrap_err().kind, - clap::ErrorKind::MissingRequiredArgument); + assert_eq!( + get_config("py-spy dump").unwrap_err().kind, + clap::ErrorKind::MissingRequiredArgument + ); } #[test] @@ -505,7 +565,9 @@ mod tests { #[test] fn test_parse_args() { - assert_eq!(get_config("py-spy dude").unwrap_err().kind, - clap::ErrorKind::UnrecognizedSubcommand); + assert_eq!( + get_config("py-spy dude").unwrap_err().kind, + clap::ErrorKind::UnrecognizedSubcommand + ); } } diff --git a/src/console_viewer.rs b/src/console_viewer.rs index 906d154e..ed49c8b3 100644 --- a/src/console_viewer.rs +++ b/src/console_viewer.rs @@ -1,16 +1,15 @@ -use std; use std::collections::HashMap; -use std::vec::Vec; use std::io; use std::io::{Read, Write}; -use std::sync::{Mutex, Arc, atomic}; +use std::sync::{atomic, Arc, Mutex}; use std::thread; +use std::vec::Vec; use anyhow::Error; -use console::{Term, style}; +use console::{style, Term}; use crate::config::Config; -use crate::stack_trace::{StackTrace, Frame}; +use crate::stack_trace::{Frame, StackTrace}; use crate::version::Version; pub struct ConsoleViewer { @@ -23,14 +22,16 @@ pub struct ConsoleViewer { options: Arc>, stats: Stats, subprocesses: bool, - config: Config + config: Config, } impl ConsoleViewer { - pub fn new(show_linenumbers: bool, - python_command: &str, - version: &Option, - config: &Config) -> io::Result { + pub fn new( + show_linenumbers: bool, + python_command: &str, + version: &Option, + config: &Config, + ) -> io::Result { let sampling_rate = 1.0 / (config.sampling_rate as f64); let running = Arc::new(atomic::AtomicBool::new(true)); let options = Arc::new(Mutex::new(Options::new(show_linenumbers))); @@ -55,7 +56,7 @@ impl ConsoleViewer { '2' => options.sort_column = 2, '3' => options.sort_column = 3, '4' => options.sort_column = 4, - _ => {}, + _ => {} } options.reset_style = previous_usage != options.usage; @@ -63,13 +64,17 @@ impl ConsoleViewer { } }); - Ok(ConsoleViewer{console_config: os_impl::ConsoleConfig::new()?, - version: version.clone(), - command: python_command.to_owned(), - running, options, sampling_rate, - subprocesses: config.subprocesses, - stats: Stats::new(), - config: config.clone()}) + Ok(ConsoleViewer { + console_config: os_impl::ConsoleConfig::new()?, + version: version.clone(), + command: python_command.to_owned(), + running, + options, + sampling_rate, + subprocesses: config.subprocesses, + stats: Stats::new(), + config: config.clone(), + }) } pub fn increment(&mut self, traces: &[StackTrace]) -> Result<(), Error> { @@ -101,7 +106,10 @@ impl ConsoleViewer { } update_function_statistics(&mut self.stats.line_counts, trace, |frame| { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; + let filename = match &frame.short_filename { + Some(f) => f, + None => &frame.filename, + }; if frame.line != 0 { format!("{} ({}:{})", frame.name, filename, frame.line) } else { @@ -110,7 +118,10 @@ impl ConsoleViewer { }); update_function_statistics(&mut self.stats.function_counts, trace, |frame| { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; + let filename = match &frame.short_filename { + Some(f) => f, + None => &frame.filename, + }; format!("{} ({})", frame.name, filename) }); } @@ -122,8 +133,13 @@ impl ConsoleViewer { // Get the top aggregate function calls (either by line or by function as ) let mut options = self.options.lock().unwrap(); options.dirty = false; - let counts = if options.show_linenumbers { &self.stats.line_counts } else { &self.stats.function_counts }; - let mut counts:Vec<(&FunctionStatistics, &str)> = counts.iter().map(|(x,y)| (y, x.as_ref())).collect(); + let counts = if options.show_linenumbers { + &self.stats.line_counts + } else { + &self.stats.function_counts + }; + let mut counts: Vec<(&FunctionStatistics, &str)> = + counts.iter().map(|(x, y)| (y, x.as_ref())).collect(); // TODO: subsort ? match options.sort_column { @@ -131,7 +147,7 @@ impl ConsoleViewer { 2 => counts.sort_unstable_by(|a, b| b.0.current_total.cmp(&a.0.current_total)), 3 => counts.sort_unstable_by(|a, b| b.0.overall_own.cmp(&a.0.overall_own)), 4 => counts.sort_unstable_by(|a, b| b.0.overall_total.cmp(&a.0.overall_total)), - _ => panic!("unknown sort column. this really shouldn't happen") + _ => panic!("unknown sort column. this really shouldn't happen"), } let term = Term::stdout(); let (height, width) = term.size(); @@ -164,23 +180,33 @@ impl ConsoleViewer { } if self.subprocesses { - out!("Collecting samples from '{}' and subprocesses", style(&self.command).green()); + out!( + "Collecting samples from '{}' and subprocesses", + style(&self.command).green() + ); } else { - out!("Collecting samples from '{}' (python v{})", style(&self.command).green(), self.version.as_ref().unwrap()); + out!( + "Collecting samples from '{}' (python v{})", + style(&self.command).green(), + self.version.as_ref().unwrap() + ); } let error_rate = self.stats.errors as f64 / self.stats.overall_samples as f64; if error_rate >= 0.01 && self.stats.overall_samples > 100 { let error_string = self.stats.last_error.as_ref().unwrap(); - out!("Total Samples {}, Error Rate {:.2}% ({})", - style(self.stats.overall_samples).bold(), - style(error_rate * 100.0).bold().red(), - style(error_string).bold()); + out!( + "Total Samples {}, Error Rate {:.2}% ({})", + style(self.stats.overall_samples).bold(), + style(error_rate * 100.0).bold().red(), + style(error_string).bold() + ); } else { - out!("Total Samples {}", style(self.stats.overall_samples).bold()); + out!("Total Samples {}", style(self.stats.overall_samples).bold()); } - out!("GIL: {:.2}%, Active: {:>.2}%, Threads: {}{}", + out!( + "GIL: {:.2}%, Active: {:>.2}%, Threads: {}{}", style(100.0 * self.stats.gil as f64 / self.stats.current_samples as f64).bold(), style(100.0 * self.stats.active as f64 / self.stats.current_samples as f64).bold(), style(self.stats.threads).bold(), @@ -188,7 +214,8 @@ impl ConsoleViewer { format!(", Processes {}", style(self.stats.processes).bold()) } else { "".to_owned() - }); + } + ); out!(); @@ -213,51 +240,91 @@ impl ConsoleViewer { // If we aren't at least 50 characters wide, lets use two lines per entry // Otherwise, truncate the filename so that it doesn't wrap around to the next line - let header_lines = if width > 50 { header_lines } else { header_lines + height as usize / 2 }; - let max_function_width = if width > 50 { width as usize - 35 } else { width as usize }; - - out!("{:>7}{:>8}{:>9}{:>11}{:width$}", percent_own_header, percent_total_header, - time_own_header, time_total_header, function_header, width=max_function_width); + let header_lines = if width > 50 { + header_lines + } else { + header_lines + height as usize / 2 + }; + let max_function_width = if width > 50 { width - 35 } else { width }; + + out!( + "{:>7}{:>8}{:>9}{:>11}{:width$}", + percent_own_header, + percent_total_header, + time_own_header, + time_total_header, + function_header, + width = max_function_width + ); let mut written = 0; for (samples, label) in counts.iter().take(height as usize - header_lines) { - out!("{:>6.2}% {:>6.2}% {:>7}s {:>8}s {:.width$}", + out!( + "{:>6.2}% {:>6.2}% {:>7}s {:>8}s {:.width$}", 100.0 * samples.current_own as f64 / (self.stats.current_samples as f64), 100.0 * samples.current_total as f64 / (self.stats.current_samples as f64), display_time(samples.overall_own as f64 * self.sampling_rate), display_time(samples.overall_total as f64 * self.sampling_rate), - label, width=max_function_width - 2); - written += 1; + label, + width = max_function_width - 2 + ); + written += 1; } - for _ in written.. height as usize - header_lines { + for _ in written..height as usize - header_lines { out!(); } out!(); if options.usage { - out!("{:width$}", style(" Keyboard Shortcuts ").reverse(), width=width as usize); + out!( + "{:width$}", + style(" Keyboard Shortcuts ").reverse(), + width = width + ); out!(); out!("{:^12}{:<}", style("key").green(), style("action").green()); - out!("{:^12}{:<}", "1", "Sort by %Own (% of time currently spent in the function)"); - out!("{:^12}{:<}", "2", "Sort by %Total (% of time currently in the function and its children)"); - out!("{:^12}{:<}", "3", "Sort by OwnTime (Overall time spent in the function)"); - out!("{:^12}{:<}", "4", "Sort by TotalTime (Overall time spent in the function and its children)"); - out!("{:^12}{:<}", "L,l", "Toggle between aggregating by line number or by function"); + out!( + "{:^12}{:<}", + "1", + "Sort by %Own (% of time currently spent in the function)" + ); + out!( + "{:^12}{:<}", + "2", + "Sort by %Total (% of time currently in the function and its children)" + ); + out!( + "{:^12}{:<}", + "3", + "Sort by OwnTime (Overall time spent in the function)" + ); + out!( + "{:^12}{:<}", + "4", + "Sort by TotalTime (Overall time spent in the function and its children)" + ); + out!( + "{:^12}{:<}", + "L,l", + "Toggle between aggregating by line number or by function" + ); out!("{:^12}{:<}", "R,r", "Reset statistics"); out!("{:^12}{:<}", "X,x", "Exit this help screen"); out!(); //println!("{:^12}{:<}", "Control-C", "Quit py-spy"); } else { - out!("Press {} to quit, or {} for help.", - style("Control-C").bold().reverse(), - style("?").bold().reverse()); + out!( + "Press {} to quit, or {} for help.", + style("Control-C").bold().reverse(), + style("?").bold().reverse() + ); } std::io::stdout().flush()?; Ok(()) } - pub fn increment_error(&mut self, err: &Error) -> Result<(), Error> { + pub fn increment_error(&mut self, err: &Error) -> Result<(), Error> { self.maybe_reset(); self.stats.errors += 1; self.stats.last_error = Some(format!("{}", err)); @@ -273,8 +340,10 @@ impl ConsoleViewer { // update faster if we only have a few samples, or if we changed options match self.stats.overall_samples { 10 | 100 | 500 => true, - _ => self.options.lock().unwrap().dirty || - self.stats.elapsed >= self.config.refresh_seconds + _ => { + self.options.lock().unwrap().dirty + || self.stats.elapsed >= self.config.refresh_seconds + } } } @@ -311,11 +380,16 @@ struct FunctionStatistics { current_own: u64, current_total: u64, overall_own: u64, - overall_total: u64 + overall_total: u64, } -fn update_function_statistics(counts: &mut HashMap, trace: &StackTrace, key_func: K) - where K: Fn(&Frame) -> String { +fn update_function_statistics( + counts: &mut HashMap, + trace: &StackTrace, + key_func: K, +) where + K: Fn(&Frame) -> String, +{ // we need to deduplicate (so we don't overcount cumulative stats with recursive function calls) let mut current = HashMap::new(); for (i, frame) in trace.frames.iter().enumerate() { @@ -324,8 +398,12 @@ fn update_function_statistics(counts: &mut HashMap Options { - Options{dirty: false, usage: false, reset: false, sort_column: 3, show_linenumbers, reset_style: false} + Options { + dirty: false, + usage: false, + reset: false, + sort_column: 3, + show_linenumbers, + reset_style: false, + } } } impl Stats { fn new() -> Stats { - Stats{current_samples: 0, overall_samples: 0, elapsed: 0., - errors: 0, late_samples: 0, threads: 0, processes: 0, gil: 0, active: 0, - line_counts: HashMap::new(), function_counts: HashMap::new(), - last_error: None, last_delay: None} + Stats { + current_samples: 0, + overall_samples: 0, + elapsed: 0., + errors: 0, + late_samples: 0, + threads: 0, + processes: 0, + gil: 0, + active: 0, + line_counts: HashMap::new(), + function_counts: HashMap::new(), + last_error: None, + last_delay: None, + } } pub fn reset_current(&mut self) { @@ -421,11 +517,11 @@ for doing this: #[cfg(unix)] mod os_impl { use super::*; - use termios::{Termios, TCSANOW, ECHO, ICANON, tcsetattr}; + use termios::{tcsetattr, Termios, ECHO, ICANON, TCSANOW}; pub struct ConsoleConfig { termios: Termios, - stdin: i32 + stdin: i32, } impl ConsoleConfig { @@ -445,7 +541,7 @@ mod os_impl { println!(); } - Ok(ConsoleConfig{termios, stdin}) + Ok(ConsoleConfig { termios, stdin }) } pub fn reset_cursor(&self) -> io::Result<()> { @@ -466,19 +562,21 @@ mod os_impl { #[cfg(windows)] mod os_impl { use super::*; - use winapi::shared::minwindef::{DWORD}; - use winapi::um::winnt::{HANDLE}; - use winapi::um::winbase::{STD_INPUT_HANDLE, STD_OUTPUT_HANDLE}; - use winapi::um::processenv::GetStdHandle; - use winapi::um::handleapi::INVALID_HANDLE_VALUE; + use winapi::shared::minwindef::DWORD; use winapi::um::consoleapi::{GetConsoleMode, SetConsoleMode}; - use winapi::um::wincon::{ENABLE_LINE_INPUT, ENABLE_ECHO_INPUT, CONSOLE_SCREEN_BUFFER_INFO, SetConsoleCursorPosition, - GetConsoleScreenBufferInfo, COORD, FillConsoleOutputAttribute}; + use winapi::um::handleapi::INVALID_HANDLE_VALUE; + use winapi::um::processenv::GetStdHandle; + use winapi::um::winbase::{STD_INPUT_HANDLE, STD_OUTPUT_HANDLE}; + use winapi::um::wincon::{ + FillConsoleOutputAttribute, GetConsoleScreenBufferInfo, SetConsoleCursorPosition, + CONSOLE_SCREEN_BUFFER_INFO, COORD, ENABLE_ECHO_INPUT, ENABLE_LINE_INPUT, + }; + use winapi::um::winnt::HANDLE; pub struct ConsoleConfig { stdin: HANDLE, mode: DWORD, - top_left: COORD + top_left: COORD, } impl ConsoleConfig { @@ -515,9 +613,17 @@ mod os_impl { // Figure out a consistent spot in the terminal buffer to write output to let mut top_left = csbi.dwCursorPosition; top_left.X = 0; - top_left.Y = if top_left.Y > height { top_left.Y - height } else { 0 }; - - Ok(ConsoleConfig{stdin, mode, top_left}) + top_left.Y = if top_left.Y > height { + top_left.Y - height + } else { + 0 + }; + + Ok(ConsoleConfig { + stdin, + mode, + top_left, + }) } } @@ -543,8 +649,17 @@ mod os_impl { } let mut written: DWORD = 0; - let console_size = ((1 + csbi.srWindow.Bottom - csbi.srWindow.Top) * (csbi.srWindow.Right - csbi.srWindow.Left)) as DWORD; - if FillConsoleOutputAttribute(stdout, csbi.wAttributes, console_size, self.top_left, &mut written) == 0 { + let console_size = ((1 + csbi.srWindow.Bottom - csbi.srWindow.Top) + * (csbi.srWindow.Right - csbi.srWindow.Left)) + as DWORD; + if FillConsoleOutputAttribute( + stdout, + csbi.wAttributes, + console_size, + self.top_left, + &mut written, + ) == 0 + { return Err(io::Error::last_os_error()); } Ok(()) @@ -554,7 +669,9 @@ mod os_impl { impl Drop for ConsoleConfig { fn drop(&mut self) { - unsafe { SetConsoleMode(self.stdin, self.mode); } + unsafe { + SetConsoleMode(self.stdin, self.mode); + } } } } diff --git a/src/coredump.rs b/src/coredump.rs index cdd2f5c8..51a66361 100644 --- a/src/coredump.rs +++ b/src/coredump.rs @@ -1,29 +1,31 @@ use std::collections::HashMap; +use std::ffi::OsStr; use std::fs::File; +use std::io::Read; +use std::os::unix::ffi::OsStrExt; use std::path::Path; use std::path::PathBuf; -use std::ffi::OsStr; -use std::os::unix::ffi::OsStrExt; -use std::io::Read; -use anyhow::{Error, Context, Result}; +use anyhow::{Context, Error, Result}; use console::style; -use goblin; -use log::{info}; -use libc; -use remoteprocess; +use log::info; use remoteprocess::ProcessMemory; -use crate::binary_parser::{BinaryInfo, parse_binary}; +use crate::binary_parser::{parse_binary, BinaryInfo}; +use crate::config::Config; use crate::dump::print_trace; -use crate::python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use crate::python_data_access::format_variable; use crate::python_interpreters::InterpreterState; -use crate::python_process_info::{is_python_lib, ContainsAddr, PythonProcessInfo, get_python_version, get_interpreter_address, get_threadstate_address}; -use crate::stack_trace::{StackTrace, get_stack_traces}; +use crate::python_process_info::{ + get_interpreter_address, get_python_version, get_threadstate_address, is_python_lib, + ContainsAddr, PythonProcessInfo, +}; use crate::python_threading::thread_names_from_interpreter; +use crate::stack_trace::{get_stack_traces, StackTrace}; use crate::version::Version; -use crate::config::Config; #[derive(Debug, Clone)] pub struct CoreMapRange { @@ -34,17 +36,30 @@ pub struct CoreMapRange { // Defines accessors to match those in proc_maps. However, can't use the // proc_maps trait since is private impl CoreMapRange { - pub fn size(&self) -> usize { self.segment.p_memsz as usize } - pub fn start(&self) -> usize { self.segment.p_vaddr as usize } - pub fn filename(&self) -> Option<&Path> { self.pathname.as_deref() } - pub fn is_exec(&self) -> bool { self.segment.is_executable() } - pub fn is_write(&self) -> bool { self.segment.is_write() } - pub fn is_read(&self) -> bool { self.segment.is_read() } + pub fn size(&self) -> usize { + self.segment.p_memsz as usize + } + pub fn start(&self) -> usize { + self.segment.p_vaddr as usize + } + pub fn filename(&self) -> Option<&Path> { + self.pathname.as_deref() + } + pub fn is_exec(&self) -> bool { + self.segment.is_executable() + } + pub fn is_write(&self) -> bool { + self.segment.is_write() + } + pub fn is_read(&self) -> bool { + self.segment.is_read() + } } impl ContainsAddr for Vec { fn contains_addr(&self, addr: usize) -> bool { - self.iter().any(|map| (addr >= map.start()) && (addr < (map.start() + map.size()))) + self.iter() + .any(|map| (addr >= map.start()) && (addr < (map.start() + map.size()))) } } @@ -62,41 +77,40 @@ impl CoreDump { let mut file = File::open(filename)?; let mut contents = Vec::new(); file.read_to_end(&mut contents)?; - let elf = goblin::elf::Elf::parse(&contents)?; + let elf = goblin::elf::Elf::parse(&contents)?; - let notes = elf.iter_note_headers(&contents).ok_or_else(|| format_err!("no note segment found"))?; + let notes = elf + .iter_note_headers(&contents) + .ok_or_else(|| format_err!("no note segment found"))?; let mut filenames = HashMap::new(); let mut psinfo = None; let mut status = Vec::new(); - for note in notes { - if let Ok(note) = note { - if note.n_type == goblin::elf::note::NT_PRPSINFO { - psinfo = Some(unsafe { *(note.desc.as_ptr() as * const elfcore::elf_prpsinfo) }); - } - else if note.n_type == goblin::elf::note::NT_PRSTATUS { - let thread_status = unsafe { *(note.desc.as_ptr() as * const elfcore::elf_prstatus) }; - status.push(thread_status); - } - else if note.n_type == goblin::elf::note::NT_FILE { - let data = note.desc; - let ptrs = data.as_ptr() as * const usize; - - let count = unsafe { *ptrs }; - let _page_size = unsafe { *ptrs.offset(1) }; - - let string_table = &data[(std::mem::size_of::() * (2 + count * 3))..]; - - for (i, filename) in string_table.split(|chr| *chr == 0).enumerate() { - if i < count { - let i = i as isize; - let start = unsafe { *ptrs.offset(i * 3 + 2) }; - let _end = unsafe { *ptrs.offset(i * 3 + 3) }; - let _page_offset = unsafe { *ptrs.offset(i * 3 + 4) }; - - let pathname = Path::new(&OsStr::from_bytes(filename)).to_path_buf(); - filenames.insert(start, pathname); - } + for note in notes.flatten() { + if note.n_type == goblin::elf::note::NT_PRPSINFO { + psinfo = Some(unsafe { *(note.desc.as_ptr() as *const elfcore::elf_prpsinfo) }); + } else if note.n_type == goblin::elf::note::NT_PRSTATUS { + let thread_status = + unsafe { *(note.desc.as_ptr() as *const elfcore::elf_prstatus) }; + status.push(thread_status); + } else if note.n_type == goblin::elf::note::NT_FILE { + let data = note.desc; + let ptrs = data.as_ptr() as *const usize; + + let count = unsafe { *ptrs }; + let _page_size = unsafe { *ptrs.offset(1) }; + + let string_table = &data[(std::mem::size_of::() * (2 + count * 3))..]; + + for (i, filename) in string_table.split(|chr| *chr == 0).enumerate() { + if i < count { + let i = i as isize; + let start = unsafe { *ptrs.offset(i * 3 + 2) }; + let _end = unsafe { *ptrs.offset(i * 3 + 3) }; + let _page_offset = unsafe { *ptrs.offset(i * 3 + 4) }; + + let pathname = Path::new(&OsStr::from_bytes(filename)).to_path_buf(); + filenames.insert(start, pathname); } } } @@ -106,16 +120,33 @@ impl CoreDump { for ph in elf.program_headers { if ph.p_type == goblin::elf::program_header::PT_LOAD { let pathname = filenames.get(&(ph.p_vaddr as _)); - let map = CoreMapRange {pathname: pathname.cloned(), segment: ph}; - info!("map: {:016x}-{:016x} {}{}{} {}", map.start(), map.start() + map.size(), - if map.is_read() {'r'} else {'-'}, if map.is_write() {'w'} else {'-'}, if map.is_exec() {'x'} else {'-'}, - map.filename().unwrap_or(&std::path::PathBuf::from("")).display()); + let map = CoreMapRange { + pathname: pathname.cloned(), + segment: ph, + }; + info!( + "map: {:016x}-{:016x} {}{}{} {}", + map.start(), + map.start() + map.size(), + if map.is_read() { 'r' } else { '-' }, + if map.is_write() { 'w' } else { '-' }, + if map.is_exec() { 'x' } else { '-' }, + map.filename() + .unwrap_or(&std::path::PathBuf::from("")) + .display() + ); maps.push(map); } } - Ok(CoreDump{filename: filename.to_owned(), contents, maps, psinfo, status}) + Ok(CoreDump { + filename: filename.to_owned(), + contents, + maps, + psinfo, + status, + }) } } @@ -131,8 +162,8 @@ impl ProcessMemory for CoreDump { let ph = &map.segment; if start >= ph.p_vaddr && start <= (ph.p_vaddr + ph.p_memsz) { let offset = (start - ph.p_vaddr + ph.p_offset) as usize; - buf.copy_from_slice(&self.contents[offset..(offset+buf.len())]); - return Ok(()) + buf.copy_from_slice(&self.contents[offset..(offset + buf.len())]); + return Ok(()); } } @@ -155,30 +186,33 @@ impl PythonCoreDump { // Get the python binary from the maps, and parse it let (python_filename, python_binary) = { - let map = maps.iter().find(|m| m.filename().is_some() & m.is_exec()).ok_or_else(|| format_err!("Failed to get binary from coredump"))?; + let map = maps + .iter() + .find(|m| m.filename().is_some() & m.is_exec()) + .ok_or_else(|| format_err!("Failed to get binary from coredump"))?; let python_filename = map.filename().unwrap(); - let python_binary = parse_binary(python_filename, map.start() as _ , map.size() as _); + let python_binary = parse_binary(python_filename, map.start() as _, map.size() as _); info!("Found python binary @ {}", python_filename.display()); (python_filename.to_owned(), python_binary) }; // get the libpython binary (if any) from maps let libpython_binary = { - let libmap = maps.iter() - .find(|m| { - if let Some(pathname) = m.filename() { - if let Some(pathname) = pathname.to_str() { - return is_python_lib(pathname) && m.is_exec(); - } + let libmap = maps.iter().find(|m| { + if let Some(pathname) = m.filename() { + if let Some(pathname) = pathname.to_str() { + return is_python_lib(pathname) && m.is_exec(); } - false - }); + } + false + }); let mut libpython_binary: Option = None; if let Some(libpython) = libmap { if let Some(filename) = &libpython.filename() { info!("Found libpython binary @ {}", filename.display()); - let parsed = parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; + let parsed = + parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; libpython_binary = Some(parsed); } } @@ -191,10 +225,16 @@ impl PythonCoreDump { _ => python_binary.ok(), }; - let python_info = PythonProcessInfo{python_binary, libpython_binary, maps: Box::new(core.maps.clone()), - python_filename: python_filename, dockerized: false}; + let python_info = PythonProcessInfo { + python_binary, + libpython_binary, + maps: Box::new(core.maps.clone()), + python_filename, + dockerized: false, + }; - let version = get_python_version(&python_info, &core).context("failed to get python version")?; + let version = + get_python_version(&python_info, &core).context("failed to get python version")?; info!("Got python version {}", version); let interpreter_address = get_interpreter_address(&python_info, &core, &version)?; @@ -205,37 +245,76 @@ impl PythonCoreDump { let threadstate_address = get_threadstate_address(&python_info, &version, &config)?; info!("found threadstate at 0x{:016x}", threadstate_address); - Ok(PythonCoreDump{core, version, interpreter_address, threadstate_address}) + Ok(PythonCoreDump { + core, + version, + interpreter_address, + threadstate_address, + }) } pub fn get_stack(&self, config: &Config) -> Result, Error> { if config.native { - return Err(format_err!("Native unwinding isn't yet supported with coredumps")); + return Err(format_err!( + "Native unwinding isn't yet supported with coredumps" + )); } if config.subprocesses { - return Err(format_err!("Subprocesses can't be used for getting stacktraces from coredumps")); + return Err(format_err!( + "Subprocesses can't be used for getting stacktraces from coredumps" + )); } // different versions have different layouts, check as appropriate - Ok(match self.version { - Version{major: 2, minor: 3..=7, ..} => self._get_stack::(config), - Version{major: 3, minor: 3, ..} => self._get_stack::(config), - Version{major: 3, minor: 4..=5, ..} => self._get_stack::(config), - Version{major: 3, minor: 6, ..} => self._get_stack::(config), - Version{major: 3, minor: 7, ..} => self._get_stack::(config), - Version{major: 3, minor: 8, ..} => self._get_stack::(config), - Version{major: 3, minor: 9, ..} => self._get_stack::(config), - Version{major: 3, minor: 10, ..} => self._get_stack::(config), - Version{major: 3, minor: 11, ..} => self._get_stack::(config), - _ => Err(format_err!("Unsupported version of Python: {}", self.version)) - }?) + match self.version { + Version { + major: 2, + minor: 3..=7, + .. + } => self._get_stack::(config), + Version { + major: 3, minor: 3, .. + } => self._get_stack::(config), + Version { + major: 3, + minor: 4..=5, + .. + } => self._get_stack::(config), + Version { + major: 3, minor: 6, .. + } => self._get_stack::(config), + Version { + major: 3, minor: 7, .. + } => self._get_stack::(config), + Version { + major: 3, minor: 8, .. + } => self._get_stack::(config), + Version { + major: 3, minor: 9, .. + } => self._get_stack::(config), + Version { + major: 3, + minor: 10, + .. + } => self._get_stack::(config), + Version { + major: 3, + minor: 11, + .. + } => self._get_stack::(config), + _ => Err(format_err!( + "Unsupported version of Python: {}", + self.version + )), + } } fn _get_stack(&self, config: &Config) -> Result, Error> { let interp: I = self.core.copy_struct(self.interpreter_address)?; - let mut traces = get_stack_traces(&interp, &self.core, self.threadstate_address, Some(config))?; + let mut traces = + get_stack_traces(&interp, &self.core, self.threadstate_address, Some(config))?; let thread_names = thread_names_from_interpreter(&interp, &self.core, &self.version).ok(); for trace in &mut traces { @@ -247,8 +326,13 @@ impl PythonCoreDump { if let Some(locals) = frame.locals.as_mut() { let max_length = (128 * config.dump_locals) as isize; for local in locals { - let repr = format_variable::(&self.core, &self.version, local.addr, max_length); - local.repr = Some(repr.unwrap_or("?".to_owned())); + let repr = format_variable::( + &self.core, + &self.version, + local.addr, + max_length, + ); + local.repr = Some(repr.unwrap_or_else(|_| "?".to_owned())); } } } @@ -259,25 +343,28 @@ impl PythonCoreDump { pub fn print_traces(&self, traces: &Vec, config: &Config) -> Result<(), Error> { if config.dump_json { println!("{}", serde_json::to_string_pretty(&traces)?); - return Ok(()) + return Ok(()); } - for status in &self.core.status { - println!("Signal {}: {}", + if let Some(status) = self.core.status.first() { + println!( + "Signal {}: {}", style(status.pr_cursig).bold().yellow(), - self.core.filename.display()); - break; + self.core.filename.display() + ); } if let Some(psinfo) = self.core.psinfo { - println!("Process {}: {}", + println!( + "Process {}: {}", style(psinfo.pr_pid).bold().yellow(), - OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy()); + OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy() + ); } println!("Python v{}", style(&self.version).bold()); - println!(""); + println!(); for trace in traces.iter().rev() { - print_trace(&trace, false); + print_trace(trace, false); } Ok(()) } @@ -349,12 +436,18 @@ mod test { // so we can't (yet) figure out the interpreter address & version. // Manually specify here to test out instead let core = CoreDump::new(&get_coredump_path("python_3_9_threads")).unwrap(); - let version = Version{major: 3, minor: 9, patch: 13, release_flags: "".to_owned()}; + let version = Version { + major: 3, + minor: 9, + patch: 13, + release_flags: "".to_owned(), + }; let python_core = PythonCoreDump { core, version, interpreter_address: 0x000055a8293dbe20, - threadstate_address: 0x000055a82745fe18}; + threadstate_address: 0x000055a82745fe18, + }; let config = Config::default(); let traces = python_core.get_stack(&config).unwrap(); diff --git a/src/cython.rs b/src/cython.rs index 1a0a9814..3497f614 100644 --- a/src/cython.rs +++ b/src/cython.rs @@ -1,13 +1,11 @@ - -use std; -use std::collections::{BTreeMap, HashMap}; use regex::Regex; +use std::collections::{BTreeMap, HashMap}; use anyhow::Error; use lazy_static::lazy_static; -use crate::utils::resolve_filename; use crate::stack_trace::Frame; +use crate::utils::resolve_filename; pub struct SourceMaps { maps: HashMap>, @@ -16,7 +14,7 @@ pub struct SourceMaps { impl SourceMaps { pub fn new() -> SourceMaps { let maps = HashMap::new(); - SourceMaps{maps} + SourceMaps { maps } } pub fn translate(&mut self, frame: &mut Frame) { @@ -42,8 +40,7 @@ impl SourceMaps { } return false; } - - return true; + true } // loads the corresponding cython source map for the frame @@ -67,7 +64,7 @@ impl SourceMaps { } struct SourceMap { - lookup: BTreeMap + lookup: BTreeMap, } impl SourceMap { @@ -76,7 +73,11 @@ impl SourceMap { SourceMap::from_contents(&contents, filename, module) } - pub fn from_contents(contents: &str, cpp_filename: &str, module: &Option) -> Result { + pub fn from_contents( + contents: &str, + cpp_filename: &str, + module: &Option, + ) -> Result { lazy_static! { static ref RE: Regex = Regex::new(r#"^\s*/\* "(.+\..+)":([0-9]+)"#).unwrap(); } @@ -86,7 +87,7 @@ impl SourceMap { let mut line_count = 0; for (lineno, line) in contents.lines().enumerate() { - if let Some(captures) = RE.captures(&line) { + if let Some(captures) = RE.captures(line) { let cython_file = captures.get(1).map_or("", |m| m.as_str()); let cython_line = captures.get(2).map_or("", |m| m.as_str()); @@ -108,7 +109,7 @@ impl SourceMap { } lookup.insert(line_count + 1, ("".to_owned(), 0)); - Ok(SourceMap{lookup}) + Ok(SourceMap { lookup }) } pub fn lookup(&self, lineno: u32) -> Option<&(String, u32)> { @@ -116,25 +117,38 @@ impl SourceMap { // handle EOF Some((_, (_, 0))) => None, Some((_, val)) => Some(val), - None => None + None => None, } } } pub fn ignore_frame(name: &str) -> bool { - let ignorable = ["__Pyx_PyFunction_FastCallDict", "__Pyx_PyObject_CallOneArg", - "__Pyx_PyObject_Call", "__Pyx_PyObject_Call", "__pyx_FusedFunction_call"]; + let ignorable = [ + "__Pyx_PyFunction_FastCallDict", + "__Pyx_PyObject_CallOneArg", + "__Pyx_PyObject_Call", + "__Pyx_PyObject_Call", + "__pyx_FusedFunction_call", + ]; ignorable.iter().any(|&f| f == name) } pub fn demangle(name: &str) -> &str { // slice off any leading cython prefix. - let prefixes = ["__pyx_fuse_1_0__pyx_pw", "__pyx_fuse_0__pyx_f", "__pyx_fuse_1__pyx_f", - "__pyx_pf", "__pyx_pw", "__pyx_f", "___pyx_f", "___pyx_pw"]; + let prefixes = [ + "__pyx_fuse_1_0__pyx_pw", + "__pyx_fuse_0__pyx_f", + "__pyx_fuse_1__pyx_f", + "__pyx_pf", + "__pyx_pw", + "__pyx_f", + "___pyx_f", + "___pyx_pw", + ]; let mut current = match prefixes.iter().find(|&prefix| name.starts_with(prefix)) { Some(prefix) => &name[prefix.len()..], - None => return name + None => return name, }; let mut next = current; @@ -148,8 +162,8 @@ pub fn demangle(name: &str) -> &str { } let mut digit_index = 1; - while let Some(ch) = chars.next() { - if !ch.is_digit(10) { + for ch in chars { + if !ch.is_ascii_digit() { break; } digit_index += 1; @@ -166,8 +180,8 @@ pub fn demangle(name: &str) -> &str { break; } next = &next[digits + digit_index..]; - }, - Err(_) => { break } + } + Err(_) => break, }; } debug!("cython_demangle(\"{}\") -> \"{}\"", name, current); @@ -175,11 +189,15 @@ pub fn demangle(name: &str) -> &str { current } -fn resolve_cython_file(cpp_filename: &str, cython_filename: &str, module: &Option) -> String { +fn resolve_cython_file( + cpp_filename: &str, + cython_filename: &str, + module: &Option, +) -> String { let cython_path = std::path::PathBuf::from(cython_filename); if let Some(ext) = cython_path.extension() { let mut path_buf = std::path::PathBuf::from(cpp_filename); - path_buf.set_extension(&ext); + path_buf.set_extension(ext); if path_buf.ends_with(&cython_path) && path_buf.exists() { return path_buf.to_string_lossy().to_string(); } @@ -187,10 +205,9 @@ fn resolve_cython_file(cpp_filename: &str, cython_filename: &str, module: &Optio match module { Some(module) => { - resolve_filename(cython_filename, module) - .unwrap_or_else(|| cython_filename.to_owned()) - }, - None => cython_filename.to_owned() + resolve_filename(cython_filename, module).unwrap_or_else(|| cython_filename.to_owned()) + } + None => cython_filename.to_owned(), } } @@ -200,34 +217,58 @@ mod tests { #[test] fn test_demangle() { // all of these were wrong at certain points when writing cython_demangle =( - assert_eq!(demangle("__pyx_pf_8implicit_4_als_30_least_squares_cg"), "_least_squares_cg"); - assert_eq!(demangle("__pyx_pw_8implicit_4_als_5least_squares_cg"), "least_squares_cg"); - assert_eq!(demangle("__pyx_fuse_1_0__pyx_pw_8implicit_4_als_31_least_squares_cg"), "_least_squares_cg"); - assert_eq!(demangle("__pyx_f_6mtrand_cont0_array"), "mtrand_cont0_array"); + assert_eq!( + demangle("__pyx_pf_8implicit_4_als_30_least_squares_cg"), + "_least_squares_cg" + ); + assert_eq!( + demangle("__pyx_pw_8implicit_4_als_5least_squares_cg"), + "least_squares_cg" + ); + assert_eq!( + demangle("__pyx_fuse_1_0__pyx_pw_8implicit_4_als_31_least_squares_cg"), + "_least_squares_cg" + ); + assert_eq!( + demangle("__pyx_f_6mtrand_cont0_array"), + "mtrand_cont0_array" + ); // in both of these cases we should ideally slice off the module (_als/bpr), but it gets tricky // implementation wise - assert_eq!(demangle("__pyx_fuse_0__pyx_f_8implicit_4_als_axpy"), "_als_axpy"); - assert_eq!(demangle("__pyx_fuse_1__pyx_f_8implicit_3bpr_has_non_zero"), "bpr_has_non_zero"); + assert_eq!( + demangle("__pyx_fuse_0__pyx_f_8implicit_4_als_axpy"), + "_als_axpy" + ); + assert_eq!( + demangle("__pyx_fuse_1__pyx_f_8implicit_3bpr_has_non_zero"), + "bpr_has_non_zero" + ); } #[test] fn test_source_map() { - let map = SourceMap::from_contents(include_str!("../ci/testdata/cython_test.c"), "cython_test.c", &None).unwrap(); + let map = SourceMap::from_contents( + include_str!("../ci/testdata/cython_test.c"), + "cython_test.c", + &None, + ) + .unwrap(); // we don't have info on cython line numbers until line 1261 assert_eq!(map.lookup(1000), None); // past the end of the file should also return none assert_eq!(map.lookup(10000), None); - let lookup = |lineno: u32, cython_file: &str, cython_line: u32| { - match map.lookup(lineno) { - Some((file, line)) => { - assert_eq!(file, cython_file); - assert_eq!(line, &cython_line); - }, - None => { - panic!("Failed to lookup line {} (expected {}:{})", lineno, cython_file, cython_line); - } + let lookup = |lineno: u32, cython_file: &str, cython_line: u32| match map.lookup(lineno) { + Some((file, line)) => { + assert_eq!(file, cython_file); + assert_eq!(line, &cython_line); + } + None => { + panic!( + "Failed to lookup line {} (expected {}:{})", + lineno, cython_file, cython_line + ); } }; lookup(1298, "cython_test.pyx", 6); diff --git a/src/dump.rs b/src/dump.rs index 03d57a75..39624e6d 100644 --- a/src/dump.rs +++ b/src/dump.rs @@ -1,5 +1,5 @@ use anyhow::Error; -use console::{Term, style}; +use console::{style, Term}; use crate::config::Config; use crate::python_spy::PythonSpy; @@ -12,29 +12,39 @@ pub fn print_traces(pid: Pid, config: &Config, parent: Option) -> Result<() if config.dump_json { let traces = process.get_stack_traces()?; println!("{}", serde_json::to_string_pretty(&traces)?); - return Ok(()) + return Ok(()); } - println!("Process {}: {}", + println!( + "Process {}: {}", style(process.pid).bold().yellow(), - process.process.cmdline()?.join(" ")); + process.process.cmdline()?.join(" ") + ); - println!("Python v{} ({})", + println!( + "Python v{} ({})", style(&process.version).bold(), - style(process.process.exe()?).dim()); + style(process.process.exe()?).dim() + ); if let Some(parentpid) = parent { let parentprocess = remoteprocess::Process::new(parentpid)?; - println!("Parent Process {}: {}", + println!( + "Parent Process {}: {}", style(parentpid).bold().yellow(), - parentprocess.cmdline()?.join(" ")); + parentprocess.cmdline()?.join(" ") + ); } - println!(""); + println!(); let traces = process.get_stack_traces()?; for trace in traces.iter().rev() { print_trace(trace, true); if config.subprocesses { - for (childpid, parentpid) in process.process.child_processes().expect("failed to get subprocesses") { + for (childpid, parentpid) in process + .process + .child_processes() + .expect("failed to get subprocesses") + { let term = Term::stdout(); let (_, width) = term.size(); @@ -43,7 +53,7 @@ pub fn print_traces(pid: Pid, config: &Config, parent: Option) -> Result<() // though we could end up printing grandchild processes multiple times. Limit down // to just once if parentpid == pid { - print_traces(childpid, &config, Some(parentpid))?; + print_traces(childpid, config, Some(parentpid))?; } } } @@ -64,7 +74,12 @@ pub fn print_trace(trace: &StackTrace, include_activity: bool) { match trace.thread_name.as_ref() { Some(name) => { - println!("Thread {}{}: \"{}\"", style(thread_id).bold().yellow(), status, name); + println!( + "Thread {}{}: \"{}\"", + style(thread_id).bold().yellow(), + status, + name + ); } None => { println!("Thread {}{}", style(thread_id).bold().yellow(), status); @@ -72,11 +87,23 @@ pub fn print_trace(trace: &StackTrace, include_activity: bool) { }; for frame in &trace.frames { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; + let filename = match &frame.short_filename { + Some(f) => f, + None => &frame.filename, + }; if frame.line != 0 { - println!(" {} ({}:{})", style(&frame.name).green(), style(&filename).cyan(), style(frame.line).dim()); + println!( + " {} ({}:{})", + style(&frame.name).green(), + style(&filename).cyan(), + style(frame.line).dim() + ); } else { - println!(" {} ({})", style(&frame.name).green(), style(&filename).cyan()); + println!( + " {} ({})", + style(&frame.name).green(), + style(&filename).cyan() + ); } if let Some(locals) = &frame.locals { @@ -91,7 +118,7 @@ pub fn print_trace(trace: &StackTrace, include_activity: bool) { shown_locals = true; } - let repr = local.repr.as_ref().map(String::as_str).unwrap_or("?"); + let repr = local.repr.as_deref().unwrap_or("?"); println!(" {}: {}", local.name, repr); } } diff --git a/src/flamegraph.rs b/src/flamegraph.rs index ef33c379..795f0503 100644 --- a/src/flamegraph.rs +++ b/src/flamegraph.rs @@ -26,10 +26,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -use std::io::Write; -use std; use std::collections::HashMap; - +use std::io::Write; use anyhow::Error; use inferno::flamegraph::{Direction, Options}; @@ -43,32 +41,47 @@ pub struct Flamegraph { impl Flamegraph { pub fn new(show_linenumbers: bool) -> Flamegraph { - Flamegraph { counts: HashMap::new(), show_linenumbers } + Flamegraph { + counts: HashMap::new(), + show_linenumbers, + } } pub fn increment(&mut self, trace: &StackTrace) -> std::io::Result<()> { // convert the frame into a single ';' delimited String - let frame = trace.frames.iter().rev().map(|frame| { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; - if self.show_linenumbers && frame.line != 0 { - format!("{} ({}:{})", frame.name, filename, frame.line) - } else if filename.len() > 0 { - format!("{} ({})", frame.name, filename) - } else { - frame.name.clone() - } - }).collect::>().join(";"); + let frame = trace + .frames + .iter() + .rev() + .map(|frame| { + let filename = match &frame.short_filename { + Some(f) => f, + None => &frame.filename, + }; + if self.show_linenumbers && frame.line != 0 { + format!("{} ({}:{})", frame.name, filename, frame.line) + } else if !filename.is_empty() { + format!("{} ({})", frame.name, filename) + } else { + frame.name.clone() + } + }) + .collect::>() + .join(";"); // update counts for that frame *self.counts.entry(frame).or_insert(0) += 1; Ok(()) } fn get_lines(&self) -> Vec { - self.counts.iter().map(|(k, v)| format!("{} {}", k, v)).collect() + self.counts + .iter() + .map(|(k, v)| format!("{} {}", k, v)) + .collect() } pub fn write(&self, w: &mut dyn Write) -> Result<(), Error> { - let mut opts = Options::default(); + let mut opts = Options::default(); opts.direction = Direction::Inverted; opts.min_width = 0.1; opts.title = std::env::args().collect::>().join(" "); diff --git a/src/lib.rs b/src/lib.rs index da73a7ad..424e253e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,9 +29,9 @@ extern crate anyhow; #[macro_use] extern crate log; -pub mod config; pub mod binary_parser; -#[cfg(target_os="linux")] +pub mod config; +#[cfg(target_os = "linux")] pub mod coredump; #[cfg(unwind)] mod cython; @@ -39,10 +39,10 @@ pub mod dump; #[cfg(unwind)] mod native_stack_trace; mod python_bindings; +mod python_data_access; mod python_interpreters; -pub mod python_spy; pub mod python_process_info; -mod python_data_access; +pub mod python_spy; mod python_threading; pub mod sampler; pub mod stack_trace; @@ -50,8 +50,8 @@ pub mod timer; mod utils; mod version; -pub use python_spy::PythonSpy; pub use config::Config; -pub use stack_trace::StackTrace; -pub use stack_trace::Frame; +pub use python_spy::PythonSpy; pub use remoteprocess::Pid; +pub use stack_trace::Frame; +pub use stack_trace::StackTrace; diff --git a/src/main.rs b/src/main.rs index f355cb2e..50429e04 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,26 +3,26 @@ extern crate anyhow; #[macro_use] extern crate log; +mod binary_parser; mod config; -#[cfg(target_os="linux")] +mod console_viewer; +#[cfg(target_os = "linux")] mod coredump; -mod dump; -mod binary_parser; #[cfg(unwind)] mod cython; +mod dump; +mod flamegraph; #[cfg(unwind)] mod native_stack_trace; mod python_bindings; +mod python_data_access; mod python_interpreters; mod python_process_info; mod python_spy; -mod python_data_access; mod python_threading; -mod stack_trace; -mod console_viewer; -mod flamegraph; -mod speedscope; mod sampler; +mod speedscope; +mod stack_trace; mod timer; mod utils; mod version; @@ -32,40 +32,40 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; -use console::style; use anyhow::Error; +use console::style; -use stack_trace::{StackTrace, Frame}; -use console_viewer::ConsoleViewer; use config::{Config, FileFormat, RecordDuration}; +use console_viewer::ConsoleViewer; +use stack_trace::{Frame, StackTrace}; -use chrono::{SecondsFormat, Local}; +use chrono::{Local, SecondsFormat}; #[cfg(unix)] fn permission_denied(err: &Error) -> bool { err.chain().any(|cause| { if let Some(ioerror) = cause.downcast_ref::() { ioerror.kind() == std::io::ErrorKind::PermissionDenied - } else if let Some(remoteprocess::Error::IOError(ioerror)) = cause.downcast_ref::() { + } else if let Some(remoteprocess::Error::IOError(ioerror)) = + cause.downcast_ref::() + { ioerror.kind() == std::io::ErrorKind::PermissionDenied - }else { + } else { false } }) } -fn sample_console(pid: remoteprocess::Pid, - config: &Config) -> Result<(), Error> { +fn sample_console(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> { let sampler = sampler::Sampler::new(pid, config)?; let display = match remoteprocess::Process::new(pid)?.cmdline() { Ok(cmdline) => cmdline.join(" "), - Err(_) => format!("Pid {}", pid) + Err(_) => format!("Pid {}", pid), }; - let mut console = ConsoleViewer::new(config.show_line_numbers, &display, - &sampler.version, - config)?; + let mut console = + ConsoleViewer::new(config.show_line_numbers, &display, &sampler.version, config)?; for sample in sampler { if let Some(elapsed) = sample.late { console.increment_late_sample(elapsed); @@ -122,10 +122,14 @@ impl Recorder for RawFlamegraph { fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> { let mut output: Box = match config.format { - Some(FileFormat::flamegraph) => Box::new(flamegraph::Flamegraph::new(config.show_line_numbers)), - Some(FileFormat::speedscope) => Box::new(speedscope::Stats::new(config)), - Some(FileFormat::raw) => Box::new(RawFlamegraph(flamegraph::Flamegraph::new(config.show_line_numbers))), - None => return Err(format_err!("A file format is required to record samples")) + Some(FileFormat::flamegraph) => { + Box::new(flamegraph::Flamegraph::new(config.show_line_numbers)) + } + Some(FileFormat::speedscope) => Box::new(speedscope::Stats::new(config)), + Some(FileFormat::raw) => Box::new(RawFlamegraph(flamegraph::Flamegraph::new( + config.show_line_numbers, + ))), + None => return Err(format_err!("A file format is required to record samples")), }; let filename = match config.filename.clone() { @@ -135,18 +139,18 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> Some(FileFormat::flamegraph) => "svg", Some(FileFormat::speedscope) => "json", Some(FileFormat::raw) => "txt", - None => return Err(format_err!("A file format is required to record samples")) + None => return Err(format_err!("A file format is required to record samples")), }; let local_time = Local::now().to_rfc3339_opts(SecondsFormat::Secs, true); let name = match config.python_program.as_ref() { Some(prog) => prog[0].to_string(), - None => match config.pid.as_ref() { + None => match config.pid.as_ref() { Some(pid) => pid.to_string(), - None => String::from("unknown") - } + None => String::from("unknown"), + }, }; format!("{}-{}.{}", name, local_time, ext) - } + } }; let sampler = sampler::Sampler::new(pid, config)?; @@ -162,11 +166,17 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> let max_intervals = match &config.duration { RecordDuration::Unlimited => { - println!("{}Sampling process {} times a second. Press Control-C to exit.", lede, config.sampling_rate); + println!( + "{}Sampling process {} times a second. Press Control-C to exit.", + lede, config.sampling_rate + ); None - }, + } RecordDuration::Seconds(sec) => { - println!("{}Sampling process {} times a second for {} seconds. Press Control-C to exit.", lede, config.sampling_rate, sec); + println!( + "{}Sampling process {} times a second for {} seconds. Press Control-C to exit.", + lede, config.sampling_rate, sec + ); Some(sec * config.sampling_rate) } }; @@ -176,12 +186,15 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> (true, _) => ProgressBar::hidden(), (false, RecordDuration::Seconds(samples)) => ProgressBar::new(*samples), (false, RecordDuration::Unlimited) => { + #[allow(clippy::let_and_return)] let progress = ProgressBar::new_spinner(); // The spinner on windows doesn't look great: was replaced by a [?] character at least on // my system. Replace unicode spinners with just how many seconds have elapsed #[cfg(windows)] - progress.set_style(indicatif::ProgressStyle::default_spinner().template("[{elapsed}] {msg}")); + progress.set_style( + indicatif::ProgressStyle::default_spinner().template("[{elapsed}] {msg}"), + ); progress } }; @@ -243,12 +256,17 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> if config.include_thread_ids { let threadid = trace.format_threadid(); - trace.frames.push(Frame{name: format!("thread ({})", threadid), + trace.frames.push(Frame { + name: format!("thread ({})", threadid), filename: String::from(""), - module: None, short_filename: None, line: 0, locals: None}); + module: None, + short_filename: None, + line: 0, + locals: None, + }); } - if let Some(process_info) = trace.process_info.as_ref().map(|x| x) { + if let Some(process_info) = trace.process_info.as_ref() { trace.frames.push(process_info.to_frame()); let mut parent = process_info.parent.as_ref(); while parent.is_some() { @@ -260,7 +278,7 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> } samples += 1; - output.increment(&trace)?; + output.increment(trace)?; } if let Some(sampling_errors) = sample.sampling_errors { @@ -287,25 +305,34 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> } { - let mut out_file = std::fs::File::create(&filename)?; - output.write(&mut out_file)?; + let mut out_file = std::fs::File::create(&filename)?; + output.write(&mut out_file)?; } match config.format.as_ref().unwrap() { FileFormat::flamegraph => { - println!("{}Wrote flamegraph data to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); + println!( + "{}Wrote flamegraph data to '{}'. Samples: {} Errors: {}", + lede, filename, samples, errors + ); // open generated flame graph in the browser on OSX (theory being that on linux // you might be SSH'ed into a server somewhere and this isn't desired, but on // that is pretty unlikely for osx) (note to self: xdg-open will open on linux) #[cfg(target_os = "macos")] std::process::Command::new("open").arg(&filename).spawn()?; - }, - FileFormat::speedscope => { - println!("{}Wrote speedscope file to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); + } + FileFormat::speedscope => { + println!( + "{}Wrote speedscope file to '{}'. Samples: {} Errors: {}", + lede, filename, samples, errors + ); println!("{}Visit https://www.speedscope.app/ to view", lede); - }, + } FileFormat::raw => { - println!("{}Wrote raw flamegraph data to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); + println!( + "{}Wrote raw flamegraph data to '{}'. Samples: {} Errors: {}", + lede, filename, samples, errors + ); println!("{}You can use the flamegraph.pl script from https://github.com/brendangregg/flamegraph to generate a SVG", lede); } }; @@ -315,12 +342,12 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> fn run_spy_command(pid: remoteprocess::Pid, config: &config::Config) -> Result<(), Error> { match config.command.as_ref() { - "dump" => { + "dump" => { dump::print_traces(pid, config, None)?; - }, + } "record" => { record_samples(pid, config)?; - }, + } "top" => { sample_console(pid, config)?; } @@ -335,7 +362,7 @@ fn run_spy_command(pid: remoteprocess::Pid, config: &config::Config) -> Result<( fn pyspy_main() -> Result<(), Error> { let config = config::Config::from_commandline(); - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { if unsafe { libc::geteuid() } != 0 { eprintln!("This program requires root on OSX."); @@ -344,7 +371,7 @@ fn pyspy_main() -> Result<(), Error> { } } - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] { if let Some(ref core_filename) = config.core_filename { let core = coredump::PythonCoreDump::new(std::path::Path::new(&core_filename))?; @@ -355,9 +382,7 @@ fn pyspy_main() -> Result<(), Error> { if let Some(pid) = config.pid { run_spy_command(pid, &config)?; - } - - else if let Some(ref subprocess) = config.python_program { + } else if let Some(ref subprocess) = config.python_program { // Dump out stdout/stderr from the process to a temp file, so we can view it later if needed let mut process_output = tempfile::NamedTempFile::new()?; @@ -368,7 +393,10 @@ fn pyspy_main() -> Result<(), Error> { if unsafe { libc::geteuid() } == 0 { if let Ok(sudo_uid) = std::env::var("SUDO_UID") { use std::os::unix::process::CommandExt; - info!("Dropping root and running python command as {}", std::env::var("SUDO_USER")?); + info!( + "Dropping root and running python command as {}", + std::env::var("SUDO_USER")? + ); command.uid(sudo_uid.parse::()?); } } @@ -377,15 +405,17 @@ fn pyspy_main() -> Result<(), Error> { let mut command = command.args(&subprocess[1..]); if config.capture_output { - command = command.stdin(std::process::Stdio::null()) + command = command + .stdin(std::process::Stdio::null()) .stdout(process_output.reopen()?) .stderr(process_output.reopen()?) } - let mut command = command.spawn() + let mut command = command + .spawn() .map_err(|e| format_err!("Failed to create process '{}': {}", subprocess[0], e))?; - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // sleep just in case: https://jvns.ca/blog/2018/01/28/mac-freeze/ std::thread::sleep(Duration::from_millis(50)); @@ -394,10 +424,10 @@ fn pyspy_main() -> Result<(), Error> { // check exit code of subprocess std::thread::sleep(Duration::from_millis(1)); - let success = match command.try_wait()? { + let success = match command.try_wait()? { Some(exit) => exit.success(), // if process hasn't finished, assume success - None => true + None => true, }; // if we failed for any reason, dump out stderr from child process here @@ -421,34 +451,37 @@ fn pyspy_main() -> Result<(), Error> { } fn main() { - env_logger::builder().format_timestamp_nanos().try_init().unwrap(); + env_logger::builder() + .format_timestamp_nanos() + .try_init() + .unwrap(); if let Err(err) = pyspy_main() { #[cfg(unix)] { - if permission_denied(&err) { - // Got a permission denied error, if we're not running as root - ask to use sudo - if unsafe { libc::geteuid() } != 0 { - eprintln!("Permission Denied: Try running again with elevated permissions by going 'sudo env \"PATH=$PATH\" !!'"); - std::process::exit(1); - } + if permission_denied(&err) { + // Got a permission denied error, if we're not running as root - ask to use sudo + if unsafe { libc::geteuid() } != 0 { + eprintln!("Permission Denied: Try running again with elevated permissions by going 'sudo env \"PATH=$PATH\" !!'"); + std::process::exit(1); + } - // We got a permission denied error running as root, check to see if we're running - // as docker, and if so ask the user to check the SYS_PTRACE capability is added - // Otherwise, fall through to the generic error handling - #[cfg(target_os="linux")] - if let Ok(cgroups) = std::fs::read_to_string("/proc/self/cgroup") { - if cgroups.contains("/docker/") { - eprintln!("Permission Denied"); - eprintln!("\nIt looks like you are running in a docker container. Please make sure \ + // We got a permission denied error running as root, check to see if we're running + // as docker, and if so ask the user to check the SYS_PTRACE capability is added + // Otherwise, fall through to the generic error handling + #[cfg(target_os = "linux")] + if let Ok(cgroups) = std::fs::read_to_string("/proc/self/cgroup") { + if cgroups.contains("/docker/") { + eprintln!("Permission Denied"); + eprintln!("\nIt looks like you are running in a docker container. Please make sure \ you started your container with the SYS_PTRACE capability. See \ https://github.com/benfred/py-spy#how-do-i-run-py-spy-in-docker for \ more details"); - std::process::exit(1); + std::process::exit(1); + } } } } - } eprintln!("Error: {}", err); for (i, suberror) in err.chain().enumerate() { diff --git a/src/native_stack_trace.rs b/src/native_stack_trace.rs index d4dd1981..858969b5 100644 --- a/src/native_stack_trace.rs +++ b/src/native_stack_trace.rs @@ -1,14 +1,14 @@ -use std::collections::HashSet; use anyhow::Error; +use std::collections::HashSet; -use cpp_demangle::{DemangleOptions, BorrowedSymbol}; -use remoteprocess::{self, Pid}; +use cpp_demangle::{BorrowedSymbol, DemangleOptions}; use lazy_static::lazy_static; use lru::LruCache; +use remoteprocess::{self, Pid}; use crate::binary_parser::BinaryInfo; use crate::cython; -use crate::stack_trace::{Frame}; +use crate::stack_trace::Frame; use crate::utils::resolve_filename; pub struct NativeStack { @@ -25,22 +25,34 @@ pub struct NativeStack { } impl NativeStack { - pub fn new(pid: Pid, python: Option, libpython: Option) -> Result { + pub fn new( + pid: Pid, + python: Option, + libpython: Option, + ) -> Result { let cython_maps = cython::SourceMaps::new(); let process = remoteprocess::Process::new(pid)?; let unwinder = process.unwinder()?; let symbolicator = process.symbolicator()?; - return Ok(NativeStack{cython_maps, unwinder, symbolicator, should_reload: false, - python, - libpython, - process, - symbol_cache: LruCache::new(65536) - }); + Ok(NativeStack { + cython_maps, + unwinder, + symbolicator, + should_reload: false, + python, + libpython, + process, + symbol_cache: LruCache::new(65536), + }) } - pub fn merge_native_thread(&mut self, frames: &Vec, thread: &remoteprocess::Thread) -> Result, Error> { + pub fn merge_native_thread( + &mut self, + frames: &Vec, + thread: &remoteprocess::Thread, + ) -> Result, Error> { if self.should_reload { self.symbolicator.reload()?; self.should_reload = false; @@ -50,28 +62,32 @@ impl NativeStack { let native_stack = self.get_thread(thread)?; // TODO: merging the two stack together could happen outside of thread lock - return self.merge_native_stack(frames, native_stack); + self.merge_native_stack(frames, native_stack) } - pub fn merge_native_stack(&mut self, frames: &Vec, native_stack: Vec) -> Result, Error> { + pub fn merge_native_stack( + &mut self, + frames: &Vec, + native_stack: Vec, + ) -> Result, Error> { let mut python_frame_index = 0; let mut merged = Vec::new(); // merge the native_stack and python stack together for addr in native_stack { // check in the symbol cache if we have looked up this symbol yet - let cached_symbol = self.symbol_cache.get(&addr).map(|f| f.clone()); + let cached_symbol = self.symbol_cache.get(&addr).cloned(); // merges a remoteprocess::StackFrame into the current merged vec - let is_python_addr = self.python.as_ref().map_or(false, |m| m.contains(addr)) || - self.libpython.as_ref().map_or(false, |m| m.contains(addr)); + let is_python_addr = self.python.as_ref().map_or(false, |m| m.contains(addr)) + || self.libpython.as_ref().map_or(false, |m| m.contains(addr)); let merge_frame = &mut |frame: &remoteprocess::StackFrame| { match self.get_merge_strategy(is_python_addr, frame) { - MergeType::Ignore => {}, + MergeType::Ignore => {} MergeType::MergeNativeFrame => { if let Some(python_frame) = self.translate_native_frame(frame) { merged.push(python_frame); } - }, + } MergeType::MergePythonFrame => { // if we have a corresponding python frame for the evalframe // merge it into the stack. (if we're out of bounds a later @@ -96,22 +112,36 @@ impl NativeStack { let mut symbolicated_count = 0; let mut first_frame = None; - self.symbolicator.symbolicate(addr, !is_python_addr, &mut |frame: &remoteprocess::StackFrame| { - symbolicated_count += 1; - if symbolicated_count == 1 { - first_frame = Some(frame.clone()); - } - merge_frame(frame); - }).unwrap_or_else(|e| { - if let remoteprocess::Error::NoBinaryForAddress(_) = e { - debug!("don't have a binary for symbols at 0x{:x} - reloading", addr); - self.should_reload = true; - } - // if we can't symbolicate, just insert a stub here. - merged.push(Frame{filename: "?".to_owned(), - name: format!("0x{:x}", addr), - line: 0, short_filename: None, module: None, locals: None}); - }); + self.symbolicator + .symbolicate( + addr, + !is_python_addr, + &mut |frame: &remoteprocess::StackFrame| { + symbolicated_count += 1; + if symbolicated_count == 1 { + first_frame = Some(frame.clone()); + } + merge_frame(frame); + }, + ) + .unwrap_or_else(|e| { + if let remoteprocess::Error::NoBinaryForAddress(_) = e { + debug!( + "don't have a binary for symbols at 0x{:x} - reloading", + addr + ); + self.should_reload = true; + } + // if we can't symbolicate, just insert a stub here. + merged.push(Frame { + filename: "?".to_owned(), + name: format!("0x{:x}", addr), + line: 0, + short_filename: None, + module: None, + locals: None, + }); + }); if symbolicated_count == 1 { self.symbol_cache.put(addr, first_frame.unwrap()); @@ -134,11 +164,17 @@ impl NativeStack { // if we have seen exactly one more python frame in the native stack than the python stack - let it go. // (can happen when the python stack has been unwound, but haven't exited the PyEvalFrame function // yet) - info!("Have {} native and {} python threads in stack - allowing for now", - python_frame_index, frames.len()); + info!( + "Have {} native and {} python threads in stack - allowing for now", + python_frame_index, + frames.len() + ); } else { - return Err(format_err!("Failed to merge native and python frames (Have {} native and {} python)", - python_frame_index, frames.len())); + return Err(format_err!( + "Failed to merge native and python frames (Have {} native and {} python)", + python_frame_index, + frames.len() + )); } } @@ -150,7 +186,11 @@ impl NativeStack { Ok(merged) } - fn get_merge_strategy(&self, check_python: bool, frame: &remoteprocess::StackFrame) -> MergeType { + fn get_merge_strategy( + &self, + check_python: bool, + frame: &remoteprocess::StackFrame, + ) -> MergeType { if check_python { if let Some(ref function) = frame.function { // We want to include some internal python functions. For example, calls like time.sleep @@ -180,17 +220,17 @@ impl NativeStack { // which is replaced by the function from the python stack // note: we're splitting on both _ and . to handle symbols like // _PyEval_EvalFrameDefault.cold.2962 - let mut tokens = function.split(&['_', '.'][..]).filter(|&x| x.len() > 0); + let mut tokens = function.split(&['_', '.'][..]).filter(|&x| !x.is_empty()); match tokens.next() { - Some("PyEval") => { - match tokens.next() { - Some("EvalFrameDefault") => MergeType::MergePythonFrame, - Some("EvalFrameEx") => MergeType::MergePythonFrame, - _ => MergeType::Ignore - } + Some("PyEval") => match tokens.next() { + Some("EvalFrameDefault") => MergeType::MergePythonFrame, + Some("EvalFrameEx") => MergeType::MergePythonFrame, + _ => MergeType::Ignore, }, - Some(prefix) if WHITELISTED_PREFIXES.contains(prefix) => MergeType::MergeNativeFrame, - _ => MergeType::Ignore + Some(prefix) if WHITELISTED_PREFIXES.contains(prefix) => { + MergeType::MergeNativeFrame + } + _ => MergeType::Ignore, } } else { // is this correct? if we don't have a function name and in python binary should ignore? @@ -204,7 +244,7 @@ impl NativeStack { /// translates a native frame into a optional frame. none indicates we should ignore this frame fn translate_native_frame(&self, frame: &remoteprocess::StackFrame) -> Option { match &frame.function { - Some(func) => { + Some(func) => { if ignore_frame(func, &frame.module) { return None; } @@ -214,11 +254,9 @@ impl NativeStack { // try to resolve the filename relative to the module if given let filename = match frame.filename.as_ref() { - Some(filename) => { - resolve_filename(filename, &frame.module) - .unwrap_or_else(|| filename.clone()) - }, - None => frame.module.clone() + Some(filename) => resolve_filename(filename, &frame.module) + .unwrap_or_else(|| filename.clone()), + None => frame.module.clone(), }; let mut demangled = None; @@ -230,18 +268,28 @@ impl NativeStack { } } } - let name = demangled.as_ref().unwrap_or_else(|| &func); + let name = demangled.as_ref().unwrap_or(func); if cython::ignore_frame(name) { return None; } - let name = cython::demangle(&name).to_owned(); - Some(Frame{filename, line, name, short_filename: None, module: Some(frame.module.clone()), locals: None}) - }, - None => { - Some(Frame{filename: frame.module.clone(), - name: format!("0x{:x}", frame.addr), locals: None, - line: 0, short_filename: None, module: Some(frame.module.clone())}) + let name = cython::demangle(name).to_owned(); + Some(Frame { + filename, + line, + name, + short_filename: None, + module: Some(frame.module.clone()), + locals: None, + }) } + None => Some(Frame { + filename: frame.module.clone(), + name: format!("0x{:x}", frame.addr), + locals: None, + line: 0, + short_filename: None, + module: Some(frame.module.clone()), + }), } } @@ -258,12 +306,12 @@ impl NativeStack { enum MergeType { Ignore, MergePythonFrame, - MergeNativeFrame + MergeNativeFrame, } // the intent here is to remove top-level libc or pthreads calls // from the stack traces. This almost certainly can be done better -#[cfg(target_os="linux")] +#[cfg(target_os = "linux")] fn ignore_frame(function: &str, module: &str) -> bool { if function == "__libc_start_main" && module.contains("/libc") { return true; @@ -280,7 +328,7 @@ fn ignore_frame(function: &str, module: &str) -> bool { false } -#[cfg(target_os="macos")] +#[cfg(target_os = "macos")] fn ignore_frame(function: &str, module: &str) -> bool { if function == "_start" && module.contains("/libdyld.dylib") { return true; diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index e69156a0..74a6ce89 100644 --- a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -1,12 +1,12 @@ pub mod v2_7_15; +pub mod v3_10_0; +pub mod v3_11_0; pub mod v3_3_7; pub mod v3_5_5; pub mod v3_6_6; pub mod v3_7_0; pub mod v3_8_0; pub mod v3_9_5; -pub mod v3_10_0; -pub mod v3_11_0; // currently the PyRuntime struct used from Python 3.7 on really can't be // exposed in a cross platform way using bindgen. PyRuntime has several mutex's @@ -23,125 +23,228 @@ pub mod pyruntime { #[cfg(target_arch = "x86")] pub fn get_interp_head_offset(version: &Version) -> usize { match version { - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" | "a2" => 16, - "a3" | "a4" => 20, - _ => 24 - } + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" | "a2" => 16, + "a3" | "a4" => 20, + _ => 24, }, - Version{major: 3, minor: 8..=10, ..} => 24, - _ => 16 + Version { + major: 3, + minor: 8..=10, + .. + } => 24, + _ => 16, } } #[cfg(target_arch = "arm")] pub fn get_interp_head_offset(version: &Version) -> usize { match version { - Version{major: 3, minor: 7, ..} => 20, - _ => 28 + Version { + major: 3, minor: 7, .. + } => 20, + _ => 28, } } #[cfg(target_pointer_width = "64")] pub fn get_interp_head_offset(version: &Version) -> usize { match version { - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" | "a2" => 24, - _ => 32 - } + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" | "a2" => 24, + _ => 32, }, - Version{major: 3, minor: 8..=10, ..} => 32, - Version{major: 3, minor: 11, ..} => 40, - _ => 24 + Version { + major: 3, + minor: 8..=10, + .. + } => 32, + Version { + major: 3, + minor: 11, + .. + } => 40, + _ => 24, } } // getting gilstate.tstate_current is different for all OS // and is also different for each python version, and even // between v3.8.0a1 and v3.8.0a2 =( - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1440), - Version{major: 3, minor: 7, ..} => Some(1528), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1432), - "a2" => Some(888), - "a3" | "a4" => Some(1448), - _ => Some(1416), - } + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1440), + Version { + major: 3, minor: 7, .. + } => Some(1528), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1432), + "a2" => Some(888), + "a3" | "a4" => Some(1448), + _ => Some(1416), }, - Version{major: 3, minor: 8, ..} => { Some(1416) }, - Version{major: 3, minor: 9..=10, ..} => { Some(616) }, - Version{major: 3, minor: 11, ..} => Some(624), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(1416), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(616), + Version { + major: 3, + minor: 11, + .. + } => Some(624), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="x86"))] + #[cfg(all(target_os = "linux", target_arch = "x86"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, ..} => Some(796), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(792), - "a2" => Some(512), - "a3" | "a4" => Some(800), - _ => Some(788) - } + Version { + major: 3, minor: 7, .. + } => Some(796), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(792), + "a2" => Some(512), + "a3" | "a4" => Some(800), + _ => Some(788), }, - Version{major: 3, minor: 8, ..} => Some(788), - Version{major: 3, minor: 9..=10, ..} => Some(352), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(788), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(352), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="arm"))] + #[cfg(all(target_os = "linux", target_arch = "arm"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, ..} => Some(828), - Version{major: 3, minor: 8, ..} => Some(804), - Version{major: 3, minor: 9..=11, ..} => Some(364), - _ => None + Version { + major: 3, minor: 7, .. + } => Some(828), + Version { + major: 3, minor: 8, .. + } => Some(804), + Version { + major: 3, + minor: 9..=11, + .. + } => Some(364), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="aarch64"))] + #[cfg(all(target_os = "linux", target_arch = "aarch64"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1408), - Version{major: 3, minor: 7, ..} => Some(1496), - Version{major: 3, minor: 8, ..} => Some(1384), - Version{major: 3, minor: 9..=10, ..} => Some(584), - Version{major: 3, minor: 11, ..} => Some(592), - _ => None + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1408), + Version { + major: 3, minor: 7, .. + } => Some(1496), + Version { + major: 3, minor: 8, .. + } => Some(1384), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(584), + Version { + major: 3, + minor: 11, + .. + } => Some(592), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="x86_64"))] + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1392), - Version{major: 3, minor: 7, ..} => Some(1480), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1384), - "a2" => Some(840), - "a3" | "a4" => Some(1400), - _ => Some(1368) - } - }, - Version{major: 3, minor: 8, ..} => Some(1368), - Version{major: 3, minor: 9..=10, ..} => Some(568), - Version{major: 3, minor: 11, ..} => Some(576), - _ => None + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1392), + Version { + major: 3, minor: 7, .. + } => Some(1480), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1384), + "a2" => Some(840), + "a3" | "a4" => Some(1400), + _ => Some(1368), + }, + Version { + major: 3, minor: 8, .. + } => Some(1368), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(568), + Version { + major: 3, + minor: 11, + .. + } => Some(576), + _ => None, } } - #[cfg(all(target_os="linux", any(target_arch="powerpc64", target_arch="powerpc", target_arch="mips")))] + #[cfg(all( + target_os = "linux", + any( + target_arch = "powerpc64", + target_arch = "powerpc", + target_arch = "mips" + ) + ))] pub fn get_tstate_current_offset(version: &Version) -> Option { None } @@ -149,39 +252,80 @@ pub mod pyruntime { #[cfg(windows)] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1320), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1312), - "a2" => Some(768), - "a3" | "a4" => Some(1328), - _ => Some(1296) - } + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1320), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1312), + "a2" => Some(768), + "a3" | "a4" => Some(1328), + _ => Some(1296), }, - Version{major: 3, minor: 8, ..} => Some(1296), - Version{major: 3, minor: 9..=10, ..} => Some(496), - Version{major: 3, minor: 11, ..} => Some(504), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(1296), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(496), + Version { + major: 3, + minor: 11, + .. + } => Some(504), + _ => None, } } - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1248), - Version{major: 3, minor: 7, patch: 4..=7, ..} => Some(1336), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1240), - "a2" => Some(696), - "a3" | "a4" => Some(1256), - _ => Some(1224) - } + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1248), + Version { + major: 3, + minor: 7, + patch: 4..=7, + .. + } => Some(1336), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1240), + "a2" => Some(696), + "a3" | "a4" => Some(1256), + _ => Some(1224), }, - Version{major: 3, minor: 8, ..} => Some(1224), - Version{major: 3, minor: 9..=10, ..} => Some(424), - Version{major: 3, minor: 11, ..} => Some(432), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(1224), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(424), + Version { + major: 3, + minor: 11, + .. + } => Some(432), + _ => None, } } } diff --git a/src/python_bindings/v2_7_15.rs b/src/python_bindings/v2_7_15.rs index 839e7ee9..0b5e7afd 100644 --- a/src/python_bindings/v2_7_15.rs +++ b/src/python_bindings/v2_7_15.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_10_0.rs b/src/python_bindings/v3_10_0.rs index 74da9649..37611203 100644 --- a/src/python_bindings/v3_10_0.rs +++ b/src/python_bindings/v3_10_0.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_11_0.rs b/src/python_bindings/v3_11_0.rs index 3bbd3570..ad57a72e 100644 --- a/src/python_bindings/v3_11_0.rs +++ b/src/python_bindings/v3_11_0.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_3_7.rs b/src/python_bindings/v3_3_7.rs index 99f5ddd2..542d6e2c 100644 --- a/src/python_bindings/v3_3_7.rs +++ b/src/python_bindings/v3_3_7.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_4_8.rs b/src/python_bindings/v3_4_8.rs index 8e1e6a0b..4f39c70d 100644 --- a/src/python_bindings/v3_4_8.rs +++ b/src/python_bindings/v3_4_8.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_5_5.rs b/src/python_bindings/v3_5_5.rs index 8ee1a774..02e47980 100644 --- a/src/python_bindings/v3_5_5.rs +++ b/src/python_bindings/v3_5_5.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_6_6.rs b/src/python_bindings/v3_6_6.rs index 22c28a3f..2fa2721b 100644 --- a/src/python_bindings/v3_6_6.rs +++ b/src/python_bindings/v3_6_6.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_7_0.rs b/src/python_bindings/v3_7_0.rs index 6c9a24f8..e191cddf 100644 --- a/src/python_bindings/v3_7_0.rs +++ b/src/python_bindings/v3_7_0.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_8_0.rs b/src/python_bindings/v3_8_0.rs index 433ed6c6..6377e86d 100644 --- a/src/python_bindings/v3_8_0.rs +++ b/src/python_bindings/v3_8_0.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_bindings/v3_9_5.rs b/src/python_bindings/v3_9_5.rs index 1930e146..1f80b2d7 100644 --- a/src/python_bindings/v3_9_5.rs +++ b/src/python_bindings/v3_9_5.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] +#![allow(clippy::upper_case_acronyms)] /* automatically generated by rust-bindgen */ diff --git a/src/python_data_access.rs b/src/python_data_access.rs index 2e2cf819..4249e40a 100644 --- a/src/python_data_access.rs +++ b/src/python_data_access.rs @@ -1,16 +1,23 @@ -use std; - +#![allow(clippy::unnecessary_cast)] use anyhow::Error; -use remoteprocess::ProcessMemory; -use crate::python_interpreters::{StringObject, BytesObject, InterpreterState, Object, TypeObject, TupleObject, ListObject}; +use crate::python_interpreters::{ + BytesObject, InterpreterState, ListObject, Object, StringObject, TupleObject, TypeObject, +}; use crate::version::Version; +use remoteprocess::ProcessMemory; /// Copies a string from a target process. Attempts to handle unicode differences, which mostly seems to be working -pub fn copy_string(ptr: * const T, process: &P) -> Result { +pub fn copy_string( + ptr: *const T, + process: &P, +) -> Result { let obj = process.copy_pointer(ptr)?; if obj.size() >= 4096 { - return Err(format_err!("Refusing to copy {} chars of a string", obj.size())); + return Err(format_err!( + "Refusing to copy {} chars of a string", + obj.size() + )); } let kind = obj.kind(); @@ -20,23 +27,28 @@ pub fn copy_string(ptr: * const T, process: & match (kind, obj.ascii()) { (4, _) => { #[allow(clippy::cast_ptr_alignment)] - let chars = unsafe { std::slice::from_raw_parts(bytes.as_ptr() as * const char, bytes.len() / 4) }; + let chars = unsafe { + std::slice::from_raw_parts(bytes.as_ptr() as *const char, bytes.len() / 4) + }; Ok(chars.iter().collect()) - }, + } (2, _) => { // UCS2 strings aren't used internally after v3.3: https://www.python.org/dev/peps/pep-0393/ // TODO: however with python 2.7 they could be added with --enable-unicode=ucs2 configure flag. // or with python 3.2 --with-wide-unicode=ucs2 Err(format_err!("ucs2 strings aren't supported yet!")) - }, + } (1, true) => Ok(String::from_utf8(bytes)?), - (1, false) => Ok(bytes.iter().map(|&b| { b as char }).collect()), - _ => Err(format_err!("Unknown string kind {}", kind)) + (1, false) => Ok(bytes.iter().map(|&b| b as char).collect()), + _ => Err(format_err!("Unknown string kind {}", kind)), } } /// Copies data from a PyBytesObject (currently only lnotab object) -pub fn copy_bytes(ptr: * const T, process: &P) -> Result, Error> { +pub fn copy_bytes( + ptr: *const T, + process: &P, +) -> Result, Error> { let obj = process.copy_pointer(ptr)?; let size = obj.size(); if size >= 65536 { @@ -49,7 +61,8 @@ pub fn copy_bytes(ptr: * const T, process: &P) pub fn copy_long(process: &P, addr: usize) -> Result<(i64, bool), Error> { // this is PyLongObject for a specific version of python, but this works since it's binary compatible // layout across versions we're targeting - let value = process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyLongObject)?; + let value = + process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyLongObject)?; let negative: i64 = if value.ob_base.ob_size < 0 { -1 } else { 1 }; let size = value.ob_base.ob_size * (negative as isize); match size { @@ -75,13 +88,14 @@ pub fn copy_long(process: &P, addr: usize) -> Result<(i64, boo Ok((negative * ret, false)) } // we don't support arbitrary sized integers yet, signal this by returning that we've overflowed - _ => Ok((value.ob_base.ob_size as i64, true)) + _ => Ok((value.ob_base.ob_size as i64, true)), } } /// Copies a i64 from a python 2.7 PyIntObject pub fn copy_int(process: &P, addr: usize) -> Result { - let value = process.copy_pointer(addr as *const crate::python_bindings::v2_7_15::PyIntObject)?; + let value = + process.copy_pointer(addr as *const crate::python_bindings::v2_7_15::PyIntObject)?; Ok(value.ob_ival as i64) } @@ -93,20 +107,36 @@ pub struct DictIterator<'a, P: 'a> { kind: u8, index: usize, entries: usize, - values: usize + values: usize, } impl<'a, P: ProcessMemory> DictIterator<'a, P> { - pub fn from_managed_dict(process: &'a P, version: &'a Version, addr: usize, tp_addr: usize) -> Result, Error> { + pub fn from_managed_dict( + process: &'a P, + version: &'a Version, + addr: usize, + tp_addr: usize, + ) -> Result, Error> { // Handles logic of _PyObject_ManagedDictPointer in python 3.11 let values_addr: usize = process.copy_struct(addr - 4 * std::mem::size_of::())?; let dict_addr: usize = process.copy_struct(addr - 3 * std::mem::size_of::())?; if values_addr != 0 { - let ht: crate::python_bindings::v3_11_0::PyHeapTypeObject = process.copy_struct(tp_addr)?; - let keys: crate::python_bindings::v3_11_0::PyDictKeysObject = process.copy_struct(ht.ht_cached_keys as usize)?; - let entries_addr = ht.ht_cached_keys as usize + (1 << keys.dk_log2_index_bytes) + std::mem::size_of_val(&keys); - Ok(DictIterator{process, entries_addr, index: 0, kind: keys.dk_kind, entries: keys.dk_nentries as usize, values: values_addr}) + let ht: crate::python_bindings::v3_11_0::PyHeapTypeObject = + process.copy_struct(tp_addr)?; + let keys: crate::python_bindings::v3_11_0::PyDictKeysObject = + process.copy_struct(ht.ht_cached_keys as usize)?; + let entries_addr = ht.ht_cached_keys as usize + + (1 << keys.dk_log2_index_bytes) + + std::mem::size_of_val(&keys); + Ok(DictIterator { + process, + entries_addr, + index: 0, + kind: keys.dk_kind, + entries: keys.dk_nentries as usize, + values: values_addr, + }) } else if dict_addr != 0 { DictIterator::from(process, version, dict_addr) } else { @@ -114,16 +144,35 @@ impl<'a, P: ProcessMemory> DictIterator<'a, P> { } } - pub fn from(process: &'a P, version: &'a Version, addr: usize) -> Result, Error> { - match version { - Version{major: 3, minor: 11, ..} => { - let dict: crate::python_bindings::v3_11_0::PyDictObject = process.copy_struct(addr)?; + pub fn from( + process: &'a P, + version: &'a Version, + addr: usize, + ) -> Result, Error> { + match version { + Version { + major: 3, + minor: 11, + .. + } => { + let dict: crate::python_bindings::v3_11_0::PyDictObject = + process.copy_struct(addr)?; let keys = process.copy_pointer(dict.ma_keys)?; - let entries_addr = dict.ma_keys as usize + (1 << keys.dk_log2_index_bytes) + std::mem::size_of_val(&keys); - Ok(DictIterator{process, entries_addr, index: 0, kind: keys.dk_kind, entries: keys.dk_nentries as usize, values: dict.ma_values as usize}) - }, - _ => { - let dict: crate::python_bindings::v3_7_0::PyDictObject = process.copy_struct(addr)?; + let entries_addr = dict.ma_keys as usize + + (1 << keys.dk_log2_index_bytes) + + std::mem::size_of_val(&keys); + Ok(DictIterator { + process, + entries_addr, + index: 0, + kind: keys.dk_kind, + entries: keys.dk_nentries as usize, + values: dict.ma_values as usize, + }) + } + _ => { + let dict: crate::python_bindings::v3_7_0::PyDictObject = + process.copy_struct(addr)?; // Getting this going generically is tricky: there is a lot of variation on how dictionaries are handled // instead this just focuses on a single version, which works for python // 3.6/3.7/3.8/3.9/3.10 @@ -136,11 +185,19 @@ impl<'a, P: ProcessMemory> DictIterator<'a, P> { #[cfg(target_pointer_width = "64")] _ => 8, #[cfg(not(target_pointer_width = "64"))] - _ => 4 + _ => 4, }; let byteoffset = (keys.dk_size * index_size) as usize; - let entries_addr = dict.ma_keys as usize + byteoffset + std::mem::size_of_val(&keys); - Ok(DictIterator{process, entries_addr, index: 0, kind: 0, entries: keys.dk_nentries as usize, values: dict.ma_values as usize}) + let entries_addr = + dict.ma_keys as usize + byteoffset + std::mem::size_of_val(&keys); + Ok(DictIterator { + process, + entries_addr, + index: 0, + kind: 0, + entries: keys.dk_nentries as usize, + values: dict.ma_values as usize, + }) } } } @@ -157,14 +214,23 @@ impl<'a, P: ProcessMemory> Iterator for DictIterator<'a, P> { // get the addresses of the key/value for the current index let entry = match self.kind { 0 => { - let addr = index * std::mem::size_of::() + self.entries_addr; - let ret = self.process.copy_struct::(addr); + let addr = index + * std::mem::size_of::() + + self.entries_addr; + let ret = self + .process + .copy_struct::(addr); ret.map(|entry| (entry.me_key as usize, entry.me_value as usize)) - }, + } _ => { // Python 3.11 added a PyDictUnicodeEntry , which uses the hash from the Unicode key rather than recalculate - let addr = index * std::mem::size_of::() + self.entries_addr; - let ret = self.process.copy_struct::(addr); + let addr = index + * std::mem::size_of::( + ) + + self.entries_addr; + let ret = self + .process + .copy_struct::(addr); ret.map(|entry| (entry.me_key as usize, entry.me_value as usize)) } }; @@ -176,20 +242,23 @@ impl<'a, P: ProcessMemory> Iterator for DictIterator<'a, P> { } let value = if self.values != 0 { - let valueaddr = self.values + index * std::mem::size_of::<* mut crate::python_bindings::v3_7_0::PyObject>(); + let valueaddr = self.values + + index + * std::mem::size_of::<*mut crate::python_bindings::v3_7_0::PyObject>( + ); match self.process.copy_struct(valueaddr) { Ok(addr) => addr, - Err(e) => { return Some(Err(e.into())); } + Err(e) => { + return Some(Err(e.into())); + } } } else { value }; - return Some(Ok((key, value))) - }, - Err(e) => { - return Some(Err(e.into())) + return Some(Ok((key, value))); } + Err(e) => return Some(Err(e.into())), } } @@ -197,18 +266,26 @@ impl<'a, P: ProcessMemory> Iterator for DictIterator<'a, P> { } } -pub const PY_TPFLAGS_MANAGED_DICT: usize = 1 << 4; -const PY_TPFLAGS_INT_SUBCLASS: usize = 1 << 23; -const PY_TPFLAGS_LONG_SUBCLASS: usize = 1 << 24; -const PY_TPFLAGS_LIST_SUBCLASS: usize = 1 << 25; -const PY_TPFLAGS_TUPLE_SUBCLASS: usize = 1 << 26; -const PY_TPFLAGS_BYTES_SUBCLASS: usize = 1 << 27; +pub const PY_TPFLAGS_MANAGED_DICT: usize = 1 << 4; +const PY_TPFLAGS_INT_SUBCLASS: usize = 1 << 23; +const PY_TPFLAGS_LONG_SUBCLASS: usize = 1 << 24; +const PY_TPFLAGS_LIST_SUBCLASS: usize = 1 << 25; +const PY_TPFLAGS_TUPLE_SUBCLASS: usize = 1 << 26; +const PY_TPFLAGS_BYTES_SUBCLASS: usize = 1 << 27; const PY_TPFLAGS_STRING_SUBCLASS: usize = 1 << 28; -const PY_TPFLAGS_DICT_SUBCLASS: usize = 1 << 29; +const PY_TPFLAGS_DICT_SUBCLASS: usize = 1 << 29; /// Converts a python variable in the other process to a human readable string -pub fn format_variable(process: &P, version: &Version, addr: usize, max_length: isize) - -> Result where I: InterpreterState, P: ProcessMemory { +pub fn format_variable( + process: &P, + version: &Version, + addr: usize, + max_length: isize, +) -> Result +where + I: InterpreterState, + P: ProcessMemory, +{ // We need at least 5 characters remaining for all this code to work, replace with an ellipsis if // we're out of space if max_length <= 5 { @@ -221,7 +298,10 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le // get the typename (truncating to 128 bytes if longer) let max_type_len = 128; let value_type_name = process.copy(value_type.name() as usize, max_type_len)?; - let length = value_type_name.iter().position(|&x| x == 0).unwrap_or(max_type_len); + let length = value_type_name + .iter() + .position(|&x| x == 0) + .unwrap_or(max_type_len); let value_type_name = std::str::from_utf8(&value_type_name[..length])?; let format_int = |value: i64| { @@ -239,14 +319,21 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le } else if flags & PY_TPFLAGS_LONG_SUBCLASS != 0 { // we don't handle arbitrary sized integer values (max is 2**60) let (value, overflowed) = copy_long(process, addr)?; - if overflowed { - if value > 0 { "+bigint".to_owned() } else { "-bigint".to_owned() } + if overflowed { + if value > 0 { + "+bigint".to_owned() + } else { + "-bigint".to_owned() + } } else { format_int(value) } - } else if flags & PY_TPFLAGS_STRING_SUBCLASS != 0 || - (version.major == 2 && (flags & PY_TPFLAGS_BYTES_SUBCLASS != 0)) { - let value = copy_string(addr as *const I::StringObject, process)?.replace("\"", "\\\"").replace("\n", "\\n"); + } else if flags & PY_TPFLAGS_STRING_SUBCLASS != 0 + || (version.major == 2 && (flags & PY_TPFLAGS_BYTES_SUBCLASS != 0)) + { + let value = copy_string(addr as *const I::StringObject, process)? + .replace('\'', "\\\"") + .replace('\n', "\\n"); if value.len() as isize >= max_length - 5 { format!("\"{}...\"", &value[..(max_length - 5) as usize]) } else { @@ -278,7 +365,8 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le let mut values = Vec::new(); let mut remaining = max_length - 2; for i in 0..object.size() { - let valueptr: *mut I::Object = process.copy_struct(addr + i * std::mem::size_of::<* mut I::Object>())?; + let valueptr: *mut I::Object = + process.copy_struct(addr + i * std::mem::size_of::<*mut I::Object>())?; let value = format_variable::(process, version, valueptr as usize, remaining)?; remaining -= value.len() as isize + 2; if remaining <= 5 { @@ -304,7 +392,8 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le } format!("({})", values.join(", ")) } else if value_type_name == "float" { - let value = process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyFloatObject)?; + let value = + process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyFloatObject)?; format!("{}", value.ob_fval) } else if value_type_name == "NoneType" { "None".to_owned() @@ -320,8 +409,10 @@ pub mod tests { // the idea here is to create various cpython interpretator structs locally // and then test out that the above code handles appropriately use super::*; + use crate::python_bindings::v3_7_0::{ + PyASCIIObject, PyBytesObject, PyUnicodeObject, PyVarObject, + }; use remoteprocess::LocalProcess; - use crate::python_bindings::v3_7_0::{PyBytesObject, PyVarObject, PyUnicodeObject, PyASCIIObject}; use std::ptr::copy_nonoverlapping; // python stores data after pybytesobject/pyasciiobject. hack by initializing a 4k buffer for testing. @@ -330,31 +421,52 @@ pub mod tests { #[repr(C)] pub struct AllocatedPyByteObject { pub base: PyBytesObject, - pub storage: [u8; 4096] + pub storage: [u8; 4096], } #[allow(dead_code)] #[repr(C)] // Rust can optimize the layout of this struct and break our pointer arithmetic pub struct AllocatedPyASCIIObject { pub base: PyASCIIObject, - pub storage: [u8; 4096] + pub storage: [u8; 4096], } pub fn to_byteobject(bytes: &[u8]) -> AllocatedPyByteObject { let ob_size = bytes.len() as isize; - let base = PyBytesObject{ob_base: PyVarObject{ob_size, ..Default::default()}, ..Default::default()}; - let mut ret = AllocatedPyByteObject{base, storage: [0 as u8; 4096]}; - unsafe { copy_nonoverlapping(bytes.as_ptr(), ret.base.ob_sval.as_mut_ptr() as *mut u8, bytes.len()); } + let base = PyBytesObject { + ob_base: PyVarObject { + ob_size, + ..Default::default() + }, + ..Default::default() + }; + let mut ret = AllocatedPyByteObject { + base, + storage: [0 as u8; 4096], + }; + unsafe { + copy_nonoverlapping( + bytes.as_ptr(), + ret.base.ob_sval.as_mut_ptr() as *mut u8, + bytes.len(), + ); + } ret } pub fn to_asciiobject(input: &str) -> AllocatedPyASCIIObject { let bytes: Vec = input.bytes().collect(); - let mut base = PyASCIIObject{length: bytes.len() as isize, ..Default::default()}; + let mut base = PyASCIIObject { + length: bytes.len() as isize, + ..Default::default() + }; base.state.set_compact(1); base.state.set_kind(1); base.state.set_ascii(1); - let mut ret = AllocatedPyASCIIObject{base, storage: [0 as u8; 4096]}; + let mut ret = AllocatedPyASCIIObject { + base, + storage: [0 as u8; 4096], + }; unsafe { let ptr = &mut ret as *mut AllocatedPyASCIIObject as *mut u8; let dst = ptr.offset(std::mem::size_of::() as isize); @@ -368,7 +480,7 @@ pub mod tests { let original = "function_name"; let obj = to_asciiobject(original); - let unicode: &PyUnicodeObject = unsafe{ std::mem::transmute(&obj.base) }; + let unicode: &PyUnicodeObject = unsafe { std::mem::transmute(&obj.base) }; let copied = copy_string(unicode, &LocalProcess).unwrap(); assert_eq!(copied, original); } diff --git a/src/python_interpreters.rs b/src/python_interpreters.rs index 9efc79bf..f2485dee 100644 --- a/src/python_interpreters.rs +++ b/src/python_interpreters.rs @@ -7,11 +7,13 @@ pointer addresses here refer to locations in the target process memory space. This means we can't dereference them directly. */ +#![allow(clippy::unnecessary_cast)] + // these bindings are automatically generated by rust bindgen // using the generate_bindings.py script -use crate::python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; - -use std; +use crate::python_bindings::{ + v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; pub trait InterpreterState { type ThreadState: ThreadState; @@ -19,7 +21,7 @@ pub trait InterpreterState { type StringObject: StringObject; type ListObject: ListObject; type TupleObject: TupleObject; - fn head(&self) -> * mut Self::ThreadState; + fn head(&self) -> *mut Self::ThreadState; fn modules(&self) -> *mut Self::Object; } @@ -27,24 +29,24 @@ pub trait ThreadState { type FrameObject: FrameObject; type InterpreterState: InterpreterState; - fn interp(&self) -> * mut Self::InterpreterState; + fn interp(&self) -> *mut Self::InterpreterState; // starting in python 3.11, there is an extra level of indirection // in getting the frame. this returns the address fn frame_address(&self) -> Option; - fn frame(&self, offset: Option) -> * mut Self::FrameObject; + fn frame(&self, offset: Option) -> *mut Self::FrameObject; fn thread_id(&self) -> u64; fn native_thread_id(&self) -> Option; - fn next(&self) -> * mut Self; + fn next(&self) -> *mut Self; } pub trait FrameObject { type CodeObject: CodeObject; - fn code(&self) -> * mut Self::CodeObject; + fn code(&self) -> *mut Self::CodeObject; fn lasti(&self) -> i32; - fn back(&self) -> * mut Self; + fn back(&self) -> *mut Self; } pub trait CodeObject { @@ -52,13 +54,13 @@ pub trait CodeObject { type BytesObject: BytesObject; type TupleObject: TupleObject; - fn name(&self) -> * mut Self::StringObject; - fn filename(&self) -> * mut Self::StringObject; - fn line_table(&self) -> * mut Self::BytesObject; + fn name(&self) -> *mut Self::StringObject; + fn filename(&self) -> *mut Self::StringObject; + fn line_table(&self) -> *mut Self::BytesObject; fn first_lineno(&self) -> i32; fn nlocals(&self) -> i32; fn argcount(&self) -> i32; - fn varnames(&self) -> * mut Self::TupleObject; + fn varnames(&self) -> *mut Self::TupleObject; fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32; } @@ -88,7 +90,7 @@ pub trait ListObject { pub trait Object { type TypeObject: TypeObject; - fn ob_type(&self) -> * mut Self::TypeObject; + fn ob_type(&self) -> *mut Self::TypeObject; } pub trait TypeObject { @@ -105,64 +107,108 @@ fn offset_of(object: *const T, member: *const M) -> usize { /// (this code is identical across python versions, we are only abstracting the struct layouts here). /// String handling changes substantially between python versions, and is handled separately. macro_rules! PythonCommonImpl { - ($py: ident, $stringobject: ident) => ( + ($py: ident, $stringobject: ident) => { impl InterpreterState for $py::PyInterpreterState { type ThreadState = $py::PyThreadState; type Object = $py::PyObject; type StringObject = $py::$stringobject; type ListObject = $py::PyListObject; type TupleObject = $py::PyTupleObject; - fn head(&self) -> * mut Self::ThreadState { self.tstate_head } - fn modules(&self) -> * mut Self::Object { self.modules } + fn head(&self) -> *mut Self::ThreadState { + self.tstate_head + } + fn modules(&self) -> *mut Self::Object { + self.modules + } } impl ThreadState for $py::PyThreadState { type FrameObject = $py::PyFrameObject; type InterpreterState = $py::PyInterpreterState; - fn frame_address(&self) -> Option { None } - fn frame(&self, _: Option) -> * mut Self::FrameObject { self.frame } - fn thread_id(&self) -> u64 { self.thread_id as u64 } - fn native_thread_id(&self) -> Option { None } - fn next(&self) -> * mut Self { self.next } - fn interp(&self) -> *mut Self::InterpreterState { self.interp } + fn frame_address(&self) -> Option { + None + } + fn frame(&self, _: Option) -> *mut Self::FrameObject { + self.frame + } + fn thread_id(&self) -> u64 { + self.thread_id as u64 + } + fn native_thread_id(&self) -> Option { + None + } + fn next(&self) -> *mut Self { + self.next + } + fn interp(&self) -> *mut Self::InterpreterState { + self.interp + } } impl FrameObject for $py::PyFrameObject { type CodeObject = $py::PyCodeObject; - fn code(&self) -> * mut Self::CodeObject { self.f_code } - fn lasti(&self) -> i32 { self.f_lasti as i32 } - fn back(&self) -> * mut Self { self.f_back } + fn code(&self) -> *mut Self::CodeObject { + self.f_code + } + fn lasti(&self) -> i32 { + self.f_lasti as i32 + } + fn back(&self) -> *mut Self { + self.f_back + } } impl Object for $py::PyObject { type TypeObject = $py::PyTypeObject; - fn ob_type(&self) -> * mut Self::TypeObject { self.ob_type as * mut Self::TypeObject } + fn ob_type(&self) -> *mut Self::TypeObject { + self.ob_type as *mut Self::TypeObject + } } impl TypeObject for $py::PyTypeObject { - fn name(&self) -> *const ::std::os::raw::c_char { self.tp_name } - fn dictoffset(&self) -> isize { self.tp_dictoffset } - fn flags(&self) -> usize { self.tp_flags as usize } + fn name(&self) -> *const ::std::os::raw::c_char { + self.tp_name + } + fn dictoffset(&self) -> isize { + self.tp_dictoffset + } + fn flags(&self) -> usize { + self.tp_flags as usize + } } - ) + }; } // We can use this up until python3.10 - where code object lnotab attribute is deprecated macro_rules! PythonCodeObjectImpl { - ($py: ident, $bytesobject: ident, $stringobject: ident) => ( + ($py: ident, $bytesobject: ident, $stringobject: ident) => { impl CodeObject for $py::PyCodeObject { type BytesObject = $py::$bytesobject; type StringObject = $py::$stringobject; type TupleObject = $py::PyTupleObject; - fn name(&self) -> * mut Self::StringObject { self.co_name as * mut Self::StringObject } - fn filename(&self) -> * mut Self::StringObject { self.co_filename as * mut Self::StringObject } - fn line_table(&self) -> * mut Self::BytesObject { self.co_lnotab as * mut Self::BytesObject } - fn first_lineno(&self) -> i32 { self.co_firstlineno } - fn nlocals(&self) -> i32 { self.co_nlocals } - fn argcount(&self) -> i32 { self.co_argcount } - fn varnames(&self) -> * mut Self::TupleObject { self.co_varnames as * mut Self::TupleObject } + fn name(&self) -> *mut Self::StringObject { + self.co_name as *mut Self::StringObject + } + fn filename(&self) -> *mut Self::StringObject { + self.co_filename as *mut Self::StringObject + } + fn line_table(&self) -> *mut Self::BytesObject { + self.co_lnotab as *mut Self::BytesObject + } + fn first_lineno(&self) -> i32 { + self.co_firstlineno + } + fn nlocals(&self) -> i32 { + self.co_nlocals + } + fn argcount(&self) -> i32 { + self.co_argcount + } + fn varnames(&self) -> *mut Self::TupleObject { + self.co_varnames as *mut Self::TupleObject + } fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { let lasti = lasti as i32; @@ -191,27 +237,35 @@ macro_rules! PythonCodeObjectImpl { line_number } } - ) + }; } // String/Byte/List/Tuple handling for Python 3.3+ macro_rules! Python3Impl { - ($py: ident) => ( + ($py: ident) => { impl BytesObject for $py::PyBytesObject { - fn size(&self) -> usize { self.ob_base.ob_size as usize } + fn size(&self) -> usize { + self.ob_base.ob_size as usize + } fn address(&self, base: usize) -> usize { base + offset_of(self, &self.ob_sval) } } impl StringObject for $py::PyUnicodeObject { - fn ascii(&self) -> bool { self._base._base.state.ascii() != 0 } - fn size(&self) -> usize { self._base._base.length as usize } - fn kind(&self) -> u32 { self._base._base.state.kind() } + fn ascii(&self) -> bool { + self._base._base.state.ascii() != 0 + } + fn size(&self) -> usize { + self._base._base.length as usize + } + fn kind(&self) -> u32 { + self._base._base.state.kind() + } fn address(&self, base: usize) -> usize { if self._base._base.state.compact() == 0 { - return unsafe{ self.data.any as usize }; + return unsafe { self.data.any as usize }; } if self._base._base.state.ascii() == 1 { @@ -224,17 +278,24 @@ macro_rules! Python3Impl { impl ListObject for $py::PyListObject { type Object = $py::PyObject; - fn size(&self) -> usize { self.ob_base.ob_size as usize } - fn item(&self) -> *mut *mut Self::Object { self.ob_item } + fn size(&self) -> usize { + self.ob_base.ob_size as usize + } + fn item(&self) -> *mut *mut Self::Object { + self.ob_item + } } impl TupleObject for $py::PyTupleObject { - fn size(&self) -> usize { self.ob_base.ob_size as usize } + fn size(&self) -> usize { + self.ob_base.ob_size as usize + } fn address(&self, base: usize, index: usize) -> usize { - base + offset_of(self, &self.ob_item) + index * std::mem::size_of::<* mut $py::PyObject>() + base + offset_of(self, &self.ob_item) + + index * std::mem::size_of::<*mut $py::PyObject>() } } - ) + }; } // Python 3.11 // Python3.11 is sufficiently different from previous versions that we can't use the macros above @@ -247,8 +308,12 @@ impl InterpreterState for v3_11_0::PyInterpreterState { type StringObject = v3_11_0::PyUnicodeObject; type ListObject = v3_11_0::PyListObject; type TupleObject = v3_11_0::PyTupleObject; - fn head(&self) -> * mut Self::ThreadState { self.threads.head } - fn modules(&self) -> * mut Self::Object { self.modules } + fn head(&self) -> *mut Self::ThreadState { + self.threads.head + } + fn modules(&self) -> *mut Self::Object { + self.modules + } } impl ThreadState for v3_11_0::PyThreadState { @@ -260,35 +325,57 @@ impl ThreadState for v3_11_0::PyThreadState { let current_frame_offset = offset_of(&cframe, &cframe.current_frame); Some(self.cframe as usize + current_frame_offset) } - fn frame(&self, addr: Option) -> * mut Self::FrameObject { addr.unwrap() as * mut Self::FrameObject } - fn thread_id(&self) -> u64 { self.thread_id as u64 } - fn native_thread_id(&self) -> Option { Some(self.native_thread_id as u64) } - fn next(&self) -> * mut Self { self.next } - fn interp(&self) -> *mut Self::InterpreterState { self.interp } + fn frame(&self, addr: Option) -> *mut Self::FrameObject { + addr.unwrap() as *mut Self::FrameObject + } + fn thread_id(&self) -> u64 { + self.thread_id as u64 + } + fn native_thread_id(&self) -> Option { + Some(self.native_thread_id as u64) + } + fn next(&self) -> *mut Self { + self.next + } + fn interp(&self) -> *mut Self::InterpreterState { + self.interp + } } impl FrameObject for v3_11_0::_PyInterpreterFrame { type CodeObject = v3_11_0::PyCodeObject; - fn code(&self) -> * mut Self::CodeObject { self.f_code } + fn code(&self) -> *mut Self::CodeObject { + self.f_code + } fn lasti(&self) -> i32 { // this returns the delta from the co_code, but we need to adjust for the // offset from co_code.co_code_adaptive. This is slightly easier to do in the // get_line_number code, so will adjust there - let co_code = self.f_code as * const _ as * const u8; - unsafe { (self.prev_instr as * const u8).offset_from(co_code) as i32} + let co_code = self.f_code as *const _ as *const u8; + unsafe { (self.prev_instr as *const u8).offset_from(co_code) as i32 } + } + fn back(&self) -> *mut Self { + self.previous } - fn back(&self) -> * mut Self { self.previous } } impl Object for v3_11_0::PyObject { type TypeObject = v3_11_0::PyTypeObject; - fn ob_type(&self) -> * mut Self::TypeObject { self.ob_type as * mut Self::TypeObject } + fn ob_type(&self) -> *mut Self::TypeObject { + self.ob_type as *mut Self::TypeObject + } } impl TypeObject for v3_11_0::PyTypeObject { - fn name(&self) -> *const ::std::os::raw::c_char { self.tp_name } - fn dictoffset(&self) -> isize { self.tp_dictoffset } - fn flags(&self) -> usize { self.tp_flags as usize } + fn name(&self) -> *const ::std::os::raw::c_char { + self.tp_name + } + fn dictoffset(&self) -> isize { + self.tp_dictoffset + } + fn flags(&self) -> usize { + self.tp_flags as usize + } } fn read_varint(index: &mut usize, table: &[u8]) -> usize { @@ -310,7 +397,7 @@ fn read_varint(index: &mut usize, table: &[u8]) -> usize { fn read_signed_varint(index: &mut usize, table: &[u8]) -> isize { let unsigned_val = read_varint(index, table); if unsigned_val & 1 != 0 { - -1 * ((unsigned_val >> 1) as isize) + -((unsigned_val >> 1) as isize) } else { (unsigned_val >> 1) as isize } @@ -321,13 +408,27 @@ impl CodeObject for v3_11_0::PyCodeObject { type StringObject = v3_11_0::PyUnicodeObject; type TupleObject = v3_11_0::PyTupleObject; - fn name(&self) -> * mut Self::StringObject { self.co_name as * mut Self::StringObject } - fn filename(&self) -> * mut Self::StringObject { self.co_filename as * mut Self::StringObject } - fn line_table(&self) -> * mut Self::BytesObject { self.co_linetable as * mut Self::BytesObject } - fn first_lineno(&self) -> i32 { self.co_firstlineno } - fn nlocals(&self) -> i32 { self.co_nlocals } - fn argcount(&self) -> i32 { self.co_argcount } - fn varnames(&self) -> * mut Self::TupleObject { self.co_localsplusnames as * mut Self::TupleObject } + fn name(&self) -> *mut Self::StringObject { + self.co_name as *mut Self::StringObject + } + fn filename(&self) -> *mut Self::StringObject { + self.co_filename as *mut Self::StringObject + } + fn line_table(&self) -> *mut Self::BytesObject { + self.co_linetable as *mut Self::BytesObject + } + fn first_lineno(&self) -> i32 { + self.co_firstlineno + } + fn nlocals(&self) -> i32 { + self.co_nlocals + } + fn argcount(&self) -> i32 { + self.co_argcount + } + fn varnames(&self) -> *mut Self::TupleObject { + self.co_localsplusnames as *mut Self::TupleObject + } fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { // unpack compressed table format from python 3.11 @@ -348,21 +449,19 @@ impl CodeObject for v3_11_0::PyCodeObject { bytecode_address += delta * 2; let code = (byte >> 3) & 15; let line_delta = match code { - 15 => { 0 }, + 15 => 0, 14 => { let delta = read_signed_varint(&mut index, table); read_varint(&mut index, table); // end line read_varint(&mut index, table); // start column read_varint(&mut index, table); // end column delta - }, - 13 => { - read_signed_varint(&mut index, table) - }, + } + 13 => read_signed_varint(&mut index, table), 10..=12 => { index += 2; // start column / end column (code - 10).into() - }, + } _ => { index += 1; // column 0 @@ -377,7 +476,6 @@ impl CodeObject for v3_11_0::PyCodeObject { } } - // Python 3.10 Python3Impl!(v3_10_0); PythonCommonImpl!(v3_10_0, PyUnicodeObject); @@ -387,47 +485,59 @@ impl CodeObject for v3_10_0::PyCodeObject { type StringObject = v3_10_0::PyUnicodeObject; type TupleObject = v3_10_0::PyTupleObject; - fn name(&self) -> * mut Self::StringObject { self.co_name as * mut Self::StringObject } - fn filename(&self) -> * mut Self::StringObject { self.co_filename as * mut Self::StringObject } - fn line_table(&self) -> * mut Self::BytesObject { self.co_linetable as * mut Self::BytesObject } - fn first_lineno(&self) -> i32 { self.co_firstlineno } - fn nlocals(&self) -> i32 { self.co_nlocals } - fn argcount(&self) -> i32 { self.co_argcount } - fn varnames(&self) -> * mut Self::TupleObject { self.co_varnames as * mut Self::TupleObject } - fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { - // in Python 3.10 we need to double the lasti instruction value here (and no I don't know why) - // https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999 - // Whereas in python versions up to 3.9 we didn't. - // https://github.com/python/cpython/blob/3.9/Python/ceval.c#L4713-L4714 - let lasti = 2 * lasti as i32; - - // unpack the line table. format is specified here: - // https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt - let size = table.len(); - let mut i = 0; - let mut line_number: i32 = self.first_lineno(); - let mut bytecode_address: i32 = 0; - while (i + 1) < size { - let delta: u8 = table[i]; - let line_delta: i8 = unsafe { std::mem::transmute(table[i + 1]) }; - i += 2; - - if line_delta == -128 { - continue; - } - - line_number += i32::from(line_delta); - bytecode_address += i32::from(delta); - if bytecode_address > lasti { - break; - } - } - - line_number + fn name(&self) -> *mut Self::StringObject { + self.co_name as *mut Self::StringObject } -} + fn filename(&self) -> *mut Self::StringObject { + self.co_filename as *mut Self::StringObject + } + fn line_table(&self) -> *mut Self::BytesObject { + self.co_linetable as *mut Self::BytesObject + } + fn first_lineno(&self) -> i32 { + self.co_firstlineno + } + fn nlocals(&self) -> i32 { + self.co_nlocals + } + fn argcount(&self) -> i32 { + self.co_argcount + } + fn varnames(&self) -> *mut Self::TupleObject { + self.co_varnames as *mut Self::TupleObject + } + fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { + // in Python 3.10 we need to double the lasti instruction value here (and no I don't know why) + // https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999 + // Whereas in python versions up to 3.9 we didn't. + // https://github.com/python/cpython/blob/3.9/Python/ceval.c#L4713-L4714 + let lasti = 2 * lasti as i32; + + // unpack the line table. format is specified here: + // https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt + let size = table.len(); + let mut i = 0; + let mut line_number: i32 = self.first_lineno(); + let mut bytecode_address: i32 = 0; + while (i + 1) < size { + let delta: u8 = table[i]; + let line_delta: i8 = unsafe { std::mem::transmute(table[i + 1]) }; + i += 2; + if line_delta == -128 { + continue; + } + line_number += i32::from(line_delta); + bytecode_address += i32::from(delta); + if bytecode_address > lasti { + break; + } + } + + line_number + } +} // Python 3.9 PythonCommonImpl!(v3_9_5, PyUnicodeObject); @@ -463,27 +573,46 @@ Python3Impl!(v3_3_7); PythonCommonImpl!(v2_7_15, PyStringObject); PythonCodeObjectImpl!(v2_7_15, PyStringObject, PyStringObject); impl BytesObject for v2_7_15::PyStringObject { - fn size(&self) -> usize { self.ob_size as usize } - fn address(&self, base: usize) -> usize { base + offset_of(self, &self.ob_sval) } + fn size(&self) -> usize { + self.ob_size as usize + } + fn address(&self, base: usize) -> usize { + base + offset_of(self, &self.ob_sval) + } } impl StringObject for v2_7_15::PyStringObject { - fn ascii(&self) -> bool { true } - fn kind(&self) -> u32 { 1 } - fn size(&self) -> usize { self.ob_size as usize } - fn address(&self, base: usize) -> usize { base + offset_of(self, &self.ob_sval) } + fn ascii(&self) -> bool { + true + } + fn kind(&self) -> u32 { + 1 + } + fn size(&self) -> usize { + self.ob_size as usize + } + fn address(&self, base: usize) -> usize { + base + offset_of(self, &self.ob_sval) + } } impl ListObject for v2_7_15::PyListObject { type Object = v2_7_15::PyObject; - fn size(&self) -> usize { self.ob_size as usize } - fn item(&self) -> *mut *mut Self::Object { self.ob_item } + fn size(&self) -> usize { + self.ob_size as usize + } + fn item(&self) -> *mut *mut Self::Object { + self.ob_item + } } impl TupleObject for v2_7_15::PyTupleObject { - fn size(&self) -> usize { self.ob_size as usize } + fn size(&self) -> usize { + self.ob_size as usize + } fn address(&self, base: usize, index: usize) -> usize { - base + offset_of(self, &self.ob_item) + index * std::mem::size_of::<* mut v2_7_15::PyObject>() + base + offset_of(self, &self.ob_item) + + index * std::mem::size_of::<*mut v2_7_15::PyObject>() } } @@ -494,11 +623,15 @@ mod tests { #[test] fn test_py3_11_line_numbers() { use crate::python_bindings::v3_11_0::PyCodeObject; - let code = PyCodeObject {co_firstlineno:4, ..Default::default()}; - - let table = [128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, - 208, 4, 22, 208, 4, 22]; + let code = PyCodeObject { + co_firstlineno: 4, + ..Default::default() + }; + + let table = [ + 128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4, + 22, 208, 4, 22, + ]; assert_eq!(code.get_line_number(214, &table), 5); - } } diff --git a/src/python_process_info.rs b/src/python_process_info.rs index 76805c5f..4c6210cb 100644 --- a/src/python_process_info.rs +++ b/src/python_process_info.rs @@ -1,26 +1,26 @@ -use std; +use regex::Regex; +#[cfg(windows)] +use regex::RegexBuilder; #[cfg(windows)] use std::collections::HashMap; use std::mem::size_of; -use std::slice; use std::path::Path; -use regex::Regex; -#[cfg(windows)] -use regex::RegexBuilder; +use std::slice; -use anyhow::{Error, Result, Context}; +use anyhow::{Context, Error, Result}; use lazy_static::lazy_static; -use remoteprocess::{ProcessMemory, Pid}; use proc_maps::{get_process_maps, MapRange}; +use remoteprocess::{Pid, ProcessMemory}; use crate::binary_parser::{parse_binary, BinaryInfo}; use crate::config::Config; -use crate::python_bindings::{pyruntime, v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + pyruntime, v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use crate::python_interpreters::{InterpreterState, ThreadState}; use crate::stack_trace::get_stack_traces; use crate::version::Version; - /// Holds information about the python process: memory map layout, parsed binary info /// for python /libpython etc. pub struct PythonProcessInfo { @@ -30,13 +30,14 @@ pub struct PythonProcessInfo { pub libpython_binary: Option, pub maps: Box, pub python_filename: std::path::PathBuf, - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] pub dockerized: bool, } impl PythonProcessInfo { pub fn new(process: &remoteprocess::Process) -> Result { - let filename = process.exe() + let filename = process + .exe() .context("Failed to get process executable name. Check that the process is running.")?; #[cfg(windows)] @@ -52,23 +53,30 @@ impl PythonProcessInfo { let maps = get_process_maps(process.pid)?; info!("Got virtual memory maps from pid {}:", process.pid); for map in &maps { - debug!("map: {:016x}-{:016x} {}{}{} {}", map.start(), map.start() + map.size(), - if map.is_read() {'r'} else {'-'}, if map.is_write() {'w'} else {'-'}, if map.is_exec() {'x'} else {'-'}, - map.filename().unwrap_or(&std::path::PathBuf::from("")).display()); + debug!( + "map: {:016x}-{:016x} {}{}{} {}", + map.start(), + map.start() + map.size(), + if map.is_read() { 'r' } else { '-' }, + if map.is_write() { 'w' } else { '-' }, + if map.is_exec() { 'x' } else { '-' }, + map.filename() + .unwrap_or(&std::path::PathBuf::from("")) + .display() + ); } // parse the main python binary let (python_binary, python_filename) = { // Get the memory address for the executable by matching against virtual memory maps - let map = maps.iter() - .find(|m| { - if let Some(pathname) = m.filename() { - if let Some(pathname) = pathname.to_str() { - return is_python_bin(pathname) && m.is_exec(); - } + let map = maps.iter().find(|m| { + if let Some(pathname) = m.filename() { + if let Some(pathname) = pathname.to_str() { + return is_python_bin(pathname) && m.is_exec(); } - false - }); + } + false + }); let map = match map { Some(map) => map, @@ -77,62 +85,62 @@ impl PythonProcessInfo { // If we failed to find the executable in the virtual memory maps, just take the first file we find // sometimes on windows get_process_exe returns stale info =( https://github.com/benfred/py-spy/issues/40 // and on all operating systems I've tried, the exe is the first region in the maps - &maps.first().ok_or_else(|| format_err!("Failed to get virtual memory maps from process"))? + maps.first().ok_or_else(|| { + format_err!("Failed to get virtual memory maps from process") + })? } }; - #[cfg(not(target_os="linux"))] + #[cfg(not(target_os = "linux"))] let filename = std::path::PathBuf::from(filename); // use filename through /proc/pid/exe which works across docker namespaces and // handles if the file was deleted - #[cfg(target_os="linux")] - let filename = &std::path::PathBuf::from(format!("/proc/{}/exe", process.pid)); + #[cfg(target_os = "linux")] + let filename = std::path::PathBuf::from(format!("/proc/{}/exe", process.pid)); // TODO: consistent types? u64 -> usize? for map.start etc - #[allow(unused_mut)] - let python_binary = parse_binary(&filename, map.start() as u64, map.size() as u64) - .and_then(|mut pb| { - // windows symbols are stored in separate files (.pdb), load - #[cfg(windows)] - { - get_windows_python_symbols(process.pid, &filename, map.start() as u64) - .map(|symbols| { pb.symbols.extend(symbols); pb }) - .map_err(|err| err.into()) - } - - // For OSX, need to adjust main binary symbols by subtracting _mh_execute_header - // (which we've added to by map.start already, so undo that here) - #[cfg(target_os = "macos")] - { - let offset = pb.symbols["_mh_execute_header"] - map.start() as u64; - for address in pb.symbols.values_mut() { - *address -= offset; - } - - if pb.bss_addr != 0 { - pb.bss_addr -= offset; - } - } + let python_binary = parse_binary(&filename, map.start() as u64, map.size() as u64); + + // windows symbols are stored in separate files (.pdb), load + #[cfg(windows)] + let python_binary = python_binary.and_then(|mut pb| { + get_windows_python_symbols(process.pid, &filename, map.start() as u64) + .map(|symbols| { + pb.symbols.extend(symbols); + pb + }) + .map_err(|err| err.into()) + }); + + // For OSX, need to adjust main binary symbols by subtracting _mh_execute_header + // (which we've added to by map.start already, so undo that here) + #[cfg(target_os = "macos")] + let python_binary = python_binary.map(|mut pb| { + let offset = pb.symbols["_mh_execute_header"] - map.start() as u64; + for address in pb.symbols.values_mut() { + *address -= offset; + } - #[cfg(not(windows))] - Ok(pb) - }); + if pb.bss_addr != 0 { + pb.bss_addr -= offset; + } + pb + }); - (python_binary, filename.clone()) + (python_binary, filename) }; // likewise handle libpython for python versions compiled with --enabled-shared let libpython_binary = { - let libmap = maps.iter() - .find(|m| { - if let Some(pathname) = m.filename() { - if let Some(pathname) = pathname.to_str() { - return is_python_lib(pathname) && m.is_exec(); - } + let libmap = maps.iter().find(|m| { + if let Some(pathname) = m.filename() { + if let Some(pathname) = pathname.to_str() { + return is_python_lib(pathname) && m.is_exec(); } - false - }); + } + false + }); let mut libpython_binary: Option = None; if let Some(libpython) = libmap { @@ -140,13 +148,22 @@ impl PythonProcessInfo { info!("Found libpython binary @ {}", filename.display()); // on linux the process could be running in docker, access the filename through procfs - #[cfg(target_os="linux")] - let filename = &std::path::PathBuf::from(format!("/proc/{}/root{}", process.pid, filename.display())); + #[cfg(target_os = "linux")] + let filename = &std::path::PathBuf::from(format!( + "/proc/{}/root{}", + process.pid, + filename.display() + )); #[allow(unused_mut)] - let mut parsed = parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; + let mut parsed = + parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; #[cfg(windows)] - parsed.symbols.extend(get_windows_python_symbols(process.pid, filename, libpython.start() as u64)?); + parsed.symbols.extend(get_windows_python_symbols( + process.pid, + filename, + libpython.start() as u64, + )?); libpython_binary = Some(parsed); } } @@ -161,26 +178,36 @@ impl PythonProcessInfo { let dyld_infos = get_dyld_info(process.pid)?; for dyld in &dyld_infos { - let segname = unsafe { std::ffi::CStr::from_ptr(dyld.segment.segname.as_ptr()) }; - debug!("dyld: {:016x}-{:016x} {:10} {}", - dyld.segment.vmaddr, dyld.segment.vmaddr + dyld.segment.vmsize, - segname.to_string_lossy(), dyld.filename.display()); + let segname = + unsafe { std::ffi::CStr::from_ptr(dyld.segment.segname.as_ptr()) }; + debug!( + "dyld: {:016x}-{:016x} {:10} {}", + dyld.segment.vmaddr, + dyld.segment.vmaddr + dyld.segment.vmsize, + segname.to_string_lossy(), + dyld.filename.display() + ); } - let python_dyld_data = dyld_infos.iter() - .find(|m| { - if let Some(filename) = m.filename.to_str() { - return is_python_framework(filename) && - m.segment.segname[0..7] == [95, 95, 68, 65, 84, 65, 0]; - } - false - }); - + let python_dyld_data = dyld_infos.iter().find(|m| { + if let Some(filename) = m.filename.to_str() { + return is_python_framework(filename) + && m.segment.segname[0..7] == [95, 95, 68, 65, 84, 65, 0]; + } + false + }); if let Some(libpython) = python_dyld_data { - info!("Found libpython binary from dyld @ {}", libpython.filename.display()); + info!( + "Found libpython binary from dyld @ {}", + libpython.filename.display() + ); - let mut binary = parse_binary(&libpython.filename, libpython.segment.vmaddr, libpython.segment.vmsize)?; + let mut binary = parse_binary( + &libpython.filename, + libpython.segment.vmaddr, + libpython.segment.vmsize, + )?; // TODO: bss addr offsets returned from parsing binary are wrong // (assumes data section isn't split from text section like done here). @@ -202,12 +229,16 @@ impl PythonProcessInfo { _ => python_binary.ok(), }; - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let dockerized = is_dockerized(process.pid).unwrap_or(false); - Ok(PythonProcessInfo{python_binary, libpython_binary, maps: Box::new(maps), python_filename, - #[cfg(target_os="linux")] - dockerized + Ok(PythonProcessInfo { + python_binary, + libpython_binary, + maps: Box::new(maps), + python_filename, + #[cfg(target_os = "linux")] + dockerized, }) } @@ -221,7 +252,10 @@ impl PythonProcessInfo { if let Some(ref binary) = self.libpython_binary { if let Some(addr) = binary.symbols.get(symbol) { - info!("got symbol {} (0x{:016x}) from libpython binary", symbol, addr); + info!( + "got symbol {} (0x{:016x}) from libpython binary", + symbol, addr + ); return Some(addr); } } @@ -231,7 +265,9 @@ impl PythonProcessInfo { /// Returns the version of python running in the process. pub fn get_python_version

(python_info: &PythonProcessInfo, process: &P) -> Result - where P: ProcessMemory { +where + P: ProcessMemory, +{ // If possible, grab the sys.version string from the processes memory (mac osx). if let Some(&addr) = python_info.get_symbol("Py_GetVersion.version") { info!("Getting version from symbol address"); @@ -245,68 +281,97 @@ pub fn get_python_version

(python_info: &PythonProcessInfo, process: &P) -> Re // otherwise get version info from scanning BSS section for sys.version string if let Some(ref pb) = python_info.python_binary { info!("Getting version from python binary BSS"); - let bss = process.copy(pb.bss_addr as usize, - pb.bss_size as usize)?; + let bss = process.copy(pb.bss_addr as usize, pb.bss_size as usize)?; match Version::scan_bytes(&bss) { Ok(version) => return Ok(version), - Err(err) => info!("Failed to get version from BSS section: {}", err) + Err(err) => info!("Failed to get version from BSS section: {}", err), } } // try again if there is a libpython.so if let Some(ref libpython) = python_info.libpython_binary { info!("Getting version from libpython BSS"); - let bss = process.copy(libpython.bss_addr as usize, - libpython.bss_size as usize)?; + let bss = process.copy(libpython.bss_addr as usize, libpython.bss_size as usize)?; match Version::scan_bytes(&bss) { Ok(version) => return Ok(version), - Err(err) => info!("Failed to get version from libpython BSS section: {}", err) + Err(err) => info!("Failed to get version from libpython BSS section: {}", err), } } // the python_filename might have the version encoded in it (/usr/bin/python3.5 etc). // try reading that in (will miss patch level on python, but that shouldn't matter) - info!("Trying to get version from path: {}", python_info.python_filename.display()); + info!( + "Trying to get version from path: {}", + python_info.python_filename.display() + ); let path = Path::new(&python_info.python_filename); if let Some(python) = path.file_name() { if let Some(python) = python.to_str() { - if python.starts_with("python") { - let tokens: Vec<&str> = python[6..].split('.').collect(); + if let Some(stripped_python) = python.strip_prefix("python") { + let tokens: Vec<&str> = stripped_python.split('.').collect(); if tokens.len() >= 2 { - if let (Ok(major), Ok(minor)) = (tokens[0].parse::(), tokens[1].parse::()) { - return Ok(Version{major, minor, patch:0, release_flags: "".to_owned()}) + if let (Ok(major), Ok(minor)) = + (tokens[0].parse::(), tokens[1].parse::()) + { + return Ok(Version { + major, + minor, + patch: 0, + release_flags: "".to_owned(), + }); } } } } } - Err(format_err!("Failed to find python version from target process")) + Err(format_err!( + "Failed to find python version from target process" + )) } -pub fn get_interpreter_address

(python_info: &PythonProcessInfo, - process: &P, - version: &Version) -> Result - where P: ProcessMemory { +pub fn get_interpreter_address

( + python_info: &PythonProcessInfo, + process: &P, + version: &Version, +) -> Result +where + P: ProcessMemory, +{ // get the address of the main PyInterpreterState object from loaded symbols if we can // (this tends to be faster than scanning through the bss section) match version { - Version{major: 3, minor: 7..=11, ..} => { + Version { + major: 3, + minor: 7..=11, + .. + } => { if let Some(&addr) = python_info.get_symbol("_PyRuntime") { - let addr = process.copy_struct(addr as usize + pyruntime::get_interp_head_offset(&version))?; + let addr = process + .copy_struct(addr as usize + pyruntime::get_interp_head_offset(version))?; // Make sure the interpreter addr is valid before returning match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) { Ok(addr) => return Ok(addr), - Err(_) => { warn!("Interpreter address from _PyRuntime symbol is invalid {:016x}", addr); } + Err(_) => { + warn!( + "Interpreter address from _PyRuntime symbol is invalid {:016x}", + addr + ); + } }; } - }, + } _ => { if let Some(&addr) = python_info.get_symbol("interp_head") { let addr = process.copy_struct(addr as usize)?; match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) { Ok(addr) => return Ok(addr), - Err(_) => { warn!("Interpreter address from interp_head symbol is invalid {:016x}", addr); } + Err(_) => { + warn!( + "Interpreter address from interp_head symbol is invalid {:016x}", + addr + ); + } }; } } @@ -314,59 +379,70 @@ pub fn get_interpreter_address

(python_info: &PythonProcessInfo, info!("Failed to get interp_head from symbols, scanning BSS section from main binary"); // try scanning the BSS section of the binary for things that might be the interpreterstate - let err = - if let Some(ref pb) = python_info.python_binary { - match get_interpreter_address_from_binary(pb, &*python_info.maps, process, version) { - Ok(addr) => return Ok(addr), - err => Some(err) - } - } else { - None - }; + let err = if let Some(ref pb) = python_info.python_binary { + match get_interpreter_address_from_binary(pb, &*python_info.maps, process, version) { + Ok(addr) => return Ok(addr), + err => Some(err), + } + } else { + None + }; // Before giving up, try again if there is a libpython.so if let Some(ref lpb) = python_info.libpython_binary { info!("Failed to get interpreter from binary BSS, scanning libpython BSS"); match get_interpreter_address_from_binary(lpb, &*python_info.maps, process, version) { - Ok(addr) => return Ok(addr), - lib_err => err.unwrap_or(lib_err) + Ok(addr) => Ok(addr), + lib_err => err.unwrap_or(lib_err), } } else { err.expect("Both python and libpython are invalid.") } } -fn get_interpreter_address_from_binary

(binary: &BinaryInfo, - maps: &dyn ContainsAddr, - process: &P, - version: &Version) -> Result where P: ProcessMemory { +fn get_interpreter_address_from_binary

( + binary: &BinaryInfo, + maps: &dyn ContainsAddr, + process: &P, + version: &Version, +) -> Result +where + P: ProcessMemory, +{ // We're going to scan the BSS/data section for things, and try to narrowly scan things that // look like pointers to PyinterpreterState let bss = process.copy(binary.bss_addr as usize, binary.bss_size as usize)?; #[allow(clippy::cast_ptr_alignment)] - let addrs = unsafe { slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::()) }; + let addrs = unsafe { + slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::()) + }; check_interpreter_addresses(addrs, maps, process, version) } // Checks whether a block of memory (from BSS/.data etc) contains pointers that are pointing // to a valid PyInterpreterState -fn check_interpreter_addresses

(addrs: &[usize], - maps: &dyn ContainsAddr, - process: &P, - version: &Version) -> Result - where P: ProcessMemory { +fn check_interpreter_addresses

( + addrs: &[usize], + maps: &dyn ContainsAddr, + process: &P, + version: &Version, +) -> Result +where + P: ProcessMemory, +{ // This function does all the work, but needs a type of the interpreter - fn check(addrs: &[usize], - maps: &dyn ContainsAddr, - process: &P) -> Result - where I: InterpreterState, P: ProcessMemory { + fn check(addrs: &[usize], maps: &dyn ContainsAddr, process: &P) -> Result + where + I: InterpreterState, + P: ProcessMemory, + { for &addr in addrs { if maps.contains_addr(addr) { // this address points to valid memory. try loading it up as a PyInterpreterState // to further check let interp: I = match process.copy_struct(addr) { Ok(interp) => interp, - Err(_) => continue + Err(_) => continue, }; // get the pythreadstate pointer from the interpreter object, and if it is also @@ -377,99 +453,150 @@ fn check_interpreter_addresses

(addrs: &[usize], // this is almost certainly the address of the intrepreter let thread = match process.copy_pointer(threads) { Ok(thread) => thread, - Err(_) => continue + Err(_) => continue, }; // as a final sanity check, try getting the stack_traces, and only return if this works - if thread.interp() as usize == addr && get_stack_traces(&interp, process, 0, None).is_ok() { + if thread.interp() as usize == addr + && get_stack_traces(&interp, process, 0, None).is_ok() + { return Ok(addr); } } } } - Err(format_err!("Failed to find a python interpreter in the .data section")) + Err(format_err!( + "Failed to find a python interpreter in the .data section" + )) } // different versions have different layouts, check as appropriate match version { - Version{major: 2, minor: 3..=7, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 3, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 4..=5, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 6, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 7, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" | "a2" | "a3" => check::(addrs, maps, process), - _ => check::(addrs, maps, process) - } + Version { + major: 2, + minor: 3..=7, + .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 3, .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 4..=5, + .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 6, .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 7, .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" | "a2" | "a3" => check::(addrs, maps, process), + _ => check::(addrs, maps, process), }, - Version{major: 3, minor: 8, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 9, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 10, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 11, ..} => check::(addrs, maps, process), - _ => Err(format_err!("Unsupported version of Python: {}", version)) + Version { + major: 3, minor: 8, .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 9, .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 10, + .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 11, + .. + } => check::(addrs, maps, process), + _ => Err(format_err!("Unsupported version of Python: {}", version)), } } -pub fn get_threadstate_address(python_info: &PythonProcessInfo, - version: &Version, - config: &Config) -> Result { +pub fn get_threadstate_address( + python_info: &PythonProcessInfo, + version: &Version, + config: &Config, +) -> Result { let threadstate_address = match version { - Version{major: 3, minor: 7..=11, ..} => { - match python_info.get_symbol("_PyRuntime") { - Some(&addr) => { - if let Some(offset) = pyruntime::get_tstate_current_offset(&version) { - info!("Found _PyRuntime @ 0x{:016x}, getting gilstate.tstate_current from offset 0x{:x}", + Version { + major: 3, + minor: 7..=11, + .. + } => match python_info.get_symbol("_PyRuntime") { + Some(&addr) => { + if let Some(offset) = pyruntime::get_tstate_current_offset(version) { + info!("Found _PyRuntime @ 0x{:016x}, getting gilstate.tstate_current from offset 0x{:x}", addr, offset); - addr as usize + offset - } else { - error_if_gil(config, &version, "unknown pyruntime.gilstate.tstate_current offset")?; - 0 - } - }, - None => { - error_if_gil(config, &version, "failed to find _PyRuntime symbol")?; + addr as usize + offset + } else { + error_if_gil( + config, + version, + "unknown pyruntime.gilstate.tstate_current offset", + )?; 0 } } - }, - _ => { - match python_info.get_symbol("_PyThreadState_Current") { - Some(&addr) => { - info!("Found _PyThreadState_Current @ 0x{:016x}", addr); - addr as usize - }, - None => { - error_if_gil(config, &version, "failed to find _PyThreadState_Current symbol")?; - 0 - } + None => { + error_if_gil(config, version, "failed to find _PyRuntime symbol")?; + 0 + } + }, + _ => match python_info.get_symbol("_PyThreadState_Current") { + Some(&addr) => { + info!("Found _PyThreadState_Current @ 0x{:016x}", addr); + addr as usize + } + None => { + error_if_gil( + config, + version, + "failed to find _PyThreadState_Current symbol", + )?; + 0 } - } - }; + }, + }; Ok(threadstate_address) } fn error_if_gil(config: &Config, version: &Version, msg: &str) -> Result<(), Error> { lazy_static! { - static ref WARNED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false); + static ref WARNED: std::sync::atomic::AtomicBool = + std::sync::atomic::AtomicBool::new(false); } if config.gil_only { if !WARNED.load(std::sync::atomic::Ordering::Relaxed) { // only print this once - eprintln!("Cannot detect GIL holding in version '{}' on the current platform (reason: {})", version, msg); + eprintln!( + "Cannot detect GIL holding in version '{}' on the current platform (reason: {})", + version, msg + ); eprintln!("Please open an issue in https://github.com/benfred/py-spy with the Python version and your platform."); WARNED.store(true, std::sync::atomic::Ordering::Relaxed); } - Err(format_err!("Cannot detect GIL holding in version '{}' on the current platform (reason: {})", version, msg)) + Err(format_err!( + "Cannot detect GIL holding in version '{}' on the current platform (reason: {})", + version, + msg + )) } else { warn!("Unable to detect GIL usage: {}", msg); Ok(()) } } -pub trait ContainsAddr{ +pub trait ContainsAddr { fn contains_addr(&self, addr: usize) -> bool; } @@ -487,10 +614,10 @@ impl ContainsAddr for Vec { } } -#[cfg(target_os="linux")] +#[cfg(target_os = "linux")] fn is_dockerized(pid: Pid) -> Result { let self_mnt = std::fs::read_link("/proc/self/ns/mnt")?; - let target_mnt = std::fs::read_link(&format!("/proc/{}/ns/mnt", pid))?; + let target_mnt = std::fs::read_link(format!("/proc/{}/ns/mnt", pid))?; Ok(self_mnt != target_mnt) } @@ -498,7 +625,11 @@ fn is_dockerized(pid: Pid) -> Result { // So use the win32 api to load up the couple of symbols we need on windows. Note: // we still can get export's from the PE file #[cfg(windows)] -pub fn get_windows_python_symbols(pid: Pid, filename: &Path, offset: u64) -> std::io::Result> { +pub fn get_windows_python_symbols( + pid: Pid, + filename: &Path, + offset: u64, +) -> std::io::Result> { use proc_maps::win_maps::SymbolLoader; let handler = SymbolLoader::new(pid)?; @@ -513,7 +644,11 @@ pub fn get_windows_python_symbols(pid: Pid, filename: &Path, offset: u64) -> std if let Ok((base, addr)) = handler.address_from_name(symbol) { // If we have a module base (ie from PDB), need to adjust by the offset // otherwise seems like we can take address directly - let addr = if base == 0 { addr } else { offset + addr - base }; + let addr = if base == 0 { + addr + } else { + offset + addr - base + }; ret.insert(String::from(*symbol), addr); } } @@ -521,7 +656,7 @@ pub fn get_windows_python_symbols(pid: Pid, filename: &Path, offset: u64) -> std Ok(ret) } -#[cfg(any(target_os="linux", target_os="freebsd"))] +#[cfg(any(target_os = "linux", target_os = "freebsd"))] pub fn is_python_lib(pathname: &str) -> bool { lazy_static! { static ref RE: Regex = Regex::new(r"/libpython\d.\d\d?(m|d|u)?.so").unwrap(); @@ -529,7 +664,7 @@ pub fn is_python_lib(pathname: &str) -> bool { RE.is_match(pathname) } -#[cfg(target_os="macos")] +#[cfg(target_os = "macos")] pub fn is_python_lib(pathname: &str) -> bool { lazy_static! { static ref RE: Regex = Regex::new(r"/libpython\d.\d\d?(m|d|u)?.(dylib|so)$").unwrap(); @@ -540,22 +675,24 @@ pub fn is_python_lib(pathname: &str) -> bool { #[cfg(windows)] pub fn is_python_lib(pathname: &str) -> bool { lazy_static! { - static ref RE: Regex = RegexBuilder::new(r"\\python\d\d\d?(m|d|u)?.dll$").case_insensitive(true).build().unwrap(); + static ref RE: Regex = RegexBuilder::new(r"\\python\d\d\d?(m|d|u)?.dll$") + .case_insensitive(true) + .build() + .unwrap(); } RE.is_match(pathname) } -#[cfg(target_os="macos")] +#[cfg(target_os = "macos")] pub fn is_python_framework(pathname: &str) -> bool { - pathname.ends_with("/Python") && - !pathname.contains("Python.app") + pathname.ends_with("/Python") && !pathname.contains("Python.app") } #[cfg(test)] mod tests { use super::*; - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] #[test] fn test_is_python_lib() { assert!(is_python_lib("~/Anaconda2/lib/libpython2.7.dylib")); @@ -573,7 +710,7 @@ mod tests { assert!(!is_python_lib("/lib/heapq.cpython-36m-darwin.dylib")); } - #[cfg(any(target_os="linux", target_os="freebsd"))] + #[cfg(any(target_os = "linux", target_os = "freebsd"))] #[test] fn test_is_python_lib() { // libpython bundled by pyinstaller https://github.com/benfred/py-spy/issues/42 @@ -587,37 +724,49 @@ mod tests { // don't blindly match libraries with python in the name (boost_python etc) assert!(!is_python_lib("/usr/lib/libboost_python.so")); - assert!(!is_python_lib("/usr/lib/x86_64-linux-gnu/libboost_python-py27.so.1.58.0")); + assert!(!is_python_lib( + "/usr/lib/x86_64-linux-gnu/libboost_python-py27.so.1.58.0" + )); assert!(!is_python_lib("/usr/lib/libboost_python-py35.so")); - } #[cfg(windows)] #[test] fn test_is_python_lib() { - assert!(is_python_lib("C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.dll")); + assert!(is_python_lib( + "C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.dll" + )); // .NET host via https://github.com/pythonnet/pythonnet - assert!(is_python_lib("C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.DLL")); + assert!(is_python_lib( + "C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.DLL" + )); } - - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] #[test] fn test_python_frameworks() { // homebrew v2 assert!(!is_python_framework("/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python")); - assert!(is_python_framework("/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Python")); + assert!(is_python_framework( + "/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Python" + )); // System python from osx 10.13.6 (high sierra) assert!(!is_python_framework("/System/Library/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python")); - assert!(is_python_framework("/System/Library/Frameworks/Python.framework/Versions/2.7/Python")); + assert!(is_python_framework( + "/System/Library/Frameworks/Python.framework/Versions/2.7/Python" + )); // pyenv 3.6.6 with OSX framework enabled (https://github.com/benfred/py-spy/issues/15) // env PYTHON_CONFIGURE_OPTS="--enable-framework" pyenv install 3.6.6 - assert!(is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Python")); + assert!(is_python_framework( + "/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Python" + )); assert!(!is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Resources/Python.app/Contents/MacOS/Python")); // single file pyinstaller - assert!(is_python_framework("/private/var/folders/3x/qy479lpd1fb2q88lc9g4d3kr0000gn/T/_MEI2Akvi8/Python")); + assert!(is_python_framework( + "/private/var/folders/3x/qy479lpd1fb2q88lc9g4d3kr0000gn/T/_MEI2Akvi8/Python" + )); } } diff --git a/src/python_spy.rs b/src/python_spy.rs index ceb248a1..f674cc74 100644 --- a/src/python_spy.rs +++ b/src/python_spy.rs @@ -1,26 +1,29 @@ -use std; +#[cfg(windows)] +use regex::RegexBuilder; use std::collections::HashMap; -#[cfg(all(target_os="linux", unwind))] +#[cfg(all(target_os = "linux", unwind))] use std::collections::HashSet; -use std::path::Path; -#[cfg(all(target_os="linux", unwind))] +#[cfg(all(target_os = "linux", unwind))] use std::iter::FromIterator; -#[cfg(windows)] -use regex::RegexBuilder; +use std::path::Path; -use anyhow::{Error, Result, Context}; -use remoteprocess::{Process, ProcessMemory, Pid, Tid}; +use anyhow::{Context, Error, Result}; +use remoteprocess::{Pid, Process, ProcessMemory, Tid}; use crate::config::{Config, LockingStrategy}; #[cfg(unwind)] use crate::native_stack_trace::NativeStack; -use crate::python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use crate::python_data_access::format_variable; use crate::python_interpreters::{InterpreterState, ThreadState}; +use crate::python_process_info::{ + get_interpreter_address, get_python_version, get_threadstate_address, PythonProcessInfo, +}; use crate::python_threading::thread_name_lookup; -use crate::stack_trace::{StackTrace, get_stack_trace, get_gil_threadid}; +use crate::stack_trace::{get_gil_threadid, get_stack_trace, StackTrace}; use crate::version::Version; -use crate::python_process_info::{PythonProcessInfo, get_python_version, get_interpreter_address, get_threadstate_address}; /// Lets you retrieve stack traces of a running python program pub struct PythonSpy { @@ -37,8 +40,8 @@ pub struct PythonSpy { pub short_filenames: HashMap>, pub python_thread_ids: HashMap, pub python_thread_names: HashMap, - #[cfg(target_os="linux")] - pub dockerized: bool + #[cfg(target_os = "linux")] + pub dockerized: bool, } impl PythonSpy { @@ -53,7 +56,7 @@ impl PythonSpy { // lock the process when loading up on freebsd (rather than locking // on every memory read). Needs done after getting python process info // because procmaps also tries to attach w/ ptrace on freebsd - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] let _lock = process.lock(); let version = get_python_version(&python_info, &process)?; @@ -61,7 +64,7 @@ impl PythonSpy { let interpreter_address = get_interpreter_address(&python_info, &process, &version)?; info!("Found interpreter at 0x{:016x}", interpreter_address); - + // lets us figure out which thread has the GIL let threadstate_address = get_threadstate_address(&python_info, &version, config)?; @@ -69,28 +72,38 @@ impl PythonSpy { #[cfg(unwind)] let native = if config.native { - Some(NativeStack::new(pid, python_info.python_binary, python_info.libpython_binary)?) + Some(NativeStack::new( + pid, + python_info.python_binary, + python_info.libpython_binary, + )?) } else { None }; - Ok(PythonSpy{pid, process, version, interpreter_address, threadstate_address, - python_filename: python_info.python_filename, - version_string, - #[cfg(unwind)] - native, - #[cfg(target_os="linux")] - dockerized: python_info.dockerized, - config: config.clone(), - short_filenames: HashMap::new(), - python_thread_ids: HashMap::new(), - python_thread_names: HashMap::new()}) + Ok(PythonSpy { + pid, + process, + version, + interpreter_address, + threadstate_address, + python_filename: python_info.python_filename, + version_string, + #[cfg(unwind)] + native, + #[cfg(target_os = "linux")] + dockerized: python_info.dockerized, + config: config.clone(), + short_filenames: HashMap::new(), + python_thread_ids: HashMap::new(), + python_thread_names: HashMap::new(), + }) } /// Creates a PythonSpy object, retrying up to max_retries times. /// Mainly useful for the case where the process is just started and /// symbols or the python interpreter might not be loaded yet. - pub fn retry_new(pid: Pid, config: &Config, max_retries:u64) -> Result { + pub fn retry_new(pid: Pid, config: &Config, max_retries: u64) -> Result { let mut retries = 0; loop { let err = match PythonSpy::new(pid, config) { @@ -98,10 +111,10 @@ impl PythonSpy { // verify that we can load a stack trace before returning success match process.get_stack_traces() { Ok(_) => return Ok(process), - Err(err) => err + Err(err) => err, } - }, - Err(err) => err + } + Err(err) => err, }; // If we failed, retry a couple times before returning the last error @@ -118,25 +131,57 @@ impl PythonSpy { pub fn get_stack_traces(&mut self) -> Result, Error> { match self.version { // ABI for 2.3/2.4/2.5/2.6/2.7 is compatible for our purpose - Version{major: 2, minor: 3..=7, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 3, ..} => self._get_stack_traces::(), + Version { + major: 2, + minor: 3..=7, + .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 3, .. + } => self._get_stack_traces::(), // ABI for 3.4 and 3.5 is the same for our purposes - Version{major: 3, minor: 4, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 5, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 6, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 7, ..} => self._get_stack_traces::(), + Version { + major: 3, minor: 4, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 5, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 6, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 7, .. + } => self._get_stack_traces::(), // v3.8.0a1 to v3.8.0a3 is compatible with 3.7 ABI, but later versions of 3.8.0 aren't - Version{major: 3, minor: 8, patch: 0, ..} => { - match self.version.release_flags.as_ref() { - "a1" | "a2" | "a3" => self._get_stack_traces::(), - _ => self._get_stack_traces::() - } - } - Version{major: 3, minor: 8, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 9, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 10, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 11, ..} => self._get_stack_traces::(), - _ => Err(format_err!("Unsupported version of Python: {}", self.version)), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match self.version.release_flags.as_ref() { + "a1" | "a2" | "a3" => self._get_stack_traces::(), + _ => self._get_stack_traces::(), + }, + Version { + major: 3, minor: 8, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 9, .. + } => self._get_stack_traces::(), + Version { + major: 3, + minor: 10, + .. + } => self._get_stack_traces::(), + Version { + major: 3, + minor: 11, + .. + } => self._get_stack_traces::(), + _ => Err(format_err!( + "Unsupported version of Python: {}", + self.version + )), } } @@ -162,32 +207,43 @@ impl PythonSpy { // TODO: hoist most of this code out to stack_trace.rs, and // then annotate the output of that with things like native stack traces etc // have moved in gil / locals etc - let gil_thread_id = get_gil_threadid::(self.threadstate_address, &self.process)?; + let gil_thread_id = + get_gil_threadid::(self.threadstate_address, &self.process)?; // Get the python interpreter, and loop over all the python threads - let interp: I = self.process.copy_struct(self.interpreter_address) - .context("Failed to copy PyInterpreterState from process")?; + let interp: I = self + .process + .copy_struct(self.interpreter_address) + .context("Failed to copy PyInterpreterState from process")?; let mut traces = Vec::new(); let mut threads = interp.head(); while !threads.is_null() { // Get the stack trace of the python thread - let thread = self.process.copy_pointer(threads).context("Failed to copy PyThreadState")?; - let mut trace = get_stack_trace(&thread, &self.process, self.config.dump_locals > 0, self.config.lineno)?; + let thread = self + .process + .copy_pointer(threads) + .context("Failed to copy PyThreadState")?; + let mut trace = get_stack_trace( + &thread, + &self.process, + self.config.dump_locals > 0, + self.config.lineno, + )?; // Try getting the native thread id let python_thread_id = thread.thread_id(); // python 3.11+ has the native thread id directly on the PyThreadState object, // for older versions of python, try using OS specific code to get the native - // thread id (doesn' work on freebsd, or on arm/i686 processors on linux) + // thread id (doesn't work on freebsd, or on arm/i686 processors on linux) if trace.os_thread_id.is_none() { let mut os_thread_id = self._get_os_thread_id(python_thread_id, &interp)?; // linux can see issues where pthread_ids get recycled for new OS threads, // which totally breaks the caching we were doing here. Detect this and retry if let Some(tid) = os_thread_id { - if thread_activity.len() > 0 && !thread_activity.contains_key(&tid) { + if !thread_activity.is_empty() && !thread_activity.contains_key(&tid) { info!("clearing away thread id caches, thread {} has exited", tid); self.python_thread_ids.clear(); self.python_thread_names.clear(); @@ -226,7 +282,9 @@ impl PythonSpy { { if self.config.native { if let Some(native) = self.native.as_mut() { - let thread_id = trace.os_thread_id.ok_or_else(|| format_err!("failed to get os threadid"))?; + let thread_id = trace + .os_thread_id + .ok_or_else(|| format_err!("failed to get os threadid"))?; let os_thread = remoteprocess::Thread::new(thread_id as Tid)?; trace.frames = native.merge_native_thread(&trace.frames, &os_thread)? } @@ -238,8 +296,13 @@ impl PythonSpy { if let Some(locals) = frame.locals.as_mut() { let max_length = (128 * self.config.dump_locals) as isize; for local in locals { - let repr = format_variable::(&self.process, &self.version, local.addr, max_length); - local.repr = Some(repr.unwrap_or("?".to_owned())); + let repr = format_variable::( + &self.process, + &self.version, + local.addr, + max_length, + ); + local.repr = Some(repr.unwrap_or_else(|_| "?".to_owned())); } } } @@ -266,22 +329,31 @@ impl PythonSpy { false } else { let frame = &frames[0]; - (frame.name == "wait" && frame.filename.ends_with("threading.py")) || - (frame.name == "select" && frame.filename.ends_with("selectors.py")) || - (frame.name == "poll" && (frame.filename.ends_with("asyncore.py") || - frame.filename.contains("zmq") || - frame.filename.contains("gevent") || - frame.filename.contains("tornado"))) + (frame.name == "wait" && frame.filename.ends_with("threading.py")) + || (frame.name == "select" && frame.filename.ends_with("selectors.py")) + || (frame.name == "poll" + && (frame.filename.ends_with("asyncore.py") + || frame.filename.contains("zmq") + || frame.filename.contains("gevent") + || frame.filename.contains("tornado"))) } } #[cfg(windows)] - fn _get_os_thread_id(&mut self, python_thread_id: u64, _interp: &I) -> Result, Error> { + fn _get_os_thread_id( + &mut self, + python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { Ok(Some(python_thread_id as Tid)) } - #[cfg(target_os="macos")] - fn _get_os_thread_id(&mut self, python_thread_id: u64, _interp: &I) -> Result, Error> { + #[cfg(target_os = "macos")] + fn _get_os_thread_id( + &mut self, + python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { // If we've already know this threadid, we're good if let Some(thread_id) = self.python_thread_ids.get(&python_thread_id) { return Ok(Some(*thread_id)); @@ -300,13 +372,21 @@ impl PythonSpy { Ok(None) } - #[cfg(all(target_os="linux", not(unwind)))] - fn _get_os_thread_id(&mut self, _python_thread_id: u64, _interp: &I) -> Result, Error> { + #[cfg(all(target_os = "linux", not(unwind)))] + fn _get_os_thread_id( + &mut self, + _python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { Ok(None) } - #[cfg(all(target_os="linux", unwind))] - fn _get_os_thread_id(&mut self, python_thread_id: u64, interp: &I) -> Result, Error> { + #[cfg(all(target_os = "linux", unwind))] + fn _get_os_thread_id( + &mut self, + python_thread_id: u64, + interp: &I, + ) -> Result, Error> { // in nonblocking mode, we can't get the threadid reliably (method here requires reading the RBX // register which requires a ptrace attach). fallback to heuristic thread activity here if self.config.blocking == LockingStrategy::NonBlocking { @@ -327,13 +407,17 @@ impl PythonSpy { let mut all_python_threads = HashSet::new(); let mut threads = interp.head(); while !threads.is_null() { - let thread = self.process.copy_pointer(threads).context("Failed to copy PyThreadState")?; + let thread = self + .process + .copy_pointer(threads) + .context("Failed to copy PyThreadState")?; let current = thread.thread_id(); all_python_threads.insert(current); threads = thread.next(); } - let processed_os_threads: HashSet = HashSet::from_iter(self.python_thread_ids.values().map(|x| *x)); + let processed_os_threads: HashSet = + HashSet::from_iter(self.python_thread_ids.values().copied()); let unwinder = self.process.unwinder()?; @@ -344,13 +428,15 @@ impl PythonSpy { continue; } - match self._get_pthread_id(&unwinder, &thread, &all_python_threads) { + match self._get_pthread_id(&unwinder, thread, &all_python_threads) { Ok(pthread_id) => { if pthread_id != 0 { self.python_thread_ids.insert(pthread_id, threadid); } - }, - Err(e) => { warn!("Failed to get get_pthread_id for {}: {}", threadid, e); } + } + Err(e) => { + warn!("Failed to get get_pthread_id for {}: {}", threadid, e); + } }; } @@ -380,9 +466,13 @@ impl PythonSpy { Ok(None) } - - #[cfg(all(target_os="linux", unwind))] - pub fn _get_pthread_id(&self, unwinder: &remoteprocess::Unwinder, thread: &remoteprocess::Thread, threadids: &HashSet) -> Result { + #[cfg(all(target_os = "linux", unwind))] + pub fn _get_pthread_id( + &self, + unwinder: &remoteprocess::Unwinder, + thread: &remoteprocess::Thread, + threadids: &HashSet, + ) -> Result { let mut pthread_id = 0; let mut cursor = unwinder.cursor(thread)?; @@ -400,8 +490,12 @@ impl PythonSpy { Ok(pthread_id) } - #[cfg(target_os="freebsd")] - fn _get_os_thread_id(&mut self, _python_thread_id: u64, _interp: &I) -> Result, Error> { + #[cfg(target_os = "freebsd")] + fn _get_os_thread_id( + &mut self, + _python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { Ok(None) } @@ -409,8 +503,8 @@ impl PythonSpy { match self.python_thread_names.get(&python_thread_id) { Some(thread_name) => Some(thread_name.clone()), None => { - self.python_thread_names = thread_name_lookup(self).unwrap_or_else(|| HashMap::new()); - self.python_thread_names.get(&python_thread_id).map(|name| name.clone()) + self.python_thread_names = thread_name_lookup(self).unwrap_or_default(); + self.python_thread_names.get(&python_thread_id).cloned() } } } @@ -430,10 +524,10 @@ impl PythonSpy { } // on linux the process could be running in docker, access the filename through procfs - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let filename_storage; - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let filename = if self.dockerized { filename_storage = format!("/proc/{}/root{}", self.pid, filename); if Path::new(&filename_storage).exists() { @@ -460,7 +554,8 @@ impl PythonSpy { .ok() .map(|p| p.to_string_lossy().to_string()); - self.short_filenames.insert(filename.to_owned(), shortened.clone()); + self.short_filenames + .insert(filename.to_owned(), shortened.clone()); shortened } } diff --git a/src/python_threading.rs b/src/python_threading.rs index 446f0434..d60acd6d 100644 --- a/src/python_threading.rs +++ b/src/python_threading.rs @@ -2,10 +2,10 @@ use std::collections::HashMap; use anyhow::Error; -use crate::python_bindings::{v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{v3_10_0, v3_11_0, v3_6_6, v3_7_0, v3_8_0, v3_9_5}; +use crate::python_data_access::{copy_long, copy_string, DictIterator, PY_TPFLAGS_MANAGED_DICT}; use crate::python_interpreters::{InterpreterState, Object, TypeObject}; use crate::python_spy::PythonSpy; -use crate::python_data_access::{copy_string, copy_long, DictIterator, PY_TPFLAGS_MANAGED_DICT}; use crate::version::Version; @@ -14,22 +14,24 @@ use remoteprocess::ProcessMemory; /// Returns a hashmap of threadid: threadname, by inspecting the '_active' variable in the /// 'threading' module. pub fn thread_names_from_interpreter( - interp: &I, process: &P, version: &Version) -> Result, Error> { + interp: &I, + process: &P, + version: &Version, +) -> Result, Error> { let mut ret = HashMap::new(); - for entry in DictIterator::from(process, &version, interp.modules() as usize)? { + for entry in DictIterator::from(process, version, interp.modules() as usize)? { let (key, value) = entry?; let module_name = copy_string(key as *const I::StringObject, process)?; if module_name == "threading" { let module: I::Object = process.copy_struct(value)?; let module_type = process.copy_pointer(module.ob_type())?; let dictptr: usize = process.copy_struct(value + module_type.dictoffset() as usize)?; - for i in DictIterator::from(process, &version, dictptr)? { + for i in DictIterator::from(process, version, dictptr)? { let (key, value) = i?; let name = copy_string(key as *const I::StringObject, process)?; if name == "_active" { - - for i in DictIterator::from(process, &version, value)? { + for i in DictIterator::from(process, version, value)? { let (key, value) = i?; let (threadid, _) = copy_long(process, key)?; @@ -37,12 +39,17 @@ pub fn thread_names_from_interpreter( let thread_type = process.copy_pointer(thread.ob_type())?; let dict_iter = if thread_type.flags() & PY_TPFLAGS_MANAGED_DICT != 0 { - DictIterator::from_managed_dict(process, &version, value, thread.ob_type() as usize)? + DictIterator::from_managed_dict( + process, + version, + value, + thread.ob_type() as usize, + )? } else { let dict_offset = thread_type.dictoffset(); - let dict_addr =(value as isize + dict_offset) as usize; + let dict_addr = (value as isize + dict_offset) as usize; let thread_dict_addr: usize = process.copy_struct(dict_addr)?; - DictIterator::from(process, &version, thread_dict_addr)? + DictIterator::from(process, version, thread_dict_addr)? }; for i in dict_iter { @@ -50,7 +57,8 @@ pub fn thread_names_from_interpreter( let varname = copy_string(key as *const I::StringObject, process)?; if varname == "_name" { - let threadname = copy_string(value as *const I::StringObject, process)?; + let threadname = + copy_string(value as *const I::StringObject, process)?; ret.insert(threadid as u64, threadname); break; } @@ -67,9 +75,11 @@ pub fn thread_names_from_interpreter( /// Returns a hashmap of threadid: threadname, by inspecting the '_active' variable in the /// 'threading' module. -fn _thread_name_lookup(spy: &PythonSpy) -> Result, Error> { +fn _thread_name_lookup( + spy: &PythonSpy, +) -> Result, Error> { let interp: I = spy.process.copy_struct(spy.interpreter_address)?; - Ok(thread_names_from_interpreter(&interp, &spy.process, &spy.version)?) + thread_names_from_interpreter(&interp, &spy.process, &spy.version) } // try getting the threadnames, but don't sweat it if we can't. Since this relies on dictionary @@ -77,13 +87,29 @@ fn _thread_name_lookup(spy: &PythonSpy) -> Result Option> { let err = match process.version { - Version{major: 3, minor: 6, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 7, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 8, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 9, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 10, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 11, ..} => _thread_name_lookup::(&process), - _ => return None + Version { + major: 3, minor: 6, .. + } => _thread_name_lookup::(process), + Version { + major: 3, minor: 7, .. + } => _thread_name_lookup::(process), + Version { + major: 3, minor: 8, .. + } => _thread_name_lookup::(process), + Version { + major: 3, minor: 9, .. + } => _thread_name_lookup::(process), + Version { + major: 3, + minor: 10, + .. + } => _thread_name_lookup::(process), + Version { + major: 3, + minor: 11, + .. + } => _thread_name_lookup::(process), + _ => return None, }; err.ok() } diff --git a/src/sampler.rs b/src/sampler.rs index 2f60f511..d9831abc 100644 --- a/src/sampler.rs +++ b/src/sampler.rs @@ -1,17 +1,19 @@ +#![allow(clippy::type_complexity)] + use std::collections::HashMap; -use std::sync::mpsc::{self, Sender, Receiver}; -use std::sync::{Mutex, Arc}; -use std::time::Duration; +use std::sync::mpsc::{self, Receiver, Sender}; +use std::sync::{Arc, Mutex}; use std::thread; +use std::time::Duration; use anyhow::Error; use remoteprocess::Pid; -use crate::timer::Timer; -use crate::python_spy::PythonSpy; use crate::config::Config; -use crate::stack_trace::{StackTrace, ProcessInfo}; +use crate::python_spy::PythonSpy; +use crate::stack_trace::{ProcessInfo, StackTrace}; +use crate::timer::Timer; use crate::version::Version; pub struct Sampler { @@ -23,7 +25,7 @@ pub struct Sampler { pub struct Sample { pub traces: Vec, pub sampling_errors: Option>, - pub late: Option + pub late: Option, } impl Sampler { @@ -38,19 +40,22 @@ impl Sampler { /// Creates a new sampler object, reading from a single process only fn new_sampler(pid: Pid, config: &Config) -> Result { let (tx, rx): (Sender, Receiver) = mpsc::channel(); - let (initialized_tx, initialized_rx): (Sender>, Receiver>) = mpsc::channel(); + let (initialized_tx, initialized_rx): ( + Sender>, + Receiver>, + ) = mpsc::channel(); let config = config.clone(); let sampling_thread = thread::spawn(move || { // We need to create this object inside the thread here since PythonSpy objects don't // have the Send trait implemented on linux let mut spy = match PythonSpy::retry_new(pid, &config, 20) { Ok(spy) => { - if let Err(_) = initialized_tx.send(Ok(spy.version.clone())) { + if initialized_tx.send(Ok(spy.version.clone())).is_err() { return; } spy - }, - Err(e) => { + } + Err(e) => { if initialized_tx.send(Err(e)).is_err() {} return; } @@ -62,7 +67,10 @@ impl Sampler { Ok(traces) => traces, Err(e) => { if spy.process.exe().is_err() { - info!("stopped sampling pid {} because the process exited", spy.pid); + info!( + "stopped sampling pid {} because the process exited", + spy.pid + ); break; } sampling_errors = Some(vec![(spy.pid, e)]); @@ -71,14 +79,25 @@ impl Sampler { }; let late = sleep.err(); - if tx.send(Sample{traces: traces, sampling_errors, late}).is_err() { + if tx + .send(Sample { + traces, + sampling_errors, + late, + }) + .is_err() + { break; } } }); let version = initialized_rx.recv()??; - Ok(Sampler{rx: Some(rx), version: Some(version), sampling_thread: Some(sampling_thread)}) + Ok(Sampler { + rx: Some(rx), + version: Some(version), + sampling_thread: Some(sampling_thread), + }) } /// Creates a new sampler object that samples any python process in the @@ -89,15 +108,19 @@ impl Sampler { // Initialize a PythonSpy object per child, and build up the process tree let mut spies = HashMap::new(); let mut retries = 10; - spies.insert(pid, PythonSpyThread::new(pid, None, &config)?); + spies.insert(pid, PythonSpyThread::new(pid, None, config)?); loop { for (childpid, parentpid) in process.child_processes()? { // If we can't create the child process, don't worry about it // can happen with zombie child processes etc - match PythonSpyThread::new(childpid, Some(parentpid), &config) { - Ok(spy) => { spies.insert(childpid, spy); }, - Err(e) => { warn!("Failed to open process {}: {}", childpid, e); } + match PythonSpyThread::new(childpid, Some(parentpid), config) { + Ok(spy) => { + spies.insert(childpid, spy); + } + Err(e) => { + warn!("Failed to open process {}: {}", childpid, e); + } } } @@ -110,7 +133,10 @@ impl Sampler { // Otherwise sleep for a short time and retry retries -= 1; if retries == 0 { - return Err(format_err!("No python processes found in process {} or any of its subprocesses", pid)); + return Err(format_err!( + "No python processes found in process {} or any of its subprocesses", + pid + )); } std::thread::sleep(std::time::Duration::from_millis(100)); } @@ -124,17 +150,26 @@ impl Sampler { while process.exe().is_ok() { match monitor_spies.lock() { Ok(mut spies) => { - for (childpid, parentpid) in process.child_processes().expect("failed to get subprocesses") { + for (childpid, parentpid) in process + .child_processes() + .expect("failed to get subprocesses") + { if spies.contains_key(&childpid) { continue; } match PythonSpyThread::new(childpid, Some(parentpid), &monitor_config) { - Ok(spy) => { spies.insert(childpid, spy); } - Err(e) => { warn!("Failed to create spy for {}: {}", childpid, e); } + Ok(spy) => { + spies.insert(childpid, spy); + } + Err(e) => { + warn!("Failed to create spy for {}: {}", childpid, e); + } } } - }, - Err(e) => { error!("Failed to acquire lock: {}", e); } + } + Err(e) => { + error!("Failed to acquire lock: {}", e); + } } std::thread::sleep(Duration::from_millis(100)); } @@ -168,11 +203,11 @@ impl Sampler { // collect the traces from each python spy if possible for spy in spies.values_mut() { match spy.collect() { - Some(Ok(mut t)) => { traces.append(&mut t) }, + Some(Ok(mut t)) => traces.append(&mut t), Some(Err(e)) => { - let errors = sampling_errors.get_or_insert_with(|| Vec::new()); + let errors = sampling_errors.get_or_insert_with(Vec::new); errors.push((spy.process.pid, e)); - }, + } None => {} } } @@ -181,15 +216,22 @@ impl Sampler { for trace in traces.iter_mut() { let pid = trace.pid; // Annotate each trace with the process info for the current - let process = process_info.entry(pid).or_insert_with(|| { - get_process_info(pid, &spies).map(|p| Arc::new(*p)) - }); + let process = process_info + .entry(pid) + .or_insert_with(|| get_process_info(pid, &spies).map(|p| Arc::new(*p))); trace.process_info = process.clone(); } // Send the collected info back let late = sleep.err(); - if tx.send(Sample{traces, sampling_errors, late}).is_err() { + if tx + .send(Sample { + traces, + sampling_errors, + late, + }) + .is_err() + { break; } @@ -200,7 +242,11 @@ impl Sampler { } }); - Ok(Sampler{rx: Some(rx), version: None, sampling_thread: Some(sampling_thread)}) + Ok(Sampler { + rx: Some(rx), + version: None, + sampling_thread: Some(sampling_thread), + }) } } @@ -229,29 +275,38 @@ struct PythonSpyThread { notified: bool, pub process: remoteprocess::Process, pub parent: Option, - pub command_line: String + pub command_line: String, } impl PythonSpyThread { fn new(pid: Pid, parent: Option, config: &Config) -> Result { - let (initialized_tx, initialized_rx): (Sender>, Receiver>) = mpsc::channel(); + let (initialized_tx, initialized_rx): ( + Sender>, + Receiver>, + ) = mpsc::channel(); let (notify_tx, notify_rx): (Sender<()>, Receiver<()>) = mpsc::channel(); - let (sample_tx, sample_rx): (Sender, Error>>, Receiver, Error>>) = mpsc::channel(); + let (sample_tx, sample_rx): ( + Sender, Error>>, + Receiver, Error>>, + ) = mpsc::channel(); let config = config.clone(); let process = remoteprocess::Process::new(pid)?; - let command_line = process.cmdline().map(|x| x.join(" ")).unwrap_or("".to_owned()); + let command_line = process + .cmdline() + .map(|x| x.join(" ")) + .unwrap_or_else(|_| "".to_owned()); thread::spawn(move || { // We need to create this object inside the thread here since PythonSpy objects don't // have the Send trait implemented on linux let mut spy = match PythonSpy::retry_new(pid, &config, 5) { Ok(spy) => { - if let Err(_) = initialized_tx.send(Ok(spy.version.clone())) { + if initialized_tx.send(Ok(spy.version.clone())).is_err() { return; } spy - }, - Err(e) => { + } + Err(e) => { warn!("Failed to profile python from process {}: {}", pid, e); if initialized_tx.send(Err(e)).is_err() {} return; @@ -260,30 +315,44 @@ impl PythonSpyThread { for _ in notify_rx.iter() { let result = spy.get_stack_traces(); - if let Err(_) = result { - if spy.process.exe().is_err() { - info!("stopped sampling pid {} because the process exited", spy.pid); - break; - } + if result.is_err() && spy.process.exe().is_err() { + info!( + "stopped sampling pid {} because the process exited", + spy.pid + ); + break; } if sample_tx.send(result).is_err() { break; } } }); - Ok(PythonSpyThread{initialized_rx, notify_tx, sample_rx, process, command_line, parent, initialized: None, running: false, notified: false}) + Ok(PythonSpyThread { + initialized_rx, + notify_tx, + sample_rx, + process, + command_line, + parent, + initialized: None, + running: false, + notified: false, + }) } - fn wait_initialized(&mut self) -> bool { + fn wait_initialized(&mut self) -> bool { match self.initialized_rx.recv() { Ok(status) => { self.running = status.is_ok(); self.initialized = Some(status); self.running - }, + } Err(e) => { // shouldn't happen, but will be ok if it does - warn!("Failed to get initialization status from PythonSpyThread: {}", e); + warn!( + "Failed to get initialization status from PythonSpyThread: {}", + e + ); false } } @@ -298,7 +367,7 @@ impl PythonSpyThread { self.running = status.is_ok(); self.initialized = Some(status); self.running - }, + } Err(std::sync::mpsc::TryRecvError::Empty) => false, Err(std::sync::mpsc::TryRecvError::Disconnected) => { // this *shouldn't* happen @@ -310,12 +379,16 @@ impl PythonSpyThread { fn notify(&mut self) { match self.notify_tx.send(()) { - Ok(_) => { self.notified = true; }, - Err(_) => { self.running = false; } + Ok(_) => { + self.notified = true; + } + Err(_) => { + self.running = false; + } } } - fn collect(&mut self) -> Option, Error>> { + fn collect(&mut self) -> Option, Error>> { if !self.notified { return None; } @@ -332,7 +405,13 @@ impl PythonSpyThread { fn get_process_info(pid: Pid, spies: &HashMap) -> Option> { spies.get(&pid).map(|spy| { - let parent = spy.parent.and_then(|parentpid| get_process_info(parentpid, spies)); - Box::new(ProcessInfo{pid, parent, command_line: spy.command_line.clone()}) + let parent = spy + .parent + .and_then(|parentpid| get_process_info(parentpid, spies)); + Box::new(ProcessInfo { + pid, + parent, + command_line: spy.command_line.clone(), + }) }) } diff --git a/src/speedscope.rs b/src/speedscope.rs index 5ce378de..8d8a4023 100644 --- a/src/speedscope.rs +++ b/src/speedscope.rs @@ -26,16 +26,15 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -use std::collections::{HashMap}; +use std::collections::HashMap; use std::io; use std::io::Write; use crate::stack_trace; -use remoteprocess::{Tid, Pid}; +use remoteprocess::{Pid, Tid}; -use anyhow::{Error}; +use anyhow::Error; use serde_derive::{Deserialize, Serialize}; -use serde_json; use crate::config::Config; @@ -128,40 +127,51 @@ enum ValueUnit { } impl SpeedscopeFile { - pub fn new(samples: &HashMap<(Pid, Tid), Vec>>, frames: &Vec, - thread_name_map: &HashMap<(Pid, Tid), String>, sample_rate: u64) -> SpeedscopeFile { - - let mut profiles: Vec = samples.iter().map(|(thread_id, samples)| { - let end_value = samples.len(); - // we sample at 100 Hz, so scale the end value and weights to match the time unit - let scaled_end_value = end_value as f64 / sample_rate as f64; - let weights: Vec = (&samples).iter().map(|_s| 1_f64 / sample_rate as f64).collect(); - - Profile { - profile_type: ProfileType::Sampled, - name: thread_name_map.get(thread_id).map_or_else(|| "py-spy".to_string(), |x| x.clone()), - unit: ValueUnit::Seconds, - start_value: 0.0, - end_value: scaled_end_value, - samples: samples.clone(), - weights + pub fn new( + samples: &HashMap<(Pid, Tid), Vec>>, + frames: &[Frame], + thread_name_map: &HashMap<(Pid, Tid), String>, + sample_rate: u64, + ) -> SpeedscopeFile { + let mut profiles: Vec = samples + .iter() + .map(|(thread_id, samples)| { + let end_value = samples.len(); + // we sample at 100 Hz, so scale the end value and weights to match the time unit + let scaled_end_value = end_value as f64 / sample_rate as f64; + let weights: Vec = samples + .iter() + .map(|_s| 1_f64 / sample_rate as f64) + .collect(); + + Profile { + profile_type: ProfileType::Sampled, + name: thread_name_map + .get(thread_id) + .map_or_else(|| "py-spy".to_string(), |x| x.clone()), + unit: ValueUnit::Seconds, + start_value: 0.0, + end_value: scaled_end_value, + samples: samples.clone(), + weights, + } + }) + .collect(); + + profiles.sort_by(|a, b| a.name.cmp(&b.name)); + + SpeedscopeFile { + // This is always the same + schema: "https://www.speedscope.app/file-format-schema.json".to_string(), + active_profile_index: None, + name: Some("py-spy profile".to_string()), + exporter: Some(format!("py-spy@{}", env!("CARGO_PKG_VERSION"))), + profiles, + shared: Shared { + frames: frames.to_owned(), + }, } - }).collect(); - - profiles.sort_by(|a, b| a.name.cmp(&b.name)); - - SpeedscopeFile { - // This is always the same - schema: "https://www.speedscope.app/file-format-schema.json".to_string(), - active_profile_index: None, - name: Some("py-spy profile".to_string()), - exporter: Some(format!("py-spy@{}", env!("CARGO_PKG_VERSION"))), - profiles: profiles, - shared: Shared { - frames: frames.clone() - } } - } } impl Frame { @@ -170,8 +180,12 @@ impl Frame { name: stack_frame.name.clone(), // TODO: filename? file: Some(stack_frame.filename.clone()), - line: if show_line_numbers { Some(stack_frame.line as u32) } else { None }, - col: None + line: if show_line_numbers { + Some(stack_frame.line as u32) + } else { + None + }, + col: None, } } } @@ -197,30 +211,43 @@ impl Stats { pub fn record(&mut self, stack: &stack_trace::StackTrace) -> Result<(), io::Error> { let show_line_numbers = self.config.show_line_numbers; - let mut frame_indices: Vec = stack.frames.iter().map(|frame| { - let frames = &mut self.frames; - let mut key = frame.clone(); - if !show_line_numbers { - key.line = 0; - } - *self.frame_to_index.entry(key).or_insert_with(|| { - let len = frames.len(); - frames.push(Frame::new(&frame, show_line_numbers)); - len + let mut frame_indices: Vec = stack + .frames + .iter() + .map(|frame| { + let frames = &mut self.frames; + let mut key = frame.clone(); + if !show_line_numbers { + key.line = 0; + } + *self.frame_to_index.entry(key).or_insert_with(|| { + let len = frames.len(); + frames.push(Frame::new(frame, show_line_numbers)); + len + }) }) - }).collect(); + .collect(); frame_indices.reverse(); let key = (stack.pid as Pid, stack.thread_id as Tid); - self.samples.entry(key).or_insert_with(|| { - vec![] - }).push(frame_indices); + self.samples + .entry(key) + .or_insert_with(std::vec::Vec::new) + .push(frame_indices); let subprocesses = self.config.subprocesses; self.thread_name_map.entry(key).or_insert_with(|| { - let thread_name = stack.thread_name.as_ref().map_or_else(|| "".to_string(), |x| x.clone()); + let thread_name = stack + .thread_name + .as_ref() + .map_or_else(|| "".to_string(), |x| x.clone()); if subprocesses { - format!("Process {} Thread {} \"{}\"", stack.pid, stack.format_threadid(), thread_name) + format!( + "Process {} Thread {} \"{}\"", + stack.pid, + stack.format_threadid(), + thread_name + ) } else { format!("Thread {} \"{}\"", stack.format_threadid(), thread_name) } @@ -230,7 +257,12 @@ impl Stats { } pub fn write(&self, w: &mut dyn Write) -> Result<(), Error> { - let json = serde_json::to_string(&SpeedscopeFile::new(&self.samples, &self.frames, &self.thread_name_map, self.config.sampling_rate))?; + let json = serde_json::to_string(&SpeedscopeFile::new( + &self.samples, + &self.frames, + &self.thread_name_map, + self.config.sampling_rate, + ))?; writeln!(w, "{}", json)?; Ok(()) } @@ -244,7 +276,11 @@ mod tests { #[test] fn test_speedscope_units() { let sample_rate = 100; - let config = Config{show_line_numbers: true, sampling_rate: sample_rate, ..Default::default()}; + let config = Config { + show_line_numbers: true, + sampling_rate: sample_rate, + ..Default::default() + }; let mut stats = Stats::new(&config); let mut cursor = Cursor::new(Vec::new()); diff --git a/src/stack_trace.rs b/src/stack_trace.rs index b7d58163..4eb76623 100644 --- a/src/stack_trace.rs +++ b/src/stack_trace.rs @@ -1,14 +1,15 @@ -use std; use std::sync::Arc; use anyhow::{Context, Error, Result}; -use remoteprocess::{ProcessMemory, Pid}; +use remoteprocess::{Pid, ProcessMemory}; use serde_derive::Serialize; -use crate::python_interpreters::{InterpreterState, ThreadState, FrameObject, CodeObject, TupleObject}; -use crate::python_data_access::{copy_string, copy_bytes}; use crate::config::{Config, LineNo}; +use crate::python_data_access::{copy_bytes, copy_string}; +use crate::python_interpreters::{ + CodeObject, FrameObject, InterpreterState, ThreadState, TupleObject, +}; /// Call stack for a single python thread #[derive(Debug, Clone, Serialize)] @@ -28,7 +29,7 @@ pub struct StackTrace { /// The frames pub frames: Vec, /// process commandline / parent process info - pub process_info: Option> + pub process_info: Option>, } /// Information about a single function call in a stack trace @@ -58,15 +59,22 @@ pub struct LocalVariable { #[derive(Debug, Clone, Serialize)] pub struct ProcessInfo { - pub pid: Pid, + pub pid: Pid, pub command_line: String, - pub parent: Option> + pub parent: Option>, } /// Given an InterpreterState, this function returns a vector of stack traces for each thread -pub fn get_stack_traces(interpreter: &I, process: &P, threadstate_address: usize, config: Option<&Config>) -> Result, Error> - where I: InterpreterState, P: ProcessMemory { - +pub fn get_stack_traces( + interpreter: &I, + process: &P, + threadstate_address: usize, + config: Option<&Config>, +) -> Result, Error> +where + I: InterpreterState, + P: ProcessMemory, +{ let gil_thread_id = get_gil_threadid::(threadstate_address, process)?; let mut ret = Vec::new(); @@ -76,7 +84,9 @@ pub fn get_stack_traces(interpreter: &I, process: &P, threadstate_address: let dump_locals = config.map(|c| c.dump_locals).unwrap_or(0); while !threads.is_null() { - let thread = process.copy_pointer(threads).context("Failed to copy PyThreadState")?; + let thread = process + .copy_pointer(threads) + .context("Failed to copy PyThreadState")?; let mut trace = get_stack_trace(&thread, process, dump_locals > 0, lineno)?; trace.owns_gil = trace.thread_id == gil_thread_id; @@ -92,8 +102,16 @@ pub fn get_stack_traces(interpreter: &I, process: &P, threadstate_address: } /// Gets a stack trace for an individual thread -pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: LineNo) -> Result - where T: ThreadState, P: ProcessMemory { +pub fn get_stack_trace( + thread: &T, + process: &P, + copy_locals: bool, + lineno: LineNo, +) -> Result +where + T: ThreadState, + P: ProcessMemory, +{ // TODO: just return frames here? everything else probably should be returned out of scope let mut frames = Vec::new(); @@ -105,15 +123,19 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: let mut frame_ptr = thread.frame(frame_address); while !frame_ptr.is_null() { - let frame = process.copy_pointer(frame_ptr).context("Failed to copy PyFrameObject")?; - let code = process.copy_pointer(frame.code()).context("Failed to copy PyCodeObject")?; + let frame = process + .copy_pointer(frame_ptr) + .context("Failed to copy PyFrameObject")?; + let code = process + .copy_pointer(frame.code()) + .context("Failed to copy PyCodeObject")?; let filename = copy_string(code.filename(), process).context("Failed to copy filename")?; let name = copy_string(code.name(), process).context("Failed to copy function name")?; let line = match lineno { LineNo::NoLine => 0, - LineNo::FirstLineNo => code.first_lineno(), + LineNo::First => code.first_lineno(), LineNo::LastInstruction => match get_line_number(&code, frame.lasti(), process) { Ok(line) => line, Err(e) => { @@ -121,10 +143,13 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: // can happen in extreme cases (https://github.com/benfred/py-spy/issues/164) // Rather than fail set the linenumber to 0. This is used by the native extensions // to indicate that we can't load a line number and it should be handled gracefully - warn!("Failed to get line number from {}.{}: {}", filename, name, e); + warn!( + "Failed to get line number from {}.{}: {}", + filename, name, e + ); 0 } - } + }, }; let locals = if copy_locals { @@ -133,7 +158,14 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: None }; - frames.push(Frame{name, filename, line, short_filename: None, module: None, locals}); + frames.push(Frame { + name, + filename, + line, + short_filename: None, + module: None, + locals, + }); if frames.len() > 4096 { return Err(format_err!("Max frame recursion depth reached")); } @@ -141,7 +173,16 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: frame_ptr = frame.back(); } - Ok(StackTrace{pid: 0, frames, thread_id: thread.thread_id(), thread_name: None, owns_gil: false, active: true, os_thread_id: thread.native_thread_id(), process_info: None}) + Ok(StackTrace { + pid: 0, + frames, + thread_id: thread.thread_id(), + thread_name: None, + owns_gil: false, + active: true, + os_thread_id: thread.native_thread_id(), + process_info: None, + }) } impl StackTrace { @@ -155,27 +196,35 @@ impl StackTrace { pub fn format_threadid(&self) -> String { // native threadids in osx are kinda useless, use the pthread id instead - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] return format!("{:#X}", self.thread_id); // otherwise use the native threadid if given - #[cfg(not(target_os="macos"))] + #[cfg(not(target_os = "macos"))] match self.os_thread_id { Some(tid) => format!("{}", tid), - None => format!("{:#X}", self.thread_id) + None => format!("{:#X}", self.thread_id), } } } /// Returns the line number from a PyCodeObject (given the lasti index from a PyFrameObject) -fn get_line_number(code: &C, lasti: i32, process: &P) -> Result { - let table = copy_bytes(code.line_table(), process).context("Failed to copy line number table")?; +fn get_line_number( + code: &C, + lasti: i32, + process: &P, +) -> Result { + let table = + copy_bytes(code.line_table(), process).context("Failed to copy line number table")?; Ok(code.get_line_number(lasti, &table)) } - -fn get_locals(code: &C, frameptr: *const F, frame: &F, process: &P) - -> Result, Error> { +fn get_locals( + code: &C, + frameptr: *const F, + frame: &F, + process: &P, +) -> Result, Error> { let local_count = code.nlocals() as usize; let argcount = code.argcount() as usize; let varnames = process.copy_pointer(code.varnames())?; @@ -186,19 +235,27 @@ fn get_locals(code: &C, framept let mut ret = Vec::new(); for i in 0..local_count { - let nameptr: *const C::StringObject = process.copy_struct(varnames.address(code.varnames() as usize, i))?; + let nameptr: *const C::StringObject = + process.copy_struct(varnames.address(code.varnames() as usize, i))?; let name = copy_string(nameptr, process)?; let addr: usize = process.copy_struct(locals_addr + i * ptr_size)?; if addr == 0 { continue; } - ret.push(LocalVariable{name, addr, arg: i < argcount, repr: None}); + ret.push(LocalVariable { + name, + addr, + arg: i < argcount, + repr: None, + }); } Ok(ret) } -pub fn get_gil_threadid(threadstate_address: usize, process: &P) - -> Result { +pub fn get_gil_threadid( + threadstate_address: usize, + process: &P, +) -> Result { // figure out what thread has the GIL by inspecting _PyThreadState_Current if threadstate_address > 0 { let addr: usize = process.copy_struct(threadstate_address)?; @@ -214,25 +271,32 @@ pub fn get_gil_threadid(threadstate_addre impl ProcessInfo { pub fn to_frame(&self) -> Frame { - Frame{name: format!("process {}:\"{}\"", self.pid, self.command_line), + Frame { + name: format!("process {}:\"{}\"", self.pid, self.command_line), filename: String::from(""), - module: None, short_filename: None, line: 0, locals: None} + module: None, + short_filename: None, + line: 0, + locals: None, + } } } #[cfg(test)] mod tests { use super::*; - use remoteprocess::LocalProcess; - use crate::python_bindings::v3_7_0::{PyCodeObject}; + use crate::python_bindings::v3_7_0::PyCodeObject; use crate::python_data_access::tests::to_byteobject; + use remoteprocess::LocalProcess; #[test] fn test_get_line_number() { let mut lnotab = to_byteobject(&[0u8, 1, 10, 1, 8, 1, 4, 1]); - let code = PyCodeObject{co_firstlineno: 3, - co_lnotab: &mut lnotab.base.ob_base.ob_base, - ..Default::default()}; + let code = PyCodeObject { + co_firstlineno: 3, + co_lnotab: &mut lnotab.base.ob_base.ob_base, + ..Default::default() + }; let lineno = get_line_number(&code, 30, &LocalProcess).unwrap(); assert_eq!(lineno, 7); } diff --git a/src/timer.rs b/src/timer.rs index af7ecbf9..8a86e8d4 100644 --- a/src/timer.rs +++ b/src/timer.rs @@ -1,9 +1,8 @@ -use std::time::{Instant, Duration}; +use std::time::{Duration, Instant}; #[cfg(windows)] use winapi::um::timeapi; -use rand; -use rand_distr::{Exp, Distribution}; +use rand_distr::{Distribution, Exp}; /// Timer is an iterator that sleeps an appropriate amount of time between iterations /// so that we can sample the process a certain number of times a second. @@ -25,10 +24,16 @@ impl Timer { // https://randomascii.wordpress.com/2013/07/08/windows-timer-resolution-megawatts-wasted/ // and http://www.belshe.com/2010/06/04/chrome-cranking-up-the-clock/ #[cfg(windows)] - unsafe { timeapi::timeBeginPeriod(1); } + unsafe { + timeapi::timeBeginPeriod(1); + } let start = Instant::now(); - Timer{start, desired: Duration::from_secs(0), exp: Exp::new(rate).unwrap()} + Timer { + start, + desired: Duration::from_secs(0), + exp: Exp::new(rate).unwrap(), + } } } @@ -60,6 +65,8 @@ impl Iterator for Timer { impl Drop for Timer { fn drop(&mut self) { #[cfg(windows)] - unsafe { timeapi::timeEndPeriod(1); } + unsafe { + timeapi::timeEndPeriod(1); + } } } diff --git a/src/utils.rs b/src/utils.rs index f9674fa7..1d8778bb 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -11,9 +11,9 @@ pub fn resolve_filename(filename: &str, modulename: &str) -> Option { let module = Path::new(modulename); if let Some(parent) = module.parent() { if let Some(name) = path.file_name() { - let temp = parent.join(name); + let temp = parent.join(name); if temp.exists() { - return Some(temp.to_string_lossy().to_owned().to_string()) + return Some(temp.to_string_lossy().to_string()); } } } diff --git a/src/version.rs b/src/version.rs index 8b079d26..f99e518b 100644 --- a/src/version.rs +++ b/src/version.rs @@ -1,28 +1,29 @@ use lazy_static::lazy_static; use regex::bytes::Regex; -use std; - -use anyhow::{Error}; +use anyhow::Error; #[derive(Debug, PartialEq, Eq, Clone)] pub struct Version { pub major: u64, pub minor: u64, pub patch: u64, - pub release_flags: String + pub release_flags: String, } impl Version { pub fn scan_bytes(data: &[u8]) -> Result { lazy_static! { - static ref RE: Regex = Regex::new(r"((2|3)\.(3|4|5|6|7|8|9|10|11)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+? (.{1,64})").unwrap(); + static ref RE: Regex = Regex::new( + r"((2|3)\.(3|4|5|6|7|8|9|10|11)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+? (.{1,64})" + ) + .unwrap(); } if let Some(cap) = RE.captures_iter(data).next() { let release = match cap.get(5) { - Some(x) => { std::str::from_utf8(x.as_bytes())? }, - None => "" + Some(x) => std::str::from_utf8(x.as_bytes())?, + None => "", }; let major = std::str::from_utf8(&cap[2])?.parse::()?; let minor = std::str::from_utf8(&cap[3])?.parse::()?; @@ -41,7 +42,12 @@ impl Version { } } - return Ok(Version{major, minor, patch, release_flags:release.to_owned()}); + return Ok(Version { + major, + minor, + patch, + release_flags: release.to_owned(), + }); } Err(format_err!("failed to find version string")) } @@ -49,7 +55,11 @@ impl Version { impl std::fmt::Display for Version { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}.{}.{}{}", self.major, self.minor, self.patch, self.release_flags) + write!( + f, + "{}.{}.{}{}", + self.major, self.minor, self.patch, self.release_flags + ) } } @@ -59,18 +69,58 @@ mod tests { #[test] fn test_find_version() { let version = Version::scan_bytes(b"2.7.10 (default, Oct 6 2017, 22:29:07)").unwrap(); - assert_eq!(version, Version{major: 2, minor: 7, patch: 10, release_flags: "".to_owned()}); - - let version = Version::scan_bytes(b"3.6.3 |Anaconda custom (64-bit)| (default, Oct 6 2017, 12:04:38)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 6, patch: 3, release_flags: "".to_owned()}); - - let version = Version::scan_bytes(b"Python 3.7.0rc1 (v3.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 7, patch: 0, release_flags: "rc1".to_owned()}); - - let version = Version::scan_bytes(b"Python 3.10.0rc1 (tags/v3.10.0rc1, Aug 28 2021, 18:25:40)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 10, patch: 0, release_flags: "rc1".to_owned()}); + assert_eq!( + version, + Version { + major: 2, + minor: 7, + patch: 10, + release_flags: "".to_owned() + } + ); + + let version = Version::scan_bytes( + b"3.6.3 |Anaconda custom (64-bit)| (default, Oct 6 2017, 12:04:38)", + ) + .unwrap(); + assert_eq!( + version, + Version { + major: 3, + minor: 6, + patch: 3, + release_flags: "".to_owned() + } + ); + + let version = + Version::scan_bytes(b"Python 3.7.0rc1 (v3.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)") + .unwrap(); + assert_eq!( + version, + Version { + major: 3, + minor: 7, + patch: 0, + release_flags: "rc1".to_owned() + } + ); + + let version = + Version::scan_bytes(b"Python 3.10.0rc1 (tags/v3.10.0rc1, Aug 28 2021, 18:25:40)") + .unwrap(); + assert_eq!( + version, + Version { + major: 3, + minor: 10, + patch: 0, + release_flags: "rc1".to_owned() + } + ); - let version = Version::scan_bytes(b"1.7.0rc1 (v1.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)"); + let version = + Version::scan_bytes(b"1.7.0rc1 (v1.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)"); assert!(version.is_err(), "don't match unsupported "); let version = Version::scan_bytes(b"3.7 10 "); @@ -81,6 +131,14 @@ mod tests { // v2.7.15+ is a valid version string apparently: https://github.com/benfred/py-spy/issues/81 let version = Version::scan_bytes(b"2.7.15+ (default, Oct 2 2018, 22:12:08)").unwrap(); - assert_eq!(version, Version{major: 2, minor: 7, patch: 15, release_flags: "".to_owned()}); + assert_eq!( + version, + Version { + major: 2, + minor: 7, + patch: 15, + release_flags: "".to_owned() + } + ); } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 233d0b66..9d7c36cc 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,6 +1,6 @@ extern crate py_spy; +use py_spy::{Config, Pid, PythonSpy}; use std::collections::HashSet; -use py_spy::{Config, PythonSpy, Pid}; struct ScriptRunner { #[allow(dead_code)] @@ -9,11 +9,16 @@ struct ScriptRunner { impl ScriptRunner { fn new(process_name: &str, filename: &str) -> ScriptRunner { - let child = std::process::Command::new(process_name).arg(filename).spawn().unwrap(); - ScriptRunner{child} + let child = std::process::Command::new(process_name) + .arg(filename) + .spawn() + .unwrap(); + ScriptRunner { child } } - fn id(&self) -> Pid { self.child.id() as _ } + fn id(&self) -> Pid { + self.child.id() as _ + } } impl Drop for ScriptRunner { @@ -27,7 +32,7 @@ impl Drop for ScriptRunner { struct TestRunner { #[allow(dead_code)] child: ScriptRunner, - spy: PythonSpy + spy: PythonSpy, } impl TestRunner { @@ -35,13 +40,13 @@ impl TestRunner { let child = ScriptRunner::new("python", filename); std::thread::sleep(std::time::Duration::from_millis(400)); let spy = PythonSpy::retry_new(child.id(), &config, 20).unwrap(); - TestRunner{child, spy} + TestRunner { child, spy } } } #[test] fn test_busy_loop() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -65,7 +70,10 @@ fn test_thread_reuse() { // and this caused errors on native unwind (since the native thread had // exited). Test that this works with a simple script that creates // a couple short lived threads, and then profiling with native enabled - let config = Config{native: true, ..Default::default()}; + let config = Config { + native: true, + ..Default::default() + }; let mut runner = TestRunner::new(config, "./tests/scripts/thread_reuse.py"); let mut errors = 0; @@ -86,7 +94,7 @@ fn test_thread_reuse() { #[test] fn test_long_sleep() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -102,18 +110,24 @@ fn test_long_sleep() { // Make sure the stack trace is what we expect assert_eq!(trace.frames[0].name, "longsleep"); - assert_eq!(trace.frames[0].short_filename, Some("longsleep.py".to_owned())); + assert_eq!( + trace.frames[0].short_filename, + Some("longsleep.py".to_owned()) + ); assert_eq!(trace.frames[0].line, 5); assert_eq!(trace.frames[1].name, ""); assert_eq!(trace.frames[1].line, 9); - assert_eq!(trace.frames[1].short_filename, Some("longsleep.py".to_owned())); + assert_eq!( + trace.frames[1].short_filename, + Some("longsleep.py".to_owned()) + ); assert!(!traces[0].owns_gil); // we should reliably be able to detect the thread is sleeping on osx/windows // linux+freebsd is trickier - #[cfg(any(target_os="macos", target_os="windows"))] + #[cfg(any(target_os = "macos", target_os = "windows"))] assert!(!traces[0].active); } @@ -154,7 +168,7 @@ fn test_thread_names() { #[test] fn test_recursive() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -164,7 +178,7 @@ fn test_recursive() { // there used to be a problem where the top-level functions being returned // weren't actually entry points: https://github.com/benfred/py-spy/issues/56 - // This was fixed by locking the process while we are profling it. Test that + // This was fixed by locking the process while we are profiling it. Test that // the fix works by generating some samples from a program that would exhibit // this behaviour let mut runner = TestRunner::new(Config::default(), "./tests/scripts/recursive.py"); @@ -176,7 +190,7 @@ fn test_recursive() { assert!(trace.frames.len() <= 22); - let top_level_frame = &trace.frames[trace.frames.len()-1]; + let top_level_frame = &trace.frames[trace.frames.len() - 1]; assert_eq!(top_level_frame.name, ""); assert!((top_level_frame.line == 8) || (top_level_frame.line == 7)); @@ -186,7 +200,7 @@ fn test_recursive() { #[test] fn test_unicode() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { if unsafe { libc::geteuid() } != 0 { return; @@ -199,19 +213,25 @@ fn test_unicode() { let trace = &traces[0]; assert_eq!(trace.frames[0].name, "function1"); - assert_eq!(trace.frames[0].short_filename, Some("unicode💩.py".to_owned())); + assert_eq!( + trace.frames[0].short_filename, + Some("unicode💩.py".to_owned()) + ); assert_eq!(trace.frames[0].line, 6); assert_eq!(trace.frames[1].name, ""); assert_eq!(trace.frames[1].line, 9); - assert_eq!(trace.frames[1].short_filename, Some("unicode💩.py".to_owned())); + assert_eq!( + trace.frames[1].short_filename, + Some("unicode💩.py".to_owned()) + ); assert!(!traces[0].owns_gil); } #[test] fn test_local_vars() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -219,7 +239,10 @@ fn test_local_vars() { } } - let config = Config{dump_locals: 1, ..Default::default()}; + let config = Config { + dump_locals: 1, + ..Default::default() + }; let mut runner = TestRunner::new(config, "./tests/scripts/local_vars.py"); let traces = runner.spy.get_stack_traces().unwrap(); @@ -277,14 +300,17 @@ fn test_local_vars() { // we only support dictionary lookup on python 3.6+ right now if runner.spy.version.major == 3 && runner.spy.version.minor >= 6 { - assert_eq!(local5.repr, Some("{\"a\": False, \"b\": (1, 2, 3)}".to_owned())); + assert_eq!( + local5.repr, + Some("{\"a\": False, \"b\": (1, 2, 3)}".to_owned()) + ); } } -#[cfg(not(target_os="freebsd"))] +#[cfg(not(target_os = "freebsd"))] #[test] fn test_subprocesses() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -296,7 +322,10 @@ fn test_subprocesses() { // was in a zombie state. Verify that this works now let process = ScriptRunner::new("python", "./tests/scripts/subprocesses.py"); std::thread::sleep(std::time::Duration::from_millis(1000)); - let config = Config{subprocesses: true, ..Default::default()}; + let config = Config { + subprocesses: true, + ..Default::default() + }; let sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); std::thread::sleep(std::time::Duration::from_millis(1000)); @@ -318,10 +347,10 @@ fn test_subprocesses() { } } -#[cfg(not(target_os="freebsd"))] +#[cfg(not(target_os = "freebsd"))] #[test] fn test_subprocesses_zombiechild() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -333,20 +362,26 @@ fn test_subprocesses_zombiechild() { // was in a zombie state. Verify that this works now let process = ScriptRunner::new("python", "./tests/scripts/subprocesses_zombie_child.py"); std::thread::sleep(std::time::Duration::from_millis(200)); - let config = Config{subprocesses: true, ..Default::default()}; + let config = Config { + subprocesses: true, + ..Default::default() + }; let _sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); } #[test] fn test_negative_linenumber_increment() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { return; } } - let mut runner = TestRunner::new(Config::default(), "./tests/scripts/negative_linenumber_offsets.py"); + let mut runner = TestRunner::new( + Config::default(), + "./tests/scripts/negative_linenumber_offsets.py", + ); let traces = runner.spy.get_stack_traces().unwrap(); assert_eq!(traces.len(), 1); @@ -360,22 +395,25 @@ fn test_negative_linenumber_increment() { assert!(trace.frames[1].line >= 5 && trace.frames[0].line <= 10); assert_eq!(trace.frames[2].name, ""); assert_eq!(trace.frames[2].line, 13) - }, + } 2 => { assert_eq!(trace.frames[0].name, "f"); assert!(trace.frames[0].line >= 5 && trace.frames[0].line <= 10); assert_eq!(trace.frames[1].name, ""); assert_eq!(trace.frames[1].line, 13); - }, - _ => panic!("Unknown python major version") + } + _ => panic!("Unknown python major version"), } } -#[cfg(target_os="linux")] +#[cfg(target_os = "linux")] #[test] fn test_delayed_subprocess() { let process = ScriptRunner::new("bash", "./tests/scripts/delayed_launch.sh"); - let config = Config{subprocesses: true, ..Default::default()}; + let config = Config { + subprocesses: true, + ..Default::default() + }; let sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); for sample in sampler { // should have one trace from the subprocess