From c3d305fd3acdc3d694795e535870f0c46d8fde6b Mon Sep 17 00:00:00 2001 From: "Celina G. Val" Date: Tue, 24 Sep 2024 06:55:27 -0700 Subject: [PATCH 1/2] Analyze unsafe code reachability Add callgraph analysis to scanner in order to find the distance between functions in a crate and unsafe functions. For that, we build the crate call graph and collect the unsafe functions. After that, do reverse BFS traversal from the unsafe functions and store the distance to other functions. The result is stored in a new csv file. --- docs/src/rustc-hacks.md | 24 +++-- scripts/std-analysis.sh | 3 +- .../tool-scanner/scanner-test.expected | 4 +- .../tool-scanner/scanner-test.sh | 2 +- tests/script-based-pre/tool-scanner/test.rs | 13 +++ tools/scanner/src/analysis.rs | 84 ++++++++++++--- tools/scanner/src/call_graph.rs | 101 ++++++++++++++++++ tools/scanner/src/lib.rs | 9 +- 8 files changed, 212 insertions(+), 28 deletions(-) create mode 100644 tools/scanner/src/call_graph.rs diff --git a/docs/src/rustc-hacks.md b/docs/src/rustc-hacks.md index a6047613288a..9ee5b522d894 100644 --- a/docs/src/rustc-hacks.md +++ b/docs/src/rustc-hacks.md @@ -52,19 +52,27 @@ Note that pretty printing for the Rust nightly toolchain (which Kani uses) is no For example, a vector may be displayed as `vec![{...}, {...}]` on nightly Rust, when it would be displayed as `vec![Some(0), None]` on stable Rust. Hopefully, this will be fixed soon. -### CLion / IntelliJ +### RustRover / CLion This is not a great solution, but it works for now (see for more details). -Edit the `Cargo.toml` of the package that you're working on and add artificial dependencies on the `rustc` packages that you would like to explore. + +Open the `Cargo.toml` of your crate (e.g.: `kani-compiler`), and do the following: + +1. Add optional dependencies on the `rustc` crates you are using. +2. Add a feature that enable those dependencies. +3. Toggle that feature using the IDE GUI. + +Here is an example: ```toml -# This configuration doesn't exist so it shouldn't affect your build. -[target.'cfg(KANI_DEV)'.dependencies] +# ** At the bottom of the dependencies section: ** # Adjust the path here to point to a local copy of the rust compiler. -# The best way is to use the rustup path. Replace with the -# proper name to your toolchain. -rustc_driver = { path = "~/.rustup/toolchains//lib/rustlib/rustc-src/rust/compiler/rustc_driver" } -rustc_interface = { path = "~/.rustup/toolchains//lib/rustlib/rustc-src/rust/compiler/rustc_interface" } +# E.g.: ~/.rustup/toolchains//lib/rustlib/rustc-src/rust/compiler +rustc_smir = { path = "/rustc_smir", optional = true } +stable_mir = { path = "/stable_mir", optional = true } + +[features] +clion = ['rustc_smir', 'stable_mir'] ``` **Don't forget to rollback the changes before you create your PR.** diff --git a/scripts/std-analysis.sh b/scripts/std-analysis.sh index 87ac991cb00d..4399eac6cfeb 100755 --- a/scripts/std-analysis.sh +++ b/scripts/std-analysis.sh @@ -76,6 +76,7 @@ export RUSTC_LOG=error RUST_FLAGS=( "-Cpanic=abort" "-Zalways-encode-mir" + "--emit=mir" ) export RUSTFLAGS="${RUST_FLAGS[@]}" export RUSTC="$KANI_DIR/target/debug/scan" @@ -104,7 +105,7 @@ for f in $results/*overall.csv; do fname=$(basename $f) crate=${fname%_scan_overall.csv} echo -n "$crate," >> $summary - tr -d [:alpha:]_,; < $f | tr -s '\n' ',' \ + tr -d "[:alpha:]_,;" < $f | tr -s '\n' ',' \ >> $summary echo "" >> $summary done diff --git a/tests/script-based-pre/tool-scanner/scanner-test.expected b/tests/script-based-pre/tool-scanner/scanner-test.expected index f55a883434ee..7e5b3875979d 100644 --- a/tests/script-based-pre/tool-scanner/scanner-test.expected +++ b/tests/script-based-pre/tool-scanner/scanner-test.expected @@ -1,6 +1,6 @@ 5 test_scan_fn_loops.csv -19 test_scan_functions.csv +20 test_scan_functions.csv 5 test_scan_input_tys.csv 16 test_scan_overall.csv 3 test_scan_recursion.csv -5 test_scan_unsafe_ops.csv +6 test_scan_unsafe_ops.csv diff --git a/tests/script-based-pre/tool-scanner/scanner-test.sh b/tests/script-based-pre/tool-scanner/scanner-test.sh index 2cd5a33a3f8e..7ac7c5fd74dc 100755 --- a/tests/script-based-pre/tool-scanner/scanner-test.sh +++ b/tests/script-based-pre/tool-scanner/scanner-test.sh @@ -17,4 +17,4 @@ cargo run -p scanner test.rs --crate-type lib wc -l *csv popd -rm -rf ${OUT_DIR} +#rm -rf ${OUT_DIR} diff --git a/tests/script-based-pre/tool-scanner/test.rs b/tests/script-based-pre/tool-scanner/test.rs index f6a141f2a708..0cdebde134f2 100644 --- a/tests/script-based-pre/tool-scanner/test.rs +++ b/tests/script-based-pre/tool-scanner/test.rs @@ -14,6 +14,11 @@ pub fn generic() -> T { T::default() } +pub fn blah() { + ok(); + assert_eq!(u8::default(), 0); +} + pub struct RecursiveType { pub inner: Option<*const RecursiveType>, } @@ -102,3 +107,11 @@ pub fn start_recursion() { pub fn not_recursive() { let _ = ok(); } + +extern "C" { + fn external_function(); +} + +pub fn call_external() { + unsafe { external_function() }; +} diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index b32627939bf5..e68373e7e612 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -11,9 +11,10 @@ use serde::{ser::SerializeStruct, Serialize, Serializer}; use stable_mir::mir::mono::Instance; use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef}; use stable_mir::mir::{ - BasicBlock, Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind, + BasicBlock, Body, CastKind, MirVisitor, Mutability, NonDivergingIntrinsic, ProjectionElem, + Rvalue, Safety, Statement, StatementKind, Terminator, TerminatorKind, }; -use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind}; +use stable_mir::ty::{Abi, AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind}; use stable_mir::visitor::{Visitable, Visitor}; use stable_mir::{CrateDef, CrateItem}; use std::collections::{HashMap, HashSet}; @@ -23,7 +24,7 @@ use std::path::{Path, PathBuf}; #[derive(Clone, Debug)] pub struct OverallStats { /// The key and value of each counter. - counters: Vec<(&'static str, usize)>, + pub counters: Vec<(&'static str, usize)>, /// TODO: Group stats per function. fn_stats: HashMap, } @@ -35,6 +36,12 @@ struct FnStats { has_unsafe_ops: Option, has_unsupported_input: Option, has_loop: Option, + /// How many degrees of separation to unsafe code if any? + /// - `None` if this function is indeed safe. + /// - 0 if this function contains unsafe code (including invoking unsafe fns). + /// - 1 if this function calls a safe abstraction. + /// - 2+ if this function calls other functions that call safe abstractions. + unsafe_distance: Option, } impl FnStats { @@ -46,6 +53,7 @@ impl FnStats { has_unsupported_input: None, // TODO: Implement this. has_loop: None, + unsafe_distance: None, } } } @@ -230,24 +238,24 @@ impl OverallStats { macro_rules! fn_props { ($(#[$attr:meta])* - struct $name:ident { + $vis:vis struct $name:ident { $( $(#[$prop_attr:meta])* $prop:ident, )+ }) => { #[derive(Debug)] - struct $name { + $vis struct $name { fn_name: String, $($(#[$prop_attr])* $prop: usize,)+ } impl $name { - const fn num_props() -> usize { + pub const fn num_props() -> usize { [$(stringify!($prop),)+].len() } - fn new(fn_name: String) -> Self { + pub fn new(fn_name: String) -> Self { Self { fn_name, $($prop: 0,)+} } } @@ -367,7 +375,7 @@ impl<'a> Visitor for TypeVisitor<'a> { } } -fn dump_csv(mut out_path: PathBuf, data: &[T]) { +pub(crate) fn dump_csv(mut out_path: PathBuf, data: &[T]) { out_path.set_extension("csv"); info(format!("Write file: {out_path:?}")); let mut writer = WriterBuilder::new().delimiter(b';').from_path(&out_path).unwrap(); @@ -377,17 +385,23 @@ fn dump_csv(mut out_path: PathBuf, data: &[T]) { } fn_props! { - struct FnUnsafeOperations { + pub struct FnUnsafeOperations { inline_assembly, /// Dereference a raw pointer. /// This is also counted when we access a static variable since it gets translated to a raw pointer. unsafe_dereference, - /// Call an unsafe function or method. + /// Call an unsafe function or method including C-FFI. unsafe_call, /// Access or modify a mutable static variable. unsafe_static_access, /// Access fields of unions. unsafe_union_access, + /// Invoke external functions (this is a subset of `unsafe_call`. + extern_call, + /// Transmute operations. + transmute, + /// Cast raw pointer to reference. + unsafe_cast, } } @@ -417,9 +431,21 @@ impl<'a> MirVisitor for BodyVisitor<'a> { fn visit_terminator(&mut self, term: &Terminator, location: Location) { match &term.kind { TerminatorKind::Call { func, .. } => { - let fn_sig = func.ty(self.body.locals()).unwrap().kind().fn_sig().unwrap(); - if fn_sig.value.safety == Safety::Unsafe { + let TyKind::RigidTy(RigidTy::FnDef(fn_def, _)) = + func.ty(self.body.locals()).unwrap().kind() + else { + return self.super_terminator(term, location); + }; + let fn_sig = fn_def.fn_sig().skip_binder(); + if fn_sig.safety == Safety::Unsafe { self.props.unsafe_call += 1; + if !matches!( + fn_sig.abi, + Abi::Rust | Abi::RustCold | Abi::RustCall | Abi::RustIntrinsic + ) && !fn_def.has_body() + { + self.props.extern_call += 1; + } } } TerminatorKind::InlineAsm { .. } => self.props.inline_assembly += 1, @@ -428,6 +454,34 @@ impl<'a> MirVisitor for BodyVisitor<'a> { self.super_terminator(term, location) } + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + if let Rvalue::Cast(cast_kind, operand, ty) = rvalue { + match cast_kind { + CastKind::Transmute => { + self.props.transmute += 1; + } + _ => { + let operand_ty = operand.ty(self.body.locals()).unwrap(); + if ty.kind().is_ref() && operand_ty.kind().is_raw_ptr() { + self.props.unsafe_cast += 1; + } + } + } + }; + self.super_rvalue(rvalue, location); + } + + fn visit_statement(&mut self, stmt: &Statement, location: Location) { + if matches!( + &stmt.kind, + StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(_)) + ) { + // Treat this as invoking the copy intrinsic. + self.props.unsafe_call += 1; + } + self.super_statement(stmt, location) + } + fn visit_projection_elem( &mut self, place: PlaceRef, @@ -672,9 +726,9 @@ impl Recursion { } } -struct FnCallVisitor<'a> { - body: &'a Body, - fns: Vec, +pub struct FnCallVisitor<'a> { + pub body: &'a Body, + pub fns: Vec, } impl<'a> MirVisitor for FnCallVisitor<'a> { diff --git a/tools/scanner/src/call_graph.rs b/tools/scanner/src/call_graph.rs new file mode 100644 index 000000000000..81d81da317cb --- /dev/null +++ b/tools/scanner/src/call_graph.rs @@ -0,0 +1,101 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Provide different static analysis to be performed in the call graph + +use crate::analysis::{FnCallVisitor, FnUnsafeOperations, OverallStats}; +use stable_mir::mir::{MirVisitor, Safety}; +use stable_mir::ty::{FnDef, RigidTy, Ty, TyKind}; +use stable_mir::{CrateDef, CrateDefType}; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, VecDeque}; +use std::hash::{Hash, Hasher}; +use std::path::PathBuf; + +impl OverallStats { + /// Iterate over all functions defined in this crate and log any unsafe operation. + pub fn unsafe_distance(&mut self, filename: PathBuf) { + let all_items = stable_mir::all_local_items(); + let mut queue = + all_items.into_iter().filter_map(|item| Node::try_new(item.ty())).collect::>(); + // Build call graph + let mut call_graph = CallGraph::default(); + while let Some(node) = queue.pop() { + if let Entry::Vacant(e) = call_graph.nodes.entry(node.def) { + e.insert(node); + let Some(body) = node.def.body() else { + continue; + }; + let mut visitor = FnCallVisitor { body: &body, fns: vec![] }; + visitor.visit_body(&body); + queue.extend(visitor.fns.iter().map(|def| Node::try_new(def.ty()).unwrap())); + for callee in &visitor.fns { + call_graph.rev_edges.entry(*callee).or_default().push(node.def) + } + call_graph.edges.insert(node.def, visitor.fns); + } + } + + // Calculate the distance between unsafe functions and functions with unsafe operation. + let mut queue = call_graph + .nodes + .values() + .filter_map(|node| node.has_unsafe.then_some((node.def, 0))) + .collect::>(); + let mut visited: HashMap = HashMap::from_iter(queue.iter().cloned()); + while let Some(current) = queue.pop_front() { + for caller in call_graph.rev_edges.entry(current.0).or_default() { + if !visited.contains_key(caller) { + let distance = current.1 + 1; + visited.insert(*caller, distance); + queue.push_back((*caller, distance)) + } + } + } + let krate = stable_mir::local_crate(); + let transitive_unsafe = visited + .into_iter() + .filter_map(|(def, distance)| (def.krate() == krate).then_some((def.name(), distance))) + .collect::>(); + self.counters.push(("transitive_unsafe", transitive_unsafe.len())); + crate::analysis::dump_csv(filename, &transitive_unsafe); + } +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Node { + def: FnDef, + is_unsafe: bool, + has_unsafe: bool, +} + +impl Node { + fn try_new(ty: Ty) -> Option { + let kind = ty.kind(); + let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind else { + return None; + }; + let has_unsafe = if let Some(body) = def.body() { + let unsafe_ops = FnUnsafeOperations::new(def.name()).collect(&body); + unsafe_ops.has_unsafe() + } else { + true + }; + let fn_sig = kind.fn_sig().unwrap(); + let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe; + Some(Node { def, is_unsafe, has_unsafe }) + } +} + +impl Hash for Node { + fn hash(&self, state: &mut H) { + self.def.hash(state) + } +} + +#[derive(Default, Debug)] +struct CallGraph { + nodes: HashMap, + edges: HashMap>, + rev_edges: HashMap>, +} diff --git a/tools/scanner/src/lib.rs b/tools/scanner/src/lib.rs index 7f9555781ccf..ad66842b9751 100644 --- a/tools/scanner/src/lib.rs +++ b/tools/scanner/src/lib.rs @@ -10,7 +10,8 @@ #![feature(rustc_private)] -mod analysis; +pub mod analysis; +pub mod call_graph; extern crate rustc_driver; extern crate rustc_interface; @@ -65,6 +66,8 @@ pub enum Analysis { FnLoops, /// Collect information about recursion via direct calls. Recursion, + /// Collect information about transitive usage of unsafe. + UnsafeDistance, } fn info(msg: String) { @@ -75,6 +78,9 @@ fn info(msg: String) { /// This function invoke the required analyses in the given order. fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> { + if stable_mir::local_crate().name == "build_script_build" { + return ControlFlow::Continue(()); + } let object_file = tcx.output_filenames(()).path(OutputType::Object); let base_path = object_file.as_path().to_path_buf(); // Use name for now to make it more friendly. Change to base_path.file_stem() to avoid conflict. @@ -96,6 +102,7 @@ fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> { Analysis::UnsafeOps => crate_stats.unsafe_operations(out_path), Analysis::FnLoops => crate_stats.loops(out_path), Analysis::Recursion => crate_stats.recursion(out_path), + Analysis::UnsafeDistance => crate_stats.unsafe_distance(out_path), } } crate_stats.store_csv(base_path, &file_stem); From a4198ac53a2c01d35de63b355fcf81f746ca7e89 Mon Sep 17 00:00:00 2001 From: "Celina G. Val" Date: Fri, 31 Jan 2025 12:42:45 -0800 Subject: [PATCH 2/2] Address PR comments --- docs/src/rustc-hacks.md | 24 +++++++------------ .../tool-scanner/scanner-test.expected | 3 ++- tests/script-based-pre/tool-scanner/test.rs | 6 +---- tools/scanner/src/analysis.rs | 6 ++--- tools/scanner/src/call_graph.rs | 6 ++++- 5 files changed, 19 insertions(+), 26 deletions(-) diff --git a/docs/src/rustc-hacks.md b/docs/src/rustc-hacks.md index 9ee5b522d894..a6047613288a 100644 --- a/docs/src/rustc-hacks.md +++ b/docs/src/rustc-hacks.md @@ -52,27 +52,19 @@ Note that pretty printing for the Rust nightly toolchain (which Kani uses) is no For example, a vector may be displayed as `vec![{...}, {...}]` on nightly Rust, when it would be displayed as `vec![Some(0), None]` on stable Rust. Hopefully, this will be fixed soon. -### RustRover / CLion +### CLion / IntelliJ This is not a great solution, but it works for now (see for more details). - -Open the `Cargo.toml` of your crate (e.g.: `kani-compiler`), and do the following: - -1. Add optional dependencies on the `rustc` crates you are using. -2. Add a feature that enable those dependencies. -3. Toggle that feature using the IDE GUI. - -Here is an example: +Edit the `Cargo.toml` of the package that you're working on and add artificial dependencies on the `rustc` packages that you would like to explore. ```toml -# ** At the bottom of the dependencies section: ** +# This configuration doesn't exist so it shouldn't affect your build. +[target.'cfg(KANI_DEV)'.dependencies] # Adjust the path here to point to a local copy of the rust compiler. -# E.g.: ~/.rustup/toolchains//lib/rustlib/rustc-src/rust/compiler -rustc_smir = { path = "/rustc_smir", optional = true } -stable_mir = { path = "/stable_mir", optional = true } - -[features] -clion = ['rustc_smir', 'stable_mir'] +# The best way is to use the rustup path. Replace with the +# proper name to your toolchain. +rustc_driver = { path = "~/.rustup/toolchains//lib/rustlib/rustc-src/rust/compiler/rustc_driver" } +rustc_interface = { path = "~/.rustup/toolchains//lib/rustlib/rustc-src/rust/compiler/rustc_interface" } ``` **Don't forget to rollback the changes before you create your PR.** diff --git a/tests/script-based-pre/tool-scanner/scanner-test.expected b/tests/script-based-pre/tool-scanner/scanner-test.expected index 7e5b3875979d..44c9704c3b98 100644 --- a/tests/script-based-pre/tool-scanner/scanner-test.expected +++ b/tests/script-based-pre/tool-scanner/scanner-test.expected @@ -1,6 +1,7 @@ 5 test_scan_fn_loops.csv 20 test_scan_functions.csv 5 test_scan_input_tys.csv -16 test_scan_overall.csv +17 test_scan_overall.csv 3 test_scan_recursion.csv +9 test_scan_unsafe_distance.csv 6 test_scan_unsafe_ops.csv diff --git a/tests/script-based-pre/tool-scanner/test.rs b/tests/script-based-pre/tool-scanner/test.rs index 0cdebde134f2..be45a653d55e 100644 --- a/tests/script-based-pre/tool-scanner/test.rs +++ b/tests/script-based-pre/tool-scanner/test.rs @@ -14,11 +14,6 @@ pub fn generic() -> T { T::default() } -pub fn blah() { - ok(); - assert_eq!(u8::default(), 0); -} - pub struct RecursiveType { pub inner: Option<*const RecursiveType>, } @@ -112,6 +107,7 @@ extern "C" { fn external_function(); } +/// Ensure scanner finds unsafe calls to external functions. pub fn call_external() { unsafe { external_function() }; } diff --git a/tools/scanner/src/analysis.rs b/tools/scanner/src/analysis.rs index db90fd6b2fd9..2e8cdd0203f0 100644 --- a/tools/scanner/src/analysis.rs +++ b/tools/scanner/src/analysis.rs @@ -1,7 +1,7 @@ // Copyright Kani Contributors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! Provide different static analysis to be performed in the crate under compilation +//! Provide passes that perform intra-function analysis on the crate under compilation use crate::info; use csv::WriterBuilder; @@ -251,11 +251,11 @@ macro_rules! fn_props { } impl $name { - pub const fn num_props() -> usize { + $vis const fn num_props() -> usize { [$(stringify!($prop),)+].len() } - pub fn new(fn_name: String) -> Self { + $vis fn new(fn_name: String) -> Self { Self { fn_name, $($prop: 0,)+} } } diff --git a/tools/scanner/src/call_graph.rs b/tools/scanner/src/call_graph.rs index 81d81da317cb..0e2d1760492a 100644 --- a/tools/scanner/src/call_graph.rs +++ b/tools/scanner/src/call_graph.rs @@ -1,7 +1,11 @@ // Copyright Kani Contributors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! Provide different static analysis to be performed in the call graph +//! Provide passes that perform inter-function analysis on the crate under compilation. +//! +//! This module also includes a `CallGraph` structure to help the analysis. +//! For now, we build the CallGraph as part of the pass, but as we add more analysis, +//! the call-graph could be reused by different analysis. use crate::analysis::{FnCallVisitor, FnUnsafeOperations, OverallStats}; use stable_mir::mir::{MirVisitor, Safety};