Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Analyze unsafe code reachability #3546

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/std-analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ export RUSTC_LOG=error
RUST_FLAGS=(
"-Cpanic=abort"
"-Zalways-encode-mir"
"--emit=mir"
)
export RUSTFLAGS="${RUST_FLAGS[@]}"
export RUSTC="$KANI_DIR/target/debug/scan"
Expand Down
7 changes: 4 additions & 3 deletions tests/script-based-pre/tool-scanner/scanner-test.expected
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
5 test_scan_fn_loops.csv
19 test_scan_functions.csv
20 test_scan_functions.csv
5 test_scan_input_tys.csv
16 test_scan_overall.csv
17 test_scan_overall.csv
3 test_scan_recursion.csv
5 test_scan_unsafe_ops.csv
9 test_scan_unsafe_distance.csv
6 test_scan_unsafe_ops.csv
2 changes: 1 addition & 1 deletion tests/script-based-pre/tool-scanner/scanner-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ cargo run -p scanner test.rs --crate-type lib
wc -l *csv

popd
rm -rf ${OUT_DIR}
#rm -rf ${OUT_DIR}
celinval marked this conversation as resolved.
Show resolved Hide resolved
9 changes: 9 additions & 0 deletions tests/script-based-pre/tool-scanner/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,12 @@ pub fn start_recursion() {
pub fn not_recursive() {
let _ = ok();
}

extern "C" {
fn external_function();
}

/// Ensure scanner finds unsafe calls to external functions.
pub fn call_external() {
unsafe { external_function() };
}
86 changes: 70 additions & 16 deletions tools/scanner/src/analysis.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Provide different static analysis to be performed in the crate under compilation
//! Provide passes that perform intra-function analysis on the crate under compilation

use crate::info;
use csv::WriterBuilder;
Expand All @@ -11,9 +11,10 @@ use serde::{Serialize, Serializer, ser::SerializeStruct};
use stable_mir::mir::mono::Instance;
use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef};
use stable_mir::mir::{
BasicBlock, Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind,
BasicBlock, Body, CastKind, MirVisitor, Mutability, NonDivergingIntrinsic, ProjectionElem,
Rvalue, Safety, Statement, StatementKind, Terminator, TerminatorKind,
};
use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
use stable_mir::ty::{Abi, AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
use stable_mir::visitor::{Visitable, Visitor};
use stable_mir::{CrateDef, CrateItem};
use std::collections::{HashMap, HashSet};
Expand All @@ -23,7 +24,7 @@ use std::path::{Path, PathBuf};
#[derive(Clone, Debug)]
pub struct OverallStats {
/// The key and value of each counter.
counters: Vec<(&'static str, usize)>,
pub counters: Vec<(&'static str, usize)>,
/// TODO: Group stats per function.
fn_stats: HashMap<CrateItem, FnStats>,
}
Expand All @@ -35,6 +36,12 @@ struct FnStats {
has_unsafe_ops: Option<bool>,
has_unsupported_input: Option<bool>,
has_loop: Option<bool>,
/// How many degrees of separation to unsafe code if any?
/// - `None` if this function is indeed safe.
/// - 0 if this function contains unsafe code (including invoking unsafe fns).
/// - 1 if this function calls a safe abstraction.
/// - 2+ if this function calls other functions that call safe abstractions.
unsafe_distance: Option<usize>,
}

impl FnStats {
Expand All @@ -46,6 +53,7 @@ impl FnStats {
has_unsupported_input: None,
// TODO: Implement this.
has_loop: None,
unsafe_distance: None,
}
}
}
Expand Down Expand Up @@ -230,24 +238,24 @@ impl OverallStats {

macro_rules! fn_props {
($(#[$attr:meta])*
struct $name:ident {
$vis:vis struct $name:ident {
$(
$(#[$prop_attr:meta])*
$prop:ident,
)+
}) => {
#[derive(Debug)]
struct $name {
$vis struct $name {
fn_name: String,
$($(#[$prop_attr])* $prop: usize,)+
}

impl $name {
const fn num_props() -> usize {
$vis const fn num_props() -> usize {
[$(stringify!($prop),)+].len()
}

fn new(fn_name: String) -> Self {
$vis fn new(fn_name: String) -> Self {
Self { fn_name, $($prop: 0,)+}
}
}
Expand Down Expand Up @@ -367,7 +375,7 @@ impl Visitor for TypeVisitor<'_> {
}
}

fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
pub(crate) fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
out_path.set_extension("csv");
info(format!("Write file: {out_path:?}"));
let mut writer = WriterBuilder::new().delimiter(b';').from_path(&out_path).unwrap();
Expand All @@ -377,17 +385,23 @@ fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
}

fn_props! {
struct FnUnsafeOperations {
pub struct FnUnsafeOperations {
inline_assembly,
/// Dereference a raw pointer.
/// This is also counted when we access a static variable since it gets translated to a raw pointer.
unsafe_dereference,
/// Call an unsafe function or method.
/// Call an unsafe function or method including C-FFI.
unsafe_call,
/// Access or modify a mutable static variable.
unsafe_static_access,
/// Access fields of unions.
unsafe_union_access,
/// Invoke external functions (this is a subset of `unsafe_call`.
extern_call,
/// Transmute operations.
transmute,
/// Cast raw pointer to reference.
unsafe_cast,
}
}

Expand Down Expand Up @@ -417,9 +431,21 @@ impl MirVisitor for BodyVisitor<'_> {
fn visit_terminator(&mut self, term: &Terminator, location: Location) {
match &term.kind {
TerminatorKind::Call { func, .. } => {
let fn_sig = func.ty(self.body.locals()).unwrap().kind().fn_sig().unwrap();
if fn_sig.value.safety == Safety::Unsafe {
let TyKind::RigidTy(RigidTy::FnDef(fn_def, _)) =
func.ty(self.body.locals()).unwrap().kind()
else {
return self.super_terminator(term, location);
};
let fn_sig = fn_def.fn_sig().skip_binder();
if fn_sig.safety == Safety::Unsafe {
self.props.unsafe_call += 1;
if !matches!(
fn_sig.abi,
Abi::Rust | Abi::RustCold | Abi::RustCall | Abi::RustIntrinsic
) && !fn_def.has_body()
{
self.props.extern_call += 1;
}
}
}
TerminatorKind::InlineAsm { .. } => self.props.inline_assembly += 1,
Expand All @@ -428,6 +454,34 @@ impl MirVisitor for BodyVisitor<'_> {
self.super_terminator(term, location)
}

fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) {
if let Rvalue::Cast(cast_kind, operand, ty) = rvalue {
match cast_kind {
CastKind::Transmute => {
self.props.transmute += 1;
}
_ => {
let operand_ty = operand.ty(self.body.locals()).unwrap();
if ty.kind().is_ref() && operand_ty.kind().is_raw_ptr() {
self.props.unsafe_cast += 1;
}
}
}
};
self.super_rvalue(rvalue, location);
}

fn visit_statement(&mut self, stmt: &Statement, location: Location) {
if matches!(
&stmt.kind,
StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(_))
) {
// Treat this as invoking the copy intrinsic.
self.props.unsafe_call += 1;
}
self.super_statement(stmt, location)
}

fn visit_projection_elem(
&mut self,
place: PlaceRef,
Expand Down Expand Up @@ -672,9 +726,9 @@ impl Recursion {
}
}

struct FnCallVisitor<'a> {
body: &'a Body,
fns: Vec<FnDef>,
pub struct FnCallVisitor<'a> {
pub body: &'a Body,
pub fns: Vec<FnDef>,
}

impl MirVisitor for FnCallVisitor<'_> {
Expand Down
105 changes: 105 additions & 0 deletions tools/scanner/src/call_graph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Provide passes that perform inter-function analysis on the crate under compilation.
//!
//! This module also includes a `CallGraph` structure to help the analysis.
//! For now, we build the CallGraph as part of the pass, but as we add more analysis,
//! the call-graph could be reused by different analysis.

use crate::analysis::{FnCallVisitor, FnUnsafeOperations, OverallStats};
use stable_mir::mir::{MirVisitor, Safety};
use stable_mir::ty::{FnDef, RigidTy, Ty, TyKind};
use stable_mir::{CrateDef, CrateDefType};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::hash::{Hash, Hasher};
use std::path::PathBuf;

impl OverallStats {
/// Iterate over all functions defined in this crate and log any unsafe operation.
pub fn unsafe_distance(&mut self, filename: PathBuf) {
let all_items = stable_mir::all_local_items();
let mut queue =
all_items.into_iter().filter_map(|item| Node::try_new(item.ty())).collect::<Vec<_>>();
// Build call graph
let mut call_graph = CallGraph::default();
while let Some(node) = queue.pop() {
if let Entry::Vacant(e) = call_graph.nodes.entry(node.def) {
e.insert(node);
let Some(body) = node.def.body() else {
continue;
};
let mut visitor = FnCallVisitor { body: &body, fns: vec![] };
visitor.visit_body(&body);
queue.extend(visitor.fns.iter().map(|def| Node::try_new(def.ty()).unwrap()));
for callee in &visitor.fns {
call_graph.rev_edges.entry(*callee).or_default().push(node.def)
}
call_graph.edges.insert(node.def, visitor.fns);
}
}

// Calculate the distance between unsafe functions and functions with unsafe operation.
let mut queue = call_graph
.nodes
.values()
.filter_map(|node| node.has_unsafe.then_some((node.def, 0)))
.collect::<VecDeque<_>>();
let mut visited: HashMap<FnDef, u16> = HashMap::from_iter(queue.iter().cloned());
while let Some(current) = queue.pop_front() {
for caller in call_graph.rev_edges.entry(current.0).or_default() {
if !visited.contains_key(caller) {
let distance = current.1 + 1;
visited.insert(*caller, distance);
queue.push_back((*caller, distance))
}
}
}
let krate = stable_mir::local_crate();
let transitive_unsafe = visited
.into_iter()
.filter_map(|(def, distance)| (def.krate() == krate).then_some((def.name(), distance)))
.collect::<Vec<_>>();
self.counters.push(("transitive_unsafe", transitive_unsafe.len()));
crate::analysis::dump_csv(filename, &transitive_unsafe);
}
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct Node {
def: FnDef,
is_unsafe: bool,
has_unsafe: bool,
}

impl Node {
fn try_new(ty: Ty) -> Option<Node> {
let kind = ty.kind();
let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind else {
return None;
};
let has_unsafe = if let Some(body) = def.body() {
let unsafe_ops = FnUnsafeOperations::new(def.name()).collect(&body);
unsafe_ops.has_unsafe()
} else {
true
};
let fn_sig = kind.fn_sig().unwrap();
let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe;
Some(Node { def, is_unsafe, has_unsafe })
}
}

impl Hash for Node {
fn hash<H: Hasher>(&self, state: &mut H) {
self.def.hash(state)
}
}

#[derive(Default, Debug)]
struct CallGraph {
nodes: HashMap<FnDef, Node>,
edges: HashMap<FnDef, Vec<FnDef>>,
rev_edges: HashMap<FnDef, Vec<FnDef>>,
}
9 changes: 8 additions & 1 deletion tools/scanner/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

#![feature(rustc_private)]

mod analysis;
pub mod analysis;
pub mod call_graph;

extern crate rustc_driver;
extern crate rustc_interface;
Expand Down Expand Up @@ -65,6 +66,8 @@ pub enum Analysis {
FnLoops,
/// Collect information about recursion via direct calls.
Recursion,
/// Collect information about transitive usage of unsafe.
UnsafeDistance,
}

fn info(msg: String) {
Expand All @@ -75,6 +78,9 @@ fn info(msg: String) {

/// This function invoke the required analyses in the given order.
fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> {
if stable_mir::local_crate().name == "build_script_build" {
return ControlFlow::Continue(());
}
let object_file = tcx.output_filenames(()).path(OutputType::Object);
let base_path = object_file.as_path().to_path_buf();
// Use name for now to make it more friendly. Change to base_path.file_stem() to avoid conflict.
Expand All @@ -96,6 +102,7 @@ fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> {
Analysis::UnsafeOps => crate_stats.unsafe_operations(out_path),
Analysis::FnLoops => crate_stats.loops(out_path),
Analysis::Recursion => crate_stats.recursion(out_path),
Analysis::UnsafeDistance => crate_stats.unsafe_distance(out_path),
}
}
crate_stats.store_csv(base_path, &file_stem);
Expand Down
Loading