diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index 6bf237b8ed5df..63afb236a6375 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -201,6 +201,7 @@ provide! { <'tcx> tcx, def_id, other, cdata, const_param_default => { table } thir_abstract_const => { table } optimized_mir => { table } + optimized_mir_summary => { table } mir_for_ctfe => { table } promoted_mir => { table } def_span => { table } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 50d983754e89c..4e6451d687c5b 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -1352,6 +1352,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { debug!("EntryBuilder::encode_mir({:?})", def_id); if encode_opt { record!(self.tables.optimized_mir[def_id.to_def_id()] <- self.tcx.optimized_mir(def_id)); + record!(self.tables.optimized_mir_summary[def_id.to_def_id()] <- self.tcx.optimized_mir_summary(def_id)); } if encode_const { record!(self.tables.mir_for_ctfe[def_id.to_def_id()] <- self.tcx.mir_for_ctfe(def_id)); diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 0f291f9264777..9fac218636986 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -360,6 +360,7 @@ define_tables! { optimized_mir: Table<DefIndex, LazyValue<mir::Body<'static>>>, mir_for_ctfe: Table<DefIndex, LazyValue<mir::Body<'static>>>, promoted_mir: Table<DefIndex, LazyValue<IndexVec<mir::Promoted, mir::Body<'static>>>>, + optimized_mir_summary: Table<DefIndex, LazyValue<mir::Summary>>, // FIXME(compiler-errors): Why isn't this a LazyArray? 
thir_abstract_const: Table<DefIndex, LazyValue<&'static [ty::abstract_const::Node<'static>]>>, impl_parent: Table<DefIndex, RawDefId>, diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index f7311ebdabfd9..7e8a84b2160e5 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -2900,3 +2900,10 @@ impl Location { } } } + +#[derive(Debug, Copy, Clone, HashStable, Encodable, Decodable)] +pub struct Summary { + pub inlining_cost: usize, + pub bbcount: usize, + pub diverges: bool, +} diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs index 466a0fc25f7d1..1fe91c1a02105 100644 --- a/compiler/rustc_middle/src/query/mod.rs +++ b/compiler/rustc_middle/src/query/mod.rs @@ -431,6 +431,13 @@ rustc_queries! { separate_provide_extern } + /// Summary of `optimized_mir` to avoid decoding it when we are not planning to use it. + query optimized_mir_summary(key: DefId) -> mir::Summary { + desc { |tcx| "summarizing MIR for `{}`", tcx.def_path_str(key) } + cache_on_disk_if { key.is_local() } + separate_provide_extern + } + /// Returns coverage summary info for a function, after executing the `InstrumentCoverage` /// MIR pass (assuming the -Cinstrument-coverage option is enabled). query coverageinfo(key: ty::InstanceDef<'tcx>) -> mir::CoverageInfo { diff --git a/compiler/rustc_middle/src/ty/mod.rs b/compiler/rustc_middle/src/ty/mod.rs index 31c523aaca9ae..ed036af060476 100644 --- a/compiler/rustc_middle/src/ty/mod.rs +++ b/compiler/rustc_middle/src/ty/mod.rs @@ -17,7 +17,7 @@ pub use self::IntVarValue::*; pub use self::Variance::*; use crate::metadata::ModChild; use crate::middle::privacy::AccessLevels; -use crate::mir::{Body, GeneratorLayout}; +use crate::mir::{self, Body, GeneratorLayout}; use crate::traits::{self, Reveal}; use crate::ty; use crate::ty::fast_reject::SimplifiedType; @@ -2160,6 +2160,38 @@ impl<'tcx> TyCtxt<'tcx> { } } + /// Returns the MIR summary of `instance`; const-like items and shims get an all-zero one. 
+ pub fn instance_mir_summary(self, instance: ty::InstanceDef<'tcx>) -> mir::Summary { + match instance { + ty::InstanceDef::Item(def) => match self.def_kind(def.did) { + DefKind::Const + | DefKind::Static(..) + | DefKind::AssocConst + | DefKind::Ctor(..) + | DefKind::AnonConst + | DefKind::InlineConst => { + mir::Summary { inlining_cost: 0, bbcount: 0, diverges: false } + } + // If the caller wants `mir_for_ctfe` of a function they should not be using + // `instance_mir`, so we'll assume const fn also wants the optimized version. + _ => { + assert_eq!(def.const_param_did, None); + self.optimized_mir_summary(def.did) + } + }, + ty::InstanceDef::VTableShim(..) + | ty::InstanceDef::ReifyShim(..) + | ty::InstanceDef::Intrinsic(..) + | ty::InstanceDef::FnPtrShim(..) + | ty::InstanceDef::Virtual(..) + | ty::InstanceDef::ClosureOnceShim { .. } + | ty::InstanceDef::DropGlue(..) + | ty::InstanceDef::CloneShim(..) => { + mir::Summary { inlining_cost: 0, bbcount: 0, diverges: false } + } + } + } + // FIXME(@lcnr): Remove this function. pub fn get_attrs_unchecked(self, did: DefId) -> &'tcx [ast::Attribute] { if let Some(did) = did.as_local() { diff --git a/compiler/rustc_middle/src/ty/parameterized.rs b/compiler/rustc_middle/src/ty/parameterized.rs index e189ee2fc4db1..e0cf5fa2b34fa 100644 --- a/compiler/rustc_middle/src/ty/parameterized.rs +++ b/compiler/rustc_middle/src/ty/parameterized.rs @@ -54,6 +54,7 @@ trivially_parameterized_over_tcx! 
{ crate::middle::codegen_fn_attrs::CodegenFnAttrs, crate::middle::exported_symbols::SymbolExportInfo, crate::mir::ConstQualifs, + crate::mir::Summary, ty::Generics, ty::ImplPolarity, ty::ReprOptions, diff --git a/compiler/rustc_mir_transform/src/inline.rs b/compiler/rustc_mir_transform/src/inline.rs index 1e46b0a0e8164..da261891d4de3 100644 --- a/compiler/rustc_mir_transform/src/inline.rs +++ b/compiler/rustc_mir_transform/src/inline.rs @@ -151,8 +151,7 @@ impl<'tcx> Inliner<'tcx> { let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id()); self.check_codegen_attributes(callsite, callee_attrs)?; self.check_mir_is_available(caller_body, &callsite.callee)?; - let callee_body = self.tcx.instance_mir(callsite.callee.def); - self.check_mir_body(callsite, callee_body, callee_attrs)?; + let callee_body = self.check_mir_body(callsite, callee_attrs)?; if !self.tcx.consider_optimizing(|| { format!("Inline {:?} into {:?}", callsite.callee, caller_body.source) @@ -385,14 +384,17 @@ impl<'tcx> Inliner<'tcx> { /// Returns inlining decision that is based on the examination of callee MIR body. /// Assumes that codegen attributes have been checked for compatibility already. 
- #[instrument(level = "debug", skip(self, callee_body))] + #[instrument(level = "debug", skip(self))] fn check_mir_body( &self, callsite: &CallSite<'tcx>, - callee_body: &Body<'tcx>, callee_attrs: &CodegenFnAttrs, - ) -> Result<(), &'static str> { - let tcx = self.tcx; + ) -> Result<&'tcx Body<'tcx>, &'static str> { + if let InlineAttr::Always = callee_attrs.inline { + debug!("INLINING {:?} because inline(always)", callsite,); + let callee_body = self.tcx.instance_mir(callsite.callee.def); + return Ok(callee_body); + } let mut threshold = if callee_attrs.requests_inline() { self.tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100) @@ -400,133 +402,36 @@ impl<'tcx> Inliner<'tcx> { self.tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50) }; + let callee_summary = self.tcx.instance_mir_summary(callsite.callee.def); // Give a bonus functions with a small number of blocks, // We normally have two or three blocks for even // very small functions. - if callee_body.basic_blocks().len() <= 3 { + if callee_summary.bbcount <= 3 { threshold += threshold / 4; } + if callee_summary.diverges { + threshold = 0; + } debug!(" final inline threshold = {}", threshold); - // FIXME: Give a bonus to functions with only a single caller - let mut first_block = true; - let mut cost = 0; - - // Traverse the MIR manually so we can account for the effects of - // inlining on the CFG. - let mut work_list = vec![START_BLOCK]; - let mut visited = BitSet::new_empty(callee_body.basic_blocks().len()); - while let Some(bb) = work_list.pop() { - if !visited.insert(bb.index()) { - continue; - } - let blk = &callee_body.basic_blocks()[bb]; - - for stmt in &blk.statements { - // Don't count StorageLive/StorageDead in the inlining cost. 
- match stmt.kind { - StatementKind::StorageLive(_) - | StatementKind::StorageDead(_) - | StatementKind::Deinit(_) - | StatementKind::Nop => {} - _ => cost += INSTR_COST, - } - } - let term = blk.terminator(); - let mut is_drop = false; - match term.kind { - TerminatorKind::Drop { ref place, target, unwind } - | TerminatorKind::DropAndReplace { ref place, target, unwind, .. } => { - is_drop = true; - work_list.push(target); - // If the place doesn't actually need dropping, treat it like - // a regular goto. - let ty = callsite.callee.subst_mir(self.tcx, &place.ty(callee_body, tcx).ty); - if ty.needs_drop(tcx, self.param_env) { - cost += CALL_PENALTY; - if let Some(unwind) = unwind { - cost += LANDINGPAD_PENALTY; - work_list.push(unwind); - } - } else { - cost += INSTR_COST; - } - } - - TerminatorKind::Unreachable | TerminatorKind::Call { target: None, .. } - if first_block => - { - // If the function always diverges, don't inline - // unless the cost is zero - threshold = 0; - } - - TerminatorKind::Call { func: Operand::Constant(ref f), cleanup, .. } => { - if let ty::FnDef(def_id, _) = - *callsite.callee.subst_mir(self.tcx, &f.literal.ty()).kind() - { - // Don't give intrinsics the extra penalty for calls - if tcx.is_intrinsic(def_id) { - cost += INSTR_COST; - } else { - cost += CALL_PENALTY; - } - } else { - cost += CALL_PENALTY; - } - if cleanup.is_some() { - cost += LANDINGPAD_PENALTY; - } - } - TerminatorKind::Assert { cleanup, .. } => { - cost += CALL_PENALTY; - - if cleanup.is_some() { - cost += LANDINGPAD_PENALTY; - } - } - TerminatorKind::Resume => cost += RESUME_PENALTY, - TerminatorKind::InlineAsm { cleanup, .. } => { - cost += INSTR_COST; - - if cleanup.is_some() { - cost += LANDINGPAD_PENALTY; - } - } - _ => cost += INSTR_COST, - } - - if !is_drop { - for succ in term.successors() { - work_list.push(succ); - } - } - - first_block = false; + // Fast reject based on unsubstituted MIR. 
+ if callee_summary.inlining_cost >= 2 * threshold { + return Err("summary cost above threshold"); } - // Count up the cost of local variables and temps, if we know the size - // use that, otherwise we use a moderately-large dummy cost. - - let ptr_size = tcx.data_layout.pointer_size.bytes(); + let callee_body = self.tcx.instance_mir(callsite.callee.def); + let cost_info = body_cost(self.tcx, self.param_env, callee_body, |ty| { + callsite.callee.subst_mir(self.tcx, &ty) + }); - for v in callee_body.vars_and_temps_iter() { - let ty = callsite.callee.subst_mir(self.tcx, &callee_body.local_decls[v].ty); - // Cost of the var is the size in machine-words, if we know - // it. - if let Some(size) = type_size_of(tcx, self.param_env, ty) { - cost += ((size + ptr_size - 1) / ptr_size) as usize; - } else { - cost += UNKNOWN_SIZE_COST; - } - } + let cost = cost_info.cost; if let InlineAttr::Always = callee_attrs.inline { debug!("INLINING {:?} because inline(always) [cost={}]", callsite, cost); - Ok(()) + Ok(callee_body) } else if cost <= threshold { debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold); - Ok(()) + Ok(callee_body) } else { debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold); Err("cost above threshold") @@ -1012,3 +917,130 @@ impl<'tcx> MutVisitor<'tcx> for Integrator<'_, 'tcx> { } } } + +pub struct InlineCostInfo { + pub cost: usize, + pub bbcount: usize, + pub diverges: bool, +} + +pub fn body_cost<'tcx>( + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + callee_body: &Body<'tcx>, + subst_mir: impl Fn(Ty<'tcx>) -> Ty<'tcx>, +) -> InlineCostInfo { + // FIXME: Give a bonus to functions with only a single caller + let mut diverges = false; + let mut first_block = true; + let mut cost = 0; + + // Traverse the MIR manually so we can account for the effects of + // inlining on the CFG. 
+ let mut work_list = vec![START_BLOCK]; + let mut visited = BitSet::new_empty(callee_body.basic_blocks().len()); + while let Some(bb) = work_list.pop() { + if !visited.insert(bb.index()) { + continue; + } + let blk = &callee_body.basic_blocks()[bb]; + + for stmt in &blk.statements { + // Don't count StorageLive/StorageDead in the inlining cost. + match stmt.kind { + StatementKind::StorageLive(_) + | StatementKind::StorageDead(_) + | StatementKind::Deinit(_) + | StatementKind::Nop => {} + _ => cost += INSTR_COST, + } + } + let term = blk.terminator(); + let mut is_drop = false; + match term.kind { + TerminatorKind::Drop { ref place, target, unwind } + | TerminatorKind::DropAndReplace { ref place, target, unwind, .. } => { + is_drop = true; + work_list.push(target); + // If the place doesn't actually need dropping, treat it like + // a regular goto. + let ty = subst_mir(place.ty(callee_body, tcx).ty); + if ty.needs_drop(tcx, param_env) { + cost += CALL_PENALTY; + if let Some(unwind) = unwind { + cost += LANDINGPAD_PENALTY; + work_list.push(unwind); + } + } else { + cost += INSTR_COST; + } + } + + TerminatorKind::Unreachable | TerminatorKind::Call { target: None, .. } + if first_block => + { + // If the function always diverges, don't inline + // unless the cost is zero + diverges = true; + } + + TerminatorKind::Call { func: Operand::Constant(ref f), cleanup, .. } => { + if let ty::FnDef(def_id, _) = *subst_mir(f.literal.ty()).kind() { + // Don't give intrinsics the extra penalty for calls + if tcx.is_intrinsic(def_id) { + cost += INSTR_COST; + } else { + cost += CALL_PENALTY; + } + } else { + cost += CALL_PENALTY; + } + if cleanup.is_some() { + cost += LANDINGPAD_PENALTY; + } + } + TerminatorKind::Assert { cleanup, .. } => { + cost += CALL_PENALTY; + + if cleanup.is_some() { + cost += LANDINGPAD_PENALTY; + } + } + TerminatorKind::Resume => cost += RESUME_PENALTY, + TerminatorKind::InlineAsm { cleanup, .. 
} => { + cost += INSTR_COST; + + if cleanup.is_some() { + cost += LANDINGPAD_PENALTY; + } + } + _ => cost += INSTR_COST, + } + + if !is_drop { + for succ in term.successors() { + work_list.push(succ); + } + } + + first_block = false; + } + + // Count up the cost of local variables and temps, if we know the size + // use that, otherwise we use a moderately-large dummy cost. + + let ptr_size = tcx.data_layout.pointer_size.bytes(); + + for v in callee_body.vars_and_temps_iter() { + let ty = subst_mir(callee_body.local_decls[v].ty); + // Cost of the var is the size in machine-words, if we know + // it. + if let Some(size) = type_size_of(tcx, param_env, ty) { + cost += ((size + ptr_size - 1) / ptr_size) as usize; + } else { + cost += UNKNOWN_SIZE_COST; + } + } + + InlineCostInfo { cost, diverges, bbcount: callee_body.basic_blocks().len() } +} diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs index 40dc9fe9a05bb..842682e817485 100644 --- a/compiler/rustc_mir_transform/src/lib.rs +++ b/compiler/rustc_mir_transform/src/lib.rs @@ -26,7 +26,7 @@ use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_hir::intravisit::{self, Visitor}; use rustc_index::vec::IndexVec; use rustc_middle::mir::visit::Visitor as _; -use rustc_middle::mir::{traversal, Body, ConstQualifs, MirPass, MirPhase, Promoted}; +use rustc_middle::mir::{traversal, Body, ConstQualifs, MirPass, MirPhase, Promoted, Summary}; use rustc_middle::ty::query::Providers; use rustc_middle::ty::{self, TyCtxt, TypeVisitable}; use rustc_span::{Span, Symbol}; @@ -120,6 +120,7 @@ pub fn provide(providers: &mut Providers) { mir_for_ctfe, mir_for_ctfe_of_const_arg, optimized_mir, + optimized_mir_summary, is_mir_available, is_ctfe_mir_available: |tcx, did| is_mir_available(tcx, did), mir_callgraph_reachable: inline::cycle::mir_callgraph_reachable, @@ -573,3 +574,11 @@ fn promoted_mir<'tcx>( tcx.arena.alloc(promoted) } + +fn optimized_mir_summary<'tcx>(tcx: TyCtxt<'tcx>, did: DefId) -> 
Summary { + let body = tcx.optimized_mir(did); + let param_env = tcx.param_env_reveal_all_normalized(did); + let cost_info = inline::body_cost(tcx, param_env, body, |ty| ty); + let inline::InlineCostInfo { cost, bbcount, diverges } = cost_info; + Summary { inlining_cost: cost, bbcount, diverges } +}