-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #8349 - ehuss:fix-lto, r=alexcrichton
Some LTO fixes. This reworks the LTO computation a little to address a few issues:
- `cargo build` in a project with both a lib and bin would not engage the optimization introduced in #8192 where the lib *should* be compiled with `-C linker-plugin-lto` (bitcode only). This happened because the old code was starting root units as `Lto::None`. The solution here is to conditionally choose the starting Lto for roots.
- A project with a dylib dependency would fail to build. It was building the dylib with `-C linker-plugin-lto`, which is not valid.
- A project with a bin/lib would build the lib differently based on whether or not it was selected. This changes it so that the lib is built the same way in both cases. See `lto::between_builds`, where on the second build the lib is now fresh.
- Tests/benchmarks of a `lib` target will now support LTO.
- Treats example libs a little more consistently as regular libs.

I scattered some comments throughout, hopefully it's not too difficult to follow. Closes #8337
- Loading branch information
Showing
5 changed files
with
481 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,131 +1,192 @@ | ||
use crate::core::compiler::{Context, Unit}; | ||
use crate::core::compiler::{CompileMode, Context, CrateType, Unit}; | ||
use crate::core::interning::InternedString; | ||
use crate::core::profiles; | ||
use crate::core::TargetKind; | ||
|
||
use crate::util::errors::CargoResult; | ||
use std::collections::hash_map::{Entry, HashMap}; | ||
|
||
/// Possible ways to run rustc and request various parts of LTO. | ||
#[derive(Copy, Clone, PartialEq, Eq, Hash)] | ||
/// | ||
/// Variant | Flag | Object Code | Bitcode | ||
/// -------------------|------------------------|-------------|-------- | ||
/// `Run` | `-C lto=foo` | n/a | n/a | ||
/// `Off` | `-C lto=off` | n/a | n/a | ||
/// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓ | ||
/// `ObjectAndBitcode` | | ✓ | ✓ | ||
/// `OnlyObject` | `-C embed-bitcode=no` | ✓ | | ||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] | ||
pub enum Lto { | ||
/// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional. | ||
/// LTO is run for this rustc, and it's `-Clto=foo`. If the given value is | ||
/// None, that corresponds to `-Clto` with no argument, which means do | ||
/// "fat" LTO. | ||
Run(Option<InternedString>), | ||
|
||
/// This rustc invocation only needs to produce bitcode, there's no need to | ||
/// produce object files, so we can pass `-Clinker-plugin-lto` | ||
/// LTO has been explicitly listed as "off". This means no thin-local-LTO, | ||
/// no LTO anywhere, I really mean it! | ||
Off, | ||
|
||
/// This rustc invocation only needs to produce bitcode (it is *only* used | ||
/// for LTO), there's no need to produce object files, so we can pass | ||
/// `-Clinker-plugin-lto` | ||
OnlyBitcode, | ||
|
||
/// This rustc invocation needs to embed bitcode in object files. This means | ||
/// that object files may be used for a normal link, and the crate may be | ||
/// loaded for LTO later, so both are required. | ||
EmbedBitcode, | ||
ObjectAndBitcode, | ||
|
||
/// Nothing related to LTO is required of this compilation. | ||
None, | ||
/// This should not include bitcode. This is primarily to reduce disk | ||
/// space usage. | ||
OnlyObject, | ||
} | ||
|
||
pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> { | ||
let mut map = HashMap::new(); | ||
for unit in cx.bcx.roots.iter() { | ||
calculate(cx, &mut map, unit, Lto::None)?; | ||
let root_lto = match unit.profile.lto { | ||
// LTO not requested, no need for bitcode. | ||
profiles::Lto::Bool(false) | profiles::Lto::Off => Lto::OnlyObject, | ||
_ => { | ||
let crate_types = unit.target.rustc_crate_types(); | ||
if unit.target.for_host() { | ||
Lto::OnlyObject | ||
} else if needs_object(&crate_types) { | ||
lto_when_needs_object(&crate_types) | ||
} else { | ||
// This may or may not participate in LTO, let's start | ||
// with the minimum requirements. This may be expanded in | ||
// `calculate` below if necessary. | ||
Lto::OnlyBitcode | ||
} | ||
} | ||
}; | ||
calculate(cx, &mut map, unit, root_lto)?; | ||
} | ||
cx.lto = map; | ||
Ok(()) | ||
} | ||
|
||
/// Whether or not any of these crate types need object code. | ||
/// | ||
/// Returns `true` as soon as one entry of `crate_types` reports either | ||
/// `can_lto()` or `is_dynamic()`; returns `false` otherwise, including | ||
/// for an empty slice. | ||
fn needs_object(crate_types: &[CrateType]) -> bool { | ||
crate_types.iter().any(|k| k.can_lto() || k.is_dynamic()) | ||
} | ||
|
||
/// Lto setting to use when this unit needs object code. | ||
/// | ||
/// Returns `Lto::ObjectAndBitcode` when at least one of `crate_types` | ||
/// reports `can_lto()`, and `Lto::OnlyObject` otherwise. The visible | ||
/// call sites only reach this after `needs_object` returned `true` for | ||
/// the same crate types. | ||
fn lto_when_needs_object(crate_types: &[CrateType]) -> Lto { | ||
if crate_types.iter().any(CrateType::can_lto) { | ||
// A mixed rlib/cdylib whose parent is running LTO. This | ||
// needs both, for bitcode in the rlib (for LTO) and the | ||
// cdylib requires object code. | ||
Lto::ObjectAndBitcode | ||
} else { | ||
// A dylib whose parent is running LTO. rustc currently | ||
// doesn't support LTO with dylibs, so bitcode is not | ||
// needed. | ||
Lto::OnlyObject | ||
} | ||
} | ||
|
||
fn calculate( | ||
cx: &Context<'_, '_>, | ||
map: &mut HashMap<Unit, Lto>, | ||
unit: &Unit, | ||
lto_for_deps: Lto, | ||
parent_lto: Lto, | ||
) -> CargoResult<()> { | ||
let (lto, lto_for_deps) = if unit.target.for_host() { | ||
let crate_types = match unit.mode { | ||
// Note: Doctest ignores LTO, but for now we'll compute it as-if it is | ||
// a Bin, in case it is ever supported in the future. | ||
CompileMode::Test | CompileMode::Bench | CompileMode::Doctest => vec![CrateType::Bin], | ||
// Notes on other modes: | ||
// - Check: Treat as the underlying type, it doesn't really matter. | ||
// - Doc: LTO is N/A for the Doc unit itself since rustdoc does not | ||
// support codegen flags. We still compute the dependencies, which | ||
// are mostly `Check`. | ||
// - RunCustomBuild is ignored because it is always "for_host". | ||
_ => unit.target.rustc_crate_types(), | ||
}; | ||
// LTO can only be performed if *all* of the crate types support it. | ||
// For example, a cdylib/rlib combination won't allow LTO. | ||
let all_lto_types = crate_types.iter().all(CrateType::can_lto); | ||
// Compute the LTO based on the profile, and what our parent requires. | ||
let lto = if unit.target.for_host() { | ||
// Disable LTO for host builds since we only really want to perform LTO | ||
// for the final binary, and LTO on plugins/build scripts/proc macros is | ||
// largely not desired. | ||
(Lto::None, Lto::None) | ||
} else if unit.target.is_linkable() { | ||
// A "linkable" target is one that produces an rlib or dylib in this | ||
// case. In this scenario we cannot pass `-Clto` to the compiler because | ||
// that is an invalid request, this is simply a dependency. What we do, | ||
// however, is respect the request for whatever the dependencies need to | ||
// have. | ||
// | ||
// Here if no LTO is requested then we keep it turned off. Otherwise LTO | ||
// is requested in some form, which means ideally we need just what's | ||
// requested, nothing else. It's possible, though, to have libraries | ||
// which are both a cdylib and an rlib, for example, which means that | ||
// object files are getting sent to the linker. That means that we need | ||
// to fully embed bitcode rather than simply generating just bitcode. | ||
let has_non_linkable_lib = match unit.target.kind() { | ||
TargetKind::Lib(kinds) => kinds.iter().any(|k| !k.is_linkable()), | ||
_ => true, | ||
}; | ||
match lto_for_deps { | ||
Lto::None => (Lto::None, Lto::None), | ||
_ if has_non_linkable_lib => (Lto::EmbedBitcode, Lto::EmbedBitcode), | ||
other => (other, other), | ||
Lto::OnlyObject | ||
} else if all_lto_types { | ||
// Note that this ignores the `parent_lto` because this isn't a | ||
// linkable crate type; this unit is not being embedded in the parent. | ||
match unit.profile.lto { | ||
profiles::Lto::Named(s) => Lto::Run(Some(s)), | ||
profiles::Lto::Off => Lto::Off, | ||
profiles::Lto::Bool(true) => Lto::Run(None), | ||
profiles::Lto::Bool(false) => Lto::OnlyObject, | ||
} | ||
} else { | ||
// Otherwise this target can perform LTO and we're going to read the | ||
// LTO value out of the profile. Note that we ignore `lto_for_deps` | ||
// here because if a unit depends on another unit that can LTO this | ||
// isn't a rustc-level dependency but rather a Cargo-level dependency. | ||
// For example this is an integration test depending on a binary. | ||
match unit.profile.lto { | ||
profiles::Lto::Named(s) => match s.as_str() { | ||
"n" | "no" | "off" => (Lto::Run(Some(s)), Lto::None), | ||
_ => (Lto::Run(Some(s)), Lto::OnlyBitcode), | ||
}, | ||
profiles::Lto::Bool(true) => (Lto::Run(None), Lto::OnlyBitcode), | ||
profiles::Lto::Bool(false) => (Lto::None, Lto::None), | ||
match (parent_lto, needs_object(&crate_types)) { | ||
// An rlib whose parent is running LTO, we only need bitcode. | ||
(Lto::Run(_), false) => Lto::OnlyBitcode, | ||
// LTO when something needs object code. | ||
(Lto::Run(_), true) | (Lto::OnlyBitcode, true) => lto_when_needs_object(&crate_types), | ||
// LTO is disabled, no need for bitcode. | ||
(Lto::Off, _) => Lto::OnlyObject, | ||
// If this doesn't have any requirements, or the requirements are | ||
// already satisfied, then stay with our parent. | ||
(_, false) | (Lto::OnlyObject, true) | (Lto::ObjectAndBitcode, true) => parent_lto, | ||
} | ||
}; | ||
|
||
match map.entry(unit.clone()) { | ||
// Merge the computed LTO. If this unit appears multiple times in the | ||
// graph, the merge may expand the requirements. | ||
let merged_lto = match map.entry(unit.clone()) { | ||
// If we haven't seen this unit before then insert our value and keep | ||
// going. | ||
Entry::Vacant(v) => { | ||
v.insert(lto); | ||
} | ||
Entry::Vacant(v) => *v.insert(lto), | ||
|
||
Entry::Occupied(mut v) => { | ||
let result = match (lto, v.get()) { | ||
// No change in requirements. | ||
(Lto::OnlyBitcode, Lto::OnlyBitcode) => Lto::OnlyBitcode, | ||
(Lto::OnlyObject, Lto::OnlyObject) => Lto::OnlyObject, | ||
|
||
// Once we're running LTO we keep running LTO. We should always | ||
// calculate the same thing here each iteration because if we | ||
// see this twice then it means, for example, two unit tests | ||
// depend on a binary, which is normal. | ||
(Lto::Run(s), _) | (_, &Lto::Run(s)) => Lto::Run(s), | ||
|
||
// If we calculated the same thing as before then we can bail | ||
// out quickly. | ||
(Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()), | ||
// Off means off! This has the same reasoning as `Lto::Run`. | ||
(Lto::Off, _) | (_, Lto::Off) => Lto::Off, | ||
|
||
// Once a target has requested both, that's the maximal amount | ||
// of work that can be done, so we just keep doing that work. | ||
(Lto::ObjectAndBitcode, _) | (_, Lto::ObjectAndBitcode) => Lto::ObjectAndBitcode, | ||
|
||
// Upgrade so that both requirements can be met. | ||
// | ||
// This is where the trickiness happens. This unit needs | ||
// bitcode and the previously calculated value for this unit | ||
// says it didn't need bitcode (or vice versa). This means that | ||
// we're a shared dependency between some targets which require | ||
// LTO and some which don't. This means that instead of being | ||
// either only-objects or only-bitcode we have to embed both in | ||
// rlibs (used for different compilations), so we switch to | ||
// embedding bitcode. | ||
(Lto::OnlyBitcode, Lto::None) | (Lto::None, Lto::OnlyBitcode) => Lto::EmbedBitcode, | ||
|
||
// Once a target has requested bitcode embedding that's the | ||
// maximal amount of work that can be done, so we just keep | ||
// doing that work. | ||
(Lto::EmbedBitcode, _) | (_, Lto::EmbedBitcode) => Lto::EmbedBitcode, | ||
// including both. | ||
(Lto::OnlyObject, Lto::OnlyBitcode) | (Lto::OnlyBitcode, Lto::OnlyObject) => { | ||
Lto::ObjectAndBitcode | ||
} | ||
}; | ||
// No need to recurse if we calculated the same value as before. | ||
if result == *v.get() { | ||
return Ok(()); | ||
} | ||
v.insert(result); | ||
result | ||
} | ||
} | ||
}; | ||
|
||
for dep in cx.unit_deps(unit) { | ||
calculate(cx, map, &dep.unit, lto_for_deps)?; | ||
calculate(cx, map, &dep.unit, merged_lto)?; | ||
} | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.