From e2219254695b290ff1ef9d8c2015939af9a9ab1f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 30 Apr 2020 12:24:50 -0700 Subject: [PATCH] Don't force rustc to do codegen for LTO builds This commit updates Cargo's implementation of LTO builds to do less work and hopefully be faster when doing a cold build. Additionaly this should save space on disk! The general idea is that the compiler does not need object files if it's only going to perform LTO with some artifacts. In this case all rustc needs to do is load bitcode from dependencies. This means that if you're doing an LTO build generating object code for intermediate dependencies is just wasted time! Here Cargo is updated with more intrusive knowledge about LTO. Cargo will now analyze the dependency graph to figure out which crates are being compiled with LTO, and then it will figure out which dependencies only need to have bitcode in them. Pure-bitcode artifacts are emitted with the `-Clinker-plugin-lto` flag. Some artifacts are still used in multiple scenarios (such as those shared between build scripts and final artifacts), so those are not compiled with `-Clinker-plugin-lto` since the linker is not guaranteed to know how to perform LTO. This functionality was recently implemented in rust-lang/rust#71528 where rustc is now capable of reading bitcode from `-Clinker-plugin-lto` rlibs. Previously rustc would only read its own format of bitcode, but this has now been extended! This support is now on nightly, hence this PR. --- .../compiler/build_context/target_info.rs | 2 +- src/cargo/core/compiler/context/mod.rs | 8 + src/cargo/core/compiler/fingerprint.rs | 8 +- src/cargo/core/compiler/lto.rs | 116 ++++++++ src/cargo/core/compiler/mod.rs | 41 +-- tests/testsuite/lto.rs | 254 ++++++++++++++++++ 6 files changed, 410 insertions(+), 19 deletions(-) create mode 100644 src/cargo/core/compiler/lto.rs diff --git a/src/cargo/core/compiler/build_context/target_info.rs b/src/cargo/core/compiler/build_context/target_info.rs index 8d9274c0bda..17ea6c6411e 100644 --- a/src/cargo/core/compiler/build_context/target_info.rs +++ b/src/cargo/core/compiler/build_context/target_info.rs @@ -40,7 +40,7 @@ pub struct TargetInfo { pub rustflags: Vec, /// Extra flags to pass to `rustdoc`, see `env_args`. pub rustdocflags: Vec, - /// Remove this when it hits stable (1.44) + /// Remove this when it hits stable (1.45) pub supports_embed_bitcode: Option, } diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index 0855fc09ce7..ae5c1382947 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -16,6 +16,7 @@ use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts}; use super::fingerprint::Fingerprint; use super::job_queue::JobQueue; use super::layout::Layout; +use super::lto::Lto; use super::unit_graph::UnitDep; use super::{BuildContext, Compilation, CompileKind, CompileMode, Executor, FileFlavor}; @@ -72,6 +73,11 @@ pub struct Context<'a, 'cfg> { /// jobserver clients for each Unit (which eventually becomes a rustc /// process). pub rustc_clients: HashMap, + + /// Map of the LTO-status of each unit. This indicates what sort of + /// compilation is happening (only object, only bitcode, both, etc), and is + /// precalculated early on. + pub lto: HashMap, } impl<'a, 'cfg> Context<'a, 'cfg> { @@ -111,6 +117,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> { rmeta_required: HashSet::new(), rustc_clients: HashMap::new(), pipelining, + lto: HashMap::new(), }) } @@ -123,6 +130,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> { self.prepare_units()?; self.prepare()?; custom_build::build_map(&mut self)?; + super::lto::generate(&mut self)?; self.check_collistions()?; for unit in &self.bcx.roots { diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 89263cc7c9b..598abdec8e1 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -71,6 +71,7 @@ //! -C incremental=… flag | ✓ | //! mtime of sources | ✓[^3] | //! RUSTFLAGS/RUSTDOCFLAGS | ✓ | +//! LTO flags | ✓ | //! is_std | | ✓ //! //! [^1]: Build script and bin dependencies are not included. @@ -1241,7 +1242,12 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult), + + /// This rustc invocation only needs to produce bitcode, there's no need to + /// produce object files, so we can pass `-Clinker-plugin-lto` + OnlyBitcode, + + /// This rustc invocation needs to embed bitcode in object files. This means + /// that object files may be used for a normal link, and the crate may be + /// loaded for LTO later, so both are required. + EmbedBitcode, + + /// Nothing related to LTO is required of this compilation. + None, +} + +pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> { + let mut map = HashMap::new(); + for unit in cx.bcx.roots.iter() { + calculate(cx, &mut map, unit, false)?; + } + cx.lto = map; + Ok(()) +} + +fn calculate( + cx: &Context<'_, '_>, + map: &mut HashMap, + unit: &Unit, + require_bitcode: bool, +) -> CargoResult<()> { + let (lto, require_bitcode_for_deps) = if unit.target.for_host() { + // Disable LTO for host builds since we only really want to perform LTO + // for the final binary, and LTO on plugins/build scripts/proc macros is + // largely not desired. + (Lto::None, false) + } else if unit.target.can_lto() { + // Otherwise if this target can perform LTO then we're going to read the + // LTO value out of the profile. + assert!(!require_bitcode); // can't depend on binaries/staticlib/etc + match unit.profile.lto { + profiles::Lto::Named(s) => match s.as_str() { + "n" | "no" | "off" => (Lto::Run(Some(s)), false), + _ => (Lto::Run(Some(s)), true), + }, + profiles::Lto::Bool(true) => (Lto::Run(None), true), + profiles::Lto::Bool(false) => (Lto::None, false), + } + } else if require_bitcode { + // Otherwise we're a dependency of something, an rlib. This means that + // if our parent required bitcode of some kind then we need to generate + // bitcode. + (Lto::OnlyBitcode, true) + } else { + (Lto::None, false) + }; + + match map.entry(unit.clone()) { + // If we haven't seen this unit before then insert our value and keep + // going. + Entry::Vacant(v) => { + v.insert(lto); + } + + Entry::Occupied(mut v) => { + let result = match (lto, v.get()) { + // Targets which execute LTO cannot be depended on, so these + // units should only show up once in the dependency graph, so we + // should never hit this case. + (Lto::Run(_), _) | (_, Lto::Run(_)) => { + unreachable!("lto-able targets shouldn't show up twice") + } + + // If we calculated the same thing as before then we can bail + // out quickly. + (Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()), + + // This is where the trickiness happens. This unit needs + // bitcode and the previously calculated value for this unit + // says it didn't need bitcode (or vice versa). This means that + // we're a shared dependency between some targets which require + // LTO and some which don't. This means that instead of being + // either only-objects or only-bitcode we have to embed both in + // rlibs (used for different compilations), so we switch to + // embedding bitcode. + (Lto::OnlyBitcode, Lto::None) + | (Lto::OnlyBitcode, Lto::EmbedBitcode) + | (Lto::None, Lto::OnlyBitcode) + | (Lto::None, Lto::EmbedBitcode) => Lto::EmbedBitcode, + + // Currently this variant is never calculated above, so no need + // to handle this case. + (Lto::EmbedBitcode, _) => unreachable!(), + }; + // No need to recurse if we calculated the same value as before. + if result == *v.get() { + return Ok(()); + } + v.insert(result); + } + } + + for dep in cx.unit_deps(unit) { + calculate(cx, map, &dep.unit, require_bitcode_for_deps)?; + } + Ok(()) +} diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs index c5b09373e07..dd2dbea39ea 100644 --- a/src/cargo/core/compiler/mod.rs +++ b/src/cargo/core/compiler/mod.rs @@ -10,6 +10,7 @@ mod job; mod job_queue; mod layout; mod links; +mod lto; mod output_depinfo; pub mod standard_lib; mod timings; @@ -42,7 +43,7 @@ use self::output_depinfo::output_depinfo; use self::unit_graph::UnitDep; pub use crate::core::compiler::unit::{Unit, UnitInterner}; use crate::core::manifest::TargetSourcePath; -use crate::core::profiles::{Lto, PanicStrategy, Profile}; +use crate::core::profiles::{PanicStrategy, Profile}; use crate::core::{Edition, Feature, InternedString, PackageId, Target}; use crate::util::errors::{self, CargoResult, CargoResultExt, ProcessError, VerboseError}; use crate::util::machine_message::Message; @@ -740,7 +741,6 @@ fn build_base_args( let bcx = cx.bcx; let Profile { ref opt_level, - ref lto, codegen_units, debuginfo, debug_assertions, @@ -793,24 +793,31 @@ fn build_base_args( cmd.arg("-C").arg(format!("panic={}", panic)); } - // Disable LTO for host builds as prefer_dynamic and it are mutually - // exclusive. - let lto_possible = unit.target.can_lto() && !unit.target.for_host(); - match lto { - Lto::Bool(true) => { - if lto_possible { - cmd.args(&["-C", "lto"]); - } + match cx.lto[&unit] { + lto::Lto::Run(None) => { + cmd.arg("-C").arg("lto"); + } + lto::Lto::Run(Some(s)) => { + cmd.arg("-C").arg(format!("lto={}", s)); } - Lto::Named(s) => { - if lto_possible { - cmd.arg("-C").arg(format!("lto={}", s)); + lto::Lto::EmbedBitcode => {} // this is rustc's default + lto::Lto::OnlyBitcode => { + // Note that this compiler flag, like the one below, is just an + // optimization in terms of build time. If we don't pass it then + // both object code and bitcode will show up. This is lagely just + // compat until the feature lands on stable and we can remove the + // conditional branch. + if cx + .bcx + .target_data + .info(CompileKind::Host) + .supports_embed_bitcode + .unwrap() + { + cmd.arg("-Clinker-plugin-lto"); } } - // If LTO isn't being enabled then there's no need for bitcode to be - // present in the intermediate artifacts, so shave off some build time - // by removing it. - Lto::Bool(false) => { + lto::Lto::None => { if cx .bcx .target_data diff --git a/tests/testsuite/lto.rs b/tests/testsuite/lto.rs index 4d77b2186e2..a671e27e04d 100644 --- a/tests/testsuite/lto.rs +++ b/tests/testsuite/lto.rs @@ -3,6 +3,10 @@ use cargo_test_support::registry::Package; #[cargo_test] fn with_deps() { + if !cargo_test_support::is_nightly() { + return; + } + Package::new("bar", "0.0.1").publish(); let p = project() @@ -22,5 +26,255 @@ fn with_deps() { ) .file("src/main.rs", "extern crate bar; fn main() {}") .build(); + p.cargo("build -v --release") + .with_stderr_contains("[..]`rustc[..]--crate-name bar[..]-Clinker-plugin-lto[..]`") + .with_stderr_contains("[..]`rustc[..]--crate-name test[..]-C lto[..]`") + .run(); +} + +#[cargo_test] +fn shared_deps() { + if !cargo_test_support::is_nightly() { + return; + } + + Package::new("bar", "0.0.1").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "test" + version = "0.0.0" + + [dependencies] + bar = "*" + + [build-dependencies] + bar = "*" + + [profile.release] + lto = true + "#, + ) + .file("build.rs", "extern crate bar; fn main() {}") + .file("src/main.rs", "extern crate bar; fn main() {}") + .build(); + p.cargo("build -v --release") + .with_stderr_contains("[..]`rustc[..]--crate-name test[..]-C lto[..]`") + .run(); +} + +#[cargo_test] +fn build_dep_not_ltod() { + if !cargo_test_support::is_nightly() { + return; + } + + Package::new("bar", "0.0.1").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "test" + version = "0.0.0" + + [build-dependencies] + bar = "*" + + [profile.release] + lto = true + "#, + ) + .file("build.rs", "extern crate bar; fn main() {}") + .file("src/main.rs", "fn main() {}") + .build(); + p.cargo("build -v --release") + .with_stderr_contains("[..]`rustc[..]--crate-name bar[..]-Cembed-bitcode=no[..]`") + .with_stderr_contains("[..]`rustc[..]--crate-name test[..]-C lto[..]`") + .run(); +} + +#[cargo_test] +fn complicated() { + if !cargo_test_support::is_nightly() { + return; + } + + Package::new("dep-shared", "0.0.1") + .file("src/lib.rs", "pub fn foo() {}") + .publish(); + Package::new("dep-normal2", "0.0.1") + .file("src/lib.rs", "pub fn foo() {}") + .publish(); + Package::new("dep-normal", "0.0.1") + .dep("dep-shared", "*") + .dep("dep-normal2", "*") + .file( + "src/lib.rs", + " + pub fn foo() { + dep_shared::foo(); + dep_normal2::foo(); + } + ", + ) + .publish(); + Package::new("dep-build2", "0.0.1") + .file("src/lib.rs", "pub fn foo() {}") + .publish(); + Package::new("dep-build", "0.0.1") + .dep("dep-shared", "*") + .dep("dep-build2", "*") + .file( + "src/lib.rs", + " + pub fn foo() { + dep_shared::foo(); + dep_build2::foo(); + } + ", + ) + .publish(); + Package::new("dep-proc-macro2", "0.0.1") + .file("src/lib.rs", "pub fn foo() {}") + .publish(); + Package::new("dep-proc-macro", "0.0.1") + .proc_macro(true) + .dep("dep-shared", "*") + .dep("dep-proc-macro2", "*") + .file( + "src/lib.rs", + " + extern crate proc_macro; + use proc_macro::TokenStream; + + #[proc_macro_attribute] + pub fn foo(_: TokenStream, a: TokenStream) -> TokenStream { + dep_shared::foo(); + dep_proc_macro2::foo(); + a + } + ", + ) + .publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "test" + version = "0.0.0" + + [lib] + crate-type = ['cdylib', 'staticlib'] + + [dependencies] + dep-normal = "*" + dep-proc-macro = "*" + + [build-dependencies] + dep-build = "*" + + [profile.release] + lto = true + "#, + ) + .file("build.rs", "fn main() { dep_build::foo() }") + .file( + "src/main.rs", + "#[dep_proc_macro::foo] fn main() { dep_normal::foo() }", + ) + .file( + "src/lib.rs", + "#[dep_proc_macro::foo] pub fn foo() { dep_normal::foo() }", + ) + .build(); + p.cargo("build -v --release") + // normal deps and their transitive dependencies do not need object + // code, so they should have linker-plugin-lto specified + .with_stderr_contains("[..]`rustc[..]--crate-name dep_normal2 [..]-Clinker-plugin-lto[..]`") + .with_stderr_contains("[..]`rustc[..]--crate-name dep_normal [..]-Clinker-plugin-lto[..]`") + // build dependencies and their transitive deps don't need any bitcode, + // so embedding should be turned off + .with_stderr_contains("[..]`rustc[..]--crate-name dep_build2 [..]-Cembed-bitcode=no[..]`") + .with_stderr_contains("[..]`rustc[..]--crate-name dep_build [..]-Cembed-bitcode=no[..]`") + .with_stderr_contains( + "[..]`rustc[..]--crate-name build_script_build [..]-Cembed-bitcode=no[..]`", + ) + // proc macro deps are the same as build deps here + .with_stderr_contains( + "[..]`rustc[..]--crate-name dep_proc_macro2 [..]-Cembed-bitcode=no[..]`", + ) + .with_stderr_contains( + "[..]`rustc[..]--crate-name dep_proc_macro [..]-Cembed-bitcode=no[..]`", + ) + .with_stderr_contains("[..]`rustc[..]--crate-name test [..]--crate-type bin[..]-C lto[..]`") + .with_stderr_contains( + "[..]`rustc[..]--crate-name test [..]--crate-type cdylib[..]-C lto[..]`", + ) + .with_stderr_contains("[..]`rustc[..]--crate-name dep_shared [..]`") + .with_stderr_does_not_contain("[..]--crate-name dep_shared[..]-C lto[..]") + .with_stderr_does_not_contain("[..]--crate-name dep_shared[..]-Clinker-plugin-lto[..]") + .with_stderr_does_not_contain("[..]--crate-name dep_shared[..]-Cembed-bitcode[..]") + .run(); +} + +#[cargo_test] +fn off_in_manifest_works() { + if !cargo_test_support::is_nightly() { + return; + } + + Package::new("bar", "0.0.1") + .file("src/lib.rs", "pub fn foo() {}") + .publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "test" + version = "0.0.0" + + [dependencies] + bar = "*" + + [profile.release] + lto = "off" + "#, + ) + .file("src/main.rs", "fn main() { bar::foo() }") + .build(); + p.cargo("build -v --release").run(); +} + +#[cargo_test] +fn between_builds() { + if !cargo_test_support::is_nightly() { + return; + } + + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "test" + version = "0.0.0" + + [profile.release] + lto = true + "#, + ) + .file("src/lib.rs", "pub fn foo() {}") + .file("src/main.rs", "fn main() { test::foo() }") + .build(); + p.cargo("build -v --release --lib").run(); p.cargo("build -v --release").run(); }