Skip to content

Commit

Permalink
Introduce -Zsplit-metadata option
Browse files Browse the repository at this point in the history
This will split the crate metadata out of library files. Instead, only
the svh is preserved to allow for loading the right rmeta file. This
significantly reduces library size. In addition, it allows for cheaper
checks of whether different library files are the same crate.
  • Loading branch information
bjorn3 committed Aug 10, 2024
1 parent 7347f8e commit 5e86595
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 27 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ fn link_rlib<'a>(
let (metadata, metadata_position) = create_wrapper_file(
sess,
".rmeta".to_string(),
codegen_results.metadata.raw_data(),
codegen_results.metadata.maybe_reference(),
);
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
match metadata_position {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_ssa/src/back/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,8 @@ pub fn create_compressed_metadata_file(
symbol_name: &str,
) -> Vec<u8> {
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.raw_data());
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.maybe_reference());

let Some(mut file) = create_object_file(sess) else {
if sess.target.is_like_wasm {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,7 @@ fn test_unstable_options_tracking_hash() {
tracked!(show_span, Some(String::from("abc")));
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
tracked!(split_lto_unit, Some(true));
tracked!(split_metadata, true);
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
tracked!(stack_protector, StackProtector::All);
tracked!(teach, true);
Expand Down
15 changes: 10 additions & 5 deletions compiler/rustc_metadata/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");

// Always create a file at `metadata_filename`, even if we have nothing to write to it.
// This simplifies the creation of the output `out_filename` when requested.
Expand All @@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
// Also create the (empty) reference metadata file so it exists alongside the full one.
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
// Report the path that actually failed to be created, not the full-metadata path.
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_reference_filename, err });
});
}
MetadataKind::Uncompressed | MetadataKind::Compressed => {
encode_metadata(tcx, &metadata_filename);
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
}
};

Expand Down Expand Up @@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {

// Load metadata back to memory: codegen may need to include it in object files.
let metadata =
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
.unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});

let need_metadata_module = metadata_kind == MetadataKind::Compressed;

Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_metadata/src/locator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,11 @@ impl<'a> CrateLocator<'a> {
) {
Ok(blob) => {
if let Some(h) = self.crate_matches(&blob, &lib) {
if blob.get_header().is_reference {
if slot.is_none() {
todo!("return error");
}
}
(h, blob)
} else {
info!("metadata mismatch");
Expand Down
91 changes: 72 additions & 19 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: proc_macro_data.is_some(),
is_reference: false,
},
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
Expand Down Expand Up @@ -2162,42 +2163,61 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
// generated regardless of trailing bytes that end up in it.

pub struct EncodedMetadata {
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
mmap: Option<Mmap>,
// The declaration order matters because `full_mmap` should be dropped
// before `_temp_dir`.
full_mmap: Option<Mmap>,
reference: Option<Vec<u8>>,
// We need to carry MaybeTempDir to avoid deleting the temporary
// directory while accessing the Mmap.
_temp_dir: Option<MaybeTempDir>,
}

impl EncodedMetadata {
#[inline]
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
pub fn from_path(
path: PathBuf,
reference_path: PathBuf,
temp_dir: Option<MaybeTempDir>,
) -> std::io::Result<Self> {
let file = std::fs::File::open(&path)?;
let file_metadata = file.metadata()?;
if file_metadata.len() == 0 {
return Ok(Self { mmap: None, _temp_dir: None });
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
}
let mmap = unsafe { Some(Mmap::map(file)?) };
Ok(Self { mmap, _temp_dir: temp_dir })
let full_mmap = unsafe { Some(Mmap::map(file)?) };

let reference = std::fs::read(reference_path)?;
let reference = if reference.is_empty() { None } else { Some(reference) };

Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
}

#[inline]
pub fn full(&self) -> &[u8] {
&self.full_mmap.as_deref().unwrap_or_default()
}

#[inline]
pub fn raw_data(&self) -> &[u8] {
self.mmap.as_deref().unwrap_or_default()
pub fn maybe_reference(&self) -> &[u8] {
self.reference.as_deref().unwrap_or(self.full())
}
}

impl<S: Encoder> Encodable<S> for EncodedMetadata {
fn encode(&self, s: &mut S) {
let slice = self.raw_data();
self.reference.encode(s);

let slice = self.full();
slice.encode(s)
}
}

impl<D: Decoder> Decodable<D> for EncodedMetadata {
fn decode(d: &mut D) -> Self {
let reference = <Option<Vec<u8>>>::decode(d);

let len = d.read_usize();
let mmap = if len > 0 {
let full_mmap = if len > 0 {
let mut mmap = MmapMut::map_anon(len).unwrap();
for _ in 0..len {
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
Expand All @@ -2208,11 +2228,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
None
};

Self { mmap, _temp_dir: None }
Self { full_mmap, reference, _temp_dir: None }
}
}

pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: &Path) {
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");

// Since encoding metadata is not in a query, and nothing is cached,
Expand All @@ -2226,6 +2246,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
}

with_encode_metadata_header(tcx, path, |ecx| {
// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();

// Flush buffer to ensure backing file has the correct size.
ecx.opaque.flush();
// Record metadata size for self-profiling
tcx.prof.artifact_size(
"crate_metadata",
"crate_metadata",
ecx.opaque.file().metadata().unwrap().len(),
);

root.position.get()
});

if tcx.sess.opts.unstable_opts.split_metadata
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
{
with_encode_metadata_header(tcx, ref_path, |ecx| {
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
name: tcx.crate_name(LOCAL_CRATE),
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: false,
is_reference: true,
});
header.position.get()
});
}
}

pub fn with_encode_metadata_header(
tcx: TyCtxt<'_>,
path: &Path,
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
) {
let mut encoder = opaque::FileEncoder::new(path)
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
encoder.emit_raw_bytes(METADATA_HEADER);
Expand Down Expand Up @@ -2260,9 +2318,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
// Encode the rustc version string in a predictable location.
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);

// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();
let root_position = f(&mut ecx);

// Make sure we report any errors from writing to the file.
// If we forget this, compilation can succeed with an incomplete rmeta file,
Expand All @@ -2272,12 +2328,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
}

let file = ecx.opaque.file();
if let Err(err) = encode_root_position(file, root.position.get()) {
if let Err(err) = encode_root_position(file, root_position) {
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
}

// Record metadata size for self-profiling
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
}

fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ pub(crate) struct CrateHeader {
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
/// time ProcMacroData changes.
pub(crate) is_proc_macro_crate: bool,
/// Whether this header is a reference to a separate rmeta file.
///
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
pub(crate) is_reference: bool,
}

/// Serialized `.rmeta` data for a crate.
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2014,6 +2014,8 @@ written to standard error output)"),
by the linker"),
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable LTO unit splitting (default: no)"),
split_metadata: bool = (false, parse_bool, [TRACKED],
"split metadata out of libraries into .rmeta files"),
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]
Expand Down

0 comments on commit 5e86595

Please sign in to comment.