Skip to content

Commit

Permalink
Auto merge of rust-lang#83214 - cjgillot:dep-map, r=michaelwoerister
Browse files Browse the repository at this point in the history
Mmap the incremental data instead of reading it.

Instead of reading the full incremental state into memory using `fs::read`, we memmap it using a private read-only file-backed map.
This allows the system to reclaim any memory we are not using, while ensuring we are not polluted by
outside modifications to the file.

Suggested in rust-lang#83036 (comment) by `@bjorn3`
  • Loading branch information
bors committed Sep 6, 2021
2 parents 1698e3c + bcefd48 commit 11bbb52
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 88 deletions.
78 changes: 71 additions & 7 deletions compiler/rustc_incremental/src/persist/file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
use std::env;
use std::fs;
use std::io::{self, Read};
use std::path::Path;
use std::path::{Path, PathBuf};

use rustc_data_structures::memmap::Mmap;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use rustc_serialize::Encoder;
use rustc_session::Session;

/// The first few bytes of files generated by incremental compilation.
const FILE_MAGIC: &[u8] = b"RSIC";
Expand All @@ -28,7 +30,7 @@ const HEADER_FORMAT_VERSION: u16 = 0;
/// the Git commit hash.
const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");

pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
pub(crate) fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
stream.emit_raw_bytes(FILE_MAGIC)?;
stream.emit_raw_bytes(&[
(HEADER_FORMAT_VERSION >> 0) as u8,
Expand All @@ -41,6 +43,61 @@ pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileE
stream.emit_raw_bytes(rustc_version.as_bytes())
}

pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
where
F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
{
debug!("save: storing data in {}", path_buf.display());

// Delete the old file, if any.
// Note: It's important that we actually delete the old file and not just
// truncate and overwrite it, since it might be a shared hard-link, the
// underlying data of which we don't want to modify.
//
// We have to ensure we have dropped the memory maps to this file
// before performing this removal.
match fs::remove_file(&path_buf) {
Ok(()) => {
debug!("save: remove old file");
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => {
sess.err(&format!(
"unable to delete old {} at `{}`: {}",
name,
path_buf.display(),
err
));
return;
}
}

let mut encoder = match FileEncoder::new(&path_buf) {
Ok(encoder) => encoder,
Err(err) => {
sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
return;
}
};

if let Err(err) = write_file_header(&mut encoder, sess.is_nightly_build()) {
sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
return;
}

if let Err(err) = encode(&mut encoder) {
sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
return;
}

if let Err(err) = encoder.flush() {
sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
return;
}

debug!("save: data written to disk successfully");
}

/// Reads the contents of a file with a file header as defined in this module.
///
/// - Returns `Ok(Some((data, pos)))` if the file existed and was generated by a
Expand All @@ -54,14 +111,21 @@ pub fn read_file(
report_incremental_info: bool,
path: &Path,
nightly_build: bool,
) -> io::Result<Option<(Vec<u8>, usize)>> {
let data = match fs::read(path) {
Ok(data) => data,
) -> io::Result<Option<(Mmap, usize)>> {
let file = match fs::File::open(path) {
Ok(file) => file,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(err),
};
// SAFETY: This process must not modify nor remove the backing file while the memory map lives.
// For the dep-graph and the work product index, the map is dropped as soon as the decoding is done.
// For the query result cache, the memory map is dropped in save_dep_graph before calling
// save_in and trying to remove the backing file.
//
// There is no way to prevent another process from modifying this file.
let mmap = unsafe { Mmap::map(file) }?;

let mut file = io::Cursor::new(data);
let mut file = io::Cursor::new(&*mmap);

// Check FILE_MAGIC
{
Expand Down Expand Up @@ -103,7 +167,7 @@ pub fn read_file(
}

let post_header_start_pos = file.position() as usize;
Ok(Some((file.into_inner(), post_header_start_pos)))
Ok(Some((mmap, post_header_start_pos)))
}

fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) {
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_incremental/src/persist/load.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Code to save/load the dep-graph from files.
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::memmap::Mmap;
use rustc_middle::dep_graph::{SerializedDepGraph, WorkProduct, WorkProductId};
use rustc_middle::ty::OnDiskCache;
use rustc_serialize::opaque::Decoder;
Expand Down Expand Up @@ -48,7 +49,7 @@ fn load_data(
report_incremental_info: bool,
path: &Path,
nightly_build: bool,
) -> LoadResult<(Vec<u8>, usize)> {
) -> LoadResult<(Mmap, usize)> {
match file_format::read_file(report_incremental_info, path, nightly_build) {
Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos },
Ok(None) => {
Expand Down
67 changes: 11 additions & 56 deletions compiler/rustc_incremental/src/persist/save.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use rustc_serialize::Encodable as RustcEncodable;
use rustc_session::Session;
use std::fs;
use std::io;
use std::path::PathBuf;

use super::data::*;
use super::dirty_clean;
Expand Down Expand Up @@ -44,7 +42,14 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
join(
move || {
sess.time("incr_comp_persist_result_cache", || {
save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e));
// Drop the memory map so that we can remove the file and write to it.
if let Some(odc) = &tcx.on_disk_cache {
odc.drop_serialized_data(tcx);
}

file_format::save_in(sess, query_cache_path, "query cache", |e| {
encode_query_cache(tcx, e)
});
});
},
move || {
Expand Down Expand Up @@ -86,7 +91,9 @@ pub fn save_work_product_index(
debug!("save_work_product_index()");
dep_graph.assert_ignored();
let path = work_products_path(sess);
save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e));
file_format::save_in(sess, path, "work product index", |e| {
encode_work_product_index(&new_work_products, e)
});

// We also need to clean out old work-products, as not all of them are
// deleted during invalidation. Some object files don't change their
Expand All @@ -113,58 +120,6 @@ pub fn save_work_product_index(
});
}

/// Writes a fresh incremental-compilation file at `path_buf`.
///
/// Removes the previous file (if any), creates a new one, emits the file
/// header and then calls `encode` to produce the payload. Every failure is
/// reported through `sess.err` (with `name` describing the kind of data) and
/// makes the function return early.
pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
where
    F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
{
    debug!("save: storing data in {}", path_buf.display());

    // The old file has to be genuinely deleted instead of being truncated and
    // overwritten: it might be a shared hard-link, and the data behind that
    // link must not be modified.
    let removal = fs::remove_file(&path_buf);
    match removal {
        Err(error) => {
            // Nothing to delete is fine; anything else is a real failure.
            if error.kind() != io::ErrorKind::NotFound {
                let message = format!(
                    "unable to delete old {} at `{}`: {}",
                    name,
                    path_buf.display(),
                    error
                );
                sess.err(&message);
                return;
            }
        }
        Ok(()) => {
            debug!("save: remove old file");
        }
    }

    let mut writer = match FileEncoder::new(&path_buf) {
        Err(error) => {
            let message =
                format!("failed to create {} at `{}`: {}", name, path_buf.display(), error);
            sess.err(&message);
            return;
        }
        Ok(writer) => writer,
    };

    // Header goes first, followed by the caller-supplied payload.
    let header_outcome = file_format::write_file_header(&mut writer, sess.is_nightly_build());
    if let Err(error) = header_outcome {
        let message =
            format!("failed to write {} header to `{}`: {}", name, path_buf.display(), error);
        sess.err(&message);
        return;
    }

    let payload_outcome = encode(&mut writer);
    if let Err(error) = payload_outcome {
        let message = format!("failed to write {} to `{}`: {}", name, path_buf.display(), error);
        sess.err(&message);
        return;
    }

    // Push out whatever the encoder still holds in its buffer.
    let flush_outcome = writer.flush();
    if let Err(error) = flush_outcome {
        let message = format!("failed to flush {} to `{}`: {}", name, path_buf.display(), error);
        sess.err(&message);
        return;
    }

    debug!("save: data written to disk successfully");
}

fn encode_work_product_index(
work_products: &FxHashMap<WorkProductId, WorkProduct>,
encoder: &mut FileEncoder,
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_middle/src/ty/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use crate::ty::{
use rustc_ast as ast;
use rustc_attr as attr;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::memmap::Mmap;
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
Expand Down Expand Up @@ -71,7 +72,7 @@ use std::sync::Arc;

pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
fn new(sess: &'tcx Session, data: Vec<u8>, start_pos: usize) -> Self
fn new(sess: &'tcx Session, data: Mmap, start_pos: usize) -> Self
where
Self: Sized;

Expand Down Expand Up @@ -100,6 +101,8 @@ pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
fn register_reused_dep_node(&self, tcx: TyCtxt<'tcx>, dep_node: &DepNode);
fn store_foreign_def_id_hash(&self, def_id: DefId, hash: DefPathHash);

fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>);

fn serialize(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult;
}

Expand Down
58 changes: 35 additions & 23 deletions compiler/rustc_query_impl/src/on_disk_cache.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::QueryCtxt;
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell};
use rustc_data_structures::memmap::Mmap;
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell, RwLock};
use rustc_data_structures::unhash::UnhashMap;
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, LOCAL_CRATE};
use rustc_hir::definitions::DefPathHash;
Expand Down Expand Up @@ -42,7 +43,7 @@ const TAG_EXPN_DATA: u8 = 1;
/// any side effects that have been emitted during a query.
pub struct OnDiskCache<'sess> {
// The complete cache data in serialized form.
serialized_data: Vec<u8>,
serialized_data: RwLock<Option<Mmap>>,

// Collects all `QuerySideEffects` created during the current compilation
// session.
Expand Down Expand Up @@ -182,7 +183,8 @@ impl EncodedSourceFileId {
}

impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
debug_assert!(sess.opts.incremental.is_some());

// Wrap in a scope so we can borrow `data`.
Expand All @@ -204,7 +206,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
};

Self {
serialized_data: data,
serialized_data: RwLock::new(Some(data)),
file_index_to_stable_id: footer.file_index_to_stable_id,
file_index_to_file: Default::default(),
cnum_map: OnceCell::new(),
Expand All @@ -225,7 +227,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {

fn new_empty(source_map: &'sess SourceMap) -> Self {
Self {
serialized_data: Vec::new(),
serialized_data: RwLock::new(None),
file_index_to_stable_id: Default::default(),
file_index_to_file: Default::default(),
cnum_map: OnceCell::new(),
Expand All @@ -244,7 +246,31 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
}
}

fn serialize(&self, tcx: TyCtxt<'sess>, encoder: &mut FileEncoder) -> FileEncodeResult {
/// Runs every pending cache promotion, then releases the memory map that
/// backs the serialized data.
///
/// Promoting cached results means invoking queries, and those queries read
/// from the serialized data, so all promotions have to happen while the map
/// is still available. The map has to go afterwards because the previous
/// on-disk cache file must be deleted before the new cache is serialized,
/// at which point its memmapped contents can no longer be referred to.
fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>) {
    // Dep nodes reused from the previous session that were never passed
    // through `DepNode::construct` in this session still need their
    // `DefPathHash` to `RawDefId` mappings recorded in
    // 'latest_foreign_def_path_hashes' where necessary; `DepNode::construct`
    // is what normally takes care of that.
    tcx.dep_graph.register_reused_dep_nodes(tcx);

    // Pull everything into memory so that it can be written to the on-disk
    // cache. Most cacheable query results should be in memory already, which
    // should keep this cheap.
    // This has to come *before* 'latest_foreign_def_path_hashes' is cloned:
    // loading existing queries may create new DepNodes, which can in turn
    // call `store_foreign_def_id_hash`.
    let query_ctxt = QueryCtxt::from_tcx(tcx);
    tcx.dep_graph.exec_cache_promotions(query_ctxt);

    // Replacing the stored value with `None` drops the memory map.
    let mut mmap_slot = self.serialized_data.write();
    *mmap_slot = None;
}

fn serialize<'tcx>(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult {
// Serializing the `DepGraph` should not modify it.
tcx.dep_graph.with_ignore(|| {
// Allocate `SourceFileIndex`es.
Expand All @@ -266,21 +292,6 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
(file_to_file_index, file_index_to_stable_id)
};

// Register any dep nodes that we reused from the previous session,
// but didn't `DepNode::construct` in this session. This ensures
// that their `DefPathHash` to `RawDefId` mappings are registered
// in 'latest_foreign_def_path_hashes' if necessary, since that
// normally happens in `DepNode::construct`.
tcx.dep_graph.register_reused_dep_nodes(tcx);

// Load everything into memory so we can write it out to the on-disk
// cache. The vast majority of cacheable query results should already
// be in memory, so this should be a cheap operation.
// Do this *before* we clone 'latest_foreign_def_path_hashes', since
// loading existing queries may cause us to create new DepNodes, which
// may in turn end up invoking `store_foreign_def_id_hash`
tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));

let latest_foreign_def_path_hashes = self.latest_foreign_def_path_hashes.lock().clone();
let hygiene_encode_context = HygieneEncodeContext::default();

Expand Down Expand Up @@ -564,7 +575,7 @@ impl<'sess> OnDiskCache<'sess> {
})
}

fn with_decoder<'a, 'tcx, T, F: FnOnce(&mut CacheDecoder<'sess, 'tcx>) -> T>(
fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T>(
&'sess self,
tcx: TyCtxt<'tcx>,
pos: AbsoluteBytePos,
Expand All @@ -575,9 +586,10 @@ impl<'sess> OnDiskCache<'sess> {
{
let cnum_map = self.cnum_map.get_or_init(|| Self::compute_cnum_map(tcx));

let serialized_data = self.serialized_data.read();
let mut decoder = CacheDecoder {
tcx,
opaque: opaque::Decoder::new(&self.serialized_data[..], pos.to_usize()),
opaque: opaque::Decoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
source_map: self.source_map,
cnum_map,
file_index_to_file: &self.file_index_to_file,
Expand Down

0 comments on commit 11bbb52

Please sign in to comment.