From 1fcf5ee2ff911b4b82f0f5e8bf4516b875db53ff Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 29 Oct 2024 21:26:40 -0400 Subject: [PATCH 1/3] test(depinfo): basic serialization round-trip test --- .../core/compiler/fingerprint/dep_info.rs | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/cargo/core/compiler/fingerprint/dep_info.rs b/src/cargo/core/compiler/fingerprint/dep_info.rs index 2d092f43eb7..35cb322d2f2 100644 --- a/src/cargo/core/compiler/fingerprint/dep_info.rs +++ b/src/cargo/core/compiler/fingerprint/dep_info.rs @@ -84,7 +84,7 @@ pub enum DepInfoPathType { /// | len of key | key bytes | value exists? | len of value | value bytes | /// +------------+-----------+---------------+--------------+-------------+ /// ``` -#[derive(Default)] +#[derive(Default, Debug, PartialEq, Eq)] pub struct EncodedDepInfo { pub files: Vec<(DepInfoPathType, PathBuf, Option<(u64, String)>)>, pub env: Vec<(String, Option)>, @@ -614,3 +614,57 @@ pub enum InvalidChecksum { #[error("expected a string with format \"algorithm=hex_checksum\"")] InvalidFormat, } + +#[cfg(test)] +mod encoded_dep_info { + use super::*; + + #[track_caller] + fn gen_test(checksum: bool) { + let checksum = checksum.then_some((768, "c01efc669f09508b55eced32d3c88702578a7c3e".into())); + let lib_rs = ( + DepInfoPathType::TargetRootRelative, + PathBuf::from("src/lib.rs"), + checksum.clone(), + ); + + let depinfo = EncodedDepInfo { + files: vec![lib_rs.clone()], + env: Vec::new(), + }; + let data = depinfo.serialize().unwrap(); + assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo); + + let mod_rs = ( + DepInfoPathType::TargetRootRelative, + PathBuf::from("src/mod.rs"), + checksum.clone(), + ); + let depinfo = EncodedDepInfo { + files: vec![lib_rs.clone(), mod_rs.clone()], + env: Vec::new(), + }; + let data = depinfo.serialize().unwrap(); + assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo); + + let depinfo = EncodedDepInfo { + files: vec![lib_rs, mod_rs], + env: vec![ + ("Gimli".into(), Some("Legolas".into())), + ("Beren".into(), Some("LĂșthien".into())), + ], + }; + let data = depinfo.serialize().unwrap(); + assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo); + } + + #[test] + fn round_trip() { + gen_test(false); + } + + #[test] + fn round_trip_with_checksums() { + gen_test(true); + } +} From 6f2b5d9b76b5c07eaf45d52482e0e07b6620e41b Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Wed, 30 Oct 2024 08:43:08 -0400 Subject: [PATCH 2/3] test(depinfo): current cargo unable to parse v0 or v1 We're going to add the magic marker in the next commit. The v0 test is to ensure that we don't change anything unexpected. --- .../core/compiler/fingerprint/dep_info.rs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/cargo/core/compiler/fingerprint/dep_info.rs b/src/cargo/core/compiler/fingerprint/dep_info.rs index 35cb322d2f2..3fcc1ccb9d4 100644 --- a/src/cargo/core/compiler/fingerprint/dep_info.rs +++ b/src/cargo/core/compiler/fingerprint/dep_info.rs @@ -667,4 +667,35 @@ mod encoded_dep_info { fn round_trip_with_checksums() { gen_test(true); } + + #[test] + fn path_type_is_u8_max() { + #[rustfmt::skip] + let data = [ + 0x01, 0x00, 0x00, 0x00, 0xff, // magic marker + 0x01, // version + 0x01, 0x00, 0x00, 0x00, // # of files + 0x00, // path type + 0x04, 0x00, 0x00, 0x00, // len of path + 0x72, 0x75, 0x73, 0x74, // path bytes ("rust") + 0x00, // cksum exists? + 0x00, 0x00, 0x00, 0x00, // # of env vars + ]; + // The current cargo doesn't recognize the magic marker. + assert!(EncodedDepInfo::parse(&data).is_none()); + } + + #[test] + fn parse_v0_fingerprint_dep_info() { + #[rustfmt::skip] + let data = [ + 0x01, 0x00, 0x00, 0x00, // # of files + 0x00, // path type + 0x04, 0x00, 0x00, 0x00, // len of path + 0x72, 0x75, 0x73, 0x74, // path bytes: "rust" + 0x00, 0x00, 0x00, 0x00, // # of env vars + ]; + // Cargo can't recognize v0 after `-Zchecksum-freshess` added. + assert!(EncodedDepInfo::parse(&data).is_none()); + } } From 634450e4b676a32eb0e382215d81657792026bec Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Wed, 30 Oct 2024 16:19:55 -0400 Subject: [PATCH 3/3] fix: track version in fingerprint dep-info files Encodes the version information into Cargo's fingerprint dep-info files, so that when the format encoding changes in the future, Cargo understands a dep-info file was outdated and doesn't bother parsing it. Since there was no version info encoded in the old format (call it v0), to be compatible with older cargoes, this PR works around it with a horrible hack. It is explained in the doc comment of `EncodedDepInfo`. --- crates/cargo-test-support/src/lib.rs | 6 ++ .../core/compiler/fingerprint/dep_info.rs | 69 +++++++++++++++++-- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/crates/cargo-test-support/src/lib.rs b/crates/cargo-test-support/src/lib.rs index 476e185dd73..689f7208cf4 100644 --- a/crates/cargo-test-support/src/lib.rs +++ b/crates/cargo-test-support/src/lib.rs @@ -1597,6 +1597,12 @@ pub fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, assert!(files.next().is_none(), "expected only 1 dep-info file"); let dep_info = fs::read(&info_path).unwrap(); let dep_info = &mut &dep_info[..]; + + // Consume the magic marker and version. Here they don't really matter. + read_usize(dep_info); + read_u8(dep_info); + read_u8(dep_info); + let deps = (0..read_usize(dep_info)) .map(|_| { let ty = read_u8(dep_info); diff --git a/src/cargo/core/compiler/fingerprint/dep_info.rs b/src/cargo/core/compiler/fingerprint/dep_info.rs index 3fcc1ccb9d4..e8628f34ebf 100644 --- a/src/cargo/core/compiler/fingerprint/dep_info.rs +++ b/src/cargo/core/compiler/fingerprint/dep_info.rs @@ -22,6 +22,9 @@ use cargo_util::Sha256; use crate::CargoResult; use crate::CARGO_ENV; +/// The current format version of [`EncodedDepInfo`]. +const CURRENT_ENCODED_DEP_INFO_VERSION: u8 = 1; + /// The representation of the `.d` dep-info file generated by rustc #[derive(Default)] pub struct RustcDepInfo { @@ -61,20 +64,36 @@ pub enum DepInfoPathType { /// Currently the format looks like: /// /// ```text -/// +------------+------------+---------------+---------------+ -/// | # of files | file paths | # of env vars | env var pairs | -/// +------------+------------+---------------+---------------+ +/// +--------+---------+------------+------------+---------------+---------------+ +/// | marker | version | # of files | file paths | # of env vars | env var pairs | +/// +--------+---------+------------+------------+---------------+---------------+ /// ``` /// /// Each field represents /// +/// * _Marker_ --- A magic marker to ensure that older Cargoes, which only +/// recognize format v0 (prior to checksum support in [`f4ca7390`]), do not +/// proceed with parsing newer formats. Since [`EncodedDepInfo`] is merely +/// an optimization, and to avoid adding complexity, Cargo recognizes only +/// one version of [`CURRENT_ENCODED_DEP_INFO_VERSION`]. +/// The current layout looks like this +/// ```text +/// +----------------------------+ +/// | [0x01 0x00 0x00 0x00 0xff] | +/// +----------------------------+ +/// ``` +/// These bytes will be interpreted as "one file tracked and an invalid +/// [`DepInfoPathType`] variant with 255" by older Cargoes, causing them to +/// stop parsing. This could prevent problematic parsing as noted in +/// rust-lang/cargo#14712. +/// * _Version_ --- The current format version. /// * _Number of files/envs_ --- A `u32` representing the number of things. /// * _File paths_ --- Zero or more paths of files the dep-info file depends on. /// Each path is encoded as the following: /// /// ```text /// +-----------+-------------+------------+---------------+-----------+-------+ -/// | Path type | len of path | path bytes | cksum exists? | file size | cksum | +/// | path type | len of path | path bytes | cksum exists? | file size | cksum | /// +-----------+-------------+------------+---------------+-----------+-------+ /// ``` /// * _Env var pairs_ --- Zero or more env vars the dep-info file depends on. @@ -84,6 +103,8 @@ pub enum DepInfoPathType { /// | len of key | key bytes | value exists? | len of value | value bytes | /// +------------+-----------+---------------+--------------+-------------+ /// ``` +/// +/// [`f4ca7390`]: https://github.com/rust-lang/cargo/commit/f4ca739073185ea5e1148ff100bb4a06d3bf721d #[derive(Default, Debug, PartialEq, Eq)] pub struct EncodedDepInfo { pub files: Vec<(DepInfoPathType, PathBuf, Option<(u64, String)>)>, @@ -93,6 +114,12 @@ pub struct EncodedDepInfo { impl EncodedDepInfo { pub fn parse(mut bytes: &[u8]) -> Option { let bytes = &mut bytes; + read_magic_marker(bytes)?; + let version = read_u8(bytes)?; + if version != CURRENT_ENCODED_DEP_INFO_VERSION { + return None; + } + let nfiles = read_usize(bytes)?; let mut files = Vec::with_capacity(nfiles); for _ in 0..nfiles { @@ -129,6 +156,18 @@ impl EncodedDepInfo { } return Some(EncodedDepInfo { files, env }); + /// See [`EncodedDepInfo`] for why a magic marker exists. + fn read_magic_marker(bytes: &mut &[u8]) -> Option<()> { + let _size = read_usize(bytes)?; + let path_type = read_u8(bytes)?; + if path_type != u8::MAX { + // Old depinfo. Give up parsing it. + None + } else { + Some(()) + } + } + fn read_usize(bytes: &mut &[u8]) -> Option { let ret = bytes.get(..4)?; *bytes = &bytes[4..]; @@ -162,6 +201,10 @@ impl EncodedDepInfo { pub fn serialize(&self) -> CargoResult> { let mut ret = Vec::new(); let dst = &mut ret; + + write_magic_marker(dst); + dst.push(CURRENT_ENCODED_DEP_INFO_VERSION); + write_usize(dst, self.files.len()); for (ty, file, checksum_info) in self.files.iter() { match ty { @@ -189,6 +232,14 @@ impl EncodedDepInfo { } return Ok(ret); + /// See [`EncodedDepInfo`] for why a magic marker exists. + /// + /// There is an assumption that there is always at least a file. + fn write_magic_marker(dst: &mut Vec) { + write_usize(dst, 1); + dst.push(u8::MAX); + } + fn write_bytes(dst: &mut Vec, val: impl AsRef<[u8]>) { let val = val.as_ref(); write_usize(dst, val.len()); @@ -673,7 +724,7 @@ mod encoded_dep_info { #[rustfmt::skip] let data = [ 0x01, 0x00, 0x00, 0x00, 0xff, // magic marker - 0x01, // version + CURRENT_ENCODED_DEP_INFO_VERSION, // version 0x01, 0x00, 0x00, 0x00, // # of files 0x00, // path type 0x04, 0x00, 0x00, 0x00, // len of path @@ -682,7 +733,13 @@ mod encoded_dep_info { 0x00, 0x00, 0x00, 0x00, // # of env vars ]; // The current cargo doesn't recognize the magic marker. - assert!(EncodedDepInfo::parse(&data).is_none()); + assert_eq!( + EncodedDepInfo::parse(&data).unwrap(), + EncodedDepInfo { + files: vec![(DepInfoPathType::PackageRootRelative, "rust".into(), None)], + env: Vec::new(), + } + ); } #[test]