From b381e0294f612d912e176ea113245349f4a5710f Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Mon, 8 Jul 2019 21:10:17 -0400 Subject: [PATCH] compose: Hash all treefile externals and flattened manifest Move hashing to the Rust side so that we can easily hash over the final set of inputs after parsing. This means that we now hash over all the externals, like `add-files` references, any `postprocess-script` script, and `passwd` and `group` files. The original motivation for this was that hashing over a reserialized version of the treefile was not deterministic now that treefiles include hash tables (i.e. `add-commit-metadata`). So I initially included each individual treefile as part of the hash. I realized afterwards that just switching to `BTreeMap` fixes this, so we can keep hashing only the final flattened reserialized treefile so we ignore comments and whitespace too. But since I already wrote the patch, and it fixes a real issue today... here we are. One notable change though is that we now hash the treefile in non-pretty mode to increase the chances that the serialized form remains stable. Ironically, this change is likely to cause a no-op commit once it gets to pipelines which iterate quickly. All for the greater good though. Closes: #1865 Approved by: cgwalters --- rust/src/treefile.rs | 67 ++++++++++++++++++++++++++++++--- src/app/rpmostree-composeutil.c | 33 ++-------------- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/rust/src/treefile.rs b/rust/src/treefile.rs index 2e6e0ea420..4496ffb454 100644 --- a/rust/src/treefile.rs +++ b/rust/src/treefile.rs @@ -39,7 +39,7 @@ const INCLUDE_MAXDEPTH: u32 = 50; /// a TreeComposeConfig. struct TreefileExternals { postprocess_script: Option, - add_files: collections::HashMap, + add_files: collections::BTreeMap, passwd: Option, group: Option, } @@ -56,6 +56,8 @@ pub struct Treefile { rojig_spec: Option, serialized: CUtf8Buf, externals: TreefileExternals, + // This is a checksum over *all* the treefile inputs (recursed treefiles + externals). + checksum: CUtf8Buf, } // We only use this while parsing @@ -239,7 +241,7 @@ fn treefile_parse>( } else { None }; - let mut add_files: collections::HashMap = collections::HashMap::new(); + let mut add_files: BTreeMap = BTreeMap::new(); if let Some(ref add_file_names) = tf.add_files.as_ref() { for (name, _) in add_file_names.iter() { add_files.insert(name.clone(), utils::open_file(filename.with_file_name(name))?); @@ -361,9 +363,7 @@ fn treefile_merge_externals(dest: &mut TreefileExternals, src: &mut TreefileExte } // add-files is an array and hence has append semantics. - for (k, v) in src.add_files.drain() { - dest.add_files.insert(k, v); - } + dest.add_files.append(&mut src.add_files); // passwd/group are basic values if dest.passwd.is_none() { @@ -432,6 +432,13 @@ impl Treefile { (None, None) }; let serialized = Treefile::serialize_json_string(&parsed.config)?; + // Notice we hash the *reserialization* of the final flattened treefile only so that e.g. + // comments/whitespace/hash table key reorderings don't trigger a respin. We could take + // this further by using a custom `serialize_with` for Vecs where ordering doesn't matter + // (or just sort the Vecs). + let mut hasher = glib::Checksum::new(glib::ChecksumType::Sha256); + parsed.config.hasher_update(&mut hasher)?; + parsed.externals.hasher_update(&mut hasher)?; Ok(Box::new(Treefile { primary_dfd: dfd, parsed: parsed.config, @@ -440,6 +447,7 @@ impl Treefile { rojig_spec: rojig_spec, serialized: serialized, externals: parsed.externals, + checksum: CUtf8Buf::from_string(hasher.get_string().unwrap()), })) } @@ -516,6 +524,42 @@ for x in *; do mv ${{x}} %{{buildroot}}%{{_prefix}}/lib/ostree-jigdo/%{{name}}; } } +fn hash_file(hasher: &mut glib::Checksum, mut f: &fs::File) -> Fallible<()> { + let mut reader = io::BufReader::with_capacity(128 * 1024, f); + loop { + // have to scope fill_buf() so we can consume() below + let n = { + let buf = reader.fill_buf()?; + hasher.update(buf); + buf.len() + }; + if n == 0 { + break; + } + reader.consume(n); + } + f.seek(io::SeekFrom::Start(0))?; + Ok(()) +} + +impl TreefileExternals { + fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> { + if let Some(ref f) = self.postprocess_script { + hash_file(hasher, f)?; + } + if let Some(ref f) = self.passwd { + hash_file(hasher, f)?; + } + if let Some(ref f) = self.group { + hash_file(hasher, f)?; + } + for f in self.add_files.values() { + hash_file(hasher, f)?; + } + Ok(()) + } +} + /// For increased readability in YAML/JSON, we support whitespace in individual /// array elements. fn whitespace_split_packages(pkgs: &[String]) -> Vec { @@ -767,6 +811,13 @@ impl TreeComposeConfig { Ok(self) } + + fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> { + // don't use pretty mode to increase the chances of a stable serialization + // https://github.com/projectatomic/rpm-ostree/pull/1865 + hasher.update(serde_json::to_vec(self)?.as_slice()); + Ok(()) + } } #[cfg(test)] @@ -1228,6 +1279,12 @@ mod ffi { .unwrap_or(ptr::null_mut()) } + #[no_mangle] + pub extern "C" fn ror_treefile_get_checksum(tf: *mut Treefile) -> *const libc::c_char { + let tf = ref_from_raw_ptr(tf); + tf.checksum.as_ptr() + } + #[no_mangle] pub extern "C" fn ror_treefile_free(tf: *mut Treefile) { if tf.is_null() { diff --git a/src/app/rpmostree-composeutil.c b/src/app/rpmostree-composeutil.c index 2f71e24a55..702023051a 100644 --- a/src/app/rpmostree-composeutil.c +++ b/src/app/rpmostree-composeutil.c @@ -58,36 +58,9 @@ rpmostree_composeutil_checksum (HyGoal goal, { g_autoptr(GChecksum) checksum = g_checksum_new (G_CHECKSUM_SHA256); - /* Hash in the raw treefile; this means reordering the input packages - * or adding a comment will cause a recompose, but let's be conservative - * here. - */ - g_checksum_update (checksum, (guint8*)ror_treefile_get_json_string (tf), -1); - - if (json_object_has_member (treefile, "add-files")) - { - JsonArray *add_files = json_object_get_array_member (treefile, "add-files"); - guint i, len = json_array_get_length (add_files); - for (i = 0; i < len; i++) - { - JsonArray *add_el = json_array_get_array_element (add_files, i); - if (!add_el) - return glnx_throw (error, "Element in add-files is not an array"); - const char *src = _rpmostree_jsonutil_array_require_string_element (add_el, 0, error); - if (!src) - return FALSE; - - int src_fd = ror_treefile_get_add_file_fd (tf, src); - g_assert_cmpint (src_fd, !=, -1); - g_autoptr(GBytes) bytes = glnx_fd_readall_bytes (src_fd, NULL, FALSE); - gsize len; - const guint8* buf = g_bytes_get_data (bytes, &len); - g_checksum_update (checksum, (const guint8 *) buf, len); - } - - } - - /* FIXME; we should also hash the post script */ + /* Hash in the treefile inputs (this includes all externals like postprocess, add-files, + * etc... and the final flattened treefile -- see treefile.rs for more details). */ + g_checksum_update (checksum, (guint8*)ror_treefile_get_checksum (tf), -1); /* Hash in each package */ if (!rpmostree_dnf_add_checksum_goal (checksum, goal, NULL, error))