Skip to content
This repository has been archived by the owner on Jan 15, 2025. It is now read-only.

tar/import: Handle hardlinked changes into /sysroot #408

Merged
merged 2 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 28 additions & 5 deletions lib/src/integrationtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::path::Path;
use crate::{
container::{ocidir, ExportLayout},
container_utils::is_ostree_container,
ocidir::RawLayerWriter,
};
use anyhow::Result;
use camino::Utf8Path;
Expand Down Expand Up @@ -36,17 +37,39 @@ pub fn generate_derived_oci(
dir: impl AsRef<Utf8Path>,
tag: Option<&str>,
) -> Result<()> {
generate_derived_oci_from_tar(
src,
move |w| {
let dir = dir.as_ref();
let mut layer_tar = tar::Builder::new(w);
layer_tar.append_dir_all("./", dir.as_std_path())?;
layer_tar.finish()?;
Ok(())
},
tag,
)
}

/// Using `src` as a base, append a new layer, whose tar content is written by `f`, to the OCI image.
/// Should only be enabled for testing.
#[context("Generating derived oci")]
pub fn generate_derived_oci_from_tar<F>(
src: impl AsRef<Utf8Path>,
f: F,
tag: Option<&str>,
) -> Result<()>
where
F: FnOnce(&mut RawLayerWriter) -> Result<()>,
{
let src = src.as_ref();
let src = Dir::open_ambient_dir(src, cap_std::ambient_authority())?;
let src = ocidir::OciDir::open(&src)?;
let dir = dir.as_ref();

let mut manifest = src.read_manifest()?;
let mut config: oci_spec::image::ImageConfiguration = src.read_json_blob(manifest.config())?;

let bw = src.create_raw_layer(None)?;
let mut layer_tar = tar::Builder::new(bw);
layer_tar.append_dir_all("./", dir.as_std_path())?;
let bw = layer_tar.into_inner()?;
let mut bw = src.create_raw_layer(None)?;
f(&mut bw)?;
let new_layer = bw.complete()?;

manifest.layers_mut().push(
Expand Down
2 changes: 1 addition & 1 deletion lib/src/tar/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const MAX_METADATA_SIZE: u32 = 10 * 1024 * 1024;
pub(crate) const SMALL_REGFILE_SIZE: usize = 127 * 1024;

// The prefix for filenames that contain content we actually look at.
const REPO_PREFIX: &str = "sysroot/ostree/repo/";
pub(crate) const REPO_PREFIX: &str = "sysroot/ostree/repo/";
/// Statistics from import.
#[derive(Debug, Default)]
struct ImportStats {
Expand Down
73 changes: 70 additions & 3 deletions lib/src/tar/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@
use crate::Result;
use anyhow::{anyhow, Context};
use camino::{Utf8Component, Utf8Path, Utf8PathBuf};

use cap_std_ext::cap_std;
use cap_std_ext::cmdext::CapStdExtCommandExt;
use cap_std_ext::rustix;
use once_cell::unsync::OnceCell;
use ostree::gio;
use ostree::prelude::FileExt;
use rustix::fd::FromFd;
use std::collections::BTreeMap;
use std::io::{BufWriter, Write};
use std::collections::{BTreeMap, HashMap};
use std::io::{BufWriter, Seek, Write};
use std::path::Path;
use std::process::Stdio;

use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite};
use tracing::instrument;
Expand Down Expand Up @@ -164,11 +168,74 @@ pub(crate) fn filter_tar(
let mut filtered = BTreeMap::new();

let ents = src.entries()?;

// Lookaside data for dealing with hardlinked files into /sysroot; see below.
let mut changed_sysroot_objects = HashMap::new();
let mut new_sysroot_link_targets = HashMap::<Utf8PathBuf, Utf8PathBuf>::new();
// A temporary directory if needed
let tmpdir = OnceCell::new();

for entry in ents {
let entry = entry?;
let mut entry = entry?;
let header = entry.header();
let path = entry.path()?;
let path: &Utf8Path = (&*path).try_into()?;

let is_modified = header.mtime().unwrap_or_default() > 0;
let is_regular = header.entry_type() == tar::EntryType::Regular;
if path.strip_prefix(crate::tar::REPO_PREFIX).is_ok() {
// If it's a modified file in /sysroot, it may be a target for future hardlinks.
// In that case, we copy the data off to a temporary file. Then the first hardlink
// to it becomes instead the real file, and any *further* hardlinks refer to that
// file instead.
if is_modified && is_regular {
tracing::debug!("Processing modified sysroot file {path}");
// Lazily allocate a temporary directory
let tmpdir = tmpdir.get_or_try_init(|| {
let vartmp = &cap_std::fs::Dir::open_ambient_dir(
"/var/tmp",
cap_std::ambient_authority(),
)?;
cap_tempfile::tempdir_in(vartmp)
})?;
// Create an O_TMPFILE (anonymous file) to use as a temporary store for the file data
let mut tmpf = cap_tempfile::TempFile::new_anonymous(tmpdir).map(BufWriter::new)?;
let path = path.to_owned();
let header = header.clone();
std::io::copy(&mut entry, &mut tmpf)?;
let mut tmpf = tmpf.into_inner()?;
tmpf.seek(std::io::SeekFrom::Start(0))?;
// Cache this data, indexed by the file path
changed_sysroot_objects.insert(path, (header, tmpf));
continue;
}
} else if header.entry_type() == tar::EntryType::Link && is_modified {
let target = header
.link_name()?
.ok_or_else(|| anyhow!("Invalid empty hardlink"))?;
let target: &Utf8Path = (&*target).try_into()?;
// If this is a hardlink into /sysroot...
if target.strip_prefix(crate::tar::REPO_PREFIX).is_ok() {
// And we found a previously processed modified file there
if let Some((mut header, data)) = changed_sysroot_objects.remove(target) {
tracing::debug!("Making {path} canonical for sysroot link {target}");
// Make *this* entry the canonical one, consuming the temporary file data
dest.append_data(&mut header, path, data)?;
// And cache this file path as the new link target
new_sysroot_link_targets.insert(target.to_owned(), path.to_owned());
} else if let Some(target) = new_sysroot_link_targets.get(path) {
tracing::debug!("Relinking {path} to {target}");
// We found a 2nd (or 3rd, etc.) link into /sysroot; rewrite the link
// target to be the first file outside of /sysroot we found.
let mut header = header.clone();
dest.append_link(&mut header, path, target)?;
} else {
tracing::debug!("Found unhandled modified link from {path} to {target}");
}
continue;
}
}

let normalized = match normalize_validate_path(path)? {
NormalizedPathResult::Filtered(path) => {
if let Some(v) = filtered.get_mut(path) {
Expand Down
89 changes: 89 additions & 0 deletions lib/tests/it/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use std::collections::{HashMap, HashSet};
use std::io::{BufReader, BufWriter};
use std::os::unix::fs::DirBuilderExt;
use std::process::Command;
use std::time::SystemTime;

use ostree_ext::fixture::{FileDef, Fixture, CONTENTS_CHECKSUM_V0, CONTENTS_V0_LEN};

Expand Down Expand Up @@ -1187,6 +1188,94 @@ async fn test_container_write_derive() -> Result<()> {
Ok(())
}

/// Test for https://github.com/ostreedev/ostree-rs-ext/issues/405
/// We need to handle the case of modified hardlinks into /sysroot.
///
/// The test builds a derived image whose extra layer contains a regular file under
/// `sysroot/ostree/repo/objects/` with a non-zero mtime, plus a hardlink `usr/bin/bash`
/// that points at that object. It then imports the derived image and checks that
/// `/usr/bin/bash` in the resulting merge commit carries the new content (`hello`).
#[tokio::test]
async fn test_container_write_derive_sysroot_hardlink() -> Result<()> {
    let fixture = Fixture::new_v1()?;
    let baseimg = &fixture.export_container(ExportLayout::V1).await?.0;
    let basepath = &match baseimg.transport {
        Transport::OciDir => fixture.path.join(baseimg.name.as_str()),
        _ => unreachable!(),
    };

    // Build a derived image
    let derived_path = &fixture.path.join("derived.oci");
    oci_clone(basepath, derived_path).await?;
    ostree_ext::integrationtest::generate_derived_oci_from_tar(
        derived_path,
        |w| {
            let mut tar = tar::Builder::new(w);
            let objpath = Utf8Path::new("sysroot/ostree/repo/objects/60/feb13e826d2f9b62490ab24cea0f4a2d09615fb57027e55f713c18c59f4796.file");
            let d = objpath.parent().unwrap();

            /// Append directory entries for `path` and all of its non-empty ancestors,
            /// outermost first, so each directory precedes its children in the archive.
            fn mkparents<F: std::io::Write>(
                t: &mut tar::Builder<F>,
                path: &Utf8Path,
            ) -> std::io::Result<()> {
                if let Some(parent) = path.parent().filter(|p| !p.as_str().is_empty()) {
                    mkparents(t, parent)?;
                }
                let mut h = tar::Header::new_gnu();
                h.set_entry_type(tar::EntryType::Directory);
                h.set_uid(0);
                h.set_gid(0);
                h.set_mode(0o755);
                h.set_size(0);
                t.append_data(&mut h, path, std::io::empty())
            }
            mkparents(&mut tar, d).context("Appending parent")?;

            // A non-zero mtime is what distinguishes these entries from untouched content.
            let now = SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)?
                .as_secs();

            // The modified object file inside the repo.
            let mut h = tar::Header::new_gnu();
            h.set_entry_type(tar::EntryType::Regular);
            h.set_uid(0);
            h.set_gid(0);
            h.set_mode(0o644);
            h.set_mtime(now);
            let data = b"hello";
            h.set_size(data.len() as u64);
            tar.append_data(&mut h, objpath, std::io::Cursor::new(data))
                .context("appending object")?;

            // The hardlink outside of the repo that points at the object above;
            // the header is reused, so uid/gid/mode carry over.
            let targetpath = Utf8Path::new("usr/bin/bash");
            h.set_size(0);
            h.set_mtime(now);
            h.set_entry_type(tar::EntryType::Link);
            tar.append_link(&mut h, targetpath, objpath)
                .context("appending target")?;

            // Finish explicitly: relying on `Drop` would silently discard any I/O error
            // raised while writing the archive trailer.
            tar.finish().context("finishing layer tar")?;
            Ok::<_, anyhow::Error>(())
        },
        None,
    )?;
    let derived_ref = &OstreeImageReference {
        sigverify: SignatureSource::ContainerPolicyAllowInsecure,
        imgref: ImageReference {
            transport: Transport::OciDir,
            name: derived_path.to_string(),
        },
    };
    let mut imp =
        store::ImageImporter::new(fixture.destrepo(), derived_ref, Default::default()).await?;
    let prep = match imp.prepare().await.context("Init prep derived")? {
        store::PrepareResult::AlreadyPresent(_) => panic!("should not be already imported"),
        store::PrepareResult::Ready(r) => r,
    };
    // Propagate import failures as errors (with context) rather than panicking.
    let import = imp.import(prep).await.context("Importing derived")?;

    // Verify we have the new file
    bash_in!(
        &fixture.dir,
        r#"set -x;
ostree --repo=dest/repo ls ${r} /usr/bin/bash >/dev/null
test "$(ostree --repo=dest/repo cat ${r} /usr/bin/bash)" = "hello"
"#,
        r = import.merge_commit.as_str()
    )?;

    Ok(())
}

#[tokio::test]
// Today rpm-ostree vendors a stable ostree-rs-ext; this test
// verifies that the old ostree-rs-ext code can parse the containers
Expand Down