From cbaa2363775d2022bd81e9138ed4f8a04ff18ed0 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Thu, 17 Mar 2022 09:15:17 +0000 Subject: [PATCH 01/12] Builder: support none whiteout-spec Add `--whiteout-spec none` option for the builder command, which will build all `.wh.*` and `.wh..wh..opq` files into the bootstrap. This is used for tar build, which packs the blob and bootstrap into a tar stream so that they correspond to the OCI tar layers one by one. Signed-off-by: Yan Song --- src/bin/nydus-image/core/node.rs | 6 ++++++ src/bin/nydus-image/main.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bin/nydus-image/core/node.rs b/src/bin/nydus-image/core/node.rs index 202d3421fc1..6f406d2f29c 100644 --- a/src/bin/nydus-image/core/node.rs +++ b/src/bin/nydus-image/core/node.rs @@ -103,6 +103,8 @@ pub enum WhiteoutSpec { Oci, /// "whiteouts and opaque directories" in https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt Overlayfs, + /// No whiteout spec, which will build all `.wh.*` and `.wh..wh..opq` files into bootstrap. 
+ None, } impl Default for WhiteoutSpec { @@ -118,6 +120,7 @@ impl FromStr for WhiteoutSpec { match s { "oci" => Ok(Self::Oci), "overlayfs" => Ok(Self::Overlayfs), + "none" => Ok(Self::None), _ => Err(anyhow!("invalid whiteout spec")), } } @@ -1022,6 +1025,9 @@ impl Node { return Some(WhiteoutType::OverlayFsOpaque); } } + WhiteoutSpec::None => { + return None; + } } None diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index bed1a69b713..ffbc153f06c 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -277,7 +277,7 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .takes_value(true) .required(true) .default_value("oci") - .possible_values(&["oci", "overlayfs"]) + .possible_values(&["oci", "overlayfs", "none"]) ) .arg( Arg::with_name("output-json") From 5759787d21d48c32a2cf4a85051d430aeb397fd8 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Fri, 18 Mar 2022 03:56:53 +0000 Subject: [PATCH 02/12] Builder: simplify call for bootstrap dump Match blob table by fs version in `bootstrap.dump()`. 
Signed-off-by: Yan Song --- src/bin/nydus-image/builder/diff.rs | 11 ++--------- src/bin/nydus-image/builder/directory.rs | 6 +----- src/bin/nydus-image/builder/stargz.rs | 6 +++--- src/bin/nydus-image/core/blob_compact.rs | 10 +--------- src/bin/nydus-image/core/bootstrap.rs | 17 +++++++++++++++-- 5 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/bin/nydus-image/builder/diff.rs b/src/bin/nydus-image/builder/diff.rs index afb9693c31b..ad62fcc6131 100644 --- a/src/bin/nydus-image/builder/diff.rs +++ b/src/bin/nydus-image/builder/diff.rs @@ -136,7 +136,7 @@ use crate::core::context::{ use crate::core::node::{ChunkSource, ChunkWrapper, Node, NodeChunk, Overlay}; use crate::core::tree::Tree; use nydus_utils::digest::RafsDigest; -use rafs::metadata::layout::{RafsBlobTable, RAFS_ROOT_INODE}; +use rafs::metadata::layout::RAFS_ROOT_INODE; use rafs::metadata::{RafsInode, RafsMode, RafsSuper}; use storage::device::BlobChunkInfo; @@ -653,14 +653,7 @@ impl DiffBuilder { // Dump bootstrap file let blob_table = blob_mgr.to_blob_table(ctx)?; - match blob_table { - RafsBlobTable::V5(table) => { - bootstrap.dump_rafsv5(ctx, bootstrap_ctx, &table)?; - } - RafsBlobTable::V6(table) => { - bootstrap.dump_rafsv6(ctx, bootstrap_ctx, &table)?; - } - }; + bootstrap.dump(ctx, bootstrap_ctx, &blob_table)?; bootstrap_ctx.blobs = blob_mgr .get_blobs() .iter() diff --git a/src/bin/nydus-image/builder/directory.rs b/src/bin/nydus-image/builder/directory.rs index b7ed7e7c46d..794888a496e 100644 --- a/src/bin/nydus-image/builder/directory.rs +++ b/src/bin/nydus-image/builder/directory.rs @@ -16,7 +16,6 @@ use crate::core::context::{ }; use crate::core::node::{Node, Overlay}; use crate::core::tree::Tree; -use rafs::metadata::layout::RafsBlobTable; struct FilesystemTreeBuilder {} @@ -159,10 +158,7 @@ impl Builder for DirectoryBuilder { // Dump bootstrap file let blob_table = blob_mgr.to_blob_table(ctx)?; - match blob_table { - RafsBlobTable::V5(table) => bootstrap.dump_rafsv5(ctx, 
&mut bootstrap_ctx, &table)?, - RafsBlobTable::V6(table) => bootstrap.dump_rafsv6(ctx, &mut bootstrap_ctx, &table)?, - } + bootstrap.dump(ctx, &mut bootstrap_ctx, &blob_table)?; bootstrap_mgr.add(bootstrap_ctx); BuildOutput::new(&blob_mgr, &bootstrap_mgr) diff --git a/src/bin/nydus-image/builder/stargz.rs b/src/bin/nydus-image/builder/stargz.rs index c669c7486df..b569c29b310 100644 --- a/src/bin/nydus-image/builder/stargz.rs +++ b/src/bin/nydus-image/builder/stargz.rs @@ -649,10 +649,10 @@ impl Builder for StargzBuilder { // Dump bootstrap file let blob_table = blob_mgr.to_blob_table(ctx)?; - match blob_table { - RafsBlobTable::V5(table) => bootstrap.dump_rafsv5(ctx, &mut bootstrap_ctx, &table)?, - RafsBlobTable::V6(_) => todo!(), + if let RafsBlobTable::V6(_) = blob_table { + todo!(); } + bootstrap.dump(ctx, &mut bootstrap_ctx, &blob_table)?; bootstrap_mgr.add(bootstrap_ctx); BuildOutput::new(&blob_mgr, &bootstrap_mgr) diff --git a/src/bin/nydus-image/core/blob_compact.rs b/src/bin/nydus-image/core/blob_compact.rs index fec7a6b8bf5..f06ea2f08ed 100644 --- a/src/bin/nydus-image/core/blob_compact.rs +++ b/src/bin/nydus-image/core/blob_compact.rs @@ -14,7 +14,6 @@ use crate::core::tree::Tree; use anyhow::Result; use nydus_utils::digest::RafsDigest; use nydus_utils::try_round_up_4k; -use rafs::metadata::layout::RafsBlobTable; use rafs::metadata::{RafsMode, RafsSuper}; use serde::{Deserialize, Serialize}; use sha2::Digest; @@ -599,14 +598,7 @@ impl BlobCompactor { std::mem::swap(&mut bootstrap_ctx.nodes, &mut compactor.nodes); // blobs have already been dumped, dump bootstrap only let blob_table = compactor.new_blob_mgr.to_blob_table(&build_ctx)?; - match blob_table { - RafsBlobTable::V5(table) => { - bootstrap.dump_rafsv5(&mut build_ctx, &mut bootstrap_ctx, &table)?; - } - RafsBlobTable::V6(table) => { - bootstrap.dump_rafsv6(&mut build_ctx, &mut bootstrap_ctx, &table)?; - } - }; + bootstrap.dump(&mut build_ctx, &mut bootstrap_ctx, &blob_table)?; 
bootstrap_mgr.add(bootstrap_ctx); Ok(Some(BuildOutput::new( &compactor.new_blob_mgr, diff --git a/src/bin/nydus-image/core/bootstrap.rs b/src/bin/nydus-image/core/bootstrap.rs index 0f168aebcf8..068ccf709ee 100644 --- a/src/bin/nydus-image/core/bootstrap.rs +++ b/src/bin/nydus-image/core/bootstrap.rs @@ -17,6 +17,7 @@ use rafs::metadata::layout::v6::{ align_offset, calculate_nid, RafsV6BlobTable, RafsV6Device, RafsV6SuperBlock, RafsV6SuperBlockExt, EROFS_BLOCK_SIZE, EROFS_DEVTABLE_OFFSET, EROFS_INODE_SLOT_SIZE, }; +use rafs::metadata::layout::RafsBlobTable; use rafs::RafsIoWrite; use rafs::metadata::layout::RAFS_ROOT_INODE; @@ -364,8 +365,20 @@ impl Bootstrap { } } + pub fn dump( + &mut self, + ctx: &mut BuildContext, + bootstrap_ctx: &mut BootstrapContext, + blob_table: &RafsBlobTable, + ) -> Result<()> { + match blob_table { + RafsBlobTable::V5(table) => self.dump_rafsv5(ctx, bootstrap_ctx, &table), + RafsBlobTable::V6(table) => self.dump_rafsv6(ctx, bootstrap_ctx, &table), + } + } + /// Dump bootstrap and blob file, return (Vec, blob_size) - pub fn dump_rafsv5( + fn dump_rafsv5( &mut self, ctx: &mut BuildContext, bootstrap_ctx: &mut BootstrapContext, @@ -498,7 +511,7 @@ impl Bootstrap { } /// Dump bootstrap and blob file, return (Vec, blob_size) - pub fn dump_rafsv6( + fn dump_rafsv6( &mut self, ctx: &mut BuildContext, bootstrap_ctx: &mut BootstrapContext, From 3cbf9dadf10f985c1c013b79b35f8d11654b7768 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Mon, 21 Mar 2022 10:33:31 +0000 Subject: [PATCH 03/12] Builder: support merge sub-command The `merge` sub-command merges multiple nydus bootstraps (from every layer of image) into a final bootstrap. 
Signed-off-by: Yan Song --- rafs/src/metadata/mod.rs | 2 +- src/bin/nydus-image/builder/diff.rs | 6 +- src/bin/nydus-image/core/blob_compact.rs | 3 +- src/bin/nydus-image/core/context.rs | 25 ++++++- src/bin/nydus-image/core/node.rs | 5 +- src/bin/nydus-image/core/tree.rs | 16 +++-- src/bin/nydus-image/main.rs | 31 +++++++++ src/bin/nydus-image/merge.rs | 88 ++++++++++++++++++++++++ src/bin/nydus-image/stat.rs | 9 +-- src/bin/nydus-image/validator.rs | 7 +- 10 files changed, 161 insertions(+), 31 deletions(-) create mode 100644 src/bin/nydus-image/merge.rs diff --git a/rafs/src/metadata/mod.rs b/rafs/src/metadata/mod.rs index 2360cb5f4e6..f64231d886c 100644 --- a/rafs/src/metadata/mod.rs +++ b/rafs/src/metadata/mod.rs @@ -468,7 +468,7 @@ impl RafsSuper { } /// Load Rafs super block from a metadata file. - pub fn load_from_metadata(path: &str, mode: RafsMode, validate_digest: bool) -> Result { + pub fn load_from_metadata(path: &Path, mode: RafsMode, validate_digest: bool) -> Result { // open bootstrap file let file = OpenOptions::new().read(true).write(false).open(path)?; let mut rs = RafsSuper { diff --git a/src/bin/nydus-image/builder/diff.rs b/src/bin/nydus-image/builder/diff.rs index ad62fcc6131..f242b31130f 100644 --- a/src/bin/nydus-image/builder/diff.rs +++ b/src/bin/nydus-image/builder/diff.rs @@ -854,7 +854,6 @@ pub mod tests { use super::*; use nydus_utils::exec; - use storage::RAFS_DEFAULT_CHUNK_SIZE; fn create_dir(path: &Path) { fs::create_dir_all(path).unwrap(); @@ -995,10 +994,7 @@ pub mod tests { (Overlay::UpperAddition, PathBuf::from("/test-1-symlink")), (Overlay::UpperAddition, PathBuf::from("/test-2")), ]; - let ctx = BuildContext { - chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, - ..Default::default() - }; + let ctx = BuildContext::default(); let nodes = walk_diff(&ctx, None, lower_dir.clone(), lower_dir.clone()).unwrap(); for (i, node) in nodes.into_iter().enumerate() { println!("lower node: {:?} {:?}", node.overlay, node.target()); diff --git 
a/src/bin/nydus-image/core/blob_compact.rs b/src/bin/nydus-image/core/blob_compact.rs index f06ea2f08ed..24c73e89e2f 100644 --- a/src/bin/nydus-image/core/blob_compact.rs +++ b/src/bin/nydus-image/core/blob_compact.rs @@ -551,8 +551,7 @@ impl BlobCompactor { backend: Arc, cfg: &Config, ) -> Result> { - let rs = - RafsSuper::load_from_metadata(s_boostrap.to_str().unwrap(), RafsMode::Direct, true)?; + let rs = RafsSuper::load_from_metadata(&s_boostrap, RafsMode::Direct, true)?; info!("load bootstrap {:?} successfully", s_boostrap); let mut build_ctx = BuildContext::new( "".to_string(), diff --git a/src/bin/nydus-image/core/context.rs b/src/bin/nydus-image/core/context.rs index 7ad59f41b1b..fbbdf4a7418 100644 --- a/src/bin/nydus-image/core/context.rs +++ b/src/bin/nydus-image/core/context.rs @@ -678,7 +678,7 @@ impl BootstrapManager { } } -#[derive(Default, Clone)] +#[derive(Clone)] pub struct BuildContext { /// Blob id (user specified or sha256(blob)). pub blob_id: String, @@ -759,6 +759,29 @@ impl BuildContext { } } +impl Default for BuildContext { + fn default() -> Self { + Self { + blob_id: String::new(), + aligned_chunk: false, + compressor: compress::Algorithm::default(), + digester: digest::Algorithm::default(), + explicit_uidgid: true, + whiteout_spec: WhiteoutSpec::default(), + + chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32, + fs_version: RafsVersion::default(), + + source_type: SourceType::default(), + source_path: PathBuf::new(), + + prefetch: Prefetch::default(), + blob_storage: None, + has_xattr: true, + } + } +} + #[derive(Serialize, Deserialize, Default, Debug, Clone)] pub struct BuildOutputBlob { pub blob_id: String, diff --git a/src/bin/nydus-image/core/node.rs b/src/bin/nydus-image/core/node.rs index 6f406d2f29c..c56fb1613ec 100644 --- a/src/bin/nydus-image/core/node.rs +++ b/src/bin/nydus-image/core/node.rs @@ -15,7 +15,6 @@ use std::os::linux::fs::MetadataExt; use std::os::unix::ffi::OsStrExt; use std::path::{Component, Path, PathBuf}; use 
std::str::FromStr; -use std::sync::Arc; use anyhow::{Context, Error, Result}; use nix::sys::stat; @@ -47,7 +46,7 @@ use super::chunk_dict::ChunkDict; use super::context::{BlobContext, BootstrapContext, BuildContext, RafsVersion}; use super::tree::Tree; -const ROOT_PATH_NAME: &[u8] = &[b'/']; +pub const ROOT_PATH_NAME: &[u8] = &[b'/']; /// Prefix for OCI whiteout file. pub const OCISPEC_WHITEOUT_PREFIX: &str = ".wh."; @@ -1199,7 +1198,7 @@ impl InodeWrapper { } } - pub fn from_inode_info(inode: &Arc) -> Self { + pub fn from_inode_info(inode: &dyn RafsInode) -> Self { if let Some(inode) = inode.as_any().downcast_ref::() { InodeWrapper::V5(to_rafsv5_inode(inode)) } else if let Some(inode) = inode.as_any().downcast_ref::() { diff --git a/src/bin/nydus-image/core/tree.rs b/src/bin/nydus-image/core/tree.rs index 7a65f8e0a4c..fe5355f1412 100644 --- a/src/bin/nydus-image/core/tree.rs +++ b/src/bin/nydus-image/core/tree.rs @@ -17,8 +17,8 @@ use std::ffi::OsStr; use std::ffi::OsString; +use std::os::unix::ffi::OsStrExt; use std::path::PathBuf; -use std::sync::Arc; use anyhow::Result; use rafs::metadata::layout::{bytes_to_os_str, RafsXAttrs, RAFS_ROOT_INODE}; @@ -27,6 +27,7 @@ use rafs::metadata::{Inode, RafsInode, RafsSuper}; use super::chunk_dict::ChunkDict; use super::node::{ ChunkSource, ChunkWrapper, InodeWrapper, Node, NodeChunk, Overlay, WhiteoutSpec, WhiteoutType, + ROOT_PATH_NAME, }; /// An in-memory tree structure to maintain information and topology of filesystem nodes. 
@@ -51,7 +52,8 @@ impl Tree { pub fn from_bootstrap(rs: &RafsSuper, chunk_dict: &mut T) -> Result { let tree_builder = MetadataTreeBuilder::new(&rs); let root_inode = rs.get_inode(RAFS_ROOT_INODE, true)?; - let root_node = tree_builder.parse_node(root_inode, PathBuf::from("/"))?; + let root_node = + MetadataTreeBuilder::parse_node(&rs, root_inode.as_ref(), PathBuf::from("/"))?; let mut tree = Tree::new(root_node); tree.children = timing_tracer!( @@ -240,7 +242,7 @@ impl Tree { } } -struct MetadataTreeBuilder<'a> { +pub struct MetadataTreeBuilder<'a> { rs: &'a RafsSuper, } @@ -277,7 +279,7 @@ impl<'a> MetadataTreeBuilder<'a> { let child = inode.get_child_by_index(idx)?; let child_ino = child.ino(); let child_path = parent_path.join(child.name()); - let child = self.parse_node(child, child_path)?; + let child = Self::parse_node(&self.rs, child.as_ref(), child_path)?; if child.is_reg() { for chunk in &child.chunks { @@ -297,7 +299,7 @@ impl<'a> MetadataTreeBuilder<'a> { } /// Convert a `RafsInode` object to an in-memory `Node` object. - fn parse_node(&self, inode: Arc, path: PathBuf) -> Result { + pub fn parse_node(rs: &RafsSuper, inode: &dyn RafsInode, path: PathBuf) -> Result { let chunks = if inode.is_reg() { let chunk_count = inode.get_chunk_count(); let mut chunks = Vec::with_capacity(chunk_count as usize); @@ -330,7 +332,7 @@ impl<'a> MetadataTreeBuilder<'a> { // to avoid breaking hardlink detecting logic. 
let src_dev = u64::MAX; - let inode_wrapper = InodeWrapper::from_inode_info(&inode); + let inode_wrapper = InodeWrapper::from_inode_info(inode); let source = PathBuf::from("/"); let target = Node::generate_target(&path, &source); let target_vec = Node::generate_target_vec(&target); @@ -341,7 +343,7 @@ impl<'a> MetadataTreeBuilder<'a> { src_dev, rdev: inode.rdev() as u64, overlay: Overlay::Lower, - explicit_uidgid: self.rs.meta.explicit_uidgid(), + explicit_uidgid: rs.meta.explicit_uidgid(), source, target, path, diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index ffbc153f06c..dce9162b3ba 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -38,6 +38,7 @@ use crate::core::context::{ use crate::core::node::{self, WhiteoutSpec}; use crate::core::prefetch::Prefetch; use crate::core::tree; +use crate::merge::Merger; use crate::trace::{EventTracerClass, TimingTracerClass, TraceClass}; use crate::validator::Validator; use storage::factory::{BackendConfig, BlobFactory}; @@ -47,6 +48,7 @@ mod trace; mod builder; mod core; mod inspect; +mod merge; mod stat; mod validator; @@ -322,6 +324,24 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .takes_value(true) ) ) + .subcommand( + SubCommand::with_name("merge") + .about("Merge multiple bootstraps into a overlaid bootstrap") + .arg( + Arg::with_name("bootstrap") + .long("bootstrap") + .short("B") + .help("output path of nydus overlaid bootstrap") + .required(true) + .takes_value(true), + ) + .arg( + Arg::with_name("SOURCE") + .help("bootstrap paths (allow one or more)") + .required(true) + .multiple(true), + ) + ) .subcommand( SubCommand::with_name("check") .about("Validates nydus image's filesystem metadata") @@ -505,6 +525,8 @@ fn main() -> Result<()> { if let Some(matches) = cmd.subcommand_matches("create") { Command::create(matches, &build_info) + } else if let Some(matches) = cmd.subcommand_matches("merge") { + Command::merge(matches) } else if let 
Some(matches) = cmd.subcommand_matches("check") { Command::check(matches, &build_info) } else if let Some(matches) = cmd.subcommand_matches("inspect") { @@ -650,6 +672,15 @@ impl Command { Ok(()) } + fn merge(matches: &clap::ArgMatches) -> Result<()> { + let source_bootstrap_paths: Vec = matches + .values_of("SOURCE") + .map(|paths| paths.map(PathBuf::from).collect()) + .unwrap(); + let target_bootstrap_path = Self::get_bootstrap(&matches)?; + Merger::merge(source_bootstrap_paths, target_bootstrap_path.to_path_buf()) + } + fn compact(matches: &clap::ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { let bootstrap_path = PathBuf::from(Self::get_bootstrap(matches)?); let dst_bootstrap = match matches.value_of("output-bootstrap") { diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs new file mode 100644 index 00000000000..e9901d3505e --- /dev/null +++ b/src/bin/nydus-image/merge.rs @@ -0,0 +1,88 @@ +// Copyright (C) 2022 Nydus Developers. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::path::{Path, PathBuf}; + +use anyhow::Result; + +use rafs::metadata::layout::RAFS_ROOT_INODE; +use rafs::metadata::{RafsInode, RafsMode, RafsSuper}; + +use crate::core::bootstrap::Bootstrap; +use crate::core::chunk_dict::HashChunkDict; +use crate::core::context::ArtifactStorage; +use crate::core::context::{BlobContext, BlobManager, BootstrapContext, BuildContext}; +use crate::core::node::{ChunkSource, Overlay, WhiteoutSpec}; +use crate::core::tree::{MetadataTreeBuilder, Tree}; + +pub struct Merger {} + +impl Merger { + pub fn merge(sources: Vec, target: PathBuf) -> Result<()> { + if sources.is_empty() { + bail!("please provide at least one source bootstrap"); + } + + let mut dict = HashChunkDict::default(); + + let mut tree: Option = None; + let mut blob_mgr = BlobManager::new(); + + let mut blob_idx = 0u32; + for source in sources { + let rs = RafsSuper::load_from_metadata(&source, RafsMode::Direct, true)?; + + let blobs = 
rs.superblock.get_blob_infos(); + if blobs.len() > 0 { + let mut blob_ctx = BlobContext::from(blobs[0].as_ref(), ChunkSource::Parent); + let blob_id = source.file_name().unwrap().to_str().unwrap(); + blob_ctx.blob_id = blob_id.to_owned(); + blob_mgr.add(blob_ctx); + } + + if let Some(tree) = &mut tree { + let mut nodes = Vec::new(); + rs.walk_inodes(RAFS_ROOT_INODE, None, &mut |inode: &dyn RafsInode, + path: &Path| + -> Result<()> { + let mut node = MetadataTreeBuilder::parse_node(&rs, inode, path.to_path_buf())?; + for chunk in &mut node.chunks { + chunk.inner.set_blob_index(blob_idx); + } + node.overlay = Overlay::UpperAddition; + match node.whiteout_type(WhiteoutSpec::Oci) { + Some(_) => { + nodes.insert(0, node.clone()); + } + _ => { + nodes.push(node.clone()); + } + } + Ok(()) + })?; + for node in &nodes { + tree.apply(node, true, WhiteoutSpec::Oci)?; + } + } else { + tree = Some(Tree::from_bootstrap(&rs, &mut dict)?); + } + + if blobs.len() > 0 { + blob_idx += 1; + } + } + + // Safe to unwrap because source bootstrap is at least one. 
+ let mut tree = tree.unwrap(); + let mut bootstrap = Bootstrap::new()?; + let storage = ArtifactStorage::SingleFile(target.clone()); + let mut bootstrap_ctx = BootstrapContext::new(storage, false)?; + let mut ctx = BuildContext::default(); + bootstrap.build(&mut ctx, &mut bootstrap_ctx, &mut tree)?; + let blob_table = blob_mgr.to_blob_table(&ctx)?; + bootstrap.dump(&mut ctx, &mut bootstrap_ctx, &blob_table)?; + + Ok(()) + } +} diff --git a/src/bin/nydus-image/stat.rs b/src/bin/nydus-image/stat.rs index 8ff25d29973..a067308a109 100644 --- a/src/bin/nydus-image/stat.rs +++ b/src/bin/nydus-image/stat.rs @@ -7,7 +7,7 @@ use std::fs::OpenOptions; use std::path::Path; use std::sync::atomic::Ordering; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use rafs::metadata::{RafsMode, RafsSuper}; use serde::Serialize; @@ -158,12 +158,7 @@ impl ImageStat { } pub fn stat(&mut self, path: &Path, is_base: bool) -> Result<()> { - let p = match path.to_str() { - None => bail!("invalid path to nydus image metadata blob"), - Some(v) => v, - }; - - let rs = RafsSuper::load_from_metadata(p, RafsMode::Direct, false)?; + let rs = RafsSuper::load_from_metadata(path, RafsMode::Direct, false)?; let mut dict = HashChunkDict::default(); let mut hardlinks = HashSet::new(); let tree = diff --git a/src/bin/nydus-image/validator.rs b/src/bin/nydus-image/validator.rs index dfa31ff871d..eaf2082d5ff 100644 --- a/src/bin/nydus-image/validator.rs +++ b/src/bin/nydus-image/validator.rs @@ -6,7 +6,7 @@ use std::path::Path; -use anyhow::{Context, Error, Result}; +use anyhow::{Context, Result}; use rafs::metadata::{RafsMode, RafsSuper}; use crate::tree::Tree; @@ -17,10 +17,7 @@ pub struct Validator { impl Validator { pub fn new(bootstrap_path: &Path) -> Result { - let path = bootstrap_path - .to_str() - .ok_or_else(|| Error::msg("bootstrap path is invalid"))?; - let sb = RafsSuper::load_from_metadata(path, RafsMode::Direct, true)?; + let sb = 
RafsSuper::load_from_metadata(bootstrap_path, RafsMode::Direct, true)?; Ok(Self { sb }) } From 0c5e6f6bfad60e8c1c3b36ee86e5bf3f3d25fd13 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Mon, 21 Mar 2022 11:24:41 +0000 Subject: [PATCH 04/12] Builder: support compressed blob offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nydus tar build converts an OCI formatted tar stream to a nydus formatted tar stream. The nydus blob tar stream contains blob and bootstrap files with the following file tree structure: /image ├── image.blob ├── image.boot So for the chunks of files in the nydus bootstrap, a blob compressed offset of 1024 (size_of(tar_header) * 2) is required. Add `--blob-offset` option to builder to support this case. Signed-off-by: Yan Song --- src/bin/nydus-image/builder/diff.rs | 2 +- src/bin/nydus-image/builder/directory.rs | 6 +++++- src/bin/nydus-image/builder/stargz.rs | 2 +- src/bin/nydus-image/core/blob_compact.rs | 3 ++- src/bin/nydus-image/core/context.rs | 23 ++++++++++++++++++----- src/bin/nydus-image/main.rs | 16 ++++++++++++++++ 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/bin/nydus-image/builder/diff.rs b/src/bin/nydus-image/builder/diff.rs index f242b31130f..b58c1132aa7 100644 --- a/src/bin/nydus-image/builder/diff.rs +++ b/src/bin/nydus-image/builder/diff.rs @@ -305,7 +305,7 @@ fn dump_blob( blob_nodes: &mut Vec, chunk_dict: Arc, ) -> Result<(Option, ChunkMap)> { - let mut blob_ctx = BlobContext::new(blob_id, blob_storage)?; + let mut blob_ctx = BlobContext::new(blob_id, blob_storage, ctx.blob_offset)?; blob_ctx.set_chunk_dict(chunk_dict); blob_ctx.set_chunk_size(ctx.chunk_size); blob_ctx.set_meta_info_enabled(ctx.fs_version == RafsVersion::V6); diff --git a/src/bin/nydus-image/builder/directory.rs b/src/bin/nydus-image/builder/directory.rs index 794888a496e..23a1d825dee 100644 --- a/src/bin/nydus-image/builder/directory.rs +++ b/src/bin/nydus-image/builder/directory.rs @@ -130,7 
+130,11 @@ impl Builder for DirectoryBuilder { )?; // Dump blob file - let mut blob_ctx = BlobContext::new(ctx.blob_id.clone(), ctx.blob_storage.clone())?; + let mut blob_ctx = BlobContext::new( + ctx.blob_id.clone(), + ctx.blob_storage.clone(), + ctx.blob_offset, + )?; blob_ctx.set_chunk_dict(blob_mgr.get_chunk_dict()); blob_ctx.set_chunk_size(ctx.chunk_size); blob_ctx.set_meta_info_enabled(ctx.fs_version == RafsVersion::V6); diff --git a/src/bin/nydus-image/builder/stargz.rs b/src/bin/nydus-image/builder/stargz.rs index b569c29b310..66134370be5 100644 --- a/src/bin/nydus-image/builder/stargz.rs +++ b/src/bin/nydus-image/builder/stargz.rs @@ -574,7 +574,7 @@ impl StargzBuilder { let mut decompressed_blob_size = 0u64; let mut compressed_blob_size = 0u64; let blob_index = blob_mgr.alloc_index()?; - let mut blob_ctx = BlobContext::new(ctx.blob_id.clone(), ctx.blob_storage.clone())?; + let mut blob_ctx = BlobContext::new(ctx.blob_id.clone(), ctx.blob_storage.clone(), 0)?; blob_ctx.set_chunk_dict(blob_mgr.get_chunk_dict()); blob_ctx.set_chunk_size(ctx.chunk_size); diff --git a/src/bin/nydus-image/core/blob_compact.rs b/src/bin/nydus-image/core/blob_compact.rs index 24c73e89e2f..f6d93ecd49d 100644 --- a/src/bin/nydus-image/core/blob_compact.rs +++ b/src/bin/nydus-image/core/blob_compact.rs @@ -516,7 +516,7 @@ impl BlobCompactor { } State::Rebuild(cs) => { let blob_storage = ArtifactStorage::FileDir(PathBuf::from(dir)); - let mut blob_ctx = BlobContext::new(String::from(""), Some(blob_storage))?; + let mut blob_ctx = BlobContext::new(String::from(""), Some(blob_storage), 0)?; blob_ctx.set_meta_info_enabled(self.is_v6()); let blob_idx = self.new_blob_mgr.alloc_index()?; let new_chunks = cs.dump( @@ -556,6 +556,7 @@ impl BlobCompactor { let mut build_ctx = BuildContext::new( "".to_string(), false, + 0, rs.meta.get_compressor(), rs.meta.get_digester(), rs.meta.explicit_uidgid(), diff --git a/src/bin/nydus-image/core/context.rs b/src/bin/nydus-image/core/context.rs index 
fbbdf4a7418..1d1c5f1cceb 100644 --- a/src/bin/nydus-image/core/context.rs +++ b/src/bin/nydus-image/core/context.rs @@ -284,18 +284,22 @@ impl Clone for BlobContext { } impl BlobContext { - pub fn new(blob_id: String, blob_stor: Option) -> Result { + pub fn new( + blob_id: String, + blob_stor: Option, + blob_offset: u64, + ) -> Result { let writer = if let Some(blob_stor) = blob_stor { Some(ArtifactBufferWriter::new(blob_stor)?) } else { None }; - Ok(Self::new_with_writer(blob_id, writer)) + Ok(Self::new_with_writer(blob_id, writer, blob_offset)) } pub fn from(blob: &BlobInfo, chunk_source: ChunkSource) -> Self { - let mut ctx = Self::new_with_writer(blob.blob_id().to_owned(), None); + let mut ctx = Self::new_with_writer(blob.blob_id().to_owned(), None, 0); ctx.blob_readahead_size = blob.readahead_size(); ctx.chunk_count = blob.chunk_count(); @@ -320,7 +324,11 @@ impl BlobContext { ctx } - pub fn new_with_writer(blob_id: String, writer: Option) -> Self { + pub fn new_with_writer( + blob_id: String, + writer: Option, + blob_offset: u64, + ) -> Self { let size = if writer.is_some() { RAFS_MAX_CHUNK_SIZE as usize } else { @@ -339,7 +347,7 @@ impl BlobContext { compressed_blob_size: 0, decompressed_blob_size: 0, - compress_offset: 0, + compress_offset: blob_offset, decompress_offset: 0, chunk_count: 0, @@ -687,6 +695,8 @@ pub struct BuildContext { /// `decompress_offset` within chunk info. Therefore, provide a new flag /// to image tool thus to align chunks in blob with 4k size. pub aligned_chunk: bool, + /// Add a offset for compressed blob. + pub blob_offset: u64, /// Blob chunk compress flag. pub compressor: compress::Algorithm, /// Inode and chunk digest algorithm flag. 
@@ -721,6 +731,7 @@ impl BuildContext { pub fn new( blob_id: String, aligned_chunk: bool, + blob_offset: u64, compressor: compress::Algorithm, digester: digest::Algorithm, explicit_uidgid: bool, @@ -733,6 +744,7 @@ impl BuildContext { BuildContext { blob_id, aligned_chunk, + blob_offset, compressor, digester, explicit_uidgid, @@ -764,6 +776,7 @@ impl Default for BuildContext { Self { blob_id: String::new(), aligned_chunk: false, + blob_offset: 0, compressor: compress::Algorithm::default(), digester: digest::Algorithm::default(), explicit_uidgid: true, diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index dce9162b3ba..cda44e2cda2 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -295,6 +295,13 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .help("Align data chunks to 4K") .takes_value(false) ) + .arg( + Arg::with_name("blob-offset") + .long("blob-offset") + .help("add an offset for compressed blob (is only used to put the blob in the tarball)") + .default_value("0") + .takes_value(true) + ) .arg( Arg::with_name("blob-dir") .long("blob-dir") @@ -547,6 +554,7 @@ impl Command { fn create(matches: &clap::ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { let blob_id = Self::get_blob_id(&matches)?; let chunk_size = Self::get_chunk_size(&matches)?; + let blob_offset = Self::get_blob_offset(&matches)?; let parent_bootstrap = Self::get_parent_bootstrap(&matches)?; let source_path = PathBuf::from(matches.value_of("SOURCE").unwrap()); let extra_paths: Vec = matches @@ -600,6 +608,7 @@ impl Command { let mut build_ctx = BuildContext::new( blob_id, aligned_chunk, + blob_offset, compressor, digester, !repeatable, @@ -918,6 +927,13 @@ impl Command { } } + fn get_blob_offset(matches: &clap::ArgMatches) -> Result { + match matches.value_of("blob-offset") { + None => Ok(0), + Some(v) => u64::from_str_radix(v, 10).context(format!("invalid blob offset {}", v)), + } + } + fn get_fs_version(matches: 
&clap::ArgMatches) -> Result { match matches.value_of("fs-version") { None => Ok(RafsVersion::V6), From 2b9c61fc63f2fcba3de46bb58b77bbafe7dcf7d4 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Wed, 23 Mar 2022 03:39:00 +0000 Subject: [PATCH 05/12] Builder: support chunk dict for merge Add `--chunk-dict` option support for the merge sub-command. We need to get the blob list from the chunk-dict bootstrap to fill the blob table in the final bootstrap. Signed-off-by: Yan Song --- rafs/src/metadata/mod.rs | 2 +- src/bin/nydus-image/core/chunk_dict.rs | 23 +++++++++----- src/bin/nydus-image/main.rs | 20 ++++++++++-- src/bin/nydus-image/merge.rs | 43 +++++++++++++++++++------- 4 files changed, 65 insertions(+), 23 deletions(-) diff --git a/rafs/src/metadata/mod.rs b/rafs/src/metadata/mod.rs index f64231d886c..8aca32593f5 100644 --- a/rafs/src/metadata/mod.rs +++ b/rafs/src/metadata/mod.rs @@ -483,7 +483,7 @@ impl RafsSuper { Ok(rs) } - pub fn load_chunk_dict_from_metadata(path: &str) -> Result { + pub fn load_chunk_dict_from_metadata(path: &Path) -> Result { // open bootstrap file let file = OpenOptions::new().read(true).write(false).open(path)?; let mut rs = RafsSuper { diff --git a/src/bin/nydus-image/core/chunk_dict.rs b/src/bin/nydus-image/core/chunk_dict.rs index a7cf97abac0..85b0dda51c2 100644 --- a/src/bin/nydus-image/core/chunk_dict.rs +++ b/src/bin/nydus-image/core/chunk_dict.rs @@ -4,6 +4,7 @@ use std::collections::{BTreeMap, HashMap}; use std::mem::size_of; +use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex}; @@ -93,7 +94,7 @@ impl ChunkDict for HashChunkDict { } impl HashChunkDict { - fn from_bootstrap_file(path: &str) -> Result { + fn from_bootstrap_file(path: &Path) -> Result { let rs = RafsSuper::load_chunk_dict_from_metadata(path) .with_context(|| format!("failed to open bootstrap file {:?}", path))?; let mut d = HashChunkDict { @@ -138,7 +139,7 @@ impl HashChunkDict { } } -/// Load a chunk dictionary from external source. 
+/// Parse a chunk dictionary argument string. /// /// # Argument /// `arg` may be in inform of: @@ -150,22 +151,28 @@ impl HashChunkDict { /// image.boot /// ~/image/image.boot /// boltdb=/var/db/dict.db (not supported yet) -pub(crate) fn import_chunk_dict(arg: &str) -> Result> { +pub fn parse_chunk_dict_arg(arg: &str) -> Result { let (file_type, file_path) = match arg.find('=') { None => ("bootstrap", arg), Some(idx) => (&arg[0..idx], &arg[idx + 1..]), }; - info!("import chunk dict file {}={}", file_type, file_path); + info!("parse chunk dict argument {}={}", file_type, file_path); + match file_type { - "bootstrap" => { - HashChunkDict::from_bootstrap_file(file_path).map(|d| Arc::new(d) as Arc) + "bootstrap" => Ok(PathBuf::from(file_path)), + _ => { + bail!("invalid chunk dict type {}", file_type); } - _ => Err(std::io::Error::from_raw_os_error(libc::EINVAL)) - .with_context(|| format!("invalid chunk dict type {}", file_type)), } } +/// Load a chunk dictionary from external source. +pub(crate) fn import_chunk_dict(arg: &str) -> Result> { + let file_path = parse_chunk_dict_arg(arg)?; + HashChunkDict::from_bootstrap_file(&file_path).map(|d| Arc::new(d) as Arc) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index cda44e2cda2..4b811f3da31 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -30,7 +30,7 @@ use storage::RAFS_DEFAULT_CHUNK_SIZE; use crate::builder::{Builder, DiffBuilder, DirectoryBuilder, StargzBuilder}; use crate::core::blob_compact::BlobCompactor; -use crate::core::chunk_dict::import_chunk_dict; +use crate::core::chunk_dict::{import_chunk_dict, parse_chunk_dict_arg}; use crate::core::context::{ ArtifactStorage, BlobManager, BootstrapManager, BuildContext, BuildOutput, BuildOutputArtifact, RafsVersion, SourceType, @@ -342,6 +342,13 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .required(true) .takes_value(true), ) + .arg( + 
Arg::with_name("chunk-dict") + .long("chunk-dict") + .short("M") + .help("Specify a chunk dictionary for chunk deduplication") + .takes_value(true) + ) .arg( Arg::with_name("SOURCE") .help("bootstrap paths (allow one or more)") @@ -687,7 +694,16 @@ impl Command { .map(|paths| paths.map(PathBuf::from).collect()) .unwrap(); let target_bootstrap_path = Self::get_bootstrap(&matches)?; - Merger::merge(source_bootstrap_paths, target_bootstrap_path.to_path_buf()) + let chunk_dict_path = if let Some(arg) = matches.value_of("chunk-dict") { + Some(parse_chunk_dict_arg(arg)?) + } else { + None + }; + Merger::merge( + source_bootstrap_paths, + target_bootstrap_path.to_path_buf(), + chunk_dict_path, + ) } fn compact(matches: &clap::ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index e9901d3505e..5451a10d5d3 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 +use std::collections::HashSet; use std::path::{Path, PathBuf}; use anyhow::Result; @@ -19,7 +20,11 @@ use crate::core::tree::{MetadataTreeBuilder, Tree}; pub struct Merger {} impl Merger { - pub fn merge(sources: Vec, target: PathBuf) -> Result<()> { + pub fn merge( + sources: Vec, + target: PathBuf, + chunk_dict: Option, + ) -> Result<()> { if sources.is_empty() { bail!("please provide at least one source bootstrap"); } @@ -28,16 +33,33 @@ impl Merger { let mut tree: Option = None; let mut blob_mgr = BlobManager::new(); + let mut chunk_dict_blobs = HashSet::new(); - let mut blob_idx = 0u32; for source in sources { let rs = RafsSuper::load_from_metadata(&source, RafsMode::Direct, true)?; - let blobs = rs.superblock.get_blob_infos(); - if blobs.len() > 0 { - let mut blob_ctx = BlobContext::from(blobs[0].as_ref(), ChunkSource::Parent); - let blob_id = source.file_name().unwrap().to_str().unwrap(); - blob_ctx.blob_id = blob_id.to_owned(); + + if let Some(path) = 
&chunk_dict { + let rs = RafsSuper::load_from_metadata(&path, RafsMode::Direct, true)?; + for blob in rs.superblock.get_blob_infos() { + chunk_dict_blobs.insert(blob.blob_id().to_string()); + } + } + + let blob_id = source.file_name().unwrap().to_str().unwrap(); + + let mut blob_idx_map = Vec::new(); + let mut parent_blob_added = false; + for blob in &blobs { + let mut blob_ctx = BlobContext::from(blob, ChunkSource::Parent); + if chunk_dict_blobs.get(blob.blob_id()).is_none() { + if parent_blob_added { + bail!("invalid bootstrap, seems have multiple non-chunk-dict blobs in this bootstrap"); + } + blob_ctx.blob_id = blob_id.to_owned(); + parent_blob_added = true; + } + blob_idx_map.push(blob_mgr.len() as u32); blob_mgr.add(blob_ctx); } @@ -48,7 +70,8 @@ impl Merger { -> Result<()> { let mut node = MetadataTreeBuilder::parse_node(&rs, inode, path.to_path_buf())?; for chunk in &mut node.chunks { - chunk.inner.set_blob_index(blob_idx); + let origin_blob_index = chunk.inner.blob_index() as usize; + chunk.inner.set_blob_index(blob_idx_map[origin_blob_index]); } node.overlay = Overlay::UpperAddition; match node.whiteout_type(WhiteoutSpec::Oci) { @@ -67,10 +90,6 @@ impl Merger { } else { tree = Some(Tree::from_bootstrap(&rs, &mut dict)?); } - - if blobs.len() > 0 { - blob_idx += 1; - } } // Safe to unwrap because source bootstrap is at least one. From d5a2527e52dd8d00dcb5c5b13feb379226138370 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Wed, 23 Mar 2022 08:05:59 +0000 Subject: [PATCH 06/12] Builder: support --prefetch-policy option for merge Add `--prefetch-policy` option support for the merge sub-command. This option has the same function as in the create sub-command: it is used to enable prefetch to optimize the inode/chunk layout. 
Signed-off-by: Yan Song --- src/bin/nydus-image/main.rs | 28 +++++++++++++++++++++++----- src/bin/nydus-image/merge.rs | 6 +++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index 4b811f3da31..7989ce92079 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -349,6 +349,16 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .help("Specify a chunk dictionary for chunk deduplication") .takes_value(true) ) + .arg( + Arg::with_name("prefetch-policy") + .long("prefetch-policy") + .short("P") + .help("prefetch policy:") + .takes_value(true) + .required(false) + .default_value("none") + .possible_values(&["fs", "blob", "none"]), + ) .arg( Arg::with_name("SOURCE") .help("bootstrap paths (allow one or more)") @@ -606,11 +616,7 @@ impl Command { } } - let prefetch_policy = matches - .value_of("prefetch-policy") - .unwrap_or_default() - .parse()?; - let prefetch = Prefetch::new(prefetch_policy)?; + let prefetch = Self::get_prefetch(matches)?; let mut build_ctx = BuildContext::new( blob_id, @@ -699,7 +705,11 @@ impl Command { } else { None }; + let prefetch = Self::get_prefetch(matches)?; + let mut ctx = BuildContext::default(); + ctx.prefetch = prefetch; Merger::merge( + &mut ctx, source_bootstrap_paths, target_bootstrap_path.to_path_buf(), chunk_dict_path, @@ -943,6 +953,14 @@ impl Command { } } + fn get_prefetch(matches: &clap::ArgMatches) -> Result { + let prefetch_policy = matches + .value_of("prefetch-policy") + .unwrap_or_default() + .parse()?; + Prefetch::new(prefetch_policy) + } + fn get_blob_offset(matches: &clap::ArgMatches) -> Result { match matches.value_of("blob-offset") { None => Ok(0), diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index 5451a10d5d3..982faefa593 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -21,6 +21,7 @@ pub struct Merger {} impl Merger { pub fn merge( + ctx: &mut 
BuildContext, sources: Vec, target: PathBuf, chunk_dict: Option, @@ -97,10 +98,9 @@ impl Merger { let mut bootstrap = Bootstrap::new()?; let storage = ArtifactStorage::SingleFile(target.clone()); let mut bootstrap_ctx = BootstrapContext::new(storage, false)?; - let mut ctx = BuildContext::default(); - bootstrap.build(&mut ctx, &mut bootstrap_ctx, &mut tree)?; + bootstrap.build(ctx, &mut bootstrap_ctx, &mut tree)?; let blob_table = blob_mgr.to_blob_table(&ctx)?; - bootstrap.dump(&mut ctx, &mut bootstrap_ctx, &blob_table)?; + bootstrap.dump(ctx, &mut bootstrap_ctx, &blob_table)?; Ok(()) } From 7811748b8661e8dfe1added36d5a319d17e8078b Mon Sep 17 00:00:00 2001 From: Yan Song Date: Wed, 23 Mar 2022 10:02:30 +0000 Subject: [PATCH 07/12] Builder: tidy code for merge implementation Signed-off-by: Yan Song --- src/bin/nydus-image/core/tree.rs | 2 -- src/bin/nydus-image/main.rs | 15 +++++---- src/bin/nydus-image/merge.rs | 54 +++++++++++++++++++++----------- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/bin/nydus-image/core/tree.rs b/src/bin/nydus-image/core/tree.rs index fe5355f1412..65d972fd721 100644 --- a/src/bin/nydus-image/core/tree.rs +++ b/src/bin/nydus-image/core/tree.rs @@ -17,7 +17,6 @@ use std::ffi::OsStr; use std::ffi::OsString; -use std::os::unix::ffi::OsStrExt; use std::path::PathBuf; use anyhow::Result; @@ -27,7 +26,6 @@ use rafs::metadata::{Inode, RafsInode, RafsSuper}; use super::chunk_dict::ChunkDict; use super::node::{ ChunkSource, ChunkWrapper, InodeWrapper, Node, NodeChunk, Overlay, WhiteoutSpec, WhiteoutType, - ROOT_PATH_NAME, }; /// An in-memory tree structure to maintain information and topology of filesystem nodes. 
diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index 7989ce92079..24ee0206f66 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -250,7 +250,7 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { Arg::with_name("prefetch-policy") .long("prefetch-policy") .short("P") - .help("prefetch policy:") + .help("blob data prefetch policy") .takes_value(true) .required(false) .default_value("none") @@ -353,7 +353,7 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { Arg::with_name("prefetch-policy") .long("prefetch-policy") .short("P") - .help("prefetch policy:") + .help("blob data prefetch policy") .takes_value(true) .required(false) .default_value("none") @@ -705,9 +705,10 @@ impl Command { } else { None }; - let prefetch = Self::get_prefetch(matches)?; - let mut ctx = BuildContext::default(); - ctx.prefetch = prefetch; + let mut ctx = BuildContext { + prefetch: Self::get_prefetch(matches)?, + ..Default::default() + }; Merger::merge( &mut ctx, source_bootstrap_paths, @@ -964,7 +965,9 @@ impl Command { fn get_blob_offset(matches: &clap::ArgMatches) -> Result { match matches.value_of("blob-offset") { None => Ok(0), - Some(v) => u64::from_str_radix(v, 10).context(format!("invalid blob offset {}", v)), + Some(v) => v + .parse::() + .context(format!("invalid blob offset {}", v)), } } diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index 982faefa593..b975e5e97fa 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -17,9 +17,21 @@ use crate::core::context::{BlobContext, BlobManager, BootstrapContext, BuildCont use crate::core::node::{ChunkSource, Overlay, WhiteoutSpec}; use crate::core::tree::{MetadataTreeBuilder, Tree}; +/// Merger merge multiple bootstraps (generally come from nydus tar blob of +/// intermediate image layer) into one bootstrap (uses as final image layer). 
pub struct Merger {} impl Merger { + /// Merge assumes the bootstrap name as the hash of whole tar blob. + fn get_blob_hash(bootstrap_path: PathBuf) -> Result { + let blob_hash = bootstrap_path + .file_name() + .ok_or_else(|| anyhow!("get file name"))? + .to_str() + .ok_or_else(|| anyhow!("convert to string"))?; + Ok(blob_hash.to_string()) + } + pub fn merge( ctx: &mut BuildContext, sources: Vec, @@ -27,37 +39,39 @@ impl Merger { chunk_dict: Option, ) -> Result<()> { if sources.is_empty() { - bail!("please provide at least one source bootstrap"); + bail!("please provide at least one source bootstrap path"); } - let mut dict = HashChunkDict::default(); - let mut tree: Option = None; let mut blob_mgr = BlobManager::new(); - let mut chunk_dict_blobs = HashSet::new(); - for source in sources { - let rs = RafsSuper::load_from_metadata(&source, RafsMode::Direct, true)?; - let blobs = rs.superblock.get_blob_infos(); - - if let Some(path) = &chunk_dict { - let rs = RafsSuper::load_from_metadata(&path, RafsMode::Direct, true)?; + for bootstrap_path in sources { + // Get the blobs come from chunk dict bootstrap. 
+ let mut chunk_dict_blobs = HashSet::new(); + if let Some(chunk_dict_path) = &chunk_dict { + let rs = RafsSuper::load_from_metadata(&chunk_dict_path, RafsMode::Direct, true)?; for blob in rs.superblock.get_blob_infos() { chunk_dict_blobs.insert(blob.blob_id().to_string()); } } - let blob_id = source.file_name().unwrap().to_str().unwrap(); - + let rs = RafsSuper::load_from_metadata(&bootstrap_path, RafsMode::Direct, true)?; + let parent_blobs = rs.superblock.get_blob_infos(); + let blob_hash = Self::get_blob_hash(bootstrap_path)?; let mut blob_idx_map = Vec::new(); let mut parent_blob_added = false; - for blob in &blobs { + + for blob in &parent_blobs { let mut blob_ctx = BlobContext::from(blob, ChunkSource::Parent); if chunk_dict_blobs.get(blob.blob_id()).is_none() { + // Only up to one blob from the parent bootstrap, the other blobs should be + // from the chunk dict image. if parent_blob_added { bail!("invalid bootstrap, seems have multiple non-chunk-dict blobs in this bootstrap"); } - blob_ctx.blob_id = blob_id.to_owned(); + // The blob id (blob sha256 hash) in parent bootstrap is invalid for nydusd + // runtime, should change it to the hash of whole tar blob. + blob_ctx.blob_id = blob_hash.to_owned(); parent_blob_added = true; } blob_idx_map.push(blob_mgr.len() as u32); @@ -72,15 +86,18 @@ impl Merger { let mut node = MetadataTreeBuilder::parse_node(&rs, inode, path.to_path_buf())?; for chunk in &mut node.chunks { let origin_blob_index = chunk.inner.blob_index() as usize; + // Set the blob index of chunk to real index in blob table of final bootstrap. chunk.inner.set_blob_index(blob_idx_map[origin_blob_index]); } node.overlay = Overlay::UpperAddition; match node.whiteout_type(WhiteoutSpec::Oci) { Some(_) => { - nodes.insert(0, node.clone()); + // Insert removal operations at the head, so they will be handled first when + // applying to lower layer. 
+ nodes.insert(0, node); } _ => { - nodes.push(node.clone()); + nodes.push(node); } } Ok(()) @@ -89,14 +106,15 @@ impl Merger { tree.apply(node, true, WhiteoutSpec::Oci)?; } } else { + let mut dict = HashChunkDict::default(); tree = Some(Tree::from_bootstrap(&rs, &mut dict)?); } } - // Safe to unwrap because source bootstrap is at least one. + // Safe to unwrap because there is at least one source bootstrap. let mut tree = tree.unwrap(); let mut bootstrap = Bootstrap::new()?; - let storage = ArtifactStorage::SingleFile(target.clone()); + let storage = ArtifactStorage::SingleFile(target); let mut bootstrap_ctx = BootstrapContext::new(storage, false)?; bootstrap.build(ctx, &mut bootstrap_ctx, &mut tree)?; let blob_table = blob_mgr.to_blob_table(&ctx)?; From 9171d3970e3b93d46cc8dcde6507c1859e7c29e7 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Thu, 31 Mar 2022 03:04:19 +0000 Subject: [PATCH 08/12] Builder: fix conflict ino cross layers in merge Set node's layer index to distinguish same inode number (from bootstrap) between different layers. 
Signed-off-by: Yan Song --- rafs/src/metadata/mod.rs | 16 +++++-------- src/bin/nydus-image/builder/directory.rs | 12 ++++++---- src/bin/nydus-image/builder/stargz.rs | 15 +++++++----- src/bin/nydus-image/core/bootstrap.rs | 30 +++++++++--------------- src/bin/nydus-image/core/context.rs | 8 +++---- src/bin/nydus-image/core/node.rs | 3 +++ src/bin/nydus-image/core/tree.rs | 1 + src/bin/nydus-image/merge.rs | 14 ++++++++--- 8 files changed, 52 insertions(+), 47 deletions(-) diff --git a/rafs/src/metadata/mod.rs b/rafs/src/metadata/mod.rs index 8aca32593f5..8e0b197a3dc 100644 --- a/rafs/src/metadata/mod.rs +++ b/rafs/src/metadata/mod.rs @@ -749,24 +749,20 @@ impl RafsSuper { cb: &mut dyn FnMut(&dyn RafsInode, &Path) -> anyhow::Result<()>, ) -> anyhow::Result<()> { let inode = self.get_inode(ino, false)?; - if !inode.is_dir() { - return Ok(()); - } - let parent_path = if let Some(parent) = parent { + let path = if let Some(parent) = parent { parent.join(inode.name()) } else { PathBuf::from("/") }; + cb(inode.as_ref(), &path)?; + if !inode.is_dir() { + return Ok(()); + } let child_count = inode.get_child_count(); for idx in 0..child_count { let child = inode.get_child_by_index(idx)?; let child_ino = child.ino(); - if child.is_dir() { - self.walk_inodes(child_ino, Some(&parent_path), cb)?; - } else { - let child_path = parent_path.join(child.name()); - cb(child.as_ref(), &child_path)?; - } + self.walk_inodes(child_ino, Some(&path), cb)?; } Ok(()) } diff --git a/src/bin/nydus-image/builder/directory.rs b/src/bin/nydus-image/builder/directory.rs index 23a1d825dee..ae86fd7d576 100644 --- a/src/bin/nydus-image/builder/directory.rs +++ b/src/bin/nydus-image/builder/directory.rs @@ -30,6 +30,7 @@ impl FilesystemTreeBuilder { ctx: &mut BuildContext, bootstrap_ctx: &mut BootstrapContext, parent: &mut Node, + layer_idx: u16, ) -> Result> { let mut result = Vec::new(); if !parent.is_dir() { @@ -43,7 +44,7 @@ impl FilesystemTreeBuilder { event_tracer!("load_from_directory", 
+children.len()); for child in children { let path = child.path(); - let child = Node::new( + let mut child = Node::new( ctx.fs_version, ctx.source_path.clone(), path.clone(), @@ -53,6 +54,7 @@ impl FilesystemTreeBuilder { true, ) .with_context(|| format!("failed to create node {:?}", path))?; + child.layer_idx = layer_idx; // as per OCI spec, whiteout file should not be present within final image // or filesystem, only existed in layers. @@ -64,7 +66,7 @@ impl FilesystemTreeBuilder { } let mut child = Tree::new(child); - child.children = self.load_children(ctx, bootstrap_ctx, &mut child.node)?; + child.children = self.load_children(ctx, bootstrap_ctx, &mut child.node, layer_idx)?; result.push(child); } @@ -84,6 +86,7 @@ impl DirectoryBuilder { &mut self, ctx: &mut BuildContext, bootstrap_ctx: &mut BootstrapContext, + layer_idx: u16, ) -> Result { let node = Node::new( ctx.fs_version, @@ -98,7 +101,7 @@ impl DirectoryBuilder { let tree_builder = FilesystemTreeBuilder::new(); tree.children = timing_tracer!( - { tree_builder.load_children(ctx, bootstrap_ctx, &mut tree.node) }, + { tree_builder.load_children(ctx, bootstrap_ctx, &mut tree.node, layer_idx) }, "load_from_directory" )?; @@ -115,7 +118,8 @@ impl Builder for DirectoryBuilder { ) -> Result { let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; // Scan source directory to build upper layer tree. - let mut tree = self.build_tree_from_fs(ctx, &mut bootstrap_ctx)?; + let layer_idx = if bootstrap_ctx.layered { 1u16 } else { 0u16 }; + let mut tree = self.build_tree_from_fs(ctx, &mut bootstrap_ctx, layer_idx)?; let mut bootstrap = Bootstrap::new()?; if bootstrap_ctx.layered { // Merge with lower layer if there's one, do not prepare `prefetch` list during merging. 
diff --git a/src/bin/nydus-image/builder/stargz.rs b/src/bin/nydus-image/builder/stargz.rs index 66134370be5..f7583ac7425 100644 --- a/src/bin/nydus-image/builder/stargz.rs +++ b/src/bin/nydus-image/builder/stargz.rs @@ -333,7 +333,7 @@ impl StargzIndexTreeBuilder { Ok(()) } - fn build(&mut self, ctx: &mut BuildContext) -> Result { + fn build(&mut self, ctx: &mut BuildContext, layer_idx: u16) -> Result { // Parse stargz TOC index from a file let toc_index = TocIndex::load(&ctx.source_path)?; if toc_index.entries.is_empty() { @@ -411,7 +411,7 @@ impl StargzIndexTreeBuilder { let mut lost_dirs = Vec::new(); self.make_lost_dirs(&entry, &mut lost_dirs)?; for dir in &lost_dirs { - let node = self.parse_node(dir, ctx.explicit_uidgid, ctx.fs_version)?; + let node = self.parse_node(dir, ctx.explicit_uidgid, ctx.fs_version, layer_idx)?; nodes.push(node); } @@ -419,7 +419,7 @@ impl StargzIndexTreeBuilder { hardlink_map.insert(entry.path()?, entry.hardlink_link_path()); } - let node = self.parse_node(entry, ctx.explicit_uidgid, ctx.fs_version)?; + let node = self.parse_node(entry, ctx.explicit_uidgid, ctx.fs_version, layer_idx)?; if entry.path()? 
== PathBuf::from("/") { tree = Some(Tree::new(node.clone())); } @@ -448,6 +448,7 @@ impl StargzIndexTreeBuilder { entry: &TocEntry, explicit_uidgid: bool, version: RafsVersion, + layer_idx: u16, ) -> Result { let chunks = Vec::new(); let entry_path = entry.path()?; @@ -548,6 +549,7 @@ impl StargzIndexTreeBuilder { chunks, symlink, xattrs, + layer_idx, ctime: 0, offset: 0, dirents: Vec::<(u64, OsString, u32)>::new(), @@ -615,10 +617,10 @@ impl StargzBuilder { Ok(()) } - fn build_tree_from_index(&mut self, ctx: &mut BuildContext) -> Result { + fn build_tree_from_index(&mut self, ctx: &mut BuildContext, layer_idx: u16) -> Result { let mut tree_builder = StargzIndexTreeBuilder::new(); tree_builder - .build(ctx) + .build(ctx, layer_idx) .context("failed to build tree from stargz index") } } @@ -632,7 +634,8 @@ impl Builder for StargzBuilder { ) -> Result { let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; // Build tree from source - let mut tree = self.build_tree_from_index(ctx)?; + let layer_idx = if bootstrap_ctx.layered { 1u16 } else { 0u16 }; + let mut tree = self.build_tree_from_index(ctx, layer_idx)?; let mut bootstrap = Bootstrap::new()?; if bootstrap_mgr.f_parent_bootstrap.is_some() { // Merge with lower layer if there's one. diff --git a/src/bin/nydus-image/core/bootstrap.rs b/src/bin/nydus-image/core/bootstrap.rs index 068ccf709ee..5056cde3058 100644 --- a/src/bin/nydus-image/core/bootstrap.rs +++ b/src/bin/nydus-image/core/bootstrap.rs @@ -67,13 +67,8 @@ impl Bootstrap { // user to pass the source root as prefetch hint. Check it here. 
ctx.prefetch.insert_if_need(&tree.node); - let inode_map = if tree.node.overlay.is_lower_layer() { - &mut bootstrap_ctx.lower_inode_map - } else { - &mut bootstrap_ctx.upper_inode_map - }; - inode_map.insert( - (tree.node.src_ino, tree.node.src_dev), + bootstrap_ctx.inode_map.insert( + (tree.node.layer_idx, tree.node.src_ino, tree.node.src_dev), vec![tree.node.index], ); @@ -121,8 +116,7 @@ impl Bootstrap { )?; // Clear all cached states for next upper layer build. - bootstrap_ctx.lower_inode_map.clear(); - bootstrap_ctx.upper_inode_map.clear(); + bootstrap_ctx.inode_map.clear(); ctx.prefetch.clear(); Ok(tree) @@ -170,12 +164,11 @@ impl Bootstrap { // Hardlink handle, all hardlink nodes' ino, nlink should be the same, // because the real_ino may be conflicted between different layers, // so we need to find hardlink node index list in the layer where the node is located. - let inode_map = if child.node.overlay.is_lower_layer() { - &mut bootstrap_ctx.lower_inode_map - } else { - &mut bootstrap_ctx.upper_inode_map - }; - if let Some(indexes) = inode_map.get_mut(&(child.node.src_ino, child.node.src_dev)) { + if let Some(indexes) = bootstrap_ctx.inode_map.get_mut(&( + child.node.layer_idx, + child.node.src_ino, + child.node.src_dev, + )) { let nlink = indexes.len() as u32 + 1; let first_index = indexes[0]; child.node.inode.set_ino(first_index); @@ -189,8 +182,8 @@ impl Bootstrap { child.node.inode.set_ino(index); child.node.inode.set_nlink(1); // Store inode real ino - inode_map.insert( - (child.node.src_ino, child.node.src_dev), + bootstrap_ctx.inode_map.insert( + (child.node.layer_idx, child.node.src_ino, child.node.src_dev), vec![child.node.index], ); } @@ -413,8 +406,7 @@ impl Bootstrap { // Set super block let mut super_block = RafsV5SuperBlock::new(); - let inodes_count = - (bootstrap_ctx.lower_inode_map.len() + bootstrap_ctx.upper_inode_map.len()) as u64; + let inodes_count = bootstrap_ctx.inode_map.len() as u64; super_block.set_inodes_count(inodes_count); 
super_block.set_inode_table_offset(super_block_size as u64); super_block.set_inode_table_entries(inode_table_entries); diff --git a/src/bin/nydus-image/core/context.rs b/src/bin/nydus-image/core/context.rs index 1d1c5f1cceb..233b3155807 100644 --- a/src/bin/nydus-image/core/context.rs +++ b/src/bin/nydus-image/core/context.rs @@ -599,9 +599,8 @@ impl BlobManager { pub struct BootstrapContext { /// This build has a parent bootstrap. pub layered: bool, - /// Cache node index for hardlinks, HashMap<(real_inode, dev), Vec>. - pub lower_inode_map: HashMap<(Inode, u64), Vec>, - pub upper_inode_map: HashMap<(Inode, u64), Vec>, + /// Cache node index for hardlinks, HashMap<(layer_index, real_inode, dev), Vec>. + pub inode_map: HashMap<(u16, Inode, u64), Vec>, /// Store all nodes in ascendant ordor, indexed by (node.index - 1). pub nodes: Vec, /// Current position to write in f_bootstrap @@ -617,8 +616,7 @@ impl BootstrapContext { pub fn new(storage: ArtifactStorage, layered: bool) -> Result { Ok(Self { layered, - lower_inode_map: HashMap::new(), - upper_inode_map: HashMap::new(), + inode_map: HashMap::new(), nodes: Vec::new(), offset: EROFS_BLOCK_SIZE, name: String::new(), diff --git a/src/bin/nydus-image/core/node.rs b/src/bin/nydus-image/core/node.rs index c56fb1613ec..b1c879fafac 100644 --- a/src/bin/nydus-image/core/node.rs +++ b/src/bin/nydus-image/core/node.rs @@ -224,6 +224,8 @@ pub struct Node { pub(crate) target: PathBuf, /// Parsed version of `target`. pub(crate) target_vec: Vec, + /// Layer index where node located. + pub layer_idx: u16, /// Last status change time of the file, in nanoseconds. 
pub ctime: i64, /// Used by rafsv6 inode datalayout @@ -285,6 +287,7 @@ impl Node { symlink: None, xattrs: RafsXAttrs::default(), explicit_uidgid, + layer_idx: 0, ctime: 0, offset: 0, dirents: Vec::new(), diff --git a/src/bin/nydus-image/core/tree.rs b/src/bin/nydus-image/core/tree.rs index 65d972fd721..0d1f4390b26 100644 --- a/src/bin/nydus-image/core/tree.rs +++ b/src/bin/nydus-image/core/tree.rs @@ -350,6 +350,7 @@ impl<'a> MetadataTreeBuilder<'a> { chunks, symlink, xattrs, + layer_idx: 0, ctime: 0, offset: 0, dirents: Vec::<(u64, OsString, u32)>::new(), diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index b975e5e97fa..9732f4f747b 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -3,9 +3,10 @@ // SPDX-License-Identifier: Apache-2.0 use std::collections::HashSet; +use std::convert::TryFrom; use std::path::{Path, PathBuf}; -use anyhow::Result; +use anyhow::{Context, Result}; use rafs::metadata::layout::RAFS_ROOT_INODE; use rafs::metadata::{RafsInode, RafsMode, RafsSuper}; @@ -23,7 +24,7 @@ pub struct Merger {} impl Merger { /// Merge assumes the bootstrap name as the hash of whole tar blob. - fn get_blob_hash(bootstrap_path: PathBuf) -> Result { + fn get_blob_hash(bootstrap_path: &Path) -> Result { let blob_hash = bootstrap_path .file_name() .ok_or_else(|| anyhow!("get file name"))? @@ -45,7 +46,7 @@ impl Merger { let mut tree: Option = None; let mut blob_mgr = BlobManager::new(); - for bootstrap_path in sources { + for (layer_idx, bootstrap_path) in sources.iter().enumerate() { // Get the blobs come from chunk dict bootstrap. let mut chunk_dict_blobs = HashSet::new(); if let Some(chunk_dict_path) = &chunk_dict { @@ -89,6 +90,13 @@ impl Merger { // Set the blob index of chunk to real index in blob table of final bootstrap. chunk.inner.set_blob_index(blob_idx_map[origin_blob_index]); } + // Set node's layer index to distinguish same inode number (from bootstrap) + // between different layers. 
+ node.layer_idx = u16::try_from(layer_idx).context(format!( + "too many layers {}, limited to {}", + layer_idx, + u16::MAX + ))?; node.overlay = Overlay::UpperAddition; match node.whiteout_type(WhiteoutSpec::Oci) { Some(_) => { From b492144774f6f65488cb053abb4317cd05441e94 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Thu, 31 Mar 2022 03:33:14 +0000 Subject: [PATCH 09/12] Builder: small tidy for merge command 1. reuse the option definitions shared with the create sub-command; 2. add context to error messages to make them more user-friendly; Signed-off-by: Yan Song --- src/bin/nydus-image/main.rs | 44 +++++++++++++++--------------------- src/bin/nydus-image/merge.rs | 36 +++++++++++++++++------------ 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index 24ee0206f66..449bed0608e 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -135,6 +135,20 @@ impl OutputSerializer { } fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { + let arg_chunk_dict = Arg::with_name("chunk-dict") + .long("chunk-dict") + .short("M") + .help("Specify a chunk dictionary for chunk deduplication") + .takes_value(true); + let arg_prefetch_policy = Arg::with_name("prefetch-policy") + .long("prefetch-policy") + .short("P") + .help("blob data prefetch policy") + .takes_value(true) + .required(false) + .default_value("none") + .possible_values(&["fs", "blob", "none"]); + // TODO: Try to use yaml to define below options App::new("") .version(bti_string.as_str()) @@ -247,14 +261,7 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .required(false), ) .arg( - Arg::with_name("prefetch-policy") - .long("prefetch-policy") - .short("P") - .help("blob data prefetch policy") - .takes_value(true) - .required(false) - .default_value("none") - .possible_values(&["fs", "blob", "none"]), + arg_prefetch_policy.clone(), ) .arg( Arg::with_name("repeatable") @@ -310,11 +317,7 @@ fn prepare_cmd_args(bti_string: String) -> 
ArgMatches<'static> { .takes_value(true) ) .arg( - Arg::with_name("chunk-dict") - .long("chunk-dict") - .short("M") - .help("Specify a chunk dictionary for chunk deduplication") - .takes_value(true) + arg_chunk_dict.clone(), ) .arg( Arg::with_name("backend-type") @@ -343,21 +346,10 @@ fn prepare_cmd_args(bti_string: String) -> ArgMatches<'static> { .takes_value(true), ) .arg( - Arg::with_name("chunk-dict") - .long("chunk-dict") - .short("M") - .help("Specify a chunk dictionary for chunk deduplication") - .takes_value(true) + arg_chunk_dict, ) .arg( - Arg::with_name("prefetch-policy") - .long("prefetch-policy") - .short("P") - .help("blob data prefetch policy") - .takes_value(true) - .required(false) - .default_value("none") - .possible_values(&["fs", "blob", "none"]), + arg_prefetch_policy, ) .arg( Arg::with_name("SOURCE") diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index 9732f4f747b..3eeab776e36 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -46,17 +46,19 @@ impl Merger { let mut tree: Option<Tree> = None; let mut blob_mgr = BlobManager::new(); - for (layer_idx, bootstrap_path) in sources.iter().enumerate() { - // Get the blobs come from chunk dict bootstrap. - let mut chunk_dict_blobs = HashSet::new(); - if let Some(chunk_dict_path) = &chunk_dict { - let rs = RafsSuper::load_from_metadata(&chunk_dict_path, RafsMode::Direct, true)?; - for blob in rs.superblock.get_blob_infos() { - chunk_dict_blobs.insert(blob.blob_id().to_string()); - } + // Get the blobs come from chunk dict bootstrap. 
+ let mut chunk_dict_blobs = HashSet::new(); + if let Some(chunk_dict_path) = &chunk_dict { + let rs = RafsSuper::load_from_metadata(&chunk_dict_path, RafsMode::Direct, true) + .context(format!("load chunk dict bootstrap {:?}", chunk_dict_path))?; + for blob in rs.superblock.get_blob_infos() { + chunk_dict_blobs.insert(blob.blob_id().to_string()); } + } - let rs = RafsSuper::load_from_metadata(&bootstrap_path, RafsMode::Direct, true)?; + for (layer_idx, bootstrap_path) in sources.iter().enumerate() { + let rs = RafsSuper::load_from_metadata(&bootstrap_path, RafsMode::Direct, true) + .context(format!("load bootstrap {:?}", bootstrap_path))?; let parent_blobs = rs.superblock.get_blob_infos(); let blob_hash = Self::get_blob_hash(bootstrap_path)?; let mut blob_idx_map = Vec::new(); @@ -65,8 +67,9 @@ impl Merger { for blob in &parent_blobs { let mut blob_ctx = BlobContext::from(blob, ChunkSource::Parent); if chunk_dict_blobs.get(blob.blob_id()).is_none() { - // Only up to one blob from the parent bootstrap, the other blobs should be - // from the chunk dict image. + // It is assumed that the `nydus-image create` at each layer and `nydus-image merge` commands + // use the same chunk dict bootstrap. So the parent bootstrap includes multiple blobs, but + // only at most one new blob, the other blobs should be from the chunk dict image. if parent_blob_added { bail!("invalid bootstrap, seems have multiple non-chunk-dict blobs in this bootstrap"); } @@ -84,7 +87,8 @@ impl Merger { rs.walk_inodes(RAFS_ROOT_INODE, None, &mut |inode: &dyn RafsInode, path: &Path| -> Result<()> { - let mut node = MetadataTreeBuilder::parse_node(&rs, inode, path.to_path_buf())?; + let mut node = MetadataTreeBuilder::parse_node(&rs, inode, path.to_path_buf()) + .context(format!("parse node from bootstrap {:?}", bootstrap_path))?; for chunk in &mut node.chunks { let origin_blob_index = chunk.inner.blob_index() as usize; // Set the blob index of chunk to real index in blob table of final bootstrap. 
@@ -100,7 +104,7 @@ impl Merger { node.overlay = Overlay::UpperAddition; match node.whiteout_type(WhiteoutSpec::Oci) { Some(_) => { - // Insert removal operations at the head, so they will be handled first when + // Insert whiteouts at the head, so they will be handled first when // applying to lower layer. nodes.insert(0, node); } @@ -122,11 +126,13 @@ impl Merger { // Safe to unwrap because there is at least one source bootstrap. let mut tree = tree.unwrap(); let mut bootstrap = Bootstrap::new()?; - let storage = ArtifactStorage::SingleFile(target); + let storage = ArtifactStorage::SingleFile(target.clone()); let mut bootstrap_ctx = BootstrapContext::new(storage, false)?; bootstrap.build(ctx, &mut bootstrap_ctx, &mut tree)?; let blob_table = blob_mgr.to_blob_table(&ctx)?; - bootstrap.dump(ctx, &mut bootstrap_ctx, &blob_table)?; + bootstrap + .dump(ctx, &mut bootstrap_ctx, &blob_table) + .context(format!("dump bootstrap to {:?}", target))?; Ok(()) } From 4d374c7c8f3866c2b2a8cf2ec514e292a127decb Mon Sep 17 00:00:00 2001 From: Yan Song Date: Wed, 30 Mar 2022 08:36:23 +0000 Subject: [PATCH 10/12] Nydusify: check blob consistency without order For the tar build scene, the regular layer represents blob+bootstrap, for an empty data layer (for example `chmod +x`), the blob table in bootstrap doesn't include the blob entry of this layer, so we need remove the check of comparing the number of blobs and the blob order between bootstrap and layers on the manifest, just ensure nydus blobs in the blob table of bootstrap should all appear in the layers of manifest. 
Signed-off-by: Yan Song --- .../nydusify/pkg/checker/rule/bootstrap.go | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/contrib/nydusify/pkg/checker/rule/bootstrap.go b/contrib/nydusify/pkg/checker/rule/bootstrap.go index c36f4acc5d5..b5bdd031a17 100644 --- a/contrib/nydusify/pkg/checker/rule/bootstrap.go +++ b/contrib/nydusify/pkg/checker/rule/bootstrap.go @@ -8,7 +8,6 @@ import ( "encoding/json" "fmt" "io/ioutil" - "reflect" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -54,6 +53,16 @@ func (rule *BootstrapRule) Validate() error { return nil } + // Parse blob list from blob layers in Nydus manifest + blobListInLayer := map[string]bool{} + layers := rule.Parsed.NydusImage.Manifest.Layers + for i, layer := range layers { + if i != len(layers)-1 { + blobListInLayer[layer.Digest.Hex()] = true + } + } + + // Parse blob list from blob table of bootstrap var bootstrap bootstrapDebug bootstrapBytes, err := ioutil.ReadFile(rule.DebugOutputPath) if err != nil { @@ -62,27 +71,26 @@ func (rule *BootstrapRule) Validate() error { if err := json.Unmarshal(bootstrapBytes, &bootstrap); err != nil { return errors.Wrap(err, "unmarshal bootstrap output JSON") } - - // Parse blob list from blob layers in Nydus manifest - var blobListInLayer []string - layers := rule.Parsed.NydusImage.Manifest.Layers - for i, layer := range layers { - if i != len(layers)-1 { - blobListInLayer = append(blobListInLayer, layer.Digest.Hex()) + blobListInBootstrap := map[string]bool{} + lostInLayer := false + for _, blobID := range bootstrap.Blobs { + blobListInBootstrap[blobID] = true + if !blobListInLayer[blobID] { + lostInLayer = true } } - // Blob list recorded in manifest annotation should be equal with - // the blob list recorded in blob table of bootstrap - if !reflect.DeepEqual(bootstrap.Blobs, blobListInLayer) { - return fmt.Errorf( - "nydus blob list in bootstrap(%d) does not match with manifest(%d)'s, %v != %v", - len(bootstrap.Blobs), - 
len(blobListInLayer), - bootstrap.Blobs, - blobListInLayer, - ) + if !lostInLayer { + return nil } - return nil + // The blobs recorded in blob table of bootstrap should all appear + // in the layers. + return fmt.Errorf( + "nydus blobs in the blob table of bootstrap(%d) should all appear in the layers of manifest(%d), %v != %v", + len(blobListInBootstrap), + len(blobListInLayer), + blobListInBootstrap, + blobListInLayer, + ) } From f23feca17b641ecb8af032dc70b5d66b74a4c8a7 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Fri, 1 Apr 2022 10:16:39 +0000 Subject: [PATCH 11/12] Builder: keep bootstrap following flags on merge Keep final bootstrap following superblock flags of source bootstraps. Signed-off-by: Yan Song --- src/bin/nydus-image/merge.rs | 47 +++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index 3eeab776e36..5bccdc29fd2 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -8,8 +8,10 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; +use nydus_utils::digest::{self}; use rafs::metadata::layout::RAFS_ROOT_INODE; -use rafs::metadata::{RafsInode, RafsMode, RafsSuper}; +use rafs::metadata::{RafsInode, RafsMode, RafsSuper, RafsSuperMeta}; +use storage::compress; use crate::core::bootstrap::Bootstrap; use crate::core::chunk_dict::HashChunkDict; @@ -18,6 +20,32 @@ use crate::core::context::{BlobContext, BlobManager, BootstrapContext, BuildCont use crate::core::node::{ChunkSource, Overlay, WhiteoutSpec}; use crate::core::tree::{MetadataTreeBuilder, Tree}; +#[derive(Clone, Debug, Eq, PartialEq)] +struct Flags { + explicit_uidgid: bool, + has_xattr: bool, + compressor: compress::Algorithm, + digester: digest::Algorithm, +} + +impl Flags { + fn from_meta(meta: &RafsSuperMeta) -> Self { + Self { + explicit_uidgid: meta.explicit_uidgid(), + has_xattr: meta.has_xattr(), + compressor: meta.get_compressor(), + digester: 
meta.get_digester(), + } + } + + fn set_to_ctx(&self, ctx: &mut BuildContext) { + ctx.explicit_uidgid = self.explicit_uidgid; + ctx.has_xattr = self.has_xattr; + ctx.compressor = self.compressor; + ctx.digester = self.digester; + } +} + /// Merger merge multiple bootstraps (generally come from nydus tar blob of /// intermediate image layer) into one bootstrap (uses as final image layer). pub struct Merger {} @@ -45,6 +73,7 @@ impl Merger { let mut tree: Option<Tree> = None; let mut blob_mgr = BlobManager::new(); + let mut flags: Option<Flags> = None; // Get the blobs come from chunk dict bootstrap. let mut chunk_dict_blobs = HashSet::new(); @@ -59,6 +88,22 @@ impl Merger { for (layer_idx, bootstrap_path) in sources.iter().enumerate() { let rs = RafsSuper::load_from_metadata(&bootstrap_path, RafsMode::Direct, true) .context(format!("load bootstrap {:?}", bootstrap_path))?; + + let current_flags = Flags::from_meta(&rs.meta); + if let Some(flags) = &flags { + if flags != &current_flags { + bail!( + "inconsistent flags between bootstraps, current bootstrap {:?}, flags {:?}", + bootstrap_path, + current_flags, + ); + } + } else { + // Keep final bootstrap following superblock flags of source bootstraps. + current_flags.set_to_ctx(ctx); + flags = Some(current_flags); + } + let parent_blobs = rs.superblock.get_blob_infos(); let blob_hash = Self::get_blob_hash(bootstrap_path)?; let mut blob_idx_map = Vec::new(); From fc72709753029ad3784fb05c9f304645521a26f2 Mon Sep 17 00:00:00 2001 From: Yan Song Date: Sat, 2 Apr 2022 02:51:42 +0000 Subject: [PATCH 12/12] Builder: fix test for diff build The behavior change of RafsSuper::walk_inodes api break diff build, adjust it to ignore non-regular file handle. 
Signed-off-by: Yan Song --- src/bin/nydus-image/builder/diff.rs | 18 ++++--- src/bin/nydus-image/merge.rs | 2 +- tests/diff.rs | 75 ++++++++++++++++------------- 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/src/bin/nydus-image/builder/diff.rs b/src/bin/nydus-image/builder/diff.rs index b58c1132aa7..67465a84173 100644 --- a/src/bin/nydus-image/builder/diff.rs +++ b/src/bin/nydus-image/builder/diff.rs @@ -523,14 +523,16 @@ impl DiffBuilder { path: &Path| -> Result<()> { let mut chunks = Vec::new(); - inode.walk_chunks(&mut |cki: &dyn BlobChunkInfo| -> Result<()> { - let chunk = ChunkWrapper::from_chunk_info(cki); - chunks.push(NodeChunk { - source: ChunkSource::Parent, - inner: chunk, - }); - Ok(()) - })?; + if inode.is_reg() { + inode.walk_chunks(&mut |cki: &dyn BlobChunkInfo| -> Result<()> { + let chunk = ChunkWrapper::from_chunk_info(cki); + chunks.push(NodeChunk { + source: ChunkSource::Parent, + inner: chunk, + }); + Ok(()) + })?; + } self.chunk_map .insert(path.to_path_buf(), (chunks, inode.get_digest())); Ok(()) diff --git a/src/bin/nydus-image/merge.rs b/src/bin/nydus-image/merge.rs index 5bccdc29fd2..bc029ed007e 100644 --- a/src/bin/nydus-image/merge.rs +++ b/src/bin/nydus-image/merge.rs @@ -8,10 +8,10 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; +use nydus_utils::compress; use nydus_utils::digest::{self}; use rafs::metadata::layout::RAFS_ROOT_INODE; use rafs::metadata::{RafsInode, RafsMode, RafsSuper, RafsSuperMeta}; -use storage::compress; use crate::core::bootstrap::Bootstrap; use crate::core::chunk_dict::HashChunkDict; diff --git a/tests/diff.rs b/tests/diff.rs index 8bada3caef7..5f2280ad25c 100644 --- a/tests/diff.rs +++ b/tests/diff.rs @@ -228,6 +228,7 @@ fn integration_test_diff_build_with_chunk_dict() { &snapshot_dir_4, )); let expected_chunk_dict_bootstrap = IntoIter::new([ + (PathBuf::from("/"), vec![]), ( PathBuf::from("/file-1"), vec![ @@ -324,21 +325,23 @@ fn integration_test_diff_build_with_chunk_dict() { 
let mut reader = Box::new(file) as RafsIoReader; rs.load(&mut reader).unwrap(); let mut actual = HashMap::new(); + let blobs = rs.superblock.get_blob_infos(); rs.walk_inodes(RAFS_ROOT_INODE, None, &mut |inode: &dyn RafsInode, path: &Path| -> Result<()> { let mut chunks = Vec::new(); - let blobs = rs.superblock.get_blob_infos(); - inode - .walk_chunks(&mut |chunk: &dyn BlobChunkInfo| -> Result<()> { - chunks.push(( - chunk.blob_index(), - blobs[chunk.blob_index() as usize].blob_id().to_string(), - format!("{}", chunk.chunk_id()), - )); - Ok(()) - }) - .unwrap(); + if inode.is_reg() { + inode + .walk_chunks(&mut |chunk: &dyn BlobChunkInfo| -> Result<()> { + chunks.push(( + chunk.blob_index(), + blobs[chunk.blob_index() as usize].blob_id().to_string(), + format!("{}", chunk.chunk_id()), + )); + Ok(()) + }) + .unwrap(); + } actual.insert(path.to_path_buf(), chunks); Ok(()) }) @@ -386,6 +389,7 @@ fn integration_test_diff_build_with_chunk_dict() { &snapshot_dir_6, )); let expected_bootstrap = IntoIter::new([ + (PathBuf::from("/"), vec![]), ( PathBuf::from("/file-8"), vec![ @@ -460,21 +464,23 @@ fn integration_test_diff_build_with_chunk_dict() { let mut reader = Box::new(file) as RafsIoReader; rs.load(&mut reader).unwrap(); let mut actual = HashMap::new(); + let blobs = rs.superblock.get_blob_infos(); rs.walk_inodes(RAFS_ROOT_INODE, None, &mut |inode: &dyn RafsInode, path: &Path| -> Result<()> { let mut chunks = Vec::new(); - let blobs = rs.superblock.get_blob_infos(); - inode - .walk_chunks(&mut |chunk: &dyn BlobChunkInfo| -> Result<()> { - chunks.push(( - chunk.blob_index(), - blobs[chunk.blob_index() as usize].blob_id().to_string(), - format!("{}", chunk.chunk_id()), - )); - Ok(()) - }) - .unwrap(); + if inode.is_reg() { + inode + .walk_chunks(&mut |chunk: &dyn BlobChunkInfo| -> Result<()> { + chunks.push(( + chunk.blob_index(), + blobs[chunk.blob_index() as usize].blob_id().to_string(), + format!("{}", chunk.chunk_id()), + )); + Ok(()) + }) + .unwrap(); + } 
actual.insert(path.to_path_buf(), chunks); Ok(()) }) @@ -539,6 +545,7 @@ fn integration_test_diff_build_with_chunk_dict() { )); let expected_bootstrap = IntoIter::new([ + (PathBuf::from("/"), vec![]), ( PathBuf::from("/file-1"), vec![ @@ -678,21 +685,23 @@ fn integration_test_diff_build_with_chunk_dict() { let mut reader = Box::new(file) as RafsIoReader; rs.load(&mut reader).unwrap(); let mut actual = HashMap::new(); + let blobs = rs.superblock.get_blob_infos(); rs.walk_inodes(RAFS_ROOT_INODE, None, &mut |inode: &dyn RafsInode, path: &Path| -> Result<()> { let mut chunks = Vec::new(); - let blobs = rs.superblock.get_blob_infos(); - inode - .walk_chunks(&mut |chunk: &dyn BlobChunkInfo| -> Result<()> { - chunks.push(( - chunk.blob_index(), - blobs[chunk.blob_index() as usize].blob_id().to_string(), - format!("{}", chunk.chunk_id()), - )); - Ok(()) - }) - .unwrap(); + if inode.is_reg() { + inode + .walk_chunks(&mut |chunk: &dyn BlobChunkInfo| -> Result<()> { + chunks.push(( + chunk.blob_index(), + blobs[chunk.blob_index() as usize].blob_id().to_string(), + format!("{}", chunk.chunk_id()), + )); + Ok(()) + }) + .unwrap(); + } actual.insert(path.to_path_buf(), chunks); Ok(()) })