diff --git a/src/passthrough/inode_store.rs b/src/passthrough/inode_store.rs index 2cb8d6b9a..d7ff0ff1c 100644 --- a/src/passthrough/inode_store.rs +++ b/src/passthrough/inode_store.rs @@ -23,8 +23,15 @@ impl InodeStore { self.data.insert(data.inode, data); } - pub fn remove(&mut self, inode: &Inode) -> Option> { + pub fn remove(&mut self, inode: &Inode, remove_data_only: bool) -> Option> { let data = self.data.remove(inode); + if remove_data_only { + // Keep the by_ids and by_handle entries: these two structures record + // which inode number was assigned to each file, which ensures + // that the same file always uses the same inode number. + return data; + } + if let Some(data) = data.as_ref() { if let FileOrHandle::Handle(handle) = &data.file_or_handle { self.by_handle.remove(handle); @@ -45,16 +52,17 @@ impl InodeStore { } pub fn get_by_ids(&self, ids: &InodeAltKey) -> Option<&Arc> { - // safe to unwrap, inode must be in data map if found by ids, otherwise unwrap on - // corruption. - self.inode_by_ids(ids).map(|inode| self.get(inode).unwrap()) + match self.inode_by_ids(ids) { + Some(inode) => self.get(inode), + None => None, + } } pub fn get_by_handle(&self, handle: &FileHandle) -> Option<&Arc> { - // safe to unwrap, inode must be in data map if found by ids, otherwise unwrap on - // corruption. 
- self.inode_by_handle(handle) - .map(|inode| self.get(inode).unwrap()) + match self.inode_by_handle(handle) { + Some(inode) => self.get(inode), + None => None, + } } pub fn inode_by_ids(&self, ids: &InodeAltKey) -> Option<&Inode> { @@ -176,10 +184,10 @@ mod test { assert_eq!(m.get_by_ids(&ids2).unwrap(), &data2); // remove non-present key - assert!(m.remove(&1).is_none()); + assert!(m.remove(&1, false).is_none()); // remove present key, return its value - assert_eq!(m.remove(&inode1).unwrap(), data1.clone()); + assert_eq!(m.remove(&inode1, false).unwrap(), data1.clone()); assert!(m.get(&inode1).is_none()); assert!(m.get_by_ids(&ids1).is_none()); assert_eq!(m.get(&inode2).unwrap(), &data2); diff --git a/src/passthrough/mod.rs b/src/passthrough/mod.rs index dfa940e33..628ee31bb 100644 --- a/src/passthrough/mod.rs +++ b/src/passthrough/mod.rs @@ -23,7 +23,7 @@ use std::os::unix::ffi::OsStringExt; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::path::PathBuf; use std::str::FromStr; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicU8, Ordering}; use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockWriteGuard}; use std::time::Duration; @@ -50,6 +50,172 @@ use file_handle::{FileHandle, MountFds}; type Inode = u64; type Handle = u64; +/// Largest host inode number that passthroughfs can store directly (47 bits). +pub const MAX_HOST_INO: u64 = 0x7fff_ffff_ffff; + +/// Mask for the 56th bit (bit 55); when this bit is set, the inode is a virtual inode. +pub const USE_VIRTUAL_INODE_MASK: u64 = 1 << 55; + +/// A (dev, mntid) pair, used as the key of the dev/mntid map. +#[derive(Clone, Copy, Default, PartialOrd, Ord, PartialEq, Eq, Debug)] +pub struct DevMntIDPair(libc::dev_t, u64); + +// Used to generate a unique inode with a maximum of 56 bits. 
The format is: +// |1 bit|8 bits|47 bits| +// When the highest bit is 0, the inode is in host inode format and the lower 47 bits hold a host inode number of at most 47 bits. +// When the highest bit is 1, the inode is in virtual inode format, +// which is used for host inode numbers that need more than 47 bits. +// The middle 8 bits hold the unique ID assigned to the combination of dev and mntid. +struct UniqueInodeGenerator { + // Maps each (dev, mntid) pair to a unique id + dev_mntid_map: Mutex>, + // Maps each host inode number larger than MAX_HOST_INO to a virtual inode number + virtual_inode_map: Mutex>, + next_unique_id: AtomicU8, + next_virtual_inode: AtomicU64, +} + +impl UniqueInodeGenerator { + fn new() -> Self { + UniqueInodeGenerator { + dev_mntid_map: Mutex::new(Default::default()), + virtual_inode_map: Mutex::new(Default::default()), + next_unique_id: AtomicU8::new(1), + next_virtual_inode: AtomicU64::new(fuse::ROOT_ID + 1), + } + } + + #[cfg(test)] + fn decode_unique_inode(&self, inode: libc::ino64_t) -> io::Result { + if inode > VFS_MAX_INO { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("the inode {} excess {}", inode, VFS_MAX_INO), + )); + } + + let dev_mntid = (inode >> 47) as u8; + if dev_mntid == u8::MAX { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("invalid dev and mntid {} excess 255", dev_mntid), + )); + } + + let mut dev: libc::dev_t = 0; + let mut mnt: u64 = 0; + let real_inode; + + let mut found = false; + let id_map_guard = self.dev_mntid_map.lock().unwrap(); + for (k, v) in id_map_guard.iter() { + if *v == dev_mntid { + found = true; + dev = k.0; + mnt = k.1; + break; + } + } + + if !found { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "invalid dev and mntid {},there is no record in memory ", + dev_mntid + ), + )); + } + drop(id_map_guard); + + real_inode = if (inode & USE_VIRTUAL_INODE_MASK) == USE_VIRTUAL_INODE_MASK { + // virtual inode + let mut virtual_inode = inode & MAX_HOST_INO; 
if virtual_inode >= self.next_virtual_inode.load(Ordering::Relaxed) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "invalid virtual inode {}, the max virtual inode is {}", + virtual_inode, + self.next_virtual_inode.load(Ordering::Relaxed) + ), + )); + } + let guard = self.virtual_inode_map.lock().unwrap(); + + found = false; + for (k, v) in guard.iter() { + if *v == virtual_inode { + virtual_inode = *k; + found = true; + break; + } + } + if !found { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "invalid virtual inode {},there is no record in memory ", + virtual_inode + ), + )); + } + virtual_inode + } else { + inode & MAX_HOST_INO + }; + + Ok(InodeAltKey { + ino: real_inode, + dev, + mnt, + }) + } + + fn get_unique_inode(&self, unique_inode: &InodeAltKey) -> io::Result { + let id: DevMntIDPair = DevMntIDPair(unique_inode.dev, unique_inode.mnt); + + let mut id_map_guard = self.dev_mntid_map.lock().unwrap(); + let unique_id = match id_map_guard.get(&id) { + Some(id) => *id, + None => { + if self.next_unique_id.load(Ordering::Relaxed) == u8::MAX { + return Err(io::Error::new( + io::ErrorKind::Other, + "the number of combinations of dev and mntid exceeds 255", + )); + } + let next_id = self.next_unique_id.fetch_add(1, Ordering::Relaxed); + id_map_guard.insert(id, next_id); + next_id + } + }; + drop(id_map_guard); + let mut virtual_inode_guard = self.virtual_inode_map.lock().unwrap(); + + let inode = if unique_inode.ino <= MAX_HOST_INO { + unique_inode.ino + } else { + USE_VIRTUAL_INODE_MASK + | match virtual_inode_guard.get(&unique_inode.ino) { + Some(id) => *id, + None => { + if self.next_virtual_inode.load(Ordering::Relaxed) > MAX_HOST_INO { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("the virtual inode excess {}", MAX_HOST_INO), + )); + } + let inode = self.next_virtual_inode.fetch_add(1, Ordering::Relaxed); + virtual_inode_guard.insert(unique_inode.ino, inode); + inode + } + } + }; + 
Ok((unique_id as u64) << 47 | inode) + } +} + #[derive(Clone, Copy)] struct InodeStat { stat: libc::stat64, @@ -199,6 +365,17 @@ impl InodeMap { .ok_or_else(ebadf) } + fn get_inode_locked( + inodes: &InodeStore, + ids_altkey: &InodeAltKey, + handle: Option<&FileHandle>, + ) -> Option { + match handle { + Some(h) => inodes.inode_by_handle(h).copied(), + None => inodes.inode_by_ids(ids_altkey).copied(), + } + } + fn get_alt( &self, ids_altkey: &InodeAltKey, @@ -209,7 +386,6 @@ impl InodeMap { Self::get_alt_locked(inodes.deref(), ids_altkey, handle) } - fn get_alt_locked( inodes: &InodeStore, ids_altkey: &InodeAltKey, @@ -485,6 +661,12 @@ pub struct Config { /// * If dax_file_size == N, DAX will enable only when the file size is greater than or equal /// to N Bytes. pub dax_file_size: Option, + + /// Whether to derive the inode number from the host inode instead of allocating a new one. + /// Note that the host inode number is stored in the lower 47 bits of the resulting inode, + /// and that a virtual inode is used instead for host inodes that need more than 47 bits. + /// The default value for this option is `false`. + pub use_host_ino: bool, } impl Default for Config { @@ -507,6 +689,7 @@ impl Default for Config { dax_file_size: None, dir_entry_timeout: None, dir_attr_timeout: None, + use_host_ino: false, } } } @@ -525,6 +708,8 @@ pub struct PassthroughFs { // do with an fd opened with this flag. inode_map: InodeMap, next_inode: AtomicU64, + // Used to generate unique inode numbers + unique_inode_generator: UniqueInodeGenerator, // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be // used for reading and writing data. 
@@ -593,6 +778,7 @@ impl PassthroughFs { Ok(PassthroughFs { inode_map: InodeMap::new(), next_inode: AtomicU64::new(fuse::ROOT_ID + 1), + unique_inode_generator: UniqueInodeGenerator::new(), handle_map: HandleMap::new(), next_handle: AtomicU64::new(1), @@ -945,7 +1131,14 @@ impl PassthroughFs { data.inode } None => { - let inode = self.next_inode.fetch_add(1, Ordering::Relaxed); + let inode = if !self.cfg.use_host_ino { + // If this file was assigned an inode number before, reuse it instead of allocating + // a new one, so that the same file always gets the same inode number. + InodeMap::get_inode_locked(inodes.deref(), &ids_altkey, handle_opt) + .unwrap_or_else(|| self.next_inode.fetch_add(1, Ordering::Relaxed)) + } else { + self.unique_inode_generator.get_unique_inode(&ids_altkey)? + }; if inode > VFS_MAX_INO { error!("fuse: max inode number reached: {}", VFS_MAX_INO); return Err(io::Error::new( @@ -985,7 +1178,7 @@ impl PassthroughFs { }) } - fn forget_one(inodes: &mut InodeStore, inode: Inode, count: u64) { + fn forget_one(&self, inodes: &mut InodeStore, inode: Inode, count: u64) { // ROOT_ID should not be forgotten, or we're not able to access to files any more. if inode == fuse::ROOT_ID { return; @@ -1011,7 +1204,10 @@ impl PassthroughFs { { if new == 0 { // We just removed the last refcount for this inode. - inodes.remove(&inode); + // When use_host_ino is set, remove both the inode data and the internal lookup state. + // In all other cases, remove only the inode data itself + // and keep the internal lookup state so the inode number can be reused. 
+ inodes.remove(&inode, !self.cfg.use_host_ino); } break; } @@ -1584,4 +1780,120 @@ mod tests { fs.destroy(); } + + #[test] + fn test_generate_unique_inode() { + // use normal inode format + { + let generator = UniqueInodeGenerator::new(); + + let inode_alt_key = InodeAltKey { + ino: 1, + dev: 0, + mnt: 0, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 0 + // 55~48 bit = 0000 0001 + // 47~1 bit = 1 + assert_eq!(unique_inode, 0x00800000000001); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + + let inode_alt_key = InodeAltKey { + ino: 1, + dev: 0, + mnt: 1, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 0 + // 55~48 bit = 0000 0010 + // 47~1 bit = 1 + assert_eq!(unique_inode, 0x01000000000001); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + + let inode_alt_key = InodeAltKey { + ino: 2, + dev: 0, + mnt: 1, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 0 + // 55~48 bit = 0000 0010 + // 47~1 bit = 2 + assert_eq!(unique_inode, 0x01000000000002); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + + let inode_alt_key = InodeAltKey { + ino: MAX_HOST_INO, + dev: 0, + mnt: 1, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 0 + // 55~48 bit = 0000 0010 + // 47~1 bit = 0x7fffffffffff + assert_eq!(unique_inode, 0x017fffffffffff); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + } + + // use virtual inode format + { + let generator = UniqueInodeGenerator::new(); + let inode_alt_key = InodeAltKey { + ino: MAX_HOST_INO + 1, + dev: u64::MAX, + mnt: u64::MAX, + 
}; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 1 + // 55~48 bit = 0000 0001 + // 47~1 bit = 2 virtual inode start from 2~MAX_HOST_INO + assert_eq!(unique_inode, 0x80800000000002); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + + let inode_alt_key = InodeAltKey { + ino: MAX_HOST_INO + 2, + dev: u64::MAX, + mnt: u64::MAX, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 1 + // 55~48 bit = 0000 0001 + // 47~1 bit = 3 + assert_eq!(unique_inode, 0x80800000000003); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + + let inode_alt_key = InodeAltKey { + ino: MAX_HOST_INO + 2, + dev: u64::MAX, + mnt: 0, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 1 + // 55~48 bit = 0000 0010 + // 47~1 bit = 3 + assert_eq!(unique_inode, 0x81000000000003); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + + let inode_alt_key = InodeAltKey { + ino: u64::MAX, + dev: u64::MAX, + mnt: u64::MAX, + }; + let unique_inode = generator.get_unique_inode(&inode_alt_key).unwrap(); + // 56 bit = 1 + // 55~48 bit = 0000 0001 + // 47~1 bit = 4 + assert_eq!(unique_inode, 0x80800000000004); + let expect_inode_alt_key = generator.decode_unique_inode(unique_inode).unwrap(); + assert_eq!(expect_inode_alt_key, inode_alt_key); + } + } } diff --git a/src/passthrough/sync_io.rs b/src/passthrough/sync_io.rs index 1ddf283cd..ba170b5cd 100644 --- a/src/passthrough/sync_io.rs +++ b/src/passthrough/sync_io.rs @@ -358,14 +358,14 @@ impl FileSystem for PassthroughFs { fn forget(&self, _ctx: &Context, inode: Inode, count: u64) { let mut inodes = self.inode_map.get_map_mut(); - Self::forget_one(&mut inodes, inode, count) + 
self.forget_one(&mut inodes, inode, count) } fn batch_forget(&self, _ctx: &Context, requests: Vec<(Inode, u64)>) { let mut inodes = self.inode_map.get_map_mut(); for (inode, count) in requests { - Self::forget_one(&mut inodes, inode, count) + self.forget_one(&mut inodes, inode, count) } } @@ -485,7 +485,7 @@ impl FileSystem for PassthroughFs { if r == 0 { // Release the refcount acquired by self.do_lookup(). let mut inodes = self.inode_map.get_map_mut(); - Self::forget_one(&mut inodes, ino, 1); + self.forget_one(&mut inodes, ino, 1); } r })