Skip to content

Commit

Permalink
Add zero-compromise directory iteration using getdents64
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Saveau <[email protected]>
  • Loading branch information
SUPERCILEX committed Nov 15, 2022
1 parent 126112c commit 429dfa6
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 37 deletions.
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ default = [

acct = []
aio = ["pin-utils"]
dir = ["fs"]
dents = ["file_type"]
dir = ["fs", "file_type"]
env = []
event = []
feature = []
file_type = []
fs = []
hostname = []
inotify = []
Expand Down
187 changes: 187 additions & 0 deletions src/dents.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
//! Raw directory iteration using Linux's getdents syscall
use crate::errno::Errno;
use crate::file_type::FileType;
use std::cmp::max;
use std::ffi::CStr;
use std::mem::MaybeUninit;
use std::os::unix::io::AsFd;
use std::{mem, slice};

/// A directory iterator implemented with the Linux `getdents64` syscall.
///
/// This implementation:
/// - Excludes deleted inodes (with ID 0).
/// - Does not handle growing the buffer. If this functionality is necessary,
///   you'll need to drop the current iterator, resize the buffer, and then
///   re-create the iterator. The iterator is guaranteed to continue where it
///   left off provided the file descriptor isn't changed. See the example in
///   [`RawDir::new`].
///
/// NOTE(review): yielded entries borrow their name from the buffer for the
/// whole `'buf` lifetime, yet the iterator writes into that same buffer again
/// whenever it needs more records. Confirm that callers never keep an entry
/// alive across a later call to `next()`.
#[derive(Debug)]
pub struct RawDir<'buf, Fd: AsFd> {
/// File descriptor handed to the syscall; kept for the iterator's lifetime.
fd: Fd,
/// Caller-provided scratch space that the syscall writes raw records into.
buf: &'buf mut [MaybeUninit<u8>],
/// Number of bytes at the front of `buf` filled in by the most recent read.
initialized: usize,
/// Byte offset into `buf` of the next record to decode.
offset: usize,
}

impl<'buf, Fd: AsFd> RawDir<'buf, Fd> {
/// Create a new iterator from the given file descriptor and buffer.
///
/// # Examples
///
/// ```
/// # use std::mem::MaybeUninit;
/// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
/// # use nix::dents::RawDir;
/// # use nix::errno::Errno;
/// # use nix::fcntl::{OFlag, open, openat};
/// # use nix::sys::stat::Mode;
///
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
///
/// let mut buf = [MaybeUninit::uninit(); 2048];
///
/// for entry in RawDir::new(fd, &mut buf) {
/// let entry = entry.unwrap();
/// dbg!(&entry);
/// }
/// ```
///
/// Contrived example that demonstrates reading entries with arbitrarily large file paths:
///
/// ```
/// # use std::cmp::max;
/// # use std::mem::MaybeUninit;
/// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
/// # use nix::dents::RawDir;
/// # use nix::errno::Errno;
/// # use nix::fcntl::{OFlag, open, openat};
/// # use nix::sys::stat::Mode;
///
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
///
/// // DO NOT DO THIS. Use `Vec::with_capacity` to at least start the buffer
/// // off with *some* space.
/// let mut buf = Vec::new();
///
/// 'read: loop {
/// 'resize: {
/// for entry in RawDir::new(&fd, buf.spare_capacity_mut()) {
/// let entry = match entry {
/// Err(Errno::EINVAL) => break 'resize,
/// r => r.unwrap(),
/// };
/// dbg!(&entry);
/// }
/// break 'read;
/// }
///
/// let new_capacity = max(buf.capacity() * 2, 1);
/// buf.reserve(new_capacity);
/// }
/// ```
///
/// Note that this is horribly inefficient as we'll most likely end up doing ~1 syscall per file.
pub fn new(fd: Fd, buf: &'buf mut [MaybeUninit<u8>]) -> Self {
Self {
fd,
buf,
initialized: 0,
offset: 0,
}
}
}

/// A raw directory entry, similar to `std::fs::DirEntry`.
///
/// Note that unlike the std version, this may represent the `.` or `..` entries.
///
/// The `name` is borrowed rather than copied, so the entry is only usable for
/// as long as the memory it points into is left untouched.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct RawDirEntry<'a> {
/// Inode number of the entry, taken from the record's `d_ino` field.
pub inode_number: u64,
/// File type decoded from the record's `d_type` byte.
pub file_type: FileType,
/// NUL-terminated file name of the entry, borrowed from the read buffer.
pub name: &'a CStr,
}

// Fixed-size header found at the start of every record that `getdents64`
// writes into the buffer. The variable-length, NUL-terminated file name
// follows immediately after these fields and is therefore not part of the
// struct.
//
// `packed` strips the trailing padding, which makes `size_of::<dirent64>()`
// equal to the byte offset at which the name begins; the iterator relies on
// that when locating the name.
//
// NOTE(review): the field order and types presumably mirror the kernel's
// `struct linux_dirent64` — confirm against the kernel headers.
#[repr(C, packed)]
struct dirent64 {
// Inode number; the iterator skips records where this is 0.
d_ino: libc::ino64_t,
// Not read by the iterator.
d_off: libc::off64_t,
// Total length of this record in bytes (header, name and padding); used to
// step to the next record.
d_reclen: libc::c_ushort,
// Raw file type byte, converted with `FileType::from`.
d_type: libc::c_uchar,
}

impl<'buf, Fd: AsFd> Iterator for RawDir<'buf, Fd> {
type Item = Result<RawDirEntry<'buf>, Errno>;

fn next(&mut self) -> Option<Self::Item> {
loop {
if self.offset < self.initialized {
let dirent_ptr =
&self.buf[self.offset] as *const MaybeUninit<u8>;
// Trust the kernel to use proper alignment
#[allow(clippy::cast_ptr_alignment)]
let dirent = unsafe { &*dirent_ptr.cast::<dirent64>() };

self.offset += dirent.d_reclen as usize;
if dirent.d_ino == 0 {
continue;
}

return Some(Ok(RawDirEntry {
inode_number: dirent.d_ino,
file_type: FileType::from(dirent.d_type),
name: unsafe {
let name_start =
dirent_ptr.add(mem::size_of::<dirent64>());
let mut name_end = {
// Find the last aligned byte of the file name so we can
// start searching for NUL bytes. If we started searching
// from the back, we would run into garbage left over from
// previous iterations.
// TODO use .map_addr() once strict_provenance is stable
let addr = max(
name_start as usize,
dirent_ptr.add(dirent.d_reclen as usize - 1)
as usize
& !(mem::size_of::<usize>() - 1),
);
addr as *const u8
};

while *name_end != 0 {
name_end = name_end.add(1);
}

CStr::from_bytes_with_nul_unchecked(
slice::from_raw_parts(
name_start.cast::<u8>(),
// Add 1 for the NUL byte
// TODO use .addr() once strict_provenance is stable
name_end as usize - name_start as usize + 1,
),
)
},
}));
}
self.initialized = 0;
self.offset = 0;

match unsafe {
Errno::result(libc::syscall(
libc::SYS_getdents64,
self.fd.as_fd(),
self.buf.as_mut_ptr(),
self.buf.len(),
))
} {
Ok(bytes_read) if bytes_read == 0 => return None,
Ok(bytes_read) => self.initialized = bytes_read as usize,
Err(e) => return Some(Err(e)),
}
}
}
}
40 changes: 8 additions & 32 deletions src/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use crate::errno::Errno;
use crate::fcntl::{self, OFlag};
pub use crate::file_type::FileType as Type;
use crate::sys;
use crate::{Error, NixPath, Result};
use cfg_if::cfg_if;
Expand Down Expand Up @@ -195,25 +196,6 @@ impl IntoIterator for Dir {
#[repr(transparent)]
pub struct Entry(dirent);

/// Type of file referenced by a directory entry
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum Type {
/// FIFO (Named pipe)
Fifo,
/// Character device
CharacterDevice,
/// Directory
Directory,
/// Block device
BlockDevice,
/// Regular file
File,
/// Symbolic link
Symlink,
/// Unix-domain socket
Socket,
}

impl Entry {
/// Returns the inode number (`d_ino`) of the underlying `dirent`.
#[allow(clippy::useless_conversion)] // Not useless on all OSes
Expand All @@ -240,37 +222,31 @@ impl Entry {

/// Returns the bare file name of this directory entry without any other leading path component.
pub fn file_name(&self) -> &ffi::CStr {
unsafe { ::std::ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
}

/// Returns the type of this directory entry, or `Type::Unknown` if it is not known.
///
/// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
/// notably, some Linux filesystems don't implement this. The caller should use `stat` or
/// `fstat` if this returns `Type::Unknown`.
pub fn file_type(&self) -> Option<Type> {
pub fn file_type(&self) -> Type {
#[cfg(not(any(
target_os = "illumos",
target_os = "solaris",
target_os = "haiku"
)))]
match self.0.d_type {
libc::DT_FIFO => Some(Type::Fifo),
libc::DT_CHR => Some(Type::CharacterDevice),
libc::DT_DIR => Some(Type::Directory),
libc::DT_BLK => Some(Type::BlockDevice),
libc::DT_REG => Some(Type::File),
libc::DT_LNK => Some(Type::Symlink),
libc::DT_SOCK => Some(Type::Socket),
/* libc::DT_UNKNOWN | */ _ => None,
{
Type::from(self.0.d_type)
}

// illumos, Solaris, and Haiku systems do not have the d_type member at all:
#[cfg(any(
target_os = "illumos",
target_os = "solaris",
target_os = "haiku"
))]
None
{
Type::Unknown
}
}
}
37 changes: 37 additions & 0 deletions src/file_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//! File type conversion utilities
/// Type of file referenced by a directory entry.
///
/// Values are normally obtained by converting a raw `d_type` byte with
/// `FileType::from`; any byte that does not match one of the known `DT_*`
/// constants converts to [`FileType::Unknown`].
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum FileType {
/// FIFO (Named pipe)
Fifo,
/// Character device
CharacterDevice,
/// Directory
Directory,
/// Block device
BlockDevice,
/// Regular file
File,
/// Symbolic link
Symlink,
/// Unix-domain socket
Socket,
/// The type was not reported or is not one of the variants above.
Unknown,
}

impl From<libc::c_uchar> for FileType {
fn from(value: libc::c_uchar) -> Self {
match value {
libc::DT_FIFO => Self::Fifo,
libc::DT_CHR => Self::CharacterDevice,
libc::DT_DIR => Self::Directory,
libc::DT_BLK => Self::BlockDevice,
libc::DT_REG => Self::File,
libc::DT_LNK => Self::Symlink,
libc::DT_SOCK => Self::Socket,
/* libc::DT_UNKNOWN | */ _ => Self::Unknown,
}
}
}
10 changes: 10 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//! They may be enabled in any combination.
//! * `acct` - Process accounting
//! * `aio` - POSIX AIO
//! * `dents` - Raw directory iteration using Linux's getdents syscall
//! * `dir` - Stuff relating to directory iteration
//! * `env` - Manipulate environment variables
//! * `event` - Event-driven APIs, like `kqueue` and `epoll`
Expand Down Expand Up @@ -63,6 +64,11 @@ pub use libc;
mod macros;

// Public crates
#[cfg(target_os = "linux")]
feature! {
#![feature = "dents"]
pub mod dents;
}
#[cfg(not(target_os = "redox"))]
feature! {
#![feature = "dir"]
Expand All @@ -80,6 +86,10 @@ feature! {
#[deny(missing_docs)]
pub mod features;
}
feature! {
#![feature = "file_type"]
pub mod file_type;
}
#[allow(missing_docs)]
pub mod fcntl;
feature! {
Expand Down
8 changes: 4 additions & 4 deletions test/test_dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ fn read() {

// Check file types. The system is allowed to return DT_UNKNOWN (aka `Type::Unknown` here) but
// if it does return a type, ensure it's correct.
assert!(&[Some(Type::Directory), None].contains(&entries[0].file_type())); // .: dir
assert!(&[Some(Type::Directory), None].contains(&entries[1].file_type())); // ..: dir
assert!(&[Some(Type::Symlink), None].contains(&entries[2].file_type())); // bar: symlink
assert!(&[Some(Type::File), None].contains(&entries[3].file_type())); // foo: regular file
assert!(&[Type::Directory, Type::Unknown].contains(&entries[0].file_type())); // .: dir
assert!(&[Type::Directory, Type::Unknown].contains(&entries[1].file_type())); // ..: dir
assert!(&[Type::Symlink, Type::Unknown].contains(&entries[2].file_type())); // bar: symlink
assert!(&[Type::File, Type::Unknown].contains(&entries[3].file_type())); // foo: regular file
}

#[test]
Expand Down

0 comments on commit 429dfa6

Please sign in to comment.