Skip to content

Commit

Permalink
decomplect Byte Owner and Source concepts
Browse files Browse the repository at this point in the history
  • Loading branch information
somethingelseentirely committed Nov 24, 2024
1 parent 0d384ca commit ef7683c
Show file tree
Hide file tree
Showing 10 changed files with 183 additions and 109 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ bytes = { version = "1.6.0", optional = true }
ownedbytes = { version = "0.7.0", optional = true }
memmap2 = { version = "0.9.4", optional = true }
zerocopy = { version = "0.7.35", optional = true, features = ["derive"] }
pyo3 = {version = "0.23.1", optional = true }

[dev-dependencies]
quickcheck = "1.0"
Expand All @@ -21,3 +22,4 @@ bytes = ["dep:bytes"]
ownedbytes = ["dep:ownedbytes"]
mmap = ["dep:memmap2"]
zerocopy = ["dep:zerocopy"]
pyo3 = ["dep:pyo3"]
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ including other byte handling crates `Bytes`, mmap-ed files,

## Comparison

| Crate | Active | Extensible | Zerocopy Integration | mmap support | kani verified |
| ----- | ------ | ---------- | -------------------- | ------------ | -------- |
| anybytes ||||| 🚧 |
| [bytes](https://crates.io/crates/bytes) ||||||
| [ownedbytes](https://crates.io/crates/ownedbytes) |||| ||
| [minibytes](https://crates.io/crates/esl01-minibytes) | [^1] ||| ||
| Crate | Active | Extensible | mmap support | Zerocopy Integration | Pyo3 Integration | kani verified |
| ----- | ------ | ---------- | ------------ | -------------------- | ---------------- | -------- |
| anybytes ||||| | 🚧 |
| [bytes](https://crates.io/crates/bytes) |||||||
| [ownedbytes](https://crates.io/crates/ownedbytes) ||| | | ||
| [minibytes](https://crates.io/crates/sapling-minibytes) | [^1] || | | ||

[^1]: No longer maintained as an individual crate.
[^1]: Recently published again.

## Acknowledgements
This library started as a fork of the minibytes library in Facebook's [Sapling SCM](https://github.com/facebook/sapling).
98 changes: 46 additions & 52 deletions src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,10 @@ use std::cmp;
use std::fmt;
use std::hash;
use std::ops;
use std::ops::Range;
use std::slice::SliceIndex;
use std::sync::Arc;
use std::sync::Weak;

/// Locates `subslice` inside `slice` by comparing their memory addresses.
///
/// Returns the index range that `subslice` occupies within `slice`, or `None`
/// when `subslice` does not lie entirely inside the memory spanned by `slice`.
fn range_of_subslice(slice: &[u8], subslice: &[u8]) -> Option<Range<usize>> {
    let outer_addr = slice.as_ptr() as usize;
    let inner_addr = subslice.as_ptr() as usize;

    // A subslice that begins before the outer slice cannot be contained in it.
    let offset = inner_addr.checked_sub(outer_addr)?;
    let end = offset + subslice.len();

    // The subslice must also end at or before the end of the outer slice.
    if end > slice.len() {
        return None;
    }
    Some(offset..end)
}

fn is_subslice(slice: &[u8], subslice: &[u8]) -> bool {
let slice_start = slice.as_ptr() as usize;
let slice_end = slice_start + slice.len();
Expand All @@ -43,44 +29,46 @@ unsafe fn erase_lifetime<'a>(slice: &'a [u8]) -> &'static [u8] {
&*(slice as *const [u8])
}

pub unsafe trait ByteOwner: Sync + Send + 'static {
pub unsafe trait ByteSource {
type Owner: ByteOwner;

fn as_bytes(&self) -> &[u8];
fn as_owner(self) -> Self::Owner;
}
pub trait AnyByteOwner: ByteOwner {
pub trait ByteOwner: Sync + Send + 'static {
fn as_any(self: Arc<Self>) -> Arc<dyn Any + Sync + Send>;
}

impl<T: ByteOwner> AnyByteOwner for T {
impl<T: ByteSource + Sync + Send + 'static> ByteOwner for T {
fn as_any(self: Arc<Self>) -> Arc<dyn Any + Sync + Send> {
self
}
}

/// Immutable bytes with zero-copy slicing and cloning.
///
///
/// Access itself is extremely cheap via no-op conversion to a `&[u8]`.
///
///
/// The storage mechanism backing the bytes can be extended
/// and is implemented for a variety of sources already,
/// including other byte handling crates `Bytes`, mmap-ed files,
/// `String`s and `Zerocopy` types.
///
///
/// See [ByteOwner] for an exhaustive list and more details.
pub struct Bytes {
pub(crate) data: &'static [u8],
// Actual owner of the bytes.
pub(crate) owner: Arc<dyn AnyByteOwner>,
pub(crate) owner: Arc<dyn ByteOwner>,
}

/// Weak variant of [Bytes] that doesn't retain the data
/// unless a strong [Bytes] is referencing it.
///
///
/// The referenced subrange of the [Bytes] is reconstructed
/// on [WeakBytes::upgrade].
pub struct WeakBytes {
pub(crate) range: Range<usize>,
// Actual owner of the bytes.
pub(crate) owner: Weak<dyn AnyByteOwner>,
pub(crate) data: *const [u8],
pub(crate) owner: Weak<dyn ByteOwner>,
}

// ByteOwner is Send + Sync and Bytes is immutable.
Expand All @@ -101,23 +89,39 @@ impl Bytes {
/// Creates an empty `Bytes`.
#[inline]
pub fn empty() -> Self {
Self::from_owner(&[0u8; 0][..])
Self::from_owning_source(&[0u8; 0][..])
}

/// Creates `Bytes` from a [`ByteSource`] (for example, `Vec<u8>`).
///
/// The byte slice is taken from `source` via [`ByteSource::as_bytes`] first;
/// `source` is then consumed by [`ByteSource::as_owner`] and the resulting
/// owner is wrapped in an `Arc` and stored in the `owner` field next to the slice.
pub fn from_source(source: impl ByteSource) -> Self {
let data = source.as_bytes();
// Erase the lifetime.
// NOTE(review): `data` borrows from `source`, which is moved into `as_owner`
// below. This presumably relies on the `unsafe trait ByteSource` contract
// guaranteeing that the bytes stay valid at the same address once the source
// has been converted into its owner — confirm against the trait's safety docs.
let data = unsafe { erase_lifetime(data) };

// Convert the source into its owner only after the slice has been captured,
// since `as_owner` takes `self` by value.
let owner = source.as_owner();
let owner = Arc::new(owner);

Self {
// This is safe because slices always have a non-null address.
data,
owner,
}
}

/// Creates `Bytes` from a [`BytesOwner`] (for example, `Vec<u8>`).
pub fn from_owner(owner: impl ByteOwner) -> Self {
/// Creates `Bytes` from a [`ByteOwner`] + [`ByteSource`] (for example, `Vec<u8>`).
pub fn from_owning_source(owner: impl ByteSource + ByteOwner) -> Self {
let arc = Arc::new(owner);
Self::from_arc(arc)
Self::from_owning_source_arc(arc)
}

/// Creates `Bytes` from an `Arc<BytesOwner>`.
/// Creates `Bytes` from an `Arc<ByteSource + ByteOwner>`.
///
/// This provides a potentially faster path for `Bytes` creation
/// as it can forgo an additional allocation for the wrapping Arc.
/// For example when you implement `ByteOwner` for a `zerocopy` type,
/// sadly we can't provide a blanket implementation for those types
/// because of the orphan rule.
pub fn from_arc(arc: Arc<impl ByteOwner>) -> Self {
pub fn from_owning_source_arc(arc: Arc<impl ByteSource + ByteOwner>) -> Self {
let data = arc.as_bytes();
// Erase the lifetime.
let data = unsafe { erase_lifetime(data) };
Expand All @@ -141,23 +145,18 @@ impl Bytes {
/// use anybytes::Bytes;
/// use std::sync::Arc;
/// let owner: Vec<u8> = vec![0,1,2,3];
/// let bytes = Bytes::from_owner(owner);
/// let owner: Arc<Vec<u8>> = bytes.downcast().expect("Downcast of known type.");
/// let bytes = Bytes::from_source(owner);
/// let owner: Arc<Vec<u8>> = bytes.downcast_to_owner().expect("Downcast of known type.");
/// ```
pub fn downcast<T>(self) -> Option<Arc<T>>
pub fn downcast_to_owner<T>(self) -> Option<Arc<T>>
where
T: Send + Sync + 'static,
{
let owner = self.owner;
let owner = AnyByteOwner::as_any(owner);
let owner = ByteOwner::as_any(owner);
owner.downcast::<T>().ok()
}

pub fn reset(&mut self) {
let data = self.owner.as_bytes();
self.data = unsafe { erase_lifetime(data) };
}

/// Returns a slice of self for the provided range.
/// This operation is `O(1)`.
pub fn slice(&self, range: impl SliceIndex<[u8], Output = [u8]>) -> Self {
Expand All @@ -184,13 +183,10 @@ impl Bytes {
}
}

/// Create a weak pointer. Returns `None` if backed by a static buffer.
/// Note the weak pointer has the full range of the buffer.
/// Create a weak pointer.
pub fn downgrade(&self) -> WeakBytes {
let owned_slice = self.owner.as_bytes();
WeakBytes {
range: range_of_subslice(owned_slice, self.data)
.expect("self.data is always a valid range"),
data: self.data as *const [u8],
owner: Arc::downgrade(&self.owner),
}
}
Expand All @@ -200,22 +196,20 @@ impl WeakBytes {
/// The reverse of `downgrade`. Returns `None` if the value was dropped.
pub fn upgrade(&self) -> Option<Bytes> {
let arc = self.owner.upgrade()?;
let data: &[u8] = arc.as_ref().as_bytes();
let data = &data[self.range.clone()];
let data = unsafe { erase_lifetime(data) };
let data = unsafe { &*(self.data) };
Some(Bytes { data, owner: arc })
}
}

impl<T: ByteOwner> From<T> for Bytes {
impl<T: ByteSource + ByteOwner> From<T> for Bytes {
fn from(value: T) -> Self {
Self::from_owner(value)
Self::from_owning_source(value)
}
}

impl<T: ByteOwner> From<Arc<T>> for Bytes {
impl<T: ByteSource + ByteOwner> From<Arc<T>> for Bytes {
fn from(arc: Arc<T>) -> Self {
Self::from_arc(arc)
Self::from_owning_source_arc(arc)
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/

#![doc = include_str!("../README.md")]
#![doc = include_str!("../README.md")]

pub mod bytes;
mod owners;
Expand All @@ -17,7 +17,7 @@ pub mod packed;
#[cfg(test)]
mod tests;

pub use crate::bytes::ByteOwner;
pub use crate::bytes::ByteSource;
pub use crate::bytes::Bytes;
pub use crate::bytes::WeakBytes;
#[cfg(feature = "zerocopy")]
Expand Down
Loading

0 comments on commit ef7683c

Please sign in to comment.