From a28e4446de932885010f55397cbf47654fbfffc0 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 18 Dec 2024 16:18:52 +0100 Subject: [PATCH 1/3] Add unsafe/unchecked slice functions --- arrow-buffer/src/buffer/immutable.rs | 11 ++++++++++ arrow-buffer/src/buffer/scalar.rs | 31 +++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index d0c8ffa39783..44ab1277af42 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -241,6 +241,17 @@ impl Buffer { "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}", self.length ); + // Safety: + // offset + length <= self.length + unsafe { self.slice_with_length_unchecked(offset, length) } + } + + /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`, + /// with `length` bytes. + /// Doing so allows the same memory region to be shared between buffers. + /// # Safety + /// `(offset + length)` MUST be `<=` [`Self::length`]. + pub unsafe fn slice_with_length_unchecked(&self, offset: usize, length: usize) -> Self { // Safety: // offset + length <= self.length let ptr = unsafe { self.ptr.add(offset) }; diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index ab6c87168e5c..2f34656b8529 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -64,7 +64,7 @@ impl ScalarBuffer { /// /// * `offset` or `len` would result in overflow /// * `buffer` is not aligned to a multiple of `std::mem::align_of::` - /// * `bytes` is not large enough for the requested slice + /// * `buffer` is not large enough for the requested slice pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self { let size = std::mem::size_of::(); let byte_offset = offset.checked_mul(size).expect("offset overflow"); @@ -72,6 +72,27 @@ impl ScalarBuffer { buffer.slice_with_length(byte_offset, byte_len).into() } + /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset` + /// and `length` in units of `T` + /// + /// # Safety + /// + /// This method will be safe UNLESS any of the following: + /// + /// * `offset` or `len` would result in overflow + /// * `buffer` is not aligned to a multiple of `std::mem::align_of::` + /// * `buffer` is not large enough for the requested slice + pub unsafe fn new_unchecked(buffer: Buffer, offset: usize, len: usize) -> Self { + let size = std::mem::size_of::(); + let byte_offset = offset * size; + let byte_len = len * size; + unsafe { + buffer + .slice_with_length_unchecked(byte_offset, byte_len) + .into() + } + } + /// Free up unused memory. pub fn shrink_to_fit(&mut self) { self.buffer.shrink_to_fit(); @@ -82,6 +103,14 @@ impl ScalarBuffer { Self::new(self.buffer.clone(), offset, len) } + /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` + /// + /// # Safety + /// `(offset + len)` <= [`Self::length`] + pub unsafe fn slice_unchecked(&self, offset: usize, len: usize) -> Self { + unsafe { Self::new_unchecked(self.buffer.clone(), offset, len) } + } + /// Returns the inner [`Buffer`] pub fn inner(&self) -> &Buffer { &self.buffer From abfe5c2fd38d392d06b0d198ad05a80889b5f4d0 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 18 Dec 2024 16:31:16 +0100 Subject: [PATCH 2/3] Add ScalarBuffer::len_in_elements --- arrow-buffer/src/buffer/immutable.rs | 3 ++- arrow-buffer/src/buffer/scalar.rs | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 44ab1277af42..09ef8b69dcee 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -249,8 +249,9 @@ impl Buffer { /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`, /// with `length` bytes. /// Doing so allows the same memory region to be shared between buffers. + /// /// # Safety - /// `(offset + length)` MUST be `<=` [`Self::length`]. + /// `(offset + length)` MUST be `<=` [`Self::len`]. pub unsafe fn slice_with_length_unchecked(&self, offset: usize, length: usize) -> Self { // Safety: // offset + length <= self.length diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index 2f34656b8529..3401affea915 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -93,6 +93,12 @@ impl ScalarBuffer { } } + /// The length of the scalar buffer, in number of elements. + #[inline] + pub fn len_in_elements(&self) -> usize { + self.buffer.len() / std::mem::size_of::() + } + /// Free up unused memory. pub fn shrink_to_fit(&mut self) { self.buffer.shrink_to_fit(); @@ -106,7 +112,7 @@ impl ScalarBuffer { /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` /// /// # Safety - /// `(offset + len)` <= [`Self::length`] + /// `(offset + len)` <= [`Self::len_in_elements`] pub unsafe fn slice_unchecked(&self, offset: usize, len: usize) -> Self { unsafe { Self::new_unchecked(self.buffer.clone(), offset, len) } } From 5ffa2acf52af9f1aa68611e35cf8237bd66597f1 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 18 Dec 2024 16:31:57 +0100 Subject: [PATCH 3/3] tweak wording --- arrow-buffer/src/buffer/scalar.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index 3401affea915..fed3fbf2226e 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -93,7 +93,7 @@ impl ScalarBuffer { } } - /// The length of the scalar buffer, in number of elements. + /// The length of the scalar buffer, in units of `T`. #[inline] pub fn len_in_elements(&self) -> usize { self.buffer.len() / std::mem::size_of::()