Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Array::shrink_to_fit(&mut self) to 53.4.0 (#6790) (#6817) #6962

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ impl Array for BooleanArray {
self.values.is_empty()
}

fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
self.values.offset()
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.value_offsets.len() <= 1
}

fn shrink_to_fit(&mut self) {
self.value_offsets.shrink_to_fit();
self.value_data.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
43 changes: 26 additions & 17 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,31 +430,31 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
///
/// Before GC:
/// ```text
/// ┌──────┐
/// │......│
/// │......│
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
/// ┌──────┐
/// │......│
/// │......│
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
/// │ View 1 │─ ─ ─ ─ │......│ with data that
/// ├────────────────────┤ │......│ is not referred
/// │ View 2 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or
/// └────────────────────┘ │......│ View 2
/// │......│
/// 2 views, refer to │......│
/// small portions of a └──────┘
/// large buffer
/// └────────────────────┘ │......│ View 2
/// │......│
/// 2 views, refer to │......│
/// small portions of a └──────┘
/// large buffer
/// ```
///
///
/// After GC:
///
/// ```text
/// ┌────────────────────┐ ┌─────┐ After gc, only
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
/// └────────────────────┘ left
///
///
/// 2 views
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
/// └────────────────────┘ left
///
///
/// 2 views
/// ```
/// This method will compact the data buffers by recreating the view array and only include the data
/// that is pointed to by the views.
Expand Down Expand Up @@ -575,6 +575,15 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
self.views.is_empty()
}

fn shrink_to_fit(&mut self) {
self.views.shrink_to_fit();
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
self.buffers.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,11 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
self.keys.is_empty()
}

fn shrink_to_fit(&mut self) {
self.keys.shrink_to_fit();
self.values.shrink_to_fit();
}

fn offset(&self) -> usize {
self.keys.offset()
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,13 @@ impl Array for FixedSizeBinaryArray {
self.len == 0
}

fn shrink_to_fit(&mut self) {
self.value_data.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,13 @@ impl Array for FixedSizeListArray {
self.len == 0
}

fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,14 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self.value_offsets.len() <= 1
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.values.shrink_to_fit();
self.value_offsets.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
self.value_sizes.is_empty()
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.values.shrink_to_fit();
self.value_offsets.shrink_to_fit();
self.value_sizes.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,14 @@ impl Array for MapArray {
self.value_offsets.len() <= 1
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.entries.shrink_to_fit();
self.value_offsets.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
15 changes: 15 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// ```
fn is_empty(&self) -> bool;

/// Shrinks the capacity of any exclusively owned buffer as much as possible
///
/// Shared or externally allocated buffers will be ignored, and
/// any buffer offsets will be preserved.
fn shrink_to_fit(&mut self) {}

/// Returns the offset into the underlying data used by this array(-slice).
/// Note that the underlying data can be shared by many arrays.
/// This defaults to `0`.
Expand Down Expand Up @@ -366,6 +372,15 @@ impl Array for ArrayRef {
self.as_ref().is_empty()
}

/// For shared buffers, this is a no-op.
fn shrink_to_fit(&mut self) {
if let Some(slf) = Arc::get_mut(self) {
slf.shrink_to_fit();
} else {
// We ignore shared buffers.
}
}

fn offset(&self) -> usize {
self.as_ref().offset()
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,13 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
self.values.is_empty()
}

fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
}

fn offset(&self) -> usize {
0
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,11 @@ impl<T: RunEndIndexType> Array for RunArray<T> {
self.run_ends.is_empty()
}

fn shrink_to_fit(&mut self) {
self.run_ends.shrink_to_fit();
self.values.shrink_to_fit();
}

fn offset(&self) -> usize {
self.run_ends.offset()
}
Expand Down
7 changes: 7 additions & 0 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,13 @@ impl Array for StructArray {
self.len == 0
}

fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
}

fn offset(&self) -> usize {
0
}
Expand Down
11 changes: 11 additions & 0 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,17 @@ impl Array for UnionArray {
self.type_ids.is_empty()
}

fn shrink_to_fit(&mut self) {
self.type_ids.shrink_to_fit();
if let Some(offsets) = &mut self.offsets {
offsets.shrink_to_fit();
}
for array in self.fields.iter_mut().flatten() {
array.shrink_to_fit();
}
self.fields.shrink_to_fit();
}

fn offset(&self) -> usize {
0
}
Expand Down
6 changes: 6 additions & 0 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ impl BooleanBuffer {
self.len == 0
}

/// Free up unused memory.
pub fn shrink_to_fit(&mut self) {
// TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
self.buffer.shrink_to_fit();
}

/// Returns the boolean value at index `i`.
///
/// # Panics
Expand Down
63 changes: 63 additions & 0 deletions arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,41 @@ impl Buffer {
self.data.capacity()
}

/// Tried to shrink the capacity of the buffer as much as possible, freeing unused memory.
///
/// If the buffer is shared, this is a no-op.
///
/// If the memory was allocated with a custom allocator, this is a no-op.
///
/// If the capacity is already less than or equal to the desired capacity, this is a no-op.
///
/// The memory region will be reallocated using `std::alloc::realloc`.
pub fn shrink_to_fit(&mut self) {
let offset = self.ptr_offset();
let is_empty = self.is_empty();
let desired_capacity = if is_empty {
0
} else {
// For realloc to work, we cannot free the elements before the offset
offset + self.len()
};
if desired_capacity < self.capacity() {
if let Some(bytes) = Arc::get_mut(&mut self.data) {
if bytes.try_realloc(desired_capacity).is_ok() {
// Realloc complete - update our pointer into `bytes`:
self.ptr = if is_empty {
bytes.as_ptr()
} else {
// SAFETY: we kept all elements leading up to the offset
unsafe { bytes.as_ptr().add(offset) }
}
} else {
// Failure to reallocate is fine; we just failed to free up memory.
}
}
}
}

/// Returns whether the buffer is empty.
#[inline]
pub fn is_empty(&self) -> bool {
Expand Down Expand Up @@ -562,6 +597,34 @@ mod tests {
assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
}

#[test]
fn test_shrink_to_fit() {
let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
assert_eq!(original.capacity(), 64);

let slice = original.slice_with_length(2, 3);
drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
assert_eq!(slice.as_slice(), &[2, 3, 4]);
assert_eq!(slice.capacity(), 64);

let mut shrunk = slice;
shrunk.shrink_to_fit();
assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset

// Test that we can handle empty slices:
let empty_slice = shrunk.slice_with_length(1, 0);
drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
assert_eq!(empty_slice.as_slice(), &[]);
assert_eq!(empty_slice.capacity(), 5);

let mut shrunk_empty = empty_slice;
shrunk_empty.shrink_to_fit();
assert_eq!(shrunk_empty.as_slice(), &[]);
assert_eq!(shrunk_empty.capacity(), 0);
}

#[test]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn test_slice_offset_out_of_bound() {
Expand Down
9 changes: 6 additions & 3 deletions arrow-buffer/src/buffer/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,10 +483,13 @@ impl MutableBuffer {
}
}

/// Creates a non-null pointer with alignment of [`ALIGNMENT`]
///
/// This is similar to [`NonNull::dangling`]
#[inline]
fn dangling_ptr() -> NonNull<u8> {
// SAFETY: ALIGNMENT is a non-zero usize which is then casted
// to a *mut T. Therefore, `ptr` is not null and the conditions for
pub(crate) fn dangling_ptr() -> NonNull<u8> {
// SAFETY: ALIGNMENT is a non-zero usize which is then cast
// to a *mut u8. Therefore, `ptr` is not null and the conditions for
// calling new_unchecked() are respected.
#[cfg(miri)]
{
Expand Down
5 changes: 5 additions & 0 deletions arrow-buffer/src/buffer/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ impl NullBuffer {
self.buffer.is_empty()
}

/// Free up unused memory.
pub fn shrink_to_fit(&mut self) {
self.buffer.shrink_to_fit();
}

/// Returns the null count for this [`NullBuffer`]
#[inline]
pub fn null_count(&self) -> usize {
Expand Down
5 changes: 5 additions & 0 deletions arrow-buffer/src/buffer/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
Self(out.into())
}

/// Free up unused memory.
pub fn shrink_to_fit(&mut self) {
self.0.shrink_to_fit();
}

/// Returns the inner [`ScalarBuffer`]
pub fn inner(&self) -> &ScalarBuffer<O> {
&self.0
Expand Down
Loading
Loading