Skip to content

Commit

Permalink
fix(13796): handle overflows with proper max capacity number which is…
Browse files Browse the repository at this point in the history
… valid for MutableBuffer
  • Loading branch information
wiedld committed Dec 17, 2024
1 parent 4269766 commit 7f8c466
Showing 1 changed file with 65 additions and 5 deletions.
70 changes: 65 additions & 5 deletions datafusion/functions/src/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use std::mem::size_of;

use arrow::alloc::ALIGNMENT;
use arrow::array::{
make_view, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ByteView,
GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, StringViewArray,
Expand Down Expand Up @@ -124,8 +125,18 @@ pub struct StringArrayBuilder {

impl StringArrayBuilder {
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
let mut offsets_buffer =
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>());
// MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT`
let capacity = std::cmp::max(
item_capacity.saturating_add(1).saturating_mul(ALIGNMENT),
data_capacity,
);
// rust core's Layout::from_size_align is capped at isize::Max
let capacity = std::cmp::min(
capacity,
isize::MAX as usize - (isize::MAX as usize % ALIGNMENT),
);

let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
// SAFETY: the first offset value is definitely not going to exceed the bounds.
unsafe { offsets_buffer.push_unchecked(0_i32) };
Self {
Expand Down Expand Up @@ -289,8 +300,18 @@ pub struct LargeStringArrayBuilder {

impl LargeStringArrayBuilder {
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
let mut offsets_buffer =
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>());
// MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT`
let capacity = std::cmp::max(
item_capacity.saturating_add(1).saturating_mul(ALIGNMENT),
data_capacity,
);
// rust core's Layout::from_size_align is capped at isize::Max
let capacity = std::cmp::min(
capacity,
isize::MAX as usize - (isize::MAX as usize % ALIGNMENT),
);

let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
// SAFETY: the first offset value is definitely not going to exceed the bounds.
unsafe { offsets_buffer.push_unchecked(0_i64) };
Self {
Expand Down Expand Up @@ -455,11 +476,50 @@ impl ColumnarValueRef<'_> {

#[cfg(test)]
mod tests {
use std::alloc::{GlobalAlloc, Layout, System};

use super::*;

#[derive(Default, Copy, Clone)]
pub struct MockAllocator;

unsafe impl GlobalAlloc for MockAllocator {
/// only need to check it's valid
#[inline]
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
if layout.size() >= 9223372036854775744 {
// system allocator will abort in such a way that is not detectable in tests
// instead, throw a panic we can detect
panic!("tried to allocate 9223372036854775744 bytes");
} else {
System.alloc(layout)
}
}

#[inline]
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
System.dealloc(ptr, layout)
}
}

#[global_allocator]
static ALLOCATOR: MockAllocator = MockAllocator;

#[test]
fn test_overflow_string_array_builders() {
#[should_panic(expected = "tried to allocate 9223372036854775744 bytes")]
fn test_overflow_string_array_builder() {
// should successfully handle overflow
// while providing a number that passes upstream checks
// and will then panic (by trying to allocate too much)
let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
}

#[test]
#[should_panic(expected = "tried to allocate 9223372036854775744 bytes")]
fn test_overflow_large_string_array_builder() {
// should successfully handle overflow
// while providing a number that passes upstream checks
// and will then panic (by trying to allocate too much)
let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
}
}

0 comments on commit 7f8c466

Please sign in to comment.