Skip to content

Commit

Permalink
Handle possible overflows in StringArrayBuilder / LargeStringArrayBui…
Browse files Browse the repository at this point in the history
…lder (apache#13802)

* test(13796): reproducer of overflow on capacity

* fix(13796): handle overflows with proper max capacity number which is valid for MutableBuffer

* refactor: use simple solution and provide panic
  • Loading branch information
wiedld authored Dec 18, 2024
1 parent 63ce486 commit 82a40f3
Showing 1 changed file with 31 additions and 6 deletions.
37 changes: 31 additions & 6 deletions datafusion/functions/src/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,12 @@ pub struct StringArrayBuilder {

impl StringArrayBuilder {
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
let mut offsets_buffer =
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>());
let capacity = item_capacity
.checked_add(1)
.map(|i| i.saturating_mul(size_of::<i32>()))
.expect("capacity integer overflow");

let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
// SAFETY: the first offset value is definitely not going to exceed the bounds.
unsafe { offsets_buffer.push_unchecked(0_i32) };
Self {
Expand Down Expand Up @@ -182,7 +186,7 @@ impl StringArrayBuilder {
.len()
.try_into()
.expect("byte array offset overflow");
unsafe { self.offsets_buffer.push_unchecked(next_offset) };
self.offsets_buffer.push(next_offset);
}

/// Finalise the builder into a concrete [`StringArray`].
Expand Down Expand Up @@ -289,8 +293,12 @@ pub struct LargeStringArrayBuilder {

impl LargeStringArrayBuilder {
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
let mut offsets_buffer =
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>());
let capacity = item_capacity
.checked_add(1)
.map(|i| i.saturating_mul(size_of::<i64>()))
.expect("capacity integer overflow");

let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
// SAFETY: the first offset value is definitely not going to exceed the bounds.
unsafe { offsets_buffer.push_unchecked(0_i64) };
Self {
Expand Down Expand Up @@ -347,7 +355,7 @@ impl LargeStringArrayBuilder {
.len()
.try_into()
.expect("byte array offset overflow");
unsafe { self.offsets_buffer.push_unchecked(next_offset) };
self.offsets_buffer.push(next_offset);
}

/// Finalise the builder into a concrete [`LargeStringArray`].
Expand Down Expand Up @@ -452,3 +460,20 @@ impl ColumnarValueRef<'_> {
}
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
#[should_panic(expected = "capacity integer overflow")]
fn test_overflow_string_array_builder() {
let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
}

#[test]
#[should_panic(expected = "capacity integer overflow")]
fn test_overflow_large_string_array_builder() {
let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
}
}

0 comments on commit 82a40f3

Please sign in to comment.