Skip to content

Commit

Permalink
Simplify Validation/Alignment APIs of ArrayDataBuilder: validate an…
Browse files Browse the repository at this point in the history
…d align (apache#6966)

* Simplify ArrayDataBuilder API for validation and alignment

* Update arrow-data/src/data.rs

Co-authored-by: Raphael Taylor-Davies <[email protected]>

* Rename functions for consistency

* Improve documentation

---------

Co-authored-by: Raphael Taylor-Davies <[email protected]>
  • Loading branch information
2 people authored and totoroyyb committed Jan 20, 2025
1 parent c04ef9d commit ad1d7be
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 48 deletions.
141 changes: 96 additions & 45 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,12 @@ impl ArrayData {
offset,
buffers,
child_data,
align_buffers: false,
// SAFETY: caller responsible for ensuring data is valid
skip_validation: true,
}
.build_unchecked()
.build()
.unwrap()
}

/// Create a new ArrayData, validating that the provided buffers form a valid
Expand Down Expand Up @@ -1775,7 +1779,7 @@ impl PartialEq for ArrayData {
}
}

/// Builder for `ArrayData` type
/// Builder for [`ArrayData`] type
#[derive(Debug)]
pub struct ArrayDataBuilder {
data_type: DataType,
Expand All @@ -1786,6 +1790,20 @@ pub struct ArrayDataBuilder {
offset: usize,
buffers: Vec<Buffer>,
child_data: Vec<ArrayData>,
/// Should buffers be realigned (copying if necessary)?
///
/// Defaults to false.
align_buffers: bool,
/// Should data validation be skipped for this [`ArrayData`]?
///
/// Defaults to false.
///
/// # Safety
///
/// This flag can only be set to true using `unsafe` APIs. However, once true
/// subsequent calls to `build()` may result in undefined behavior if the data
/// is not valid.
skip_validation: bool,
}

impl ArrayDataBuilder {
Expand All @@ -1801,6 +1819,8 @@ impl ArrayDataBuilder {
offset: 0,
buffers: vec![],
child_data: vec![],
align_buffers: false,
skip_validation: false,
}
}

Expand Down Expand Up @@ -1877,66 +1897,79 @@ impl ArrayDataBuilder {

/// Creates an array data, without any validation
///
/// Note: This is shorthand for `self.with_skip_validation(true).build()`
///
/// # Safety
///
/// The same caveats as [`ArrayData::new_unchecked`]
/// apply.
#[allow(clippy::let_and_return)]
pub unsafe fn build_unchecked(self) -> ArrayData {
let data = self.build_impl();
// Provide a force_validate mode
#[cfg(feature = "force_validate")]
data.validate_data().unwrap();
data
self.skip_validation(true).build().unwrap()
}

/// Creates an array data, without any validation,
/// but aligning buffers.
/// Creates an `ArrayData`, consuming `self`
///
/// # Safety
///
/// The same caveats as [`ArrayData::new_unchecked`] apply.
pub unsafe fn build_aligned_unchecked(self) -> ArrayData {
let mut data = self.build_impl();
data.align_buffers();
// Provide a force_validate mode
#[cfg(feature = "force_validate")]
data.validate_data().unwrap();
data
}
/// By default the underlying buffers are checked to ensure they are valid
/// Arrow data. However, if the [`Self::skip_validation`] flag has been set
/// to true (by the `unsafe` API) this validation is skipped. If the data is
/// not valid, undefined behavior will result.
pub fn build(self) -> Result<ArrayData, ArrowError> {
let Self {
data_type,
len,
null_count,
null_bit_buffer,
nulls,
offset,
buffers,
child_data,
align_buffers,
skip_validation,
} = self;

/// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag
unsafe fn build_impl(self) -> ArrayData {
let nulls = self
.nulls
let nulls = nulls
.or_else(|| {
let buffer = self.null_bit_buffer?;
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
Some(match self.null_count {
Some(n) => NullBuffer::new_unchecked(buffer, n),
let buffer = null_bit_buffer?;
let buffer = BooleanBuffer::new(buffer, offset, len);
Some(match null_count {
Some(n) => {
// SAFETY: call to `data.validate_data()` below validates the null buffer is valid
unsafe { NullBuffer::new_unchecked(buffer, n) }
}
None => NullBuffer::new(buffer),
})
})
.filter(|b| b.null_count() != 0);

ArrayData {
data_type: self.data_type,
len: self.len,
offset: self.offset,
buffers: self.buffers,
child_data: self.child_data,
let mut data = ArrayData {
data_type,
len,
offset,
buffers,
child_data,
nulls,
};

if align_buffers {
data.align_buffers();
}
}

/// Creates an array data, validating all inputs
pub fn build(self) -> Result<ArrayData, ArrowError> {
let data = unsafe { self.build_impl() };
data.validate_data()?;
// SAFETY: `skip_validation` is only set to true using `unsafe` APIs
if !skip_validation || cfg!(feature = "force_validate") {
data.validate_data()?;
}
Ok(data)
}

/// Creates an array data, validating all inputs, and aligning any buffers
#[deprecated(since = "54.1.0", note = "Use ArrayData::align_buffers instead")]
pub fn build_aligned(self) -> Result<ArrayData, ArrowError> {
self.align_buffers(true).build()
}

/// Ensure that all buffers are aligned, copying data if necessary
///
/// Rust requires that arrays are aligned to their corresponding primitive,
/// see [`Layout::array`](std::alloc::Layout::array) and [`std::mem::align_of`].
Expand All @@ -1945,17 +1978,33 @@ impl ArrayDataBuilder {
/// to allow for [slice](std::slice) based APIs. See [`BufferSpec::FixedWidth`].
///
/// As this alignment is architecture specific, and not guaranteed by all arrow implementations,
/// this method is provided to automatically copy buffers to a new correctly aligned allocation
/// this flag is provided to automatically copy buffers to a new correctly aligned allocation
/// when necessary, making it useful when interacting with buffers produced by other systems,
/// e.g. IPC or FFI.
///
/// This is unlike `[Self::build`] which will instead return an error on encountering
/// If this flag is not enabled, `[Self::build`] return an error on encountering
/// insufficiently aligned buffers.
pub fn build_aligned(self) -> Result<ArrayData, ArrowError> {
let mut data = unsafe { self.build_impl() };
data.align_buffers();
data.validate_data()?;
Ok(data)
pub fn align_buffers(mut self, align_buffers: bool) -> Self {
self.align_buffers = align_buffers;
self
}

/// Skips validation of the data.
///
/// If this flag is enabled, `[Self::build`] will skip validation of the
/// data
///
/// If this flag is not enabled, `[Self::build`] will validate that all
/// buffers are valid and will return an error if any data is invalid.
/// Validation can be expensive.
///
/// # Safety
///
/// If validation is skipped, the buffers must form a valid Arrow array,
/// otherwise undefined behavior will result
pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
self.skip_validation = skip_validation;
self
}
}

Expand All @@ -1970,6 +2019,8 @@ impl From<ArrayData> for ArrayDataBuilder {
nulls: d.nulls,
null_bit_buffer: None,
null_count: None,
align_buffers: false,
skip_validation: false,
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ unsafe fn create_array_internal(
let values = create_array_helper(reader, values_field, variadic_counts)?;

let run_array_length = run_node.length() as usize;
let builder = ArrayData::builder(data_type.clone())
let array_data = ArrayData::builder(data_type.clone())
.len(run_array_length)
.offset(0)
.add_child_data(run_ends.into_data())
Expand Down Expand Up @@ -321,7 +321,7 @@ unsafe fn create_array_internal(
)));
}

let builder = ArrayData::builder(data_type.clone())
let array_data = ArrayData::builder(data_type.clone())
.len(length as usize)
.offset(0);

Expand Down Expand Up @@ -539,7 +539,7 @@ unsafe fn create_dictionary_array_internal(
) -> Result<ArrayRef, ArrowError> {
if let Dictionary(_, _) = *data_type {
let null_buffer = (field_node.null_count() > 0).then_some(buffers[0].clone());
let builder = ArrayData::builder(data_type.clone())
let array_data = ArrayData::builder(data_type.clone())
.len(field_node.length() as usize)
.add_buffer(buffers[1].clone())
.add_child_data(value_array.into_data())
Expand Down

0 comments on commit ad1d7be

Please sign in to comment.