From 88b296ca97e18c0514313f1022b91edbeb705c2c Mon Sep 17 00:00:00 2001 From: lambda <1wei@live.com> Date: Wed, 6 Nov 2024 07:45:33 +0800 Subject: [PATCH] [ffi] Fix arrow-array null_count error during conversion from C to Rust (#6674) * Fix arrow-array null_count error during conversion from C to Rust * add ffi_array.null_count_opt * add Safety to follow clippy * add Safety to follow clippy --- arrow-array/src/ffi.rs | 38 +++++++++++++++++++++++++++++++++++--- arrow-data/src/ffi.rs | 15 +++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 29414eae6fea..4426e0986409 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -302,8 +302,8 @@ impl ImportedArrowArray<'_> { let len = self.array.len(); let offset = self.array.offset(); let null_count = match &self.data_type { - DataType::Null => 0, - _ => self.array.null_count(), + DataType::Null => Some(0), + _ => self.array.null_count_opt(), }; let data_layout = layout(&self.data_type); @@ -329,7 +329,7 @@ impl ImportedArrowArray<'_> { ArrayData::new_unchecked( self.data_type, len, - Some(null_count), + null_count, null_bit_buffer, offset, buffers, @@ -635,6 +635,38 @@ mod tests_to_then_from_ffi { } // case with nulls is tested in the docs, through the example on this module. + #[test] + fn test_null_count_handling() { + let int32_data = ArrayData::builder(DataType::Int32) + .len(10) + .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) + .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001]))) + .build() + .unwrap(); + let mut ffi_array = FFI_ArrowArray::new(&int32_data); + assert_eq!(3, ffi_array.null_count()); + assert_eq!(Some(3), ffi_array.null_count_opt()); + // Simulating uninitialized state + unsafe { + ffi_array.set_null_count(-1); + } + assert_eq!(None, ffi_array.null_count_opt()); + let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap(); + assert_eq!(3, int32_data.null_count()); + + let null_data = &ArrayData::new_null(&DataType::Null, 10); + let mut ffi_array = FFI_ArrowArray::new(null_data); + assert_eq!(10, ffi_array.null_count()); + assert_eq!(Some(10), ffi_array.null_count_opt()); + // Simulating uninitialized state + unsafe { + ffi_array.set_null_count(-1); + } + assert_eq!(None, ffi_array.null_count_opt()); + let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap(); + assert_eq!(0, null_data.null_count()); + } + fn test_generic_string() -> Result<()> { // create an array natively let array = GenericStringArray::::from(vec![Some("a"), None, Some("aaa")]); diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index cd283d32662f..04599bc47a1c 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -271,6 +271,21 @@ impl FFI_ArrowArray { self.null_count as usize } + /// Returns the null count, checking for validity + #[inline] + pub fn null_count_opt(&self) -> Option { + usize::try_from(self.null_count).ok() + } + + /// Set the null count of the array + /// + /// # Safety + /// Null count must match that of null buffer + #[inline] + pub unsafe fn set_null_count(&mut self, null_count: i64) { + self.null_count = null_count; + } + /// Returns the buffer at the provided index /// /// # Panic