From e7588bda4b4048a5fb21cdb41efba93e18eca16d Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Tue, 19 Nov 2024 09:41:24 +0100 Subject: [PATCH] Fix Dictionary logical nulls (#6740) For `DictionaryArray`, methods `logical_nulls` and `logical_null_count` would return incorrect result if the underlying values had different physical and logical nullability. --- arrow-array/src/array/dictionary_array.rs | 52 ++++++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 1187e16769a0..920be65fcf5f 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -729,7 +729,7 @@ impl Array for DictionaryArray { } fn logical_nulls(&self) -> Option { - match self.values.nulls() { + match self.values.logical_nulls() { None => self.nulls().cloned(), Some(value_nulls) => { let mut builder = BooleanBufferBuilder::new(self.len()); @@ -1020,7 +1020,7 @@ impl AnyDictionaryArray for DictionaryArray { mod tests { use super::*; use crate::cast::as_dictionary_array; - use crate::{Int16Array, Int32Array, Int8Array}; + use crate::{Int16Array, Int32Array, Int8Array, RunArray}; use arrow_buffer::{Buffer, ToByteSlice}; #[test] @@ -1445,6 +1445,54 @@ mod tests { assert_eq!(values, &[Some(50), None, None, Some(2)]) } + #[test] + fn test_logical_nulls() -> Result<(), ArrowError> { + let values = Arc::new(RunArray::try_new( + &Int32Array::from(vec![1, 3, 7]), + &Int32Array::from(vec![Some(1), None, Some(3)]), + )?) as ArrayRef; + + // For this test to be meaningful, the values array need to have different nulls and logical nulls + assert_eq!(values.null_count(), 0); + assert_eq!(values.logical_null_count(), 2); + + // Construct a trivial dictionary with 1-1 mapping to underlying array + let dictionary = DictionaryArray::::try_new( + Int8Array::from((0..values.len()).map(|i| i as i8).collect::>()), + Arc::clone(&values), + )?; + + // No keys are null + assert_eq!(dictionary.null_count(), 0); + // Dictionary array values are logically nullable + assert_eq!(dictionary.logical_null_count(), values.logical_null_count()); + assert_eq!(dictionary.logical_nulls(), values.logical_nulls()); + assert!(dictionary.is_nullable()); + + // Construct a trivial dictionary with 1-1 mapping to underlying array except that key 0 is nulled out + let dictionary = DictionaryArray::::try_new( + Int8Array::from( + (0..values.len()) + .map(|i| i as i8) + .map(|i| if i == 0 { None } else { Some(i) }) + .collect::>(), + ), + Arc::clone(&values), + )?; + + // One key is null + assert_eq!(dictionary.null_count(), 1); + + // Dictionary array values are logically nullable + assert_eq!( + dictionary.logical_null_count(), + values.logical_null_count() + 1 + ); + assert!(dictionary.is_nullable()); + + Ok(()) + } + #[test] fn test_normalized_keys() { let values = vec![132, 0, 1].into();