From e1f89c0b11eeb7aca7eb6a78643b3571c133dc5e Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 26 Dec 2024 11:31:58 +0000 Subject: [PATCH 1/4] chore(arrow-ord): move `can_rank` to the `rank` file --- arrow-ord/src/rank.rs | 9 +++++++++ arrow-ord/src/sort.rs | 11 +---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs index ecc693bab4e4..e61cebef38ec 100644 --- a/arrow-ord/src/rank.rs +++ b/arrow-ord/src/rank.rs @@ -24,6 +24,15 @@ use arrow_buffer::NullBuffer; use arrow_schema::{ArrowError, DataType, SortOptions}; use std::cmp::Ordering; +/// Whether `arrow_ord::rank` can rank an array of given data type. +pub(crate) fn can_rank(data_type: &DataType) -> bool { + data_type.is_primitive() + || matches!( + data_type, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary + ) +} + /// Assigns a rank to each value in `array` based on its position in the sorted order /// /// Where values are equal, they will be assigned the highest of their ranks, diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index 60fc4a918525..51a6659e631b 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -30,7 +30,7 @@ use arrow_select::take::take; use std::cmp::Ordering; use std::sync::Arc; -use crate::rank::rank; +use crate::rank::{can_rank, rank}; pub use arrow_schema::SortOptions; /// Sort the `ArrayRef` using `SortOptions`. @@ -190,15 +190,6 @@ fn partition_validity(array: &dyn Array) -> (Vec, Vec) { } } -/// Whether `arrow_ord::rank` can rank an array of given data type. -fn can_rank(data_type: &DataType) -> bool { - data_type.is_primitive() - || matches!( - data_type, - DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary - ) -} - /// Whether `sort_to_indices` can sort an array of given data type. fn can_sort_to_indices(data_type: &DataType) -> bool { data_type.is_primitive() From bdc149d2f2a86319331c82372c8ca06e99b7a34f Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 26 Dec 2024 12:07:54 +0000 Subject: [PATCH 2/4] feat(arrow-ord): support boolean in rank --- arrow-ord/src/rank.rs | 68 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs index e61cebef38ec..b4d2f3a9b7c0 100644 --- a/arrow-ord/src/rank.rs +++ b/arrow-ord/src/rank.rs @@ -19,7 +19,9 @@ use arrow_array::cast::AsArray; use arrow_array::types::*; -use arrow_array::{downcast_primitive_array, Array, ArrowNativeTypeOp, GenericByteArray}; +use arrow_array::{ + downcast_primitive_array, Array, ArrowNativeTypeOp, BooleanArray, GenericByteArray, +}; use arrow_buffer::NullBuffer; use arrow_schema::{ArrowError, DataType, SortOptions}; use std::cmp::Ordering; @@ -29,7 +31,11 @@ pub(crate) fn can_rank(data_type: &DataType) -> bool { data_type.is_primitive() || matches!( data_type, - DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary + DataType::Boolean + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Binary + | DataType::LargeBinary ) } @@ -49,6 +55,7 @@ pub fn rank(array: &dyn Array, options: Option) -> Result, let options = options.unwrap_or_default(); let ranks = downcast_primitive_array! { array => primitive_rank(array.values(), array.nulls(), options), + DataType::Boolean => boolean_rank(array.as_boolean(), options), DataType::Utf8 => bytes_rank(array.as_bytes::(), options), DataType::LargeUtf8 => bytes_rank(array.as_bytes::(), options), DataType::Binary => bytes_rank(array.as_bytes::(), options), @@ -75,6 +82,26 @@ fn primitive_rank( rank_impl(values.len(), to_sort, options, T::compare, T::is_eq) } +#[inline(never)] +fn boolean_rank(array: &BooleanArray, options: SortOptions) -> Vec { + let len: u32 = array.len().try_into().unwrap(); + + let to_sort: Vec<(bool, u32)> = match array.nulls().filter(|n| n.null_count() > 0) { + Some(n) => n + .valid_indices() + .map(|idx| (array.value(idx), idx as u32)) + .collect(), + None => array.values().iter().zip(0..len).collect(), + }; + rank_impl( + array.len(), + to_sort, + options, + |a: bool, b: bool| a.cmp(&b), + |a: bool, b: bool| a == b, + ) +} + #[inline(never)] fn bytes_rank(array: &GenericByteArray, options: SortOptions) -> Vec { let to_sort: Vec<(&[u8], u32)> = match array.nulls().filter(|n| n.null_count() > 0) { @@ -177,6 +204,43 @@ mod tests { assert_eq!(res, &[4, 6, 3, 6, 3, 3]); } + #[test] + fn test_booleans() { + let descending = SortOptions { + descending: true, + nulls_first: true, + }; + + let nulls_last = SortOptions { + descending: false, + nulls_first: false, + }; + + let nulls_last_descending = SortOptions { + descending: true, + nulls_first: false, + }; + + let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), Some(false)]); + let res = rank(&a, None).unwrap(); + assert_eq!(res, &[5, 5, 1, 3, 3]); + + let res = rank(&a, Some(descending)).unwrap(); + assert_eq!(res, &[3, 3, 1, 5, 5]); + + let res = rank(&a, Some(nulls_last)).unwrap(); + assert_eq!(res, &[4, 4, 5, 2, 2]); + + let res = rank(&a, Some(nulls_last_descending)).unwrap(); + assert_eq!(res, &[2, 2, 5, 4, 4]); + + // Test with non-zero null values + let nulls = NullBuffer::from(vec![true, true, false, true, true]); + let a = BooleanArray::new(vec![true, true, true, false, false].into(), Some(nulls)); + let res = rank(&a, None).unwrap(); + assert_eq!(res, &[5, 5, 1, 3, 3]); + } + #[test] fn test_bytes() { let v = vec!["foo", "fo", "bar", "bar"]; From 758b3d976097f2976e66707ffba362654c4d68be Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 26 Dec 2024 14:13:19 +0000 Subject: [PATCH 3/4] add tests for sorting list of booleans --- arrow-ord/src/sort.rs | 387 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 386 insertions(+), 1 deletion(-) diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index 51a6659e631b..88c0bfd07dc5 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -785,12 +785,14 @@ impl LexicographicalComparator { mod tests { use super::*; use arrow_array::builder::{ - FixedSizeListBuilder, Int64Builder, ListBuilder, PrimitiveRunBuilder, + BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder, + PrimitiveRunBuilder, }; use arrow_buffer::{i256, NullBuffer}; use arrow_schema::Field; use half::f16; use rand::rngs::StdRng; + use rand::seq::SliceRandom; use rand::{Rng, RngCore, SeedableRng}; fn create_decimal128_array(data: Vec>) -> Decimal128Array { @@ -867,9 +869,14 @@ mod tests { limit: Option, expected_data: Vec, ) { + println!("input: {:?}", data); let output = BooleanArray::from(data); let expected = UInt32Array::from(expected_data); + let other = sort(&(Arc::new(output.clone()) as ArrayRef), options).unwrap(); let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap(); + println!("options: {:?}", options); + println!("output: {:?}", other); + assert_eq!(output, expected) } @@ -1541,6 +1548,384 @@ mod tests { ); } + /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray + /// + /// The input data must have the same length for all list items so that we can test FixedSizeListArray + /// + fn test_every_config_sort_boolean_list_arrays( + data: Vec>>>, + options: Option, + expected_data: Vec>>>, + ) { + let first_length = data + .iter() + .find_map(|x| x.as_ref().map(|x| x.len())) + .unwrap_or(0); + let first_non_match_length = data + .iter() + .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length)) + .position(|x| x != first_length); + + assert_eq!( + first_non_match_length, None, + "All list items should have the same length {first_length}, input data is invalid" + ); + + let first_non_match_length = expected_data + .iter() + .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length)) + .position(|x| x != first_length); + + assert_eq!( + first_non_match_length, None, + "All list items should have the same length {first_length}, expected data is invalid" + ); + + let limit = expected_data.len().saturating_div(2); + + for &with_limit in &[false, true] { + let (limit, expected_data) = if with_limit { + ( + Some(limit), + expected_data.iter().take(limit).cloned().collect(), + ) + } else { + (None, expected_data.clone()) + }; + + for &fixed_length in &[None, Some(first_length as i32)] { + test_sort_boolean_list_arrays( + data.clone(), + options, + limit, + expected_data.clone(), + fixed_length, + ); + } + } + } + + fn test_sort_boolean_list_arrays( + data: Vec>>>, + options: Option, + limit: Option, + expected_data: Vec>>>, + fixed_length: Option, + ) { + fn build_fixed_boolean_list_array( + data: Vec>>>, + fixed_length: i32, + ) -> ArrayRef { + let mut builder = FixedSizeListBuilder::new( + BooleanBuilder::with_capacity(fixed_length as usize), + fixed_length, + ); + for sublist in data { + match sublist { + Some(sublist) => { + builder.values().extend(sublist); + builder.append(true); + } + None => { + builder + .values() + .extend(std::iter::repeat(None).take(fixed_length as usize)); + builder.append(false); + } + } + } + Arc::new(builder.finish()) as ArrayRef + } + + fn build_generic_boolean_list_array( + data: Vec>>>, + ) -> ArrayRef { + let mut builder = GenericListBuilder::::new(BooleanBuilder::new()); + builder.extend(data); + Arc::new(builder.finish()) as ArrayRef + } + + // for FixedSizedList + if let Some(length) = fixed_length { + let input = build_fixed_boolean_list_array(data.clone(), length); + let sorted = match limit { + Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), + _ => sort(&(input as ArrayRef), options).unwrap(), + }; + let expected = build_fixed_boolean_list_array(expected_data.clone(), length); + + assert_eq!(&sorted, &expected); + } + + // for List + let input = build_generic_boolean_list_array::(data.clone()); + let sorted = match limit { + Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), + _ => sort(&(input as ArrayRef), options).unwrap(), + }; + let expected = build_generic_boolean_list_array::(expected_data.clone()); + + assert_eq!(&sorted, &expected); + + // for LargeList + let input = build_generic_boolean_list_array::(data.clone()); + let sorted = match limit { + Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(), + _ => sort(&(input as ArrayRef), options).unwrap(), + }; + let expected = build_generic_boolean_list_array::(expected_data.clone()); + + assert_eq!(&sorted, &expected); + } + + #[test] + fn test_sort_list_of_booleans() { + // These are all the possible combinations of boolean values + // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value) + #[rustfmt::skip] + let mut cases = vec![ + Some(vec![Some(true), Some(true), Some(true)]), + Some(vec![Some(true), Some(true), Some(false)]), + Some(vec![Some(true), Some(true), None]), + + Some(vec![Some(true), Some(false), Some(true)]), + Some(vec![Some(true), Some(false), Some(false)]), + Some(vec![Some(true), Some(false), None]), + + Some(vec![Some(true), None, Some(true)]), + Some(vec![Some(true), None, Some(false)]), + Some(vec![Some(true), None, None]), + + Some(vec![Some(false), Some(true), Some(true)]), + Some(vec![Some(false), Some(true), Some(false)]), + Some(vec![Some(false), Some(true), None]), + + Some(vec![Some(false), Some(false), Some(true)]), + Some(vec![Some(false), Some(false), Some(false)]), + Some(vec![Some(false), Some(false), None]), + + Some(vec![Some(false), None, Some(true)]), + Some(vec![Some(false), None, Some(false)]), + Some(vec![Some(false), None, None]), + + Some(vec![None, Some(true), Some(true)]), + Some(vec![None, Some(true), Some(false)]), + Some(vec![None, Some(true), None]), + + Some(vec![None, Some(false), Some(true)]), + Some(vec![None, Some(false), Some(false)]), + Some(vec![None, Some(false), None]), + + Some(vec![None, None, Some(true)]), + Some(vec![None, None, Some(false)]), + Some(vec![None, None, None]), + None, + ]; + + cases.shuffle(&mut StdRng::seed_from_u64(42)); + + // The order is false, true, null + #[rustfmt::skip] + let expected_descending_false_nulls_first_false = vec![ + Some(vec![Some(false), Some(false), Some(false)]), + Some(vec![Some(false), Some(false), Some(true)]), + Some(vec![Some(false), Some(false), None]), + + Some(vec![Some(false), Some(true), Some(false)]), + Some(vec![Some(false), Some(true), Some(true)]), + Some(vec![Some(false), Some(true), None]), + + Some(vec![Some(false), None, Some(false)]), + Some(vec![Some(false), None, Some(true)]), + Some(vec![Some(false), None, None]), + + Some(vec![Some(true), Some(false), Some(false)]), + Some(vec![Some(true), Some(false), Some(true)]), + Some(vec![Some(true), Some(false), None]), + + Some(vec![Some(true), Some(true), Some(false)]), + Some(vec![Some(true), Some(true), Some(true)]), + Some(vec![Some(true), Some(true), None]), + + Some(vec![Some(true), None, Some(false)]), + Some(vec![Some(true), None, Some(true)]), + Some(vec![Some(true), None, None]), + + Some(vec![None, Some(false), Some(false)]), + Some(vec![None, Some(false), Some(true)]), + Some(vec![None, Some(false), None]), + + Some(vec![None, Some(true), Some(false)]), + Some(vec![None, Some(true), Some(true)]), + Some(vec![None, Some(true), None]), + + Some(vec![None, None, Some(false)]), + Some(vec![None, None, Some(true)]), + Some(vec![None, None, None]), + None, + ]; + test_every_config_sort_boolean_list_arrays( + cases.clone(), + Some(SortOptions { + descending: false, + nulls_first: false, + }), + expected_descending_false_nulls_first_false, + ); + + // The order is null, false, true + #[rustfmt::skip] + let expected_descending_false_nulls_first_true = vec![ + None, + + Some(vec![None, None, None]), + Some(vec![None, None, Some(false)]), + Some(vec![None, None, Some(true)]), + + Some(vec![None, Some(false), None]), + Some(vec![None, Some(false), Some(false)]), + Some(vec![None, Some(false), Some(true)]), + + Some(vec![None, Some(true), None]), + Some(vec![None, Some(true), Some(false)]), + Some(vec![None, Some(true), Some(true)]), + + Some(vec![Some(false), None, None]), + Some(vec![Some(false), None, Some(false)]), + Some(vec![Some(false), None, Some(true)]), + + Some(vec![Some(false), Some(false), None]), + Some(vec![Some(false), Some(false), Some(false)]), + Some(vec![Some(false), Some(false), Some(true)]), + + Some(vec![Some(false), Some(true), None]), + Some(vec![Some(false), Some(true), Some(false)]), + Some(vec![Some(false), Some(true), Some(true)]), + + Some(vec![Some(true), None, None]), + Some(vec![Some(true), None, Some(false)]), + Some(vec![Some(true), None, Some(true)]), + + Some(vec![Some(true), Some(false), None]), + Some(vec![Some(true), Some(false), Some(false)]), + Some(vec![Some(true), Some(false), Some(true)]), + + Some(vec![Some(true), Some(true), None]), + Some(vec![Some(true), Some(true), Some(false)]), + Some(vec![Some(true), Some(true), Some(true)]), + ]; + + test_every_config_sort_boolean_list_arrays( + cases.clone(), + Some(SortOptions { + descending: false, + nulls_first: true, + }), + expected_descending_false_nulls_first_true, + ); + + // The order is true, false, null + #[rustfmt::skip] + let expected_descending_true_nulls_first_false = vec![ + Some(vec![Some(true), Some(true), Some(true)]), + Some(vec![Some(true), Some(true), Some(false)]), + Some(vec![Some(true), Some(true), None]), + + Some(vec![Some(true), Some(false), Some(true)]), + Some(vec![Some(true), Some(false), Some(false)]), + Some(vec![Some(true), Some(false), None]), + + Some(vec![Some(true), None, Some(true)]), + Some(vec![Some(true), None, Some(false)]), + Some(vec![Some(true), None, None]), + + Some(vec![Some(false), Some(true), Some(true)]), + Some(vec![Some(false), Some(true), Some(false)]), + Some(vec![Some(false), Some(true), None]), + + Some(vec![Some(false), Some(false), Some(true)]), + Some(vec![Some(false), Some(false), Some(false)]), + Some(vec![Some(false), Some(false), None]), + + Some(vec![Some(false), None, Some(true)]), + Some(vec![Some(false), None, Some(false)]), + Some(vec![Some(false), None, None]), + + Some(vec![None, Some(true), Some(true)]), + Some(vec![None, Some(true), Some(false)]), + Some(vec![None, Some(true), None]), + + Some(vec![None, Some(false), Some(true)]), + Some(vec![None, Some(false), Some(false)]), + Some(vec![None, Some(false), None]), + + Some(vec![None, None, Some(true)]), + Some(vec![None, None, Some(false)]), + Some(vec![None, None, None]), + + None, + ]; + test_every_config_sort_boolean_list_arrays( + cases.clone(), + Some(SortOptions { + descending: true, + nulls_first: false, + }), + expected_descending_true_nulls_first_false, + ); + + // The order is null, true, false + #[rustfmt::skip] + let expected_descending_true_nulls_first_true = vec![ + None, + + Some(vec![None, None, None]), + Some(vec![None, None, Some(true)]), + Some(vec![None, None, Some(false)]), + + Some(vec![None, Some(true), None]), + Some(vec![None, Some(true), Some(true)]), + Some(vec![None, Some(true), Some(false)]), + + Some(vec![None, Some(false), None]), + Some(vec![None, Some(false), Some(true)]), + Some(vec![None, Some(false), Some(false)]), + + Some(vec![Some(true), None, None]), + Some(vec![Some(true), None, Some(true)]), + Some(vec![Some(true), None, Some(false)]), + + Some(vec![Some(true), Some(true), None]), + Some(vec![Some(true), Some(true), Some(true)]), + Some(vec![Some(true), Some(true), Some(false)]), + + Some(vec![Some(true), Some(false), None]), + Some(vec![Some(true), Some(false), Some(true)]), + Some(vec![Some(true), Some(false), Some(false)]), + + Some(vec![Some(false), None, None]), + Some(vec![Some(false), None, Some(true)]), + Some(vec![Some(false), None, Some(false)]), + + Some(vec![Some(false), Some(true), None]), + Some(vec![Some(false), Some(true), Some(true)]), + Some(vec![Some(false), Some(true), Some(false)]), + + Some(vec![Some(false), Some(false), None]), + Some(vec![Some(false), Some(false), Some(true)]), + Some(vec![Some(false), Some(false), Some(false)]), + ]; + // Testing with limit false and fixed_length None + test_every_config_sort_boolean_list_arrays( + cases.clone(), + Some(SortOptions { + descending: true, + nulls_first: true, + }), + expected_descending_true_nulls_first_true, + ); + } + #[test] fn test_sort_indices_decimal128() { // decimal default From 64864b48a2b982bc197ed8eca436d0715028c4c1 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 26 Dec 2024 20:34:46 +0000 Subject: [PATCH 4/4] improve boolean rank performance --- arrow-ord/src/rank.rs | 80 +++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs index b4d2f3a9b7c0..c617b2a05c26 100644 --- a/arrow-ord/src/rank.rs +++ b/arrow-ord/src/rank.rs @@ -82,26 +82,6 @@ fn primitive_rank( rank_impl(values.len(), to_sort, options, T::compare, T::is_eq) } -#[inline(never)] -fn boolean_rank(array: &BooleanArray, options: SortOptions) -> Vec { - let len: u32 = array.len().try_into().unwrap(); - - let to_sort: Vec<(bool, u32)> = match array.nulls().filter(|n| n.null_count() > 0) { - Some(n) => n - .valid_indices() - .map(|idx| (array.value(idx), idx as u32)) - .collect(), - None => array.values().iter().zip(0..len).collect(), - }; - rank_impl( - array.len(), - to_sort, - options, - |a: bool, b: bool| a.cmp(&b), - |a: bool, b: bool| a == b, - ) -} - #[inline(never)] fn bytes_rank(array: &GenericByteArray, options: SortOptions) -> Vec { let to_sort: Vec<(&[u8], u32)> = match array.nulls().filter(|n| n.null_count() > 0) { @@ -162,6 +142,48 @@ where out } +/// Return the index for the rank when ranking boolean array +/// +/// The index is calculated as follows: +/// if is_null is true, the index is 2 +/// if is_null is false and the value is true, the index is 1 +/// otherwise, the index is 0 +/// +/// false is 0 and true is 1 because these are the value when cast to number +#[inline] +fn get_boolean_rank_index(value: bool, is_null: bool) -> usize { + let is_null_num = is_null as usize; + (is_null_num << 1) | (value as usize & !is_null_num) +} + +#[inline(never)] +fn boolean_rank(array: &BooleanArray, options: SortOptions) -> Vec { + let ranks_index: [u32; 3] = match (options.descending, options.nulls_first) { + // The order is null, true, false + (true, true) => [2, 1, 0], + // The order is true, false, null + (true, false) => [1, 0, 2], + // The order is null, false, true + (false, true) => [1, 2, 0], + // The order is false, true, null + (false, false) => [0, 1, 2], + }; + + match array.nulls().filter(|n| n.null_count() > 0) { + Some(n) => array + .values() + .iter() + .zip(n.iter()) + .map(|(value, is_valid)| ranks_index[get_boolean_rank_index(value, !is_valid)]) + .collect::>(), + None => array + .values() + .iter() + .map(|value| ranks_index[value as usize]) + .collect::>(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -204,6 +226,14 @@ mod tests { assert_eq!(res, &[4, 6, 3, 6, 3, 3]); } + #[test] + fn test_get_boolean_rank_index() { + assert_eq!(get_boolean_rank_index(true, true), 2); + assert_eq!(get_boolean_rank_index(false, true), 2); + assert_eq!(get_boolean_rank_index(true, false), 1); + assert_eq!(get_boolean_rank_index(false, false), 0); + } + #[test] fn test_booleans() { let descending = SortOptions { @@ -223,22 +253,22 @@ mod tests { let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), Some(false)]); let res = rank(&a, None).unwrap(); - assert_eq!(res, &[5, 5, 1, 3, 3]); + assert_eq!(res, &[2, 2, 0, 1, 1]); let res = rank(&a, Some(descending)).unwrap(); - assert_eq!(res, &[3, 3, 1, 5, 5]); + assert_eq!(res, &[1, 1, 0, 2, 2]); let res = rank(&a, Some(nulls_last)).unwrap(); - assert_eq!(res, &[4, 4, 5, 2, 2]); + assert_eq!(res, &[1, 1, 2, 0, 0]); let res = rank(&a, Some(nulls_last_descending)).unwrap(); - assert_eq!(res, &[2, 2, 5, 4, 4]); + assert_eq!(res, &[0, 0, 2, 1, 1]); // Test with non-zero null values let nulls = NullBuffer::from(vec![true, true, false, true, true]); let a = BooleanArray::new(vec![true, true, true, false, false].into(), Some(nulls)); let res = rank(&a, None).unwrap(); - assert_eq!(res, &[5, 5, 1, 3, 3]); + assert_eq!(res, &[2, 2, 0, 1, 1]); } #[test]