From d9783dc9a6d4faf26c7066e540c7bbda6b352d60 Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Sun, 17 Dec 2023 16:21:28 +0800 Subject: [PATCH 1/6] Support equality of StructArray --- arrow-ord/src/cmp.rs | 285 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 238 insertions(+), 47 deletions(-) diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index bfb1f64e2eb8..b235548c17d9 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -27,7 +27,7 @@ use arrow_array::cast::AsArray; use arrow_array::types::ByteArrayType; use arrow_array::{ downcast_primitive_array, AnyDictionaryArray, Array, ArrowNativeTypeOp, BooleanArray, Datum, - FixedSizeBinaryArray, GenericByteArray, + FixedSizeBinaryArray, GenericByteArray, StructArray, }; use arrow_buffer::bit_util::ceil; use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer}; @@ -169,12 +169,14 @@ pub fn not_distinct(lhs: &dyn Datum, rhs: &dyn Datum) -> Result Result { - use arrow_schema::DataType::*; - let (l, l_s) = lhs.get(); - let (r, r_s) = rhs.get(); + let (l_array, l_s) = lhs.get(); + let (r_array, r_s) = rhs.get(); + + let l_nulls = l_array.logical_nulls(); + let r_nulls = r_array.logical_nulls(); - let l_len = l.len(); - let r_len = r.len(); + let l_len = l_array.len(); + let r_len = r_array.len(); if l_len != r_len && !l_s && !r_s { return Err(ArrowError::InvalidArgumentError(format!( @@ -187,39 +189,6 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result l_len, }; - let l_nulls = l.logical_nulls(); - let r_nulls = r.logical_nulls(); - - let l_v = l.as_any_dictionary_opt(); - let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l); - let l_t = l.data_type(); - - let r_v = r.as_any_dictionary_opt(); - let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); - let r_t = r.data_type(); - - if l_t != r_t || l_t.is_nested() { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - - // Defer computation as may not be necessary - let values = || -> BooleanBuffer { - let d = downcast_primitive_array! { - (l, r) => apply(op, l.values().as_ref(), l_s, l_v, r.values().as_ref(), r_s, r_v), - (Boolean, Boolean) => apply(op, l.as_boolean(), l_s, l_v, r.as_boolean(), r_s, r_v), - (Utf8, Utf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), - (LargeUtf8, LargeUtf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), - (Binary, Binary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), - (LargeBinary, LargeBinary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), - (FixedSizeBinary(_), FixedSizeBinary(_)) => apply(op, l.as_fixed_size_binary(), l_s, l_v, r.as_fixed_size_binary(), r_s, r_v), - (Null, Null) => None, - _ => unreachable!(), - }; - d.unwrap_or_else(|| BooleanBuffer::new_unset(len)) - }; - let l_nulls = l_nulls.filter(|n| n.null_count() > 0); let r_nulls = r_nulls.filter(|n| n.null_count() > 0); Ok(match (l_nulls, l_s, r_nulls, r_s) { @@ -227,7 +196,7 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result { - let values = values(); + let values = compare_op_values(op, l_array, l_s, r_array, r_s, len)?; let l = l.inner().bit_chunks().iter_padded(); let r = r.inner().bit_chunks().iter_padded(); let ne = values.bit_chunks().iter_padded(); @@ -237,7 +206,7 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result { - let values = values(); + let values = compare_op_values(op, l_array, l_s, r_array, r_s, len)?; let l = l.inner().bit_chunks().iter_padded(); let r = r.inner().bit_chunks().iter_padded(); let e = values.bit_chunks().iter_padded(); @@ -246,7 +215,10 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result BooleanArray::new(values(), NullBuffer::union(Some(&l), Some(&r))), + _ => BooleanArray::new( + compare_op_values(op, l_array, l_s, r_array, r_s, len)?, + NullBuffer::union(Some(&l), Some(&r)), + ), } } (Some(_), true, Some(a), false) | (Some(a), false, Some(_), true) => { @@ -268,23 +240,122 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result match op { Op::Distinct => { - let values = values(); + let values = compare_op_values(op, l_array, l_s, r_array, r_s, len)?; let l = nulls.inner().bit_chunks().iter_padded(); let ne = values.bit_chunks().iter_padded(); let c = |(l, n)| u64::not(l) | n; let buffer = l.zip(ne).map(c).collect(); BooleanBuffer::new(buffer, 0, len).into() } - Op::NotDistinct => (nulls.inner() & &values()).into(), - _ => BooleanArray::new(values(), Some(nulls)), + Op::NotDistinct => (nulls.inner() + & &compare_op_values(op, l_array, l_s, r_array, r_s, len)?) + .into(), + _ => BooleanArray::new( + compare_op_values(op, l_array, l_s, r_array, r_s, len)?, + Some(nulls), + ), }, } } // Neither side is nullable - (None, _, None, _) => BooleanArray::new(values(), None), + (None, _, None, _) => BooleanArray::new( + compare_op_values(op, l_array, l_s, r_array, r_s, len)?, + None, + ), }) } +/// Defer computation as may not be necessary +/// get the BooleanBuffer result of the comparison +fn compare_op_values( + op: Op, + l: &dyn Array, + l_s: bool, + r: &dyn Array, + r_s: bool, + len: usize, +) -> Result { + use arrow_schema::DataType::*; + let l_v = l.as_any_dictionary_opt(); + let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l); + let l_t = l.data_type(); + + let r_v = r.as_any_dictionary_opt(); + let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); + let r_t = r.data_type(); + + if l_t.is_nested() { + if !l_t.equals_datatype(r_t) { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + match (l_t, op) { + (Struct(_), Op::Equal | Op::NotEqual) => {} + _ => { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + } + } else if r_t != l_t { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + let d = downcast_primitive_array! { + (l, r) => apply(op, l.values().as_ref(), l_s, l_v, r.values().as_ref(), r_s, r_v), + (Boolean, Boolean) => apply(op, l.as_boolean(), l_s, l_v, r.as_boolean(), r_s, r_v), + (Utf8, Utf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), + (LargeUtf8, LargeUtf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), + (Binary, Binary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), + (LargeBinary, LargeBinary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), + (FixedSizeBinary(_), FixedSizeBinary(_)) => apply(op, l.as_fixed_size_binary(), l_s, l_v, r.as_fixed_size_binary(), r_s, r_v), + (Null, Null) => None, + (Struct(_), Struct(_)) => Some(compare_op_struct_values(op, l, l_s, r, r_s, len)?), + _ => unreachable!(), + }; + Ok(d.unwrap_or_else(|| BooleanBuffer::new_unset(len))) +} + +/// recursively compare fields of struct arrays +fn compare_op_struct_values( + op: Op, + l: &dyn Array, + l_s: bool, + r: &dyn Array, + r_s: bool, + len: usize, +) -> Result { + // when one of field is equal, the result is false for not equal + // so we use neg to reverse the result of equal when handle not equal + let neg = match op { + Op::Equal => false, + Op::NotEqual => true, + _ => unreachable!(), + }; + + let l = l.as_any().downcast_ref::().unwrap(); + let r = r.as_any().downcast_ref::().unwrap(); + + let mut child_res: Vec = Vec::with_capacity(len); + // compare each field of struct + for item in l + .columns() + .to_vec() + .iter() + .zip(r.columns().to_vec().iter()) + .map(|(col_l, col_r)| compare_op_values(Op::Equal, col_l, l_s, col_r, r_s, len)) + { + child_res.push(item?); + } + // combine the result of each field + let equality = child_res + .iter() + .fold(BooleanBuffer::new_set(len), |acc, x| &acc & x); + Ok(if neg { !&equality } else { equality }) +} + /// Perform a potentially vectored `op` on the provided `ArrayOrd` fn apply( op: Op, @@ -544,7 +615,9 @@ impl<'a> ArrayOrd for &'a FixedSizeBinaryArray { mod tests { use std::sync::Arc; - use arrow_array::{DictionaryArray, Int32Array, Scalar, StringArray}; + use arrow_array::{ArrayRef, DictionaryArray, Int32Array, Scalar, StringArray, StructArray}; + use arrow_buffer::Buffer; + use arrow_schema::{DataType, Field}; use super::*; @@ -702,4 +775,122 @@ mod tests { neq(&col.slice(0, col.len() - 1), &col.slice(1, col.len() - 1)).unwrap(); } + + #[test] + fn test_struct_equality() { + // test struct('a', 'b') = struct('a', 'b'), the null buffer is 0b0111 + let left_a = Arc::new(Int32Array::new( + vec![0, 1, 2, 3].into(), + Some(vec![true, false, true, false].into()), + )); + let right_a = Arc::new(Int32Array::new( + vec![0, 1, 2, 3].into(), + Some(vec![true, false, true, false].into()), + )); + let left_b = Arc::new(Int32Array::new( + vec![0, 1, 2, 3].into(), + Some(vec![true, true, true, false].into()), + )); + let right_b = Arc::new(Int32Array::new( + vec![0, 1, 2, 3].into(), + Some(vec![true, true, true, false].into()), + )); + let field_a = Arc::new(Field::new("a", DataType::Int32, true)); + let field_b = Arc::new(Field::new("b", DataType::Int32, true)); + let left_struct = StructArray::from(( + vec![ + (field_a.clone(), left_a.clone() as ArrayRef), + (field_b.clone(), left_b.clone() as ArrayRef), + ], + Buffer::from([0b0111]), + )); + let right_struct = StructArray::from(( + vec![ + (field_a.clone(), right_a.clone() as ArrayRef), + (field_b.clone(), right_b.clone() as ArrayRef), + ], + Buffer::from([0b0111]), + )); + let expected = BooleanArray::new( + vec![true, true, true, true].into(), + Some(vec![true, true, true, false].into()), + ); + assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); + let expected = BooleanArray::new( + vec![false, false, false, false].into(), + Some(vec![true, true, true, false].into()), + ); + assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); + + let sub_struct_fields = left_struct.fields().clone(); + + // test struct('a', 'b') = struct('a', 'b'), right a[1] is different from left a[2],the null buffer is 0b0111 + let right_a2 = Arc::new(Int32Array::new( + vec![0, 2, 2, 3].into(), + Some(vec![true, true, true, false].into()), + )); + let right_struct = StructArray::from(( + vec![ + (field_a.clone(), right_a2.clone() as ArrayRef), + (field_b.clone(), right_b.clone() as ArrayRef), + ], + Buffer::from([0b0111]), + )); + let expected = BooleanArray::new( + vec![true, false, true, true].into(), + Some(vec![true, true, true, false].into()), + ); + assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); + let expected = BooleanArray::new( + vec![false, true, false, false].into(), + Some(vec![true, true, true, false].into()), + ); + assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); + + // test struct('a' , struct('suba', 'subb')) = struct('a', struct('suba', 'subb')), where the right suba1[1] different from left suba[1],the null buffer is 0b0111 + let left_struct = StructArray::from(( + vec![ + (field_a.clone(), left_a.clone() as ArrayRef), + ( + Arc::new(Field::new( + "SubStruct", + DataType::Struct(sub_struct_fields.clone()), + true, + )), + Arc::new(left_struct) as ArrayRef, + ), + ], + Buffer::from([0b0111]), + )); + let right_struct = StructArray::from(( + vec![ + (field_a.clone(), right_a.clone() as ArrayRef), + ( + Arc::new(Field::new( + "SubStruct", + DataType::Struct(sub_struct_fields.clone()), + true, + )), + Arc::new(right_struct) as ArrayRef, + ), + ], + Buffer::from([0b0111]), + )); + let expected = BooleanArray::new( + vec![true, false, true, true].into(), + Some(vec![true, true, true, false].into()), + ); + assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); + let expected = BooleanArray::new( + vec![false, true, false, false].into(), + Some(vec![true, true, true, false].into()), + ); + assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); + } } From 73f2a5672e4d6980d24d1524c29c696d216b3c6d Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Mon, 18 Dec 2023 23:08:38 +0800 Subject: [PATCH 2/6] use as_struct & collect. --- arrow-ord/src/cmp.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index b235548c17d9..746bcba7cd95 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -27,7 +27,7 @@ use arrow_array::cast::AsArray; use arrow_array::types::ByteArrayType; use arrow_array::{ downcast_primitive_array, AnyDictionaryArray, Array, ArrowNativeTypeOp, BooleanArray, Datum, - FixedSizeBinaryArray, GenericByteArray, StructArray, + FixedSizeBinaryArray, GenericByteArray, }; use arrow_buffer::bit_util::ceil; use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer}; @@ -335,20 +335,17 @@ fn compare_op_struct_values( _ => unreachable!(), }; - let l = l.as_any().downcast_ref::().unwrap(); - let r = r.as_any().downcast_ref::().unwrap(); + let l = l.as_struct(); + let r = r.as_struct(); - let mut child_res: Vec = Vec::with_capacity(len); // compare each field of struct - for item in l + let child_res = l .columns() .to_vec() .iter() .zip(r.columns().to_vec().iter()) .map(|(col_l, col_r)| compare_op_values(Op::Equal, col_l, l_s, col_r, r_s, len)) - { - child_res.push(item?); - } + .collect::, ArrowError>>()?; // combine the result of each field let equality = child_res .iter() From ddcd6f476c970c12a76fcc65663ce80b235cf3bb Mon Sep 17 00:00:00 2001 From: yi wang <48236141+my-vegetable-has-exploded@users.noreply.github.com> Date: Tue, 19 Dec 2023 10:58:12 +0800 Subject: [PATCH 3/6] rm useless to_vec() Co-authored-by: Andrew Lamb --- arrow-ord/src/cmp.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index 746bcba7cd95..7e2058a64409 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -341,9 +341,8 @@ fn compare_op_struct_values( // compare each field of struct let child_res = l .columns() - .to_vec() .iter() - .zip(r.columns().to_vec().iter()) + .zip(r.columns().iter()) .map(|(col_l, col_r)| compare_op_values(Op::Equal, col_l, l_s, col_r, r_s, len)) .collect::, ArrowError>>()?; // combine the result of each field From b92531955ea1873049185070f15fbae90d5abccd Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Sun, 24 Dec 2023 16:24:29 +0800 Subject: [PATCH 4/6] union nullsbuffer for struct & add tests --- arrow-ord/src/cmp.rs | 359 ++++++++++++++++++++++++++----------------- 1 file changed, 215 insertions(+), 144 deletions(-) diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index 7e2058a64409..e1697570e600 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -169,14 +169,12 @@ pub fn not_distinct(lhs: &dyn Datum, rhs: &dyn Datum) -> Result Result { - let (l_array, l_s) = lhs.get(); - let (r_array, r_s) = rhs.get(); - - let l_nulls = l_array.logical_nulls(); - let r_nulls = r_array.logical_nulls(); + use arrow_schema::DataType::*; + let (l, l_s) = lhs.get(); + let (r, r_s) = rhs.get(); - let l_len = l_array.len(); - let r_len = r_array.len(); + let l_len = l.len(); + let r_len = r.len(); if l_len != r_len && !l_s && !r_s { return Err(ArrowError::InvalidArgumentError(format!( @@ -184,49 +182,166 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result {} + _ => { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + } + } else if r_t != l_t { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + let len = match l_s { true => r_len, false => l_len, }; + Ok(BooleanArray::new( + compare_op_values(op, l, l_s, r, r_s, len)?, + compare_op_nulls(op, l, l_s, r, r_s, len)?, + )) +} + +/// get the NullBuffer result of the comparison +fn compare_op_nulls( + op: Op, + l: &dyn Array, + l_s: bool, + r: &dyn Array, + r_s: bool, + len: usize, +) -> Result, ArrowError> { + use arrow_schema::DataType::*; + let l_t = l.data_type(); + let r_t = r.data_type(); + let l_nulls = l.logical_nulls().filter(|n| n.null_count() > 0); + let r_nulls = r.logical_nulls().filter(|n| n.null_count() > 0); + // for [not]Distinct, the result is never null + match op { + Op::Distinct | Op::NotDistinct => { + return Ok(None); + } + _ => {} + } + let nulls = match (l_nulls, l_s, r_nulls, r_s) { + // Either both sides are scalar or neither side is scalar + (Some(l_nulls), true, Some(r_nulls), true) + | (Some(l_nulls), false, Some(r_nulls), false) => { + NullBuffer::union(Some(&l_nulls), Some(&r_nulls)) + } + // Scalar is null, other side is non-scalar and nullable + (Some(_), true, Some(_), false) | (Some(_), false, Some(_), true) => { + Some(NullBuffer::new_null(len)) + } + // Only one side is nullable + (Some(nulls), is_scalar, None, _) | (None, _, Some(nulls), is_scalar) => match is_scalar { + true => Some(NullBuffer::new_null(len)), + false => Some(nulls), + }, + // Neither side is nullable + (None, _, None, _) => None, + }; + match (l_t, r_t) { + (Struct(_), Struct(_)) => { + // union all nulls from children, because any child in certain slot is null, the struct in the slot is uncomparable + let child_nulls = l + .as_struct() + .columns() + .iter() + .zip(r.as_struct().columns().iter()) + .map(|(l, r)| compare_op_nulls(op, l, l_s, r, r_s, len)) + .collect::, _>>()?; + Ok(child_nulls.iter().fold(nulls, |nulls, child_null| { + NullBuffer::union(nulls.as_ref(), child_null.as_ref()) + })) + } + _ => Ok(nulls), + } +} + +/// Defer computation as may not be necessary +/// get the BooleanBuffer result of the comparison +fn compare_op_values( + op: Op, + l: &dyn Array, + l_s: bool, + r: &dyn Array, + r_s: bool, + len: usize, +) -> Result { + use arrow_schema::DataType::*; + let l_v = l.as_any_dictionary_opt(); + let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l); - let l_nulls = l_nulls.filter(|n| n.null_count() > 0); - let r_nulls = r_nulls.filter(|n| n.null_count() > 0); + let r_v = r.as_any_dictionary_opt(); + let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); + + let l_nulls = l.logical_nulls().filter(|n| n.null_count() > 0); + let r_nulls = r.logical_nulls().filter(|n| n.null_count() > 0); + let values = || -> Result { + let values = downcast_primitive_array! { + (l, r) => apply(op, l.values().as_ref(), l_s, l_v, r.values().as_ref(), r_s, r_v), + (Boolean, Boolean) => apply(op, l.as_boolean(), l_s, l_v, r.as_boolean(), r_s, r_v), + (Utf8, Utf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), + (LargeUtf8, LargeUtf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), + (Binary, Binary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), + (LargeBinary, LargeBinary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), + (FixedSizeBinary(_), FixedSizeBinary(_)) => apply(op, l.as_fixed_size_binary(), l_s, l_v, r.as_fixed_size_binary(), r_s, r_v), + (Null, Null) => None, + (Struct(_), Struct(_)) => Some(compare_op_struct_values(op, l, l_s, r, r_s, len)?), + _ => unreachable!(), + }; + Ok(values.unwrap_or_else(|| BooleanBuffer::new_unset(len))) + }; Ok(match (l_nulls, l_s, r_nulls, r_s) { - (Some(l), true, Some(r), true) | (Some(l), false, Some(r), false) => { + (Some(l_nulls), true, Some(r_nulls), true) + | (Some(l_nulls), false, Some(r_nulls), false) => { // Either both sides are scalar or neither side is scalar match op { Op::Distinct => { - let values = compare_op_values(op, l_array, l_s, r_array, r_s, len)?; - let l = l.inner().bit_chunks().iter_padded(); - let r = r.inner().bit_chunks().iter_padded(); + let values = values()?; + let l_nulls = l_nulls.inner().bit_chunks().iter_padded(); + let r_nulls = r_nulls.inner().bit_chunks().iter_padded(); let ne = values.bit_chunks().iter_padded(); - let c = |((l, r), n)| ((l ^ r) | (l & r & n)); - let buffer = l.zip(r).zip(ne).map(c).collect(); - BooleanBuffer::new(buffer, 0, len).into() + let c = + |((l_nulls, r_nulls), n)| ((l_nulls ^ r_nulls) | (l_nulls & r_nulls & n)); + let buffer = l_nulls.zip(r_nulls).zip(ne).map(c).collect(); + BooleanBuffer::new(buffer, 0, len) } Op::NotDistinct => { - let values = compare_op_values(op, l_array, l_s, r_array, r_s, len)?; - let l = l.inner().bit_chunks().iter_padded(); - let r = r.inner().bit_chunks().iter_padded(); + let values = values()?; + let l_nulls = l_nulls.inner().bit_chunks().iter_padded(); + let r_nulls = r_nulls.inner().bit_chunks().iter_padded(); let e = values.bit_chunks().iter_padded(); - let c = |((l, r), e)| u64::not(l | r) | (l & r & e); - let buffer = l.zip(r).zip(e).map(c).collect(); - BooleanBuffer::new(buffer, 0, len).into() + let c = |((l_nulls, r_nulls), e)| { + u64::not(l_nulls | r_nulls) | (l_nulls & r_nulls & e) + }; + let buffer = l_nulls.zip(r_nulls).zip(e).map(c).collect(); + BooleanBuffer::new(buffer, 0, len) } - _ => BooleanArray::new( - compare_op_values(op, l_array, l_s, r_array, r_s, len)?, - NullBuffer::union(Some(&l), Some(&r)), - ), + _ => values()?, } } (Some(_), true, Some(a), false) | (Some(a), false, Some(_), true) => { // Scalar is null, other side is non-scalar and nullable match op { - Op::Distinct => a.into_inner().into(), - Op::NotDistinct => a.into_inner().not().into(), - _ => BooleanArray::new_null(len), + Op::Distinct => a.into_inner(), + Op::NotDistinct => a.into_inner().not(), + _ => BooleanBuffer::new_unset(len), } } (Some(nulls), is_scalar, None, _) | (None, _, Some(nulls), is_scalar) => { @@ -234,90 +349,29 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result match op { // Scalar is null, other side is not nullable - Op::Distinct => BooleanBuffer::new_set(len).into(), - Op::NotDistinct => BooleanBuffer::new_unset(len).into(), - _ => BooleanArray::new_null(len), + Op::Distinct => BooleanBuffer::new_set(len), + Op::NotDistinct => BooleanBuffer::new_unset(len), + _ => BooleanBuffer::new_unset(len), }, false => match op { Op::Distinct => { - let values = compare_op_values(op, l_array, l_s, r_array, r_s, len)?; - let l = nulls.inner().bit_chunks().iter_padded(); + let values = values()?; + let l_nulls = nulls.inner().bit_chunks().iter_padded(); let ne = values.bit_chunks().iter_padded(); - let c = |(l, n)| u64::not(l) | n; - let buffer = l.zip(ne).map(c).collect(); - BooleanBuffer::new(buffer, 0, len).into() + let c = |(l_nulls, n)| u64::not(l_nulls) | n; + let buffer = l_nulls.zip(ne).map(c).collect(); + BooleanBuffer::new(buffer, 0, len) } - Op::NotDistinct => (nulls.inner() - & &compare_op_values(op, l_array, l_s, r_array, r_s, len)?) - .into(), - _ => BooleanArray::new( - compare_op_values(op, l_array, l_s, r_array, r_s, len)?, - Some(nulls), - ), + Op::NotDistinct => nulls.inner() & &values()?, + _ => values()?, }, } } // Neither side is nullable - (None, _, None, _) => BooleanArray::new( - compare_op_values(op, l_array, l_s, r_array, r_s, len)?, - None, - ), + (None, _, None, _) => values()?, }) } -/// Defer computation as may not be necessary -/// get the BooleanBuffer result of the comparison -fn compare_op_values( - op: Op, - l: &dyn Array, - l_s: bool, - r: &dyn Array, - r_s: bool, - len: usize, -) -> Result { - use arrow_schema::DataType::*; - let l_v = l.as_any_dictionary_opt(); - let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l); - let l_t = l.data_type(); - - let r_v = r.as_any_dictionary_opt(); - let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); - let r_t = r.data_type(); - - if l_t.is_nested() { - if !l_t.equals_datatype(r_t) { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - match (l_t, op) { - (Struct(_), Op::Equal | Op::NotEqual) => {} - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - } - } else if r_t != l_t { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - let d = downcast_primitive_array! { - (l, r) => apply(op, l.values().as_ref(), l_s, l_v, r.values().as_ref(), r_s, r_v), - (Boolean, Boolean) => apply(op, l.as_boolean(), l_s, l_v, r.as_boolean(), r_s, r_v), - (Utf8, Utf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), - (LargeUtf8, LargeUtf8) => apply(op, l.as_string::(), l_s, l_v, r.as_string::(), r_s, r_v), - (Binary, Binary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), - (LargeBinary, LargeBinary) => apply(op, l.as_binary::(), l_s, l_v, r.as_binary::(), r_s, r_v), - (FixedSizeBinary(_), FixedSizeBinary(_)) => apply(op, l.as_fixed_size_binary(), l_s, l_v, r.as_fixed_size_binary(), r_s, r_v), - (Null, Null) => None, - (Struct(_), Struct(_)) => Some(compare_op_struct_values(op, l, l_s, r, r_s, len)?), - _ => unreachable!(), - }; - Ok(d.unwrap_or_else(|| BooleanBuffer::new_unset(len))) -} - /// recursively compare fields of struct arrays fn compare_op_struct_values( op: Op, @@ -330,8 +384,8 @@ fn compare_op_struct_values( // when one of field is equal, the result is false for not equal // so we use neg to reverse the result of equal when handle not equal let neg = match op { - Op::Equal => false, - Op::NotEqual => true, + Op::Equal | Op::NotDistinct => false, + Op::NotEqual | Op::Distinct => true, _ => unreachable!(), }; @@ -339,16 +393,18 @@ fn compare_op_struct_values( let r = r.as_struct(); // compare each field of struct - let child_res = l + let child_values = l .columns() .iter() .zip(r.columns().iter()) .map(|(col_l, col_r)| compare_op_values(Op::Equal, col_l, l_s, col_r, r_s, len)) .collect::, ArrowError>>()?; // combine the result of each field - let equality = child_res + let equality = child_values .iter() - .fold(BooleanBuffer::new_set(len), |acc, x| &acc & x); + .fold(BooleanBuffer::new_set(len), |values, child_value| { + &values & child_value + }); Ok(if neg { !&equality } else { equality }) } @@ -773,8 +829,8 @@ mod tests { } #[test] - fn test_struct_equality() { - // test struct('a', 'b') = struct('a', 'b'), the null buffer is 0b0111 + fn test_struct_uncomparable() { + // test struct('a') == struct('a','b') let left_a = Arc::new(Int32Array::new( vec![0, 1, 2, 3].into(), Some(vec![true, false, true, false].into()), @@ -783,13 +839,42 @@ mod tests { vec![0, 1, 2, 3].into(), Some(vec![true, false, true, false].into()), )); - let left_b = Arc::new(Int32Array::new( + let right_b = Arc::new(Int32Array::new( vec![0, 1, 2, 3].into(), Some(vec![true, true, true, false].into()), )); + let field_a = Arc::new(Field::new("a", DataType::Int32, true)); + let field_b = Arc::new(Field::new("b", DataType::Int32, true)); + let left = StructArray::from(vec![(field_a.clone(), left_a.clone() as ArrayRef)]); + let right = StructArray::from(vec![ + (field_a.clone(), right_a.clone() as ArrayRef), + (field_b.clone(), right_b.clone() as ArrayRef), + ]); + assert_eq!(eq(&left, &right).unwrap_err().to_string(), "Invalid argument error: Invalid comparison operation: Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) == Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"b\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])"); + + // test struct('a') <= struct('a') + assert_eq!(lt(&left, &left).unwrap_err().to_string(), "Invalid argument error: Invalid comparison operation: Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) < Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])"); + } + + #[test] + fn test_struct_compare() { + // test struct('a', 'b')、struct('a', 'b'), the null buffer is 0b0111 + // left b[2] is different from right b[2] + let left_a = Arc::new(Int32Array::new( + vec![0, 1, 2, 3].into(), + Some(vec![true, false, true, true].into()), + )); + let right_a = Arc::new(Int32Array::new( + vec![0, 1, 2, 3].into(), + Some(vec![true, false, true, true].into()), + )); + let left_b = Arc::new(Int32Array::new( + vec![0, 1, 20, 3].into(), + Some(vec![true, true, true, true].into()), + )); let right_b = Arc::new(Int32Array::new( vec![0, 1, 2, 3].into(), - Some(vec![true, true, true, false].into()), + Some(vec![true, true, true, true].into()), )); let field_a = Arc::new(Field::new("a", DataType::Int32, true)); let field_b = Arc::new(Field::new("b", DataType::Int32, true)); @@ -808,46 +893,32 @@ mod tests { Buffer::from([0b0111]), )); let expected = BooleanArray::new( - vec![true, true, true, true].into(), - Some(vec![true, true, true, false].into()), + vec![true, true, false, true].into(), + // a[1] is none in child, struct[3] is none in parent + Some(vec![true, false, true, false].into()), ); assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); let expected = BooleanArray::new( - vec![false, false, false, false].into(), - Some(vec![true, true, true, false].into()), + vec![false, false, true, false].into(), + Some(vec![true, false, true, false].into()), ); assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); - - let sub_struct_fields = left_struct.fields().clone(); - - // test struct('a', 'b') = struct('a', 'b'), right a[1] is different from left a[2],the null buffer is 0b0111 - let right_a2 = Arc::new(Int32Array::new( - vec![0, 2, 2, 3].into(), - Some(vec![true, true, true, false].into()), - )); - let right_struct = StructArray::from(( - vec![ - (field_a.clone(), right_a2.clone() as ArrayRef), - (field_b.clone(), right_b.clone() as ArrayRef), - ], - Buffer::from([0b0111]), - )); - let expected = BooleanArray::new( - vec![true, false, true, true].into(), - Some(vec![true, true, true, false].into()), - ); - assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); - assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); let expected = BooleanArray::new( - vec![false, true, false, false].into(), - Some(vec![true, true, true, false].into()), + // left[0] equals to right[0], left b[1] is not distinct from right b[1], left b[2] is distinct from right b[2], struct[3] is none in parent + vec![false, false, true, false].into(), + None, ); - assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); - assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); + assert_eq!(distinct(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(distinct(&right_struct, &left_struct).unwrap(), expected); + let expected = BooleanArray::new(vec![true, true, false, true].into(), None); + assert_eq!(not_distinct(&left_struct, &right_struct).unwrap(), expected); + assert_eq!(not_distinct(&right_struct, &left_struct).unwrap(), expected); + + let sub_struct_fields = left_struct.fields().clone(); - // test struct('a' , struct('suba', 'subb')) = struct('a', struct('suba', 'subb')), where the right suba1[1] different from left suba[1],the null buffer is 0b0111 + // test struct('a' , struct('suba', 'subb')) 、 struct('a', struct('suba', 'subb')), where the right subb1[2] different from left subb[2],the null buffer is 0b0111 let left_struct = StructArray::from(( vec![ (field_a.clone(), left_a.clone() as ArrayRef), @@ -877,14 +948,14 @@ mod tests { Buffer::from([0b0111]), )); let expected = BooleanArray::new( - vec![true, false, true, true].into(), - Some(vec![true, true, true, false].into()), + vec![true, false, false, true].into(), + Some(vec![true, false, true, false].into()), ); assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); let expected = BooleanArray::new( - vec![false, true, false, false].into(), - Some(vec![true, true, true, false].into()), + vec![false, true, true, false].into(), + Some(vec![true, false, true, false].into()), ); assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); From 1b19ec503efec2ac7fba81883d6e98687f2a54c0 Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Sun, 24 Dec 2023 16:38:44 +0800 Subject: [PATCH 5/6] fix dict. --- arrow-ord/src/cmp.rs | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index e1697570e600..617c5026c788 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -169,7 +169,6 @@ pub fn not_distinct(lhs: &dyn Datum, rhs: &dyn Datum) -> Result Result { - use arrow_schema::DataType::*; let (l, l_s) = lhs.get(); let (r, r_s) = rhs.get(); @@ -182,28 +181,6 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result {} - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - } - } else if r_t != l_t { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - let len = match l_s { true => r_len, false => l_len, @@ -284,9 +261,31 @@ fn compare_op_values( use arrow_schema::DataType::*; let l_v = l.as_any_dictionary_opt(); let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l); + let l_t = l.data_type(); let r_v = r.as_any_dictionary_opt(); let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); + let r_t = r.data_type(); + + if l_t.is_nested() { + if !l_t.equals_datatype(r_t) { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + match (l_t, op) { + (Struct(_), Op::Equal | Op::NotEqual | Op::Distinct | Op::NotDistinct) => {} + _ => { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } + } + } else if r_t != l_t { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: {l_t} {op} {r_t}" + ))); + } let l_nulls = l.logical_nulls().filter(|n| n.null_count() > 0); let r_nulls = r.logical_nulls().filter(|n| n.null_count() > 0); From 4f8522d97e2ab33e0e8e04c59eb1ba6be42edd24 Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Wed, 27 Dec 2023 23:10:17 +0800 Subject: [PATCH 6/6] fix distinct for struct. --- arrow-ord/src/cmp.rs | 103 +++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs index 617c5026c788..181b5f8f0472 100644 --- a/arrow-ord/src/cmp.rs +++ b/arrow-ord/src/cmp.rs @@ -201,17 +201,15 @@ fn compare_op_nulls( len: usize, ) -> Result, ArrowError> { use arrow_schema::DataType::*; + if matches!(op, Op::Distinct | Op::NotDistinct) { + // for [not]Distinct, the result is never null + return Ok(None); + } + let l_t = l.data_type(); let r_t = r.data_type(); let l_nulls = l.logical_nulls().filter(|n| n.null_count() > 0); let r_nulls = r.logical_nulls().filter(|n| n.null_count() > 0); - // for [not]Distinct, the result is never null - match op { - Op::Distinct | Op::NotDistinct => { - return Ok(None); - } - _ => {} - } let nulls = match (l_nulls, l_s, r_nulls, r_s) { // Either both sides are scalar or neither side is scalar (Some(l_nulls), true, Some(r_nulls), true) @@ -267,21 +265,7 @@ fn compare_op_values( let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); let r_t = r.data_type(); - if l_t.is_nested() { - if !l_t.equals_datatype(r_t) { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - match (l_t, op) { - (Struct(_), Op::Equal | Op::NotEqual | Op::Distinct | Op::NotDistinct) => {} - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid comparison operation: {l_t} {op} {r_t}" - ))); - } - } - } else if r_t != l_t { + if !l_t.equals_datatype(r_t) { return Err(ArrowError::InvalidArgumentError(format!( "Invalid comparison operation: {l_t} {op} {r_t}" ))); @@ -380,12 +364,22 @@ fn compare_op_struct_values( r_s: bool, len: usize, ) -> Result { - // when one of field is equal, the result is false for not equal + // when one of field is not equal(notdistinct), the result is false for equal(notdistinct) // so we use neg to reverse the result of equal when handle not equal let neg = match op { Op::Equal | Op::NotDistinct => false, Op::NotEqual | Op::Distinct => true, - _ => unreachable!(), + _ => { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid comparison operation: Struct {op} Struct" + ))) + } + }; + + let op = match op { + Op::NotEqual => Op::Equal, + Op::Distinct => Op::NotDistinct, + _ => op, }; let l = l.as_struct(); @@ -396,7 +390,7 @@ fn compare_op_struct_values( .columns() .iter() .zip(r.columns().iter()) - .map(|(col_l, col_r)| compare_op_values(Op::Equal, col_l, l_s, col_r, r_s, len)) + .map(|(col_l, col_r)| compare_op_values(op, col_l, l_s, col_r, r_s, len)) .collect::, ArrowError>>()?; // combine the result of each field let equality = child_values @@ -852,72 +846,75 @@ mod tests { assert_eq!(eq(&left, &right).unwrap_err().to_string(), "Invalid argument error: Invalid comparison operation: Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) == Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"b\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])"); // test struct('a') <= struct('a') - assert_eq!(lt(&left, &left).unwrap_err().to_string(), "Invalid argument error: Invalid comparison operation: Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) < Struct([Field { name: \"a\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])"); + assert_eq!( + lt(&left, &left).unwrap_err().to_string(), + "Invalid argument error: Invalid comparison operation: Struct < Struct" + ); } #[test] fn test_struct_compare() { - // test struct('a', 'b')、struct('a', 'b'), the null buffer is 0b0111 - // left b[2] is different from right b[2] let left_a = Arc::new(Int32Array::new( - vec![0, 1, 2, 3].into(), - Some(vec![true, false, true, true].into()), + vec![0, 1, 2, 3, 4, 5, 6, 7].into(), + Some(vec![true, false, true, true, false, true, true, false].into()), )); let right_a = Arc::new(Int32Array::new( - vec![0, 1, 2, 3].into(), - Some(vec![true, false, true, true].into()), + vec![0, 1, 2, 3, 4, 5, 6, 72].into(), + Some(vec![true, false, true, true, false, true, true, false].into()), )); let left_b = Arc::new(Int32Array::new( - vec![0, 1, 20, 3].into(), - Some(vec![true, true, true, true].into()), + vec![0, 1, 2, 3, 4, 5, 7, 7].into(), + Some(vec![true, true, true, true, true, true, true, true].into()), )); let right_b = Arc::new(Int32Array::new( - vec![0, 1, 2, 3].into(), - Some(vec![true, true, true, true].into()), + vec![0, 1, 20, 13, 72, 6, 6, 7].into(), + Some(vec![true, true, true, true, true, true, false, true].into()), )); let field_a = Arc::new(Field::new("a", DataType::Int32, true)); let field_b = Arc::new(Field::new("b", DataType::Int32, true)); + // left [{a: 0, b: 0}, {a: NULL, b: 1}, {a: 2, b: 2}, NULL({a: 3, b: 3}), {a: NULL, b: 4}, NULL({a: 5, b: 5}), {a:6, b: 7}, {a: NULL, b: 7}] let left_struct = StructArray::from(( vec![ (field_a.clone(), left_a.clone() as ArrayRef), (field_b.clone(), left_b.clone() as ArrayRef), ], - Buffer::from([0b0111]), + Buffer::from([0b11010111]), )); + // right [{a: 0, b: 0}, {a: NULL, b: 1}, {a: 2, b: 20}, Null({a: 3, b: 13}), {a: NULL, b: 72}, Null({a: 5, b: 6}), {a:6, b: Null}, {a: NULL, b: 7}] let right_struct = StructArray::from(( vec![ (field_a.clone(), right_a.clone() as ArrayRef), (field_b.clone(), right_b.clone() as ArrayRef), ], - Buffer::from([0b0111]), + Buffer::from([0b11010111]), )); let expected = BooleanArray::new( - vec![true, true, false, true].into(), - // a[1] is none in child, struct[3] is none in parent - Some(vec![true, false, true, false].into()), + vec![true, true, false, false, false, false, false, false].into(), + Some(vec![true, false, true, false, false, false, false, false].into()), ); assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); let expected = BooleanArray::new( - vec![false, false, true, false].into(), - Some(vec![true, false, true, false].into()), + vec![false, false, true, true, true, true, true, true].into(), + Some(vec![true, false, true, false, false, false, false, false].into()), ); assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected); let expected = BooleanArray::new( - // left[0] equals to right[0], left b[1] is not distinct from right b[1], left b[2] is distinct from right b[2], struct[3] is none in parent - vec![false, false, true, false].into(), + vec![false, false, true, false, true, false, true, false].into(), None, ); assert_eq!(distinct(&left_struct, &right_struct).unwrap(), expected); assert_eq!(distinct(&right_struct, &left_struct).unwrap(), expected); - let expected = BooleanArray::new(vec![true, true, false, true].into(), None); + let expected = BooleanArray::new( + vec![true, true, false, true, false, true, false, true].into(), + None, + ); assert_eq!(not_distinct(&left_struct, &right_struct).unwrap(), expected); assert_eq!(not_distinct(&right_struct, &left_struct).unwrap(), expected); let sub_struct_fields = left_struct.fields().clone(); - // test struct('a' , struct('suba', 'subb')) 、 struct('a', struct('suba', 'subb')), where the right subb1[2] different from left subb[2],the null buffer is 0b0111 let left_struct = StructArray::from(( vec![ (field_a.clone(), left_a.clone() as ArrayRef), @@ -930,7 +927,7 @@ mod tests { Arc::new(left_struct) as ArrayRef, ), ], - Buffer::from([0b0111]), + Buffer::from([0b11010111]), )); let right_struct = StructArray::from(( vec![ @@ -944,17 +941,17 @@ mod tests { Arc::new(right_struct) as ArrayRef, ), ], - Buffer::from([0b0111]), + Buffer::from([0b11010111]), )); let expected = BooleanArray::new( - vec![true, false, false, true].into(), - Some(vec![true, false, true, false].into()), + vec![true, true, false, false, false, false, false, false].into(), + Some(vec![true, false, true, false, false, false, false, false].into()), ); assert_eq!(eq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(eq(&right_struct, &left_struct).unwrap(), expected); let expected = BooleanArray::new( - vec![false, true, true, false].into(), - Some(vec![true, false, true, false].into()), + vec![false, false, true, true, true, true, true, true].into(), + Some(vec![true, false, true, false, false, false, false, false].into()), ); assert_eq!(neq(&left_struct, &right_struct).unwrap(), expected); assert_eq!(neq(&right_struct, &left_struct).unwrap(), expected);