From 8b6d60ff5de38abebe14d40bad7a8424f8660242 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Thu, 23 Nov 2023 09:19:59 +0800 Subject: [PATCH] cleanup three type of list Signed-off-by: jayzhan211 --- datafusion/common/src/scalar.rs | 128 ++++++++------------------------ 1 file changed, 31 insertions(+), 97 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 76bea165c03a0..40ec36112dc47 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -317,113 +317,47 @@ impl PartialOrd for ScalarValue { (FixedSizeBinary(_, _), _) => None, (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2), (LargeBinary(_), _) => None, - (List(arr1), List(arr2)) => { - if arr1.data_type() == arr2.data_type() { - if arr1.len() != arr2.len() { - return None; - } - - // ScalarValue::List / ScalarValue::FixedSizeList should have only one list. - assert_eq!(arr1.len(), 1); - assert_eq!(arr2.len(), 1); - - let list_arr1 = arr1.as_list::(); - let list_arr2 = arr2.as_list::(); - - // Single child data - assert_eq!(list_arr1.len(), 1); - assert_eq!(list_arr2.len(), 1); - - let arr1 = list_arr1.value(0); - let arr2 = list_arr2.value(0); - - let lt_res = arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?; - let eq_res = arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?; + (List(arr1), List(arr2)) + | (FixedSizeList(arr1), FixedSizeList(arr2)) + | (LargeList(arr1), LargeList(arr2)) => { + // ScalarValue::List / ScalarValue::FixedSizeList / ScalarValue::LargeList are ensure to have length 1 + assert_eq!(arr1.len(), 1); + assert_eq!(arr2.len(), 1); + + if arr1.data_type() != arr2.data_type() { + return None; + } - for j in 0..lt_res.len() { - if lt_res.is_valid(j) && lt_res.value(j) { - return Some(Ordering::Less); - } - if eq_res.is_valid(j) && !eq_res.value(j) { - return Some(Ordering::Greater); - } + fn first_array_for_list(arr: &ArrayRef) -> ArrayRef { + if let Some(arr) = arr.as_list_opt::() { + arr.value(0) + } else if let Some(arr) = arr.as_list_opt::() { + arr.value(0) + } else if let Some(arr) = arr.as_fixed_size_list_opt() { + arr.value(0) + } else { + unreachable!("Since only List / LargeList / FixedSizeList are supported, this should never happen") } - - Some(Ordering::Equal) - } else { - None } - } - (FixedSizeList(arr1), FixedSizeList(arr2)) => { - if arr1.data_type() == arr2.data_type() { - if arr1.len() != arr2.len() { - return None; - } - - // ScalarValue::List / ScalarValue::FixedSizeList should have only one list. - assert_eq!(arr1.len(), 1); - assert_eq!(arr2.len(), 1); - let list_arr1 = arr1.as_fixed_size_list(); - let list_arr2 = arr2.as_fixed_size_list(); + let arr1 = first_array_for_list(arr1); + let arr2 = first_array_for_list(arr2); - // Single child data - assert_eq!(list_arr1.len(), 1); - assert_eq!(list_arr2.len(), 1); + let lt_res = arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?; + let eq_res = arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?; - let arr1 = list_arr1.value(0); - let arr2 = list_arr2.value(0); - - let lt_res = arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?; - let eq_res = arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?; - - for j in 0..lt_res.len() { - if lt_res.is_valid(j) && lt_res.value(j) { - return Some(Ordering::Less); - } - if eq_res.is_valid(j) && !eq_res.value(j) { - return Some(Ordering::Greater); - } - } - - Some(Ordering::Equal) - } else { - None - } - } - (LargeList(arr1), LargeList(arr2)) => { - if arr1.data_type() == arr2.data_type() { - let list_arr1 = as_large_list_array(arr1); - let list_arr2 = as_large_list_array(arr2); - if list_arr1.len() != list_arr2.len() { - return None; + for j in 0..lt_res.len() { + if lt_res.is_valid(j) && lt_res.value(j) { + return Some(Ordering::Less); } - for i in 0..list_arr1.len() { - let arr1 = list_arr1.value(i); - let arr2 = list_arr2.value(i); - - let lt_res = - arrow::compute::kernels::cmp::lt(&arr1, &arr2).ok()?; - let eq_res = - arrow::compute::kernels::cmp::eq(&arr1, &arr2).ok()?; - - for j in 0..lt_res.len() { - if lt_res.is_valid(j) && lt_res.value(j) { - return Some(Ordering::Less); - } - if eq_res.is_valid(j) && !eq_res.value(j) { - return Some(Ordering::Greater); - } - } + if eq_res.is_valid(j) && !eq_res.value(j) { + return Some(Ordering::Greater); } - Some(Ordering::Equal) - } else { - None } + + Some(Ordering::Equal) } - (List(_), _) => None, - (LargeList(_), _) => None, - (FixedSizeList(_), _) => None, + (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None, (Date32(v1), Date32(v2)) => v1.partial_cmp(v2), (Date32(_), _) => None, (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),