From 51e6a4a64c13892b76a4b6bb2b4ab6e6bb644055 Mon Sep 17 00:00:00 2001 From: nglime Date: Sat, 19 Oct 2024 21:18:42 -0500 Subject: [PATCH 1/2] Added casting from Binary and LargeBinary to Utf8View, along with testing for it. --- arrow-cast/src/cast/mod.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index fe59a141cbe2..37d64acfdf8e 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -206,8 +206,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { DataType::is_integer(to_type) || DataType::is_floating(to_type) || to_type == &Utf8 || to_type == &LargeUtf8 } - (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView) => true, - (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView) => true, + (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View ) => true, + (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View ) => true, (FixedSizeBinary(_), Binary | LargeBinary) => true, ( Utf8 | LargeUtf8 | Utf8View, @@ -1399,6 +1399,9 @@ pub fn cast_with_options( cast_binary_to_fixed_size_binary::(array, *size, cast_options) } BinaryView => Ok(Arc::new(BinaryViewArray::from(array.as_binary::()))), + Utf8View => Ok(Arc::new(StringViewArray::from( + cast_binary_to_string::(array, cast_options)?.as_string::(), + ))), _ => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported", ))), @@ -1414,6 +1417,13 @@ pub fn cast_with_options( cast_binary_to_fixed_size_binary::(array, *size, cast_options) } BinaryView => Ok(Arc::new(BinaryViewArray::from(array.as_binary::()))), + Utf8View => { + let array = cast_binary_to_string::(array, cast_options)?; + Ok(Arc::new(StringViewArray::from( + cast_byte_container::(array.as_ref())? + .as_string::(), + ))) + } _ => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported", ))), @@ -5368,14 +5378,25 @@ mod tests { { let binary_array = GenericBinaryArray::::from_iter(VIEW_TEST_DATA); + assert!(can_cast_types( + binary_array.data_type(), + &DataType::Utf8View + )); + assert!(can_cast_types( binary_array.data_type(), &DataType::BinaryView )); + let string_view_array = cast(&binary_array, &DataType::Utf8View).unwrap(); + assert_eq!(string_view_array.data_type(), &DataType::Utf8View); + let binary_view_array = cast(&binary_array, &DataType::BinaryView).unwrap(); assert_eq!(binary_view_array.data_type(), &DataType::BinaryView); + let expect_string_view_array = StringViewArray::from_iter(VIEW_TEST_DATA); + assert_eq!(string_view_array.as_ref(), &expect_string_view_array); + let expect_binary_view_array = BinaryViewArray::from_iter(VIEW_TEST_DATA); assert_eq!(binary_view_array.as_ref(), &expect_binary_view_array); } From d7cfff14fe0027da246d3e2f1bbe0e3c6132469d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Oct 2024 11:01:39 -0400 Subject: [PATCH 2/2] Avoid overflow --- arrow-cast/src/cast/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 37d64acfdf8e..1bc29f941c2f 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -1419,10 +1419,7 @@ pub fn cast_with_options( BinaryView => Ok(Arc::new(BinaryViewArray::from(array.as_binary::()))), Utf8View => { let array = cast_binary_to_string::(array, cast_options)?; - Ok(Arc::new(StringViewArray::from( - cast_byte_container::(array.as_ref())? - .as_string::(), - ))) + Ok(Arc::new(StringViewArray::from(array.as_string::()))) } _ => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported",