From 7705acad845e8b2a366a08640f7acb4033ed7049 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 3 Nov 2023 00:48:41 -0700 Subject: [PATCH] Support casting from integer to binary (#5015) * Support casting from integer to binary * Fix clippy * For review * Reuse array buffers --- arrow-cast/src/cast.rs | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 97307f076f34..684e02b87e6c 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -203,6 +203,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16, (_, Utf8 | LargeUtf8) => from_type.is_primitive(), + (_, Binary | LargeBinary) => from_type.is_integer(), + // start numeric casts ( UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, @@ -1368,6 +1370,28 @@ pub fn cast_with_options( (from_type, Utf8) if from_type.is_primitive() => { value_to_string::(array, cast_options) } + (from_type, Binary) if from_type.is_integer() => match from_type { + UInt8 => cast_numeric_to_binary::(array), + UInt16 => cast_numeric_to_binary::(array), + UInt32 => cast_numeric_to_binary::(array), + UInt64 => cast_numeric_to_binary::(array), + Int8 => cast_numeric_to_binary::(array), + Int16 => cast_numeric_to_binary::(array), + Int32 => cast_numeric_to_binary::(array), + Int64 => cast_numeric_to_binary::(array), + _ => unreachable!(), + }, + (from_type, LargeBinary) if from_type.is_integer() => match from_type { + UInt8 => cast_numeric_to_binary::(array), + UInt16 => cast_numeric_to_binary::(array), + UInt32 => cast_numeric_to_binary::(array), + UInt64 => cast_numeric_to_binary::(array), + Int8 => cast_numeric_to_binary::(array), + Int16 => cast_numeric_to_binary::(array), + Int32 => cast_numeric_to_binary::(array), + Int64 => cast_numeric_to_binary::(array), + _ => unreachable!(), + }, // start numeric casts (UInt8, UInt16) => cast_numeric_arrays::(array, cast_options), (UInt8, UInt32) => cast_numeric_arrays::(array, cast_options), @@ -2317,6 +2341,19 @@ fn value_to_string( Ok(Arc::new(builder.finish())) } +fn cast_numeric_to_binary( + array: &dyn Array, +) -> Result { + let array = array.as_primitive::(); + let size = std::mem::size_of::(); + let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(array.len())); + Ok(Arc::new(GenericBinaryArray::::new( + offsets, + array.values().inner().clone(), + array.nulls().cloned(), + ))) +} + /// Parse UTF-8 fn parse_string( array: &dyn Array, @@ -5176,6 +5213,44 @@ mod tests { assert!(down_cast.is_null(2)); } + #[test] + fn test_numeric_to_binary() { + let a = Int16Array::from(vec![Some(1), Some(511), None]); + + let array_ref = cast(&a, &DataType::Binary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&1_i16.to_le_bytes(), down_cast.value(0)); + assert_eq!(&511_i16.to_le_bytes(), down_cast.value(1)); + assert!(down_cast.is_null(2)); + + let a = Int64Array::from(vec![Some(-1), Some(123456789), None]); + + let array_ref = cast(&a, &DataType::Binary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&(-1_i64).to_le_bytes(), down_cast.value(0)); + assert_eq!(&123456789_i64.to_le_bytes(), down_cast.value(1)); + assert!(down_cast.is_null(2)); + } + + #[test] + fn test_numeric_to_large_binary() { + let a = Int16Array::from(vec![Some(1), Some(511), None]); + + let array_ref = cast(&a, &DataType::LargeBinary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&1_i16.to_le_bytes(), down_cast.value(0)); + assert_eq!(&511_i16.to_le_bytes(), down_cast.value(1)); + assert!(down_cast.is_null(2)); + + let a = Int64Array::from(vec![Some(-1), Some(123456789), None]); + + let array_ref = cast(&a, &DataType::LargeBinary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&(-1_i64).to_le_bytes(), down_cast.value(0)); + assert_eq!(&123456789_i64.to_le_bytes(), down_cast.value(1)); + assert!(down_cast.is_null(2)); + } + #[test] fn test_cast_date32_to_int32() { let array = Date32Array::from(vec![10000, 17890]);