From 076cee48d72942d06373716f32f00a17cda35d34 Mon Sep 17 00:00:00 2001 From: himadripal Date: Tue, 3 Dec 2024 23:56:30 -0800 Subject: [PATCH 1/3] decimal conversion looses value on lower precision, throws error now on overflow. --- arrow-cast/src/cast/decimal.rs | 56 +++++++++++++++++++--------- arrow-cast/src/cast/mod.rs | 68 ++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 18 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 74cd2637e6a5..7f1d88129d6d 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -112,8 +112,19 @@ where }; Ok(match cast_options.safe { - true => array.unary_opt(f), - false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, + true => { + array.unary_opt(|x| { + f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)) + }) + } + false => { + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)) + .and_then(|v|{ + O:: validate_decimal_precision(v, output_precision).map(|_| v) + }) + })? + } }) } @@ -135,11 +146,23 @@ where .unwrap() .pow_checked((output_scale - input_scale) as u32)?; - let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); + let f = |x| + O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); Ok(match cast_options.safe { - true => array.unary_opt(f), - false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?, + true => { + array.unary_opt(|x| { + f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)) + }) + } + false => { + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)) + .and_then(|v|{ + O:: validate_decimal_precision(v, output_precision).map(|_| v) + }) + })? + } }) } @@ -156,19 +179,9 @@ where T::Native: DecimalCast + ArrowNativeTypeOp, { let array: PrimitiveArray = match input_scale.cmp(&output_scale) { - Ordering::Equal => { - // the scale doesn't change, the native value don't need to be changed - array.clone() - } - Ordering::Greater => convert_to_smaller_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )?, - Ordering::Less => { - // input_scale < output_scale + Ordering::Equal | Ordering::Less => { + // input_scale <= output_scale + // the scale doesn't change, but precision may change and cause overflow convert_to_bigger_or_equal_scale_decimal::( array, input_scale, @@ -177,6 +190,13 @@ where cast_options, )? } + Ordering::Greater => convert_to_smaller_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )? }; Ok(Arc::new(array.with_precision_and_scale( diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 78a702e8c174..6dd2cd28fc5e 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -9827,4 +9827,72 @@ mod tests { "Cast non-nullable to non-nullable struct field returning null should fail", ); } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale(){ + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 2); + let output_type = DataType::Decimal128(6, 2); + assert!(can_cast_types(&input_type, &output_type)); + let mut options = CastOptions::default(); + options.safe = false; + + let result = cast_with_options(&array, &output_type, &options); + + assert_eq!(result.unwrap_err().to_string(), + "InvalidArgumentError(123456789 is too large to store in a Decimal128 of precision 6. Max is 999999)"); + } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale(){ + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 4); + let output_type = DataType::Decimal128(6, 2); + assert!(can_cast_types(&input_type, &output_type)); + let mut options = CastOptions::default(); + options.safe = false; + + let result = cast_with_options(&array, &output_type, &options); + + assert_eq!(result.unwrap_err().to_string(), + "InvalidArgumentError(123456789 is too large to store in a Decimal128 of precision 6. Max is 999999)"); + } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale(){ + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 2); + let output_type = DataType::Decimal128(6, 3); + assert!(can_cast_types(&input_type, &output_type)); + let mut options = CastOptions::default(); + options.safe = false; + + let result = cast_with_options(&array, &output_type, &options); + + assert_eq!(result.unwrap_err().to_string(), + "InvalidArgumentError(123456789 is too large to store in a Decimal128 of precision 6. Max is 999999)"); + } + + #[test] + fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type(){ + let array = vec![Some(123456789)]; + let array = create_decimal_array(array, 24, 2).unwrap(); + println!("{:?}", array); + let input_type = DataType::Decimal128(24, 2); + let output_type = DataType::Decimal256(6, 2); + assert!(can_cast_types(&input_type, &output_type)); + let mut options = CastOptions::default(); + options.safe = false; + + let result = cast_with_options(&array, &output_type, &options); + + assert_eq!(result.unwrap_err().to_string(), + "InvalidArgumentError(123456789 is too large to store in a Decimal256 of precision 6. Max is 999999)"); + } } From 264c29b3caf4dd140cc49dc192038c20069ccda2 Mon Sep 17 00:00:00 2001 From: himadripal Date: Wed, 4 Dec 2024 17:17:29 -0800 Subject: [PATCH 2/3] fix review comments and fix formatting. --- arrow-cast/src/cast/decimal.rs | 64 +++++++++++++--------------------- arrow-cast/src/cast/mod.rs | 52 ++++++++++++--------------- 2 files changed, 46 insertions(+), 70 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 7f1d88129d6d..1793c9b3a282 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -111,20 +111,13 @@ where O::Native::from_decimal(adjusted) }; - Ok(match cast_options.safe { - true => { - array.unary_opt(|x| { - f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)) - }) - } - false => { - array.try_unary(|x| { - f(x).ok_or_else(|| error(x)) - .and_then(|v|{ - O:: validate_decimal_precision(v, output_precision).map(|_| v) - }) - })? - } + Ok(if cast_options.safe { + array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) + } else { + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)) + .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v)) + })? }) } @@ -146,23 +139,15 @@ where .unwrap() .pow_checked((output_scale - input_scale) as u32)?; - let f = |x| - O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); + let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); - Ok(match cast_options.safe { - true => { - array.unary_opt(|x| { - f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)) - }) - } - false => { - array.try_unary(|x| { - f(x).ok_or_else(|| error(x)) - .and_then(|v|{ - O:: validate_decimal_precision(v, output_precision).map(|_| v) - }) - })? - } + Ok(if cast_options.safe { + array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) + } else { + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)) + .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v)) + })? }) } @@ -178,8 +163,7 @@ where T: DecimalType, T::Native: DecimalCast + ArrowNativeTypeOp, { - let array: PrimitiveArray = match input_scale.cmp(&output_scale) { - Ordering::Equal | Ordering::Less => { + let array: PrimitiveArray = if input_scale <= output_scale { // input_scale <= output_scale // the scale doesn't change, but precision may change and cause overflow convert_to_bigger_or_equal_scale_decimal::( @@ -189,14 +173,14 @@ where output_scale, cast_options, )? - } - Ordering::Greater => convert_to_smaller_scale_decimal::( - array, - input_scale, - output_precision, - output_scale, - cast_options, - )? + } else { + convert_to_smaller_scale_decimal::( + array, + input_scale, + output_precision, + output_scale, + cast_options, + )? }; Ok(Arc::new(array.with_precision_and_scale( diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 6dd2cd28fc5e..2be257ea7e11 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -2681,13 +2681,13 @@ mod tests { // negative test let array = vec![Some(123456), None]; let array = create_decimal_array(array, 10, 0).unwrap(); - let result = cast(&array, &DataType::Decimal128(2, 2)); - assert!(result.is_ok()); - let array = result.unwrap(); - let array: &Decimal128Array = array.as_primitive(); - let err = array.validate_decimal_precision(2); + let result_safe = cast(&array, &DataType::Decimal128(2, 2)); + assert!(result_safe.is_ok()); + let options = CastOptions { safe: false, ..Default::default() }; + + let result_unsafe = cast_with_options(&array, &DataType::Decimal128(2, 2), &options); assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99", - err.unwrap_err().to_string()); + result_unsafe.unwrap_err().to_string()); } #[test] @@ -8388,7 +8388,7 @@ mod tests { let input_type = DataType::Decimal128(10, 3); let output_type = DataType::Decimal256(10, 5); assert!(can_cast_types(&input_type, &output_type)); - let array = vec![Some(i128::MAX), Some(i128::MIN)]; + let array = vec![Some(123456), Some(-123456)]; let input_decimal_array = create_decimal_array(array, 10, 3).unwrap(); let array = Arc::new(input_decimal_array) as ArrayRef; @@ -8398,8 +8398,8 @@ mod tests { Decimal256Array, &output_type, vec![ - Some(i256::from_i128(i128::MAX).mul_wrapping(hundred)), - Some(i256::from_i128(i128::MIN).mul_wrapping(hundred)) + Some(i256::from_i128(123456).mul_wrapping(hundred)), + Some(i256::from_i128(-123456).mul_wrapping(hundred)) ] ); } @@ -9829,70 +9829,62 @@ mod tests { } #[test] - fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale(){ + fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); - let mut options = CastOptions::default(); - options.safe = false; + let options = CastOptions { safe: false, ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "InvalidArgumentError(123456789 is too large to store in a Decimal128 of precision 6. Max is 999999)"); + "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999"); } #[test] - fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale(){ + fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); println!("{:?}", array); let input_type = DataType::Decimal128(24, 4); let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); - let mut options = CastOptions::default(); - options.safe = false; + let options = CastOptions { safe: false, ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "InvalidArgumentError(123456789 is too large to store in a Decimal128 of precision 6. Max is 999999)"); + "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999"); } #[test] - fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale(){ + fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal128(6, 3); assert!(can_cast_types(&input_type, &output_type)); - let mut options = CastOptions::default(); - options.safe = false; + let options = CastOptions { safe: false, ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "InvalidArgumentError(123456789 is too large to store in a Decimal128 of precision 6. Max is 999999)"); + "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999"); } #[test] - fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type(){ + fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal256(6, 2); assert!(can_cast_types(&input_type, &output_type)); - let mut options = CastOptions::default(); - options.safe = false; - - let result = cast_with_options(&array, &output_type, &options); + let options = CastOptions { safe: false, ..Default::default() }; + let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "InvalidArgumentError(123456789 is too large to store in a Decimal256 of precision 6. Max is 999999)"); + "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999"); } } From 6c50fe3a4f74454f3c09dca03210d282fc050ede Mon Sep 17 00:00:00 2001 From: himadripal Date: Wed, 4 Dec 2024 21:46:41 -0800 Subject: [PATCH 3/3] for simple case of equal scale and bigger precision, no conversion needed. revert whitespace changes formatting check --- arrow-cast/src/cast/decimal.rs | 11 +++++++---- arrow-cast/src/cast/mod.rs | 35 +++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 1793c9b3a282..ba82ca9040c7 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -154,6 +154,7 @@ where // Only support one type of decimal cast operations pub(crate) fn cast_decimal_to_decimal_same_type( array: &PrimitiveArray, + input_precision: u8, input_scale: i8, output_precision: u8, output_scale: i8, @@ -163,8 +164,10 @@ where T: DecimalType, T::Native: DecimalCast + ArrowNativeTypeOp, { - let array: PrimitiveArray = if input_scale <= output_scale { - // input_scale <= output_scale + let array: PrimitiveArray = + if input_scale == output_scale && input_precision <= output_precision { + array.clone() + } else if input_scale < output_scale { // the scale doesn't change, but precision may change and cause overflow convert_to_bigger_or_equal_scale_decimal::( array, @@ -174,14 +177,14 @@ where cast_options, )? } else { - convert_to_smaller_scale_decimal::( + convert_to_smaller_scale_decimal::( array, input_scale, output_precision, output_scale, cast_options, )? - }; + }; Ok(Arc::new(array.with_precision_and_scale( output_precision, diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 2be257ea7e11..d0216cd472a1 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -824,18 +824,20 @@ pub fn cast_with_options( (Map(_, ordered1), Map(_, ordered2)) if ordered1 == ordered2 => { cast_map_values(array.as_map(), to_type, cast_options, ordered1.to_owned()) } - (Decimal128(_, s1), Decimal128(p2, s2)) => { + (Decimal128(p1, s1), Decimal128(p2, s2)) => { cast_decimal_to_decimal_same_type::( array.as_primitive(), + *p1, *s1, *p2, *s2, cast_options, ) } - (Decimal256(_, s1), Decimal256(p2, s2)) => { + (Decimal256(p1, s1), Decimal256(p2, s2)) => { cast_decimal_to_decimal_same_type::( array.as_primitive(), + *p1, *s1, *p2, *s2, @@ -2683,7 +2685,10 @@ mod tests { let array = create_decimal_array(array, 10, 0).unwrap(); let result_safe = cast(&array, &DataType::Decimal128(2, 2)); assert!(result_safe.is_ok()); - let options = CastOptions { safe: false, ..Default::default() }; + let options = CastOptions { + safe: false, + ..Default::default() + }; let result_unsafe = cast_with_options(&array, &DataType::Decimal128(2, 2), &options); assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99", @@ -9837,10 +9842,13 @@ mod tests { let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); - let options = CastOptions { safe: false, ..Default::default() }; + let options = CastOptions { + safe: false, + ..Default::default() + }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 123456790 is too large to store in a Decimal128 of precision 6. Max is 999999"); } #[test] @@ -9852,10 +9860,13 @@ mod tests { let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); - let options = CastOptions { safe: false, ..Default::default() }; + let options = CastOptions { + safe: false, + ..Default::default() + }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 123456790 is too large to store in a Decimal128 of precision 6. Max is 999999"); } #[test] @@ -9867,7 +9878,10 @@ mod tests { let output_type = DataType::Decimal128(6, 3); assert!(can_cast_types(&input_type, &output_type)); - let options = CastOptions { safe: false, ..Default::default() }; + let options = CastOptions { + safe: false, + ..Default::default() + }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999"); @@ -9882,7 +9896,10 @@ mod tests { let output_type = DataType::Decimal256(6, 2); assert!(can_cast_types(&input_type, &output_type)); - let options = CastOptions { safe: false, ..Default::default() }; + let options = CastOptions { + safe: false, + ..Default::default() + }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999");