From 1c5774f80b500f43448c109225a0cad34d9b5ff0 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 4 Nov 2022 17:53:14 -0700 Subject: [PATCH 1/6] Check overflow while casting floating point value to decimal128 --- arrow-cast/src/cast.rs | 76 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index a3abe545d529..df4e0fceeee5 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -45,6 +45,10 @@ use arrow_array::{ types::*, *, }; use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer}; +use arrow_data::decimal::{ + DECIMAL128_MAX_PRECISION, MAX_DECIMAL_FOR_EACH_PRECISION, + MIN_DECIMAL_FOR_EACH_PRECISION, +}; use arrow_data::ArrayData; use arrow_schema::*; use arrow_select::take::take; @@ -344,16 +348,58 @@ fn cast_floating_point_to_decimal128( array: &PrimitiveArray, precision: u8, scale: u8, + cast_options: &CastOptions, ) -> Result where ::Native: AsPrimitive, { let mul = 10_f64.powi(scale as i32); - array - .unary::<_, Decimal128Type>(|v| (v.as_() * mul).round() as i128) - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) + if cast_options.safe { + let iter = array.iter().map(|v| { + v.and_then(|v| { + let mul_v = (mul * v.as_()).round() as i128; + if precision <= DECIMAL128_MAX_PRECISION + && (mul_v > MAX_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1] + || mul_v < MIN_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1]) + { + None + } else { + Some(mul_v) + } + }) + }); + let casted_array = + unsafe { PrimitiveArray::::from_trusted_len_iter(iter) }; + casted_array + .with_precision_and_scale(precision, scale) + .map(|a| Arc::new(a) as ArrayRef) + } else { + array + .try_unary::<_, Decimal128Type, _>(|v| { + mul.mul_checked(v.as_()) + .map(|f| f.round() as i128) + .and_then(|v| { + if precision <= DECIMAL128_MAX_PRECISION + && (v > MAX_DECIMAL_FOR_EACH_PRECISION + [precision as usize - 1] + || v < MIN_DECIMAL_FOR_EACH_PRECISION + [precision as usize - 1]) + { + Err(ArrowError::CastError(format!( + "Cannot cast to {:?}({}, {}). The scale causes overflow.", + Decimal128Type::PREFIX, + precision, + scale, + ))) + } else { + Ok(v) + } + }) + }) + .and_then(|a| a.with_precision_and_scale(precision, scale)) + .map(|a| Arc::new(a) as ArrayRef) + } } fn cast_floating_point_to_decimal256( @@ -588,11 +634,13 @@ pub fn cast_with_options( as_primitive_array::(array), *precision, *scale, + cast_options, ), Float64 => cast_floating_point_to_decimal128( as_primitive_array::(array), *precision, *scale, + cast_options, ), Null => Ok(new_null_array(to_type, array.len())), _ => Err(ArrowError::CastError(format!( @@ -6110,4 +6158,24 @@ mod tests { ); assert!(casted_array.is_err()); } + + #[test] + fn test_cast_floating_point_to_decimal128_overflow() { + let array = Float64Array::from(vec![100e10]); + let array = Arc::new(array) as ArrayRef; + let casted_array = cast_with_options( + &array, + &DataType::Decimal128(38, 30), + &CastOptions { safe: true }, + ); + assert!(casted_array.is_ok()); + assert!(casted_array.unwrap().is_null(0)); + + let casted_array = cast_with_options( + &array, + &DataType::Decimal128(38, 30), + &CastOptions { safe: false }, + ); + assert!(casted_array.is_err()); + } } From c82b0fede4fb8fbe3122982b159a3bae5700158c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 4 Nov 2022 22:05:46 -0700 Subject: [PATCH 2/6] Don't validate with precision --- arrow-cast/src/cast.rs | 57 +++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index df4e0fceeee5..27ae241970b2 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -45,10 +45,6 @@ use arrow_array::{ types::*, *, }; use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer}; -use arrow_data::decimal::{ - DECIMAL128_MAX_PRECISION, MAX_DECIMAL_FOR_EACH_PRECISION, - MIN_DECIMAL_FOR_EACH_PRECISION, -}; use arrow_data::ArrayData; use arrow_schema::*; use arrow_select::take::take; @@ -358,14 +354,11 @@ where if cast_options.safe { let iter = array.iter().map(|v| { v.and_then(|v| { - let mul_v = (mul * v.as_()).round() as i128; - if precision <= DECIMAL128_MAX_PRECISION - && (mul_v > MAX_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1] - || mul_v < MIN_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1]) - { + let mul_v = (mul * v.as_()).round(); + if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY { None } else { - Some(mul_v) + Some(mul_v as i128) } }) }); @@ -377,25 +370,20 @@ where } else { array .try_unary::<_, Decimal128Type, _>(|v| { - mul.mul_checked(v.as_()) - .map(|f| f.round() as i128) - .and_then(|v| { - if precision <= DECIMAL128_MAX_PRECISION - && (v > MAX_DECIMAL_FOR_EACH_PRECISION - [precision as usize - 1] - || v < MIN_DECIMAL_FOR_EACH_PRECISION - [precision as usize - 1]) - { - Err(ArrowError::CastError(format!( - "Cannot cast to {:?}({}, {}). The scale causes overflow.", - Decimal128Type::PREFIX, - precision, - scale, - ))) - } else { - Ok(v) - } - }) + mul.mul_checked(v.as_()).and_then(|value| { + let mul_v = value.round(); + if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY { + Err(ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + Decimal128Type::PREFIX, + precision, + scale, + v + ))) + } else { + Ok(mul_v as i128) + } + }) }) .and_then(|a| a.with_precision_and_scale(precision, scale)) .map(|a| Arc::new(a) as ArrayRef) @@ -6161,7 +6149,7 @@ mod tests { #[test] fn test_cast_floating_point_to_decimal128_overflow() { - let array = Float64Array::from(vec![100e10]); + let array = Float64Array::from(vec![f64::MAX]); let array = Arc::new(array) as ArrayRef; let casted_array = cast_with_options( &array, @@ -6176,6 +6164,13 @@ mod tests { &DataType::Decimal128(38, 30), &CastOptions { safe: false }, ); - assert!(casted_array.is_err()); + let err = casted_array.unwrap_err().to_string(); + let expected_error = "Cast error: Cannot cast to Decimal128(38, 30)"; + assert!( + err.contains(expected_error), + "did not find expected error '{}' in actual error '{}'", + expected_error, + err + ); } } From db032ac79e0b6a0a57f878066ca2cbc22f16f7a2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 5 Nov 2022 15:24:37 -0700 Subject: [PATCH 3/6] Return error when saturating --- arrow-cast/src/cast.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 27ae241970b2..e7d0f30af465 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -381,7 +381,18 @@ where v ))) } else { - Ok(mul_v as i128) + let integer = mul_v as i128; + if integer == i128::MAX || integer == i128::MIN { + Err(ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + Decimal128Type::PREFIX, + precision, + scale, + v + ))) + } else { + Ok(mul_v as i128) + } } }) }) From 8282be3122a87efafeceb09208f958b3c3c9a6aa Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 5 Nov 2022 18:46:37 -0700 Subject: [PATCH 4/6] Use to_i128 --- arrow-cast/src/cast.rs | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index e7d0f30af465..93b571a76719 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -372,28 +372,17 @@ where .try_unary::<_, Decimal128Type, _>(|v| { mul.mul_checked(v.as_()).and_then(|value| { let mul_v = value.round(); - if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY { - Err(ArrowError::CastError(format!( + let integer: i128 = mul_v.to_i128().ok_or_else(|| { + ArrowError::CastError(format!( "Cannot cast to {}({}, {}). Overflowing on {:?}", Decimal128Type::PREFIX, precision, scale, v - ))) - } else { - let integer = mul_v as i128; - if integer == i128::MAX || integer == i128::MIN { - Err(ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal128Type::PREFIX, - precision, - scale, - v - ))) - } else { - Ok(mul_v as i128) - } - } + )) + })?; + + Ok(integer) }) }) .and_then(|a| a.with_precision_and_scale(precision, scale)) From 826979bc1716353f1d499dd9ea8a2ace0e619a50 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 5 Nov 2022 21:55:06 -0700 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-cast/src/cast.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 93b571a76719..6069f2b11e07 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -354,12 +354,7 @@ where if cast_options.safe { let iter = array.iter().map(|v| { v.and_then(|v| { - let mul_v = (mul * v.as_()).round(); - if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY { - None - } else { - Some(mul_v as i128) - } + (mul * v.as_()).round().to_i128() }) }); let casted_array = From 47542eac938b5905b85218b35078ad95b8323f9f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 5 Nov 2022 22:02:25 -0700 Subject: [PATCH 6/6] Fix format --- arrow-cast/src/cast.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 6069f2b11e07..3e23a059bf3e 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -352,11 +352,9 @@ where let mul = 10_f64.powi(scale as i32); if cast_options.safe { - let iter = array.iter().map(|v| { - v.and_then(|v| { - (mul * v.as_()).round().to_i128() - }) - }); + let iter = array + .iter() + .map(|v| v.and_then(|v| (mul * v.as_()).round().to_i128())); let casted_array = unsafe { PrimitiveArray::::from_trusted_len_iter(iter) }; casted_array