From 1c5774f80b500f43448c109225a0cad34d9b5ff0 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Fri, 4 Nov 2022 17:53:14 -0700
Subject: [PATCH 1/6] Check overflow while casting floating point value to
 decimal128

---
 arrow-cast/src/cast.rs | 76 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 4 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index a3abe545d529..df4e0fceeee5 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -45,6 +45,10 @@ use arrow_array::{
     types::*, *,
 };
 use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_data::decimal::{
+    DECIMAL128_MAX_PRECISION, MAX_DECIMAL_FOR_EACH_PRECISION,
+    MIN_DECIMAL_FOR_EACH_PRECISION,
+};
 use arrow_data::ArrayData;
 use arrow_schema::*;
 use arrow_select::take::take;
@@ -344,16 +348,58 @@ fn cast_floating_point_to_decimal128<T: ArrowPrimitiveType>(
     array: &PrimitiveArray<T>,
     precision: u8,
     scale: u8,
+    cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError>
 where
     <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
 {
     let mul = 10_f64.powi(scale as i32);
 
-    array
-        .unary::<_, Decimal128Type>(|v| (v.as_() * mul).round() as i128)
-        .with_precision_and_scale(precision, scale)
-        .map(|a| Arc::new(a) as ArrayRef)
+    if cast_options.safe {
+        let iter = array.iter().map(|v| {
+            v.and_then(|v| {
+                let mul_v = (mul * v.as_()).round() as i128;
+                if precision <= DECIMAL128_MAX_PRECISION
+                    && (mul_v > MAX_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1]
+                        || mul_v < MIN_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1])
+                {
+                    None
+                } else {
+                    Some(mul_v)
+                }
+            })
+        });
+        let casted_array =
+            unsafe { PrimitiveArray::<Decimal128Type>::from_trusted_len_iter(iter) };
+        casted_array
+            .with_precision_and_scale(precision, scale)
+            .map(|a| Arc::new(a) as ArrayRef)
+    } else {
+        array
+            .try_unary::<_, Decimal128Type, _>(|v| {
+                mul.mul_checked(v.as_())
+                    .map(|f| f.round() as i128)
+                    .and_then(|v| {
+                        if precision <= DECIMAL128_MAX_PRECISION
+                            && (v > MAX_DECIMAL_FOR_EACH_PRECISION
+                                [precision as usize - 1]
+                                || v < MIN_DECIMAL_FOR_EACH_PRECISION
+                                    [precision as usize - 1])
+                        {
+                            Err(ArrowError::CastError(format!(
+                                "Cannot cast to {:?}({}, {}). The scale causes overflow.",
+                                Decimal128Type::PREFIX,
+                                precision,
+                                scale,
+                            )))
+                        } else {
+                            Ok(v)
+                        }
+                    })
+            })
+            .and_then(|a| a.with_precision_and_scale(precision, scale))
+            .map(|a| Arc::new(a) as ArrayRef)
+    }
 }
 
 fn cast_floating_point_to_decimal256<T: ArrowPrimitiveType>(
@@ -588,11 +634,13 @@ pub fn cast_with_options(
                     as_primitive_array::<Float32Type>(array),
                     *precision,
                     *scale,
+                    cast_options,
                 ),
                 Float64 => cast_floating_point_to_decimal128(
                     as_primitive_array::<Float64Type>(array),
                     *precision,
                     *scale,
+                    cast_options,
                 ),
                 Null => Ok(new_null_array(to_type, array.len())),
                 _ => Err(ArrowError::CastError(format!(
@@ -6110,4 +6158,24 @@ mod tests {
         );
         assert!(casted_array.is_err());
     }
+
+    #[test]
+    fn test_cast_floating_point_to_decimal128_overflow() {
+        let array = Float64Array::from(vec![100e10]);
+        let array = Arc::new(array) as ArrayRef;
+        let casted_array = cast_with_options(
+            &array,
+            &DataType::Decimal128(38, 30),
+            &CastOptions { safe: true },
+        );
+        assert!(casted_array.is_ok());
+        assert!(casted_array.unwrap().is_null(0));
+
+        let casted_array = cast_with_options(
+            &array,
+            &DataType::Decimal128(38, 30),
+            &CastOptions { safe: false },
+        );
+        assert!(casted_array.is_err());
+    }
 }

From c82b0fede4fb8fbe3122982b159a3bae5700158c Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Fri, 4 Nov 2022 22:05:46 -0700
Subject: [PATCH 2/6] Don't validate with precision

---
 arrow-cast/src/cast.rs | 57 +++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index df4e0fceeee5..27ae241970b2 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -45,10 +45,6 @@ use arrow_array::{
     types::*, *,
 };
 use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer};
-use arrow_data::decimal::{
-    DECIMAL128_MAX_PRECISION, MAX_DECIMAL_FOR_EACH_PRECISION,
-    MIN_DECIMAL_FOR_EACH_PRECISION,
-};
 use arrow_data::ArrayData;
 use arrow_schema::*;
 use arrow_select::take::take;
@@ -358,14 +354,11 @@ where
     if cast_options.safe {
         let iter = array.iter().map(|v| {
             v.and_then(|v| {
-                let mul_v = (mul * v.as_()).round() as i128;
-                if precision <= DECIMAL128_MAX_PRECISION
-                    && (mul_v > MAX_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1]
-                        || mul_v < MIN_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1])
-                {
+                let mul_v = (mul * v.as_()).round();
+                if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY {
                     None
                 } else {
-                    Some(mul_v)
+                    Some(mul_v as i128)
                 }
             })
         });
@@ -377,25 +370,20 @@ where
     } else {
         array
             .try_unary::<_, Decimal128Type, _>(|v| {
-                mul.mul_checked(v.as_())
-                    .map(|f| f.round() as i128)
-                    .and_then(|v| {
-                        if precision <= DECIMAL128_MAX_PRECISION
-                            && (v > MAX_DECIMAL_FOR_EACH_PRECISION
-                                [precision as usize - 1]
-                                || v < MIN_DECIMAL_FOR_EACH_PRECISION
-                                    [precision as usize - 1])
-                        {
-                            Err(ArrowError::CastError(format!(
-                                "Cannot cast to {:?}({}, {}). The scale causes overflow.",
-                                Decimal128Type::PREFIX,
-                                precision,
-                                scale,
-                            )))
-                        } else {
-                            Ok(v)
-                        }
-                    })
+                mul.mul_checked(v.as_()).and_then(|value| {
+                    let mul_v = value.round();
+                    if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY {
+                        Err(ArrowError::CastError(format!(
+                            "Cannot cast to {}({}, {}). Overflowing on {:?}",
+                            Decimal128Type::PREFIX,
+                            precision,
+                            scale,
+                            v
+                        )))
+                    } else {
+                        Ok(mul_v as i128)
+                    }
+                })
             })
             .and_then(|a| a.with_precision_and_scale(precision, scale))
             .map(|a| Arc::new(a) as ArrayRef)
@@ -6161,7 +6149,7 @@ mod tests {
 
     #[test]
     fn test_cast_floating_point_to_decimal128_overflow() {
-        let array = Float64Array::from(vec![100e10]);
+        let array = Float64Array::from(vec![f64::MAX]);
         let array = Arc::new(array) as ArrayRef;
         let casted_array = cast_with_options(
             &array,
@@ -6176,6 +6164,13 @@ mod tests {
             &DataType::Decimal128(38, 30),
             &CastOptions { safe: false },
         );
-        assert!(casted_array.is_err());
+        let err = casted_array.unwrap_err().to_string();
+        let expected_error = "Cast error: Cannot cast to Decimal128(38, 30)";
+        assert!(
+            err.contains(expected_error),
+            "did not find expected error '{}' in actual error '{}'",
+            expected_error,
+            err
+        );
     }
 }

From db032ac79e0b6a0a57f878066ca2cbc22f16f7a2 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sat, 5 Nov 2022 15:24:37 -0700
Subject: [PATCH 3/6] Return error when saturating

---
 arrow-cast/src/cast.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 27ae241970b2..e7d0f30af465 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -381,7 +381,18 @@ where
                             v
                         )))
                     } else {
-                        Ok(mul_v as i128)
+                        let integer = mul_v as i128;
+                        if integer == i128::MAX || integer == i128::MIN {
+                            Err(ArrowError::CastError(format!(
+                                "Cannot cast to {}({}, {}). Overflowing on {:?}",
+                                Decimal128Type::PREFIX,
+                                precision,
+                                scale,
+                                v
+                            )))
+                        } else {
+                            Ok(mul_v as i128)
+                        }
                     }
                 })
             })

From 8282be3122a87efafeceb09208f958b3c3c9a6aa Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sat, 5 Nov 2022 18:46:37 -0700
Subject: [PATCH 4/6] Use to_i128

---
 arrow-cast/src/cast.rs | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index e7d0f30af465..93b571a76719 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -372,28 +372,17 @@ where
             .try_unary::<_, Decimal128Type, _>(|v| {
                 mul.mul_checked(v.as_()).and_then(|value| {
                     let mul_v = value.round();
-                    if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY {
-                        Err(ArrowError::CastError(format!(
+                    let integer: i128 = mul_v.to_i128().ok_or_else(|| {
+                        ArrowError::CastError(format!(
                             "Cannot cast to {}({}, {}). Overflowing on {:?}",
                             Decimal128Type::PREFIX,
                             precision,
                             scale,
                             v
-                        )))
-                    } else {
-                        let integer = mul_v as i128;
-                        if integer == i128::MAX || integer == i128::MIN {
-                            Err(ArrowError::CastError(format!(
-                                "Cannot cast to {}({}, {}). Overflowing on {:?}",
-                                Decimal128Type::PREFIX,
-                                precision,
-                                scale,
-                                v
-                            )))
-                        } else {
-                            Ok(mul_v as i128)
-                        }
-                    }
+                        ))
+                    })?;
+
+                    Ok(integer)
                 })
             })
             .and_then(|a| a.with_precision_and_scale(precision, scale))

From 826979bc1716353f1d499dd9ea8a2ace0e619a50 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sat, 5 Nov 2022 21:55:06 -0700
Subject: [PATCH 5/6] Apply suggestions from code review

Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
---
 arrow-cast/src/cast.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 93b571a76719..6069f2b11e07 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -354,12 +354,7 @@ where
     if cast_options.safe {
         let iter = array.iter().map(|v| {
             v.and_then(|v| {
-                let mul_v = (mul * v.as_()).round();
-                if mul_v == f64::INFINITY || mul_v == f64::NEG_INFINITY {
-                    None
-                } else {
-                    Some(mul_v as i128)
-                }
+                (mul * v.as_()).round().to_i128()
             })
         });
         let casted_array =

From 47542eac938b5905b85218b35078ad95b8323f9f Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sat, 5 Nov 2022 22:02:25 -0700
Subject: [PATCH 6/6] Fix format

---
 arrow-cast/src/cast.rs | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 6069f2b11e07..3e23a059bf3e 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -352,11 +352,9 @@ where
     let mul = 10_f64.powi(scale as i32);
 
     if cast_options.safe {
-        let iter = array.iter().map(|v| {
-            v.and_then(|v| {
-                (mul * v.as_()).round().to_i128()
-            })
-        });
+        let iter = array
+            .iter()
+            .map(|v| v.and_then(|v| (mul * v.as_()).round().to_i128()));
         let casted_array =
             unsafe { PrimitiveArray::<Decimal128Type>::from_trusted_len_iter(iter) };
         casted_array