apache · viirya · Mar 16, 2023 · Mar 1, 2023 · Feb 13, 2023 · Feb 14, 2023
diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs
@@ -26,6 +26,8 @@ use crate::arity::*;
 use arrow_array::cast::*;
 use arrow_array::types::*;
 use arrow_array::*;
+use arrow_buffer::i256;
+use arrow_buffer::ArrowNativeType;
 use arrow_schema::*;
 use num::traits::Pow;
 use std::sync::Arc;
@@ -61,7 +63,7 @@ fn math_checked_op<LT, RT, F>(
 where
     LT: ArrowNumericType,
     RT: ArrowNumericType,
-    F: Fn(LT::Native, RT::Native) -> Result<LT::Native, ArrowError>,
+    F: FnMut(LT::Native, RT::Native) -> Result<LT::Native, ArrowError>,
 {
     try_binary(left, right, op)
 }
@@ -1165,6 +1167,77 @@ pub fn multiply_dyn_checked(
     }
 }
 
+/// Perform `left * right` operation on two decimal arrays. If either left or right value is
+/// null then the result is also null.
+///
+/// This performs decimal multiplication which allows precision loss if an exact representation
+/// is not possible for the result, according to the required scale. In the case, the result
+/// will be rounded to the required scale.
+///
+/// It is implemented for compatibility with precision loss `multiply` function provided by
+/// other data processing engines. For multiplication with precision loss detection, use
+/// `multiply` or `multiply_checked` instead.
+pub fn mul_fixed_point_checked(
+    left: &PrimitiveArray<Decimal128Type>,
+    right: &PrimitiveArray<Decimal128Type>,
+    required_scale: i8,
+) -> Result<ArrayRef, ArrowError> {
+    let precision = left.precision();
+    let product_scale = left.scale() + right.scale();
+
+    try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| {
+        let a = i256::from_i128(a);
+        let b = i256::from_i128(b);
+
+        a.checked_mul(b)
+            .map(|mut a| {
+                if required_scale < product_scale {
+                    let divisor = i256::from_i128(10)
+                        .pow_wrapping((product_scale - required_scale) as u32);
+                    a = divide_and_round::<Decimal256Type>(a, divisor);
+                }
+                a
+            })
+            .ok_or_else(|| {
+                ArrowError::ComputeError(format!(
+                    "Overflow happened on: {:?} * {:?}, {:?}",
+                    a,
+                    b,
+                    a.checked_mul(b)
+                ))
+            })
+            .and_then(|a| {
+                a.to_i128().ok_or_else(|| {
+                    ArrowError::ComputeError(format!("Overflow happened on: {:?}", a))
+                })
+            })
+    })
+    .and_then(|a| {
+        a.with_precision_and_scale(precision, required_scale)
+            .map(|a| Arc::new(a) as ArrayRef)
+    })
+}
+
+/// Divide a decimal native value by given divisor and round the result.
+fn divide_and_round<I>(input: I::Native, div: I::Native) -> I::Native
+where
+    I: DecimalType,
+    I::Native: ArrowNativeTypeOp,
+{
+    let d = input.div_wrapping(div);
+    let r = input.mod_wrapping(div);
+
+    let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
+    let half_neg = half.neg_wrapping();
+
+    // Round result
+    match input >= I::Native::ZERO {
+        true if r >= half => d.add_wrapping(I::Native::ONE),
+        false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
+        _ => d,
+    }
+}
+
 /// Multiply every value in an array by a scalar. If any value in the array is null then the
 /// result is also null.
 ///
@@ -3231,4 +3304,68 @@ mod tests {
 
         assert_eq!(&expected, &result);
     }
+
+    #[test]
+    fn test_decimal_multiply_allow_precision_loss() {
+        // Overflow happening as i128 cannot hold multiplying result.
+        let a = Decimal128Array::from(vec![123456789000000000000000000])
+            .with_precision_and_scale(38, 18)
+            .unwrap();
+
+        let b = Decimal128Array::from(vec![10000000000000000000])
+            .with_precision_and_scale(38, 18)
+            .unwrap();
+
+        let err = multiply_dyn_checked(&a, &b).unwrap_err();
+        assert!(err.to_string().contains(
+            "Overflow happened on: 123456789000000000000000000 * 10000000000000000000"
+        ));
+
+        // Allow precision loss.
+        let result = mul_fixed_point_checked(&a, &b, 28).unwrap();
+        let result = as_primitive_array::<Decimal128Type>(&result).clone();
+        let expected =
+            Decimal128Array::from(vec![12345678900000000000000000000000000000])
+                .with_precision_and_scale(38, 28)
+                .unwrap();
+
+        assert_eq!(&expected, &result);
+        assert_eq!(
+            result.value_as_string(0),
+            "1234567890.0000000000000000000000000000"
+        );
+
+        // Rounding case
+        let a = Decimal128Array::from(vec![
+            1,
+            123456789555555555555555555,
+            1555555555555555555,
+        ])
+        .with_precision_and_scale(38, 18)
+        .unwrap();
+
+        let b = Decimal128Array::from(vec![1555555555555555555, 11222222222222222222, 1])
+            .with_precision_and_scale(38, 18)
+            .unwrap();
+
+        let result = mul_fixed_point_checked(&a, &b, 28).unwrap();
+        let result = as_primitive_array::<Decimal128Type>(&result).clone();
+        let expected = Decimal128Array::from(vec![
+            15555555556,
+            13854595272345679012071330528765432099,
+            15555555556,
+        ])
+        .with_precision_and_scale(38, 28)
+        .unwrap();
+
+        assert_eq!(&expected, &result);
+
+        // Rounded the value "1385459527.234567901207133052876543209876543210".
+        assert_eq!(
+            result.value_as_string(1),
+            "1385459527.2345679012071330528765432099"
+        );
+        assert_eq!(result.value_as_string(0), "0.0000000000000000015555555556");
+        assert_eq!(result.value_as_string(2), "0.0000000000000000015555555556");
+    }
 }
diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs
@@ -77,7 +77,7 @@ pub fn try_unary<I, F, O>(
 where
     I: ArrowPrimitiveType,
     O: ArrowPrimitiveType,
-    F: Fn(I::Native) -> Result<O::Native, ArrowError>,
+    F: FnMut(I::Native) -> Result<O::Native, ArrowError>,
 {
     array.try_unary(op)
 }
@@ -307,11 +307,11 @@ where
 pub fn try_binary<A: ArrayAccessor, B: ArrayAccessor, F, O>(
     a: A,
     b: B,
-    op: F,
+    mut op: F,
 ) -> Result<PrimitiveArray<O>, ArrowError>
 where
     O: ArrowPrimitiveType,
-    F: Fn(A::Item, B::Item) -> Result<O::Native, ArrowError>,
+    F: FnMut(A::Item, B::Item) -> Result<O::Native, ArrowError>,
 {
     if a.len() != b.len() {
         return Err(ArrowError::ComputeError(
@@ -431,11 +431,11 @@ fn try_binary_no_nulls<A: ArrayAccessor, B: ArrayAccessor, F, O>(
     len: usize,
     a: A,
     b: B,
-    op: F,
+    mut op: F,
 ) -> Result<PrimitiveArray<O>, ArrowError>
 where
     O: ArrowPrimitiveType,
-    F: Fn(A::Item, B::Item) -> Result<O::Native, ArrowError>,
+    F: FnMut(A::Item, B::Item) -> Result<O::Native, ArrowError>,
 {
     let mut buffer = MutableBuffer::new(len * O::get_byte_width());
     for idx in 0..len {

diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
@@ -491,10 +491,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
     /// be preferred if `op` is infallible.
     ///
     /// Note: LLVM is currently unable to effectively vectorize fallible operations
-    pub fn try_unary<F, O, E>(&self, op: F) -> Result<PrimitiveArray<O>, E>
+    pub fn try_unary<F, O, E>(&self, mut op: F) -> Result<PrimitiveArray<O>, E>
     where
         O: ArrowPrimitiveType,
-        F: Fn(T::Native) -> Result<O::Native, E>,
+        F: FnMut(T::Native) -> Result<O::Native, E>,
     {
         let data = self.data();
         let len = self.len();