apache · viirya · Jul 4, 2022 · Jul 5, 2022 · Jul 5, 2022 · Jul 5, 2022
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use std::borrow::Borrow;
-use std::convert::{From, TryInto};
+use std::convert::From;
 use std::fmt;
 use std::{any::Any, iter::FromIterator};
 
@@ -32,7 +32,16 @@ use crate::datatypes::{
     DECIMAL_MAX_SCALE,
 };
 use crate::error::{ArrowError, Result};
-use crate::util::decimal::{BasicDecimal, Decimal128};
+use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
+use std::marker::PhantomData;
+
+pub struct GenericDecimalArray<T: BasicDecimal, const VALUE_LENGTH: i32> {
+    data: ArrayData, // backing array data; `value()` uses its `len()` and `offset()`
+    value_data: RawPtrBox<u8>, // base pointer that `value()` reads element bytes from
+    precision: usize, // total digits; see `precision()`
+    scale: usize, // digits after the decimal point; see `scale()`
+    phantom: PhantomData<T>, // zero-sized marker binding the array to decimal type `T`
+}
 
 /// `DecimalArray` stores fixed width decimal numbers,
 /// with a fixed precision and scale.
@@ -68,33 +77,47 @@ use crate::util::decimal::{BasicDecimal, Decimal128};
 ///    assert_eq!(6, decimal_array.scale());
 /// ```
 ///
-pub struct DecimalArray {
-    data: ArrayData,
-    value_data: RawPtrBox<u8>,
-    precision: usize,
-    scale: usize,
-}
+pub type DecimalArray = GenericDecimalArray<Decimal128, 16>; // 16-byte (128-bit) values
+pub type Decimal256Array = GenericDecimalArray<Decimal256, 32>; // 32-byte (256-bit) values
 
-impl DecimalArray {
-    const VALUE_LENGTH: i32 = 16;
+impl<T: BasicDecimal, const VALUE_LENGTH: i32> GenericDecimalArray<T, VALUE_LENGTH> {
+    fn new(
+        data: ArrayData,
+        value_data: RawPtrBox<u8>,
+        precision: usize,
+        scale: usize,
+    ) -> Self {
+        Self {
+            data,
+            value_data,
+            precision,
+            scale,
+            phantom: PhantomData, // marker only; no `T` value is stored
+        }
+    }
+
+    /// Returns the precision (total number of digits) that this array can store.
+    pub fn precision(&self) -> usize {
+        self.precision
+    }
+
+    /// Returns the scale (digits after the decimal point) that this array can store.
+    pub fn scale(&self) -> usize {
+        self.scale
+    }
 
     /// Returns the element at index `i`.
-    pub fn value(&self, i: usize) -> Decimal128 {
+    pub fn value(&self, i: usize) -> T {
         assert!(i < self.data.len(), "DecimalArray out of bounds access");
         let offset = i + self.data.offset();
         let raw_val = unsafe {
             let pos = self.value_offset_at(offset);
             std::slice::from_raw_parts(
                 self.value_data.as_ptr().offset(pos as isize),
-                Self::VALUE_LENGTH as usize,
+                VALUE_LENGTH as usize, // bytes per element
             )
         };
-        let as_array = raw_val.try_into().unwrap();
-        Decimal128::new_from_i128(
-            self.precision,
-            self.scale,
-            i128::from_le_bytes(as_array),
-        )
+        T::new(self.precision(), self.scale(), raw_val) // `T` built from the raw bytes
     }
 
     /// Returns the offset for the element at index `i`.
@@ -110,7 +133,7 @@ impl DecimalArray {
     /// All elements have the same length as the array is a fixed size.
     #[inline]
     pub const fn value_length(&self) -> i32 {
-        Self::VALUE_LENGTH
+        VALUE_LENGTH // const generic parameter: bytes per element
     }
 
     /// Returns a clone of the value data buffer
@@ -120,7 +143,7 @@ impl DecimalArray {
 
     #[inline]
     fn value_offset_at(&self, i: usize) -> i32 {
-        Self::VALUE_LENGTH * i as i32
+        VALUE_LENGTH * i as i32 // parsed as VALUE_LENGTH * (i as i32); cast can truncate large `i`
     }
 
     #[inline]
@@ -157,7 +180,9 @@ impl DecimalArray {
         let array_data = unsafe { builder.build_unchecked() };
         Self::from(array_data)
     }
+}
 
+impl DecimalArray {
     /// Creates a [DecimalArray] with default precision and scale,
     /// based on an iterator of `i128` values without nulls
     pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self {
@@ -176,16 +201,6 @@ impl DecimalArray {
         DecimalArray::from(data)
     }
 
-    /// Return the precision (total digits) that can be stored by this array
-    pub fn precision(&self) -> usize {
-        self.precision
-    }
-
-    /// Return the scale (digits after the decimal) that can be stored by this array
-    pub fn scale(&self) -> usize {
-        self.scale
-    }
-
     /// Returns a DecimalArray with the same data as self, with the
     /// specified precision.
     ///
@@ -246,7 +261,9 @@ impl DecimalArray {
     }
 }
 
-impl From<ArrayData> for DecimalArray {
+impl<T: BasicDecimal, const VALUE_LENGTH: i32> From<ArrayData>
+    for GenericDecimalArray<T, VALUE_LENGTH>
+{
     fn from(data: ArrayData) -> Self {
         assert_eq!(
             data.buffers().len(),
@@ -258,17 +275,20 @@ impl From<ArrayData> for DecimalArray {
             DataType::Decimal(precision, scale) => (*precision, *scale),
             _ => panic!("Expected data type to be Decimal"),
         };
-        Self {
+
+        GenericDecimalArray::new(
             data,
-            value_data: unsafe { RawPtrBox::new(values) },
+            unsafe { RawPtrBox::new(values) },
             precision,
             scale,
-        }
+        )
     }
 }
 
-impl From<DecimalArray> for ArrayData {
-    fn from(array: DecimalArray) -> Self {
+impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32>
+    From<GenericDecimalArray<T, VALUE_LENGTH>> for ArrayData
+{
+    fn from(array: GenericDecimalArray<T, VALUE_LENGTH>) -> Self {
         array.data
     }
 }
@@ -325,9 +345,17 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for DecimalArray {
     }
 }
 
-impl fmt::Debug for DecimalArray {
+impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> fmt::Debug
+    for GenericDecimalArray<T, VALUE_LENGTH>
+{
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
+        write!(
+            f,
+            "Decimal{}Array<{}, {}>\n[\n",
+            VALUE_LENGTH * 8,
+            self.precision,
+            self.scale
+        )?;
         print_long_array(self, f, |array, index, f| {
             let formatted_decimal = array.value_as_string(index);
 
@@ -337,7 +365,9 @@ impl fmt::Debug for DecimalArray {
     }
 }
 
-impl Array for DecimalArray {
+impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> Array
+    for GenericDecimalArray<T, VALUE_LENGTH>
+{
     fn as_any(&self) -> &dyn Any {
         self
     }

diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
@@ -18,6 +18,7 @@
 use std::any::Any;
 use std::sync::Arc;
 
+use crate::array::array_decimal::Decimal256Array;
 use crate::array::ArrayRef;
 use crate::array::DecimalArray;
 use crate::array::UInt8Builder;
@@ -26,6 +27,7 @@ use crate::array::{ArrayBuilder, FixedSizeListBuilder};
 use crate::error::{ArrowError, Result};
 
 use crate::datatypes::validate_decimal_precision;
+use crate::util::decimal::{BasicDecimal, Decimal256};
 
 /// Array Builder for [`DecimalArray`]
 ///
@@ -42,8 +44,18 @@ pub struct DecimalBuilder {
     value_validation: bool,
 }
 
+/// Array Builder for [`Decimal256Array`]
+///
+/// Values are appended as [`Decimal256`] and stored as fixed-width groups of 32 bytes.
+#[derive(Debug)]
+pub struct Decimal256Builder {
+    builder: FixedSizeListBuilder<UInt8Builder>, // byte storage; list width is set in `new`
+    precision: usize, // handed to the `Decimal256Array` built by `finish`
+    scale: usize, // handed to the `Decimal256Array` built by `finish`
+}
+
 impl DecimalBuilder {
-    /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
+    /// Creates a new `DecimalBuilder`, `capacity` is the number of bytes in the values
     /// array
     pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
         let values_builder = UInt8Builder::new(capacity);
@@ -154,6 +166,54 @@ impl ArrayBuilder for DecimalBuilder {
     }
 }
 
+impl Decimal256Builder {
+    /// Creates a new `Decimal256Builder`; `capacity` is the initial capacity, in bytes,
+    /// of the values array.
+    pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
+        let values_builder = UInt8Builder::new(capacity);
+        let byte_width = 32; // 256 bits per decimal value
+        Self {
+            builder: FixedSizeListBuilder::new(values_builder, byte_width),
+            precision,
+            scale,
+        }
+    }
+
+    /// Appends a [`Decimal256`] value into the builder as one array element.
+    ///
+    /// Returns an error if the value's raw byte length differs from the builder's value
+    /// length. NOTE(review): the value's precision and scale are not checked here.
+    #[inline]
+    pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
+        let value_as_bytes = value.raw_value();
+
+        if self.builder.value_length() != value_as_bytes.len() as i32 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Byte slice does not have the same length as Decimal256Builder value lengths".to_string()
+            ));
+        }
+        self.builder.values().append_slice(value_as_bytes)?;
+        self.builder.append(true)
+    }
+
+    /// Appends a null value; zero bytes are written as a placeholder for the slot.
+    #[inline]
+    pub fn append_null(&mut self) -> Result<()> {
+        let length: usize = self.builder.value_length() as usize;
+        self.builder.values().append_slice(&vec![0u8; length][..])?;
+        self.builder.append(false)
+    }
+
+    /// Builds the `Decimal256Array` and resets this builder.
+    pub fn finish(&mut self) -> Decimal256Array {
+        Decimal256Array::from_fixed_size_list_array(
+            self.builder.finish(),
+            self.precision,
+            self.scale,
+        )
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -197,4 +257,42 @@ mod tests {
         assert_eq!(32, decimal_array.value_offset(2));
         assert_eq!(16, decimal_array.value_length());
     }
+
+    #[test]
+    fn test_decimal256_builder() {
+        let mut builder = Decimal256Builder::new(30, 40, 6);
+
+        let mut bytes = vec![0; 32]; // low 16 bytes are filled little-endian below
+        bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
+        let value = Decimal256::try_new_from_bytes(40, 6, bytes.as_slice()).unwrap();
+        builder.append_value(&value).unwrap();
+
+        builder.append_null().unwrap();
+
+        bytes = vec![255; 32]; // all bits set; expected to print as -0.000001 below
+        let value = Decimal256::try_new_from_bytes(40, 6, bytes.as_slice()).unwrap();
+        builder.append_value(&value).unwrap();
+
+        bytes = vec![0; 32];
+        bytes[0..16].clone_from_slice(&0_i128.to_le_bytes());
+        bytes[15] = 128; // sets bit 127 only, i.e. 2^127 (see the expected string below)
+        let value = Decimal256::try_new_from_bytes(40, 6, bytes.as_slice()).unwrap();
+        builder.append_value(&value).unwrap();
+
+        let decimal_array: Decimal256Array = builder.finish();
+
+        assert_eq!(&DataType::Decimal(40, 6), decimal_array.data_type());
+        assert_eq!(4, decimal_array.len());
+        assert_eq!(1, decimal_array.null_count());
+        assert_eq!(64, decimal_array.value_offset(2));
+        assert_eq!(32, decimal_array.value_length());
+
+        assert_eq!(decimal_array.value(0).to_string(), "8887.000000");
+        assert!(decimal_array.is_null(1));
+        assert_eq!(decimal_array.value(2).to_string(), "-0.000001");
+        assert_eq!(
+            decimal_array.value(3).to_string(),
+            "170141183460469231731687303715884.105728"
+        );
+    }
 }
diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs
@@ -45,6 +45,7 @@ use super::ArrayRef;
 pub use boolean_buffer_builder::BooleanBufferBuilder;
 pub use boolean_builder::BooleanBuilder;
 pub use buffer_builder::BufferBuilder;
+pub use decimal_builder::Decimal256Builder;
 pub use decimal_builder::DecimalBuilder;
 pub use fixed_size_binary_builder::FixedSizeBinaryBuilder;
 pub use fixed_size_list_builder::FixedSizeListBuilder;

diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs
@@ -16,7 +16,9 @@
 // under the License.
 
 use super::*;
+use crate::array::array_decimal::GenericDecimalArray;
 use crate::datatypes::*;
+use crate::util::decimal::BasicDecimal;
 use array::Array;
 use hex::FromHex;
 use serde_json::value::Value::{Null as JNull, Object, String as JString};
@@ -359,19 +361,16 @@ impl PartialEq<FixedSizeBinaryArray> for Value {
     }
 }
 
-impl JsonEqual for DecimalArray {
+impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> JsonEqual
+    for GenericDecimalArray<T, VALUE_LENGTH>
+{
     fn equals_json(&self, json: &[&Value]) -> bool {
         if self.len() != json.len() {
             return false;
         }
 
         (0..self.len()).all(|i| match json[i] {
-            JString(s) => {
-                self.is_valid(i)
-                    && (s
-                        .parse::<i128>()
-                        .map_or_else(|_| false, |v| v == self.value(i).as_i128()))
-            }
+            JString(s) => self.is_valid(i) && (s == &self.value(i).to_string()),
             JNull => self.is_null(i),
             _ => false,
         })

diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
@@ -181,6 +181,7 @@ pub use self::array_binary::BinaryArray;
 pub use self::array_binary::FixedSizeBinaryArray;
 pub use self::array_binary::LargeBinaryArray;
 pub use self::array_boolean::BooleanArray;
+pub use self::array_decimal::Decimal256Array;
 pub use self::array_decimal::DecimalArray;
 pub use self::array_dictionary::DictionaryArray;
 pub use self::array_list::FixedSizeListArray;
@@ -468,6 +469,7 @@ pub use self::builder::BinaryBuilder;
 pub use self::builder::BooleanBufferBuilder;
 pub use self::builder::BooleanBuilder;
 pub use self::builder::BufferBuilder;
+pub use self::builder::Decimal256Builder;
 pub use self::builder::DecimalBuilder;
 pub use self::builder::FixedSizeBinaryBuilder;
 pub use self::builder::FixedSizeListBuilder;