-
Notifications
You must be signed in to change notification settings - Fork 867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Decimal256Builder and Decimal256Array (generic approach) #2006
Changes from all commits
12f3179
6157f5d
ab93dba
bb2aa82
53525f8
c0feb1c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
// under the License. | ||
|
||
use std::borrow::Borrow; | ||
use std::convert::{From, TryInto}; | ||
use std::convert::From; | ||
use std::fmt; | ||
use std::{any::Any, iter::FromIterator}; | ||
|
||
|
@@ -32,7 +32,16 @@ use crate::datatypes::{ | |
DECIMAL_MAX_SCALE, | ||
}; | ||
use crate::error::{ArrowError, Result}; | ||
use crate::util::decimal::{BasicDecimal, Decimal128}; | ||
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256}; | ||
use std::marker::PhantomData; | ||
|
||
pub struct GenericDecimalArray<T: BasicDecimal, const VALUE_LENGTH: i32> { | ||
data: ArrayData, | ||
value_data: RawPtrBox<u8>, | ||
precision: usize, | ||
scale: usize, | ||
phantom: PhantomData<T>, | ||
} | ||
Comment on lines
+38
to
+44
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Compared to #2000, which defines a trait and let Honestly I prefer #2000's trait approach. It looks more clear to me. But one cons of #2000 is that it needs to use macro to reduce code duplication, though it is not complicated usage. |
||
|
||
/// `DecimalArray` stores fixed width decimal numbers, | ||
/// with a fixed precision and scale. | ||
|
@@ -68,33 +77,47 @@ use crate::util::decimal::{BasicDecimal, Decimal128}; | |
/// assert_eq!(6, decimal_array.scale()); | ||
/// ``` | ||
/// | ||
pub struct DecimalArray { | ||
data: ArrayData, | ||
value_data: RawPtrBox<u8>, | ||
precision: usize, | ||
scale: usize, | ||
} | ||
pub type DecimalArray = GenericDecimalArray<Decimal128, 16>; | ||
pub type Decimal256Array = GenericDecimalArray<Decimal256, 32>; | ||
|
||
impl DecimalArray { | ||
const VALUE_LENGTH: i32 = 16; | ||
impl<T: BasicDecimal, const VALUE_LENGTH: i32> GenericDecimalArray<T, VALUE_LENGTH> { | ||
fn new( | ||
data: ArrayData, | ||
value_data: RawPtrBox<u8>, | ||
precision: usize, | ||
scale: usize, | ||
) -> Self { | ||
Self { | ||
data, | ||
value_data, | ||
precision, | ||
scale, | ||
phantom: PhantomData, | ||
} | ||
} | ||
|
||
/// Return the precision (total digits) that can be stored by this array | ||
pub fn precision(&self) -> usize { | ||
self.precision | ||
} | ||
|
||
/// Return the scale (digits after the decimal) that can be stored by this array | ||
pub fn scale(&self) -> usize { | ||
self.scale | ||
} | ||
|
||
/// Returns the element at index `i`. | ||
pub fn value(&self, i: usize) -> Decimal128 { | ||
pub fn value(&self, i: usize) -> T { | ||
assert!(i < self.data.len(), "DecimalArray out of bounds access"); | ||
let offset = i + self.data.offset(); | ||
let raw_val = unsafe { | ||
let pos = self.value_offset_at(offset); | ||
std::slice::from_raw_parts( | ||
self.value_data.as_ptr().offset(pos as isize), | ||
Self::VALUE_LENGTH as usize, | ||
VALUE_LENGTH as usize, | ||
) | ||
}; | ||
let as_array = raw_val.try_into().unwrap(); | ||
Decimal128::new_from_i128( | ||
self.precision, | ||
self.scale, | ||
i128::from_le_bytes(as_array), | ||
) | ||
T::new(self.precision(), self.scale(), raw_val) | ||
} | ||
|
||
/// Returns the offset for the element at index `i`. | ||
|
@@ -110,7 +133,7 @@ impl DecimalArray { | |
/// All elements have the same length as the array is a fixed size. | ||
#[inline] | ||
pub const fn value_length(&self) -> i32 { | ||
Self::VALUE_LENGTH | ||
VALUE_LENGTH | ||
} | ||
|
||
/// Returns a clone of the value data buffer | ||
|
@@ -120,7 +143,7 @@ impl DecimalArray { | |
|
||
#[inline] | ||
fn value_offset_at(&self, i: usize) -> i32 { | ||
Self::VALUE_LENGTH * i as i32 | ||
VALUE_LENGTH * i as i32 | ||
} | ||
|
||
#[inline] | ||
|
@@ -157,7 +180,9 @@ impl DecimalArray { | |
let array_data = unsafe { builder.build_unchecked() }; | ||
Self::from(array_data) | ||
} | ||
} | ||
|
||
impl DecimalArray { | ||
/// Creates a [DecimalArray] with default precision and scale, | ||
/// based on an iterator of `i128` values without nulls | ||
pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self { | ||
|
@@ -176,16 +201,6 @@ impl DecimalArray { | |
DecimalArray::from(data) | ||
} | ||
|
||
/// Return the precision (total digits) that can be stored by this array | ||
pub fn precision(&self) -> usize { | ||
self.precision | ||
} | ||
|
||
/// Return the scale (digits after the decimal) that can be stored by this array | ||
pub fn scale(&self) -> usize { | ||
self.scale | ||
} | ||
|
||
/// Returns a DecimalArray with the same data as self, with the | ||
/// specified precision. | ||
/// | ||
|
@@ -246,7 +261,9 @@ impl DecimalArray { | |
} | ||
} | ||
|
||
impl From<ArrayData> for DecimalArray { | ||
impl<T: BasicDecimal, const VALUE_LENGTH: i32> From<ArrayData> | ||
for GenericDecimalArray<T, VALUE_LENGTH> | ||
{ | ||
fn from(data: ArrayData) -> Self { | ||
assert_eq!( | ||
data.buffers().len(), | ||
|
@@ -258,17 +275,20 @@ impl From<ArrayData> for DecimalArray { | |
DataType::Decimal(precision, scale) => (*precision, *scale), | ||
_ => panic!("Expected data type to be Decimal"), | ||
}; | ||
Self { | ||
|
||
GenericDecimalArray::new( | ||
data, | ||
value_data: unsafe { RawPtrBox::new(values) }, | ||
unsafe { RawPtrBox::new(values) }, | ||
precision, | ||
scale, | ||
} | ||
) | ||
} | ||
} | ||
|
||
impl From<DecimalArray> for ArrayData { | ||
fn from(array: DecimalArray) -> Self { | ||
impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> | ||
From<GenericDecimalArray<T, VALUE_LENGTH>> for ArrayData | ||
{ | ||
fn from(array: GenericDecimalArray<T, VALUE_LENGTH>) -> Self { | ||
array.data | ||
} | ||
} | ||
|
@@ -325,9 +345,17 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for DecimalArray { | |
} | ||
} | ||
|
||
impl fmt::Debug for DecimalArray { | ||
impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> fmt::Debug | ||
for GenericDecimalArray<T, VALUE_LENGTH> | ||
{ | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?; | ||
write!( | ||
f, | ||
"Decimal{}Array<{}, {}>\n[\n", | ||
VALUE_LENGTH * 8, | ||
self.precision, | ||
self.scale | ||
)?; | ||
print_long_array(self, f, |array, index, f| { | ||
let formatted_decimal = array.value_as_string(index); | ||
|
||
|
@@ -337,7 +365,9 @@ impl fmt::Debug for DecimalArray { | |
} | ||
} | ||
|
||
impl Array for DecimalArray { | ||
impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> Array | ||
for GenericDecimalArray<T, VALUE_LENGTH> | ||
{ | ||
fn as_any(&self) -> &dyn Any { | ||
self | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,9 @@ | |
// under the License. | ||
|
||
use super::*; | ||
use crate::array::array_decimal::GenericDecimalArray; | ||
use crate::datatypes::*; | ||
use crate::util::decimal::BasicDecimal; | ||
use array::Array; | ||
use hex::FromHex; | ||
use serde_json::value::Value::{Null as JNull, Object, String as JString}; | ||
|
@@ -359,19 +361,16 @@ impl PartialEq<FixedSizeBinaryArray> for Value { | |
} | ||
} | ||
|
||
impl JsonEqual for DecimalArray { | ||
impl<T: BasicDecimal + 'static, const VALUE_LENGTH: i32> JsonEqual | ||
for GenericDecimalArray<T, VALUE_LENGTH> | ||
{ | ||
fn equals_json(&self, json: &[&Value]) -> bool { | ||
if self.len() != json.len() { | ||
return false; | ||
} | ||
|
||
(0..self.len()).all(|i| match json[i] { | ||
JString(s) => { | ||
self.is_valid(i) | ||
&& (s | ||
.parse::<i128>() | ||
.map_or_else(|_| false, |v| v == self.value(i).as_i128())) | ||
} | ||
JString(s) => self.is_valid(i) && (s == &self.value(i).to_string()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is one issue on generic approach. We cannot keep original |
||
JNull => self.is_null(i), | ||
_ => false, | ||
}) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One little concern is that we could remove
T: BasicDecimal
if we also do const generic onDecimal128
andDecimal256
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we need a type bound to
Decimal<BYTE_LENGTH>
even for const generic?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually we need it. But just as I said in #2001, this is an unstable, which is the major disadvantage of using
const generic
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried to find some help from the rust-lang team: https://users.rust-lang.org/t/how-to-seal-the-const-generic/77947.