Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Structured interval types for IntervalMonthDayNano or IntervalDayTime (#3125) (#5654) #5769

Merged
merged 4 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 20 additions & 23 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use arrow_array::cast::AsArray;
use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};

use crate::arity::{binary, try_binary};
Expand Down Expand Up @@ -343,12 +343,12 @@ trait TimestampOp: ArrowTimestampType {
type Duration: ArrowPrimitiveType<Native = i64>;

fn add_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
fn add_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
fn add_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
fn add_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
fn add_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;

fn sub_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
fn sub_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
fn sub_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
fn sub_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
fn sub_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;
}

macro_rules! timestamp {
Expand All @@ -360,23 +360,23 @@ macro_rules! timestamp {
Self::add_year_months(left, right, tz)
}

fn add_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
fn add_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
Self::add_day_time(left, right, tz)
}

fn add_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
fn add_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
Self::add_month_day_nano(left, right, tz)
}

fn sub_year_month(left: i64, right: i32, tz: Tz) -> Option<i64> {
Self::subtract_year_months(left, right, tz)
}

fn sub_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
fn sub_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
Self::subtract_day_time(left, right, tz)
}

fn sub_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
fn sub_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
Self::subtract_month_day_nano(left, right, tz)
}
}
Expand Down Expand Up @@ -506,12 +506,12 @@ fn timestamp_op<T: TimestampOp>(
/// Note: these should be fallible (#4456)
trait DateOp: ArrowTemporalType {
fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
fn add_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
fn add_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
fn add_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;

fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
fn sub_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
fn sub_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
fn sub_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
}

macro_rules! date {
Expand All @@ -521,23 +521,23 @@ macro_rules! date {
Self::add_year_months(left, right)
}

fn add_day_time(left: Self::Native, right: i64) -> Self::Native {
fn add_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
Self::add_day_time(left, right)
}

fn add_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
fn add_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
Self::add_month_day_nano(left, right)
}

fn sub_year_month(left: Self::Native, right: i32) -> Self::Native {
Self::subtract_year_months(left, right)
}

fn sub_day_time(left: Self::Native, right: i64) -> Self::Native {
fn sub_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
Self::subtract_day_time(left, right)
}

fn sub_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
fn sub_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
Self::subtract_month_day_nano(left, right)
}
}
Expand Down Expand Up @@ -1346,13 +1346,10 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
])
);
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
let b = IntervalMonthDayNanoArray::from(vec![1]);
let a = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::MAX]);
let b = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::ONE]);
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(
err,
"Compute error: Overflow happened on: 9223372036854775807 + 1"
);
assert_eq!(err, "Compute error: Overflow happened on: 2147483647 + 1");
}

fn test_duration_impl<T: ArrowPrimitiveType<Native = i64>>() {
Expand Down
14 changes: 12 additions & 2 deletions arrow-array/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow_buffer::{i256, ArrowNativeType};
use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::ArrowError;
use half::f16;
use num::complex::ComplexFloat;
Expand Down Expand Up @@ -139,7 +139,10 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {

macro_rules! native_type_op {
($t:tt) => {
native_type_op!($t, 0, 1, $t::MIN, $t::MAX);
native_type_op!($t, 0, 1);
};
($t:tt, $zero:expr, $one: expr) => {
native_type_op!($t, $zero, $one, $t::MIN, $t::MAX);
};
($t:tt, $zero:expr, $one: expr, $min: expr, $max: expr) => {
impl ArrowNativeTypeOp for $t {
Expand Down Expand Up @@ -284,6 +287,13 @@ native_type_op!(u32);
native_type_op!(u64);
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);

native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
native_type_op!(
IntervalMonthDayNano,
IntervalMonthDayNano::ZERO,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wondered if this needed any documentation, but it appears the answer is it already has some nice documentation 👍

Screenshot 2024-05-17 at 12 47 36 PM

IntervalMonthDayNano::ONE
);

macro_rules! native_type_float_op {
($t:tt, $zero:expr, $one:expr, $min:expr, $max:expr) => {
impl ArrowNativeTypeOp for $t {
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,7 @@ where
/// return Ok(d.with_values(r));
/// }
/// downcast_primitive_array! {
/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| x.to_string())).collect::<StringArray>())),
/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| format!("{x:?}"))).collect::<StringArray>())),
/// d => Err(ArrowError::InvalidArgumentError(format!("{d:?} not supported")))
/// }
/// }
Expand Down
52 changes: 33 additions & 19 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ mod tests {
use crate::builder::{Decimal128Builder, Decimal256Builder};
use crate::cast::downcast_array;
use crate::BooleanArray;
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::TimeUnit;

#[test]
Expand Down Expand Up @@ -1624,33 +1625,46 @@ mod tests {
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);

// a day_time interval contains days and milliseconds, but we do not yet have accessors for the values
let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
let v0 = IntervalDayTime {
days: 34,
milliseconds: 1,
};
let v2 = IntervalDayTime {
days: -2,
milliseconds: -5,
};

let arr = IntervalDayTimeArray::from(vec![Some(v0), None, Some(v2)]);

assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
assert_eq!(1, arr.value(0));
assert_eq!(1, arr.values()[0]);
assert_eq!(v0, arr.value(0));
assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);
assert_eq!(v2, arr.value(2));
assert_eq!(v2, arr.values()[2]);

// a month_day_nano interval contains months, days and nanoseconds,
// but we do not yet have accessors for the values.
// TODO: implement month, day, and nanos access method for month_day_nano.
let arr = IntervalMonthDayNanoArray::from(vec![
Some(100000000000000000000),
None,
Some(-500000000000000000000),
]);
let v0 = IntervalMonthDayNano {
months: 2,
days: 34,
nanoseconds: -1,
};
let v2 = IntervalMonthDayNano {
months: -3,
days: -2,
nanoseconds: 4,
};

let arr = IntervalMonthDayNanoArray::from(vec![Some(v0), None, Some(v2)]);
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
assert_eq!(100000000000000000000, arr.value(0));
assert_eq!(100000000000000000000, arr.values()[0]);
assert_eq!(v0, arr.value(0));
assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
assert_eq!(-500000000000000000000, arr.value(2));
assert_eq!(-500000000000000000000, arr.values()[2]);
assert_eq!(v2, arr.value(2));
assert_eq!(v2, arr.values()[2]);
}

#[test]
Expand Down Expand Up @@ -2460,7 +2474,7 @@ mod tests {
expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)"
)]
fn test_invalid_interval_type() {
let array = IntervalDayTimeArray::from(vec![1, 2, 3]);
let array = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]);
let _ = IntervalMonthDayNanoArray::from(array.into_data());
}

Expand Down
77 changes: 19 additions & 58 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::delta::{
use crate::temporal_conversions::as_datetime_with_timezone;
use crate::timezone::Tz;
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
use arrow_buffer::{i256, Buffer, OffsetBuffer};
use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano, OffsetBuffer};
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
use arrow_data::{validate_binary_view, validate_string_view};
use arrow_schema::{
Expand Down Expand Up @@ -220,7 +220,7 @@ make_type!(
);
make_type!(
IntervalDayTimeType,
i64,
IntervalDayTime,
DataType::Interval(IntervalUnit::DayTime),
r#"A “calendar” interval type in days and milliseconds.

Expand All @@ -247,7 +247,7 @@ which can lead to surprising results. Please see the description of ordering on
);
make_type!(
IntervalMonthDayNanoType,
i128,
IntervalMonthDayNano,
DataType::Interval(IntervalUnit::MonthDayNano),
r#"A “calendar” interval type in months, days, and nanoseconds.

Expand All @@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of
nanoseconds represents less than a day's worth of time).

```text
┌──────────────────────────────┬───────────────────────────┐
Nanos Days Months
(64 bits) │ (32 bits) │ (32 bits)
└──────────────────────────────┴───────────────────────────┘
0 63 95 127 bit offset
┌────────────────────────────┬─────────────────────────────┐
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remind me: do we have intergration tests that would find this issue?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and they're now actually testing the logic rather than doing the mapping themselves

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular, the changes to arrow-integration-test/src/lib.rs in this PR

Months Days Nanos
(32 bits) │ (32 bits) (64 bits)
└────────────────────────────┴─────────────────────────────┘
0 32 64 128 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details

Expand Down Expand Up @@ -917,25 +917,8 @@ impl IntervalDayTimeType {
/// * `days` - The number of days (+/-) represented in this interval
/// * `millis` - The number of milliseconds (+/-) represented in this interval
#[inline]
pub fn make_value(
days: i32,
millis: i32,
) -> <IntervalDayTimeType as ArrowPrimitiveType>::Native {
/*
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1433
struct DayMilliseconds {
int32_t days = 0;
int32_t milliseconds = 0;
...
}
64 56 48 40 32 24 16 8 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| days | milliseconds |
+-------+-------+-------+-------+-------+-------+-------+-------+
*/
let m = millis as u64 & u32::MAX as u64;
let d = (days as u64 & u32::MAX as u64) << 32;
(m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
pub fn make_value(days: i32, milliseconds: i32) -> IntervalDayTime {
IntervalDayTime { days, milliseconds }
}

/// Turns a IntervalDayTimeType into a tuple of (days, milliseconds)
Expand All @@ -944,10 +927,8 @@ impl IntervalDayTimeType {
///
/// * `i` - The IntervalDayTimeType to convert
#[inline]
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
let days = (i >> 32) as i32;
let ms = i as i32;
(days, ms)
pub fn to_parts(i: IntervalDayTime) -> (i32, i32) {
(i.days, i.milliseconds)
}
}

Expand All @@ -960,27 +941,12 @@ impl IntervalMonthDayNanoType {
/// * `days` - The number of days (+/-) represented in this interval
/// * `nanos` - The number of nanoseconds (+/-) represented in this interval
#[inline]
pub fn make_value(
months: i32,
days: i32,
nanos: i64,
) -> <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native {
/*
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475
struct MonthDayNanos {
int32_t months;
int32_t days;
int64_t nanoseconds;
pub fn make_value(months: i32, days: i32, nanoseconds: i64) -> IntervalMonthDayNano {
IntervalMonthDayNano {
months,
days,
nanoseconds,
}
128 112 96 80 64 48 32 16 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| months | days | nanos |
+-------+-------+-------+-------+-------+-------+-------+-------+
*/
let m = (months as u128 & u32::MAX as u128) << 96;
let d = (days as u128 & u32::MAX as u128) << 64;
let n = nanos as u128 & u64::MAX as u128;
(m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
}

/// Turns a IntervalMonthDayNanoType into a tuple of (months, days, nanos)
Expand All @@ -989,13 +955,8 @@ impl IntervalMonthDayNanoType {
///
/// * `i` - The IntervalMonthDayNanoType to convert
#[inline]
pub fn to_parts(
i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> (i32, i32, i64) {
let months = (i >> 96) as i32;
let days = (i >> 64) as i32;
let nanos = i as i64;
(months, days, nanos)
pub fn to_parts(i: IntervalMonthDayNano) -> (i32, i32, i64) {
(i.months, i.days, i.nanoseconds)
}
}

Expand Down
Loading
Loading