Skip to content

Commit

Permalink
Flip interval field ordering (apache#5654)
Browse files Browse the repository at this point in the history
tustvold committed Apr 16, 2024
1 parent f276528 commit 5e0c863
Showing 4 changed files with 49 additions and 49 deletions.
6 changes: 4 additions & 2 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
@@ -1346,8 +1346,10 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
])
);
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
let b = IntervalMonthDayNanoArray::from(vec![1]);
let max_nanos = IntervalMonthDayNanoType::make_value(0, 0, i64::MAX);
let a = IntervalMonthDayNanoArray::from(vec![max_nanos]);
let one_nanos = IntervalMonthDayNanoType::make_value(0, 0, 1);
let b = IntervalMonthDayNanoArray::from(vec![one_nanos]);
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(
err,
55 changes: 29 additions & 26 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
@@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of
nanoseconds represents less than a day's worth of time).
```text
┌──────────────────────────────┬───────────────────────────┐
Nanos Days Months
(64 bits) │ (32 bits) │ (32 bits)
└──────────────────────────────┴───────────────────────────┘
0 63 95 127 bit offset
┌────────────────────────────┬─────────────────────────────┐
Months Days Nanos
(32 bits) │ (32 bits) (64 bits)
└────────────────────────────┴─────────────────────────────┘
0 32 64 128 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details
@@ -290,9 +290,9 @@ representation which is fast and efficient, but leads
to potentially surprising results.
For example a
`IntervalMonthDayNano` of `1 month` will compare as **greater** than a
`IntervalMonthDayNano` of `100 days` because the binary representation of `1 month`
is larger than the binary representation of 100 days.
`IntervalMonthDayNano` of `1 month` will compare as **less** than a
`IntervalMonthDayNano` of `1 days` because the binary representation of `1 month`
is smaller than the binary representation of 1 days.
"#
);
make_type!(
@@ -928,13 +928,14 @@ impl IntervalDayTimeType {
int32_t milliseconds = 0;
...
}
64 56 48 40 32 24 16 8 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| days | milliseconds |
+-------+-------+-------+-------+-------+-------+-------+-------+
┌──────────────┬──────────────┐
│ Days │ Milliseconds │
│ (32 bits) │ (32 bits) │
└──────────────┴──────────────┘
0 31 63 bit offset
*/
let m = millis as u64 & u32::MAX as u64;
let d = (days as u64 & u32::MAX as u64) << 32;
let m = (millis as u64 & u32::MAX as u64) << 32;
let d = days as u64 & u32::MAX as u64;
(m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
}

@@ -945,8 +946,8 @@ impl IntervalDayTimeType {
/// * `i` - The IntervalDayTimeType to convert
#[inline]
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
let days = (i >> 32) as i32;
let ms = i as i32;
let days = i as i32;
let ms = (i >> 32) as i32;
(days, ms)
}
}
@@ -972,14 +973,16 @@ impl IntervalMonthDayNanoType {
int32_t days;
int64_t nanoseconds;
}
128 112 96 80 64 48 32 16 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| months | days | nanos |
+-------+-------+-------+-------+-------+-------+-------+-------+
┌───────────────┬─────────────┬─────────────────────────────┐
│ Months │ Days │ Nanos │
│ (32 bits) │ (32 bits) │ (64 bits) │
└───────────────┴─────────────┴─────────────────────────────┘
0 32 64 128 bit offset
*/
let m = (months as u128 & u32::MAX as u128) << 96;
let d = (days as u128 & u32::MAX as u128) << 64;
let n = nanos as u128 & u64::MAX as u128;
let m = months as u128 & u32::MAX as u128;
let d = (days as u128 & u32::MAX as u128) << 32;
let n = (nanos as u128 & u64::MAX as u128) << 64;
(m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
}

@@ -992,9 +995,9 @@ impl IntervalMonthDayNanoType {
pub fn to_parts(
i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> (i32, i32, i64) {
let months = (i >> 96) as i32;
let days = (i >> 64) as i32;
let nanos = i as i64;
let months = i as i32;
let days = (i >> 32) as i32;
let nanos = (i >> 64) as i64;
(months, days, nanos)
}
}
9 changes: 4 additions & 5 deletions arrow-ord/src/comparison.rs
Original file line number Diff line number Diff line change
@@ -1714,18 +1714,17 @@ mod tests {
IntervalDayTimeType::make_value(1, 3000),
// 90M milliseconds
IntervalDayTimeType::make_value(0, 90_000_000),
IntervalDayTimeType::make_value(4, 10),
],
vec![
IntervalDayTimeType::make_value(0, 1000),
IntervalDayTimeType::make_value(1, 0),
IntervalDayTimeType::make_value(10, 0),
IntervalDayTimeType::make_value(2, 1),
// NB even though 1 day is less than 90M milliseconds long,
// it compares as greater because the underlying type stores
// days and milliseconds as different fields
IntervalDayTimeType::make_value(0, 12),
IntervalDayTimeType::make_value(56, 10),
],
vec![false, true, true, true ,false]
vec![true, false, false, false, false, true]
);

cmp_vec!(
@@ -1771,7 +1770,7 @@ mod tests {
// 100 days (note is treated as greater than 1 month as the underlying integer representation)
IntervalMonthDayNanoType::make_value(0, 100, 0),
],
vec![false, false, true, false, false]
vec![false, true, true, true, true]
);
}

28 changes: 12 additions & 16 deletions arrow-ord/src/ord.rs
Original file line number Diff line number Diff line change
@@ -227,14 +227,12 @@ pub mod tests {

let cmp = build_compare(&array, &array).unwrap();

assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));

// somewhat confusingly, while 90M milliseconds is more than 1 day,
// it will compare less as the comparison is done on the underlying
// values not field by field
assert_eq!(Ordering::Greater, cmp(1, 2));
assert_eq!(Ordering::Less, cmp(2, 1));
// Ordering is based on the underlying values
// leading to potentially surprising results
assert_eq!(Ordering::Greater, cmp(0, 1));
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Less, cmp(1, 2));
assert_eq!(Ordering::Greater, cmp(2, 1));
}

#[test]
@@ -271,14 +269,12 @@ pub mod tests {

let cmp = build_compare(&array, &array).unwrap();

assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));

// somewhat confusingly, while 100 days is more than 1 month in all cases
// it will compare less as the comparison is done on the underlying
// values not field by field
assert_eq!(Ordering::Greater, cmp(1, 2));
assert_eq!(Ordering::Less, cmp(2, 1));
// Ordering is based on the underlying values
// leading to potentially surprising results
assert_eq!(Ordering::Greater, cmp(0, 1));
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Less, cmp(1, 2));
assert_eq!(Ordering::Greater, cmp(2, 1));
}

#[test]

0 comments on commit 5e0c863

Please sign in to comment.