diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp index 252e7fd7b67..266f397b1e3 100644 --- a/cpp/src/rolling/detail/range_window_bounds.hpp +++ b/cpp/src/rolling/detail/range_window_bounds.hpp @@ -28,7 +28,7 @@ namespace detail { template constexpr bool is_supported_range_type() { - return cudf::is_duration() || + return cudf::is_duration() || cudf::is_fixed_point() || (std::is_integral_v && !cudf::is_boolean()); } @@ -37,7 +37,7 @@ constexpr bool is_supported_range_type() template constexpr bool is_supported_order_by_column_type() { - return cudf::is_timestamp() || + return cudf::is_timestamp() || cudf::is_fixed_point() || (std::is_integral_v && !cudf::is_boolean()); } @@ -49,6 +49,11 @@ constexpr bool is_supported_order_by_column_type() /// a. For `TIMESTAMP_DAYS`, the range-type is `DURATION_DAYS`. /// Comparisons are done in `int32_t`. /// b. For all other timestamp types, comparisons are done in `int64_t`. +/// 3. For decimal types, all comparisons are done with the rep type, +/// after scaling the rep value to the same scale as the order by column: +/// a. For decimal32, the range-type is `int32_t`. +/// b. For decimal64, the range-type is `int64_t`. +/// c. For decimal128, the range-type is `__int128_t`. template struct range_type_impl { using type = void; @@ -69,45 +74,66 @@ struct range_type_impl +struct range_type_impl(), void>> { + using type = FixedPointType; + using rep_type = typename type::rep; +}; + template using range_type = typename range_type_impl::type; template using range_rep_type = typename range_type_impl::rep_type; -namespace { - template -void assert_non_negative(T const& value) +void assert_non_negative([[maybe_unused]] T const& value) { - (void)value; if constexpr (std::numeric_limits::is_signed) { CUDF_EXPECTS(value >= T{0}, "Range scalar must be >= 0."); } } -template < - typename RangeT, - typename RepT, - std::enable_if_t && !cudf::is_boolean(), void>* = nullptr> -RepT range_comparable_value_impl(scalar const& range_scalar, rmm::cuda_stream_view stream) +template && !cudf::is_boolean())> +RepT range_comparable_value_impl(scalar const& range_scalar, + bool, + data_type const&, + rmm::cuda_stream_view stream) { auto val = static_cast const&>(range_scalar).value(stream); assert_non_negative(val); return val; } -template (), void>* = nullptr> -RepT range_comparable_value_impl(scalar const& range_scalar, rmm::cuda_stream_view stream) +template ())> +RepT range_comparable_value_impl(scalar const& range_scalar, + bool, + data_type const&, + rmm::cuda_stream_view stream) { auto val = static_cast const&>(range_scalar).value(stream).count(); assert_non_negative(val); return val; } -} // namespace +template ())> +RepT range_comparable_value_impl(scalar const& range_scalar, + bool is_unbounded, + data_type const& order_by_data_type, + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(is_unbounded || range_scalar.type().scale() >= order_by_data_type.scale(), + "Range bounds scalar must match/exceed the scale of the orderby column."); + auto const fixed_point_value = + static_cast const&>(range_scalar).fixed_point_value(stream); + auto const value = + fixed_point_value.rescaled(numeric::scale_type{order_by_data_type.scale()}).value(); + assert_non_negative(value); + return value; +} /** * @brief Fetch the value of the range_window_bounds scalar, for comparisons @@ -115,22 +141,25 @@ RepT range_comparable_value_impl(scalar const& range_scalar, rmm::cuda_stream_vi * * @tparam OrderByType The type of the orderby column with which the range value will be compared * @param range_bounds The range_window_bounds whose value is to be read + * @param order_by_data_type The data type for the order-by column * @param stream The CUDA stream for device memory operations * @return RepType Value of the range scalar */ template range_rep_type range_comparable_value( range_window_bounds const& range_bounds, - rmm::cuda_stream_view stream = cudf::default_stream_value) + data_type const& order_by_data_type = data_type{type_to_id()}, + rmm::cuda_stream_view stream = cudf::default_stream_value) { auto const& range_scalar = range_bounds.range_scalar(); using range_type = cudf::detail::range_type; CUDF_EXPECTS(range_scalar.type().id() == cudf::type_to_id(), - "Unexpected range type for specified orderby column."); + "Range bounds scalar must match the type of the orderby column."); using rep_type = cudf::detail::range_rep_type; - return range_comparable_value_impl(range_scalar, stream); + return range_comparable_value_impl( + range_scalar, range_bounds.is_unbounded(), order_by_data_type, stream); } } // namespace detail diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 52587a20fc7..c1be33a9cd5 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -224,47 +224,49 @@ namespace { /** * @brief Add `delta` to value, and cap at numeric_limits::max(), for signed types. */ -template ::is_signed>* = nullptr> +template ::is_signed)> __device__ T add_safe(T const& value, T const& delta) { // delta >= 0. - return (value < 0 || (std::numeric_limits::max() - value) >= delta) + return (value < 0 || (cuda::std::numeric_limits::max() - value) >= delta) ? (value + delta) - : std::numeric_limits::max(); + : cuda::std::numeric_limits::max(); } /** * @brief Add `delta` to value, and cap at numeric_limits::max(), for unsigned types. */ -template ::is_signed>* = nullptr> +template ::is_signed)> __device__ T add_safe(T const& value, T const& delta) { // delta >= 0. - return ((std::numeric_limits::max() - value) >= delta) ? (value + delta) - : std::numeric_limits::max(); + return ((cuda::std::numeric_limits::max() - value) >= delta) + ? (value + delta) + : cuda::std::numeric_limits::max(); } /** * @brief Subtract `delta` from value, and cap at numeric_limits::min(), for signed types. */ -template ::is_signed>* = nullptr> +template ::is_signed)> __device__ T subtract_safe(T const& value, T const& delta) { // delta >= 0; - return (value >= 0 || (value - std::numeric_limits::min()) >= delta) + return (value >= 0 || (value - cuda::std::numeric_limits::min()) >= delta) ? (value - delta) - : std::numeric_limits::min(); + : cuda::std::numeric_limits::min(); } /** * @brief Subtract `delta` from value, and cap at numeric_limits::min(), for unsigned types. */ -template ::is_signed>* = nullptr> +template ::is_signed)> __device__ T subtract_safe(T const& value, T const& delta) { // delta >= 0; - return ((value - std::numeric_limits::min()) >= delta) ? (value - delta) - : std::numeric_limits::min(); + return ((value - cuda::std::numeric_limits::min()) >= delta) + ? (value - delta) + : cuda::std::numeric_limits::min(); } /// Given a single, ungrouped order-by column, return the indices corresponding @@ -780,7 +782,7 @@ template std::unique_ptr grouped_range_rolling_window_impl( column_view const& input, column_view const& orderby_column, - cudf::order const& timestamp_ordering, + cudf::order const& order_of_orderby_column, rmm::device_uvector const& group_offsets, rmm::device_uvector const& group_labels, range_window_bounds const& preceding_window, @@ -790,10 +792,12 @@ std::unique_ptr grouped_range_rolling_window_impl( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto preceding_value = detail::range_comparable_value(preceding_window); - auto following_value = detail::range_comparable_value(following_window); + auto preceding_value = + detail::range_comparable_value(preceding_window, orderby_column.type(), stream); + auto following_value = + detail::range_comparable_value(following_window, orderby_column.type(), stream); - if (timestamp_ordering == cudf::order::ASCENDING) { + if (order_of_orderby_column == cudf::order::ASCENDING) { return group_offsets.is_empty() ? range_window_ASC(input, orderby_column, preceding_value, @@ -856,7 +860,7 @@ struct dispatch_grouped_range_rolling_window { std::unique_ptr> operator()(column_view const& input, column_view const& orderby_column, - cudf::order const& timestamp_ordering, + cudf::order const& order_of_orderby_column, rmm::device_uvector const& group_offsets, rmm::device_uvector const& group_labels, range_window_bounds const& preceding_window, @@ -868,7 +872,7 @@ struct dispatch_grouped_range_rolling_window { { return grouped_range_rolling_window_impl(input, orderby_column, - timestamp_ordering, + order_of_orderby_column, group_offsets, group_labels, preceding_window, diff --git a/cpp/src/rolling/range_window_bounds.cpp b/cpp/src/rolling/range_window_bounds.cpp index 831e901f652..77520ccff63 100644 --- a/cpp/src/rolling/range_window_bounds.cpp +++ b/cpp/src/rolling/range_window_bounds.cpp @@ -30,28 +30,34 @@ namespace { * This makes it possible to copy construct and copy assign `range_window_bounds` objects. */ struct range_scalar_constructor { - template (), void>* = nullptr> + template ())> std::unique_ptr operator()(scalar const& range_scalar_) const { CUDF_FAIL( "Unsupported range type. " - "Only Durations and non-boolean integral range types are allowed."); + "Only Durations, fixed-point, and non-boolean integral range types are allowed."); } - template (), void>* = nullptr> + template ())> std::unique_ptr operator()(scalar const& range_scalar_) const { return std::make_unique>( static_cast const&>(range_scalar_)); } - template && !cudf::is_boolean(), void>* = nullptr> + template && not cudf::is_boolean())> std::unique_ptr operator()(scalar const& range_scalar_) const { return std::make_unique>( static_cast const&>(range_scalar_)); } + + template ())> + std::unique_ptr operator()(scalar const& range_scalar_) const + { + return std::make_unique>( + static_cast const&>(range_scalar_)); + } }; } // namespace diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 99267d37318..c723411352d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -343,6 +343,7 @@ ConfigureTest( ROLLING_TEST rolling/collect_ops_test.cpp rolling/empty_input_test.cpp + rolling/grouped_rolling_range_test.cpp rolling/grouped_rolling_test.cpp rolling/lead_lag_test.cpp rolling/nth_element_test.cpp diff --git a/cpp/tests/rolling/grouped_rolling_range_test.cpp b/cpp/tests/rolling/grouped_rolling_range_test.cpp new file mode 100644 index 00000000000..c44d804dcf1 --- /dev/null +++ b/cpp/tests/rolling/grouped_rolling_range_test.cpp @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace cudf::test::rolling { + +template +using fwcw = cudf::test::fixed_width_column_wrapper; +template +using decimals_column = cudf::test::fixed_point_column_wrapper; +using ints_column = fwcw; +using bigints_column = fwcw; +using column_ptr = std::unique_ptr; +using namespace numeric; +using namespace cudf::test::iterators; + +struct BaseGroupedRollingRangeOrderByDecimalTest : public BaseFixture { + // Stand-in for std::pow(10, n), but for integral return. + static constexpr std::array pow10{1, 10, 100, 1000, 10000, 100000}; + // Test data. + column_ptr const grouping_keys = ints_column{0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}.release(); + column_ptr const agg_values = ints_column{1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3}.release(); + cudf::size_type const num_rows = grouping_keys->size(); +}; + +using base = BaseGroupedRollingRangeOrderByDecimalTest; // Shortcut to base test class. + +template +struct GroupedRollingRangeOrderByDecimalTypedTest : BaseGroupedRollingRangeOrderByDecimalTest { + using Rep = typename DecimalT::rep; + + auto make_fixed_point_range_bounds(typename DecimalT::rep value, scale_type scale) const + { + return cudf::range_window_bounds::get(*cudf::make_fixed_point_scalar(value, scale)); + } + + auto make_unbounded_fixed_point_range_bounds() const + { + return cudf::range_window_bounds::unbounded(data_type{type_to_id()}); + } + + /// For different scales, generate order_by column with + /// the same effective values: [0, 100, 200, 300, ... 1100, 1200, 1300] + /// For scale == -2, the rep values are: [0, 10000, 20000, 30000, ... 110000, 120000, 130000] + /// For scale == 2, the rep values are: [0, 1, 2, 3, ... 11, 12, 13] + column_ptr generate_order_by_column(scale_type scale) const + { + auto const begin = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [&](auto i) -> Rep { return (i * 10000) / base::pow10[scale + 2]; }); + + return decimals_column{begin, begin + num_rows, scale_type{scale}}.release(); + } + + /** + * @brief Scale the range bounds value to new scale, so that effective + * value remains identical. + * + * Keeping the effective range bounds value identical ensures that + * the expected result from grouped_rolling remains the same. + */ + Rep rescale_range_value(Rep const& value_at_scale_0, scale_type new_scale) const + { + // Scale -> Rep (for value == 200) + // -2 -> 20000 + // -1 -> 2000 + // 0 -> 200 + // 1 -> 20 + // 2 -> 2 + return (value_at_scale_0 * 100) / base::pow10[new_scale + 2]; + } + + /** + * @brief Get grouped rolling results for specified order-by column and range scale + * + */ + column_ptr get_grouped_range_rolling_result(column_view const& order_by_column, + scale_type const& range_scale) const + { + auto const preceding = + this->make_fixed_point_range_bounds(rescale_range_value(Rep{200}, range_scale), range_scale); + auto const following = + this->make_fixed_point_range_bounds(rescale_range_value(Rep{100}, range_scale), range_scale); + + return cudf::grouped_range_rolling_window(cudf::table_view{{grouping_keys->view()}}, + order_by_column, + cudf::order::ASCENDING, + agg_values->view(), + preceding, + following, + 1, // min_periods + *cudf::make_sum_aggregation()); + } + + /** + * @brief Run grouped_rolling test for specified order-by column scale with + * no nulls in the order-by column + * + */ + void run_test_no_null_oby(scale_type const& order_by_column_scale) const + { + auto const order_by = generate_order_by_column(order_by_column_scale); + // Run tests for range bounds generated for all scales >= oby_column_scale. + for (int32_t range_scale = order_by_column_scale; range_scale <= 2; ++range_scale) { + auto const results = get_grouped_range_rolling_result(*order_by, scale_type{range_scale}); + auto const expected_results = + bigints_column{{2, 3, 4, 4, 4, 3, 4, 6, 8, 6, 6, 9, 12, 9}, no_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); + } + } + + /** + * @brief Run grouped_rolling test for specified order-by column scale with + * nulls in the order-by column (i.e. 2 nulls at the beginning of each group) + * + */ + void run_test_nulls_in_oby(scale_type const& order_by_column_scale) const + { + // Nullify the first two rows of each group in the order_by column. + auto const nulled_order_by = [&] { + auto col = generate_order_by_column(order_by_column_scale); + auto new_null_mask = create_null_mask(col->size(), mask_state::ALL_VALID); + set_null_mask( + static_cast(new_null_mask.data()), 0, 2, false); // Nulls in first group. + set_null_mask( + static_cast(new_null_mask.data()), 6, 8, false); // Nulls in second group. + set_null_mask( + static_cast(new_null_mask.data()), 10, 12, false); // Nulls in third group. + col->set_null_mask(std::move(new_null_mask)); + return col; + }(); + + // Run tests for range bounds generated for all scales >= oby_column_scale. + for (auto range_scale = int32_t{order_by_column_scale}; range_scale <= 2; ++range_scale) { + auto const results = + get_grouped_range_rolling_result(*nulled_order_by, scale_type{range_scale}); + auto const expected_results = + bigints_column{{2, 2, 2, 3, 4, 3, 4, 4, 4, 4, 6, 6, 6, 6}, no_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); + } + } + + /** + * @brief Run grouped_rolling test for specified order-by column scale with + * unbounded preceding and unbounded following. + * + */ + void run_test_unbounded_preceding_to_unbounded_following(scale_type oby_column_scale) + { + auto const order_by = generate_order_by_column(oby_column_scale); + auto const preceding = make_unbounded_fixed_point_range_bounds(); + auto const following = make_unbounded_fixed_point_range_bounds(); + auto results = + cudf::grouped_range_rolling_window(cudf::table_view{{grouping_keys->view()}}, + order_by->view(), + cudf::order::ASCENDING, + agg_values->view(), + preceding, + following, + 1, // min_periods + *cudf::make_sum_aggregation()); + + auto expected_results = + bigints_column{{6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 12, 12, 12, 12}, no_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); + } + + /** + * @brief Run grouped_rolling test for specified order-by column scale with + * unbounded preceding and unbounded following. + * + */ + void run_test_unbounded_preceding_to_current_row(scale_type oby_column_scale) + { + auto const order_by = generate_order_by_column(oby_column_scale); + auto const unbounded_preceding = make_unbounded_fixed_point_range_bounds(); + + for (int32_t range_scale = oby_column_scale; range_scale <= 2; ++range_scale) { + auto const current_row = make_fixed_point_range_bounds( + rescale_range_value(Rep{0}, scale_type{range_scale}), scale_type{range_scale}); + auto const results = + cudf::grouped_range_rolling_window(cudf::table_view{{grouping_keys->view()}}, + order_by->view(), + cudf::order::ASCENDING, + agg_values->view(), + unbounded_preceding, + current_row, + 1, // min_periods + *cudf::make_sum_aggregation()); + + auto expected_results = + bigints_column{{1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 3, 6, 9, 12}, no_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); + } + } + + /** + * @brief Run grouped_rolling test for specified order-by column scale with + * unbounded preceding and unbounded following. + * + */ + void run_test_current_row_to_unbounded_following(scale_type oby_column_scale) + { + auto const order_by = generate_order_by_column(oby_column_scale); + auto const unbounded_following = make_unbounded_fixed_point_range_bounds(); + + for (int32_t range_scale = oby_column_scale; range_scale <= 2; ++range_scale) { + auto const current_row = make_fixed_point_range_bounds( + rescale_range_value(Rep{0}, scale_type{range_scale}), scale_type{range_scale}); + auto const results = + cudf::grouped_range_rolling_window(cudf::table_view{{grouping_keys->view()}}, + order_by->view(), + cudf::order::ASCENDING, + agg_values->view(), + current_row, + unbounded_following, + 1, // min_periods + *cudf::make_sum_aggregation()); + + auto expected_results = + bigints_column{{6, 5, 4, 3, 2, 1, 8, 6, 4, 2, 12, 9, 6, 3}, no_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); + } + } +}; + +TYPED_TEST_SUITE(GroupedRollingRangeOrderByDecimalTypedTest, FixedPointTypes); + +TYPED_TEST(GroupedRollingRangeOrderByDecimalTypedTest, BoundedRanges) +{ + for (auto const order_by_column_scale : {-2, -1, 0, 1, 2}) { + auto const oby_scale = scale_type{order_by_column_scale}; + this->run_test_no_null_oby(oby_scale); + this->run_test_nulls_in_oby(oby_scale); + } +} + +TYPED_TEST(GroupedRollingRangeOrderByDecimalTypedTest, UnboundedRanges) +{ + for (auto const order_by_scale : {-2, -1, 0, 1, 2}) { + auto const order_by_column_scale = scale_type{order_by_scale}; + this->run_test_unbounded_preceding_to_unbounded_following(order_by_column_scale); + this->run_test_unbounded_preceding_to_current_row(order_by_column_scale); + this->run_test_current_row_to_unbounded_following(order_by_column_scale); + } +} + +} // namespace cudf::test::rolling diff --git a/cpp/tests/rolling/range_window_bounds_test.cpp b/cpp/tests/rolling/range_window_bounds_test.cpp index 99b461f05ee..6372536968a 100644 --- a/cpp/tests/rolling/range_window_bounds_test.cpp +++ b/cpp/tests/rolling/range_window_bounds_test.cpp @@ -151,5 +151,62 @@ TYPED_TEST(NumericRangeWindowBoundsTest, WrongRangeType) cudf::logic_error); } +template +struct DecimalRangeBoundsTest : RangeWindowBoundsTest { +}; + +TYPED_TEST_SUITE(DecimalRangeBoundsTest, cudf::test::FixedPointTypes); + +TYPED_TEST(DecimalRangeBoundsTest, BoundsConstruction) +{ + using namespace numeric; + using DecimalT = TypeParam; + using Rep = cudf::detail::range_rep_type; + + // Interval type must match the decimal type. + static_assert(std::is_same_v, DecimalT>); + + auto const range_3 = + range_window_bounds::get(fixed_point_scalar{Rep{3}, scale_type{0}}); + EXPECT_FALSE(range_3.is_unbounded() && + "range_window_bounds constructed from scalar cannot be unbounded."); + EXPECT_EQ(cudf::detail::range_comparable_value(range_3), Rep{3}); + + auto const range_unbounded = range_window_bounds::unbounded(data_type{type_to_id()}); + EXPECT_TRUE(range_unbounded.is_unbounded() && + "range_window_bounds::unbounded() must return an unbounded range."); +} + +TYPED_TEST(DecimalRangeBoundsTest, Rescale) +{ + using namespace numeric; + using DecimalT = TypeParam; + using RepT = typename DecimalT::rep; + + // Powers of 10. + auto constexpr pow10 = std::array{1, 10, 100, 1000, 10000, 100000}; + + // Check that the rep has expected values at different range scales. + auto const order_by_scale = -2; + auto const order_by_data_type = data_type{type_to_id(), order_by_scale}; + + for (auto const range_scale : {-2, -1, 0, 1, 2}) { + auto const decimal_range_bounds = + range_window_bounds::get(fixed_point_scalar{RepT{20}, scale_type{range_scale}}); + auto const rescaled_range_rep = + cudf::detail::range_comparable_value(decimal_range_bounds, order_by_data_type); + EXPECT_EQ(rescaled_range_rep, RepT{20} * pow10[range_scale - order_by_scale]); + } + + // Order By column scale cannot exceed range scale: + { + auto const decimal_range_bounds = + range_window_bounds::get(fixed_point_scalar{RepT{200}, scale_type{-3}}); + EXPECT_THROW( + cudf::detail::range_comparable_value(decimal_range_bounds, order_by_data_type), + cudf::logic_error); + } +} + } // namespace test } // namespace cudf diff --git a/java/src/main/java/ai/rapids/cudf/Scalar.java b/java/src/main/java/ai/rapids/cudf/Scalar.java index 205efadfe6c..2fb202a72b1 100644 --- a/java/src/main/java/ai/rapids/cudf/Scalar.java +++ b/java/src/main/java/ai/rapids/cudf/Scalar.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -754,6 +754,8 @@ public boolean equals(Object o) { case TIMESTAMP_NANOSECONDS: case DECIMAL64: return getLong() == other.getLong(); + case DECIMAL128: + return getBigDecimal().equals(other.getBigDecimal()); case STRING: return Arrays.equals(getUTF8(), other.getUTF8()); case LIST: @@ -819,6 +821,9 @@ public int hashCode() { valueHash = v.hashCode(); } break; + case DECIMAL128: + valueHash = getBigDecimal().hashCode(); + break; default: throw new IllegalStateException("Unknown scalar type: " + type); } diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index c4a94809269..e90273072f7 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -3915,6 +3915,9 @@ public Table aggregateWindowsOverRanges(AggregationOverWindow... windowAggregate case TIMESTAMP_DAYS: case TIMESTAMP_NANOSECONDS: case TIMESTAMP_MICROSECONDS: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: break; default: throw new IllegalArgumentException("Expected range-based window orderBy's " + diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index df8dce245ca..628ff4d4e0b 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -55,6 +55,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.*; +import java.util.function.IntFunction; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -5777,6 +5778,124 @@ void testRangeWindowingCountUnboundedDESCWithNullsLast() { } } + /** + * Helper for constructing BigInteger from int + * @param x Integer value + * @return BigInteger equivalent of x + */ + private static BigInteger big(int x) + { + return new BigInteger("" + x); + } + + /** + * Helper to get scalar for preceding == Decimal(value), + * with data width depending upon the the order-by + * column index: + * orderby_col_idx = 2 -> Decimal32 + * orderby_col_idx = 3 -> Decimal64 + * orderby_col_idx = 4 -> Decimal128 + */ + private static Scalar getDecimalScalarRangeBounds(int scale, int unscaledValue, int orderby_col_idx) + { + switch(orderby_col_idx) + { + case 2: return Scalar.fromDecimal(scale, unscaledValue); + case 3: return Scalar.fromDecimal(scale, Long.valueOf(unscaledValue)); + case 4: return Scalar.fromDecimal(scale, big(unscaledValue)); + default: + throw new IllegalStateException("Unexpected order by column index: " + + orderby_col_idx); + } + } + + @Test + void testRangeWindowsWithDecimalOrderBy() { + try (Table unsorted = new Table.TestBuilder() + .column(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) // GBY Key + .column(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) // GBY Key + .decimal32Column(-1, 4000, 3000, 2000, 1000, + 4000, 3000, 2000, 1000, + 4000, 3000, 2000, 1000) // Decimal OBY Key + .decimal64Column(-1, 4000l, 3000l, 2000l, 1000l, + 4000l, 3000l, 2000l, 1000l, + 4000l, 3000l, 2000l, 1000l) // Decimal OBY Key + .decimal128Column(-1, RoundingMode.UNNECESSARY, + big(4000), big(3000), big(2000), big(1000), + big(4000), big(3000), big(2000), big(1000), + big(4000), big(3000), big(2000), big(1000)) + .column(9, 1, 5, 7, 2, 8, 9, 7, 6, 6, 0, 8) // Agg Column + .build()) { + + // Columns 2,3,4 are decimal order-by columns of type DECIMAL32, DECIMAL64, + // and DECIMAL128 respectively, with similarly ordered values. + // In the following loop, each decimal type is tested as the order-by column, + // producing the same results with similar range bounds. + for (int decimal_oby_col_idx = 2; decimal_oby_col_idx <= 4; ++decimal_oby_col_idx) { + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), + OrderByArg.asc(1), + OrderByArg.asc(decimal_oby_col_idx)); + ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { + ColumnVector sortedAggColumn = sorted.getColumn(5); + assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); + + // Test Window functionality with range window (200 PRECEDING and 100 FOLLOWING) + try (Scalar preceding200 = getDecimalScalarRangeBounds(0, 200, decimal_oby_col_idx); + Scalar following100 = getDecimalScalarRangeBounds(2, 1, decimal_oby_col_idx); + WindowOptions window = WindowOptions.builder() + .minPeriods(1) + .window(preceding200, following100) + .orderByColumnIndex(decimal_oby_col_idx) + .build()) { + + try (Table windowAggResults = sorted.groupBy(0, 1) + .aggregateWindowsOverRanges(RollingAggregation.count() + .onColumn(5) + .overWindow(window)); + ColumnVector expect = ColumnVector.fromBoxedInts(2, 3, 4, 3, 2, 3, 4, 3, 2, 3, 4, 3)) { + assertColumnsAreEqual(expect, windowAggResults.getColumn(0)); + } + } + + // Test Window functionality with range window (UNBOUNDED PRECEDING and CURRENT ROW) + try (Scalar current_row = getDecimalScalarRangeBounds(0, 0, decimal_oby_col_idx); + WindowOptions window = WindowOptions.builder() + .minPeriods(1) + .unboundedPreceding() + .following(current_row) + .orderByColumnIndex(decimal_oby_col_idx) + .build()) { + + try (Table windowAggResults = sorted.groupBy(0, 1) + .aggregateWindowsOverRanges(RollingAggregation.count() + .onColumn(5) + .overWindow(window)); + ColumnVector expect = ColumnVector.fromBoxedInts(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4)) { + assertColumnsAreEqual(expect, windowAggResults.getColumn(0)); + } + } + + // Test Window functionality with range window (UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING) + try (WindowOptions window = WindowOptions.builder() + .minPeriods(1) + .unboundedPreceding() + .unboundedFollowing() + .orderByColumnIndex(decimal_oby_col_idx) + .build()) { + + try (Table windowAggResults = sorted.groupBy(0, 1) + .aggregateWindowsOverRanges(RollingAggregation.count() + .onColumn(5) + .overWindow(window)); + ColumnVector expect = ColumnVector.fromBoxedInts(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4)) { + assertColumnsAreEqual(expect, windowAggResults.getColumn(0)); + } + } + } + } + } + } + @Test void testGroupByCountWithNulls() { try (Table t1 = new Table.TestBuilder().column(null, null, 1, 1, 1, 1)