From 9a3f3a9f0d67f71bf7376e0251d67fdb725f512f Mon Sep 17 00:00:00 2001 From: MithunR Date: Sun, 25 Jun 2023 22:37:00 -0700 Subject: [PATCH] Fix floating point window range extents. (#13606) This commit fixes the window range calculations for floating-point order by columns. Window range calculations involve comparing the `delta` value (preceding/following) with the current row value, and capping `current_row - delta` at `numeric_limits::min()`. It turns out that for `float`/`double` values, `numeric_limits::min()` returns `FLT_MIN` which is the lowest positive finite float value. This causes the erstwhile logic to produce incorrect results when the order-by column contains negative float values. The fix involves replacing `numeric_limits::min()` with `numeric_limits::lowest()` which returns the true min float value. Reference: 1. https://en.cppreference.com/w/cpp/types/numeric_limits/min Authors: - MithunR (https://github.com/mythrocks) Approvers: - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/13606 --- cpp/src/rolling/grouped_rolling.cu | 21 +++++++++++----- .../rolling/grouped_rolling_range_test.cpp | 25 +++++++++++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 7a084d7ced4..af253a1b253 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -247,27 +247,36 @@ __device__ T add_safe(T const& value, T const& delta) } /** - * @brief Subtract `delta` from value, and cap at numeric_limits::min(), for signed types. + * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types. + * + * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns + * the smallest finite value, as opposed to min() which returns the smallest _positive_ value. */ template ::is_signed)> __device__ T subtract_safe(T const& value, T const& delta) { // delta >= 0; - return (value >= 0 || (value - cuda::std::numeric_limits::min()) >= delta) + return (value >= 0 || (value - cuda::std::numeric_limits::lowest()) >= delta) ? (value - delta) - : cuda::std::numeric_limits::min(); + : cuda::std::numeric_limits::lowest(); } /** - * @brief Subtract `delta` from value, and cap at numeric_limits::min(), for unsigned types. + * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types. + * + * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns + * the smallest finite value, as opposed to min() which returns the smallest _positive_ value. + * + * This distinction isn't truly relevant for this overload (because float is signed). + * lowest() is kept for uniformity. */ template ::is_signed)> __device__ T subtract_safe(T const& value, T const& delta) { // delta >= 0; - return ((value - cuda::std::numeric_limits::min()) >= delta) + return ((value - cuda::std::numeric_limits::lowest()) >= delta) ? (value - delta) - : cuda::std::numeric_limits::min(); + : cuda::std::numeric_limits::lowest(); } /** diff --git a/cpp/tests/rolling/grouped_rolling_range_test.cpp b/cpp/tests/rolling/grouped_rolling_range_test.cpp index 6a8eea8755e..c2a6eb104df 100644 --- a/cpp/tests/rolling/grouped_rolling_range_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_range_test.cpp @@ -116,6 +116,16 @@ struct GroupedRollingRangeOrderByNumericTest : public BaseGroupedRollingRangeOrd return fwcw(begin, begin + num_rows).release(); } + /// Generate order-by column with values: [-1400, -1300, -1200 ... -300, -200, -100] + [[nodiscard]] column_ptr generate_negative_order_by_column() const + { + auto const begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), + [&](T const& i) -> T { return (i - num_rows) * 100; }); + + return fwcw(begin, begin + num_rows).release(); + } + /** * @brief Run grouped_rolling test with no nulls in the order-by column */ @@ -130,6 +140,20 @@ struct GroupedRollingRangeOrderByNumericTest : public BaseGroupedRollingRangeOrd CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); } + /** + * @brief Run grouped_rolling test with no nulls in the order-by column + */ + void run_test_negative_oby() const + { + auto const preceding = make_range_bounds(T{200}); + auto const following = make_range_bounds(T{100}); + auto const order_by = generate_negative_order_by_column(); + auto const results = get_grouped_range_rolling_sum_result(preceding, following, *order_by); + auto const expected_results = bigints_column{{2, 3, 4, 4, 4, 3, 4, 6, 8, 6, 6, 9, 12, 9}, + cudf::test::iterators::no_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results); + } + /** * @brief Run grouped_rolling test with nulls in the order-by column * (i.e. 2 nulls at the beginning of each group) @@ -225,6 +249,7 @@ TYPED_TEST_SUITE(GroupedRollingRangeOrderByFloatingPointTest, cudf::test::Floati TYPED_TEST(GroupedRollingRangeOrderByFloatingPointTest, BoundedRanges) { this->run_test_no_null_oby(); + this->run_test_negative_oby(); this->run_test_nulls_in_oby(); }