Skip to content

Commit

Permalink
Fix floating point window range extents. (#13606)
Browse files Browse the repository at this point in the history
This commit fixes the window range calculations for floating-point order by columns.

Window range calculations involve comparing the `delta` value (preceding/following) with the current row value, and capping `current_row - delta` at `numeric_limits::min()`.

It turns out that for `float`/`double` values, `numeric_limits::min()` returns `FLT_MIN` which is the lowest positive finite float value. This causes the erstwhile logic to produce incorrect results when the order-by column contains negative float values.

The fix involves replacing `numeric_limits::min()` with `numeric_limits::lowest()` which returns the true min float value.

Reference:
1. https://en.cppreference.com/w/cpp/types/numeric_limits/min

Authors:
  - MithunR (https://github.com/mythrocks)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Nghia Truong (https://github.com/ttnghia)

URL: #13606
  • Loading branch information
mythrocks authored Jun 26, 2023
1 parent c7e9405 commit 9a3f3a9
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 6 deletions.
21 changes: 15 additions & 6 deletions cpp/src/rolling/grouped_rolling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -247,27 +247,36 @@ __device__ T add_safe(T const& value, T const& delta)
}

/**
* @brief Subtract `delta` from value, and cap at numeric_limits::min(), for signed types.
* @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types.
*
* Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
* the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
*/
template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
__device__ T subtract_safe(T const& value, T const& delta)
{
// delta >= 0;
return (value >= 0 || (value - cuda::std::numeric_limits<T>::min()) >= delta)
return (value >= 0 || (value - cuda::std::numeric_limits<T>::lowest()) >= delta)
? (value - delta)
: cuda::std::numeric_limits<T>::min();
: cuda::std::numeric_limits<T>::lowest();
}

/**
* @brief Subtract `delta` from value, and cap at numeric_limits::min(), for unsigned types.
* @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types.
*
* Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
* the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
*
* This distinction isn't truly relevant for this overload (because float is signed).
* lowest() is kept for uniformity.
*/
template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
__device__ T subtract_safe(T const& value, T const& delta)
{
// delta >= 0;
return ((value - cuda::std::numeric_limits<T>::min()) >= delta)
return ((value - cuda::std::numeric_limits<T>::lowest()) >= delta)
? (value - delta)
: cuda::std::numeric_limits<T>::min();
: cuda::std::numeric_limits<T>::lowest();
}

/**
Expand Down
25 changes: 25 additions & 0 deletions cpp/tests/rolling/grouped_rolling_range_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ struct GroupedRollingRangeOrderByNumericTest : public BaseGroupedRollingRangeOrd
return fwcw<T>(begin, begin + num_rows).release();
}

/// Generate order-by column with values: [-1400, -1300, -1200 ... -300, -200, -100]
[[nodiscard]] column_ptr generate_negative_order_by_column() const
{
auto const begin =
thrust::make_transform_iterator(thrust::make_counting_iterator<cudf::size_type>(0),
[&](T const& i) -> T { return (i - num_rows) * 100; });

return fwcw<T>(begin, begin + num_rows).release();
}

/**
* @brief Run grouped_rolling test with no nulls in the order-by column
*/
Expand All @@ -130,6 +140,20 @@ struct GroupedRollingRangeOrderByNumericTest : public BaseGroupedRollingRangeOrd
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results);
}

/**
* @brief Run grouped_rolling test with no nulls in the order-by column
*/
void run_test_negative_oby() const
{
auto const preceding = make_range_bounds(T{200});
auto const following = make_range_bounds(T{100});
auto const order_by = generate_negative_order_by_column();
auto const results = get_grouped_range_rolling_sum_result(preceding, following, *order_by);
auto const expected_results = bigints_column{{2, 3, 4, 4, 4, 3, 4, 6, 8, 6, 6, 9, 12, 9},
cudf::test::iterators::no_nulls()};
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results);
}

/**
* @brief Run grouped_rolling test with nulls in the order-by column
* (i.e. 2 nulls at the beginning of each group)
Expand Down Expand Up @@ -225,6 +249,7 @@ TYPED_TEST_SUITE(GroupedRollingRangeOrderByFloatingPointTest, cudf::test::Floati
TYPED_TEST(GroupedRollingRangeOrderByFloatingPointTest, BoundedRanges)
{
this->run_test_no_null_oby();
this->run_test_negative_oby();
this->run_test_nulls_in_oby();
}

Expand Down

0 comments on commit 9a3f3a9

Please sign in to comment.