Fix floating point window range extents. (#13606)

This commit fixes the window range calculations for floating-point order by columns. Window range calculations involve comparing the `delta` value (preceding/following) with the current row value, and capping `current_row - delta` at `numeric_limits::min()`. It turns out that for `float`/`double` values, `numeric_limits::min()` returns `FLT_MIN` which is the lowest positive finite float value. This causes the erstwhile logic to produce incorrect results when the order-by column contains negative float values. The fix involves replacing `numeric_limits::min()` with `numeric_limits::lowest()` which returns the true min float value. Reference: 1. https://en.cppreference.com/w/cpp/types/numeric_limits/min Authors: - MithunR (https://github.com/mythrocks) Approvers: - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) URL: #13606
rapidsai · Jun 26, 2023 · 9a3f3a9 · 9a3f3a9
1 parent c7e9405
commit 9a3f3a9
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 6 deletions.
diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu
@@ -247,27 +247,36 @@ __device__ T add_safe(T const& value, T const& delta)
 }
 
 /**
- * @brief Subtract `delta` from value, and cap at numeric_limits::min(), for signed types.
+ * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types.
+ *
+ * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
+ * the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
  */
 template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
 __device__ T subtract_safe(T const& value, T const& delta)
 {
   // delta >= 0;
-  return (value >= 0 || (value - cuda::std::numeric_limits<T>::min()) >= delta)
+  return (value >= 0 || (value - cuda::std::numeric_limits<T>::lowest()) >= delta)
            ? (value - delta)
-           : cuda::std::numeric_limits<T>::min();
+           : cuda::std::numeric_limits<T>::lowest();
 }
 
 /**
- * @brief Subtract `delta` from value, and cap at numeric_limits::min(), for unsigned types.
+ * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types.
+ *
+ * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
+ * the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
+ *
+ * This distinction isn't truly relevant for this overload (because float is signed).
+ * lowest() is kept for uniformity.
  */
 template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
 __device__ T subtract_safe(T const& value, T const& delta)
 {
   // delta >= 0;
-  return ((value - cuda::std::numeric_limits<T>::min()) >= delta)
+  return ((value - cuda::std::numeric_limits<T>::lowest()) >= delta)
            ? (value - delta)
-           : cuda::std::numeric_limits<T>::min();
+           : cuda::std::numeric_limits<T>::lowest();
 }
 
 /**

diff --git a/cpp/tests/rolling/grouped_rolling_range_test.cpp b/cpp/tests/rolling/grouped_rolling_range_test.cpp
@@ -116,6 +116,16 @@ struct GroupedRollingRangeOrderByNumericTest : public BaseGroupedRollingRangeOrd
     return fwcw<T>(begin, begin + num_rows).release();
   }
 
+  /// Generate order-by column with values: [-1400, -1300, -1200 ... -300, -200, -100]
+  [[nodiscard]] column_ptr generate_negative_order_by_column() const
+  {
+    auto const begin =
+      thrust::make_transform_iterator(thrust::make_counting_iterator<cudf::size_type>(0),
+                                      [&](T const& i) -> T { return (i - num_rows) * 100; });
+
+    return fwcw<T>(begin, begin + num_rows).release();
+  }
+
   /**
    * @brief Run grouped_rolling test with no nulls in the order-by column
    */
@@ -130,6 +140,20 @@ struct GroupedRollingRangeOrderByNumericTest : public BaseGroupedRollingRangeOrd
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results);
   }
 
+  /**
+   * @brief Run grouped_rolling test with no nulls in the order-by column
+   */
+  void run_test_negative_oby() const
+  {
+    auto const preceding = make_range_bounds(T{200});
+    auto const following = make_range_bounds(T{100});
+    auto const order_by  = generate_negative_order_by_column();
+    auto const results   = get_grouped_range_rolling_sum_result(preceding, following, *order_by);
+    auto const expected_results = bigints_column{{2, 3, 4, 4, 4, 3, 4, 6, 8, 6, 6, 9, 12, 9},
+                                                 cudf::test::iterators::no_nulls()};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_results);
+  }
+
   /**
    * @brief Run grouped_rolling test with nulls in the order-by column
    * (i.e. 2 nulls at the beginning of each group)
@@ -225,6 +249,7 @@ TYPED_TEST_SUITE(GroupedRollingRangeOrderByFloatingPointTest, cudf::test::Floati
 TYPED_TEST(GroupedRollingRangeOrderByFloatingPointTest, BoundedRanges)
 {
   this->run_test_no_null_oby();
+  this->run_test_negative_oby();
   this->run_test_nulls_in_oby();
 }