From 6b371ec88e8cbd013652b6ad9b6523d2875b9baa Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 11 Apr 2024 16:13:55 -0400
Subject: [PATCH 1/4] Fix exponent overflow in strings-to-double conversion

---
 .../detail/convert/string_to_float.cuh        |  3 +-
 cpp/tests/strings/floats_tests.cpp            | 37 ++++++++++---------
 2 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
index ab934750f9e..f4142b132de 100644
--- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
+++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -102,6 +102,7 @@ __device__ inline double stod(string_view const& d_str)
           ch = *in_ptr++;
           if (ch < '0' || ch > '9') break;
           exp_ten = (exp_ten * 10) + (int)(ch - '0');
+          if (exp_ten >= 1e8) { break; }
         }
       }
     }
diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp
index f668c384787..4f2582fa341 100644
--- a/cpp/tests/strings/floats_tests.cpp
+++ b/cpp/tests/strings/floats_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/debug_utilities.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 
 #include <cudf/strings/convert/convert_floats.hpp>
 #include <cudf/strings/strings_column_view.hpp>
@@ -25,8 +27,6 @@
 
 #include <vector>
 
-constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
-
 struct StringsConvertTest : public cudf::test::BaseFixture {};
 
 TEST_F(StringsConvertTest, IsFloat)
@@ -89,7 +89,7 @@ TEST_F(StringsConvertTest, ToFloats32)
     h_expected.begin(),
     h_expected.end(),
     thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsConvertTest, FromFloats32)
@@ -118,38 +118,41 @@ TEST_F(StringsConvertTest, FromFloats32)
     h_expected.end(),
     thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsConvertTest, ToFloats64)
 {
   // clang-format off
   std::vector<const char*> h_strings{
-    "1234",   nullptr,    "-876",     "543.2",         "-0.12",   ".25",
+    "1234",   "",         "-876",     "543.2",         "-0.12",   ".25",
     "-.002",  "",         "-0.0",     "1.28e256",      "NaN",     "abc123",
     "123abc", "456e",     "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308",
     "-Inf",   "-INFINITY", "1.0",     "1.7976931348623157e+308",  "1.7976931348623157e-307",
     // subnormal numbers:           v--- smallest double               v--- result is 0
-    "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" };
+    "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324",
+    // another very small number
+    "9.299999257686047e-0005603333574677677" };
   // clang-format on
-  cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+  auto validity = cudf::test::iterators::null_at(1);
+  cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end(), validity);
 
   std::vector<double> h_expected;
   std::for_each(h_strings.begin(), h_strings.end(), [&](char const* str) {
-    h_expected.push_back(str ? std::atof(str) : 0);
+    h_expected.push_back(std::atof(str));
   });
 
   auto strings_view = cudf::strings_column_view(strings);
   auto results = cudf::strings::to_floats(strings_view, cudf::data_type{cudf::type_id::FLOAT64});
 
   cudf::test::fixed_width_column_wrapper<double> expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
+    h_expected.begin(), h_expected.end(), validity);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+
+  results = cudf::strings::is_float(strings_view);
+  cudf::test::fixed_width_column_wrapper<bool> is_expected(
+    {1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, validity);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, is_expected);
 }
 
 TEST_F(StringsConvertTest, FromFloats64)
@@ -178,7 +181,7 @@ TEST_F(StringsConvertTest, FromFloats64)
     h_expected.end(),
     thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat)

From c52480c763c74f9d821db0dd47e08663b99032cd Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 11 Apr 2024 17:47:51 -0400
Subject: [PATCH 2/4] remove debug_utilities include

---
 cpp/tests/strings/floats_tests.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp
index 4f2582fa341..9fa1a3325b4 100644
--- a/cpp/tests/strings/floats_tests.cpp
+++ b/cpp/tests/strings/floats_tests.cpp
@@ -17,7 +17,6 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <cudf_test/debug_utilities.hpp>
 #include <cudf_test/iterator_utilities.hpp>
 
 #include <cudf/strings/convert/convert_floats.hpp>

From 12b8462ee9194e0ee2f635aebc3d5289b353079c Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 11 Apr 2024 19:26:54 -0400
Subject: [PATCH 3/4] change float constant to integer

---
 cpp/include/cudf/strings/detail/convert/string_to_float.cuh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
index f4142b132de..a8c1d30349d 100644
--- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
+++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
@@ -102,7 +102,8 @@ __device__ inline double stod(string_view const& d_str)
           ch = *in_ptr++;
           if (ch < '0' || ch > '9') break;
           exp_ten = (exp_ten * 10) + (int)(ch - '0');
-          if (exp_ten >= 1e8) { break; }
+          // prevent integer overflow
+          if (exp_ten >= 100'000'000) { break; }
         }
       }
     }

From 7e21176b116744c21b9d8b38de49aa29f9ce3f21 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Fri, 12 Apr 2024 17:08:09 -0400
Subject: [PATCH 4/4] update comment

---
 cpp/include/cudf/strings/detail/convert/string_to_float.cuh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
index a8c1d30349d..bbf56cf1446 100644
--- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
+++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh
@@ -102,7 +102,8 @@ __device__ inline double stod(string_view const& d_str)
           ch = *in_ptr++;
           if (ch < '0' || ch > '9') break;
           exp_ten = (exp_ten * 10) + (int)(ch - '0');
-          // prevent integer overflow
+          // Prevent integer overflow in exp_ten. 100,000,000 is the largest
+          // power of ten that can be multiplied by 10 without overflow.
           if (exp_ten >= 100'000'000) { break; }
         }
       }