From 6b371ec88e8cbd013652b6ad9b6523d2875b9baa Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 11 Apr 2024 16:13:55 -0400 Subject: [PATCH 1/4] Fix exponent overflow in strings-to-double conversion --- .../detail/convert/string_to_float.cuh | 3 +- cpp/tests/strings/floats_tests.cpp | 37 ++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh index ab934750f9e..f4142b132de 100644 --- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh +++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,6 +102,7 @@ __device__ inline double stod(string_view const& d_str) ch = *in_ptr++; if (ch < '0' || ch > '9') break; exp_ten = (exp_ten * 10) + (int)(ch - '0'); + if (exp_ten >= 1e8) { break; } } } } diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index f668c384787..4f2582fa341 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -25,8 +27,6 @@ #include -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - struct StringsConvertTest : public cudf::test::BaseFixture {}; TEST_F(StringsConvertTest, IsFloat) @@ -89,7 +89,7 @@ TEST_F(StringsConvertTest, ToFloats32) h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_F(StringsConvertTest, FromFloats32) @@ -118,38 +118,41 @@ TEST_F(StringsConvertTest, FromFloats32) h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_F(StringsConvertTest, ToFloats64) { // clang-format off std::vector h_strings{ - "1234", nullptr, "-876", "543.2", "-0.12", ".25", + "1234", "", "-876", "543.2", "-0.12", ".25", "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307", // subnormal numbers: v--- smallest double v--- result is 0 - "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" }; + "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324", + // another very small number + "9.299999257686047e-0005603333574677677" }; // clang-format on - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + auto validity = cudf::test::iterators::null_at(1); + cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end(), validity); std::vector h_expected; std::for_each(h_strings.begin(), h_strings.end(), [&](char const* str) { - h_expected.push_back(str ? std::atof(str) : 0); + h_expected.push_back(std::atof(str)); }); auto strings_view = cudf::strings_column_view(strings); auto results = cudf::strings::to_floats(strings_view, cudf::data_type{cudf::type_id::FLOAT64}); cudf::test::fixed_width_column_wrapper expected( - h_expected.begin(), - h_expected.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + h_expected.begin(), h_expected.end(), validity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + + results = cudf::strings::is_float(strings_view); + cudf::test::fixed_width_column_wrapper is_expected( + {1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, validity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, is_expected); } TEST_F(StringsConvertTest, FromFloats64) @@ -178,7 +181,7 @@ TEST_F(StringsConvertTest, FromFloats64) h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat) From c52480c763c74f9d821db0dd47e08663b99032cd Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 11 Apr 2024 17:47:51 -0400 Subject: [PATCH 2/4] remove debug_utilities include --- cpp/tests/strings/floats_tests.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 4f2582fa341..9fa1a3325b4 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include From 12b8462ee9194e0ee2f635aebc3d5289b353079c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 11 Apr 2024 19:26:54 -0400 Subject: [PATCH 3/4] change float constant to integer --- cpp/include/cudf/strings/detail/convert/string_to_float.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh index f4142b132de..a8c1d30349d 100644 --- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh +++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh @@ -102,7 +102,8 @@ __device__ inline double stod(string_view const& d_str) ch = *in_ptr++; if (ch < '0' || ch > '9') break; exp_ten = (exp_ten * 10) + (int)(ch - '0'); - if (exp_ten >= 1e8) { break; } + // prevent integer overflow + if (exp_ten >= 100'000'000) { break; } } } } From 7e21176b116744c21b9d8b38de49aa29f9ce3f21 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 12 Apr 2024 17:08:09 -0400 Subject: [PATCH 4/4] update comment --- cpp/include/cudf/strings/detail/convert/string_to_float.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh index a8c1d30349d..bbf56cf1446 100644 --- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh +++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh @@ -102,7 +102,8 @@ __device__ inline double stod(string_view const& d_str) ch = *in_ptr++; if (ch < '0' || ch > '9') break; exp_ten = (exp_ten * 10) + (int)(ch - '0'); - // prevent integer overflow + // Prevent integer overflow in exp_ten. 100,000,000 is the largest + // power of ten that can be multiplied by 10 without overflow. if (exp_ten >= 100'000'000) { break; } } }