From 74b39e213a4e6a6a1cf9f0e8d19a112fc6639214 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:57:25 -0400 Subject: [PATCH] Fix exponent overflow in strings-to-double conversion (#15517) Adds a check when computing the exponent in the strings-to-double conversion to prevent an integer overflow. Closes #15508 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/15517 --- .../detail/convert/string_to_float.cuh | 5 ++- cpp/tests/strings/floats_tests.cpp | 36 ++++++++++--------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh index ab934750f9e..bbf56cf1446 100644 --- a/cpp/include/cudf/strings/detail/convert/string_to_float.cuh +++ b/cpp/include/cudf/strings/detail/convert/string_to_float.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,6 +102,9 @@ __device__ inline double stod(string_view const& d_str) ch = *in_ptr++; if (ch < '0' || ch > '9') break; exp_ten = (exp_ten * 10) + (int)(ch - '0'); + // Prevent integer overflow in exp_ten. 100,000,000 is the largest + // power of ten that can be multiplied by 10 without overflow. + if (exp_ten >= 100'000'000) { break; } } } } diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index f668c384787..9fa1a3325b4 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -25,8 +26,6 @@ #include -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - struct StringsConvertTest : public cudf::test::BaseFixture {}; TEST_F(StringsConvertTest, IsFloat) @@ -89,7 +88,7 @@ TEST_F(StringsConvertTest, ToFloats32) h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_F(StringsConvertTest, FromFloats32) @@ -118,38 +117,41 @@ TEST_F(StringsConvertTest, FromFloats32) h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_F(StringsConvertTest, ToFloats64) { // clang-format off std::vector h_strings{ - "1234", nullptr, "-876", "543.2", "-0.12", ".25", + "1234", "", "-876", "543.2", "-0.12", ".25", "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307", // subnormal numbers: v--- smallest double v--- result is 0 - "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" }; + "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324", + // another very small number + "9.299999257686047e-0005603333574677677" }; // clang-format on - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + auto validity = cudf::test::iterators::null_at(1); + cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end(), validity); std::vector h_expected; std::for_each(h_strings.begin(), h_strings.end(), [&](char const* str) { - h_expected.push_back(str ? std::atof(str) : 0); + h_expected.push_back(std::atof(str)); }); auto strings_view = cudf::strings_column_view(strings); auto results = cudf::strings::to_floats(strings_view, cudf::data_type{cudf::type_id::FLOAT64}); cudf::test::fixed_width_column_wrapper expected( - h_expected.begin(), - h_expected.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + h_expected.begin(), h_expected.end(), validity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + + results = cudf::strings::is_float(strings_view); + cudf::test::fixed_width_column_wrapper is_expected( + {1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, validity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, is_expected); } TEST_F(StringsConvertTest, FromFloats64) @@ -178,7 +180,7 @@ TEST_F(StringsConvertTest, FromFloats64) h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat)