Skip to content

Commit

Permalink
Fix exponent overflow in strings-to-double conversion (#15517)
Browse files Browse the repository at this point in the history
Adds a check when computing the exponent in the strings-to-double conversion to prevent an integer overflow.

Closes #15508

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Bradley Dice (https://github.com/bdice)

URL: #15517
  • Loading branch information
davidwendt authored Apr 15, 2024
1 parent ca7d85b commit 74b39e2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 18 deletions.
5 changes: 4 additions & 1 deletion cpp/include/cudf/strings/detail/convert/string_to_float.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -102,6 +102,9 @@ __device__ inline double stod(string_view const& d_str)
ch = *in_ptr++;
if (ch < '0' || ch > '9') break;
exp_ten = (exp_ten * 10) + (int)(ch - '0');
// Prevent integer overflow in exp_ten. 100,000,000 is the largest
// power of ten that can be multiplied by 10 without overflow.
if (exp_ten >= 100'000'000) { break; }
}
}
}
Expand Down
36 changes: 19 additions & 17 deletions cpp/tests/strings/floats_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>

#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/strings_column_view.hpp>
Expand All @@ -25,8 +26,6 @@

#include <vector>

constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};

struct StringsConvertTest : public cudf::test::BaseFixture {};

TEST_F(StringsConvertTest, IsFloat)
Expand Down Expand Up @@ -89,7 +88,7 @@ TEST_F(StringsConvertTest, ToFloats32)
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}

TEST_F(StringsConvertTest, FromFloats32)
Expand Down Expand Up @@ -118,38 +117,41 @@ TEST_F(StringsConvertTest, FromFloats32)
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}

TEST_F(StringsConvertTest, ToFloats64)
{
// clang-format off
std::vector<const char*> h_strings{
"1234", nullptr, "-876", "543.2", "-0.12", ".25",
"1234", "", "-876", "543.2", "-0.12", ".25",
"-.002", "", "-0.0", "1.28e256", "NaN", "abc123",
"123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308",
"-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307",
// subnormal numbers: v--- smallest double v--- result is 0
"4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" };
"4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324",
// another very small number
"9.299999257686047e-0005603333574677677" };
// clang-format on
cudf::test::strings_column_wrapper strings(
h_strings.begin(),
h_strings.end(),
thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
auto validity = cudf::test::iterators::null_at(1);
cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end(), validity);

std::vector<double> h_expected;
std::for_each(h_strings.begin(), h_strings.end(), [&](char const* str) {
h_expected.push_back(str ? std::atof(str) : 0);
h_expected.push_back(std::atof(str));
});

auto strings_view = cudf::strings_column_view(strings);
auto results = cudf::strings::to_floats(strings_view, cudf::data_type{cudf::type_id::FLOAT64});

cudf::test::fixed_width_column_wrapper<double> expected(
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
h_expected.begin(), h_expected.end(), validity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);

results = cudf::strings::is_float(strings_view);
cudf::test::fixed_width_column_wrapper<bool> is_expected(
{1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, validity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, is_expected);
}

TEST_F(StringsConvertTest, FromFloats64)
Expand Down Expand Up @@ -178,7 +180,7 @@ TEST_F(StringsConvertTest, FromFloats64)
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}

TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat)
Expand Down

0 comments on commit 74b39e2

Please sign in to comment.