From f6f246cd3f0c76d401c290f94a9570694891f8dd Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 15 Apr 2022 14:49:25 -0400 Subject: [PATCH 1/3] Fix rounding error in stod on very small float numbers --- cpp/src/strings/convert/convert_floats.cu | 11 +++++++++-- cpp/tests/strings/floats_tests.cpp | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index b8a10a00f5b..e4c4862c14c 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -130,10 +130,17 @@ __device__ inline double stod(string_view const& d_str) else if (exp_ten < std::numeric_limits::min_exponent10) return double{0}; + double base = sign * static_cast(digits); + exp_ten += 1 - num_digits; - // exp10() is faster than pow(10.0,exp_ten) + // extra floating-point division needed only in extreme range (e-287 - e-307) + // where num_digits may push exp_ten below min_exponent10 (e-307) + if (exp_ten < std::numeric_limits::min_exponent10) { + base = base / exp10(static_cast(num_digits - 1)); + exp_ten += num_digits - 1; // adjust exponent + } + double const exponent = exp10(static_cast(std::abs(exp_ten))); - double const base = sign * static_cast(digits); return exp_ten < 0 ? base / exponent : base * exponent; } diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index bec06f7e601..a22040b713e 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -125,11 +125,13 @@ TEST_F(StringsConvertTest, FromFloats32) TEST_F(StringsConvertTest, ToFloats64) { + // clang-format off std::vector h_strings{ "1234", nullptr, "-876", "543.2", "-0.12", ".25", "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", - "-Inf", "-INFINITY"}; + "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307"}; + // clang-format on cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), From 958f3f97964fb6d600316441f8d3450f644eba80 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 18 Apr 2022 08:57:09 -0400 Subject: [PATCH 2/3] code for supporting subnormal float values from bdice --- cpp/src/strings/convert/convert_floats.cu | 20 +++++++++++++------- cpp/tests/strings/floats_tests.cpp | 4 +++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index e4c4862c14c..a5c02c76e71 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -124,20 +124,26 @@ __device__ inline double stod(string_view const& d_str) exp_ten *= exp_sign; exp_ten += exp_off; exp_ten += num_digits - 1; - if (exp_ten > std::numeric_limits::max_exponent10) + if (exp_ten > std::numeric_limits::max_exponent10) { return sign > 0 ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); - else if (exp_ten < std::numeric_limits::min_exponent10) - return double{0}; + } + // else if (exp_ten < std::numeric_limits::min_exponent10) + // return double{0}; double base = sign * static_cast(digits); exp_ten += 1 - num_digits; - // extra floating-point division needed only in extreme range (e-287 - e-307) - // where num_digits may push exp_ten below min_exponent10 (e-307) - if (exp_ten < std::numeric_limits::min_exponent10) { - base = base / exp10(static_cast(num_digits - 1)); + // If 10^exp_ten would result in a subnormal value, the base and + // exponent should be adjusted so that 10^exp_ten is a normal value + auto const subnormal_shift = std::numeric_limits::min_exponent10 - exp_ten; + if (subnormal_shift > 0) { + // Handle subnormal values. Ensure that both base and exponent are + // normal values before computing their product. + base = base / exp10(static_cast(num_digits - 1 + subnormal_shift)); exp_ten += num_digits - 1; // adjust exponent + auto const exponent = exp10(static_cast(exp_ten + subnormal_shift)); + return base * exponent; } double const exponent = exp10(static_cast(std::abs(exp_ten))); diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index a22040b713e..e0b23538e4c 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -130,7 +130,9 @@ TEST_F(StringsConvertTest, ToFloats64) "1234", nullptr, "-876", "543.2", "-0.12", ".25", "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", - "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307"}; + "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307", + // subnormal numbers + "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" }; // clang-format on cudf::test::strings_column_wrapper strings( h_strings.begin(), From a10e432e478a5e274c02981698a1dead3cf57b47 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 18 Apr 2022 09:18:11 -0400 Subject: [PATCH 3/3] update comments --- cpp/src/strings/convert/convert_floats.cu | 2 -- cpp/tests/strings/floats_tests.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index a5c02c76e71..ef8376e6aca 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -128,8 +128,6 @@ __device__ inline double stod(string_view const& d_str) return sign > 0 ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); } - // else if (exp_ten < std::numeric_limits::min_exponent10) - // return double{0}; double base = sign * static_cast(digits); diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index e0b23538e4c..360ea8be178 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -131,7 +131,7 @@ TEST_F(StringsConvertTest, ToFloats64) "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", "-Inf", "-INFINITY", "1.0", "1.7976931348623157e+308", "1.7976931348623157e-307", - // subnormal numbers + // subnormal numbers: v--- smallest double v--- result is 0 "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" }; // clang-format on cudf::test::strings_column_wrapper strings(