From db90dfd709831b3ba5dbb928b73b64123dcb9a19 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Sun, 21 Apr 2024 17:19:16 +0100 Subject: [PATCH 1/6] Change skew normal quantile to use bracket_and_solve_root. Rather than Newton iterations. Add test case. Fixes https://github.com/boostorg/math/issues/1120 --- .../boost/math/distributions/skew_normal.hpp | 46 ++++++++++++-- test/Jamfile.v2 | 1 + test/git_issue_1120.cpp | 60 +++++++++++++++++++ 3 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 test/git_issue_1120.cpp diff --git a/include/boost/math/distributions/skew_normal.hpp b/include/boost/math/distributions/skew_normal.hpp index f3cc5a6b59..0ee88be8ca 100644 --- a/include/boost/math/distributions/skew_normal.hpp +++ b/include/boost/math/distributions/skew_normal.hpp @@ -27,6 +27,10 @@ #include #include // std::lower_bound, std::distance +#ifdef BOOST_MATH_INSTRUMENT_SKEW_NORMAL_ITERATIONS +extern std::uintmax_t global_iter_count; +#endif + namespace boost{ namespace math{ namespace detail @@ -681,14 +685,46 @@ namespace boost{ namespace math{ // refine the result by numerically searching the root of (p-cdf) - const RealType search_min = support(dist).first; - const RealType search_max = support(dist).second; - const int get_digits = policies::digits();// get digits from policy, std::uintmax_t max_iter = policies::get_max_root_iterations(); // and max iterations. - result = tools::newton_raphson_iterate(detail::skew_normal_quantile_functor(dist, p), result, - search_min, search_max, get_digits, max_iter); + if (result == 0) + result = tools::min_value(); // we need to be one side of zero or the other for the root finder to work. + + auto fun = [&, dist, p](const RealType& x)->RealType { return cdf(dist, x) - p; }; + + RealType f_result = fun(result); + + if (f_result == 0) + return result; + + if (f_result * result > 0) + { + // If the root is in the direction of zero, we need to check that we're the correct side of it: + RealType f_zero = fun(static_cast(0)); + if (f_zero * f_result > 0) + { + // we're the wrong side of zero: + result = -result; + f_result = fun(result); + } + } + + RealType scaling_factor = 1.25; + if (f_result * result > 0) + { + // We're heading towards zero... it's a long way down so use a larger scaling factor: + scaling_factor = 16; + } + + auto p_result = tools::bracket_and_solve_root(fun, result, scaling_factor, true, tools::eps_tolerance(get_digits), max_iter, Policy()); + +#ifdef BOOST_MATH_INSTRUMENT_SKEW_NORMAL_ITERATIONS + global_iter_count += max_iter; +#endif + + result = (p_result.first + p_result.second) / 2; + if (max_iter >= policies::get_max_root_iterations()) { return policies::raise_evaluation_error(function, "Unable to locate solution in a reasonable time: either there is no answer to quantile" // LCOV_EXCL_LINE diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index cce2a8fcf8..20619145b8 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -873,6 +873,7 @@ test-suite distribution_tests : [ run scipy_issue_18302.cpp ../../test/build//boost_unit_test_framework ] [ run scipy_issue_18511.cpp ../../test/build//boost_unit_test_framework ] [ compile scipy_issue_19762.cpp ] + [ run git_issue_1120.cpp ] ; test-suite new_floats : diff --git a/test/git_issue_1120.cpp b/test/git_issue_1120.cpp new file mode 100644 index 0000000000..787a7a45b1 --- /dev/null +++ b/test/git_issue_1120.cpp @@ -0,0 +1,60 @@ +// Copyright John Maddock 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// +// The purpose of this test case is to probe the skew normal quantiles +// for extreme values of skewness and ensure that our root finders don't +// blow up, see https://github.com/boostorg/math/issues/1120 for original +// test case. We test both the maximum number of iterations taken, and the +// overall total (ie average). Any changes to the skew normal should +// ideally NOT cause this test to fail, as this indicates that our root +// finding has been made worse by the change!! +// +// Note that defining BOOST_MATH_INSTRUMENT_SKEW_NORMAL_ITERATIONS +// causes the skew normal quantile to save the number of iterations +// to a global variable "global_iter_count". +// + +#define BOOST_MATH_INSTRUMENT_SKEW_NORMAL_ITERATIONS + +#include +#include +#include "math_unit_test.hpp" + +std::uintmax_t global_iter_count; +std::uintmax_t total_iter_count = 0; + +int main() +{ + using scipy_policy = boost::math::policies::policy>; + + std::mt19937 gen; + std::uniform_real<> location(-3, 3); + std::uniform_real<> scale(0.001, 3); + + for (unsigned skew = 50; skew < 2000; skew += 43) + { + constexpr double pn[] = { 0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4 }; + boost::math::skew_normal_distribution dist(location(gen), scale(gen), skew); + for (unsigned i = 0; i < 7; ++i) + { + global_iter_count = 0; + double x = quantile(dist, pn[i]); + total_iter_count += global_iter_count; + CHECK_LE(global_iter_count, static_cast(55)); + double p = cdf(dist, x); + CHECK_ULP_CLOSE(p, pn[i], 45000); + + global_iter_count = 0; + x = quantile(complement(dist, pn[i])); + total_iter_count += global_iter_count; + CHECK_LE(global_iter_count, static_cast(55)); + p = cdf(complement(dist, x)); + CHECK_ULP_CLOSE(p, pn[i], 45000); + } + } + CHECK_LE(total_iter_count, static_cast(10000)); + return boost::math::test::report_errors(); +} From 991700d1ba298cb3ebf24031ee7dbb7d1ff2e3be Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Sun, 21 Apr 2024 17:49:36 +0100 Subject: [PATCH 2/6] Correct usage. --- test/git_issue_1120.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/git_issue_1120.cpp b/test/git_issue_1120.cpp index 787a7a45b1..11a05182ba 100644 --- a/test/git_issue_1120.cpp +++ b/test/git_issue_1120.cpp @@ -31,8 +31,8 @@ int main() using scipy_policy = boost::math::policies::policy>; std::mt19937 gen; - std::uniform_real<> location(-3, 3); - std::uniform_real<> scale(0.001, 3); + std::uniform_real_distribution location(-3, 3); + std::uniform_real_distribution scale(0.001, 3); for (unsigned skew = 50; skew < 2000; skew += 43) { From a194dc580deccdbb8ae29ada4dee8f1c1ef36ba6 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Mon, 22 Apr 2024 19:02:28 +0100 Subject: [PATCH 3/6] Clean up skew normal quantile with one Newton step. Improve test case. --- .../boost/math/distributions/skew_normal.hpp | 38 ++++++------------- test/git_issue_1120.cpp | 4 +- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/include/boost/math/distributions/skew_normal.hpp b/include/boost/math/distributions/skew_normal.hpp index 0ee88be8ca..7df81b6f5e 100644 --- a/include/boost/math/distributions/skew_normal.hpp +++ b/include/boost/math/distributions/skew_normal.hpp @@ -618,32 +618,6 @@ namespace boost{ namespace math{ return factor * y*y / (x*x); } - namespace detail - { - - template - struct skew_normal_quantile_functor - { - skew_normal_quantile_functor(const boost::math::skew_normal_distribution dist, RealType const& p) - : distribution(dist), prob(p) - { - } - - boost::math::tuple operator()(RealType const& x) - { - RealType c = cdf(distribution, x); - RealType fx = c - prob; // Difference cdf - value - to minimize. - RealType dx = pdf(distribution, x); // pdf is 1st derivative. - // return both function evaluation difference f(x) and 1st derivative f'(x). - return boost::math::make_tuple(fx, dx); - } - private: - const boost::math::skew_normal_distribution distribution; - RealType prob; - }; - - } // namespace detail - template inline RealType quantile(const skew_normal_distribution& dist, const RealType& p) { @@ -725,6 +699,18 @@ namespace boost{ namespace math{ result = (p_result.first + p_result.second) / 2; + // + // Try one last Newton step, just to close up the interval: + // + RealType step = fun(result) / pdf(dist, result); + + if (result - step <= p_result.first) + result = p_result.first; + else if (result - step >= p_result.second) + result = p_result.second; + else + result -= step; + if (max_iter >= policies::get_max_root_iterations()) { return policies::raise_evaluation_error(function, "Unable to locate solution in a reasonable time: either there is no answer to quantile" // LCOV_EXCL_LINE diff --git a/test/git_issue_1120.cpp b/test/git_issue_1120.cpp index 11a05182ba..7f1143c0c1 100644 --- a/test/git_issue_1120.cpp +++ b/test/git_issue_1120.cpp @@ -45,14 +45,14 @@ int main() total_iter_count += global_iter_count; CHECK_LE(global_iter_count, static_cast(55)); double p = cdf(dist, x); - CHECK_ULP_CLOSE(p, pn[i], 45000); + CHECK_ABSOLUTE_ERROR(p, pn[i], 5 * std::numeric_limits::epsilon()); global_iter_count = 0; x = quantile(complement(dist, pn[i])); total_iter_count += global_iter_count; CHECK_LE(global_iter_count, static_cast(55)); p = cdf(complement(dist, x)); - CHECK_ULP_CLOSE(p, pn[i], 45000); + CHECK_ABSOLUTE_ERROR(p, pn[i], 5 * std::numeric_limits::epsilon()); } } CHECK_LE(total_iter_count, static_cast(10000)); From 4f6963a4ed6ae6a177d909681b35d49153d4dad9 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Tue, 23 Apr 2024 09:15:17 +0100 Subject: [PATCH 4/6] clang need a higher tolerance. --- test/git_issue_1120.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/git_issue_1120.cpp b/test/git_issue_1120.cpp index 7f1143c0c1..7b92bb4816 100644 --- a/test/git_issue_1120.cpp +++ b/test/git_issue_1120.cpp @@ -45,14 +45,14 @@ int main() total_iter_count += global_iter_count; CHECK_LE(global_iter_count, static_cast(55)); double p = cdf(dist, x); - CHECK_ABSOLUTE_ERROR(p, pn[i], 5 * std::numeric_limits::epsilon()); + CHECK_ABSOLUTE_ERROR(p, pn[i], 15 * std::numeric_limits::epsilon()); global_iter_count = 0; x = quantile(complement(dist, pn[i])); total_iter_count += global_iter_count; CHECK_LE(global_iter_count, static_cast(55)); p = cdf(complement(dist, x)); - CHECK_ABSOLUTE_ERROR(p, pn[i], 5 * std::numeric_limits::epsilon()); + CHECK_ABSOLUTE_ERROR(p, pn[i], 15 * std::numeric_limits::epsilon()); } } CHECK_LE(total_iter_count, static_cast(10000)); From c728d051c0290ac9692f151f470088b109997d68 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Tue, 23 Apr 2024 11:24:08 +0100 Subject: [PATCH 5/6] Up clang error rates again. --- test/git_issue_1120.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/git_issue_1120.cpp b/test/git_issue_1120.cpp index 7b92bb4816..657e5eac11 100644 --- a/test/git_issue_1120.cpp +++ b/test/git_issue_1120.cpp @@ -45,14 +45,14 @@ int main() total_iter_count += global_iter_count; CHECK_LE(global_iter_count, static_cast(55)); double p = cdf(dist, x); - CHECK_ABSOLUTE_ERROR(p, pn[i], 15 * std::numeric_limits::epsilon()); + CHECK_ABSOLUTE_ERROR(p, pn[i], 45 * std::numeric_limits::epsilon()); global_iter_count = 0; x = quantile(complement(dist, pn[i])); total_iter_count += global_iter_count; CHECK_LE(global_iter_count, static_cast(55)); p = cdf(complement(dist, x)); - CHECK_ABSOLUTE_ERROR(p, pn[i], 15 * std::numeric_limits::epsilon()); + CHECK_ABSOLUTE_ERROR(p, pn[i], 45 * std::numeric_limits::epsilon()); } } CHECK_LE(total_iter_count, static_cast(10000)); From e2875171805e7111f580d23089aeca5b60050b6c Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Tue, 23 Apr 2024 12:56:25 +0100 Subject: [PATCH 6/6] Update max iterations. --- test/git_issue_1120.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/git_issue_1120.cpp b/test/git_issue_1120.cpp index 657e5eac11..3a74d9c218 100644 --- a/test/git_issue_1120.cpp +++ b/test/git_issue_1120.cpp @@ -43,14 +43,14 @@ int main() global_iter_count = 0; double x = quantile(dist, pn[i]); total_iter_count += global_iter_count; - CHECK_LE(global_iter_count, static_cast(55)); + CHECK_LE(global_iter_count, static_cast(60)); double p = cdf(dist, x); CHECK_ABSOLUTE_ERROR(p, pn[i], 45 * std::numeric_limits::epsilon()); global_iter_count = 0; x = quantile(complement(dist, pn[i])); total_iter_count += global_iter_count; - CHECK_LE(global_iter_count, static_cast(55)); + CHECK_LE(global_iter_count, static_cast(60)); p = cdf(complement(dist, x)); CHECK_ABSOLUTE_ERROR(p, pn[i], 45 * std::numeric_limits::epsilon()); }