diff --git a/.editorconfig b/.editorconfig index 89ddac5b..6882897b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,7 +9,6 @@ indent_style = space indent_size = 4 insert_final_newline = true trim_trailing_whitespace = true -max_line_length = 79 # 2 space indentation [{*.bib, *.json,*.jsonc,*.yaml,*.yml}] diff --git a/.vscode/extensions.json b/.vscode/extensions.json index b2c2205d..12b89215 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -5,7 +5,8 @@ "davidanson.vscode-markdownlint", "detachhead.basedpyright", "editorconfig.editorconfig", - "ms-python.python" + "ms-python.python", + "notzaki.pandocciter" ], "unwantedRecommendations": [ "ms-pyright.pyright", diff --git a/.vscode/settings.json b/.vscode/settings.json index 0967ef42..b757e6cc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1 +1,19 @@ -{} +{ + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.rulers": [79, 130] + }, + "[bibtex]": { + "editor.rulers": [] + }, + "[markdown]": { + "editor.rulers": [80] + }, + "[markdown_latex_combined]": { + "editor.rulers": [80] + }, + "markdown.extension.completion.root": "docs", + "PandocCiter.DefaultBib": "docs/refs.bib", + "PandocCiter.DefaultBibs": ["docs"], + "python.testing.pytestEnabled": true +} diff --git a/docs/api/distributions.md b/docs/api/distributions.md index ed521a41..f55f64f7 100644 --- a/docs/api/distributions.md +++ b/docs/api/distributions.md @@ -1,20 +1,36 @@ # Probability Distributions - - -::: lmo.distributions - options: - heading_level: 2 - ## `scipy.stats` integration -::: lmo.contrib.scipy_stats +::: lmo.contrib.scipy_stats.l_rv_generic + options: + show_bases: false + show_root_full_path: false + show_root_members_full_path: false + +## Parametric + +::: lmo.distributions.kumaraswamy + options: + show_symbol_type_heading: false + show_labels: false +::: lmo.distributions.wakeby + options: + show_symbol_type_heading: false + show_labels: false +::: lmo.distributions.genlambda + options: + show_symbol_type_heading: false + show_labels: false + +## Nonparametric + +::: lmo.distributions.l_poly options: - heading_level: 3 - show_root_heading: false - show_root_toc_entry: false - members: - - l_rv_generic + group_by_category: false + show_labels: false + filters: + - "!^(a|b|random_state)$" diff --git a/docs/distributions.md b/docs/distributions.md index b192fb69..b883dadd 100644 --- a/docs/distributions.md +++ b/docs/distributions.md @@ -1,5 +1,10 @@ +--- +hide: + - navigation +--- + -# L-moments of common probability distributions +# Exact L-moments of probability distributions This page lists theoretical L-moments [@hosking1990] of popular probability distributions. @@ -33,14 +38,14 @@ Numerical calculation of these L-statistics using [`scipy.stats`][scipy.stats] distributions, refer to [`rv_continuous.l_stats`][lmo.contrib.scipy_stats.l_rv_generic.l_stats]. -For direct calculation of the L-stats from a CDF or PPF, see -[`l_stats_from_cdf`][lmo.theoretical.l_stats_from_cdf] or -[`l_stats_from_ppf`][lmo.theoretical.l_stats_from_ppf], respectively. +For direct calculation of the L-stats from a CDF or PPF function, see + +- [`lmo.theoretical.l_stats_from_cdf`][lmo.theoretical.l_stats_from_cdf] +- [`lmo.theoretical.l_stats_from_ppf`][lmo.theoretical.l_stats_from_ppf] /// -An overview of the L-location, L-scale, L-skewness and L-kurtosis, -of a bunch of popular univariate probability distributions, for which they -exist (in closed form). 
+An overview of the exact L-location, L-scale, L-skewness and L-kurtosis +of some well-known (univariate) probability distributions. ### L-stats @@ -782,8 +787,8 @@ compactly expressed as & \text{if } \alpha = 0 \wedge r = 1 \\ \displaystyle \frac{1}{\alpha r} \left[ \frac - {\B(t + 1 - \alpha,\ r - 1 + \alpha)} - {\B(r + s + t - 1 - \alpha,\ \alpha)} + {\B(r + \alpha - 1,\ t - \alpha + 1)} + {\B(\alpha,\ r + s + t - \alpha + 1)} - \ffact{1}{r} \right] & \text{otherwise,} @@ -824,7 +829,7 @@ There are several notable special cases of the GPD: ### Burr III / Dagum -The *Burr III* distribution [^BURR], also known as the +The *Burr III* distribution [@burr1942], also known as the [*Dagum distribution*](https://wikipedia.org/wiki/Dagum_distribution), has two shape parameters \( \alpha \) and \( \beta \), both restricted to the positive reals @@ -964,7 +969,7 @@ Its general \( r \)-th trimmed L-moment are: \tlmoment{s,t}{r} = \frac{1}{r} \sum_{k = t + 1}^{r + s + t} - (-1)^{k - 1} + (-1)^{k - t - 1} \binom{r + k - 2}{r + t - 1} \binom{r + s + t}{k} \frac{\B\bigl(1 / \alpha,\ 1 + k \beta \bigr)}{\alpha} diff --git a/docs/examples/lmm.ipynb b/docs/examples/lmm.ipynb index eed2aa5c..04aa8bcc 100644 --- a/docs/examples/lmm.ipynb +++ b/docs/examples/lmm.ipynb @@ -1,12 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fitting the Generalized Extreme Value Distribution " - ] - }, { "cell_type": "code", "execution_count": 1, diff --git a/docs/examples/visual_intro.ipynb b/docs/examples/visual_intro.ipynb index 28ba97ed..1916292c 100644 --- a/docs/examples/visual_intro.ipynb +++ b/docs/examples/visual_intro.ipynb @@ -4,14 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A visual introduction to L-moments\n" + "## Required dependencies\n", + "\n", + "This notebook requires that you have \n", + "[`matplotlib`](https://pypi.org/project/matplotlib/) installed,\n", + "and, of course, [Lmo](https://pypi.org/project/lmo/) (preferrably the latest\n", + "stable release)." ] }, { diff --git a/docs/styles/theme.css b/docs/styles/theme.css index cbcc2e2c..ed46a035 100644 --- a/docs/styles/theme.css +++ b/docs/styles/theme.css @@ -45,11 +45,15 @@ tr.row-double-top > td { border-top: 0.15rem double var(--md-typeset-table-color) !important; } +.md-grid { + max-width: 1440px; +} + /* Workarounds */ -td, +/* td, th { white-space: nowrap; -} +} */ .md-typeset__scrollwrap > .md-typeset__table > table, .md-typeset div.arithmatex { overflow-y: hidden; diff --git a/lmo/_lm.py b/lmo/_lm.py index 3b2c1a1e..439bf24e 100644 --- a/lmo/_lm.py +++ b/lmo/_lm.py @@ -329,7 +329,7 @@ def l_moment( - $(0, 0)$ or $(0)$: The original **L**-moment, introduced by Hosking in 1990. - $(t, t)$ or $(t)$: **TL**-moment (**T**rimmed L-moment) - $\\lambda_r^t$, with symmetric trimming. First introduced by + $\lambda_r^{(t)}$, with symmetric trimming. First introduced by Elamir & Seheult in 2003, and refined by Hosking in 2007. Generally more robust than L-moments. Useful for fitting pathological distributions, such as the Cauchy distribution. 
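[Reviewer note] To make the trimming options described in the `l_moment` docstring above concrete, here is a minimal sketch. It assumes the public `lmo.l_moment(a, r, trim=...)` signature that this diff already relies on (`l_moment_est(x, r, trim=trim)` inside `l_fit`); the Cauchy sample and seed are purely illustrative:

```python
import numpy as np
import lmo

rng = np.random.default_rng(12260)
x = rng.standard_cauchy(1_000)  # heavy-tailed; conventional moments do not exist

# trim=(0, 0): plain L-moments; their population counterparts do not exist
# for the Cauchy distribution, so these sample estimates are unstable
print(lmo.l_moment(x, [1, 2, 3, 4]))

# trim=1, i.e. (1, 1): symmetric TL-moments, robust against the heavy tails
print(lmo.l_moment(x, [1, 2, 3, 4], trim=1))
```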
diff --git a/lmo/contrib/scipy_stats.py b/lmo/contrib/scipy_stats.py index afbca912..cec0c714 100644 --- a/lmo/contrib/scipy_stats.py +++ b/lmo/contrib/scipy_stats.py @@ -5,12 +5,12 @@ """ from __future__ import annotations +import contextlib from collections.abc import Callable, Mapping, Sequence from typing import ( Any, ClassVar, Literal, - NamedTuple, Protocol, TypeAlias, TypeVar, @@ -38,6 +38,7 @@ moments_to_ratio, round0, ) +from lmo.distributions._lm import get_lm_func, has_lm_func from lmo.theoretical import ( l_moment_cov_from_cdf, l_moment_from_cdf, @@ -110,7 +111,7 @@ class l_rv_generic(PatchClass): a: float | None b: float | None badvalue: float | None - name: int + name: str numargs: int random_state: np.random.RandomState | np.random.Generator shapes: str @@ -155,7 +156,7 @@ def ppf(q: float, /) -> float: def _l_moment( self, - r: npt.NDArray[np.int64], + r: npt.NDArray[np.intp], *args: Any, trim: _Tuple2[int] | _Tuple2[float] = (0, 0), quad_opts: lspt.QuadOptions | None = None, @@ -165,40 +166,37 @@ def _l_moment( `loc=0` and `scale=1`). Todo: - - Sparse caching; key as `(self.name, args, r, trim)`, using a + - Sparse caching; key as `(self, args, r, trim)`, using a priority queue. Prefer small `r` and `sum(trim)`, skip fractional trim. - - Dispatch mechanism for providing known theoretical L-moments - of specific distributions, `r` and `trim`. - """ + name = self.name + if quad_opts is None and has_lm_func(name): + with contextlib.suppress(NotImplementedError): + return get_lm_func(name)(r, trim[0], trim[1], *args) + cdf, ppf = self._get_xxf(*args) - lmbda_r = l_moment_from_cdf( - cdf, - r, - trim=trim, - support=self._get_support(*args), - ppf=ppf, - quad_opts=quad_opts, - ) + + # TODO: use ppf when appropriate (e.g. genextreme, tukeylambda, kappa4) + with np.errstate(over='ignore', under='ignore'): + lmbda_r = l_moment_from_cdf( + cdf, + r, + trim=trim, + support=self._get_support(*args), + ppf=ppf, + quad_opts=quad_opts, + ) # re-wrap scalars in 0-d arrays (lmo.theoretical unpacks them) return np.asarray(lmbda_r) - def _logqdf( - self, - u: _ArrF8, - *args: Any, - ) -> _ArrF8: + @np.errstate(divide='ignore') + def _logqdf(self, u: _ArrF8, *args: Any) -> _ArrF8: """Overridable log quantile distribution function (QDF).""" - with np.errstate(divide='ignore'): - return -self._logpxf(self._ppf(u, *args), *args) + return -self._logpxf(self._ppf(u, *args), *args) - def _qdf( - self, - u: _ArrF8, - *args: Any, - ) -> _ArrF8: + def _qdf(self, u: _ArrF8, *args: Any) -> _ArrF8: r""" Overridable quantile distribution function (QDF). @@ -520,12 +518,7 @@ def l_stats( **kwds, ) - def l_loc( - self, - *args: Any, - trim: lmt.AnyTrim = 0, - **kwds: Any, - ) -> float: + def l_loc(self, *args: Any, trim: lmt.AnyTrim = 0, **kwds: Any) -> float: """ L-location of the distribution, i.e. the 1st L-moment. @@ -536,12 +529,7 @@ def l_loc( return float(self.l_moment(1, *args, trim=trim, **kwds)) - def l_scale( - self, - *args: Any, - trim: lmt.AnyTrim = 0, - **kwds: Any, - ) -> float: + def l_scale(self, *args: Any, trim: lmt.AnyTrim = 0, **kwds: Any) -> float: """ L-scale of the distribution, i.e. the 2nd L-moment. @@ -549,31 +537,21 @@ def l_scale( """ return float(self.l_moment(2, *args, trim=trim, **kwds)) - def l_skew( - self, - *args: Any, - trim: lmt.AnyTrim = 0, - **kwds: Any, - ) -> float: + def l_skew(self, *args: Any, trim: lmt.AnyTrim = 0, **kwds: Any) -> float: """L-skewness coefficient of the distribution; the 3rd L-moment ratio. Alias for `X.l_ratio(3, 2, ...)`. 
""" return float(self.l_ratio(3, 2, *args, trim=trim, **kwds)) - def l_kurtosis( - self, - *args: Any, - trim: lmt.AnyTrim = 0, - **kwds: Any, - ) -> float: + def l_kurt(self, *args: Any, trim: lmt.AnyTrim = 0, **kwds: Any) -> float: """L-kurtosis coefficient of the distribution; the 4th L-moment ratio. Alias for `X.l_ratio(4, 2, ...)`. """ return float(self.l_ratio(4, 2, *args, trim=trim, **kwds)) - l_kurt = l_kurtosis + l_kurtosis = l_kurt def l_moments_cov( self, @@ -1139,18 +1117,18 @@ def l_fit( (Method of L-moment): >>> norm.l_fit(x, random_state=rng) - FitArgs(loc=0.0033145, scale=0.96179) + (0.0033145, 0.96179) >>> norm.l_fit(x, trim=1, random_state=rng) - FitArgs(loc=0.019765, scale=0.96749) + (0.0196385, 0.96861) To use more L-moments than the number of parameters, two in this case, `n_extra` can be used. This will use the L-GMM (Generalized Method of L-Moments), which results in slightly better estimates: >>> norm.l_fit(x, n_extra=1, random_state=rng) - FitArgs(loc=0.0039747, scale=0.96233) + (0.0039747, .96233) >>> norm.l_fit(x, trim=1, n_extra=1, random_state=rng) - FitArgs(loc=-0.00127874, scale=0.968547) + (-0.00127874, 0.968547) Parameters: data: @@ -1225,29 +1203,33 @@ def l_fit( scipy_fit(self, data, bounds=bounds, guess=args or None), ).params - _lmo_cache = {} + x = np.asarray(data) + r = np.arange(1, len(args0) + n_extra + 1) + + _lmo_cache: dict[tuple[float, ...], _ArrF8] = {} _lmo_fn = self._l_moment # temporary cache to speed up L-moment calculations with the same # shape args def lmo_fn( - r: npt.NDArray[np.int64], + r: npt.NDArray[np.intp], *args: float, trim: tuple[int, int] | tuple[float, float] = (0, 0), ) -> _ArrF8: shapes, loc, scale = args[:-2], args[-2], args[-1] - # r and trim will be the same within inference.fit; safe to ignore + # r and trim are constants, so it's safe to ignore them here if shapes in _lmo_cache: - lmbda_r = np.asarray(_lmo_cache[shapes], np.float64) + lmbda_r = _lmo_cache[shapes] else: lmbda_r = _lmo_fn(r, *shapes, trim=trim) - _lmo_cache[shapes] = tuple(lmbda_r) + lmbda_r.setflags(write=False) # prevent cache corruption + _lmo_cache[shapes] = lmbda_r - if scale != 1: - lmbda_r[r >= 1] *= scale + # ensure we're working with a copy + lmbda_r = lmbda_r * scale # noqa: PLR6104 if loc != 0: - lmbda_r[r == 1] += loc + lmbda_r[0] += loc return lmbda_r kwargs0: dict[str, Any] = { @@ -1259,14 +1241,11 @@ def lmo_fn( # => weight matrix is constant between steps, use 1 step by default kwargs0['k'] = 1 - x = np.asarray(data) - r = np.arange(1, len(args0) + n_extra + 1) - result = inference.fit( ppf=self.ppf, args0=args0, n_obs=x.size, - l_moments=l_moment_est(x, r, trim=trim, sort='quicksort'), + l_moments=l_moment_est(x, r, trim=trim), r=r, trim=trim, l_moment_fn=lmo_fn, @@ -1275,12 +1254,7 @@ def lmo_fn( if full_output: return result - params_and_types = [ - (param.name, int if param.integrality else float) - for param in self._param_info() - ] - FitArgs = NamedTuple('FitArgs', params_and_types) - return FitArgs(*result.args) + return tuple(map(float, result.args)) def l_fit_loc_scale( self, @@ -1342,7 +1316,6 @@ def l_moment( trim: lmt.AnyTrim = ..., quad_opts: lspt.QuadOptions | None = ..., ) -> _ArrF8: ... - @overload def l_moment( self, @@ -1351,7 +1324,6 @@ def l_moment( trim: lmt.AnyTrim = ..., quad_opts: lspt.QuadOptions | None = ..., ) -> np.float64: ... 
- def l_moment( # noqa: D102 self, order: lmt.AnyOrder | lmt.AnyOrderND, @@ -1376,7 +1348,6 @@ def l_ratio( trim: lmt.AnyTrim = ..., quad_opts: lspt.QuadOptions | None = ..., ) -> _ArrF8: ... - @overload def l_ratio( self, @@ -1386,7 +1357,6 @@ def l_ratio( trim: lmt.AnyTrim = ..., quad_opts: lspt.QuadOptions | None = ..., ) -> np.float64: ... - def l_ratio( # noqa: D102 self, order: lmt.AnyOrder | lmt.AnyOrderND, diff --git a/lmo/distributions/__init__.py b/lmo/distributions/__init__.py index 77e85506..1f807dc7 100644 --- a/lmo/distributions/__init__.py +++ b/lmo/distributions/__init__.py @@ -6,69 +6,79 @@ from __future__ import annotations -from typing import Final, cast +from typing import TYPE_CHECKING, Final, cast import lmo.typing.scipy as lspt +from . import _lm from ._genlambda import genlambda_gen from ._kumaraswamy import kumaraswamy_gen +from ._lm import * # noqa: F403 from ._nonparametric import l_poly from ._wakeby import wakeby_gen -__all__ = 'l_poly', 'kumaraswamy', 'wakeby', 'genlambda' +__all__ = ['l_poly', 'kumaraswamy', 'wakeby', 'genlambda'] +__all__ += _lm.__all__ -wakeby: Final = cast( - lspt.RVContinuous, - wakeby_gen(a=0.0, name='wakeby'), -) -r"""A Wakeby random variable, a generalization of -[`scipy.stats.genpareto`][scipy.stats.genpareto]. +# mkdocstring workaround +if not TYPE_CHECKING: + wakeby = wakeby_gen(name='wakeby', a=0.0) + r"""A Wakeby random variable, a generalization of + [`scipy.stats.genpareto`][scipy.stats.genpareto]. -[`wakeby`][wakeby] takes \( b \), \( d \) and \( f \) as shape parameters. - -For a detailed description of the Wakeby distribution, refer to -[Distributions - Wakeby](../distributions.md#wakeby). -""" + [`wakeby`][wakeby] takes \( b \), \( d \) and \( f \) as shape parameters. + For a detailed description of the Wakeby distribution, refer to + [Distributions - Wakeby](../distributions.md#wakeby). + """ +else: + wakeby: Final = cast(lspt.RVContinuous, wakeby_gen(a=0.0, name='wakeby')) -kumaraswamy: Final = cast( - lspt.RVContinuous, - kumaraswamy_gen(a=0.0, b=1.0, name='kumaraswamy'), -) -r""" -A Kumaraswamy random variable, similar to -[`scipy.stats.beta`][scipy.stats.beta]. +# mkdocstring workaround +if not TYPE_CHECKING: + kumaraswamy = kumaraswamy_gen(name='kumaraswamy', a=0.0, b=1.0) + r""" + A Kumaraswamy random variable, similar to + [`scipy.stats.beta`][scipy.stats.beta]. -The probability density function for -[`kumaraswamy`][lmo.distributions.kumaraswamy] is: + The probability density function for + [`kumaraswamy`][lmo.distributions.kumaraswamy] is: -\[ - f(x, a, b) = a x^{a - 1} b \left(1 - x^a\right)^{b - 1} -\] + \[ + f(x, a, b) = a x^{a - 1} b \left(1 - x^a\right)^{b - 1} + \] -for \( 0 < x < 1,\ a > 0,\ b > 0 \). + for \( 0 < x < 1,\ a > 0,\ b > 0 \). -[`kumaraswamy`][kumaraswamy] takes \( a \) and \( b \) as shape parameters. + [`kumaraswamy`][kumaraswamy] takes \( a \) and \( b \) as shape parameters. -See Also: - - [Theoretical L-moments - Kumaraswamy](../distributions.md#kumaraswamy) -""" + See Also: + - [Theoretical L-moments: Kumaraswamy](../distributions.md#kumaraswamy) + """ +else: + kumaraswamy: Final = cast( + lspt.RVContinuous, + kumaraswamy_gen(a=0.0, b=1.0, name='kumaraswamy'), + ) +# mkdocstring workaround +if not TYPE_CHECKING: + genlambda = genlambda_gen(name='genlambda') + r"""A generalized Tukey-Lambda random variable. -genlambda: Final = cast( - lspt.RVContinuous, - genlambda_gen(name='genlambda'), -) -r"""A generalized Tukey-Lambda random variable. 
+ `genlambda` takes `b`, `d` and `f` as shape parameters. + `b` and `d` can be any float, and `f` requires `-1 <= f <= 1`. -`genlambda` takes `b`, `d` and `f` as shape parameters. -`b` and `d` can be any float, and `f` requires `-1 <= f <= 1`. + If `f == 0` and `b == d`, `genlambda` is equivalent to + [`scipy.stats.tukeylambda`][scipy.stats.tukeylambda], with `b` (or `d`) as + shape parameter. -If `f == 0` and `b == d`, `genlambda` is equivalent to -[`scipy.stats.tukeylambda`][scipy.stats.tukeylambda], with `b` (or `d`) as -shape parameter. - -For a detailed description of the GLD, refer to -[Distributions - GLD](../distributions.md#gld). -""" + For a detailed description of the GLD, refer to + [Distributions - GLD](../distributions.md#gld). + """ +else: + genlambda: Final = cast( + lspt.RVContinuous, + genlambda_gen(name='genlambda'), + ) diff --git a/lmo/distributions/_genlambda.py b/lmo/distributions/_genlambda.py index 5a5d1140..bcd3717a 100644 --- a/lmo/distributions/_genlambda.py +++ b/lmo/distributions/_genlambda.py @@ -7,19 +7,20 @@ import functools import math from collections.abc import Callable, Sequence -from typing import TypeAlias, TypeVar, cast +from typing import Final, TypeAlias, TypeVar, cast import numpy as np import numpy.typing as npt -import scipy.special as sc +import scipy.special as sps from scipy.stats._distn_infrastructure import ( _ShapeInfo, # noqa: PLC2701 # pyright: ignore[reportPrivateUsage] ) -from scipy.stats.distributions import rv_continuous as _rv_continuous +from scipy.stats.distributions import rv_continuous import lmo.typing.scipy as lspt from lmo.special import harmonic from lmo.theoretical import entropy_from_qdf, l_moment_from_ppf +from ._lm import get_lm_func __all__ = ('genlambda_gen',) @@ -45,16 +46,16 @@ def _genlambda_ppf0(q: float, b: float, d: float, f: float) -> float: return _genlambda_support(b, d, f)[1] u = math.log(q) if b == 0 else (q**b - 1) / b - v = math.log(1 - q) if d == 0 else ((1 - q)**d - 1) / d + v = math.log(1 - q) if d == 0 else ((1 - q) ** d - 1) / d return (1 + f) * u - (1 - f) * v -_genlambda_ppf = np.vectorize(_genlambda_ppf0, [float]) +_genlambda_ppf: Final = np.vectorize(_genlambda_ppf0, [float]) @np.errstate(divide='ignore') def _genlambda_qdf(q: _T_f8, b: float, d: float, f: float) -> _T_f8: - return cast(_T_f8, (1 + f) * q**(b - 1) + (1 - f) * (1 - q)**(d - 1)) + return cast(_T_f8, (1 + f) * q ** (b - 1) + (1 - f) * (1 - q) ** (d - 1)) def _genlambda_cdf0( # noqa: C901 @@ -85,13 +86,17 @@ def _genlambda_cdf0( # noqa: C901 # special cases if abs(f + 1) < ptol: - return 1 - math.exp(-x / 2) if d == 0 else 1 - (1 - d * x / 2)**(1 / d) + if d == 0: + return 1 - math.exp(-x / 2) + + return 1 - (1 - d * x / 2) ** (1 / d) if abs(f - 1) < ptol: - return math.exp(x / 2) if b == 0 else (1 + b * x / 2)**(1 / b) + return math.exp(x / 2) if b == 0 else (1 + b * x / 2) ** (1 / b) if abs(f) < ptol and abs(b) < ptol and abs(d) < ptol: # logistic if x >= 0: return 1 / (1 + math.exp(-x)) + return math.exp(x) / (1 + math.exp(x)) if abs(b - 1) < ptol and abs(d - 1) < ptol: # uniform on [-1 - f, 1 - f] @@ -115,49 +120,15 @@ def _genlambda_cdf0( # noqa: C901 return p_mid -_genlambda_cdf = np.vectorize( +_genlambda_cdf: Final = np.vectorize( _genlambda_cdf0, [float], excluded={'ptol', 'xtol', 'maxiter'}, ) +_genlambda_lm: Final = get_lm_func('genlambda') -def _genlambda_lmo0( - r: int, - s: float, - t: float, - b: float, - d: float, - f: float, -) -> float: - if r == 0: - return 1 - - if b <= -1 - s and d <= -1 - t: - return math.nan - - 
def _lmo0_partial(trim: float, theta: float) -> float | np.float64: - if r == 1 and theta == 0: - return harmonic(trim) - harmonic(s + t + 1) - - return ( - (-1)**r * - sc.poch(r + trim, s + t - trim + 1) - * sc.poch(1 - theta, r - 2) - / sc.poch(1 + theta + trim, r + s + t - trim) - - (1 / theta if r == 1 else 0) - ) / r - - return ( - (1 + f) * _lmo0_partial(s, b) - + (-1)**r * (1 - f) * _lmo0_partial(t, d) - ) - - -_genlambda_lmo = np.vectorize(_genlambda_lmo0, [float], excluded={1, 2}) - - -class genlambda_gen(cast(type[lspt.AnyRV], _rv_continuous)): +class genlambda_gen(cast(type[lspt.AnyRV], rv_continuous)): def _argcheck(self, b: float, d: float, f: float) -> int: return np.isfinite(b) & np.isfinite(d) & (f >= -1) & (f <= 1) @@ -177,57 +148,33 @@ def _get_support( def _fitstart( self, - data: npt.NDArray[np.float64], + data: _ArrF8, args: tuple[float, float, float] | None = None, ) -> tuple[float, float, float, float, float]: # Arbitrary, but the default f=1 is a bad start return cast( tuple[float, float, float, float, float], - super()._fitstart(data, args or (1., 1., 0.)), # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue] + super()._fitstart(data, args or (1.0, 1.0, 0.0)), # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue] ) - def _pdf( - self, - x: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: + def _pdf(self, x: _ArrF8, b: float, d: float, f: float) -> _ArrF8: return 1 / self._qdf(self._cdf(x, b, d, f), b, d, f) - def _cdf( - self, - x: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: + def _cdf(self, x: _ArrF8, b: float, d: float, f: float) -> _ArrF8: return _genlambda_cdf(x, b, d, f) - def _qdf( - self, - u: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: - return _genlambda_qdf(u, b, d, f) + def _qdf(self, q: _ArrF8, b: float, d: float, f: float) -> _ArrF8: + return _genlambda_qdf(q, b, d, f) - def _ppf( + def _ppf(self, q: _ArrF8, b: float, d: float, f: float) -> _ArrF8: + return _genlambda_ppf(q, b, d, f) + + def _stats( self, - u: npt.NDArray[np.float64], b: float, d: float, f: float, - ) -> npt.NDArray[np.float64]: - return _genlambda_ppf(u, b, d, f) - - def _stats(self, b: float, d: float, f: float) -> tuple[ - float, - float, - float | None, - float | None, - ]: + ) -> tuple[float, float, float | None, float | None]: if b <= -1 or d <= -1: # hard NaN (not inf); indeterminate integral return math.nan, math.nan, math.nan, math.nan @@ -235,7 +182,10 @@ def _stats(self, b: float, d: float, f: float) -> tuple[ a, c = 1 + f, 1 - f b1, d1 = 1 + b, 1 + d - m1 = 0 if b == d and f == 0 else _genlambda_lmo0(1, 0, 0, b, d, f) + if b == d and f == 0: + m1 = 0 + else: + m1 = cast(float, _genlambda_lm(1, 0, 0, b, d, f)) if b <= -1 / 2 or d <= -1 / 2: return m1, math.nan, math.nan, math.nan @@ -245,22 +195,20 @@ def _stats(self, b: float, d: float, f: float) -> tuple[ elif b == 0: m2 = ( a**2 - + (c / d1)**2 / (d1 + d) + + (c / d1) ** 2 / (d1 + d) + 2 * a * c / (d * d1) * (1 - harmonic(1 + d)) ) elif d == 0: m2 = ( c**2 - + (a / b1)**2 / (b1 + b) + + (a / b1) ** 2 / (b1 + b) + 2 * a * c / (b * b1) * (1 - harmonic(1 + b)) ) else: m2 = ( - (a / b1)**2 / (b1 + b) - + (c / d1)**2 / (d1 + d) - + 2 * a * c / (b * d) * ( - 1 / (b1 * d1) - sc.beta(b1, d1) - ) + (a / b1) ** 2 / (b1 + b) + + (c / d1) ** 2 / (d1 + d) + + 2 * a * c / (b * d) * (1 / (b1 * d1) - sps.beta(b1, d1)) ) # Feeling adventurous? 
You're welcome to contribute these missing @@ -280,10 +228,11 @@ def _entropy(self, b: float, d: float, f: float) -> float: def _l_moment( self, - r: npt.NDArray[np.int64], + r: npt.NDArray[np.intp], b: float, d: float, f: float, + *, trim: tuple[int, int] | tuple[float, float], quad_opts: lspt.QuadOptions | None = None, ) -> _ArrF8: @@ -292,7 +241,7 @@ def _l_moment( if quad_opts is not None: # only do numerical integration when quad_opts is passed lmbda_r = cast( - float | npt.NDArray[np.float64], + float | _ArrF8, l_moment_from_ppf( cast( Callable[[float], float], @@ -305,6 +254,4 @@ def _l_moment( ) return np.asarray(lmbda_r) - return np.atleast_1d( - cast(_ArrF8, _genlambda_lmo(r, s, t, b, d, f)), - ) + return _genlambda_lm(r, s, t, b, d, f) diff --git a/lmo/distributions/_kumaraswamy.py b/lmo/distributions/_kumaraswamy.py index 575a2931..59cf5daa 100644 --- a/lmo/distributions/_kumaraswamy.py +++ b/lmo/distributions/_kumaraswamy.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools from typing import TYPE_CHECKING, TypeAlias, cast, final, overload import numpy as np @@ -14,6 +15,7 @@ import lmo.typing.scipy as lspt from lmo.special import harmonic from lmo.theoretical import l_moment_from_ppf +from ._lm import get_lm_func if TYPE_CHECKING: @@ -25,31 +27,7 @@ _ArrF8: TypeAlias = onpt.Array[tuple[int, ...], np.float64] - -def _kumaraswamy_lmo0( - r: int, - s: int, - t: int, - a: float, - b: float, -) -> np.float64: - if r == 0: - return np.float64(1) - - k = np.arange(t + 1, r + s + t + 1) - return ( - np.sum( - (-1) ** (k - 1) - * cast(_ArrF8, sc.comb(r + k - 2, r + t - 1)) # pyright: ignore[reportUnknownMemberType] - * cast(_ArrF8, sc.comb(r + s + t, k)) # pyright: ignore[reportUnknownMemberType] - * cast(_ArrF8, sc.beta(1 / a, 1 + k * b)) - / a, - ) - / r - ) - - -_kumaraswamy_lmo = np.vectorize(_kumaraswamy_lmo0, [float], excluded={1, 2}) +_lm_kumaraswamy = get_lm_func('kumaraswamy') @final @@ -80,24 +58,24 @@ def _sf(self, x: _ArrF8, a: float, b: float) -> _ArrF8: def _isf(self, q: _ArrF8, a: float, b: float) -> _ArrF8: return (1 - q ** (1 / b)) ** (1 / a) - def _qdf(self, u: _ArrF8, a: float, b: float) -> _ArrF8: + def _qdf(self, q: _ArrF8, a: float, b: float) -> _ArrF8: return ( - (1 - u) ** (1 / (b - 1)) - * (1 - (1 - u) ** (1 / b)) ** (1 / (a - 1)) + (1 - q) ** (1 / (b - 1)) + * (1 - (1 - q) ** (1 / b)) ** (1 / (a - 1)) / (a * b) ) @overload - def _ppf(self, u: float, a: float, b: float) -> np.float64: ... + def _ppf(self, q: _ArrF8, a: float, b: float) -> _ArrF8: ... @overload - def _ppf(self, u: _ArrF8, a: float, b: float) -> _ArrF8: ... + def _ppf(self, q: float, a: float, b: float) -> np.float64: ... 
def _ppf( self, - u: float | _ArrF8, + q: float | _ArrF8, a: float, b: float, ) -> np.float64 | _ArrF8: - return (1 - (1 - u) ** (1 / b)) ** (1 / a) + return (1 - (1 - q) ** (1 / b)) ** (1 / a) def _entropy(self, a: float, b: float) -> np.float64: # https://wikipedia.org/wiki/Kumaraswamy_distribution @@ -108,20 +86,22 @@ def _munp(self, n: int, a: float, b: float) -> float: def _l_moment( self, - r: npt.NDArray[np.int64], + r: npt.NDArray[np.intp], a: float, b: float, + *, trim: tuple[int, int] | tuple[float, float], quad_opts: lspt.QuadOptions | None = None, ) -> _ArrF8: s, t = trim - if quad_opts is not None or isinstance(s, float): - - def _ppf(u: float, /) -> np.float64: - return self._ppf(u, a, b) - - out = l_moment_from_ppf(_ppf, r, trim=trim, quad_opts=quad_opts) + if quad_opts is not None or isinstance(trim[0], float): + out = l_moment_from_ppf( + functools.partial(self._ppf, a=a, b=b), + r, + trim=trim, + quad_opts=quad_opts, + ) else: - out = cast(_ArrF8, _kumaraswamy_lmo(r, s, t, a, b)) + out = _lm_kumaraswamy(r, s, t, a, b) return np.atleast_1d(out) diff --git a/lmo/distributions/_lm.py b/lmo/distributions/_lm.py new file mode 100644 index 00000000..3b4feb1b --- /dev/null +++ b/lmo/distributions/_lm.py @@ -0,0 +1,506 @@ +# ruff: noqa: C901 + +""" +Known theoretical L-moments for specific distributions. +The names that are used here, match those in +[`scipy.stats.distributions`][scipy.stats.distributions]. +""" + +from __future__ import annotations + +import sys +from collections.abc import Callable +from math import gamma, log +from typing import ( + TYPE_CHECKING, + Concatenate, + Final, + Literal, + ParamSpec, + Protocol, + TypeAlias, + TypeVar, + cast, + overload, +) + +import numpy as np +import numpy.typing as npt +import optype.numpy as onpt +import scipy.special as sps + +from lmo.special import harmonic +from lmo.theoretical import l_moment_from_ppf + + +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs + +if TYPE_CHECKING: + import lmo.typing.np as lnpt + + +__all__ = [ + 'lm_uniform', + 'lm_logistic', + 'lm_expon', + 'lm_gumbel_r', + 'lm_genextreme', + 'lm_genpareto', + 'lm_kumaraswamy', + 'lm_wakeby', + 'lm_genlambda', +] + +DistributionName: TypeAlias = Literal[ + # 0 params + 'uniform', + 'logistic', + 'expon', + 'gumbel_r', + # 1 param + 'genextreme', + 'genpareto', + # 2 params + 'kumaraswamy', + # 3 params + 'wakeby', + 'genlambda', +] +_LM_REGISTRY: Final[dict[DistributionName, _LmVFunc[...]]] = {} + +_ArrF8: TypeAlias = onpt.Array[tuple[int, ...], np.float64] + +_Tss = ParamSpec('_Tss') +# (r, s, t, *params) -> float +_LmFunc: TypeAlias = Callable[ + Concatenate[int, float, float, _Tss], + float | np.float64, +] + +_ShapeT = TypeVar('_ShapeT', bound=tuple[int, ...]) + + +_LN2: Final = np.log(2) +_LN3: Final = np.log(3) +_LN5: Final = np.log(5) + + +# workaround for partial type annotations (i.e. missing and un-inferrable) +_binom = cast( + Callable[ + [npt.NDArray[np.intp] | int, npt.NDArray[np.intp] | int], + npt.NDArray[np.intp], + ], + sps.comb, +) + + +class _LmVFunc(Protocol[_Tss]): + pyfunc: _LmFunc[_Tss] + + @overload + def __call__( + self, + r: onpt.CanArray[_ShapeT, np.dtype[lnpt.Integer]], + s: float, + t: float, + /, + *args: _Tss.args, + **kwds: _Tss.kwargs, + ) -> onpt.Array[_ShapeT, np.float64]: ... + @overload + def __call__( + self, + r: int | lnpt.Integer, + s: float, + t: float, + /, + *args: _Tss.args, + **kwds: _Tss.kwargs, + ) -> onpt.Array[tuple[()], np.float64]: ... 
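[Reviewer note] A minimal sketch of how this new registry is meant to be consumed. `has_lm_func`/`get_lm_func` are the private helpers defined just below, and the call pattern mirrors the dispatch added to `l_rv_generic._l_moment` earlier in this diff; the `'expon'` example and the trim values are illustrative only:

```python
import numpy as np
from lmo.distributions._lm import get_lm_func, has_lm_func

# Use the exact registered formula when available; l_rv_generic._l_moment
# otherwise falls back to numerical integration via l_moment_from_cdf.
name = 'expon'
if has_lm_func(name):
    lm_exact = get_lm_func(name)       # vectorized: (r, s, t, *shape) -> float64
    r = np.arange(1, 5, dtype=np.intp)
    print(lm_exact(r, 0, 1))           # lambda_1..lambda_4 with trim=(0, 1)
```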
+ + +def register_lm_func( + name: DistributionName, + /, +) -> Callable[[_LmFunc[_Tss]], _LmVFunc[_Tss]]: + # TODO: vectorize decorator (only for `r`), with correct signature + assert name not in _LM_REGISTRY + + def _wrapper(func: _LmFunc[_Tss], /) -> _LmVFunc[_Tss]: + vfunc = np.vectorize(func, [float], excluded={1, 2}) + _LM_REGISTRY[name] = vfunc + return vfunc + + return _wrapper + + +def has_lm_func(name: str, /) -> TypeIs[DistributionName]: + return name in _LM_REGISTRY + + +def get_lm_func(name: DistributionName, /) -> _LmVFunc[...]: + """ + Return the L-moment function for the given distribution, or raise a + `KeyError` if not registered. + """ + return _LM_REGISTRY[name] + + +@register_lm_func('uniform') +def lm_uniform(r: int, s: float, t: float, /) -> float: + """ + Exact generalized* trimmed L-moments of the standard uniform distribution + on the interval [0, 1]. + + *: Only the `s` trim-length can be fractional. + """ + if r == 0: + return 1 + + if not isinstance(t, int): + msg = 't must be an integer' + raise NotImplementedError(msg) + + if r == 1: + return (1 + s) / (2 + s + t) + if r == 2: + return 1 / (2 * (3 + s + t)) + return 0 + + +@register_lm_func('logistic') +def lm_logistic(r: int, s: float, t: float, /) -> float: + """ + Exact generalized trimmed L-moments of the standard logistic + distribution. + """ + if r == 0: + return 1 + + if isinstance(t, float) and not t.is_integer(): + msg = 't must be an integer' + raise NotImplementedError(msg) + + # symmetric trimming + if s == t: + if r % 2: + return 0 + + if s == 0: + if r == 2: + return 1 + if r == 4: + return 1 / 6 + if s == 1: + if r == 2: + return 1 / 2 + if r == 4: + return 1 / 24 + + return 2 * sps.beta(r - 1, t + 1) / r + + # asymmetric trimming + if r == 1: + return harmonic(s) - harmonic(t) + + r1m = r - 1 + return ((-1) ** r * sps.beta(r1m, s + 1) + sps.beta(r1m, t + 1)) / r + + +@register_lm_func('expon') +def lm_expon(r: int, s: float, t: float, /) -> float: + """ + Exact generalized trimmed L-moments of the standard exponential + distribution. + """ + if r == 0: + return 1 + if r == 1: + return 1 / (1 + t) if s == 0 else harmonic(s + t + 1) - harmonic(t) + + if t == 0: + return 1 / (r * (r - 1)) + + # it doesn't depend on `s` when `r > 1`, which is pretty crazy actually + if r == 2: + return 1 / (2 * (t + 1)) + if r == 3: + return 1 / (3 * (t + 1) * (t + 2)) + if r == 4: + # the 2 here isn't a typo + return 1 / (2 * (t + 1) * (t + 2) * (t + 3)) + + return sps.beta(r - 1, t + 1) / r + + +@register_lm_func('gumbel_r') +def lm_gumbel_r(r: int, s: float, t: float, /) -> np.float64 | float: + """ + Exact trimmed L-moments of the Gumbel / Extreme Value (EV) distribution. + + Doesn't support fractional trimming. 
+ """ + if r == 0: + return 1 + + if not isinstance(s, int) or not isinstance(t, int): + msg = 'fractional trimming' + raise NotImplementedError(msg) + + match (r, s, t): + case (1, 0, 0): + return np.euler_gamma + case (1, 0, 1): + return np.euler_gamma - _LN2 + case (1, 1, 1): + return np.euler_gamma + _LN2 * 3 - _LN3 * 2 + + case (2, 0, 0): + return _LN2 + case (2, 0, 1): + return _LN2 * 3 - _LN3 * 3 / 2 + case (2, 1, 1): + return _LN2 * -9 + _LN3 * 6 + + case (3, 0, 0): + return _LN2 * -3 + _LN3 * 2 + case (3, 0, 1): + return _LN2 * -38 / 3 + _LN3 * 8 + case (3, 1, 1): + return (_LN2 * 110 - _LN3 * 40 - _LN5 * 20) / 3 + + case (4, 0, 0): + return _LN2 * 16 - _LN3 * 10 + case (4, 0, 1): + return _LN2 * 60 - _LN3 * 25 - _LN5 * 35 / 4 + case (4, 1, 1): + return (_LN2 * -107 + _LN3 * 6 + _LN5 * 42) * 5 / 4 + + case _: + return lm_genextreme.pyfunc(r, s, t, 0) + + +@register_lm_func('genextreme') +def lm_genextreme( + r: int, + s: float, + t: float, + /, + a: float, +) -> np.float64 | float: + """ + Exact trimmed L-moments of the Generalized Extreme Value (GEV) + distribution. + + Doesn't support fractional trimming. + """ + if r == 0: + return 1 + + if not isinstance(s, int) or not isinstance(t, int): + msg = 'fractional trimming' + raise NotImplementedError(msg) + + if a < 0 and (isinstance(a, int) or a.is_integer()): + msg = 'a cannot be a negative integer' + raise ValueError(msg) + + if r + s + t < 8: + # numerical accurate to within approx. < 1e-12 error + k0 = s + 1 + kn = r + s + t + k = np.arange(k0, kn + 1, dtype=np.intp) + + if a == 0: + pwm = np.euler_gamma + np.log(k) + else: + pwm = (1 / a - sps.gamma(a) / k**a) + + return np.sum( + (-1) ** k + * _binom(kn, k) + * _binom(r - 2 + k, r - 2 + k0) + * pwm, + ) * (-1) ** (r + s) / r + + # NOTE: some performance notes: + # - `math.log` is faster for scalar input that `numpy.log` + # - conditionals within the function are avoided through multiple functions + if a == 0: + def _ppf(q: float) -> float: + if q <= 0: + return -float('inf') + if q >= 1: + return float('inf') + return -log(-log(q)) + elif a < 0: + def _ppf(q: float) -> float: + if q <= 0: + return 1 / a + if q >= 1: + return float('inf') + return (1 - (-log(q))**a) / a + else: # a > 0 + def _ppf(q: float) -> float: + if q <= 0: + return -float('inf') + if q >= 1: + return 1 / a + return (1 - (-log(q))**a) / a + + return l_moment_from_ppf(_ppf, r, (s, t)) + + +@register_lm_func('genpareto') +def lm_genpareto( + r: int, + s: float, + t: float, + /, + a: float, +) -> np.float64 | float: + """ + Exact trimmed L-moments of the Generalized Pareto distribution (GPD). + """ + if r == 0: + return 1 + if a == 0: + return lm_expon.pyfunc(r, s, t) + if a == 1: + return lm_uniform.pyfunc(r, s, t) + + if not isinstance(t, int): + msg = 'fractional trimming' + raise NotImplementedError(msg) + + if a >= 1 + t: + return float('nan') + + b = a - 1 + n = r + s + t + 1 + + if r == 1 and s == 0: + return 1 / (t - b) + if r == 1: + return (gamma(t - b) / gamma(r + t) * gamma(n) / gamma(n - a) - 1) / a + + return sps.beta(r + b, t - b) / (a * sps.beta(n - a, a)) / r + + +@register_lm_func('kumaraswamy') +def lm_kumaraswamy( + r: int, + s: float, + t: float, + /, + a: float, + b: float, +) -> np.float64 | float: + """ + Exact trimmed L-moments of (the location-scale reparametrization of) + Kumaraswamy's distribution [@kumaraswamy1980]. + + Doesn't support fractional trimming. 
+ """ + if r == 0: + return 1 + + if not isinstance(s, int) or not isinstance(t, int): + msg = 'fractional trimming not supported' + raise NotImplementedError(msg) + + k = np.arange(t + 1, r + s + t + 1) + return ( + np.sum( + (-1) ** (k - t - 1) + * cast(_ArrF8, sps.comb(r + k - 2, r + t - 1)) # pyright: ignore[reportUnknownMemberType] + * cast(_ArrF8, sps.comb(r + s + t, k)) # pyright: ignore[reportUnknownMemberType] + * cast(_ArrF8, sps.beta(1 / a, 1 + k * b)) + / a, + ) + / r + ) + + +@register_lm_func('wakeby') +def lm_wakeby( + r: int, + s: float, + t: float, + /, + b: float, + d: float, + f: float, +) -> float | np.float64: + """ + Exact generalized trimmed L-moments of (the location-scale + reparametrization of) Wakeby's distribution [@houghton1978]. + """ + if r == 0: + return 1 + + if d >= (b == 0) + 1 + t: + return np.nan + + def _lmo0_partial(theta: float, scale: float, /) -> float | np.float64: + if scale == 0: + return 0 + if r == 1 and theta == 0: + return harmonic(s + t + 1) - harmonic(t) + + return ( + scale + * ( + sps.poch(r + t, s + 1) + * sps.poch(1 - theta, r - 2) + / sps.poch(1 + theta + t, r + s) + + (1 / theta if r == 1 else 0) + ) + / r + ) + + return float(_lmo0_partial(b, f) + _lmo0_partial(-d, 1 - f)) + + +@register_lm_func('genlambda') +def lm_genlambda( + r: int, + s: float, + t: float, + /, + b: float, + d: float, + f: float, +) -> float: + """ + Exact generalized trimmed L-moments of the (location-scale + reparametrization of the) Generalized Tukey-Lambda Distribution (GLD) + [@ramberg1974]. + """ + if r == 0: + return 1 + + if b <= -1 - s and d <= -1 - t: + return np.nan + + sgn = (-1) ** r + + def _lmo0_partial(trim: float, theta: float) -> float | np.float64: + if r == 1 and theta == 0: + return harmonic(trim) - harmonic(s + t + 1) + + return ( + sgn + * sps.poch(r + trim, s + t - trim + 1) + * sps.poch(1 - theta, r - 2) + / sps.poch(1 + theta + trim, r + s + t - trim) + - (1 / theta if r == 1 else 0) + ) / r + + lhs = (1 + f) * _lmo0_partial(s, b) + rhs = (1 - f) * _lmo0_partial(t, d) + return lhs + sgn * rhs diff --git a/lmo/distributions/_nonparametric.py b/lmo/distributions/_nonparametric.py index ea4f7c38..f9912294 100644 --- a/lmo/distributions/_nonparametric.py +++ b/lmo/distributions/_nonparametric.py @@ -258,8 +258,7 @@ def rvs( Defining number of random variates, defaults to 1. random_state: Seed or [`numpy.random.Generator`][numpy.random.Generator] - instance. Defaults to - [`l_poly.random_state`][lmo.distributions.l_poly.random_state]. + instance. Defaults to `l_poly.random_state`. Returns: A scalar or array with shape like `size`. 
diff --git a/lmo/distributions/_wakeby.py b/lmo/distributions/_wakeby.py index fbadba0a..dc2f2e6c 100644 --- a/lmo/distributions/_wakeby.py +++ b/lmo/distributions/_wakeby.py @@ -2,7 +2,7 @@ import math import warnings from collections.abc import Callable, Sequence -from typing import TypeAlias, cast, overload +from typing import Final, TypeAlias, TypeVar, cast import numpy as np import numpy.typing as npt @@ -13,8 +13,8 @@ from scipy.stats.distributions import rv_continuous as _rv_continuous import lmo.typing.scipy as lspt -from lmo.special import harmonic from lmo.theoretical import l_moment_from_ppf +from ._lm import get_lm_func __all__ = ('wakeby_gen',) @@ -22,22 +22,19 @@ _ArrF8: TypeAlias = npt.NDArray[np.float64] +_PT = TypeVar('_PT', float, _ArrF8) + def _wakeby_ub(b: float, d: float, f: float) -> float: - """Upper bound of x.""" + """Upper bound of the domain of Wakeby's distribution function.""" if d < 0: return f / b - (1 - f) / d if f == 1 and b: return 1 / b - return math.inf + return np.inf -def _wakeby_isf0( - q: float, - b: float, - d: float, - f: float, -) -> float: +def _wakeby_isf0(q: float, b: float, d: float, f: float) -> float: """Inverse survival function, does not validate params.""" if q <= 0: return _wakeby_ub(b, d, f) @@ -64,37 +61,15 @@ def _wakeby_isf0( _wakeby_isf = np.vectorize(_wakeby_isf0, [float]) -@overload -def _wakeby_qdf( - p: float, - b: float, - d: float, - f: float, -) -> float: ... -@overload -def _wakeby_qdf( - p: npt.NDArray[np.float64], - b: float, - d: float, - f: float, -) -> npt.NDArray[np.float64]: ... -def _wakeby_qdf( - p: float | npt.NDArray[np.float64], - b: float, - d: float, - f: float, -) -> float | npt.NDArray[np.float64]: +def _wakeby_qdf(p: _PT, b: float, d: float, f: float) -> _PT: """Quantile density function (QDF), the derivative of the PPF.""" q = 1 - p - return f * q ** (b - 1) + (1 - f) * q ** (-d - 1) + lhs = f * q ** (b - 1) + rhs = (1 - f) / q ** (d + 1) + return cast(_PT, lhs + rhs) -def _wakeby_sf0( # noqa: C901 - x: float, - b: float, - d: float, - f: float, -) -> float: +def _wakeby_sf0(x: float, b: float, d: float, f: float) -> float: # noqa: C901 """ Numerical approximation of Wakeby's survival function. 
@@ -189,44 +164,8 @@ def _wakeby_sf0( # noqa: C901 return math.exp(-z) if -z >= ufl else 0 -_wakeby_sf = np.vectorize(_wakeby_sf0, [float]) - - -def _wakeby_lmo0( - r: int, - s: float, - t: float, - b: float, - d: float, - f: float, -) -> float: - if r == 0: - return 1.0 - - if d >= (b == 0) + 1 + t: - return math.nan - - def _lmo0_partial(theta: float, scale: float) -> np.float64 | float: - if scale == 0: - return 0 - if r == 1 and theta == 0: - return harmonic(s + t + 1) - harmonic(t) - - return ( - scale - * ( - sc.poch(r + t, s + 1) - * sc.poch(1 - theta, r - 2) - / sc.poch(1 + theta + t, r + s) - + (1 / theta if r == 1 else 0) - ) - / r - ) - - return float(_lmo0_partial(b, f) + _lmo0_partial(-d, 1 - f)) - - -_wakeby_lmo = np.vectorize(_wakeby_lmo0, [float], excluded={1, 2}) +_wakeby_sf: Final = np.vectorize(_wakeby_sf0, [float]) +_wakeby_lm: Final = get_lm_func('wakeby') class wakeby_gen(cast(type[lspt.AnyRV], _rv_continuous)): @@ -261,7 +200,7 @@ def _get_support( def _fitstart( self, - data: npt.NDArray[np.float64], + data: _ArrF8, args: tuple[float, float, float] | None = None, ) -> tuple[float, float, float, float, float]: # Arbitrary, but the default f=1 is a bad start @@ -270,50 +209,20 @@ def _fitstart( super()._fitstart(data, args or (1.0, 1.0, 0.5)), # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue] ) - def _pdf( - self, - x: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: + def _pdf(self, x: _ArrF8, b: float, d: float, f: float) -> _ArrF8: # application of the inverse function theorem return 1 / self._qdf(self._cdf(x, b, d, f), b, d, f) - def _cdf( - self, - x: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: + def _cdf(self, x: _ArrF8, b: float, d: float, f: float) -> _ArrF8: return 1 - _wakeby_sf(x, b, d, f) - def _qdf( - self, - u: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: - return _wakeby_qdf(u, b, d, f) + def _qdf(self, q: _ArrF8, b: float, d: float, f: float) -> _ArrF8: + return _wakeby_qdf(q, b, d, f) - def _ppf( - self, - p: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: - return _wakeby_isf(1 - p, b, d, f) + def _ppf(self, q: _ArrF8, b: float, d: float, f: float) -> _ArrF8: + return _wakeby_isf(1 - q, b, d, f) - def _isf( - self, - q: npt.NDArray[np.float64], - b: float, - d: float, - f: float, - ) -> npt.NDArray[np.float64]: + def _isf(self, q: _ArrF8, b: float, d: float, f: float) -> _ArrF8: return _wakeby_isf(q, b, d, f) def _stats( @@ -347,12 +256,34 @@ def _stats( return m1, m2, m3, m4 + def _entropy(self, b: float, d: float, f: float) -> float: + """ + Entropy can be calculated from the QDF (PPF derivative) as the + Integrate[Log[QDF[u]], {u, 0, 1}]. This is the (semi) closed-form + solution in this case. + At the time of writing, this result appears to be novel. + + The `f` conditionals are the limiting cases, e.g. for uniform, + exponential, and GPD (genpareto). 
+ """ + if f == 0: + return 1 + d + if f == 1: + return 1 - b + + bd = b + d + assert bd > 0 + + ibd = 1 / bd + return 1 - b + bd * float(sc.hyp2f1(1, ibd, 1 + ibd, f / (f - 1))) + def _l_moment( self, r: npt.NDArray[np.int64], b: float, d: float, f: float, + *, trim: tuple[int, int] | tuple[float, float], quad_opts: lspt.QuadOptions | None = None, ) -> _ArrF8: @@ -360,42 +291,17 @@ def _l_moment( if quad_opts is not None: # only do numerical integration when quad_opts is passed - lmbda_r = cast( - float | npt.NDArray[np.float64], - l_moment_from_ppf( - cast( - Callable[[float], float], - functools.partial(self._ppf, b=b, d=d, f=f), - ), - r, - trim=trim, - quad_opts=quad_opts, + lmbda_r = l_moment_from_ppf( + cast( + Callable[[float], float], + functools.partial(self._ppf, b=b, d=d, f=f), ), + r, + trim=trim, + quad_opts=quad_opts, ) out = lmbda_r else: - out = _wakeby_lmo(r, s, t, b, d, f) + out = _wakeby_lm(r, s, t, b, d, f) return np.atleast_1d(out) - - def _entropy(self, b: float, d: float, f: float) -> float: - """ - Entropy can be calculated from the QDF (PPF derivative) as the - Integrate[Log[QDF[u]], {u, 0, 1}]. This is the (semi) closed-form - solution in this case. - At the time of writing, this result appears to be novel. - - The `f` conditionals are the limiting cases, e.g. for uniform, - exponential, and GPD (genpareto). - """ - if f == 0: - return 1 + d - if f == 1: - return 1 - b - - bd = b + d - assert bd > 0 - - return ( - 1 - b + bd * float(sc.hyp2f1(1, 1 / bd, 1 + 1 / bd, -f / (1 - f))) - ) diff --git a/lmo/inference/_l_gmm.py b/lmo/inference/_l_gmm.py index f08408fe..185618ad 100644 --- a/lmo/inference/_l_gmm.py +++ b/lmo/inference/_l_gmm.py @@ -12,8 +12,9 @@ from lmo._lm import l_moment as l_moment_est from lmo._lm_co import l_coscale as l_coscale_est from lmo._utils import clean_orders, clean_trim -from lmo.diagnostic import HypothesisTestResult, l_moment_bounds +from lmo.diagnostic import HypothesisTestResult from lmo.theoretical import l_moment_from_ppf +from lmo.theoretical._utils import l_coef_factor if TYPE_CHECKING: @@ -150,14 +151,23 @@ def _loss_step( trim: lmt.AnyTrim, w_rr: _ArrF8, ) -> np.float64: + """ + This is the computational bottleneck of L-(G)MM. + So avoid doing slow things here. 
+ """ lmbda_r = l_fn(r, *args, trim=trim) - if not np.all(np.isfinite(lmbda_r)): - msg = f'failed to find the L-moments of ppf{args} with {trim=}' - raise ValueError(msg) + # if not np.all(np.isfinite(lmbda_r)): + # msg = f'failed to find the L-moments of ppf{args} with {trim=}' + # raise ValueError(msg) + + # in-place subtraction to avoid creating a new array + g_r = lmbda_r + g_r -= l_r - g_r = lmbda_r - l_r - return np.sqrt(cast(np.float64, g_r.T @ w_rr @ g_r)) + # `cast()` calls aren't free + # return np.sqrt(cast(np.float64, g_r.T @ w_rr @ g_r)) + return np.sqrt(g_r.T @ w_rr @ g_r) # pyright: ignore[reportReturnType] def _get_l_moment_fn(ppf: lspt.RVFunction[...]) -> Callable[..., _ArrF8]: @@ -356,9 +366,7 @@ def fit( # noqa: C901 # Individual L-moment "natural" scaling constants, making their magnitudes # order- and trim- agnostic (used in convergence criterion) - l_r_ub = np.r_[1, l_moment_bounds(_r[1:], trim=_trim)] - l_2c = l_r[1] / l_r_ub[1] - scale_r = cast(_ArrF8, 1 / (l_2c * l_r_ub)) + scale_r = l_coef_factor(_r, _trim[0], _trim[1]) / l_r[1] # Initial parametric population L-moments _l_moment_fn = l_moment_fn or _get_l_moment_fn(ppf) @@ -375,7 +383,7 @@ def fit( # noqa: C901 k_min, epsmax = 1, l_tol # Random number generator - if isinstance(random_state, np.random.RandomState | np.random.Generator): + if isinstance(random_state, np.random.RandomState): rng = random_state else: rng = np.random.default_rng(random_state) diff --git a/lmo/theoretical/_utils.py b/lmo/theoretical/_utils.py index a351f960..1409d867 100644 --- a/lmo/theoretical/_utils.py +++ b/lmo/theoretical/_utils.py @@ -4,13 +4,21 @@ from typing import Concatenate, Final, ParamSpec, TypeAlias, cast import numpy as np +import numpy.typing as npt import scipy.integrate as spi +import scipy.special as sps import lmo.typing.np as lnpt import lmo.typing.scipy as lspt -__all__ = ('ALPHA', 'QUAD_LIMIT', 'l_const', 'tighten_cdf_support') +__all__ = ( + 'ALPHA', + 'QUAD_LIMIT', + 'l_coef_factor', + 'l_const', + 'tighten_cdf_support', +) _Fn1: TypeAlias = Callable[[float], float | lnpt.Float] @@ -50,6 +58,29 @@ def l_const(r: int, s: float, t: float, k: int = 0) -> float: return factorial(r - 1 - k) / r * v +def l_coef_factor( + r: int | np.intp | npt.NDArray[np.intp], + s: float = 0, + t: float = 0, +) -> npt.NDArray[np.float64]: + if s == t == 0: + return np.sqrt(2 * r - 1) + + assert s + t > -1 + + _r = np.asarray(r) + c = ( + np.sqrt( + (2 * _r + (s + t - 1)) + * sps.beta(_r + s, _r + t) + / sps.beta(_r, _r + s + t), + ) + * _r + / (_r + s + t) + ) + return c[()] if _r.ndim == 0 and np.isscalar(r) else c + + def tighten_cdf_support( cdf: _Fn1, support: tuple[float, float] | None = None, diff --git a/lmo/typing/np.py b/lmo/typing/np.py index f4395ac0..8bc41864 100644 --- a/lmo/typing/np.py +++ b/lmo/typing/np.py @@ -56,7 +56,7 @@ Number: TypeAlias = np.number[Any] Natural: TypeAlias = UInt | Bool -Integer: TypeAlias = Int | Natural +Integer: TypeAlias = np.integer[Any] | np.bool_ Real: TypeAlias = Float | Integer diff --git a/mkdocs.yml b/mkdocs.yml index 54f2c643..715e879c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,8 +20,7 @@ nav: - api/diagnostic.md - api/pandas.md - api/low_level.md - - "Theory": - - Distributions: distributions.md + - Distributions: distributions.md - Contributing: contributing.md theme: @@ -160,8 +159,23 @@ markdown_extensions: - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg + alt: short + options: + 
attributes: + align: absmiddle + height: "20px" + width: "20px" + image_path: https://github.githubassets.com/images/icons/emoji/unicode/ + non_standard_image_path: https://github.githubassets.com/images/icons/emoji/ - pymdownx.highlight - pymdownx.inlinehilite + - pymdownx.magiclink: + repo_url_shortener: true + repo_url_shorthand: true + provider: github + user: jorenham + repo: Lmo + - pymdownx.saneheaders - pymdownx.snippets - pymdownx.striphtml - pymdownx.superfences @@ -173,7 +187,8 @@ markdown_extensions: case: lower - pymdownx.tasklist: custom_checkbox: true - - pymdownx.tilde + - pymdownx.tilde: + subscript: false extra_css: - https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css diff --git a/poetry.lock b/poetry.lock index 843b6780..de9cd26d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -757,13 +757,13 @@ files = [ [[package]] name = "hypothesis" -version = "6.111.1" +version = "6.111.2" description = "A library for property-based testing" optional = false python-versions = ">=3.8" files = [ - {file = "hypothesis-6.111.1-py3-none-any.whl", hash = "sha256:9422adbac4b2104f6cf92dc6604b5c9df975efc08ffc7145ecc39bc617243835"}, - {file = "hypothesis-6.111.1.tar.gz", hash = "sha256:6ab6185a858fa692bf125c0d0a936134edc318bee01c05e407c71c9ead0b61c5"}, + {file = "hypothesis-6.111.2-py3-none-any.whl", hash = "sha256:055e8228958e22178d6077e455fd86a72044d02dac130dbf9c8b31e161b9809c"}, + {file = "hypothesis-6.111.2.tar.gz", hash = "sha256:0496ad28c7240ee9ba89fcc7fb1dc74e89f3e40fbcbbb5f73c0091558dec8e6e"}, ] [package.dependencies] @@ -773,10 +773,10 @@ numpy = {version = ">=1.17.3", optional = true, markers = "extra == \"numpy\""} sortedcontainers = ">=2.1.0,<3.0.0" [package.extras] -all = ["backports.zoneinfo (>=0.2.1)", "black (>=19.10b0)", "click (>=7.0)", "crosshair-tool (>=0.0.66)", "django (>=3.2)", "dpcontracts (>=0.4)", "hypothesis-crosshair (>=0.0.12)", "lark (>=0.10.1)", "libcst (>=0.3.16)", "numpy (>=1.17.3)", "pandas (>=1.1)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "tzdata (>=2024.1)"] +all = ["backports.zoneinfo (>=0.2.1)", "black (>=19.10b0)", "click (>=7.0)", "crosshair-tool (>=0.0.70)", "django (>=3.2)", "dpcontracts (>=0.4)", "hypothesis-crosshair (>=0.0.13)", "lark (>=0.10.1)", "libcst (>=0.3.16)", "numpy (>=1.17.3)", "pandas (>=1.1)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "tzdata (>=2024.1)"] cli = ["black (>=19.10b0)", "click (>=7.0)", "rich (>=9.0.0)"] codemods = ["libcst (>=0.3.16)"] -crosshair = ["crosshair-tool (>=0.0.66)", "hypothesis-crosshair (>=0.0.12)"] +crosshair = ["crosshair-tool (>=0.0.70)", "hypothesis-crosshair (>=0.0.13)"] dateutil = ["python-dateutil (>=1.4)"] django = ["django (>=3.2)"] dpcontracts = ["dpcontracts (>=0.4)"] @@ -1183,6 +1183,78 @@ files = [ {file = "latexcodec-3.0.0.tar.gz", hash = "sha256:917dc5fe242762cc19d963e6548b42d63a118028cdd3361d62397e3b638b6bc5"}, ] +[[package]] +name = "line-profiler" +version = "4.1.3" +description = "Line-by-line profiler" +optional = false +python-versions = ">=3.6" +files = [ + {file = "line_profiler-4.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b26cccca30c0f859c585cd4a6c75ffde4dca80ba98a858d3d04b44a6b560c65"}, + {file = "line_profiler-4.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e8a1ed7bf88049cb8d069a2dac96c91b25b5a77cb712c207b7f484ab86f8b134"}, + {file = "line_profiler-4.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:c320a8ccb2b9d0df85b8f19000242407d0cb1ea5804b4967fe6f755824c81a87"}, + {file = "line_profiler-4.1.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5751939d9dd95b1ec74e0aee428fe17d037fcb346fd23a7bf928b71c2dca2d19"}, + {file = "line_profiler-4.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b45f405d63730e5284403c1ff293f1e7f8ac7a39486db4c55a858712cec333d"}, + {file = "line_profiler-4.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9e24d61810ad153ab6a795d68f735812de4131f282128b799467f7fa56cac94f"}, + {file = "line_profiler-4.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f961465381e5bdc9fa7e5597af6714ada700d3e6ca61cca56763477f1047ff23"}, + {file = "line_profiler-4.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:6112436cb48ab635bc64e3dbfd80f67b56967e72aa7853e5084a64e11be5fe65"}, + {file = "line_profiler-4.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:16c8d2830e9daf0bcd49422e9367db5c825b02b88c383b9228c281ce14a5ad80"}, + {file = "line_profiler-4.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e3ed5dd55bda1b0f65893ff377b6aedae69490f7be4fd5d818dd5bcc75553bf"}, + {file = "line_profiler-4.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f0ad37589b270e59f65ec6704435f02ece6d4246af112c0413095a5d3b13285b"}, + {file = "line_profiler-4.1.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c29ef65e3e0085f20ffedcddfa8d02f6f6eaa0dacec29129cd74d206f9f6c"}, + {file = "line_profiler-4.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ef054e1b6fd2443341911a2ddad0f8b6ed24903fa6a7e5e8201cd4272132e3a"}, + {file = "line_profiler-4.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:02bc0650ef8f87a489d6fbafcc0040ca76144d2a4c40e4044babccfe769b5525"}, + {file = "line_profiler-4.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f032c0973f0c1150440dce5f9b91509fce474c11b10c2c93a2109e1e0dab8a45"}, + {file = "line_profiler-4.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:ec8a34285338aadc6a74e91b022b6d8ea19ac5deaaa0c9b880a1ab7b4ed45c43"}, + {file = "line_profiler-4.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8ae10578f1325772ccfa2833288d826e4bc781214d74b87331a6b7e5793252ca"}, + {file = "line_profiler-4.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b7c89c68379879d3a11c5e76499f0f7a08683436762af6bf51db126d3cb9cdd9"}, + {file = "line_profiler-4.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9f4abf9ecb8b508d96420dde44d54a8484e73468132229bbba2229283a7e9fb"}, + {file = "line_profiler-4.1.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d12bf40ed654ad1d5c132be172054b9ec5ae3ba138ca2099002075fb14396a64"}, + {file = "line_profiler-4.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56d17f3bf22b9c7d72b3cb2d283d71152f4cc98e8ba88e720c743b2e3d9be6ad"}, + {file = "line_profiler-4.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9d7c7593ae86215d99d1d32e4b92ed6ace2ac8388aab781b74bf97d44e72ff1f"}, + {file = "line_profiler-4.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:248f16ba356ac1e19be834b0bdaf29c95c1c9229beaa63e0e3aad9aa3edfc012"}, + {file = "line_profiler-4.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:b85468d30ed16e362e8a044df0f331796c6ec5a76a55e88aae57078a2eec6afa"}, + {file = "line_profiler-4.1.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:82d5333d1ffac08b34828213bd674165e50876610061faa97660928b346a620d"}, + {file = 
"line_profiler-4.1.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f56985a885e2936eab6303fc82f1a20e5e0bb6d4d8f44f8a3825179d261053e"}, + {file = "line_profiler-4.1.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:713d43be1382f47c2f04d5d25ba3c65978292249849f85746a8476d6a8863717"}, + {file = "line_profiler-4.1.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:5d6a3dd7ba3a17da254338313ec1d4ce4bdd723812e5cb58f4d05b78c1c5dbe4"}, + {file = "line_profiler-4.1.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:481bbace88b2e15fb63a16e578a48faa28eba7399afe7da6ce1bde569780c346"}, + {file = "line_profiler-4.1.3-cp36-cp36m-win_amd64.whl", hash = "sha256:654b16f9e82b0ce7f7657ef859bf2324275e9cd70c8169414922c9cb37d5589f"}, + {file = "line_profiler-4.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:39332137af7a562c44524cef7c37de9860428ce2cde8b9c51047ccad9fd5eca4"}, + {file = "line_profiler-4.1.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dad96626acd5804c818c374d34ce1debea07b1e100b160499f4dfbcf5fc1cbe6"}, + {file = "line_profiler-4.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7125846d636959907e307c1f0bbf6f05fe5b7ca195b929f7b676fd20cf0763f2"}, + {file = "line_profiler-4.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a89de2a09363dd1a62a0a49e82a7157854b6e92b1893627b14e952412357db60"}, + {file = "line_profiler-4.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9e11f5831a251d3a3551372b523b3bc0da1e912ab2ade2c4d9d8e0b225eed6ab"}, + {file = "line_profiler-4.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:66d856975284dc62ac6f5a97757e160c1eb9898078014385cf74b829d8d806b7"}, + {file = "line_profiler-4.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3fb0f43900d36d7ccd8b30b8506498440d5ec610f2f1d40de3de11c3e304fb90"}, + {file = "line_profiler-4.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7394227bfb5bf15002d3695e674916fe82c38957cd2f56fccd43b71dc3447d1e"}, + {file = "line_profiler-4.1.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8e19a0ca3198b173a5b7caa304be3b39d122f89b0cfc2a134c5cbb4105ee2fd6"}, + {file = "line_profiler-4.1.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ad57e3c80fb0aee0c86a25d738e3556063eb3d57d0a43217de13f134417915d"}, + {file = "line_profiler-4.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cca919a8199236326f14f3719e992f30dd43a272b0e8fcb98e436a66e4a96fc"}, + {file = "line_profiler-4.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d6753834e1ea03ea19015d0553f0ce0d61bbf2269b85fc0f42833d616369488b"}, + {file = "line_profiler-4.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a32559afd550852f2054a441d33afe16e8b68b167ffb15373ec2b521c6fdc51f"}, + {file = "line_profiler-4.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:e526f9dfad5e8e21cd5345d5213757cfc26af33f072042f3ccff36b10c46a23c"}, + {file = "line_profiler-4.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5aec873bea3a1357c1a21f788b44d29e288df2a579b4433c8a85fc2b0a8c229d"}, + {file = "line_profiler-4.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6059a8960487fc1e7b333178d39c53d3de5fd3c7da04477019e70d13c4c8520c"}, + {file = "line_profiler-4.1.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2ac815ba3cdc8603de6b0ea57a725f4aea1e0a2b7d8c99fabb43f6f2b1670dc0"}, + {file = "line_profiler-4.1.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4ebd58a953fa86384150b79638331133ef0c22d8d68f046e00fe97e62053edae"}, + {file = "line_profiler-4.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c91e4cb038496e771220daccb512dab5311619392fec59ea916e9316630e9825"}, + {file = "line_profiler-4.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b4e4a49a42d4d9e1dce122dd0a5a427f9a337c22cf8a82712f006cae038870bf"}, + {file = "line_profiler-4.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:209d41401238eb0da340f92dfaf60dd84500be475b2b6738cf0ef28579b4df9a"}, + {file = "line_profiler-4.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:68684974e81344344174caf723bb4ab6659bc186d05c8f7e2453002e6bf74cff"}, + {file = "line_profiler-4.1.3.tar.gz", hash = "sha256:e5f1123c3672c3218ba063c23bd64a51159e44649fed6780b993c781fb5ed318"}, +] + +[package.extras] +all = ["Cython (>=3.0.3)", "IPython (>=7.14.0)", "IPython (>=7.18.0)", "IPython (>=8.12.2)", "IPython (>=8.14.0)", "cibuildwheel (>=2.11.2)", "cibuildwheel (>=2.11.2)", "cibuildwheel (>=2.11.2)", "cibuildwheel (>=2.11.2)", "cibuildwheel (>=2.11.2)", "cibuildwheel (>=2.8.1)", "cmake (>=3.21.2)", "coverage[toml] (>=6.1.1)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=7.3.0)", "ninja (>=1.10.2)", "pytest (>=6.2.5)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest-cov (>=3.0.0)", "rich (>=12.3.0)", "scikit-build (>=0.11.1)", "setuptools (>=41.0.1)", "setuptools (>=68.2.2)", "ubelt (>=1.3.4)", "xdoctest (>=1.1.3)"] +all-strict = ["Cython (==3.0.3)", "IPython (==7.14.0)", "IPython (==7.18.0)", "IPython (==8.12.2)", "IPython (==8.14.0)", "cibuildwheel (==2.11.2)", "cibuildwheel (==2.11.2)", "cibuildwheel (==2.11.2)", "cibuildwheel (==2.11.2)", "cibuildwheel (==2.11.2)", "cibuildwheel (==2.8.1)", "cmake (==3.21.2)", "coverage[toml] (==6.1.1)", "coverage[toml] (==6.5.0)", "coverage[toml] (==6.5.0)", "coverage[toml] (==6.5.0)", "coverage[toml] (==6.5.0)", "coverage[toml] (==7.3.0)", "ninja (==1.10.2)", "pytest (==6.2.5)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest-cov (==3.0.0)", "rich (==12.3.0)", "scikit-build (==0.11.1)", "setuptools (==41.0.1)", "setuptools (==68.2.2)", "ubelt (==1.3.4)", "xdoctest (==1.1.3)"] +ipython = ["IPython (>=7.14.0)", "IPython (>=7.18.0)", "IPython (>=8.12.2)", "IPython (>=8.14.0)"] +ipython-strict = ["IPython (==7.14.0)", "IPython (==7.18.0)", "IPython (==8.12.2)", "IPython (==8.14.0)"] +optional = ["IPython (>=7.14.0)", "IPython (>=7.18.0)", "IPython (>=8.12.2)", "IPython (>=8.14.0)", "rich (>=12.3.0)"] +optional-strict = ["IPython (==7.14.0)", "IPython (==7.18.0)", "IPython (==8.12.2)", "IPython (==8.14.0)", "rich (==12.3.0)"] +tests = ["coverage[toml] (>=6.1.1)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=6.5.0)", "coverage[toml] (>=7.3.0)", "pytest (>=6.2.5)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest (>=7.4.4)", "pytest-cov (>=3.0.0)", "ubelt (>=1.3.4)", "xdoctest (>=1.1.3)"] +tests-strict = ["coverage[toml] (==6.1.1)", "coverage[toml] (==6.5.0)", "coverage[toml] (==6.5.0)", "coverage[toml] (==6.5.0)", "coverage[toml] (==6.5.0)", "coverage[toml] (==7.3.0)", "pytest (==6.2.5)", "pytest (==7.4.4)", "pytest 
(==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest (==7.4.4)", "pytest-cov (==3.0.0)", "ubelt (==1.3.4)", "xdoctest (==1.1.3)"] + [[package]] name = "markdown" version = "3.7" @@ -1864,14 +1936,19 @@ files = [ [[package]] name = "paginate" -version = "0.5.6" +version = "0.5.7" description = "Divides large result sets into pages for easier browsing" optional = false python-versions = "*" files = [ - {file = "paginate-0.5.6.tar.gz", hash = "sha256:5e6007b6a9398177a7e1648d04fdd9f8c9766a1a945bceac82f1929e8c78af2d"}, + {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, + {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, ] +[package.extras] +dev = ["pytest", "tox"] +lint = ["black"] + [[package]] name = "pandas" version = "2.2.2" @@ -2300,13 +2377,13 @@ files = [ [[package]] name = "pyparsing" -version = "3.1.2" +version = "3.1.4" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.6.8" files = [ - {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, - {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, + {file = "pyparsing-3.1.4-py3-none-any.whl", hash = "sha256:a6a7ee4235a3f944aa1fa2249307708f893fe5717dc603503c6c7969c070fb7c"}, + {file = "pyparsing-3.1.4.tar.gz", hash = "sha256:f86ec8d1a83f11977c9a6ea7598e8c27fc5cddfa5b07ea2241edbbde1d7bc032"}, ] [package.extras] @@ -3375,4 +3452,4 @@ pandas = ["pandas"] [metadata] lock-version = "2.0" python-versions = "^3.10.1" -content-hash = "e4ab66e5097f53b77f911c19ecc21f9e076ac195268038139e8b813004bd087f" +content-hash = "5729109ba5b17f720ae0d1bcc010c9ea0eae2a0de066bf9c4bb187fa459d8f73" diff --git a/pyproject.toml b/pyproject.toml index b5868b7c..02bfab1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ optype = {version = "^0.6.1", extras = ["numpy"]} [tool.poetry.group.dev.dependencies] codespell = "^2.3.0" -hypothesis = {version = "^6.111.1", extras = ["numpy"]} +hypothesis = {version = "^6.111.2", extras = ["numpy"]} pre-commit = "^3.8.0" basedpyright = "^1.17.0" pytest = "^8.3.2" @@ -75,6 +75,7 @@ mkdocstrings = {version = "^0.25.2", extras = ["python"]} [tool.poetry.group.debug.dependencies] ipython = "^8.26.0" ipykernel = "^6.29.5" +line_profiler = "4.1.3" matplotlib = "^3.9.2" typing-extensions = "^4.12.2" diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 4112fe14..61271a6a 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,14 +1,15 @@ import functools -from typing import cast +from typing import Any, cast import numpy as np +import numpy.typing as npt import pytest from numpy.testing import assert_allclose as _assert_allclose +from scipy.stats import distributions from scipy.stats.distributions import tukeylambda, uniform import lmo.typing as lmt -import lmo.typing.scipy as lspt -from lmo.distributions import genlambda, l_poly, wakeby +from lmo.distributions import genlambda, kumaraswamy, l_poly, wakeby Q = np.linspace(1 / 100, 1, 99, endpoint=False) @@ -23,7 +24,7 @@ def test_l_poly_eq_uniform(trim: lmt.AnyTrim): p0 = x0 = np.linspace(0, 1) - X = cast('lspt.RVContinuous', uniform()) + X = cast(Any, uniform()) X_hat = l_poly(X.l_moment([1, 2], trim=trim), 
trim=trim) t4 = X.l_stats(trim=trim) @@ -51,24 +52,27 @@ def test_l_poly_eq_uniform(trim: lmt.AnyTrim): assert_allclose(H_hat, H) -@pytest.mark.parametrize('scale', [1, .5, 2]) +@pytest.mark.parametrize('scale', [1, 0.5, 2]) @pytest.mark.parametrize('loc', [0, 1, -1]) -@pytest.mark.parametrize(('b', 'd', 'f'), [ - (1, 0, 1), - (0, 0, 1), - (0, 0.9, 0), - (0, 1.2, 0), - (0.9, 0, 1), - (1.2, 0, 1), - (1, 1, .5), - (1, 1, .9), - (.8, 1.2, .4), - (.3, .3, .7), - (3, -2, .69), - (1, -0.9, .5), -]) +@pytest.mark.parametrize( + ('b', 'd', 'f'), + [ + (1, 0, 1), + (0, 0, 1), + (0, 0.9, 0), + (0, 1.2, 0), + (0.9, 0, 1), + (1.2, 0, 1), + (1, 1, 0.5), + (1, 1, 0.9), + (0.8, 1.2, 0.4), + (0.3, 0.3, 0.7), + (3, -2, 0.69), + (1, -0.9, 0.5), + ], +) def test_wakeby(b: float, d: float, f: float, loc: float, scale: float): - X = wakeby(b, d, f, loc, scale) + X = cast(Any, wakeby(b, d, f, loc, scale)) assert X.cdf(X.support()[0]) == 0 assert X.ppf(0) == X.support()[0] @@ -98,8 +102,8 @@ def test_wakeby(b: float, d: float, f: float, loc: float, scale: float): @pytest.mark.parametrize('lam', [0, 0.14, 1, -1]) def test_genlambda_tukeylamba(lam: float): - X0 = cast(lspt.RVContinuous, tukeylambda(lam)) - X = genlambda(lam, lam, 0) + X0 = cast(Any, tukeylambda(lam)) + X = cast(Any, genlambda(lam, lam, 0)) x0 = X0.ppf(Q) x = X.ppf(Q) @@ -107,7 +111,10 @@ def test_genlambda_tukeylamba(lam: float): assert x[-1] <= X.support()[1] assert_allclose(x, x0) - _pp = np.linspace(X0.ppf(0.05), X0.ppf(0.95), 100) + _pp = cast( + npt.NDArray[np.float64], + np.linspace(X0.ppf(0.05), X0.ppf(0.95), 100), + ) u0 = X0.cdf(_pp) u = X.cdf(_pp) assert_allclose(u, u0) @@ -135,10 +142,10 @@ def test_genlambda_tukeylamba(lam: float): # @pytest.mark.parametrize('scale', [1, .5, 2]) # @pytest.mark.parametrize('loc', [0, 1, -1]) @pytest.mark.parametrize('f', [0, 1, -1]) -@pytest.mark.parametrize('d', [0, .5, 2, -0.9, -1.95]) -@pytest.mark.parametrize('b', [0, .5, 1, -0.9, -1.95]) +@pytest.mark.parametrize('d', [0, 0.5, 2, -0.9, -1.95]) +@pytest.mark.parametrize('b', [0, 0.5, 1, -0.9, -1.95]) def test_genlambda(b: float, d: float, f: float): - X = genlambda(b, d, f) + X = cast(Any, genlambda(b, d, f)) assert X.cdf(X.support()[0]) == 0 assert X.ppf(0) == X.support()[0] @@ -180,3 +187,28 @@ def test_genlambda(b: float, d: float, f: float): assert tl_tau_quad[1] > 0 or np.isnan(tl_tau_quad[1]) tl_tau_theo = X.l_stats(trim=1) assert_allclose(tl_tau_theo, tl_tau_quad, atol=1e-7) + + +@pytest.mark.parametrize('trim', [(0, 0), (0, 1), (1, 1)]) +@pytest.mark.parametrize( + 'rv', + [ + distributions.uniform(), + distributions.logistic(), + distributions.expon(), + distributions.gumbel_r(), + distributions.genextreme(-0.1), + distributions.genpareto(0.1), + kumaraswamy(2, 5), + wakeby(5, 1, 0.6), + genlambda(.5, -1, -0.1), + ], +) +def test_exact_lm(rv: Any, trim: tuple[int, int]) -> None: + r = np.arange(1, 9) + # if `quad_opts` is not None, the numerical fallback is used + lm_quad = rv.l_moment(r, trim=trim, quad_opts={}) + lm_exact = rv.l_moment(r, trim=trim) + + assert not np.all(lm_exact == lm_quad) + assert_allclose(lm_exact, lm_quad)
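
The new `test_exact_lm` test above cross-checks the closed-form L-moment implementations against the numerical quadrature fallback by passing `quad_opts={}`. Below is a minimal sketch (not part of the patch) of the same check outside the test suite. It assumes that `lmo` and `scipy` are installed and that importing `lmo` is enough to register the `l_moment` method on frozen `scipy.stats` distributions, as the tests rely on; the distribution, orders, and trim are illustrative only.

```python
# Hedged sketch: compare exact L-moments with the quadrature fallback,
# mirroring the idea of `test_exact_lm` in tests/test_distributions.py.
import numpy as np
from scipy.stats import distributions

import lmo  # noqa: F401  -- assumed to register `l_moment` on scipy.stats distributions

rv = distributions.genpareto(0.1)  # illustrative distribution choice
r = np.arange(1, 9)                # L-moment orders 1..8
trim = (0, 1)                      # one of the trims used in the test

# Passing a (possibly empty) `quad_opts` dict forces the numerical
# quadrature fallback; omitting it uses the closed-form expressions.
lm_quad = rv.l_moment(r, trim=trim, quad_opts={})
lm_exact = rv.l_moment(r, trim=trim)

np.testing.assert_allclose(lm_exact, lm_quad)
```

Note that the test additionally asserts `not np.all(lm_exact == lm_quad)`, i.e. the two code paths must not return bit-identical values; this guards against the exact branch silently falling back to quadrature.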