Merge pull request #112 from jorenham/kumaraswamy

Kumaraswamy's distribution implementation
jorenham · Nov 28, 2023 · d031b5e · d031b5e
2 parents 711f4e6 + 5240454
commit d031b5e
Show file tree

Hide file tree

Showing 6 changed files with 248 additions and 61 deletions.
diff --git a/README.md b/README.md
@@ -41,8 +41,6 @@ So Lmo is as fast as sorting your samples (in terms of time-complexity).
 - Fast estimation of L-*co*moment matrices from your multidimensional data
   or multivariate distribution.
 - Goodness-of-fit test, using L-moment or L-moment ratio's.
-- Non-parametric estimation of continuous distributions
-  with `lmo.l_rv_nonparametric`
 - Exact (co)variance structure of the sample- and population L-moments.
 - Theoretical & empirical influence functions of L-moments & L-ratio's.
 - Complete [docs](https://jorenham.github.io/lmo/), including detailed API

diff --git a/docs/api.md b/docs/api.md
@@ -19,37 +19,37 @@
       filters: ["^l_co"]
       heading_level: 4
 
-### `scipy.stats` extensions
+### Distributions
 
-::: lmo.contrib.scipy_stats
+::: lmo.distributions
     options:
-      show_bases: false
-      members:
-      - l_rv_generic
       heading_level: 4
 
-### `pandas` extensions (optional)
+### Statistical test and tools
 
-::: lmo.contrib.pandas
+::: lmo.diagnostic
     options:
-      show_bases: false
-      members:
-      - Series
-      - DataFrame
       heading_level: 4
 
-### Distributions
+## Third party integration
 
-::: lmo
+### `scipy.stats`
+
+::: lmo.contrib.scipy_stats
     options:
+      show_bases: false
       members:
-      - l_rv_nonparametric
+      - l_rv_generic
       heading_level: 4
 
-### Statistical test and tools
+### `pandas` (optional)
 
-::: lmo.diagnostic
+::: lmo.contrib.pandas
     options:
+      show_bases: false
+      members:
+      - Series
+      - DataFrame
       heading_level: 4
 
 ## Low-level API

diff --git a/docs/distributions.md b/docs/distributions.md
@@ -1041,19 +1041,18 @@ Its general \( r \)-th trimmed L-moment are:
 \[
     \begin{equation}
         \tlmoment{s,t}{r} =
-            \beta \
-            \frac{r + s + t}{r}
-            \sum_{k = t}^{r + s + t - 1}
-                (-1)^k
-                \binom{k + r - 1}{k - t}
-                \binom{r + s + t - 1}{k}
-                \B\bigl(1 + 1 / \alpha,\ \beta + k \beta \bigr)
+            \frac{1}{r}
+            \sum_{k = t + 1}^{r + s + t}
+                (-1)^{k - 1}
+                \binom{r + k - 2}{r + t - 1}
+                \binom{r + s + t}{k}
+                \frac{\B\bigl(1 / \alpha,\ 1 + k \beta \bigr)}{\alpha}
             \label{eq:lr_kum}
     \end{equation}
 \]
 
-Unfortunately, the Kumaraswamy distribution is not implemented in
-`scipy.stats`.
+The Kumaraswamy distribution is implemented in
+[`lmo.distributions.kumaraswamy`][lmo.distributions.kumaraswamy].
 
 ### Wakeby
 
@@ -1073,28 +1072,6 @@ Each of the scale- \( \alpha, \gamma \) and shape parameters
 Lmo figured out that the L-moments with any order \( r \in \mathbb{N}_{\ge 1} \)
 and trim \( s, t \in \mathbb{N}^2_{\ge 1} \) can be expressed as
 
-<!-- \[
-\begin{equation}
-    \tlmoment{s,t}{r}
-        =
-        \frac{\gamma}{r \delta}
-        \frac
-            {\B(\delta + r - 1,\ t - \delta + 1)}
-            {\B(\delta,\ r + s + t - \delta + 1)}
-        -
-        \frac{\alpha}{r \beta}
-        \frac
-            {\B(-\beta + r - 1,\ t + \beta + 1)}
-            {\B(-\beta,\ r + s + t + \beta + 1)}
-    +
-    \begin{cases}
-         \mu + \frac \alpha \beta - \frac \gamma \delta
-            & \text{if } r = 1 \\
-        0
-            & \text{if } r > 1
-    \end{cases}
-\end{equation}
-\] -->
 \[
 \begin{equation}
     \tlmoment{s,t}{r}

diff --git a/lmo/__init__.py b/lmo/__init__.py
@@ -31,15 +31,10 @@
     'l_comoment',
     'l_coratio',
     'l_costats',
-
-    'l_rv_nonparametric',
 )
 
 from typing import TYPE_CHECKING, Final
 
-from ._distns import (
-    l_rv_nonparametric,
-)
 from ._lm import (
     l_kurtosis,
     l_loc,
@@ -69,7 +64,7 @@
 from ._meta import get_version as _get_version
 
 if not TYPE_CHECKING:
-# install contrib module extensions
+    # install contrib module extensions
     from .contrib import install as _install
 
     _install()

diff --git a/lmo/_distns.py → lmo/distributions.py b/lmo/_distns.py → lmo/distributions.py
@@ -1,22 +1,30 @@
-# pyright: reportIncompatibleMethodOverride=false
+"""Probability distributions, compatible with [`scipy.stats`][scipy.stats]."""
+
+__all__ = (
+    'l_rv_nonparametric',
+    'kumaraswamy',
+)
 
-__all__ = ('l_rv_nonparametric',)
+# pyright: reportIncompatibleMethodOverride=false
 
 import functools
 import math
 import warnings
-from collections.abc import Callable, Mapping
+from collections.abc import Callable, Mapping, Sequence
 from typing import (
     Any,
     Final,
     SupportsIndex,
+    TypeAlias,
     TypeVar,
     cast,
 )
 
 import numpy as np
 import numpy.polynomial as npp
 import numpy.typing as npt
+import scipy.special as sc  # type: ignore
+from scipy.stats._distn_infrastructure import _ShapeInfo  # type: ignore
 from scipy.stats.distributions import (  # type: ignore
     rv_continuous,
 )
@@ -27,10 +35,13 @@
     clean_trim,
 )
 from .diagnostic import l_ratio_bounds
+from .special import harmonic
+from .theoretical import l_moment_from_cdf
 from .typing import (
     AnyTrim,
     FloatVector,
     PolySeries,
+    QuadOptions,
 )
 
 T = TypeVar('T')
@@ -42,6 +53,8 @@
 _F_EPS: Final[np.float64] = np.finfo(float).eps
 
 
+# Non-parametric
+
 def _check_lmoments(l_r: npt.NDArray[np.floating[Any]], s: float, t: float):
     if (n := len(l_r)) < 2:
         msg = f'at least 2 L-moments required, got {n}'
@@ -83,7 +96,6 @@ def _ppf_poly_series(
         symbol='q',
     )
 
-
 class l_rv_nonparametric(rv_continuous):  # noqa: N801
     r"""
     Estimate a distribution using the given L-moments.
@@ -356,7 +368,9 @@ def fit(
                 to the provided `l_moments`.
 
         Returns:
-            A fitted [`l_rv_nonparametric`][lmo.l_rv_nonparametric] instance.
+            A fitted
+            [`l_rv_nonparametric`][lmo.distributions.l_rv_nonparametric]
+            instance.
 
         Todo:
             - Optimal `rmax` selection (the error appears to be periodic..?)
@@ -387,3 +401,158 @@ def fit(
 
         return cls(l_r, trim=_trim, a=a, b=b)
 
+
+# Parametric
+
+
+_ArrF8: TypeAlias = npt.NDArray[np.float64]
+
+def _l_moment_kumaraswamy_single(
+    r: int,
+    s: int,
+    t: int,
+    a: float,
+    b: float,
+) -> float:
+    if r == 0:
+        return 1.0
+
+    k = np.arange(t + 1, r + s + t + 1)
+    return (
+        (-1)**(k - 1)
+        * cast(_ArrF8, sc.comb(r + k - 2, r + t - 1))  # type: ignore
+        * cast(_ArrF8, sc.comb(r + s + t, k))  # type: ignore
+        * cast(_ArrF8, sc.beta(1 / a, 1 + k * b)) / a  # type: ignore
+    ).sum() / r
+
+_l_moment_kumaraswamy = np.vectorize(
+    _l_moment_kumaraswamy_single,
+    otypes=[float],
+    excluded={1, 2, 3, 4},
+)
+
+
+class kumaraswamy_gen(rv_continuous):  # noqa: N801
+    def _shape_info(self) -> Sequence[_ShapeInfo]:
+        ia = _ShapeInfo('a', False, (0, np.inf), (False, False))
+        ib = _ShapeInfo('b', False, (0, np.inf), (False, False))
+        return [ia, ib]
+
+    def _pdf(
+        self,
+        x: npt.NDArray[np.float64],
+        a: float,
+        b: float,
+    ) -> npt.NDArray[np.float64]:
+        return a * b * x**(a - 1) * (1 - x**a)**(b - 1)
+
+    def _logpdf(
+        self,
+        x: npt.NDArray[np.float64],
+        a: float,
+        b: float,
+    ) -> npt.NDArray[np.float64]:
+        return (
+            np.log(a * b)
+            + (a - 1) * np.log(x)
+            + (b - 1) * np.log(1 - x**a)
+        )
+
+    def _cdf(
+        self,
+        x: npt.NDArray[np.float64],
+        a: float,
+        b: float,
+    ) -> npt.NDArray[np.float64]:
+        return 1 - (1 - x**a)**b
+
+    def _sf(
+        self,
+        x: npt.NDArray[np.float64],
+        a: float,
+        b: float,
+    ) -> npt.NDArray[np.float64]:
+        return (1 - x**a)**(b - 1)
+
+    def _isf(
+        self,
+        q: npt.NDArray[np.float64],
+        a: float,
+        b: float,
+    ) -> npt.NDArray[np.float64]:
+        return (1 - q**(1 / b))**(1 / a)
+
+    def _ppf(
+        self,
+        q: npt.NDArray[np.float64],
+        a: float,
+        b: float,
+    ) -> npt.NDArray[np.float64]:
+        return (1 - (1 - q)**(1 / b))**(1 / a)
+
+    def _entropy(self, a: float, b: float) -> float:
+        # https://en.wikipedia.org/wiki/Kumaraswamy_distribution
+        return (1 - 1 / b) + (1 - 1 / a) * harmonic(b) - np.log(a * b)
+
+    def _munp(
+        self,
+        n: int,
+        a: float,
+        b: float,
+    ) -> float:
+        return b * cast(float, sc.beta(1 + n / a, b))  # type: ignore
+
+    def _l_moment(
+        self,
+        r: npt.NDArray[np.int64],
+        a: float,
+        b: float,
+        trim: tuple[int, int] | tuple[float, float],
+        quad_opts: QuadOptions | None = None,
+    ) -> _ArrF8:
+        s, t = trim
+        lmbda_r: float | npt.NDArray[np.float64]
+        if isinstance(s, float) or isinstance(t, float):
+            lmbda_r = cast(
+                float | npt.NDArray[np.float64],
+                l_moment_from_cdf(
+                    functools.partial(self._cdf, a=a, b=b), # type: ignore
+                    r,
+                    trim=trim,
+                    support=(0, 1),
+                    ppf=functools.partial(self._ppf, a=a, b=b), # type: ignore
+                    quad_opts=quad_opts,
+                ),  # type: ignore
+            )
+            return np.asarray(lmbda_r)
+
+        return np.atleast_1d(
+            cast(_ArrF8, _l_moment_kumaraswamy(r, s, t, a, b)),
+        )
+
+
+kumaraswamy: Final[rv_continuous] = kumaraswamy_gen(
+    a=0.0,
+    b=1.0,
+    name='kumaraswamy',
+)
+r"""
+A Kumaraswamy random variable, similar to
+[`scipy.stats.beta`][scipy.stats.beta].
+
+The probability density function for
+[`kumaraswamy`][lmo.distributions.kumaraswamy] is:
+
+\[
+    f(x, a, b) = a x^{a - 1} b \left(1 - x^a\right)^{b - 1}
+\]
+
+for \( 0 < x < 1,\ a > 0,\ b > 0 \).
+
+[`kumaraswamy`][kumaraswamy] takes \( a \) and \( b \) as shape parameters.
+
+See Also:
+    - [Kumaraswamy distribution - Wikipedia
+    ](https://wikipedia.org/wiki/Kumaraswamy_distribution)
+
+"""