From 7d8a5c5ea8473e919f89ded6922d0f3be613b438 Mon Sep 17 00:00:00 2001
From: xieyj17 <theyjxie@gmail.com>
Date: Sun, 31 Dec 2023 14:20:12 -0500
Subject: [PATCH 1/7] Add continuous class for Generalized Pareto distribution.

---
 pymc_experimental/distributions/continuous.py | 152 ++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
index dcf9b775..677cc961 100644
--- a/pymc_experimental/distributions/continuous.py
+++ b/pymc_experimental/distributions/continuous.py
@@ -221,6 +221,158 @@ def moment(rv, size, mu, sigma, xi):
         return mode
 
 
+# Generalized Pareto Distribution
+class GenParetoRV(RandomVariable):
+    name: str = "Generalized Pareto Distribution"
+    ndim_supp: int = 0
+    ndims_params: List[int] = [0, 0, 0]
+    dtype: str = "floatX"
+    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")
+
+    def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
+        return super().__call__(mu, sigma, xi, size=size, **kwargs)
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: Union[np.random.RandomState, np.random.Generator],
+        mu: np.ndarray,
+        sigma: np.ndarray,
+        xi: np.ndarray,
+        size: Tuple[int, ...],
+    ) -> np.ndarray:
+        # using scipy's parameterization
+        return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size)
+
+
+gp = GenParetoRV()
+
+
+class GenPareto(Continuous):
+    r"""
+    The Generalized Pareto Distribution
+
+    The pdf of this distribution is
+
+    .. math::
+
+       f(x \mid \mu, \sigma, \xi) = \frac{1}{\sigma} (1 + \xi z)^{-1/\xi-1}
+
+    where
+
+    .. math::
+
+        z = \frac{x - \mu}{\sigma}
+
+    and is defined on the set:
+
+    .. math::
+
+        \left\{x: x \geq \mu \right\} if \xi \geq 0, or \\
+        \left\{x: \mu \leq x \leq \mu - \sigma/\xi \right\} if \xi < 0.
+
+    .. plot::
+
+        import matplotlib.pyplot as plt
+        import numpy as np
+        import scipy.stats as st
+        import arviz as az
+        plt.style.use('arviz-darkgrid')
+        x = np.linspace(-2, 10, 200)
+        mus = [0., 0., 0., 0., 1., ]
+        sigmas = [1., 1., 1.,2., 1., ]
+        xis = [1., 0., 5., 1., 1.,  ]
+        for mu, sigma, xi in zip(mus, sigmas, xis):
+            pdf = st.genpareto.pdf(x, c=xi, loc=mu, scale=sigma)
+            plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}')
+        plt.xlabel('x', fontsize=12)
+        plt.ylabel('f(x)', fontsize=12)
+        plt.legend(loc=1)
+        plt.show()
+
+
+    ========  =========================================================================
+    Support   * :math:`x \geq \mu`, when :math:`\xi \geq 0`
+              * :math:`\mu \leq x \leq \mu - \sigma/\xi` when :math:`\xi < 0`
+    Mean      * :math:`\mu + \frac{\sigma}{1-\xi}`, when :math:`\xi < 1`
+    Variance  * :math:`\frac{\sigma^2}{(1-\xi)^2 (1-2\xi)}`, when :math:`\xi < 0.5`
+    ========  =========================================================================
+
+    Parameters
+    ----------
+    mu : float
+        Location parameter.
+    sigma : float
+        Scale parameter (sigma > 0).
+    xi : float
+        Shape parameter
+
+    """
+
+    rv_op = gp
+
+    @classmethod
+    def dist(cls, mu=0, sigma=1, xi=0, **kwargs):
+        mu = pt.as_tensor_variable(floatX(mu))
+        sigma = pt.as_tensor_variable(floatX(sigma))
+        xi = pt.as_tensor_variable(floatX(xi))
+
+        return super().dist([mu, sigma, xi], **kwargs)
+
+    def logp(value, mu, sigma, xi):
+        """
+        Calculate log-probability of Generalized Pareto distribution
+        at specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or Pytensor tensor
+
+        Returns
+        -------
+        TensorVariable
+        """
+        scaled = (value - mu) / sigma
+        p = (1 / sigma) * (1 + xi * scaled) ** (-1 / xi - 1)
+        logprob = pt.switch(pt.gt(scaled, 0.0), pt.log(p), -np.inf)
+        return check_parameters(logprob, sigma > 0, msg="sigma > 0")
+
+    def logcdf(value, mu, sigma, xi):
+        """
+        Compute the log of the cumulative distribution function for Generalized Pareto
+        distribution at the specified value.
+
+        Parameters
+        ----------
+        value: numeric or np.ndarray or `TensorVariable`
+            Value(s) for which log CDF is calculated. If the log CDF for
+            multiple values are desired the values must be provided in a numpy
+            array or `TensorVariable`.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        scaled = (value - mu) / sigma
+        logc_expression = 1 - (1 + xi * scaled) ** (-1 / xi)
+
+        logc = pt.switch(sigma > 0, pt.log(logc_expression), -np.inf)
+
+        return check_parameters(logc, sigma > 0, msg="sigma > 0")
+
+    def moment(rv, size, mu, sigma, xi):
+        r"""
+        Mean is defined when :math:`\xi < 1`
+        """
+        mean_expression = mu + sigma / (1 - xi)
+        mean = pt.switch(xi < 1, mean_expression, np.inf)
+        if not rv_size_is_none(size):
+            mean = pt.full(size, mean)
+        return check_parameters(mean, xi < 1, msg="xi < 1")
+
+
 class Chi:
     r"""
     :math:`\chi` log-likelihood.

From 22a3b0b5a65065f5b6bba0c77826871d231d93b8 Mon Sep 17 00:00:00 2001
From: Rob Zinkov <rob@zinkov.com>
Date: Mon, 1 Jan 2024 17:55:21 +0100
Subject: [PATCH 2/7] Remove use of treedict, addresses #291

---
 pymc_experimental/model/marginal_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc_experimental/model/marginal_model.py b/pymc_experimental/model/marginal_model.py
index 1eb23ff2..1f8e4531 100644
--- a/pymc_experimental/model/marginal_model.py
+++ b/pymc_experimental/model/marginal_model.py
@@ -84,7 +84,7 @@ class MarginalModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.marginalized_rvs = []
-        self._marginalized_named_vars_to_dims = treedict()
+        self._marginalized_named_vars_to_dims = {}
 
     def _delete_rv_mappings(self, rv: TensorVariable) -> None:
         """Remove all model mappings referring to rv

From cb0ac01b592b4b9944e3504517278f9d5653f318 Mon Sep 17 00:00:00 2001
From: xieyj17 <theyjxie@gmail.com>
Date: Mon, 1 Jan 2024 14:36:59 -0500
Subject: [PATCH 3/7] add unit testss

---
 pymc_experimental/distributions/__init__.py   |  8 ++-
 pymc_experimental/distributions/continuous.py | 33 +++++-----
 .../tests/distributions/test_continuous.py    | 60 ++++++++++++++++++-
 3 files changed, 85 insertions(+), 16 deletions(-)

diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py
index f06db969..822db0b9 100644
--- a/pymc_experimental/distributions/__init__.py
+++ b/pymc_experimental/distributions/__init__.py
@@ -17,7 +17,12 @@
 Experimental probability distributions for stochastic nodes in PyMC.
 """
 
-from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions.continuous import (
+    Chi,
+    GenExtreme,
+    GenPareto,
+    Maxwell,
+)
 from pymc_experimental.distributions.discrete import (
     BetaNegativeBinomial,
     GeneralizedPoisson,
@@ -32,6 +37,7 @@
     "DiscreteMarkovChain",
     "GeneralizedPoisson",
     "GenExtreme",
+    "GenPareto",
     "R2D2M2CP",
     "Skellam",
     "histogram_approximation",
diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
index 677cc961..73804aa6 100644
--- a/pymc_experimental/distributions/continuous.py
+++ b/pymc_experimental/distributions/continuous.py
@@ -264,13 +264,11 @@ class GenPareto(Continuous):
 
         z = \frac{x - \mu}{\sigma}
 
-    and is defined on the set:
+    and is defined on the set (when :math:`\xi \geq 0`):
 
     .. math::
 
-        \left\{x: x \geq \mu \right\} if \xi \geq 0, or \\
-        \left\{x: \mu \leq x \leq \mu - \sigma/\xi \right\} if \xi < 0.
-
+        \left\{x: x \geq \mu \right\}
     .. plot::
 
         import matplotlib.pyplot as plt
@@ -293,7 +291,6 @@ class GenPareto(Continuous):
 
     ========  =========================================================================
     Support   * :math:`x \geq \mu`, when :math:`\xi \geq 0`
-              * :math:`\mu \leq x \leq \mu - \sigma/\xi` when :math:`\xi < 0`
     Mean      * :math:`\mu + \frac{\sigma}{1-\xi}`, when :math:`\xi < 1`
     Variance  * :math:`\frac{\sigma^2}{(1-\xi)^2 (1-2\xi)}`, when :math:`\xi < 0.5`
     ========  =========================================================================
@@ -305,8 +302,7 @@ class GenPareto(Continuous):
     sigma : float
         Scale parameter (sigma > 0).
     xi : float
-        Shape parameter
-
+        Shape parameter (xi >= 0)
     """
 
     rv_op = gp
@@ -334,10 +330,16 @@ def logp(value, mu, sigma, xi):
         -------
         TensorVariable
         """
+
         scaled = (value - mu) / sigma
-        p = (1 / sigma) * (1 + xi * scaled) ** (-1 / xi - 1)
-        logprob = pt.switch(pt.gt(scaled, 0.0), pt.log(p), -np.inf)
-        return check_parameters(logprob, sigma > 0, msg="sigma > 0")
+
+        logp_expression = pt.switch(
+            pt.eq(xi, 0),
+            -1 * scaled,
+            -1 * pt.log(sigma) - ((xi + 1) / xi) * pt.log1p(xi * scaled),
+        )
+        logp = pt.switch(pt.ge(scaled, 0), logp_expression, -np.inf)
+        return check_parameters(logp, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")
 
     def logcdf(value, mu, sigma, xi):
         """
@@ -356,11 +358,14 @@ def logcdf(value, mu, sigma, xi):
         TensorVariable
         """
         scaled = (value - mu) / sigma
-        logc_expression = 1 - (1 + xi * scaled) ** (-1 / xi)
-
-        logc = pt.switch(sigma > 0, pt.log(logc_expression), -np.inf)
+        logc_expression = pt.switch(
+            pt.eq(xi, 0),
+            pt.log(1 - pt.exp(-1 * scaled)),
+            pt.log(1 - pt.pow((1 + xi * scaled), (-1 / xi))),
+        )
+        logc = pt.switch(pt.ge(scaled, 0), logc_expression, -np.inf)
 
-        return check_parameters(logc, sigma > 0, msg="sigma > 0")
+        return check_parameters(logc, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")
 
     def moment(rv, size, mu, sigma, xi):
         r"""
diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py
index 5df4aef1..083febf6 100644
--- a/pymc_experimental/tests/distributions/test_continuous.py
+++ b/pymc_experimental/tests/distributions/test_continuous.py
@@ -33,7 +33,7 @@
 )
 
 # the distributions to be tested
-from pymc_experimental.distributions import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions import Chi, GenExtreme, GenPareto, Maxwell
 
 
 class TestGenExtremeClass:
@@ -138,6 +138,64 @@ class TestGenExtreme(BaseTestDistributionRandom):
     ]
 
 
+class TestGenParetoClass:
+    """
+    Wrapper class so that tests of experimental additions can be dropped into
+    PyMC directly on adoption.
+
+    pm.logp(GenPareto.dist(mu=0.,sigma=1.,xi=5.),value=1.)
+    """
+
+    def test_logp(self):
+        def ref_logp(value, mu, sigma, xi):
+            if xi == 0:
+                return sp.expon.logpdf((value - mu) / sigma)
+            else:
+                return sp.genpareto.logpdf(value, c=xi, loc=mu, scale=sigma)
+
+        check_logp(
+            GenPareto,
+            R,
+            {"mu": R, "sigma": Rplusbig, "xi": Rplusbig},
+            ref_logp,
+            decimal=select_by_precision(float64=6, float32=2),
+            skip_paramdomain_outside_edge_test=True,
+        )
+
+    def test_logcdf(self):
+        def ref_logc(value, mu, sigma, xi):
+            if xi == 0:
+                return sp.expon.logcdf((value - mu) / sigma)
+            else:
+                return sp.genpareto.logcdf(value, c=xi, loc=mu, scale=sigma)
+
+        check_logcdf(
+            GenPareto,
+            R,
+            {
+                "mu": R,
+                "sigma": Rplusbig,
+                "xi": Rplusbig,
+            },
+            ref_logc,
+            decimal=select_by_precision(float64=6, float32=2),
+            skip_paramdomain_outside_edge_test=True,
+        )
+
+
+class TestGenPareto(BaseTestDistributionRandom):
+    pymc_dist = GenPareto
+    pymc_dist_params = {"mu": 0, "sigma": 1, "xi": 1}
+    expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": 1}
+    reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1}
+    reference_dist = seeded_scipy_distribution_builder("genpareto")
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
 class TestChiClass:
     """
     Wrapper class so that tests of experimental additions can be dropped into

From c5ceda2815f934fc00bb7f39020e4eda40d46298 Mon Sep 17 00:00:00 2001
From: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>
Date: Wed, 3 Jan 2024 11:21:24 +0100
Subject: [PATCH 4/7] Update version.txt

---
 pymc_experimental/version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc_experimental/version.txt b/pymc_experimental/version.txt
index ceddfb28..e3b86dd9 100644
--- a/pymc_experimental/version.txt
+++ b/pymc_experimental/version.txt
@@ -1 +1 @@
-0.0.15
+0.0.16

From fd112718c4a0ad14cabec3037b3c946504ce7518 Mon Sep 17 00:00:00 2001
From: xieyj17 <theyjxie@gmail.com>
Date: Wed, 3 Jan 2024 22:59:48 -0500
Subject: [PATCH 5/7] address comments

---
 pymc_experimental/distributions/continuous.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
index 73804aa6..2342380d 100644
--- a/pymc_experimental/distributions/continuous.py
+++ b/pymc_experimental/distributions/continuous.py
@@ -30,7 +30,7 @@
 from pymc.distributions.shape_utils import rv_size_is_none
 from pymc.logprob.utils import CheckParameterValue
 from pymc.pytensorf import floatX
-from pytensor.tensor.random.op import RandomVariable
+from pytensor.tensor.random.op import RandomVariable, ScipyRandomVariable
 from pytensor.tensor.variable import TensorVariable
 from scipy import stats
 
@@ -222,12 +222,12 @@ def moment(rv, size, mu, sigma, xi):
 
 
 # Generalized Pareto Distribution
-class GenParetoRV(RandomVariable):
-    name: str = "Generalized Pareto Distribution"
+class GenParetoRV(ScipyRandomVariable):
+    name: str = "Generalized Pareto"
     ndim_supp: int = 0
     ndims_params: List[int] = [0, 0, 0]
     dtype: str = "floatX"
-    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")
+    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GenPareto}")
 
     def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
         return super().__call__(mu, sigma, xi, size=size, **kwargs)

From 140a7b8752fca3cec76075e242e3e1637cb11a41 Mon Sep 17 00:00:00 2001
From: xieyj17 <theyjxie@gmail.com>
Date: Wed, 3 Jan 2024 23:02:52 -0500
Subject: [PATCH 6/7] address comments

---
 pymc_experimental/distributions/continuous.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
index 2342380d..3efbb748 100644
--- a/pymc_experimental/distributions/continuous.py
+++ b/pymc_experimental/distributions/continuous.py
@@ -229,9 +229,6 @@ class GenParetoRV(ScipyRandomVariable):
     dtype: str = "floatX"
     _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GenPareto}")
 
-    def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
-        return super().__call__(mu, sigma, xi, size=size, **kwargs)
-
     @classmethod
     def rng_fn(
         cls,
@@ -245,7 +242,7 @@ def rng_fn(
         return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size)
 
 
-gp = GenParetoRV()
+gen_pareto = GenParetoRV()
 
 
 class GenPareto(Continuous):
@@ -302,10 +299,10 @@ class GenPareto(Continuous):
     sigma : float
         Scale parameter (sigma > 0).
     xi : float
-        Shape parameter (xi >= 0)
+        Shape parameter (xi >= 0). Notice that we are using a more restrictive definition for Generalized Pareto Distribution (xi can be smaller than 0). We only include :math:`\xi \geq 0` since it's more commonly used for modelling the tails.
     """
 
-    rv_op = gp
+    rv_op = gen_pareto
 
     @classmethod
     def dist(cls, mu=0, sigma=1, xi=0, **kwargs):

From d5054b2d5aa5b9b345f7803941929a3b353a397b Mon Sep 17 00:00:00 2001
From: xieyj17 <theyjxie@gmail.com>
Date: Wed, 3 Jan 2024 23:06:38 -0500
Subject: [PATCH 7/7] add entry to api reference

---
 docs/api_reference.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/api_reference.rst b/docs/api_reference.rst
index dfc43968..fd508701 100644
--- a/docs/api_reference.rst
+++ b/docs/api_reference.rst
@@ -35,6 +35,7 @@ Distributions
    GeneralizedPoisson
    BetaNegativeBinomial
    GenExtreme
+   GenPareto
    R2D2M2CP
    Skellam
    histogram_approximation