From 7d8a5c5ea8473e919f89ded6922d0f3be613b438 Mon Sep 17 00:00:00 2001 From: xieyj17 Date: Sun, 31 Dec 2023 14:20:12 -0500 Subject: [PATCH 1/7] Add continuous class for Generalized Pareto distribution. --- pymc_experimental/distributions/continuous.py | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index dcf9b775..677cc961 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -221,6 +221,158 @@ def moment(rv, size, mu, sigma, xi): return mode +# Generalized Pareto Distribution +class GenParetoRV(RandomVariable): + name: str = "Generalized Pareto Distribution" + ndim_supp: int = 0 + ndims_params: List[int] = [0, 0, 0] + dtype: str = "floatX" + _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}") + + def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable: + return super().__call__(mu, sigma, xi, size=size, **kwargs) + + @classmethod + def rng_fn( + cls, + rng: Union[np.random.RandomState, np.random.Generator], + mu: np.ndarray, + sigma: np.ndarray, + xi: np.ndarray, + size: Tuple[int, ...], + ) -> np.ndarray: + # using scipy's parameterization + return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size) + + +gp = GenParetoRV() + + +class GenPareto(Continuous): + r""" + The Generalized Pareto Distribution + + The pdf of this distribution is + + .. math:: + + f(x \mid \mu, \sigma, \xi) = \frac{1}{\sigma} (1 + \xi z)^{-1/\xi-1} + + where + + .. math:: + + z = \frac{x - \mu}{\sigma} + + and is defined on the set: + + .. math:: + + \left\{x: x \geq \mu \right\} if \xi \geq 0, or \\ + \left\{x: \mu \leq x \leq \mu - \sigma/\xi \right\} if \xi < 0. + + .. plot:: + + import matplotlib.pyplot as plt + import numpy as np + import scipy.stats as st + import arviz as az + plt.style.use('arviz-darkgrid') + x = np.linspace(-2, 10, 200) + mus = [0., 0., 0., 0., 1., ] + sigmas = [1., 1., 1.,2., 1., ] + xis = [1., 0., 5., 1., 1., ] + for mu, sigma, xi in zip(mus, sigmas, xis): + pdf = st.genpareto.pdf(x, c=xi, loc=mu, scale=sigma) + plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}') + plt.xlabel('x', fontsize=12) + plt.ylabel('f(x)', fontsize=12) + plt.legend(loc=1) + plt.show() + + + ======== ========================================================================= + Support * :math:`x \geq \mu`, when :math:`\xi \geq 0` + * :math:`\mu \leq x \leq \mu - \sigma/\xi` when :math:`\xi < 0` + Mean * :math:`\mu + \frac{\sigma}{1-\xi}`, when :math:`\xi < 1` + Variance * :math:`\frac{\sigma^2}{(1-\xi)^2 (1-2\xi)}`, when :math:`\xi < 0.5` + ======== ========================================================================= + + Parameters + ---------- + mu : float + Location parameter. + sigma : float + Scale parameter (sigma > 0). + xi : float + Shape parameter + + """ + + rv_op = gp + + @classmethod + def dist(cls, mu=0, sigma=1, xi=0, **kwargs): + mu = pt.as_tensor_variable(floatX(mu)) + sigma = pt.as_tensor_variable(floatX(sigma)) + xi = pt.as_tensor_variable(floatX(xi)) + + return super().dist([mu, sigma, xi], **kwargs) + + def logp(value, mu, sigma, xi): + """ + Calculate log-probability of Generalized Pareto distribution + at specified value. + + Parameters + ---------- + value: numeric + Value(s) for which log-probability is calculated. If the log probabilities for multiple + values are desired the values must be provided in a numpy array or Pytensor tensor + + Returns + ------- + TensorVariable + """ + scaled = (value - mu) / sigma + p = (1 / sigma) * (1 + xi * scaled) ** (-1 / xi - 1) + logprob = pt.switch(pt.gt(scaled, 0.0), pt.log(p), -np.inf) + return check_parameters(logprob, sigma > 0, msg="sigma > 0") + + def logcdf(value, mu, sigma, xi): + """ + Compute the log of the cumulative distribution function for Generalized Pareto + distribution at the specified value. + + Parameters + ---------- + value: numeric or np.ndarray or `TensorVariable` + Value(s) for which log CDF is calculated. If the log CDF for + multiple values are desired the values must be provided in a numpy + array or `TensorVariable`. + + Returns + ------- + TensorVariable + """ + scaled = (value - mu) / sigma + logc_expression = 1 - (1 + xi * scaled) ** (-1 / xi) + + logc = pt.switch(sigma > 0, pt.log(logc_expression), -np.inf) + + return check_parameters(logc, sigma > 0, msg="sigma > 0") + + def moment(rv, size, mu, sigma, xi): + r""" + Mean is defined when :math:`\xi < 1` + """ + mean_expression = mu + sigma / (1 - xi) + mean = pt.switch(xi < 1, mean_expression, np.inf) + if not rv_size_is_none(size): + mean = pt.full(size, mean) + return check_parameters(mean, xi < 1, msg="xi < 1") + + class Chi: r""" :math:`\chi` log-likelihood. From 22a3b0b5a65065f5b6bba0c77826871d231d93b8 Mon Sep 17 00:00:00 2001 From: Rob Zinkov Date: Mon, 1 Jan 2024 17:55:21 +0100 Subject: [PATCH 2/7] Remove use of treedict, addresses #291 --- pymc_experimental/model/marginal_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc_experimental/model/marginal_model.py b/pymc_experimental/model/marginal_model.py index 1eb23ff2..1f8e4531 100644 --- a/pymc_experimental/model/marginal_model.py +++ b/pymc_experimental/model/marginal_model.py @@ -84,7 +84,7 @@ class MarginalModel(Model): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.marginalized_rvs = [] - self._marginalized_named_vars_to_dims = treedict() + self._marginalized_named_vars_to_dims = {} def _delete_rv_mappings(self, rv: TensorVariable) -> None: """Remove all model mappings referring to rv From cb0ac01b592b4b9944e3504517278f9d5653f318 Mon Sep 17 00:00:00 2001 From: xieyj17 Date: Mon, 1 Jan 2024 14:36:59 -0500 Subject: [PATCH 3/7] add unit testss --- pymc_experimental/distributions/__init__.py | 8 ++- pymc_experimental/distributions/continuous.py | 33 +++++----- .../tests/distributions/test_continuous.py | 60 ++++++++++++++++++- 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py index f06db969..822db0b9 100644 --- a/pymc_experimental/distributions/__init__.py +++ b/pymc_experimental/distributions/__init__.py @@ -17,7 +17,12 @@ Experimental probability distributions for stochastic nodes in PyMC. """ -from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell +from pymc_experimental.distributions.continuous import ( + Chi, + GenExtreme, + GenPareto, + Maxwell, +) from pymc_experimental.distributions.discrete import ( BetaNegativeBinomial, GeneralizedPoisson, @@ -32,6 +37,7 @@ "DiscreteMarkovChain", "GeneralizedPoisson", "GenExtreme", + "GenPareto", "R2D2M2CP", "Skellam", "histogram_approximation", diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index 677cc961..73804aa6 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -264,13 +264,11 @@ class GenPareto(Continuous): z = \frac{x - \mu}{\sigma} - and is defined on the set: + and is defined on the set (when :math:`\xi \geq 0`): .. math:: - \left\{x: x \geq \mu \right\} if \xi \geq 0, or \\ - \left\{x: \mu \leq x \leq \mu - \sigma/\xi \right\} if \xi < 0. - + \left\{x: x \geq \mu \right\} .. plot:: import matplotlib.pyplot as plt @@ -293,7 +291,6 @@ class GenPareto(Continuous): ======== ========================================================================= Support * :math:`x \geq \mu`, when :math:`\xi \geq 0` - * :math:`\mu \leq x \leq \mu - \sigma/\xi` when :math:`\xi < 0` Mean * :math:`\mu + \frac{\sigma}{1-\xi}`, when :math:`\xi < 1` Variance * :math:`\frac{\sigma^2}{(1-\xi)^2 (1-2\xi)}`, when :math:`\xi < 0.5` ======== ========================================================================= @@ -305,8 +302,7 @@ class GenPareto(Continuous): sigma : float Scale parameter (sigma > 0). xi : float - Shape parameter - + Shape parameter (xi >= 0) """ rv_op = gp @@ -334,10 +330,16 @@ def logp(value, mu, sigma, xi): ------- TensorVariable """ + scaled = (value - mu) / sigma - p = (1 / sigma) * (1 + xi * scaled) ** (-1 / xi - 1) - logprob = pt.switch(pt.gt(scaled, 0.0), pt.log(p), -np.inf) - return check_parameters(logprob, sigma > 0, msg="sigma > 0") + + logp_expression = pt.switch( + pt.eq(xi, 0), + -1 * scaled, + -1 * pt.log(sigma) - ((xi + 1) / xi) * pt.log1p(xi * scaled), + ) + logp = pt.switch(pt.ge(scaled, 0), logp_expression, -np.inf) + return check_parameters(logp, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0") def logcdf(value, mu, sigma, xi): """ @@ -356,11 +358,14 @@ def logcdf(value, mu, sigma, xi): TensorVariable """ scaled = (value - mu) / sigma - logc_expression = 1 - (1 + xi * scaled) ** (-1 / xi) - - logc = pt.switch(sigma > 0, pt.log(logc_expression), -np.inf) + logc_expression = pt.switch( + pt.eq(xi, 0), + pt.log(1 - pt.exp(-1 * scaled)), + pt.log(1 - pt.pow((1 + xi * scaled), (-1 / xi))), + ) + logc = pt.switch(pt.ge(scaled, 0), logc_expression, -np.inf) - return check_parameters(logc, sigma > 0, msg="sigma > 0") + return check_parameters(logc, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0") def moment(rv, size, mu, sigma, xi): r""" diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py index 5df4aef1..083febf6 100644 --- a/pymc_experimental/tests/distributions/test_continuous.py +++ b/pymc_experimental/tests/distributions/test_continuous.py @@ -33,7 +33,7 @@ ) # the distributions to be tested -from pymc_experimental.distributions import Chi, GenExtreme, Maxwell +from pymc_experimental.distributions import Chi, GenExtreme, GenPareto, Maxwell class TestGenExtremeClass: @@ -138,6 +138,64 @@ class TestGenExtreme(BaseTestDistributionRandom): ] +class TestGenParetoClass: + """ + Wrapper class so that tests of experimental additions can be dropped into + PyMC directly on adoption. + + pm.logp(GenPareto.dist(mu=0.,sigma=1.,xi=5.),value=1.) + """ + + def test_logp(self): + def ref_logp(value, mu, sigma, xi): + if xi == 0: + return sp.expon.logpdf((value - mu) / sigma) + else: + return sp.genpareto.logpdf(value, c=xi, loc=mu, scale=sigma) + + check_logp( + GenPareto, + R, + {"mu": R, "sigma": Rplusbig, "xi": Rplusbig}, + ref_logp, + decimal=select_by_precision(float64=6, float32=2), + skip_paramdomain_outside_edge_test=True, + ) + + def test_logcdf(self): + def ref_logc(value, mu, sigma, xi): + if xi == 0: + return sp.expon.logcdf((value - mu) / sigma) + else: + return sp.genpareto.logcdf(value, c=xi, loc=mu, scale=sigma) + + check_logcdf( + GenPareto, + R, + { + "mu": R, + "sigma": Rplusbig, + "xi": Rplusbig, + }, + ref_logc, + decimal=select_by_precision(float64=6, float32=2), + skip_paramdomain_outside_edge_test=True, + ) + + +class TestGenPareto(BaseTestDistributionRandom): + pymc_dist = GenPareto + pymc_dist_params = {"mu": 0, "sigma": 1, "xi": 1} + expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": 1} + reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1} + reference_dist = seeded_scipy_distribution_builder("genpareto") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestChiClass: """ Wrapper class so that tests of experimental additions can be dropped into From c5ceda2815f934fc00bb7f39020e4eda40d46298 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com> Date: Wed, 3 Jan 2024 11:21:24 +0100 Subject: [PATCH 4/7] Update version.txt --- pymc_experimental/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc_experimental/version.txt b/pymc_experimental/version.txt index ceddfb28..e3b86dd9 100644 --- a/pymc_experimental/version.txt +++ b/pymc_experimental/version.txt @@ -1 +1 @@ -0.0.15 +0.0.16 From fd112718c4a0ad14cabec3037b3c946504ce7518 Mon Sep 17 00:00:00 2001 From: xieyj17 Date: Wed, 3 Jan 2024 22:59:48 -0500 Subject: [PATCH 5/7] address comments --- pymc_experimental/distributions/continuous.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index 73804aa6..2342380d 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -30,7 +30,7 @@ from pymc.distributions.shape_utils import rv_size_is_none from pymc.logprob.utils import CheckParameterValue from pymc.pytensorf import floatX -from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.op import RandomVariable, ScipyRandomVariable from pytensor.tensor.variable import TensorVariable from scipy import stats @@ -222,12 +222,12 @@ def moment(rv, size, mu, sigma, xi): # Generalized Pareto Distribution -class GenParetoRV(RandomVariable): - name: str = "Generalized Pareto Distribution" +class GenParetoRV(ScipyRandomVariable): + name: str = "Generalized Pareto" ndim_supp: int = 0 ndims_params: List[int] = [0, 0, 0] dtype: str = "floatX" - _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}") + _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GenPareto}") def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable: return super().__call__(mu, sigma, xi, size=size, **kwargs) From 140a7b8752fca3cec76075e242e3e1637cb11a41 Mon Sep 17 00:00:00 2001 From: xieyj17 Date: Wed, 3 Jan 2024 23:02:52 -0500 Subject: [PATCH 6/7] address comments --- pymc_experimental/distributions/continuous.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index 2342380d..3efbb748 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -229,9 +229,6 @@ class GenParetoRV(ScipyRandomVariable): dtype: str = "floatX" _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GenPareto}") - def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable: - return super().__call__(mu, sigma, xi, size=size, **kwargs) - @classmethod def rng_fn( cls, @@ -245,7 +242,7 @@ def rng_fn( return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size) -gp = GenParetoRV() +gen_pareto = GenParetoRV() class GenPareto(Continuous): @@ -302,10 +299,10 @@ class GenPareto(Continuous): sigma : float Scale parameter (sigma > 0). xi : float - Shape parameter (xi >= 0) + Shape parameter (xi >= 0). Notice that we are using a more restrictive definition for Generalized Pareto Distribution (xi can be smaller than 0). We only include :math:`\xi \geq 0` since it's more commonly used for modelling the tails. """ - rv_op = gp + rv_op = gen_pareto @classmethod def dist(cls, mu=0, sigma=1, xi=0, **kwargs): From d5054b2d5aa5b9b345f7803941929a3b353a397b Mon Sep 17 00:00:00 2001 From: xieyj17 Date: Wed, 3 Jan 2024 23:06:38 -0500 Subject: [PATCH 7/7] add entry to api reference --- docs/api_reference.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api_reference.rst b/docs/api_reference.rst index dfc43968..fd508701 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -35,6 +35,7 @@ Distributions GeneralizedPoisson BetaNegativeBinomial GenExtreme + GenPareto R2D2M2CP Skellam histogram_approximation