Implement Generalized Pareto distribution #294

@@ -221,6 +221,163 @@ def moment(rv, size, mu, sigma, xi):
        return mode


# Generalized Pareto Distribution
class GenParetoRV(RandomVariable):
    name: str = "Generalized Pareto Distribution"
    ndim_supp: int = 0
    ndims_params: List[int] = [0, 0, 0]
    dtype: str = "floatX"
    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")

    def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
        return super().__call__(mu, sigma, xi, size=size, **kwargs)
Review comment: This is not strictly necessary because most users will never call the RV directly. We usually provide default values through the PyMC distribution class.
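A minimal sketch of the direction this comment points in (an assumption about intent, not part of the committed diff): keep the RV op free of the __call__ override and let GenPareto.dist() further below carry the user-facing defaults.

from typing import List, Tuple

from pytensor.tensor.random.op import RandomVariable  # same base class the diff already uses
from scipy import stats


# Sketch only: no __call__ override; defaults such as mu=0, sigma=1, xi=0 are
# declared once on GenPareto.dist(...), which is what users actually call.
class GenParetoRV(RandomVariable):
    name: str = "Generalized Pareto Distribution"
    ndim_supp: int = 0
    ndims_params: List[int] = [0, 0, 0]
    dtype: str = "floatX"
    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")

    @classmethod
    def rng_fn(cls, rng, mu, sigma, xi, size):
        # SciPy parameterization: c is the shape, loc the location, scale the scale
        return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size)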

    @classmethod
    def rng_fn(
        cls,
        rng: Union[np.random.RandomState, np.random.Generator],
        mu: np.ndarray,
        sigma: np.ndarray,
        xi: np.ndarray,
        size: Tuple[int, ...],
    ) -> np.ndarray:
        # using scipy's parameterization
        return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size)


gp = GenParetoRV()


class GenPareto(Continuous):
    r"""
    The Generalized Pareto distribution.

    The pdf of this distribution is

    .. math::

       f(x \mid \mu, \sigma, \xi) = \frac{1}{\sigma} (1 + \xi z)^{-1/\xi - 1}

    where

    .. math::

       z = \frac{x - \mu}{\sigma}

    and is defined on the set (when :math:`\xi \geq 0`):

    .. math::

       \left\{x: x \geq \mu \right\}

    .. plot::

        import matplotlib.pyplot as plt
        import numpy as np
        import scipy.stats as st
        import arviz as az
        plt.style.use('arviz-darkgrid')
        x = np.linspace(-2, 10, 200)
        mus = [0., 0., 0., 0., 1.]
        sigmas = [1., 1., 1., 2., 1.]
        xis = [1., 0., 5., 1., 1.]
        for mu, sigma, xi in zip(mus, sigmas, xis):
            pdf = st.genpareto.pdf(x, c=xi, loc=mu, scale=sigma)
            plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$ = {xi}')
        plt.xlabel('x', fontsize=12)
        plt.ylabel('f(x)', fontsize=12)
        plt.legend(loc=1)
        plt.show()

    ======== =========================================================================
    Support  * :math:`x \geq \mu`, when :math:`\xi \geq 0`
    Mean     * :math:`\mu + \frac{\sigma}{1-\xi}`, when :math:`\xi < 1`
    Variance * :math:`\frac{\sigma^2}{(1-\xi)^2 (1-2\xi)}`, when :math:`\xi < 0.5`
    ======== =========================================================================

    Parameters
    ----------
    mu : float
        Location parameter.
    sigma : float
        Scale parameter (sigma > 0).
    xi : float
        Shape parameter (xi >= 0).
    """

Review comment: Could be worth a note saying this is more restrictive than other definitions of the GenPareto (on Wikipedia there seem to be special cases for xi < 0?).
Author reply: xi < 0 is less often seen for modelling extreme values. I will add a note here.

    rv_op = gp

    @classmethod
    def dist(cls, mu=0, sigma=1, xi=0, **kwargs):
        mu = pt.as_tensor_variable(floatX(mu))
        sigma = pt.as_tensor_variable(floatX(sigma))
        xi = pt.as_tensor_variable(floatX(xi))

        return super().dist([mu, sigma, xi], **kwargs)
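For orientation, a short usage sketch of the class being added. The model, priors, and data here are hypothetical and only illustrate the API; the import assumes the export added in the test file further below.

import numpy as np
import pymc as pm

from pymc_experimental.distributions import GenPareto

# Hypothetical data: exceedances over a known threshold, so mu is fixed at 0
exceedances = np.array([0.4, 1.2, 0.1, 2.3, 0.8])

with pm.Model():
    sigma = pm.HalfNormal("sigma", 1.0)
    xi = pm.HalfNormal("xi", 0.5)
    GenPareto("y", mu=0.0, sigma=sigma, xi=xi, observed=exceedances)
    idata = pm.sample()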

    def logp(value, mu, sigma, xi):
        """
        Calculate log-probability of Generalized Pareto distribution
        at specified value.

        Parameters
        ----------
        value : numeric
            Value(s) for which log-probability is calculated. If the log probabilities
            for multiple values are desired, the values must be provided in a numpy
            array or PyTensor tensor.

        Returns
        -------
        TensorVariable
        """
Review comment (on lines +316 to +329): These docstrings are incomplete, and the
Review comment: I think the logp for Generalized Pareto distribution should be

        scaled = (value - mu) / sigma

        logp_expression = pt.switch(
            pt.eq(xi, 0),
            -1 * scaled,
            -1 * pt.log(sigma) - ((xi + 1) / xi) * pt.log1p(xi * scaled),
        )
        logp = pt.switch(pt.ge(scaled, 0), logp_expression, -np.inf)
        return check_parameters(logp, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")
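For reference, when xi = 0 the distribution reduces to an exponential with scale sigma, whose log-density is -log(sigma) - (value - mu) / sigma, so the first switch branch above drops the -log(sigma) term. A hedged sketch of a logp whose xi == 0 branch keeps that normalisation (an illustration, not necessarily what the truncated review comment above proposes):

    def logp(value, mu, sigma, xi):
        # Sketch only: both branches carry the -log(sigma) normalisation; the
        # xi == 0 branch is the exponential limit of the GPD.
        scaled = (value - mu) / sigma
        logp_expression = pt.switch(
            pt.eq(xi, 0),
            -pt.log(sigma) - scaled,
            -pt.log(sigma) - ((xi + 1) / xi) * pt.log1p(xi * scaled),
        )
        logp = pt.switch(pt.ge(scaled, 0), logp_expression, -np.inf)
        return check_parameters(logp, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")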

    def logcdf(value, mu, sigma, xi):
        """
        Compute the log of the cumulative distribution function for Generalized Pareto
        distribution at the specified value.

        Parameters
        ----------
        value : numeric or np.ndarray or `TensorVariable`
            Value(s) for which log CDF is calculated. If the log CDF for
            multiple values is desired, the values must be provided in a numpy
            array or `TensorVariable`.

        Returns
        -------
        TensorVariable
        """
Review comment (on lines +342 to +356): Same here.
        scaled = (value - mu) / sigma
        logc_expression = pt.switch(
            pt.eq(xi, 0),
            pt.log(1 - pt.exp(-1 * scaled)),
            pt.log(1 - pt.pow((1 + xi * scaled), (-1 / xi))),
        )
        logc = pt.switch(pt.ge(scaled, 0), logc_expression, -np.inf)

        return check_parameters(logc, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")

    def moment(rv, size, mu, sigma, xi):
        r"""
        Mean is defined when :math:`\xi < 1`
        """
        mean_expression = mu + sigma / (1 - xi)
        mean = pt.switch(xi < 1, mean_expression, np.inf)
        if not rv_size_is_none(size):
            mean = pt.full(size, mean)
        return check_parameters(mean, xi < 1, msg="xi < 1")

Review comment: We don't need to provide a real "moment", just anything that always has finite logp. So in this case
Author reply: Are you suggesting that we only need to return
Review comment: Yes, that you can just return
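A sketch of the simplification this thread points at (the concrete return value mu + sigma is an assumption, since the comments above are truncated in the source): any point with guaranteed-finite logp will do, and mu + sigma always satisfies (value - mu) / sigma >= 0 when sigma > 0.

    def moment(rv, size, mu, sigma, xi):
        # Sketch only: a cheap starting point with guaranteed-finite logp,
        # instead of the true mean (which is undefined for xi >= 1).
        moment = mu + sigma
        if not rv_size_is_none(size):
            moment = pt.full(size, moment)
        return moment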


class Chi:
    r"""
    :math:`\chi` log-likelihood.
@@ -33,7 +33,7 @@
 )

 # the distributions to be tested
-from pymc_experimental.distributions import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions import Chi, GenExtreme, GenPareto, Maxwell


 class TestGenExtremeClass:

@@ -138,6 +138,64 @@ class TestGenExtreme(BaseTestDistributionRandom):
    ]


class TestGenParetoClass:
Review comment: Missing the test for moment.

    """
    Wrapper class so that tests of experimental additions can be dropped into
    PyMC directly on adoption.

    pm.logp(GenPareto.dist(mu=0., sigma=1., xi=5.), value=1.)
    """

    def test_logp(self):
        def ref_logp(value, mu, sigma, xi):
            if xi == 0:
                return sp.expon.logpdf((value - mu) / sigma)
            else:
                return sp.genpareto.logpdf(value, c=xi, loc=mu, scale=sigma)

Review comment: Scipy genpareto logpdf fails for xi = 0?
Author reply: Yes, I did notice a tiny bug in scipy's function when calculating the pdf for the Generalized Pareto distribution with xi == 0. Will double check and submit a PR to fix that as well.
        check_logp(
            GenPareto,
            R,
            {"mu": R, "sigma": Rplusbig, "xi": Rplusbig},
            ref_logp,
            decimal=select_by_precision(float64=6, float32=2),
            skip_paramdomain_outside_edge_test=True,
        )

Review comment (on lines +161 to +162): Why are you skipping the outside edge test?
Author reply: Since I am bounding xi to be >= 0, I'd like to skip the outside edge test.
Review comment: The point of that test is to make sure the bounding is defined correctly, so you shouldn't skip it.

    def test_logcdf(self):
        def ref_logc(value, mu, sigma, xi):
            if xi == 0:
                return sp.expon.logcdf((value - mu) / sigma)
            else:
                return sp.genpareto.logcdf(value, c=xi, loc=mu, scale=sigma)

        check_logcdf(
            GenPareto,
            R,
            {
                "mu": R,
                "sigma": Rplusbig,
                "xi": Rplusbig,
            },
            ref_logc,
            decimal=select_by_precision(float64=6, float32=2),
            skip_paramdomain_outside_edge_test=True,
        )
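Addressing the "Missing the test for moment" comment above, a hedged sketch of what such a test inside TestGenParetoClass might look like. It assumes pytest, numpy as np, pymc as pm, and pymc.testing.assert_moment_is_expected are importable in this module (all assumptions, not part of the diff), and the expected values follow the mu + sigma / (1 - xi) moment currently implemented.

    @pytest.mark.parametrize(
        "mu, sigma, xi, size, expected",
        [
            (0, 1, 0, None, 1),            # 0 + 1 / (1 - 0)
            (1, 2, 0.5, None, 5),          # 1 + 2 / (1 - 0.5)
            (0, 1, 0, 5, np.full(5, 1)),   # broadcast to the requested size
        ],
    )
    def test_moment(self, mu, sigma, xi, size, expected):
        with pm.Model() as model:
            GenPareto("x", mu=mu, sigma=sigma, xi=xi, size=size)
        assert_moment_is_expected(model, expected)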


class TestGenPareto(BaseTestDistributionRandom):
    pymc_dist = GenPareto
    pymc_dist_params = {"mu": 0, "sigma": 1, "xi": 1}
    expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": 1}
    reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1}
    reference_dist = seeded_scipy_distribution_builder("genpareto")
    tests_to_run = [
        "check_pymc_params_match_rv_op",
        "check_pymc_draws_match_reference",
        "check_rv_size",
    ]


class TestChiClass:
    """
    Wrapper class so that tests of experimental additions can be dropped into
Review comment: Should subclass ScipyRandomVariable because Scipy RVs (sometimes) do something dumb with size=(1,).
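A hedged sketch of what that subclassing could look like. The ScipyRandomVariable import path and the rng_fn_scipy hook name are assumptions based on how PyMC's own SciPy-backed RVs are typically written, so they should be checked against the installed PyMC version.

from typing import List, Tuple

from pymc.distributions.distribution import ScipyRandomVariable  # import path may vary by PyMC version
from scipy import stats


class GenParetoRV(ScipyRandomVariable):
    name: str = "Generalized Pareto Distribution"
    ndim_supp: int = 0
    ndims_params: List[int] = [0, 0, 0]
    dtype: str = "floatX"
    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")

    @classmethod
    def rng_fn_scipy(cls, rng, mu, sigma, xi, size):
        # ScipyRandomVariable wraps this hook and repairs the degenerate shapes
        # SciPy sometimes returns for size=(1,).
        return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size)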