diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 91227e409d4..3f73b652c4f 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -35,6 +35,7 @@ This new version of `Theano-PyMC` comes with an experimental JAX backend which, - Change SMC metropolis kernel to independent metropolis kernel [#4115](https://github.com/pymc-devs/pymc3/pull/4115)) - Add alternative parametrization to NegativeBinomial distribution in terms of n and p (see [#4126](https://github.com/pymc-devs/pymc3/issues/4126)) - Added semantically meaningful `str` representations to PyMC3 objects for console, notebook, and GraphViz use (see [#4076](https://github.com/pymc-devs/pymc3/pull/4076), [#4065](https://github.com/pymc-devs/pymc3/pull/4065), [#4159](https://github.com/pymc-devs/pymc3/pull/4159), [#4217](https://github.com/pymc-devs/pymc3/pull/4217), and [#4243](https://github.com/pymc-devs/pymc3/pull/4243)). +- Add Discrete HyperGeometric Distribution (see [#4249](https://github.com/pymc-devs/pymc3/pull/#4249)) ### Maintenance - Switch the dependency of Theano to our own fork, [Theano-PyMC](https://github.com/pymc-devs/Theano-PyMC). diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 2a671d3293b..b4b56d749ae 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -63,6 +63,7 @@ from .discrete import ZeroInflatedBinomial from .discrete import DiscreteUniform from .discrete import Geometric +from .discrete import HyperGeometric from .discrete import Categorical from .discrete import OrderedLogistic @@ -141,6 +142,7 @@ "ZeroInflatedBinomial", "DiscreteUniform", "Geometric", + "HyperGeometric", "Categorical", "OrderedLogistic", "DensityDist", diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index fd2d3996189..d88c9bcd210 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -38,6 +38,7 @@ "ZeroInflatedNegativeBinomial", "DiscreteUniform", "Geometric", + "HyperGeometric", "Categorical", "OrderedLogistic", ] @@ -809,6 +810,118 @@ def logp(self, value): return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) +class HyperGeometric(Discrete): + R""" + Discrete hypergeometric distribution. + + The probability of :math:`x` successes in a sequence of :math:`n` bernoulli + trials taken without replacement from a population of :math:`N` objects, + containing :math:`k` good (or successful or Type I) objects. + The pmf of this distribution is + + .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}} + + .. plot:: + + import matplotlib.pyplot as plt + import numpy as np + import scipy.stats as st + plt.style.use('seaborn-darkgrid') + x = np.arange(1, 15) + N = 50 + k = 10 + for n in [20, 25]: + pmf = st.hypergeom.pmf(x, N, k, n) + plt.plot(x, pmf, '-o', label='n = {}'.format(n)) + plt.plot(x, pmf, '-o', label='N = {}'.format(N)) + plt.plot(x, pmf, '-o', label='k = {}'.format(k)) + plt.xlabel('x', fontsize=12) + plt.ylabel('f(x)', fontsize=12) + plt.legend(loc=1) + plt.show() + + ======== ============================= + Support :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]` + Mean :math:`\dfrac{nk}{N}` + Variance :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}` + ======== ============================= + + Parameters + ---------- + N : integer + Total size of the population + k : integer + Number of successful individuals in the population + n : integer + Number of samples drawn from the population + """ + + def __init__(self, N, k, n, *args, **kwargs): + super().__init__(*args, **kwargs) + self.N = intX(N) + self.k = intX(k) + self.n = intX(n) + self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2))) + + def random(self, point=None, size=None): + r""" + Draw random values from HyperGeometric distribution. + + Parameters + ---------- + point : dict, optional + Dict of variable values on which random values are to be + conditioned (uses default point if not specified). + size : int, optional + Desired size of random sample (returns one sample if not + specified). + + Returns + ------- + array + """ + + N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size) + return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size) + + def _random(self, M, n, N, size=None): + r"""Wrapper around scipy stat's hypergeom.rvs""" + try: + samples = stats.hypergeom.rvs(M=M, n=n, N=N, size=size) + return samples + except ValueError: + raise ValueError("Domain error in arguments") + + def logp(self, value): + r""" + Calculate log-probability of HyperGeometric distribution at specified value. + + Parameters + ---------- + value : numeric + Value(s) for which log-probability is calculated. If the log probabilities for multiple + values are desired the values must be provided in a numpy array or theano tensor + + Returns + ------- + TensorVariable + """ + N = self.N + k = self.k + n = self.n + tot, good = N, k + bad = tot - good + result = ( + betaln(good + 1, 1) + + betaln(bad + 1, 1) + + betaln(tot - n + 1, n + 1) + - betaln(value + 1, good - value + 1) + - betaln(n - value + 1, bad - n + value + 1) + - betaln(tot + 1, 1) + ) + return result + + class DiscreteUniform(Discrete): R""" Discrete uniform distribution. diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 04d9b0e3c05..f7210264f1c 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -75,6 +75,7 @@ Rice, Kumaraswamy, Moyal, + HyperGeometric, ) from ..distributions import continuous @@ -790,6 +791,14 @@ def test_geometric(self): Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p)) ) + def test_hypergeometric(self): + self.pymc3_matches_scipy( + HyperGeometric, + Nat, + {"N": NatSmall, "k": NatSmall, "n": NatSmall}, + lambda value, N, k, n: sp.hypergeom.logpmf(value, N, k, n), + ) + def test_negative_binomial(self): def test_fun(value, mu, alpha): return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha)) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 4daa1857b85..84bd8bc117c 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -507,6 +507,11 @@ class TestGeometric(BaseTestCases.BaseTestCase): params = {"p": 0.5} +class TestHyperGeometric(BaseTestCases.BaseTestCase): + distribution = pm.HyperGeometric + params = {"N": 50, "k": 25, "n": 10} + + class TestMoyal(BaseTestCases.BaseTestCase): distribution = pm.Moyal params = {"mu": 0.0, "sigma": 1.0} @@ -739,6 +744,22 @@ def ref_rand(size, alpha, mu): def test_geometric(self): pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric) + def test_hypergeometric(self): + def ref_rand(size, N, k, n): + return st.hypergeom.rvs(M=N, n=k, N=n, size=size) + + pymc3_random_discrete( + pm.HyperGeometric, + { + "N": Domain([10, 11, 12, 13], "int64"), + "k": Domain([4, 5, 6, 7], "int64"), + "n": Domain([6, 7, 8, 9], "int64"), + }, + size=500, + fails=50, + ref_rand=ref_rand, + ) + def test_discrete_uniform(self): def ref_rand(size, lower, upper): return st.randint.rvs(lower, upper + 1, size=size)