Skip to content

Commit

Permalink
Add HyperGeometric Distribution to pymc3.distributions.discrete #4108 (
Browse files Browse the repository at this point in the history
…#4249)

* Add HyperGeometric distribution to discrete.py; Add tests

* Add HyperGeo to distirbutions/__init__.py

* Fix minor linting issue

* Add ref_rand helper function. Clip lower in logp

* Fix bug. Now pymc3_matches_scipy runs without error but pymc3_random_discrete diverges from expected value

* passes match with scipy test in test_distributions.py but fails in test_distributions_random.py

* Modify HyperGeom.random; Random test still failing. match_with_scipy test passing

* rm stray print

* Fix failing random test by specifying domain

* Update pymc3/distributions/discrete.py

Remove stray newline

Co-authored-by: Tirth Patel <[email protected]>

* Add note in RELEASE-NOTES.md

Co-authored-by: Tirth Patel <[email protected]>
  • Loading branch information
Harivallabha and tirthasheshpatel authored Dec 3, 2020
1 parent ced05db commit 198d13e
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 0 deletions.
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ This new version of `Theano-PyMC` comes with an experimental JAX backend which,
- Change SMC metropolis kernel to independent metropolis kernel [#4115](https://github.com/pymc-devs/pymc3/pull/4115))
- Add alternative parametrization to NegativeBinomial distribution in terms of n and p (see [#4126](https://github.com/pymc-devs/pymc3/issues/4126))
- Added semantically meaningful `str` representations to PyMC3 objects for console, notebook, and GraphViz use (see [#4076](https://github.com/pymc-devs/pymc3/pull/4076), [#4065](https://github.com/pymc-devs/pymc3/pull/4065), [#4159](https://github.com/pymc-devs/pymc3/pull/4159), [#4217](https://github.com/pymc-devs/pymc3/pull/4217), and [#4243](https://github.com/pymc-devs/pymc3/pull/4243)).
- Add Discrete HyperGeometric Distribution (see [#4249](https://github.com/pymc-devs/pymc3/pull/#4249))

### Maintenance
- Switch the dependency of Theano to our own fork, [Theano-PyMC](https://github.com/pymc-devs/Theano-PyMC).
Expand Down
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from .discrete import ZeroInflatedBinomial
from .discrete import DiscreteUniform
from .discrete import Geometric
from .discrete import HyperGeometric
from .discrete import Categorical
from .discrete import OrderedLogistic

Expand Down Expand Up @@ -141,6 +142,7 @@
"ZeroInflatedBinomial",
"DiscreteUniform",
"Geometric",
"HyperGeometric",
"Categorical",
"OrderedLogistic",
"DensityDist",
Expand Down
113 changes: 113 additions & 0 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"ZeroInflatedNegativeBinomial",
"DiscreteUniform",
"Geometric",
"HyperGeometric",
"Categorical",
"OrderedLogistic",
]
Expand Down Expand Up @@ -809,6 +810,118 @@ def logp(self, value):
return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)


class HyperGeometric(Discrete):
R"""
Discrete hypergeometric distribution.
The probability of :math:`x` successes in a sequence of :math:`n` bernoulli
trials taken without replacement from a population of :math:`N` objects,
containing :math:`k` good (or successful or Type I) objects.
The pmf of this distribution is
.. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
.. plot::
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
plt.style.use('seaborn-darkgrid')
x = np.arange(1, 15)
N = 50
k = 10
for n in [20, 25]:
pmf = st.hypergeom.pmf(x, N, k, n)
plt.plot(x, pmf, '-o', label='n = {}'.format(n))
plt.plot(x, pmf, '-o', label='N = {}'.format(N))
plt.plot(x, pmf, '-o', label='k = {}'.format(k))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()
======== =============================
Support :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]`
Mean :math:`\dfrac{nk}{N}`
Variance :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
======== =============================
Parameters
----------
N : integer
Total size of the population
k : integer
Number of successful individuals in the population
n : integer
Number of samples drawn from the population
"""

def __init__(self, N, k, n, *args, **kwargs):
super().__init__(*args, **kwargs)
self.N = intX(N)
self.k = intX(k)
self.n = intX(n)
self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))

def random(self, point=None, size=None):
r"""
Draw random values from HyperGeometric distribution.
Parameters
----------
point : dict, optional
Dict of variable values on which random values are to be
conditioned (uses default point if not specified).
size : int, optional
Desired size of random sample (returns one sample if not
specified).
Returns
-------
array
"""

N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size)
return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size)

def _random(self, M, n, N, size=None):
r"""Wrapper around scipy stat's hypergeom.rvs"""
try:
samples = stats.hypergeom.rvs(M=M, n=n, N=N, size=size)
return samples
except ValueError:
raise ValueError("Domain error in arguments")

def logp(self, value):
r"""
Calculate log-probability of HyperGeometric distribution at specified value.
Parameters
----------
value : numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
values are desired the values must be provided in a numpy array or theano tensor
Returns
-------
TensorVariable
"""
N = self.N
k = self.k
n = self.n
tot, good = N, k
bad = tot - good
result = (
betaln(good + 1, 1)
+ betaln(bad + 1, 1)
+ betaln(tot - n + 1, n + 1)
- betaln(value + 1, good - value + 1)
- betaln(n - value + 1, bad - n + value + 1)
- betaln(tot + 1, 1)
)
return result


class DiscreteUniform(Discrete):
R"""
Discrete uniform distribution.
Expand Down
9 changes: 9 additions & 0 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
Rice,
Kumaraswamy,
Moyal,
HyperGeometric,
)

from ..distributions import continuous
Expand Down Expand Up @@ -790,6 +791,14 @@ def test_geometric(self):
Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p))
)

def test_hypergeometric(self):
self.pymc3_matches_scipy(
HyperGeometric,
Nat,
{"N": NatSmall, "k": NatSmall, "n": NatSmall},
lambda value, N, k, n: sp.hypergeom.logpmf(value, N, k, n),
)

def test_negative_binomial(self):
def test_fun(value, mu, alpha):
return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha))
Expand Down
21 changes: 21 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,11 @@ class TestGeometric(BaseTestCases.BaseTestCase):
params = {"p": 0.5}


class TestHyperGeometric(BaseTestCases.BaseTestCase):
distribution = pm.HyperGeometric
params = {"N": 50, "k": 25, "n": 10}


class TestMoyal(BaseTestCases.BaseTestCase):
distribution = pm.Moyal
params = {"mu": 0.0, "sigma": 1.0}
Expand Down Expand Up @@ -739,6 +744,22 @@ def ref_rand(size, alpha, mu):
def test_geometric(self):
pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric)

def test_hypergeometric(self):
def ref_rand(size, N, k, n):
return st.hypergeom.rvs(M=N, n=k, N=n, size=size)

pymc3_random_discrete(
pm.HyperGeometric,
{
"N": Domain([10, 11, 12, 13], "int64"),
"k": Domain([4, 5, 6, 7], "int64"),
"n": Domain([6, 7, 8, 9], "int64"),
},
size=500,
fails=50,
ref_rand=ref_rand,
)

def test_discrete_uniform(self):
def ref_rand(size, lower, upper):
return st.randint.rvs(lower, upper + 1, size=size)
Expand Down

0 comments on commit 198d13e

Please sign in to comment.