From 99fd87b3ff568f5af072f7225865feccad5aa975 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 15 Jun 2021 18:07:41 +0200 Subject: [PATCH 01/18] Add Ordered Multinomial implementation --- pymc3/distributions/__init__.py | 2 ++ pymc3/distributions/multivariate.py | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 807b483712b..56ac8b26e95 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -92,6 +92,7 @@ LKJCorr, MatrixNormal, Multinomial, + OrderedMultinomial, MvNormal, MvStudentT, Wishart, @@ -159,6 +160,7 @@ "Dirichlet", "Multinomial", "DirichletMultinomial", + "OrderedMultinomial", "Wishart", "WishartBartlett", "LKJCholeskyCov", diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index b7566d59ac6..855e3ce667b 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -53,6 +53,7 @@ "Dirichlet", "Multinomial", "DirichletMultinomial", + "OrderedMultinomial", "Wishart", "WishartBartlett", "LKJCorr", @@ -687,6 +688,32 @@ def _distr_parameters_for_repr(self): return ["n", "a"] +class OrderedMultinomial(Multinomial): + rv_op = multinomial + + @classmethod + def dist(cls, eta, cutpoints, n, compute_p=True, *args, **kwargs): + eta = at.as_tensor_variable(floatX(eta)) + cutpoints = at.as_tensor_variable(cutpoints) + n = at.as_tensor_variable(n) + + pa = sigmoid(cutpoints - at.shape_padright(eta)) + p_cum = at.concatenate( + [ + at.zeros_like(at.shape_padright(pa[..., 0])), + pa, + at.ones_like(at.shape_padright(pa[..., 0])), + ], + axis=-1, + ) + if compute_p: + p = pm.Deterministic("complete_p", p_cum[..., 1:] - p_cum[..., :-1]) + else: + p = p_cum[..., 1:] - p_cum[..., :-1] + + return super().dist(n, p, *args, **kwargs) + + def posdef(AA): try: linalg.cholesky(AA) From fdc09f2c47ff550112c34fbc812659d2219a2abf Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 15 Jun 2021 18:13:12 +0200 Subject: [PATCH 02/18] isort --- pymc3/distributions/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 56ac8b26e95..6dba668ec63 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -92,9 +92,9 @@ LKJCorr, MatrixNormal, Multinomial, - OrderedMultinomial, MvNormal, MvStudentT, + OrderedMultinomial, Wishart, WishartBartlett, ) From bb5c745665e0ba7c877b797c0adce1a2131bbcec Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Wed, 16 Jun 2021 12:22:59 +0200 Subject: [PATCH 03/18] Address Ricardo's comments --- pymc3/distributions/multivariate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 855e3ce667b..b6cc648c97b 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -45,7 +45,7 @@ from pymc3.distributions.continuous import ChiSquared, Normal, assert_negative_support from pymc3.distributions.dist_math import bound, factln, logpow, multigammaln from pymc3.distributions.distribution import Continuous, Discrete -from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker +from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker, sigmoid __all__ = [ "MvNormal", @@ -695,7 +695,7 @@ class OrderedMultinomial(Multinomial): def dist(cls, eta, cutpoints, n, compute_p=True, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) - n = at.as_tensor_variable(n) + n = at.as_tensor_variable(intX(n)) pa = sigmoid(cutpoints - at.shape_padright(eta)) p_cum = at.concatenate( @@ -706,7 +706,7 @@ def dist(cls, eta, cutpoints, n, compute_p=True, *args, **kwargs): ], axis=-1, ) - if compute_p: + if compute_p and pm.modelcontext(None): p = pm.Deterministic("complete_p", p_cum[..., 1:] - p_cum[..., :-1]) else: p = p_cum[..., 1:] - p_cum[..., :-1] From 6c1d51f1b49c34a55ac193f86b0c15c2211477ad Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Wed, 16 Jun 2021 12:30:07 +0200 Subject: [PATCH 04/18] Fix typo in quaddist_parse --- pymc3/distributions/multivariate.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index b6cc648c97b..41f52d5c567 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -110,17 +110,13 @@ def quaddist_parse(value, mu, cov, mat_type="cov"): onedim = False delta = value - mu - - if mat_type == "cov": - # Use this when Theano#5908 is released. - # return MvNormalLogp()(self.cov, delta) - chol_cov = cholesky(cov) + # Use this when Theano#5908 is released. + # return MvNormalLogp()(self.cov, delta) + chol_cov = cholesky(cov) + if mat_type == "cov" or mat_type != "tau": dist, logdet, ok = quaddist_chol(delta, chol_cov) - elif mat_type == "tau": - dist, logdet, ok = quaddist_tau(delta, chol_cov) else: - dist, logdet, ok = quaddist_chol(delta, chol_cov) - + dist, logdet, ok = quaddist_tau(delta, chol_cov) if onedim: return dist[0], logdet, ok From 04d8c5736b593535010fbe053aaccf9b2a1c4a58 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Wed, 16 Jun 2021 12:50:06 +0200 Subject: [PATCH 05/18] isort --- pymc3/distributions/discrete.py | 42 +++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index b6a831f3b2e..25281c51139 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -27,6 +27,8 @@ ) from scipy import stats +import pymc3 as pm + from pymc3.aesaraf import floatX, intX, take_along_axis from pymc3.distributions.dist_math import ( betaln, @@ -59,6 +61,7 @@ "HyperGeometric", "Categorical", "OrderedLogistic", + "OrderedProbit", ] @@ -1643,8 +1646,8 @@ class OrderedLogistic(Categorical): Useful for regression on ordinal data values whose values range from 1 to K as a function of some predictor, :math:`\eta`. The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are - mapped to which of the K observed dependent variables. The number - of cutpoints is K - 1. It is recommended that the cutpoints are + mapped to which of the K observed dependent variables. The number + of cutpoints is K - 1. It is recommended that the cutpoints are constrained to be ordered. .. math:: @@ -1667,8 +1670,12 @@ class OrderedLogistic(Categorical): The predictor. c: array The length K - 1 array of cutpoints which break :math:`\eta` into - ranges. Do not explicitly set the first and last elements of + ranges. Do not explicitly set the first and last elements of :math:`c` to negative and positive infinity. + compute_p: boolean, default True + Whether to compute and store in the trace the inferred probabilities of each categories, + based on the cutpoints values. Defaults to True. + Might be useful to disable it if memory usage is of interest. Examples -------- @@ -1691,7 +1698,7 @@ class OrderedLogistic(Categorical): cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2, transform=pm.distributions.transforms.ordered) y_ = pm.OrderedLogistic("y", cutpoints=cutpoints, eta=x, observed=y) - idata = pm.sample(1000) + idata = pm.sample() # Plot the results plt.hist(cluster1, 30, alpha=0.5); @@ -1706,7 +1713,7 @@ class OrderedLogistic(Categorical): rv_op = categorical @classmethod - def dist(cls, eta, cutpoints, *args, **kwargs): + def dist(cls, eta, cutpoints, compute_p=True, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) @@ -1719,7 +1726,10 @@ def dist(cls, eta, cutpoints, *args, **kwargs): ], axis=-1, ) - p = p_cum[..., 1:] - p_cum[..., :-1] + if compute_p and pm.modelcontext(None): + p = pm.Deterministic("complete_p", p_cum[..., 1:] - p_cum[..., :-1]) + else: + p = p_cum[..., 1:] - p_cum[..., :-1] return super().dist(p, **kwargs) @@ -1731,8 +1741,8 @@ class OrderedProbit(Categorical): Useful for regression on ordinal data values whose values range from 1 to K as a function of some predictor, :math:`\eta`. The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are - mapped to which of the K observed dependent variables. The number - of cutpoints is K - 1. It is recommended that the cutpoints are + mapped to which of the K observed dependent variables. The number + of cutpoints is K - 1. It is recommended that the cutpoints are constrained to be ordered. In order to stabilize the computation, log-likelihood is computed @@ -1758,9 +1768,12 @@ class OrderedProbit(Categorical): The predictor. c : array The length K - 1 array of cutpoints which break :math:`\eta` into - ranges. Do not explicitly set the first and last elements of + ranges. Do not explicitly set the first and last elements of :math:`c` to negative and positive infinity. - + compute_p: boolean, default True + Whether to compute and store in the trace the inferred probabilities of each categories, + based on the cutpoints' values. Defaults to True. + Might be useful to disable it if memory usage is of interest. sigma: float The standard deviation of probit function. Example @@ -1783,7 +1796,7 @@ class OrderedProbit(Categorical): cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2, transform=pm.distributions.transforms.ordered) y_ = pm.OrderedProbit("y", cutpoints=cutpoints, eta=x, observed=y) - idata = pm.sample(1000) + idata = pm.sample() # Plot the results plt.hist(cluster1, 30, alpha=0.5); @@ -1798,7 +1811,7 @@ class OrderedProbit(Categorical): rv_op = categorical @classmethod - def dist(cls, eta, cutpoints, *args, **kwargs): + def dist(cls, eta, cutpoints, compute_p=True, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) @@ -1812,6 +1825,9 @@ def dist(cls, eta, cutpoints, *args, **kwargs): axis=-1, ) _log_p = at.as_tensor_variable(floatX(_log_p)) - p = at.exp(_log_p) + if compute_p and pm.modelcontext(None): + p = pm.Deterministic("complete_p", at.exp(_log_p)) + else: + p = at.exp(_log_p) return super().dist(p, **kwargs) From d1677c4418786d335f3c0deaeee9413826d49077 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Wed, 16 Jun 2021 13:05:15 +0200 Subject: [PATCH 06/18] Add docstring to new OrderedMultinomial --- pymc3/distributions/discrete.py | 10 ++-- pymc3/distributions/multivariate.py | 75 +++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 25281c51139..5f1de0ce6e2 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1668,13 +1668,13 @@ class OrderedLogistic(Categorical): ---------- eta: float The predictor. - c: array + cutpoints: array The length K - 1 array of cutpoints which break :math:`\eta` into ranges. Do not explicitly set the first and last elements of :math:`c` to negative and positive infinity. compute_p: boolean, default True Whether to compute and store in the trace the inferred probabilities of each categories, - based on the cutpoints values. Defaults to True. + based on the cutpoints' values. Defaults to True. Might be useful to disable it if memory usage is of interest. Examples @@ -1707,7 +1707,6 @@ class OrderedLogistic(Categorical): posterior = idata.posterior.stack(sample=("chain", "draw")) plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k'); plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); - """ rv_op = categorical @@ -1764,9 +1763,9 @@ class OrderedProbit(Categorical): Parameters ---------- - eta : float + eta: float The predictor. - c : array + cutpoints: array The length K - 1 array of cutpoints which break :math:`\eta` into ranges. Do not explicitly set the first and last elements of :math:`c` to negative and positive infinity. @@ -1805,7 +1804,6 @@ class OrderedProbit(Categorical): posterior = idata.posterior.stack(sample=("chain", "draw")) plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k'); plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); - """ rv_op = categorical diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 41f52d5c567..d6d46d616db 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -685,6 +685,81 @@ def _distr_parameters_for_repr(self): class OrderedMultinomial(Multinomial): + R""" + Ordered Multinomial log-likelihood. + + Useful for regression on ordinal data values whose values range + from 1 to K as a function of some predictor, :math:`\eta`, but + which are _aggregated_ by trial, like multinomial observations (in + contrast to `pm.OrderedLogistic`, which only accepts ordinal data + in a _disaggregated_ format, like categorical observations). + The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are + mapped to which of the K observed dependent variables. The number + of cutpoints is K - 1. It is recommended that the cutpoints are + constrained to be ordered. + + .. math:: + + f(k \mid \eta, c) = \left\{ + \begin{array}{l} + 1 - \text{logit}^{-1}(\eta - c_1) + \,, \text{if } k = 0 \\ + \text{logit}^{-1}(\eta - c_{k - 1}) - + \text{logit}^{-1}(\eta - c_{k}) + \,, \text{if } 0 < k < K \\ + \text{logit}^{-1}(\eta - c_{K - 1}) + \,, \text{if } k = K \\ + \end{array} + \right. + + Parameters + ---------- + eta: float + The predictor. + cutpoints: array + The length K - 1 array of cutpoints which break :math:`\eta` into + ranges. Do not explicitly set the first and last elements of + :math:`c` to negative and positive infinity. + n: int + The total number of multinomial trials. + compute_p: boolean, default True + Whether to compute and store in the trace the inferred probabilities of each categories, + based on the cutpoints' values. Defaults to True. + Might be useful to disable it if memory usage is of interest. + + Examples + -------- + + .. code-block:: python + + # Generate data for a simple 1 dimensional example problem + true_cum_p = np.array([0.1, 0.15, 0.25, 0.50, 0.65, 0.90, 1.0]) + true_p = np.hstack([true_cum_p[0], true_cum_p[1:] - true_cum_p[:-1]]) + fake_elections = np.random.multinomial(n=1_000, pvals=true_p, size=60) + + # Ordered multinomial regression + with pm.Model() as model: + cutpoints = pm.Normal( + "cutpoints", + mu=np.arange(6) - 2.5, + sigma=1.5, + initval=np.arange(6) - 2.5, + transform=pm.distributions.transforms.ordered, + ) + + pm.OrderedMultinomial( + "results", + eta=0.0, + cutpoints=cutpoints, + n=fake_elections.sum(1), + observed=fake_elections, + ) + + trace = pm.sample() + + # Plot the results + arviz.plot_posterior(trace_12_4, var_names=["complete_p"], ref_val=list(true_p)); + """ rv_op = multinomial @classmethod From 5ef4f12754f84570f31edccd9a28d5b7a6e5f3d6 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 22 Jun 2021 17:04:30 +0200 Subject: [PATCH 07/18] Black --- pymc3/distributions/multivariate.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index d6d46d616db..a7ac2d9953b 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -684,7 +684,7 @@ def _distr_parameters_for_repr(self): return ["n", "a"] -class OrderedMultinomial(Multinomial): +class _OrderedMultinomial(Multinomial): R""" Ordered Multinomial log-likelihood. @@ -763,7 +763,7 @@ class OrderedMultinomial(Multinomial): rv_op = multinomial @classmethod - def dist(cls, eta, cutpoints, n, compute_p=True, *args, **kwargs): + def dist(cls, eta, cutpoints, n, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) n = at.as_tensor_variable(intX(n)) @@ -777,14 +777,18 @@ def dist(cls, eta, cutpoints, n, compute_p=True, *args, **kwargs): ], axis=-1, ) - if compute_p and pm.modelcontext(None): - p = pm.Deterministic("complete_p", p_cum[..., 1:] - p_cum[..., :-1]) - else: - p = p_cum[..., 1:] - p_cum[..., :-1] + p = p_cum[..., 1:] - p_cum[..., :-1] return super().dist(n, p, *args, **kwargs) +def OrderedMultinomial(name, *args, compute_p=True, **kwargs): + out_rv = _OrderedMultinomial(name, *args, **kwargs) + if compute_p: + pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[4]) + return out_rv + + def posdef(AA): try: linalg.cholesky(AA) From 6367c3f61c482b1bb56547fc99847244570da669 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Thu, 24 Jun 2021 16:44:10 +0200 Subject: [PATCH 08/18] Remove explicit import --- pymc3/distributions/multivariate.py | 15 +++++++---- pymc3/tests/test_distributions.py | 32 ++++++++++++++++++++++++ pymc3/tests/test_distributions_random.py | 12 +++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index a7ac2d9953b..a039165157c 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -782,11 +782,16 @@ def dist(cls, eta, cutpoints, n, *args, **kwargs): return super().dist(n, p, *args, **kwargs) -def OrderedMultinomial(name, *args, compute_p=True, **kwargs): - out_rv = _OrderedMultinomial(name, *args, **kwargs) - if compute_p: - pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[4]) - return out_rv +class OrderedMultinomial: + def __new__(cls, name, *args, compute_p=True, **kwargs): + out_rv = _OrderedMultinomial(name, *args, **kwargs) + if compute_p: + pm.Deterministic(f"{name}_p", out_rv.owner.inputs[4]) + return out_rv + + @classmethod + def dist(cls, *args, **kwargs): + return _OrderedMultinomial.dist(*args, **kwargs) def posdef(AA): diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 162117ce021..462fa1e0bb7 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -2898,6 +2898,38 @@ def test_orderedlogistic_dimensions(shape): assert np.allclose(ologp, expected) +def test_ordered_multi_probs(): + with pm.Model() as m: + pm.OrderedMultinomial("om_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0) + pm.OrderedMultinomial( + "om_no_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False + ) + assert len(m.deterministics) == 1 + + x = pm.OrderedMultinomial.dist(n=1000, cutpoints=np.array([-2, 0, 2]), eta=0) + assert isinstance(x, TensorVariable) + + +def test_ordered_logistic_probs(): + with pm.Model() as m: + pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0) + pm.OrderedMultinomial("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False) + assert len(m.deterministics) == 1 + + x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0) + assert isinstance(x, TensorVariable) + + +def test_ordered_probit_probs(): + with pm.Model() as m: + pm.OrderedProbit("op_p", cutpoints=np.array([-2, 0, 2]), eta=0) + pm.OrderedProbit("op_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False) + assert len(m.deterministics) == 1 + + x = pm.OrderedProbit.dist(cutpoints=np.array([-2, 0, 2]), eta=0) + assert isinstance(x, TensorVariable) + + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize("size", [(1,), (4,)], ids=str) def test_car_logp(size): diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index fdcf2afd606..ac56b2d4faa 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -1306,6 +1306,18 @@ class TestOrderedProbit(BaseTestDistribution): ] +class TestOrderedMultinomial(BaseTestDistribution): + pymc_dist = pm.OrderedMultinomial + pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2]), "n": 1000} + sizes_to_check = [None, (1), (4,), (3, 2)] + sizes_expected = [(4,), (1, 4), (4, 4), (3, 2, 4)] + expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + class TestInterpolated(BaseTestDistribution): def interpolated_rng_fn(self, size, mu, sigma, rng): return st.norm.rvs(loc=mu, scale=sigma, size=size) From d131214dd0c74bc038023e786299e20700c64632 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Wed, 30 Jun 2021 16:08:39 +0200 Subject: [PATCH 09/18] Fixed typos in tests --- pymc3/tests/test_distributions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 462fa1e0bb7..2e5d16a30b1 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -2898,7 +2898,7 @@ def test_orderedlogistic_dimensions(shape): assert np.allclose(ologp, expected) -def test_ordered_multi_probs(): +def test_ordered_multinomial_probs(): with pm.Model() as m: pm.OrderedMultinomial("om_p", n=1000, cutpoints=np.array([-2, 0, 2]), eta=0) pm.OrderedMultinomial( @@ -2913,7 +2913,7 @@ def test_ordered_multi_probs(): def test_ordered_logistic_probs(): with pm.Model() as m: pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0) - pm.OrderedMultinomial("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False) + pm.OrderedLogistic("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False) assert len(m.deterministics) == 1 x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0) From f444a6ab4841fd5cdde1105302aca203d013f085 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Thu, 1 Jul 2021 12:49:28 +0200 Subject: [PATCH 10/18] Black --- pymc3/distributions/discrete.py | 46 ++++++++++++++++++++--------- pymc3/distributions/multivariate.py | 2 +- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 5f1de0ce6e2..275fac25692 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1639,7 +1639,7 @@ def logcdf(value, psi, n, p): ) -class OrderedLogistic(Categorical): +class _OrderedLogistic(Categorical): R""" Ordered Logistic log-likelihood. @@ -1712,7 +1712,7 @@ class OrderedLogistic(Categorical): rv_op = categorical @classmethod - def dist(cls, eta, cutpoints, compute_p=True, *args, **kwargs): + def dist(cls, eta, cutpoints, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) @@ -1725,15 +1725,24 @@ def dist(cls, eta, cutpoints, compute_p=True, *args, **kwargs): ], axis=-1, ) - if compute_p and pm.modelcontext(None): - p = pm.Deterministic("complete_p", p_cum[..., 1:] - p_cum[..., :-1]) - else: - p = p_cum[..., 1:] - p_cum[..., :-1] + p = p_cum[..., 1:] - p_cum[..., :-1] + + return super().dist(p, *args, **kwargs) + - return super().dist(p, **kwargs) +class OrderedLogistic: + def __new__(cls, name, *args, compute_p=True, **kwargs): + out_rv = _OrderedLogistic(name, *args, **kwargs) + if compute_p: + pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3]) + return out_rv + + @classmethod + def dist(cls, *args, **kwargs): + return _OrderedLogistic.dist(*args, **kwargs) -class OrderedProbit(Categorical): +class _OrderedProbit(Categorical): R""" Ordered Probit log-likelihood. @@ -1809,7 +1818,7 @@ class OrderedProbit(Categorical): rv_op = categorical @classmethod - def dist(cls, eta, cutpoints, compute_p=True, *args, **kwargs): + def dist(cls, eta, cutpoints, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) @@ -1823,9 +1832,18 @@ def dist(cls, eta, cutpoints, compute_p=True, *args, **kwargs): axis=-1, ) _log_p = at.as_tensor_variable(floatX(_log_p)) - if compute_p and pm.modelcontext(None): - p = pm.Deterministic("complete_p", at.exp(_log_p)) - else: - p = at.exp(_log_p) + p = at.exp(_log_p) + + return super().dist(p, *args, **kwargs) + - return super().dist(p, **kwargs) +class OrderedProbit: + def __new__(cls, name, *args, compute_p=True, **kwargs): + out_rv = _OrderedProbit(name, *args, **kwargs) + if compute_p: + pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3]) + return out_rv + + @classmethod + def dist(cls, *args, **kwargs): + return _OrderedProbit.dist(*args, **kwargs) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index a039165157c..e1c3ac8ce13 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -786,7 +786,7 @@ class OrderedMultinomial: def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedMultinomial(name, *args, **kwargs) if compute_p: - pm.Deterministic(f"{name}_p", out_rv.owner.inputs[4]) + pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[4]) return out_rv @classmethod From 8d981776b528bcc467a787e6929acce8ae0c7e18 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Fri, 2 Jul 2021 08:56:11 +0200 Subject: [PATCH 11/18] Move docstrings to the user-facing classes --- pymc3/distributions/discrete.py | 96 +++++++++-------- pymc3/distributions/multivariate.py | 156 +++++++++++++++------------- 2 files changed, 133 insertions(+), 119 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 275fac25692..790c47645f9 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1640,8 +1640,34 @@ def logcdf(value, psi, n, p): class _OrderedLogistic(Categorical): + r""" + Underlying class for ordered logistic distributions. + See docs for the OrderedLogistic wrapper class for more details on how to use it in models. + """ + rv_op = categorical + + @classmethod + def dist(cls, eta, cutpoints, *args, **kwargs): + eta = at.as_tensor_variable(floatX(eta)) + cutpoints = at.as_tensor_variable(cutpoints) + + pa = sigmoid(cutpoints - at.shape_padright(eta)) + p_cum = at.concatenate( + [ + at.zeros_like(at.shape_padright(pa[..., 0])), + pa, + at.ones_like(at.shape_padright(pa[..., 0])), + ], + axis=-1, + ) + p = p_cum[..., 1:] - p_cum[..., :-1] + + return super().dist(p, *args, **kwargs) + + +class OrderedLogistic: R""" - Ordered Logistic log-likelihood. + Wrapper class for Ordered Logistic distributions. Useful for regression on ordinal data values whose values range from 1 to K as a function of some predictor, :math:`\eta`. The @@ -1709,6 +1735,22 @@ class _OrderedLogistic(Categorical): plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); """ + def __new__(cls, name, *args, compute_p=True, **kwargs): + out_rv = _OrderedLogistic(name, *args, **kwargs) + if compute_p: + pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3]) + return out_rv + + @classmethod + def dist(cls, *args, **kwargs): + return _OrderedLogistic.dist(*args, **kwargs) + + +class _OrderedProbit(Categorical): + r""" + Underlying class for ordered probit distributions. + See docs for the OrderedProbit wrapper class for more details on how to use it in models. + """ rv_op = categorical @classmethod @@ -1716,35 +1758,24 @@ def dist(cls, eta, cutpoints, *args, **kwargs): eta = at.as_tensor_variable(floatX(eta)) cutpoints = at.as_tensor_variable(cutpoints) - pa = sigmoid(cutpoints - at.shape_padright(eta)) - p_cum = at.concatenate( + probits = at.shape_padright(eta) - cutpoints + _log_p = at.concatenate( [ - at.zeros_like(at.shape_padright(pa[..., 0])), - pa, - at.ones_like(at.shape_padright(pa[..., 0])), + at.shape_padright(normal_lccdf(0, 1, probits[..., 0])), + log_diff_normal_cdf(0, 1, probits[..., :-1], probits[..., 1:]), + at.shape_padright(normal_lcdf(0, 1, probits[..., -1])), ], axis=-1, ) - p = p_cum[..., 1:] - p_cum[..., :-1] + _log_p = at.as_tensor_variable(floatX(_log_p)) + p = at.exp(_log_p) return super().dist(p, *args, **kwargs) -class OrderedLogistic: - def __new__(cls, name, *args, compute_p=True, **kwargs): - out_rv = _OrderedLogistic(name, *args, **kwargs) - if compute_p: - pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3]) - return out_rv - - @classmethod - def dist(cls, *args, **kwargs): - return _OrderedLogistic.dist(*args, **kwargs) - - -class _OrderedProbit(Categorical): +class OrderedProbit: R""" - Ordered Probit log-likelihood. + Wrapper class for Ordered Probit distributions. Useful for regression on ordinal data values whose values range from 1 to K as a function of some predictor, :math:`\eta`. The @@ -1815,29 +1846,6 @@ class _OrderedProbit(Categorical): plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); """ - rv_op = categorical - - @classmethod - def dist(cls, eta, cutpoints, *args, **kwargs): - eta = at.as_tensor_variable(floatX(eta)) - cutpoints = at.as_tensor_variable(cutpoints) - - probits = at.shape_padright(eta) - cutpoints - _log_p = at.concatenate( - [ - at.shape_padright(normal_lccdf(0, 1, probits[..., 0])), - log_diff_normal_cdf(0, 1, probits[..., :-1], probits[..., 1:]), - at.shape_padright(normal_lcdf(0, 1, probits[..., -1])), - ], - axis=-1, - ) - _log_p = at.as_tensor_variable(floatX(_log_p)) - p = at.exp(_log_p) - - return super().dist(p, *args, **kwargs) - - -class OrderedProbit: def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedProbit(name, *args, **kwargs) if compute_p: diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index e1c3ac8ce13..ca0ccde18c9 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -685,81 +685,10 @@ def _distr_parameters_for_repr(self): class _OrderedMultinomial(Multinomial): - R""" - Ordered Multinomial log-likelihood. - - Useful for regression on ordinal data values whose values range - from 1 to K as a function of some predictor, :math:`\eta`, but - which are _aggregated_ by trial, like multinomial observations (in - contrast to `pm.OrderedLogistic`, which only accepts ordinal data - in a _disaggregated_ format, like categorical observations). - The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are - mapped to which of the K observed dependent variables. The number - of cutpoints is K - 1. It is recommended that the cutpoints are - constrained to be ordered. - - .. math:: - - f(k \mid \eta, c) = \left\{ - \begin{array}{l} - 1 - \text{logit}^{-1}(\eta - c_1) - \,, \text{if } k = 0 \\ - \text{logit}^{-1}(\eta - c_{k - 1}) - - \text{logit}^{-1}(\eta - c_{k}) - \,, \text{if } 0 < k < K \\ - \text{logit}^{-1}(\eta - c_{K - 1}) - \,, \text{if } k = K \\ - \end{array} - \right. - - Parameters - ---------- - eta: float - The predictor. - cutpoints: array - The length K - 1 array of cutpoints which break :math:`\eta` into - ranges. Do not explicitly set the first and last elements of - :math:`c` to negative and positive infinity. - n: int - The total number of multinomial trials. - compute_p: boolean, default True - Whether to compute and store in the trace the inferred probabilities of each categories, - based on the cutpoints' values. Defaults to True. - Might be useful to disable it if memory usage is of interest. - - Examples - -------- - - .. code-block:: python - - # Generate data for a simple 1 dimensional example problem - true_cum_p = np.array([0.1, 0.15, 0.25, 0.50, 0.65, 0.90, 1.0]) - true_p = np.hstack([true_cum_p[0], true_cum_p[1:] - true_cum_p[:-1]]) - fake_elections = np.random.multinomial(n=1_000, pvals=true_p, size=60) - - # Ordered multinomial regression - with pm.Model() as model: - cutpoints = pm.Normal( - "cutpoints", - mu=np.arange(6) - 2.5, - sigma=1.5, - initval=np.arange(6) - 2.5, - transform=pm.distributions.transforms.ordered, - ) - - pm.OrderedMultinomial( - "results", - eta=0.0, - cutpoints=cutpoints, - n=fake_elections.sum(1), - observed=fake_elections, - ) - - trace = pm.sample() - - # Plot the results - arviz.plot_posterior(trace_12_4, var_names=["complete_p"], ref_val=list(true_p)); - """ + r""" + Underlying class for ordered multinomial distributions. + See docs for the OrderedMultinomial wrapper class for more details on how to use it in models. + """ rv_op = multinomial @classmethod @@ -783,6 +712,83 @@ def dist(cls, eta, cutpoints, n, *args, **kwargs): class OrderedMultinomial: + R""" + Wrapper class for Ordered Multinomial distributions. + + Useful for regression on ordinal data values whose values range + from 1 to K as a function of some predictor, :math:`\eta`, but + which are _aggregated_ by trial, like multinomial observations (in + contrast to `pm.OrderedLogistic`, which only accepts ordinal data + in a _disaggregated_ format, like categorical observations). + The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are + mapped to which of the K observed dependent variables. The number + of cutpoints is K - 1. It is recommended that the cutpoints are + constrained to be ordered. + + .. math:: + + f(k \mid \eta, c) = \left\{ + \begin{array}{l} + 1 - \text{logit}^{-1}(\eta - c_1) + \,, \text{if } k = 0 \\ + \text{logit}^{-1}(\eta - c_{k - 1}) - + \text{logit}^{-1}(\eta - c_{k}) + \,, \text{if } 0 < k < K \\ + \text{logit}^{-1}(\eta - c_{K - 1}) + \,, \text{if } k = K \\ + \end{array} + \right. + + Parameters + ---------- + eta: float + The predictor. + cutpoints: array + The length K - 1 array of cutpoints which break :math:`\eta` into + ranges. Do not explicitly set the first and last elements of + :math:`c` to negative and positive infinity. + n: int + The total number of multinomial trials. + compute_p: boolean, default True + Whether to compute and store in the trace the inferred probabilities of each + categories, + based on the cutpoints' values. Defaults to True. + Might be useful to disable it if memory usage is of interest. + + Examples + -------- + + .. code-block:: python + + # Generate data for a simple 1 dimensional example problem + true_cum_p = np.array([0.1, 0.15, 0.25, 0.50, 0.65, 0.90, 1.0]) + true_p = np.hstack([true_cum_p[0], true_cum_p[1:] - true_cum_p[:-1]]) + fake_elections = np.random.multinomial(n=1_000, pvals=true_p, size=60) + + # Ordered multinomial regression + with pm.Model() as model: + cutpoints = pm.Normal( + "cutpoints", + mu=np.arange(6) - 2.5, + sigma=1.5, + initval=np.arange(6) - 2.5, + transform=pm.distributions.transforms.ordered, + ) + + pm.OrderedMultinomial( + "results", + eta=0.0, + cutpoints=cutpoints, + n=fake_elections.sum(1), + observed=fake_elections, + ) + + trace = pm.sample() + + # Plot the results + arviz.plot_posterior(trace_12_4, var_names=["complete_p"], ref_val=list(true_p)); + """ + def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedMultinomial(name, *args, **kwargs) if compute_p: From a62669d45b2589bfe04f476f87019d82fb4245db Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Sun, 4 Jul 2021 17:38:37 +0200 Subject: [PATCH 12/18] Black --- pymc3/tests/test_distributions_random.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index dee77adf026..3fe186e4ae5 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -1450,13 +1450,16 @@ class TestOrderedProbit(BaseTestDistribution): "check_rv_size", ] - + class TestOrderedMultinomial(BaseTestDistribution): pymc_dist = pm.OrderedMultinomial pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2]), "n": 1000} sizes_to_check = [None, (1), (4,), (3, 2)] sizes_expected = [(4,), (1, 4), (4, 4), (3, 2, 4)] - expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])} + expected_rv_op_params = { + "n": 1000, + "p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292]), + } tests_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", From c9c9a97cdfc64c10f9af3741b09b0fe6e2ea0d4b Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Sun, 4 Jul 2021 17:39:06 +0200 Subject: [PATCH 13/18] Add back rv_op in wrapper classes --- pymc3/distributions/discrete.py | 2 ++ pymc3/distributions/multivariate.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 790c47645f9..5c2e43fc627 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1734,6 +1734,7 @@ class OrderedLogistic: plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k'); plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); """ + rv_op = categorical def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedLogistic(name, *args, **kwargs) @@ -1845,6 +1846,7 @@ class OrderedProbit: plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k'); plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); """ + rv_op = categorical def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedProbit(name, *args, **kwargs) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 66fd60e68f7..17df686009d 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -26,7 +26,7 @@ from aesara.graph.basic import Apply from aesara.graph.op import Op -from aesara.tensor import gammaln +from aesara.tensor import gammaln, sigmoid from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace from aesara.tensor.random.basic import MultinomialRV, dirichlet, multivariate_normal from aesara.tensor.random.op import RandomVariable, default_shape_from_params @@ -791,6 +791,7 @@ class OrderedMultinomial: # Plot the results arviz.plot_posterior(trace_12_4, var_names=["complete_p"], ref_val=list(true_p)); """ + rv_op = multinomial def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedMultinomial(name, *args, **kwargs) From 5281c4a0f8e990ec660d85895a30abacf44fb287 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Sun, 4 Jul 2021 18:10:19 +0200 Subject: [PATCH 14/18] isort --- pymc3/tests/test_distributions_random.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 3fe186e4ae5..cd780aeb427 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -33,8 +33,9 @@ from pymc3.aesaraf import change_rv_size, floatX, intX from pymc3.distributions.continuous import get_tau_sigma, interpolated +from pymc3.distributions.discrete import _OrderedLogistic, _OrderedProbit from pymc3.distributions.dist_math import clipped_beta_rvs -from pymc3.distributions.multivariate import quaddist_matrix +from pymc3.distributions.multivariate import _OrderedMultinomial, quaddist_matrix from pymc3.distributions.shape_utils import to_tuple from pymc3.exceptions import ShapeError from pymc3.tests.helpers import SeededTest, select_by_precision @@ -1432,7 +1433,7 @@ def seeded_zero_inflated_negbinomial_rng_fn(self): class TestOrderedLogistic(BaseTestDistribution): - pymc_dist = pm.OrderedLogistic + pymc_dist = _OrderedLogistic pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])} tests_to_run = [ @@ -1442,7 +1443,7 @@ class TestOrderedLogistic(BaseTestDistribution): class TestOrderedProbit(BaseTestDistribution): - pymc_dist = pm.OrderedProbit + pymc_dist = _OrderedProbit pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} expected_rv_op_params = {"p": np.array([0.02275013, 0.47724987, 0.47724987, 0.02275013])} tests_to_run = [ @@ -1452,7 +1453,7 @@ class TestOrderedProbit(BaseTestDistribution): class TestOrderedMultinomial(BaseTestDistribution): - pymc_dist = pm.OrderedMultinomial + pymc_dist = _OrderedMultinomial pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2]), "n": 1000} sizes_to_check = [None, (1), (4,), (3, 2)] sizes_expected = [(4,), (1, 4), (4, 4), (3, 2, 4)] From 898c40e08f3d1e7b5140a546120a6f98c2815f92 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Sun, 4 Jul 2021 20:53:09 +0200 Subject: [PATCH 15/18] Black --- pymc3/distributions/discrete.py | 2 -- pymc3/distributions/multivariate.py | 1 - 2 files changed, 3 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 5c2e43fc627..790c47645f9 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1734,7 +1734,6 @@ class OrderedLogistic: plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k'); plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); """ - rv_op = categorical def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedLogistic(name, *args, **kwargs) @@ -1846,7 +1845,6 @@ class OrderedProbit: plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k'); plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k'); """ - rv_op = categorical def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedProbit(name, *args, **kwargs) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 17df686009d..5d3e313fc18 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -791,7 +791,6 @@ class OrderedMultinomial: # Plot the results arviz.plot_posterior(trace_12_4, var_names=["complete_p"], ref_val=list(true_p)); """ - rv_op = multinomial def __new__(cls, name, *args, compute_p=True, **kwargs): out_rv = _OrderedMultinomial(name, *args, **kwargs) From 4ce1294844e32e37953328ad7c68fef921b1f1c6 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Sun, 4 Jul 2021 22:26:16 +0200 Subject: [PATCH 16/18] Mention in release notes --- RELEASE-NOTES.md | 3 ++- pymc3/distributions/multivariate.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 81c7a089726..cd37f6e834f 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -18,7 +18,8 @@ - The `size` kwarg behaves like it does in Aesara/NumPy. For univariate RVs it is the same as `shape`, but for multivariate RVs it depends on how the RV implements broadcasting to dimensionality greater than `RVOp.ndim_supp`. - An `Ellipsis` (`...`) in the last position of `shape` or `dims` can be used as short-hand notation for implied dimensions. - Add `logcdf` method to Kumaraswamy distribution (see [#4706](https://github.com/pymc-devs/pymc3/pull/4706)). -- ... +- The `OrderedMultinomial` distribution has been added for use on ordinal data which are _aggregated_ by trial, like multinomial observations, whereas `OrderedLogistic` only accepts ordinal data in a _disaggregated_ format, like categorical + observations (see [#4773](https://github.com/pymc-devs/pymc3/pull/4773)). ### Maintenance - Remove float128 dtype support (see [#4514](https://github.com/pymc-devs/pymc3/pull/4514)). diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 5d3e313fc18..0fdbe51777e 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -718,7 +718,7 @@ class OrderedMultinomial: R""" Wrapper class for Ordered Multinomial distributions. - Useful for regression on ordinal data values whose values range + Useful for regression on ordinal data whose values range from 1 to K as a function of some predictor, :math:`\eta`, but which are _aggregated_ by trial, like multinomial observations (in contrast to `pm.OrderedLogistic`, which only accepts ordinal data From bd4470feffc4ba537dd521cf02926f162a3c77a8 Mon Sep 17 00:00:00 2001 From: Alexandre ANDORRA Date: Sun, 4 Jul 2021 23:35:15 +0200 Subject: [PATCH 17/18] Apply suggestions from code review Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com> --- pymc3/distributions/multivariate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 0fdbe51777e..8cf02e92906 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -116,7 +116,7 @@ def quaddist_parse(value, mu, cov, mat_type="cov"): # Use this when Theano#5908 is released. # return MvNormalLogp()(self.cov, delta) chol_cov = cholesky(cov) - if mat_type == "cov" or mat_type != "tau": + if mat_type != "tau": dist, logdet, ok = quaddist_chol(delta, chol_cov) else: dist, logdet, ok = quaddist_tau(delta, chol_cov) From 64b1ac8e0a2815bc28e06c36368dfe3470572ec8 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 5 Jul 2021 10:48:20 +0200 Subject: [PATCH 18/18] Add reference to new distribution in docs --- docs/source/api/distributions/multivariate.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/api/distributions/multivariate.rst b/docs/source/api/distributions/multivariate.rst index d54e78ef336..839ffe8e90d 100644 --- a/docs/source/api/distributions/multivariate.rst +++ b/docs/source/api/distributions/multivariate.rst @@ -13,6 +13,7 @@ Multivariate LKJCholeskyCov LKJCorr Multinomial + OrderedMultinomial Dirichlet DirichletMultinomial CAR