From 43bd7110318219f24ce1f84bb3badcecff6f34dd Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Tue, 16 Mar 2021 23:00:40 -0500 Subject: [PATCH 01/10] Make transform objects stateless --- pymc3/backends/base.py | 2 +- pymc3/distributions/__init__.py | 93 ++++---- pymc3/distributions/continuous.py | 27 +-- pymc3/distributions/multivariate.py | 10 +- pymc3/distributions/transforms.py | 314 +++++++++++++--------------- pymc3/model.py | 9 +- pymc3/tests/test_distributions.py | 7 +- pymc3/tests/test_transforms.py | 199 ++++++++++-------- 8 files changed, 342 insertions(+), 319 deletions(-) diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 866a9bbcdc3..bbd3388146a 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -68,7 +68,7 @@ def __init__(self, name, model=None, vars=None, test_point=None): if transform: # We need to create and add an un-transformed version of # each transformed variable - untrans_var = transform.backward(var) + untrans_var = transform.backward(v, var) untrans_var.name = v.name vars.append(untrans_var) vars.append(var) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index f7bc508d7fe..5eec31935d3 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings + from functools import singledispatch from itertools import chain from typing import Generator, List, Optional, Tuple, Union @@ -20,7 +22,7 @@ from aesara import config from aesara.graph.basic import Variable, ancestors, clone_replace -from aesara.graph.op import compute_test_value +from aesara.graph.op import Op, compute_test_value from aesara.tensor.random.op import Observed, RandomVariable from aesara.tensor.subtensor import AdvancedSubtensor, AdvancedSubtensor1, Subtensor from aesara.tensor.var import TensorVariable @@ -33,7 +35,7 @@ @singledispatch -def logp_transform(op, inputs): +def logp_transform(op: Op): return None @@ -141,7 +143,8 @@ def change_rv_size( def rv_log_likelihood_args( rv_var: TensorVariable, - transformed: Optional[bool] = True, + *, + return_observations: bool = True, ) -> Tuple[TensorVariable, TensorVariable]: """Get a `RandomVariable` and its corresponding log-likelihood `TensorVariable` value. @@ -151,8 +154,9 @@ def rv_log_likelihood_args( A variable corresponding to a `RandomVariable`, whether directly or indirectly (e.g. an observed variable that's the output of an `Observed` `Op`). - transformed - When ``True``, return the transformed value var. + return_observations + When ``True``, return the observed values in place of the log-likelihood + value variable. 
Returns ======= @@ -163,12 +167,14 @@ def rv_log_likelihood_args( """ if rv_var.owner and isinstance(rv_var.owner.op, Observed): - return tuple(rv_var.owner.inputs) - elif hasattr(rv_var.tag, "value_var"): - rv_value = rv_var.tag.value_var - return rv_var, rv_value - else: - return rv_var, None + rv_var, obs_var = rv_var.owner.inputs + if return_observations: + return rv_var, obs_var + else: + return rv_var, rv_log_likelihood_args(rv_var)[1] + + rv_value = getattr(rv_var.tag, "value_var", None) + return rv_var, rv_value def rv_ancestors(graphs: List[TensorVariable]) -> Generator[TensorVariable, None, None]: @@ -217,7 +223,7 @@ def sample_to_measure_vars( if not (anc.owner and isinstance(anc.owner.op, RandomVariable)): continue - _, value_var = rv_log_likelihood_args(anc) + _, value_var = rv_log_likelihood_args(anc, return_observations=False) if value_var is not None: replace[anc] = value_var @@ -233,8 +239,10 @@ def sample_to_measure_vars( def logpt( rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, - jacobian: Optional[bool] = True, - scaling: Optional[bool] = True, + *, + jacobian: bool = True, + scaling: bool = True, + transformed: bool = True, **kwargs, ) -> TensorVariable: """Create a measure-space (i.e. log-likelihood) graph for a random variable at a given point. @@ -257,6 +265,8 @@ def logpt( Whether or not to include the Jacobian term. scaling A scaling term to apply to the generated log-likelihood graph. + transformed + Apply transforms. """ @@ -282,22 +292,22 @@ def logpt( raise NotImplementedError("Missing value support is incomplete") - # "Flatten" and sum an array of indexed RVs' log-likelihoods - rv_var, missing_values = rv_node.inputs - - missing_values = missing_values.data - logp_var = aet.sum( - [ - logpt( - rv_var, - ) - for idx, missing in zip( - np.ndindex(missing_values.shape), missing_values.flatten() - ) - if missing - ] - ) - return logp_var + # # "Flatten" and sum an array of indexed RVs' log-likelihoods + # rv_var, missing_values = rv_node.inputs + # + # missing_values = missing_values.data + # logp_var = aet.sum( + # [ + # logpt( + # rv_var, + # ) + # for idx, missing in zip( + # np.ndindex(missing_values.shape), missing_values.flatten() + # ) + # if missing + # ] + # ) + # return logp_var return aet.zeros_like(rv_var) @@ -312,15 +322,16 @@ def logpt( # If any of the measure vars are transformed measure-space variables # (signified by having a `transform` value in their tags), then we apply # the their transforms and add their Jacobians (when enabled) - if transform: - logp_var = _logp(rv_node.op, transform.backward(rv_value), *dist_params, **kwargs) + if transform and transformed: + logp_var = _logp(rv_node.op, transform.backward(rv_var, rv_value), *dist_params, **kwargs) + logp_var = transform_logp( logp_var, tuple(replacements.values()), ) if jacobian: - transformed_jacobian = transform.jacobian_det(rv_value) + transformed_jacobian = transform.jacobian_det(rv_var, rv_value) if transformed_jacobian: if logp_var.ndim > transformed_jacobian.ndim: logp_var = logp_var.sum(axis=-1) @@ -345,11 +356,17 @@ def transform_logp(logp_var: TensorVariable, inputs: List[TensorVariable]) -> Te for measure_var in inputs: transform = getattr(measure_var.tag, "transform", None) + rv_var = getattr(measure_var.tag, "rv_var", None) + + if transform is not None and rv_var is None: + warnings.warn( + f"A transform was found for {measure_var} but not a corresponding random variable" + ) - if transform is None: + if transform is None or rv_var is None: continue - 
trans_rv_value = transform.backward(measure_var) + trans_rv_value = transform.backward(rv_var, measure_var) trans_replacements[measure_var] = trans_rv_value if trans_replacements: @@ -359,7 +376,7 @@ def transform_logp(logp_var: TensorVariable, inputs: List[TensorVariable]) -> Te @singledispatch -def _logp(op, value, *dist_params, **kwargs): +def _logp(op: Op, value: TensorVariable, *dist_params, **kwargs): """Create a log-likelihood graph. This function dispatches on the type of `op`, which should be a subclass @@ -370,7 +387,9 @@ def _logp(op, value, *dist_params, **kwargs): return aet.zeros_like(value) -def logcdf(rv_var, rv_value, jacobian=True, **kwargs): +def logcdf( + rv_var: TensorVariable, rv_value: Optional[TensorVariable], jacobian: bool = True, **kwargs +): """Create a log-CDF graph.""" rv_var, _ = rv_log_likelihood_args(rv_var) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index b28481c3654..f6890266f9f 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -104,31 +104,24 @@ class BoundedContinuous(Continuous): @logp_transform.register(PositiveContinuous) -def pos_cont_transform(op, rv_var): +def pos_cont_transform(op): return transforms.log @logp_transform.register(UnitContinuous) -def unit_cont_transform(op, rv_var): +def unit_cont_transform(op): return transforms.logodds @logp_transform.register(BoundedContinuous) -def bounded_cont_transform(op, rv_var): - _, _, _, lower, upper = rv_var.owner.inputs - lower = aet.as_tensor_variable(lower) if lower is not None else None - upper = aet.as_tensor_variable(upper) if upper is not None else None - - if lower is None and upper is None: - transform = None - elif lower is not None and upper is None: - transform = transforms.lowerbound(lower) - elif lower is None and upper is not None: - transform = transforms.upperbound(upper) - else: - transform = transforms.interval(lower, upper) - - return transform +def bounded_cont_transform(op): + def transform_params(rv_var): + _, _, _, lower, upper = rv_var.owner.inputs + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None + return lower, upper + + return transforms.interval(transform_params) def assert_negative_support(var, label, distname, value=-1e-6): diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 281317255b2..a11366423ba 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -125,7 +125,7 @@ def quaddist_parse(value, mu, cov, mat_type="cov"): def quaddist_chol(delta, chol_mat): - diag = aet.nlinalg.diag(chol_mat) + diag = aet.diag(chol_mat) # Check if the covariance matrix is positive definite. ok = aet.all(diag > 0) # If not, replace the diagonal. 
We return -inf later, but @@ -222,7 +222,7 @@ class MvNormal(Continuous): def dist(cls, mu, cov=None, tau=None, chol=None, lower=True, **kwargs): mu = aet.as_tensor_variable(mu) cov = quaddist_matrix(cov, tau, chol, lower) - return super().__init__([mu, cov], **kwargs) + return super().dist([mu, cov], **kwargs) def logp(value, mu, cov): """ @@ -968,7 +968,11 @@ def __init__(self, eta, n, sd_dist, *args, **kwargs): if sd_dist.shape.ndim not in [0, 1]: raise ValueError("Invalid shape for sd_dist.") - transform = transforms.CholeskyCovPacked(n) + def transform_params(rv_var): + _, _, _, n, eta = rv_var.owner.inputs + return np.arange(1, n + 1).cumsum() - 1 + + transform = transforms.CholeskyCovPacked(transform_params) kwargs["shape"] = shape kwargs["transform"] = transform diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 3aa6b81ec00..98c7a15ebd4 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings - import aesara.tensor as aet -import numpy as np from aesara.tensor.subtensor import advanced_set_subtensor1 +from aesara.tensor.var import TensorVariable from pymc3.aesaraf import floatX, gradient from pymc3.math import invlogit, logit, logsumexp @@ -28,8 +26,6 @@ "logodds", "interval", "log_exp_m1", - "lowerbound", - "upperbound", "ordered", "log", "sum_to_1", @@ -45,19 +41,39 @@ class Transform: Attributes ---------- name: str + The name of the transform. + param_extract_fn: callable + A callable that takes a `TensorVariable` representing a random + variable, and returns the parameters required by the transform. + By customizing this function, one can broaden the applicability of--or + specialize--a `Transform` without the need to create a new `Transform` + class or altering existing `Transform` classes. For instance, + new `RandomVariable`s can supply their own `param_extract_fn` + implementations that account for their own unique parameterizations. """ + __slots__ = ("param_extract_fn",) name = "" - def forward(self, x): - """Applies transformation forward to input variable `x`. - When transform is used on some distribution `p`, it will transform the random variable `x` after sampling - from `p`. + def forward(self, rv_var: TensorVariable, rv_value: TensorVariable) -> TensorVariable: + """Applies transformation forward to input variable `rv_value`. + + When a transform is applied to a value of some random variable + `rv_var`, it will transform the random variable `rv_value` after + sampling from `rv_var`. + + **Do not apply transforms to `rv_var`.** `rv_var` is only provided + as a means of describing the random variable associated with `rv_value`. + `rv_value` is the variable that should be transformed, and the transform + can use information from `rv_var`--within `param_extract_fn`--to do + that (e.g. the random variable's parameters via `rv_var.owner.inputs`). Parameters ---------- - x: tensor - Input tensor to be transformed. + rv_var + The random variable. + rv_value + The variable representing a value of `rv_var`. Returns -------- @@ -66,15 +82,15 @@ def forward(self, x): """ raise NotImplementedError - def backward(self, z): - """Applies inverse of transformation to input variable `z`. - When transform is used on some distribution `p`, which has observed values `z`, it is used to - transform the values of `z` correctly to the support of `p`. 
+ def backward(self, rv_var: TensorVariable, rv_value: TensorVariable) -> TensorVariable: + """Applies inverse of transformation. Parameters ---------- - z: tensor - Input tensor to be inverse transformed. + rv_var + The random variable. + rv_value + The variable representing a value of `rv_var`. Returns ------- @@ -83,19 +99,21 @@ def backward(self, z): """ raise NotImplementedError - def jacobian_det(self, x): + def jacobian_det(self, rv_var: TensorVariable, rv_value: TensorVariable) -> TensorVariable: """Calculates logarithm of the absolute value of the Jacobian determinant - of the backward transformation for input `x`. + of the backward transformation. Parameters ---------- - x: tensor - Input to calculate Jacobian determinant of. + rv_var + The random variable. + rv_value + The variable representing a value of `rv_var`. Returns ------- tensor - The log abs Jacobian determinant of `x` w.r.t. this transform. + The log abs Jacobian determinant w.r.t. this transform. """ raise NotImplementedError @@ -104,22 +122,24 @@ def __str__(self): class ElemwiseTransform(Transform): - def jacobian_det(self, x): - grad = aet.reshape(gradient(aet.sum(self.backward(x)), [x]), x.shape) + def jacobian_det(self, rv_var, rv_value): + grad = aet.reshape( + gradient(aet.sum(self.backward(rv_var, rv_value)), [rv_value]), rv_value.shape + ) return aet.log(aet.abs_(grad)) class Log(ElemwiseTransform): name = "log" - def backward(self, x): - return aet.exp(x) + def backward(self, rv_var, rv_value): + return aet.exp(rv_value) - def forward(self, x): - return aet.log(x) + def forward(self, rv_var, rv_value): + return aet.log(rv_value) - def jacobian_det(self, x): - return x + def jacobian_det(self, rv_var, rv_value): + return rv_value log = Log() @@ -128,19 +148,19 @@ def jacobian_det(self, x): class LogExpM1(ElemwiseTransform): name = "log_exp_m1" - def backward(self, x): - return aet.nnet.softplus(x) + def backward(self, rv_var, rv_value): + return aet.nnet.softplus(rv_value) - def forward(self, x): + def forward(self, rv_var, rv_value): """Inverse operation of softplus. 
y = Log(Exp(x) - 1) = Log(1 - Exp(-x)) + x """ - return aet.log(1.0 - aet.exp(-x)) + x + return aet.log(1.0 - aet.exp(-rv_value)) + rv_value - def jacobian_det(self, x): - return -aet.nnet.softplus(-x) + def jacobian_det(self, rv_var, rv_value): + return -aet.nnet.softplus(-rv_value) log_exp_m1 = LogExpM1() @@ -149,11 +169,11 @@ def jacobian_det(self, x): class LogOdds(ElemwiseTransform): name = "logodds" - def backward(self, x): - return invlogit(x, 0.0) + def backward(self, rv_var, rv_value): + return invlogit(rv_value, 0.0) - def forward(self, x): - return logit(x) + def forward(self, rv_var, rv_value): + return logit(rv_value) logodds = LogOdds() @@ -164,101 +184,63 @@ class Interval(ElemwiseTransform): name = "interval" - def __init__(self, a, b): - self.a = aet.as_tensor_variable(a) - self.b = aet.as_tensor_variable(b) - - def backward(self, x): - a, b = self.a, self.b - sigmoid_x = aet.nnet.sigmoid(x) - r = sigmoid_x * b + (1 - sigmoid_x) * a - return r - - def forward(self, x): - a, b = self.a, self.b - return aet.log(x - a) - aet.log(b - x) - - def jacobian_det(self, x): - s = aet.nnet.softplus(-x) - return aet.log(self.b - self.a) - 2 * s - x + def __init__(self, param_extract_fn): + self.param_extract_fn = param_extract_fn + + def backward(self, rv_var, rv_value): + a, b = self.param_extract_fn(rv_var) + + if a is not None and b is not None: + sigmoid_x = aet.nnet.sigmoid(rv_value) + return sigmoid_x * b + (1 - sigmoid_x) * a + elif a is not None: + return aet.exp(rv_value) + a + elif b is not None: + return b - aet.exp(rv_value) + else: + return rv_value + + def forward(self, rv_var, rv_value): + a, b = self.param_extract_fn(rv_var) + if a is not None and b is not None: + return aet.log(rv_value - a) - aet.log(b - rv_value) + elif a is not None: + return aet.log(rv_value - a) + elif b is not None: + return aet.log(b - rv_value) + else: + return rv_value + + def jacobian_det(self, rv_var, rv_value): + a, b = self.param_extract_fn(rv_var) + + if a is not None and b is not None: + s = aet.nnet.softplus(-rv_value) + return aet.log(b - a) - 2 * s - rv_value + else: + return rv_value interval = Interval -class LowerBound(ElemwiseTransform): - """Transform from real line interval [a,inf] to whole real line.""" - - name = "lowerbound" - - def __init__(self, a): - self.a = aet.as_tensor_variable(a) - - def backward(self, x): - a = self.a - r = aet.exp(x) + a - return r - - def forward(self, x): - a = self.a - return aet.log(x - a) - - def jacobian_det(self, x): - return x - - -lowerbound = LowerBound -""" -Alias for ``LowerBound`` (:class: LowerBound) Transform (:class: Transform) class -for use in the ``transform`` argument of a random variable. -""" - - -class UpperBound(ElemwiseTransform): - """Transform from real line interval [-inf,b] to whole real line.""" - - name = "upperbound" - - def __init__(self, b): - self.b = aet.as_tensor_variable(b) - - def backward(self, x): - b = self.b - r = b - aet.exp(x) - return r - - def forward(self, x): - b = self.b - return aet.log(b - x) - - def jacobian_det(self, x): - return x - - -upperbound = UpperBound -""" -Alias for ``UpperBound`` (:class: UpperBound) Transform (:class: Transform) class -for use in the ``transform`` argument of a random variable. 
-""" - - class Ordered(Transform): name = "ordered" - def backward(self, y): - x = aet.zeros(y.shape) - x = aet.inc_subtensor(x[..., 0], y[..., 0]) - x = aet.inc_subtensor(x[..., 1:], aet.exp(y[..., 1:])) + def backward(self, rv_var, rv_value): + x = aet.zeros(rv_value.shape) + x = aet.inc_subtensor(x[..., 0], rv_value[..., 0]) + x = aet.inc_subtensor(x[..., 1:], aet.exp(rv_value[..., 1:])) return aet.cumsum(x, axis=-1) - def forward(self, x): - y = aet.zeros(x.shape) - y = aet.inc_subtensor(y[..., 0], x[..., 0]) - y = aet.inc_subtensor(y[..., 1:], aet.log(x[..., 1:] - x[..., :-1])) + def forward(self, rv_var, rv_value): + y = aet.zeros(rv_value.shape) + y = aet.inc_subtensor(y[..., 0], rv_value[..., 0]) + y = aet.inc_subtensor(y[..., 1:], aet.log(rv_value[..., 1:] - rv_value[..., :-1])) return y - def jacobian_det(self, y): - return aet.sum(y[..., 1:], axis=-1) + def jacobian_det(self, rv_var, rv_value): + return aet.sum(rv_value[..., 1:], axis=-1) ordered = Ordered() @@ -276,15 +258,15 @@ class SumTo1(Transform): name = "sumto1" - def backward(self, y): - remaining = 1 - aet.sum(y[..., :], axis=-1, keepdims=True) - return aet.concatenate([y[..., :], remaining], axis=-1) + def backward(self, rv_var, rv_value): + remaining = 1 - aet.sum(rv_value[..., :], axis=-1, keepdims=True) + return aet.concatenate([rv_value[..., :], remaining], axis=-1) - def forward(self, x): - return x[..., :-1] + def forward(self, rv_var, rv_value): + return rv_value[..., :-1] - def jacobian_det(self, x): - y = aet.zeros(x.shape) + def jacobian_det(self, rv_var, rv_value): + y = aet.zeros(rv_value.shape) return aet.sum(y, axis=-1) @@ -303,30 +285,24 @@ class StickBreaking(Transform): name = "stickbreaking" - def __init__(self, eps=None): - if eps is not None: - warnings.warn( - "The argument `eps` is deprecated and will not be used.", DeprecationWarning - ) - - def forward(self, x_): - x = x_.T + def forward(self, rv_var, rv_value): + x = rv_value.T n = x.shape[0] lx = aet.log(x) shift = aet.sum(lx, 0, keepdims=True) / n y = lx[:-1] - shift return floatX(y.T) - def backward(self, y_): - y = y_.T + def backward(self, rv_var, rv_value): + y = rv_value.T y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)]) # "softmax" with vector support and no deprication warning: e_y = aet.exp(y - aet.max(y, 0, keepdims=True)) x = e_y / aet.sum(e_y, 0, keepdims=True) return floatX(x.T) - def jacobian_det(self, y_): - y = y_.T + def jacobian_det(self, rv_var, rv_value): + y = rv_value.T Km1 = y.shape[0] + 1 sy = aet.sum(y, 0, keepdims=True) r = aet.concatenate([y + sy, aet.zeros(sy.shape)]) @@ -343,14 +319,14 @@ class Circular(ElemwiseTransform): name = "circular" - def backward(self, y): - return aet.arctan2(aet.sin(y), aet.cos(y)) + def backward(self, rv_var, rv_value): + return aet.arctan2(aet.sin(rv_value), aet.cos(rv_value)) - def forward(self, x): - return aet.as_tensor_variable(x) + def forward(self, rv_var, rv_value): + return aet.as_tensor_variable(rv_value) - def jacobian_det(self, x): - return aet.zeros(x.shape) + def jacobian_det(self, rv_var, rv_value): + return aet.zeros(rv_value.shape) circular = Circular() @@ -359,44 +335,50 @@ def jacobian_det(self, x): class CholeskyCovPacked(Transform): name = "cholesky-cov-packed" - def __init__(self, n): - self.diag_idxs = np.arange(1, n + 1).cumsum() - 1 + def __init__(self, param_extract_fn): + self.param_extract_fn = param_extract_fn - def backward(self, x): - return advanced_set_subtensor1(x, aet.exp(x[self.diag_idxs]), self.diag_idxs) + def backward(self, rv_var, 
rv_value): + diag_idxs = self.param_extract_fn(rv_var) + return advanced_set_subtensor1(rv_value, aet.exp(rv_value[diag_idxs]), diag_idxs) - def forward(self, y): - return advanced_set_subtensor1(y, aet.log(y[self.diag_idxs]), self.diag_idxs) + def forward(self, rv_var, rv_value): + diag_idxs = self.param_extract_fn(rv_var) + return advanced_set_subtensor1(rv_value, aet.log(rv_value[diag_idxs]), diag_idxs) - def jacobian_det(self, y): - return aet.sum(y[self.diag_idxs]) + def jacobian_det(self, rv_var, rv_value): + diag_idxs = self.param_extract_fn(rv_var) + return aet.sum(rv_value[diag_idxs]) class Chain(Transform): + + __slots__ = ("param_extract_fn", "transform_list", "name") + def __init__(self, transform_list): self.transform_list = transform_list self.name = "+".join([transf.name for transf in self.transform_list]) - def forward(self, x): - y = x + def forward(self, rv_var, rv_value): + y = rv_value for transf in self.transform_list: - y = transf.forward(y) + y = transf.forward(rv_var, y) return y - def backward(self, y): - x = y + def backward(self, rv_var, rv_value): + x = rv_value for transf in reversed(self.transform_list): - x = transf.backward(x) + x = transf.backward(rv_var, x) return x - def jacobian_det(self, y): - y = aet.as_tensor_variable(y) + def jacobian_det(self, rv_var, rv_value): + y = aet.as_tensor_variable(rv_value) det_list = [] ndim0 = y.ndim for transf in reversed(self.transform_list): - det_ = transf.jacobian_det(y) + det_ = transf.jacobian_det(rv_var, y) det_list.append(det_) - y = transf.backward(y) + y = transf.backward(rv_var, y) ndim0 = min(ndim0, det_.ndim) # match the shape of the smallest jacobian_det det = 0.0 diff --git a/pymc3/model.py b/pymc3/model.py index f9a2abf52ef..78950d24642 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1079,22 +1079,23 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans self.free_RVs.append(rv_var) value_var = rv_var.clone() - transform = transform or logp_transform(rv_var.owner.op, rv_var) + transform = transform or logp_transform(rv_var.owner.op) if transform is not None: value_var.tag.transform = transform value_var.name = f"{rv_var.name}_{transform.name}" if aesara.config.compute_test_value != "off": - value_var.tag.test_value = transform.forward(value_var).tag.test_value + value_var.tag.test_value = transform.forward(rv_var, value_var).tag.test_value # The transformed variable needs to be a named variable in the # model, too self.named_vars[value_var.name] = value_var else: - value_var = rv_var.clone() value_var.name = rv_var.name rv_var.tag.value_var = value_var + # XXX: This is a circular reference. + value_var.tag.rv_var = rv_var elif isinstance(data, dict): @@ -1626,6 +1627,8 @@ def make_obs_var( # variable `rv_var`). value_var = rv_var.clone() rv_var.tag.value_var = value_var + # XXX: This is a circular reference. 
+ value_var.tag.rv_var = rv_var value_var.name = f"{rv_var.name}" missing_values = None diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 2c986fd2d37..252577cbd17 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -619,14 +619,15 @@ def logp_reference(args): pt = dict(pt) pt_d = {} for k, v in pt.items(): - nv = param_vars.get(k, model.named_vars.get(k)) + rv_var = model.named_vars.get(k) + nv = param_vars.get(k, rv_var) nv = getattr(nv.tag, "value_var", nv) transform = getattr(nv.tag, "transform", None) if transform: # TODO: The compiled graph behind this should be cached and # reused (if it isn't already). - v = transform.forward(v).eval() + v = transform.forward(rv_var, v).eval() if nv.name in param_vars: # Update the shared parameter variables in `param_vars` @@ -1846,7 +1847,7 @@ def test_dirichlet_with_batch_shapes(self, dist_shape): d_value = d.tag.value_var d_point = d.eval() if hasattr(d_value.tag, "transform"): - d_point_trans = d_value.tag.transform.forward(d_point).eval() + d_point_trans = d_value.tag.transform.forward(d, d_point).eval() else: d_point_trans = d_point diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index a71af261f38..cedf33c37fb 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -44,35 +44,42 @@ tol = 1e-7 if aesara.config.floatX == "float64" else 1e-6 -def check_transform(transform, domain, constructor=aet.dscalar, test=0): +def check_transform(transform, domain, constructor=aet.dscalar, test=0, rv_var=None): x = constructor("x") x.tag.test_value = test # test forward and forward_val - forward_f = aesara.function([x], transform.forward(x)) + # FIXME: What's being tested here? That the transformed graph can compile? 
+ forward_f = aesara.function([x], transform.forward(rv_var, x)) # test transform identity - identity_f = aesara.function([x], transform.backward(transform.forward(x))) + identity_f = aesara.function([x], transform.backward(rv_var, transform.forward(rv_var, x))) for val in domain.vals: close_to(val, identity_f(val), tol) -def check_vector_transform(transform, domain): - return check_transform(transform, domain, aet.dvector, test=np.array([0, 0])) +def check_vector_transform(transform, domain, rv_var=None): + return check_transform(transform, domain, aet.dvector, test=np.array([0, 0]), rv_var=rv_var) -def get_values(transform, domain=R, constructor=aet.dscalar, test=0): +def get_values(transform, domain=R, constructor=aet.dscalar, test=0, rv_var=None): x = constructor("x") x.tag.test_value = test - f = aesara.function([x], transform.backward(x)) + f = aesara.function([x], transform.backward(rv_var, x)) return np.array([f(val) for val in domain.vals]) def check_jacobian_det( - transform, domain, constructor=aet.dscalar, test=0, make_comparable=None, elemwise=False + transform, + domain, + constructor=aet.dscalar, + test=0, + make_comparable=None, + elemwise=False, + rv_var=None, ): y = constructor("y") y.tag.test_value = test - x = transform.backward(y) + x = transform.backward(rv_var, y) if make_comparable: x = make_comparable(x) @@ -85,7 +92,7 @@ def check_jacobian_det( actual_ljd = aesara.function([y], jac) computed_ljd = aesara.function( - [y], aet.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore" + [y], aet.as_tensor_variable(transform.jacobian_det(rv_var, y)), on_unused_input="ignore" ) for yval in domain.vals: @@ -93,10 +100,6 @@ def check_jacobian_det( def test_stickbreaking(): - with pytest.warns( - DeprecationWarning, match="The argument `eps` is deprecated and will not be used." 
- ): - tr.StickBreaking(eps=1e-9) check_vector_transform(tr.stick_breaking, Simplex(2)) check_vector_transform(tr.stick_breaking, Simplex(4)) @@ -121,7 +124,9 @@ def test_stickbreaking_accuracy(): val = np.array([-30]) x = aet.dvector("x") x.tag.test_value = val - identity_f = aesara.function([x], tr.stick_breaking.forward(tr.stick_breaking.backward(x))) + identity_f = aesara.function( + [x], tr.stick_breaking.forward(None, tr.stick_breaking.backward(None, x)) + ) close_to(val, identity_f(val), tol) @@ -166,7 +171,10 @@ def test_logodds(): def test_lowerbound(): - trans = tr.lowerbound(0.0) + def transform_params(rv_var): + return 0.0, None + + trans = tr.interval(transform_params) check_transform(trans, Rplusbig) check_jacobian_det(trans, Rplusbig, elemwise=True) @@ -177,7 +185,10 @@ def test_lowerbound(): def test_upperbound(): - trans = tr.upperbound(0.0) + def transform_params(rv_var): + return None, 0.0 + + trans = tr.interval(transform_params) check_transform(trans, Rminusbig) check_jacobian_det(trans, Rminusbig, elemwise=True) @@ -190,7 +201,11 @@ def test_upperbound(): def test_interval(): for a, b in [(-4, 5.5), (0.1, 0.7), (-10, 4.3)]: domain = Unit * np.float64(b - a) + np.float64(a) - trans = tr.interval(a, b) + + def transform_params(x, z=a, y=b): + return z, y + + trans = tr.interval(transform_params) check_transform(trans, domain) check_jacobian_det(trans, domain, elemwise=True) @@ -223,7 +238,7 @@ def test_circular(): close_to_logical(vals > -np.pi, True, tol) close_to_logical(vals < np.pi, True, tol) - assert isinstance(trans.forward(1), TensorConstant) + assert isinstance(trans.forward(None, 1), TensorConstant) def test_ordered(): @@ -262,13 +277,14 @@ def check_transform_elementwise_logp(self, model): pt = model.test_point array = np.random.randn(*pt[x0.name].shape) transform = x0.tag.transform - logp_nojac = logpt(x, transform.backward(array), jacobian=False) - jacob_det = transform.jacobian_det(aesara.shared(array)) - assert logpt(x).ndim == jacob_det.ndim + logp_notrans = logpt(x, transform.backward(x, array), transformed=False) - elementwiselogp = logp_nojac + jacob_det + jacob_det = transform.jacobian_det(x, aesara.shared(array)) + assert logpt(x).ndim == jacob_det.ndim - close_to(logpt(x, array).eval(), elementwiselogp.eval(), tol) + v1 = logpt(x, array, jacobian=False).eval() + v2 = logp_notrans.eval() + close_to(v1, v2, tol) def check_vectortransform_elementwise_logp(self, model, vect_opt=0): x = model.free_RVs[0] @@ -278,82 +294,81 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): pt = model.test_point array = np.random.randn(*pt[x0.name].shape) transform = x0.tag.transform - logp_nojac = logpt(x, transform.backward(array)) - jacob_det = transform.jacobian_det(aesara.shared(array)) + logp_nojac = logpt(x, transform.backward(x, array), transformed=False) + + jacob_det = transform.jacobian_det(x, aesara.shared(array)) assert logpt(x).ndim == jacob_det.ndim - if vect_opt == 0: - # the original distribution is univariate - elementwiselogp = logp_nojac.sum(axis=-1) + jacob_det - else: - elementwiselogp = logp_nojac + jacob_det # Hack to get relative tolerance - a = logpt(x, array).eval() - b = elementwiselogp.eval() + a = logpt(x, array, jacobian=False).eval() + b = logp_nojac.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) @pytest.mark.parametrize( - "sd,shape", + "sd,size", [ (2.5, 2), (5.0, (2, 3)), (np.ones(3) * 10.0, (4, 3)), ], ) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_half_normal(self, sd, shape): - 
model = self.build_model(pm.HalfNormal, {"sd": sd}, size=shape, transform=tr.log) + def test_half_normal(self, sd, size): + model = self.build_model(pm.HalfNormal, {"sd": sd}, size=size, transform=tr.log) self.check_transform_elementwise_logp(model) - @pytest.mark.parametrize("lam,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_exponential(self, lam, shape): - model = self.build_model(pm.Exponential, {"lam": lam}, size=shape, transform=tr.log) + @pytest.mark.parametrize("lam,size", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]) + def test_exponential(self, lam, size): + model = self.build_model(pm.Exponential, {"lam": lam}, size=size, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( - "a,b,shape", + "a,b,size", [ (1.0, 1.0, 2), (0.5, 0.5, (2, 3)), (np.ones(3), np.ones(3), (4, 3)), ], ) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_beta(self, a, b, shape): - model = self.build_model(pm.Beta, {"alpha": a, "beta": b}, size=shape, transform=tr.logodds) + def test_beta(self, a, b, size): + model = self.build_model(pm.Beta, {"alpha": a, "beta": b}, size=size, transform=tr.logodds) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( - "lower,upper,shape", + "lower,upper,size", [ (0.0, 1.0, 2), (0.5, 5.5, (2, 3)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3)), ], ) - def test_uniform(self, lower, upper, shape): - interval = tr.Interval(lower, upper) + def test_uniform(self, lower, upper, size): + def transform_params(rv_var): + _, _, _, lower, upper = rv_var.owner.inputs + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None + return lower, upper + + interval = tr.Interval(transform_params) model = self.build_model( - pm.Uniform, {"lower": lower, "upper": upper}, size=shape, transform=interval + pm.Uniform, {"lower": lower, "upper": upper}, size=size, transform=interval ) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( - "mu,kappa,shape", [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))] + "mu,kappa,size", [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))] ) @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_vonmises(self, mu, kappa, shape): + def test_vonmises(self, mu, kappa, size): model = self.build_model( - pm.VonMises, {"mu": mu, "kappa": kappa}, size=shape, transform=tr.circular + pm.VonMises, {"mu": mu, "kappa": kappa}, size=size, transform=tr.circular ) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( - "a,shape", [(np.ones(2), 2), (np.ones((2, 3)) * 0.5, (2, 3)), (np.ones(3), (4, 3))] + "a,size", [(np.ones(2), None), (np.ones((2, 3)) * 0.5, None), (np.ones(3), (4,))] ) - def test_dirichlet(self, a, shape): - model = self.build_model(pm.Dirichlet, {"a": a}, size=shape, transform=tr.stick_breaking) + def test_dirichlet(self, a, size): + model = self.build_model(pm.Dirichlet, {"a": a}, size=size, transform=tr.stick_breaking) self.check_vectortransform_elementwise_logp(model, vect_opt=1) def test_normal_ordered(self): @@ -367,113 +382,119 @@ def test_normal_ordered(self): self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( - "sd,shape", + "sd,size", [ (2.5, (2,)), (np.ones(3), (4, 3)), ], ) @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") - 
def test_half_normal_ordered(self, sd, shape): - testval = np.sort(np.abs(np.random.randn(*shape))) + def test_half_normal_ordered(self, sd, size): + testval = np.sort(np.abs(np.random.randn(*size))) model = self.build_model( pm.HalfNormal, {"sd": sd}, - size=shape, + size=size, testval=testval, transform=tr.Chain([tr.log, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) - @pytest.mark.parametrize("lam,shape", [(2.5, (2,)), (np.ones(3), (4, 3))]) - def test_exponential_ordered(self, lam, shape): - testval = np.sort(np.abs(np.random.randn(*shape))) + @pytest.mark.parametrize("lam,size", [(2.5, (2,)), (np.ones(3), (4, 3))]) + def test_exponential_ordered(self, lam, size): + testval = np.sort(np.abs(np.random.randn(*size))) model = self.build_model( pm.Exponential, {"lam": lam}, - size=shape, + size=size, testval=testval, transform=tr.Chain([tr.log, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( - "a,b,shape", + "a,b,size", [ (1.0, 1.0, (2,)), (np.ones(3), np.ones(3), (4, 3)), ], ) - def test_beta_ordered(self, a, b, shape): - testval = np.sort(np.abs(np.random.rand(*shape))) + def test_beta_ordered(self, a, b, size): + testval = np.sort(np.abs(np.random.rand(*size))) model = self.build_model( pm.Beta, {"alpha": a, "beta": b}, - size=shape, + size=size, testval=testval, transform=tr.Chain([tr.logodds, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( - "lower,upper,shape", + "lower,upper,size", [(0.0, 1.0, (2,)), (pm.floatX(np.zeros(3)), pm.floatX(np.ones(3)), (4, 3))], ) - def test_uniform_ordered(self, lower, upper, shape): - interval = tr.Interval(lower, upper) - testval = np.sort(np.abs(np.random.rand(*shape))) + def test_uniform_ordered(self, lower, upper, size): + def transform_params(rv_var): + _, _, _, lower, upper = rv_var.owner.inputs + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None + return lower, upper + + interval = tr.Interval(transform_params) + + testval = np.sort(np.abs(np.random.rand(*size))) model = self.build_model( pm.Uniform, {"lower": lower, "upper": upper}, - size=shape, + size=size, testval=testval, transform=tr.Chain([interval, tr.ordered]), ) - self.check_vectortransform_elementwise_logp(model, vect_opt=0) + self.check_vectortransform_elementwise_logp(model, vect_opt=1) - @pytest.mark.parametrize( - "mu,kappa,shape", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))] - ) - def test_vonmises_ordered(self, mu, kappa, shape): - testval = np.sort(np.abs(np.random.rand(*shape))) + @pytest.mark.parametrize("mu,kappa,size", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))]) + @pytest.mark.xfail(reason="Distribution not refactored yet") + def test_vonmises_ordered(self, mu, kappa, size): + testval = np.sort(np.abs(np.random.rand(*size))) model = self.build_model( pm.VonMises, {"mu": mu, "kappa": kappa}, - size=shape, + size=size, testval=testval, transform=tr.Chain([tr.circular, tr.ordered]), ) self.check_vectortransform_elementwise_logp(model, vect_opt=0) @pytest.mark.parametrize( - "lower,upper,shape,transform", + "lower,upper,size,transform", [ (0.0, 1.0, (2,), tr.stick_breaking), (0.5, 5.5, (2, 3), tr.stick_breaking), (np.zeros(3), np.ones(3), (4, 3), tr.Chain([tr.sum_to_1, tr.logodds])), ], ) - def test_uniform_other(self, lower, upper, shape, transform): - testval = np.ones(shape) / shape[-1] + def test_uniform_other(self, lower, 
upper, size, transform): + testval = np.ones(size) / size[-1] model = self.build_model( pm.Uniform, {"lower": lower, "upper": upper}, - size=shape, + size=size, testval=testval, transform=transform, ) - self.check_vectortransform_elementwise_logp(model, vect_opt=0) + self.check_vectortransform_elementwise_logp(model, vect_opt=1) @pytest.mark.parametrize( - "mu,cov,shape", + "mu,cov,size,shape", [ - (np.zeros(2), np.diag(np.ones(2)), (2,)), - (np.zeros(3), np.diag(np.ones(3)), (4, 3)), + (np.zeros(2), np.diag(np.ones(2)), None, (2,)), + (np.zeros(3), np.diag(np.ones(3)), (4,), (4, 3)), ], ) - def test_mvnormal_ordered(self, mu, cov, shape): + def test_mvnormal_ordered(self, mu, cov, size, shape): testval = np.sort(np.random.randn(*shape)) model = self.build_model( - pm.MvNormal, {"mu": mu, "cov": cov}, size=shape, testval=testval, transform=tr.ordered + pm.MvNormal, {"mu": mu, "cov": cov}, size=size, testval=testval, transform=tr.ordered ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) From ec025134121f074c8cd0683971892a15cb4167ad Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 17 Mar 2021 19:13:49 -0500 Subject: [PATCH 02/10] Add tests for two important open logpt and Model issues --- pymc3/tests/test_distributions.py | 37 ++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 252577cbd17..990bb4cd567 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -553,6 +553,30 @@ def RandomPdMatrix(n): return np.dot(A, A.T) + n * np.identity(n) +def test_hierarchical_logpt(): + """Make sure there are no random variables in a model's log-likelihood graph.""" + with pm.Model() as m: + x = pm.Uniform("x", lower=0, upper=1) + y = pm.Uniform("y", lower=0, upper=x) + + logpt_ancestors = list(ancestors([m.logpt])) + ancestors_with_owner = [a for a in logpt_ancestors if a.owner] + assert len(ancestors_with_owner) > 0 + assert not any(isinstance(v.owner.op, RandomVariable) for v in ancestors_with_owner) + assert x.tag.value_var in logpt_ancestors + assert y.tag.value_var in logpt_ancestors + + +def test_hierarchical_obs_logpt(): + obs = np.array([0.5, 0.4, 5, 2]) + + with pm.Model() as model: + x = pm.Normal("x", 0, 1, observed=obs) + pm.Normal("y", x, 1, observed=obs) + + model.logp(model.test_point) + + class TestMatchesScipy: def check_logp( self, @@ -2711,16 +2735,3 @@ def func(x): import pickle pickle.loads(pickle.dumps(y)) - - -def test_hierarchical_logpt(): - with pm.Model() as m: - x = pm.Uniform("x", lower=0, upper=1) - y = pm.Uniform("y", lower=0, upper=x) - - # Make sure that hierarchical random variables are replaced with their - # log-likelihood space variables in the log-likelhood - logpt_ancestors = list(ancestors([m.logpt])) - assert not any(isinstance(v, RandomVariable) for v in logpt_ancestors) - assert x.tag.value_var in logpt_ancestors - assert y.tag.value_var in logpt_ancestors From 199ef5689d5eaec5985f9693bf82f34fe4aad26c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Sat, 20 Mar 2021 13:10:57 -0500 Subject: [PATCH 03/10] Add non_sequences to uses of Scan Op This make `aesara.graph.basic.clone_replace` work correctly when `Scan`s are included in a graph. 
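
As a rough illustration of the pattern applied throughout this patch (the
variable names below are made up for the example and are not taken from the
diff): outer graph variables used by a `Scan` step function are now passed
through `non_sequences` instead of being captured as Python default
arguments, so they become explicit inputs of the `Scan` node that
`aesara.clone_replace` can substitute.

    import aesara
    import aesara.tensor as aet

    x = aet.dvector("x")
    idx = aet.arange(x.shape[0])

    # `x` is handed to the step function via `non_sequences` rather than
    # being closed over (e.g. as a `lambda i, x=x: ...` default argument).
    def step(i, x):
        return x[i] ** 2

    ys, _ = aesara.scan(step, sequences=[idx], non_sequences=[x])

    # Replacements on the outer graph, such as swapping `x` for `y`, now
    # behave as expected when the graph contains this `Scan`.
    y = aet.dvector("y")
    ys_cloned = aesara.clone_replace(ys, {x: y})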
--- pymc3/aesaraf.py | 6 ++++-- pymc3/distributions/dist_math.py | 10 +++++++--- pymc3/step_methods/sgmcmc.py | 3 ++- pymc3/variational/approximations.py | 4 ++-- pymc3/variational/opvi.py | 11 ++++++----- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pymc3/aesaraf.py b/pymc3/aesaraf.py index 7d8d82d91b1..0c425a90a3d 100644 --- a/pymc3/aesaraf.py +++ b/pymc3/aesaraf.py @@ -159,10 +159,12 @@ def jacobian(f, vars=None): def jacobian_diag(f, x): idx = aet.arange(f.shape[0], dtype="int32") - def grad_ii(i): + def grad_ii(i, f, x): return grad(f[i], x)[i] - return aesara.scan(grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag")[0] + return aesara.scan( + grad_ii, sequences=[idx], n_steps=f.shape[0], non_sequences=[f, x], name="jacobian_diag" + )[0] @aesara.config.change_flags(compute_test_value="ignore") diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 96eb5147dbf..9bbe9fd30f4 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -467,7 +467,7 @@ def incomplete_beta_cfe(a, b, x, small): qkm1 = one r = one - def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r): + def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r, a, b, x, small): xk = -(x * k1 * k2) / (k3 * k4) pk = pkm1 + pkm2 * xk qk = qkm1 + qkm2 * xk @@ -523,6 +523,7 @@ def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r): (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64" ) ], + non_sequences=[a, b, x, small], ) return r[-1] @@ -541,14 +542,17 @@ def incomplete_beta_ps(a, b, value): threshold = np.MachAr().eps * ai s = aet.constant(0, dtype="float64") - def _step(i, t, s): + def _step(i, t, s, a, b, value): t *= (i - b) * value / i step = t / (a + i) s += step return ((t, s), until(aet.abs_(step) < threshold)) (t, s), _ = scan( - _step, sequences=[aet.arange(2, 302)], outputs_info=[e for e in aet.cast((t, s), "float64")] + _step, + sequences=[aet.arange(2, 302)], + outputs_info=[e for e in aet.cast((t, s), "float64")], + non_sequences=[a, b, value], ) s = s[-1] + t1 + ai diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py index 407ab602f62..06cc7713194 100644 --- a/pymc3/step_methods/sgmcmc.py +++ b/pymc3/step_methods/sgmcmc.py @@ -64,8 +64,9 @@ def elemwise_dlogL(vars, model, flat_view): terms = [] for var in vars: output, _ = aesara.scan( - lambda i, logX=logL, v=var: aesara.grad(logX[i], v).flatten(), + lambda i, logX, v: aesara.grad(logX[i], v).flatten(), sequences=[aet.arange(logL.shape[0])], + non_sequences=[logL, var], ) terms.append(output) dlogL = aesara.clone_replace( diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 7b64d46d0bd..75dd640a812 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -595,10 +595,10 @@ def evaluate_over_trace(self, node): """ node = self.to_flat_input(node) - def sample(post): + def sample(post, node): return aesara.clone_replace(node, {self.input: post}) - nodes, _ = aesara.scan(sample, self.histogram) + nodes, _ = aesara.scan(sample, self.histogram, non_sequences=[node]) return nodes diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index 1be94b959f5..d7f62167e28 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -1159,10 +1159,10 @@ def symbolic_sample_over_posterior(self, node): random = self.symbolic_random.astype(self.symbolic_initial.dtype) random = 
aet.patternbroadcast(random, self.symbolic_initial.broadcastable) - def sample(post): + def sample(post, node): return aesara.clone_replace(node, {self.input: post}) - nodes, _ = aesara.scan(sample, random) + nodes, _ = aesara.scan(sample, random, non_sequences=[node]) return nodes def symbolic_single_sample(self, node): @@ -1521,10 +1521,11 @@ def symbolic_sample_over_posterior(self, node): """ node = self.to_flat_input(node) - def sample(*post): - return aesara.clone_replace(node, dict(zip(self.inputs, post))) + def sample(*post, node, inputs): + node, inputs = post[-2:] + return aesara.clone_replace(node, dict(zip(inputs, post))) - nodes, _ = aesara.scan(sample, self.symbolic_randoms) + nodes, _ = aesara.scan(sample, self.symbolic_randoms, non_sequences=[node, inputs]) return nodes def symbolic_single_sample(self, node): From d1c79bfee0886b7c0ee53ff725261cf10cf2f8e1 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Sat, 20 Mar 2021 13:54:50 -0500 Subject: [PATCH 04/10] Replace Observed Op with tag.observations --- pymc3/distributions/__init__.py | 52 ++++------- pymc3/model.py | 141 ++++++++++++------------------ pymc3/model_graph.py | 8 +- pymc3/sampling.py | 8 +- pymc3/tests/test_model.py | 15 ++-- pymc3/tests/test_model_helpers.py | 21 ++--- 6 files changed, 97 insertions(+), 148 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 5eec31935d3..4e26f565bf4 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -23,7 +23,7 @@ from aesara import config from aesara.graph.basic import Variable, ancestors, clone_replace from aesara.graph.op import Op, compute_test_value -from aesara.tensor.random.op import Observed, RandomVariable +from aesara.tensor.random.op import RandomVariable from aesara.tensor.subtensor import AdvancedSubtensor, AdvancedSubtensor1, Subtensor from aesara.tensor.var import TensorVariable @@ -141,22 +141,16 @@ def change_rv_size( return rv_var -def rv_log_likelihood_args( - rv_var: TensorVariable, - *, - return_observations: bool = True, +def extract_rv_and_value_vars( + var: TensorVariable, ) -> Tuple[TensorVariable, TensorVariable]: - """Get a `RandomVariable` and its corresponding log-likelihood `TensorVariable` value. + """Extract a random variable and its corresponding value variable from a generic + `TensorVariable`. Parameters ========== - rv_var - A variable corresponding to a `RandomVariable`, whether directly or - indirectly (e.g. an observed variable that's the output of an - `Observed` `Op`). - return_observations - When ``True``, return the observed values in place of the log-likelihood - value variable. + var + A variable corresponding to a `RandomVariable`. Returns ======= @@ -165,16 +159,14 @@ def rv_log_likelihood_args( variable). 
""" + if not var.owner: + return None, None - if rv_var.owner and isinstance(rv_var.owner.op, Observed): - rv_var, obs_var = rv_var.owner.inputs - if return_observations: - return rv_var, obs_var - else: - return rv_var, rv_log_likelihood_args(rv_var)[1] + if isinstance(var.owner.op, RandomVariable): + rv_value = getattr(var.tag, "value_var", None) + return var, rv_value - rv_value = getattr(rv_var.tag, "value_var", None) - return rv_var, rv_value + return None, None def rv_ancestors(graphs: List[TensorVariable]) -> Generator[TensorVariable, None, None]: @@ -186,14 +178,6 @@ def rv_ancestors(graphs: List[TensorVariable]) -> Generator[TensorVariable, None yield anc -def strip_observed(x: TensorVariable) -> TensorVariable: - """Return the `RandomVariable` term for an `Observed` node input; otherwise, return the input.""" - if x.owner and isinstance(x.owner.op, Observed): - return x.owner.inputs[0] - else: - return x - - def sample_to_measure_vars( graphs: List[TensorVariable], ) -> Tuple[List[TensorVariable], List[TensorVariable]]: @@ -223,7 +207,7 @@ def sample_to_measure_vars( if not (anc.owner and isinstance(anc.owner.op, RandomVariable)): continue - _, value_var = rv_log_likelihood_args(anc, return_observations=False) + _, value_var = extract_rv_and_value_vars(anc) if value_var is not None: replace[anc] = value_var @@ -270,7 +254,7 @@ def logpt( """ - rv_var, rv_value_var = rv_log_likelihood_args(rv_var) + rv_var, rv_value_var = extract_rv_and_value_vars(rv_var) if rv_value is None: rv_value = rv_value_var @@ -311,8 +295,8 @@ def logpt( return aet.zeros_like(rv_var) - # This case should be reached when `rv_var` is either the result of an - # `Observed` or a `RandomVariable` `Op` + # This case should be reached when `rv_var` is the output of a + # `RandomVariable` `Op` rng, size, dtype, *dist_params = rv_node.inputs dist_params, replacements = sample_to_measure_vars(dist_params) @@ -392,7 +376,7 @@ def logcdf( ): """Create a log-CDF graph.""" - rv_var, _ = rv_log_likelihood_args(rv_var) + rv_var, _ = extract_rv_and_value_vars(rv_var) rv_node = rv_var.owner if not rv_node: diff --git a/pymc3/model.py b/pymc3/model.py index 78950d24642..9925a9948a8 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -30,7 +30,6 @@ from aesara.compile.sharedvalue import SharedVariable from aesara.gradient import grad from aesara.graph.basic import Constant, Variable, graph_inputs -from aesara.tensor.random.op import Observed, observed from aesara.tensor.var import TensorVariable from pandas import Series @@ -897,7 +896,9 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): for var in self.free_RVs + self.potentials ] ) - observed_RVs_logp = aet.sum([aet.sum(logpt(obs)) for obs in self.observed_RVs]) + observed_RVs_logp = aet.sum( + [aet.sum(logpt(obs, obs.tag.observations)) for obs in self.observed_RVs] + ) costs = [free_RVs_logp, observed_RVs_logp] else: @@ -912,7 +913,7 @@ def logpt(self): """Aesara scalar of log-probability of the model""" with self: factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] - factors += [logpt_sum(obs) for obs in self.observed_RVs] + factors += [logpt_sum(obs, obs.tag.observations) for obs in self.observed_RVs] factors += self.potentials logp_var = aet.sum([aet.sum(factor) for factor in factors]) if self.name: @@ -934,7 +935,9 @@ def logp_nojact(self): logpt_sum(var, getattr(var.tag, "value_var", None), jacobian=False) for var in self.free_RVs ] - factors += [logpt_sum(obs, jacobian=False) for obs in self.observed_RVs] 
+ factors += [ + logpt_sum(obs, obs.tag.observations, jacobian=False) for obs in self.observed_RVs + ] factors += self.potentials logp_var = aet.sum([aet.sum(factor) for factor in factors]) if self.name: @@ -954,7 +957,7 @@ def varlogpt(self): @property def datalogpt(self): with self: - factors = [logpt(obs) for obs in self.observed_RVs] + factors = [logpt(obs, obs.tag.observations) for obs in self.observed_RVs] factors += [aet.sum(factor) for factor in self.potentials] return aet.sum(factors) @@ -1068,56 +1071,7 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans rv_var.tag.total_size = total_size if data is None: - # Create a `TensorVariable` that will be used as the random - # variable's "value" in log-likelihood graphs. - # - # In general, we'll call this type of variable the "value" variable. - # - # In all other cases, the role of the value variable is taken by - # observed data. That's why value variables are only referenced in - # this branch of the conditional. self.free_RVs.append(rv_var) - value_var = rv_var.clone() - - transform = transform or logp_transform(rv_var.owner.op) - - if transform is not None: - value_var.tag.transform = transform - value_var.name = f"{rv_var.name}_{transform.name}" - if aesara.config.compute_test_value != "off": - value_var.tag.test_value = transform.forward(rv_var, value_var).tag.test_value - - # The transformed variable needs to be a named variable in the - # model, too - self.named_vars[value_var.name] = value_var - else: - value_var.name = rv_var.name - - rv_var.tag.value_var = value_var - # XXX: This is a circular reference. - value_var.tag.rv_var = rv_var - - elif isinstance(data, dict): - - # TODO: How exactly does this dictionary map to `rv_var`? - - # obs_rvs = {name: make_obs_var(rv_var, d, name, self) for name, d in data.items()} - # rv_var.tag.data = obs_rvs - # - # missing_values = [ - # datum.missing_values for datum in data.values() if datum.missing_values is not None - # ] - # rv_var.tag.missing_values = missing_values - # - # self.observed_RVs.append(rv_var) - # - # if missing_values: - # self.free_RVs += rv_var.tag.missing_values - # self.missing_values += rv_var.tag.missing_values - # for v in rv_var.tag.missing_values: - # self.named_vars[v.name] = v - - raise NotImplementedError() else: if ( isinstance(data, Variable) @@ -1128,8 +1082,7 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans data = pandas_to_array(data) - rv_var = make_obs_var(rv_var, data, name, self) - rv_var.tag.data = data + rv_var = make_obs_var(rv_var, data) self.observed_RVs.append(rv_var) @@ -1138,6 +1091,37 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans self.missing_values.append(rv_var.tag.missing_values) self.named_vars[rv_var.tag.missing_values.name] = rv_var.tag.missing_values + # Create a `TensorVariable` that will be used as the random + # variable's "value" in log-likelihood graphs. + # + # In general, we'll call this type of variable the "value" variable. + # + # In all other cases, the role of the value variable is taken by + # observed data. That's why value variables are only referenced in + # this branch of the conditional. 
+ value_var = rv_var.type() + + if aesara.config.compute_test_value != "off": + value_var.tag.test_value = rv_var.tag.test_value + + value_var.name = f"{rv_var.name}_value" + + rv_var.tag.value_var = value_var + + # Make the value variable a transformed value variable, + # if there's an applicable transform + transform = transform or logp_transform(rv_var.owner.op) + + if transform is not None: + value_var.tag.transform = transform + value_var.name = f"{value_var.name}_{transform.name}__" + if aesara.config.compute_test_value != "off": + value_var.tag.test_value = transform.forward(rv_var, value_var).tag.test_value + + # The transformed variable needs to be a named variable in the + # model, too + self.named_vars[value_var.name] = value_var + self.add_random_variable(rv_var, dims) return rv_var @@ -1577,9 +1561,7 @@ def pandas_to_array(data): return pm.floatX(ret) -def make_obs_var( - rv_var: TensorVariable, data: Union[np.ndarray], name: str, model: Model -) -> TensorVariable: +def make_obs_var(rv_var: TensorVariable, data: Union[np.ndarray]) -> TensorVariable: """Create a `TensorVariable` for an observed random variable. Parameters @@ -1588,16 +1570,13 @@ def make_obs_var( The random variable that is observed. data: ndarray The observed data. - name: str - The name of the random variable. - model: Model - The model object. Returns ======= The new observed random variable """ + name = rv_var.name data = pandas_to_array(data).astype(rv_var.dtype) # The shapes of the observed random variable and its data might not @@ -1618,47 +1597,35 @@ def make_obs_var( rv_var = change_rv_size(rv_var, new_size) - if aesara.config.compute_test_value != "off" and test_value is not None: - # We try to reuse the old test value - rv_var.tag.test_value = np.broadcast_to(test_value, rv_var.tag.test_value.shape) - - # An independent variable used as the generic log-likelihood input - # parameter (i.e. the measure-space counterpart to the sample-space - # variable `rv_var`). - value_var = rv_var.clone() - rv_var.tag.value_var = value_var - # XXX: This is a circular reference. - value_var.tag.rv_var = rv_var - value_var.name = f"{rv_var.name}" + if aesara.config.compute_test_value != "off": + if test_value is not None: + # We try to reuse the old test value + rv_var.tag.test_value = np.broadcast_to(test_value, rv_var.tag.test_value.shape) + else: + rv_var.tag.test_value = data missing_values = None mask = getattr(data, "mask", None) if mask is not None: impute_message = ( - "Data in {name} contains missing values and" + f"Data in {rv_var} contains missing values and" " will be automatically imputed from the" - " sampling distribution.".format(name=name) + " sampling distribution." 
) warnings.warn(impute_message, ImputationWarning) missing_values = rv_var[mask] constant = aet.as_tensor_variable(data.filled()) data = aet.set_subtensor(constant[mask.nonzero()], missing_values) - - # Now, we need log-likelihood-space terms for these missing values - value_var.name = f"{rv_var.name}_missing" - elif sps.issparse(data): data = sparse.basic.as_sparse(data, name=name) else: data = aet.as_tensor_variable(data, name=name) - rv_obs = observed(rv_var, data) - rv_obs.tag.missing_values = missing_values - - rv_obs.name = name + rv_var.tag.missing_values = missing_values + rv_var.tag.observations = data - return rv_obs + return rv_var def _walk_up_rv(rv, formatting="plain"): @@ -1749,7 +1716,7 @@ def as_iterargs(data): def all_continuous(vars): """Check that vars not include discrete variables or BART variables, excepting observed RVs.""" - vars_ = [var for var in vars if not (var.owner and isinstance(var.owner.op, Observed))] + vars_ = [var for var in vars if not (var.owner and hasattr(var.tag, "observations"))] if any( [ (var.dtype in pm.discrete_types or (var.owner and isinstance(var.owner.op, pm.BART))) diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index fda715e7c21..e35eaf1123b 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -17,7 +17,7 @@ from aesara.compile.sharedvalue import SharedVariable from aesara.graph.basic import walk -from aesara.tensor.random.op import Observed +from aesara.tensor.random.op import RandomVariable from aesara.tensor.var import TensorVariable import pymc3 as pm @@ -112,9 +112,9 @@ def update_input_map(key: str, val: Set[VarName]): for var_name in self.var_names: var = self.model[var_name] update_input_map(var_name, self.get_parents(var)) - if var.owner and isinstance(var.owner.op, Observed): + if hasattr(var.tag, "observations"): try: - obs_name = var.observations.name + obs_name = var.tag.observations.name if obs_name: input_map[var_name] = input_map[var_name].difference({obs_name}) update_input_map(obs_name, {var_name}) @@ -128,7 +128,7 @@ def _make_node(self, var_name, graph, *, formatting: str = "plain"): # styling for node attrs = {} - if v.owner and isinstance(v.owner.op, Observed): + if v.owner and isinstance(v.owner.op, RandomVariable) and hasattr(v.tag, "observations"): attrs["style"] = "filled" # make Data be roundtangle, instead of rectangle diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 8a80e7a1de9..8d1b222e417 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -41,7 +41,7 @@ from pymc3.backends.base import BaseTrace, MultiTrace from pymc3.backends.ndarray import NDArray from pymc3.blocking import DictToArrayBijection -from pymc3.distributions import change_rv_size, rv_ancestors, strip_observed +from pymc3.distributions import change_rv_size, rv_ancestors from pymc3.exceptions import IncorrectArgumentsError, SamplingError from pymc3.model import Model, Point, all_continuous, modelcontext from pymc3.parallel_sampling import Draw, _cpu_count @@ -1717,9 +1717,7 @@ def sample_posterior_predictive( if progressbar: indices = progress_bar(indices, total=samples, display=progressbar) - vars_to_sample = [ - strip_observed(v) for v in get_default_varnames(vars_, include_transformed=False) - ] + vars_to_sample = list(get_default_varnames(vars_, include_transformed=False)) if not vars_to_sample: return {} @@ -1973,7 +1971,7 @@ def sample_prior_predictive( names = get_default_varnames(vars_, include_transformed=False) - vars_to_sample = [strip_observed(model[name]) for name in names] + vars_to_sample = 
[model[name] for name in names] inputs = [i for i in inputvars(vars_to_sample)] sampler_fn = aesara.function( inputs, diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index 35ed91e8c39..34589cee233 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -22,6 +22,8 @@ import pandas as pd import pytest +from aesara.tensor.subtensor import AdvancedIncSubtensor + import pymc3 as pm from pymc3 import Deterministic, Potential @@ -194,17 +196,20 @@ def test_duplicate_vars(): def test_empty_observed(): data = pd.DataFrame(np.ones((2, 3)) / 3) data.values[:] = np.nan - with pm.Model(): + with pm.Model(aesara_config={"compute_test_value": "raise"}): a = pm.Normal("a", observed=data) + + assert isinstance(a.tag.observations.owner.op, AdvancedIncSubtensor) # The masked observations are replaced by elements of the RV `a`, # which means that they should all have the same sample test values - a_data = a.owner.inputs[1] - npt.assert_allclose(a.tag.test_value, a_data.tag.test_value) + a_data = a.tag.observations.owner.inputs[1] + npt.assert_allclose(a.tag.test_value.flatten(), a_data.tag.test_value) # Let's try this again with another distribution b = pm.Gamma("b", alpha=1, beta=1, observed=data) - b_data = b.owner.inputs[1] - npt.assert_allclose(b.tag.test_value, b_data.tag.test_value) + assert isinstance(b.tag.observations.owner.op, AdvancedIncSubtensor) + b_data = b.tag.observations.owner.inputs[1] + npt.assert_allclose(b.tag.test_value.flatten(), b_data.tag.test_value) class TestValueGradFunction(unittest.TestCase): diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py index 93fdb972599..4acaee7dd30 100644 --- a/pymc3/tests/test_model_helpers.py +++ b/pymc3/tests/test_model_helpers.py @@ -108,7 +108,6 @@ def test_pandas_to_array(self, input_dtype): # Make sure the returned object is a Aesara TensorVariable assert isinstance(wrapped, TensorVariable) - @pytest.mark.xfail(reason="`Observed` `Op` doesn't take `SparseConstant`s, yet") def test_make_obs_var(self): """ Check returned values for `data` given known inputs to `as_tensor()`. @@ -127,20 +126,16 @@ def test_make_obs_var(self): with fake_model: fake_distribution = pm.Normal.dist(mu=0, sigma=1) # Create the testval attribute simply for the sake of model testing - fake_distribution.testval = None + fake_distribution.name = input_name # Check function behavior using the various inputs - dense_output = pm.model.make_obs_var(fake_distribution, dense_input, input_name, fake_model) - sparse_output = pm.model.make_obs_var( - fake_distribution, sparse_input, input_name, fake_model - ) - masked_output = pm.model.make_obs_var( - fake_distribution, masked_array_input, input_name, fake_model - ) + dense_output = pm.model.make_obs_var(fake_distribution, dense_input) + sparse_output = pm.model.make_obs_var(fake_distribution, sparse_input) + masked_output = pm.model.make_obs_var(fake_distribution, masked_array_input) # Ensure that the missing values are appropriately set to None for func_output in [dense_output, sparse_output]: - assert func_output.missing_values is None + assert func_output.tag.missing_values is None # Ensure that the Aesara variable names are correctly set. 
# Note that the output for masked inputs do not have their names set @@ -149,11 +144,11 @@ def test_make_obs_var(self): assert func_output.name == input_name # Ensure the that returned functions are all of the correct type - assert isinstance(dense_output, TensorConstant) - assert sparse.basic._is_sparse_variable(sparse_output) + assert isinstance(dense_output.tag.observations, TensorConstant) + assert sparse.basic._is_sparse_variable(sparse_output.tag.observations) # Masked output is something weird. Just ensure it has missing values # self.assertIsInstance(masked_output, TensorConstant) - assert masked_output.missing_values is not None + assert masked_output.tag.missing_values is not None return None From 74935fab93898b7a7fc5db61584eafed0dcfa7da Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Sat, 20 Mar 2021 13:55:39 -0500 Subject: [PATCH 05/10] Add missing imports to pymc3.step_methods.gibbs --- pymc3/step_methods/gibbs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py index 49737676cbe..47115f5aeed 100644 --- a/pymc3/step_methods/gibbs.py +++ b/pymc3/step_methods/gibbs.py @@ -19,6 +19,9 @@ """ from warnings import warn +import aesara.tensor as aet + +from aesara.graph.basic import graph_inputs from numpy import arange, array, cumsum, empty, exp, max, nested_iters, searchsorted from numpy.random import uniform @@ -78,7 +81,7 @@ def elemwise_logp(model, var): v_logp = logpt(v) if var in graph_inputs([v_logp]): terms.append(v_logp) - return model.fn(add(*terms)) + return model.fn(aet.add(*terms)) def categorical(prob, shape): From e2c42b36ef75cf59e0ba0c139d1de9273a673d53 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Sat, 20 Mar 2021 13:59:42 -0500 Subject: [PATCH 06/10] Comment out unused moments --- pymc3/distributions/continuous.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index f6890266f9f..350358d71a2 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1153,8 +1153,8 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwar alpha = aet.as_tensor_variable(floatX(alpha)) beta = aet.as_tensor_variable(floatX(beta)) - mean = alpha / (alpha + beta) - variance = (alpha * beta) / ((alpha + beta) ** 2 * (alpha + beta + 1)) + # mean = alpha / (alpha + beta) + # variance = (alpha * beta) / ((alpha + beta) ** 2 * (alpha + beta + 1)) assert_negative_support(alpha, "alpha", "Beta") assert_negative_support(beta, "beta", "Beta") From 02e170ab50f98e9eb30e9f6305f5b7698e0be0de Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Sat, 20 Mar 2021 20:18:06 -0500 Subject: [PATCH 07/10] Make logpt work correctly for nested models and transforms --- pymc3/distributions/__init__.py | 287 +++++++++++++++------------- pymc3/distributions/distribution.py | 33 ++-- pymc3/distributions/multivariate.py | 11 +- pymc3/distributions/transforms.py | 17 +- pymc3/model.py | 27 +-- pymc3/sampling.py | 4 +- pymc3/tests/test_distributions.py | 121 ++++++------ 7 files changed, 266 insertions(+), 234 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 4e26f565bf4..dfc96175ac9 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -11,17 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -import warnings from functools import singledispatch -from itertools import chain -from typing import Generator, List, Optional, Tuple, Union +from typing import Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union import aesara.tensor as aet import numpy as np from aesara import config -from aesara.graph.basic import Variable, ancestors, clone_replace +from aesara.graph.basic import Variable, clone_replace, graph_inputs, io_toposort, walk from aesara.graph.op import Op, compute_test_value from aesara.tensor.random.op import RandomVariable from aesara.tensor.subtensor import AdvancedSubtensor, AdvancedSubtensor1, Subtensor @@ -163,61 +161,105 @@ def extract_rv_and_value_vars( return None, None if isinstance(var.owner.op, RandomVariable): - rv_value = getattr(var.tag, "value_var", None) + rv_value = getattr(var.tag, "observations", getattr(var.tag, "value_var", None)) return var, rv_value return None, None -def rv_ancestors(graphs: List[TensorVariable]) -> Generator[TensorVariable, None, None]: - """Yield the ancestors that are `RandomVariable` outputs for the given `graphs`.""" - for anc in ancestors(graphs): - if anc in graphs: - continue - if anc.owner and isinstance(anc.owner.op, RandomVariable): - yield anc +def rv_ancestors( + graphs: Iterable[TensorVariable], walk_past_rvs: bool = False +) -> Generator[TensorVariable, None, None]: + """Yield everything except the inputs of ``RandomVariable``s. + Parameters + ========== + graphs + The graphs to walk. + walk_past_rvs + If ``True``, do descend into ``RandomVariable``s. + """ -def sample_to_measure_vars( - graphs: List[TensorVariable], -) -> Tuple[List[TensorVariable], List[TensorVariable]]: - """Replace sample-space variables in graphs with their measure-space counterparts. + def expand(var): + if var.owner and (walk_past_rvs or not isinstance(var.owner.op, RandomVariable)): + return reversed(var.owner.inputs) - Sample-space variables are `TensorVariable` outputs of `RandomVariable` - `Op`s. Measure-space variables are `TensorVariable`s that correspond to - the value of a sample-space variable in a likelihood function (e.g. ``x`` - in ``p(X = x)``, where ``X`` is the corresponding sample-space variable). - (``x`` is also the variable found in ``rv_var.tag.value_var``, so this - function could also be called ``sample_to_value_vars``.) + yield from walk(graphs, expand, False) + + +def replace_rvs_in_graphs( + graphs: Iterable[TensorVariable], + replacement_fn: Callable[[TensorVariable], Dict[TensorVariable, TensorVariable]], + initial_replacements: Optional[Dict[TensorVariable, TensorVariable]] = None, +) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]: + """Replace random variables in graphs + + This will *not* recompute test values. Parameters ========== graphs - The graphs in which random variables are to be replaced by their - measure variables. + The graphs in which random variables are to be replaced. Returns ======= Tuple containing the transformed graphs and a ``dict`` of the replacements that were made. 
""" - replace = {} - for anc in chain(rv_ancestors(graphs), graphs): + replacements = {} + if initial_replacements: + replacements.update(initial_replacements) - if not (anc.owner and isinstance(anc.owner.op, RandomVariable)): - continue + for var in rv_ancestors(graphs): + if var.owner and isinstance(var.owner.op, RandomVariable): + replacement_fn(var, replacements) - _, value_var = extract_rv_and_value_vars(anc) + if replacements: + graphs = clone_replace(graphs, replacements) - if value_var is not None: - replace[anc] = value_var + return graphs, replacements - if replace: - measure_graphs = clone_replace(graphs, replace=replace) - else: - measure_graphs = graphs - return measure_graphs, replace +def rvs_to_value_vars( + graphs: Iterable[TensorVariable], initial_replacements: Dict[TensorVariable, TensorVariable] +) -> Tuple[Iterable[TensorVariable], Dict[TensorVariable, TensorVariable]]: + """Replace random variables in graphs with their value variables. + + This will *not* recompute test values. + """ + + def value_var_replacements(var, replacements): + rv_var, rv_value_var = extract_rv_and_value_vars(var) + + if rv_value_var is not None: + replacements[var] = rv_value_var + + return replace_rvs_in_graphs(graphs, value_var_replacements, initial_replacements) + + +def apply_transforms( + graphs: Iterable[TensorVariable], +) -> Tuple[TensorVariable, Dict[TensorVariable, TensorVariable]]: + """Apply the transforms associated with each random variable in `graphs`. + + This will *not* recompute test values. + """ + + def transform_replacements(var, replacements): + rv_var, rv_value_var = extract_rv_and_value_vars(var) + + if rv_value_var is None: + return + + transform = getattr(rv_value_var.tag, "transform", None) + + if transform is None: + return + + trans_rv_value = transform.backward(rv_var, rv_value_var) + replacements[var] = trans_rv_value + + return replace_rvs_in_graphs(graphs, transform_replacements) def logpt( @@ -227,6 +269,8 @@ def logpt( jacobian: bool = True, scaling: bool = True, transformed: bool = True, + cdf: bool = False, + sum: bool = False, **kwargs, ) -> TensorVariable: """Create a measure-space (i.e. log-likelihood) graph for a random variable at a given point. @@ -241,78 +285,65 @@ def logpt( rv_var The `RandomVariable` output that determines the log-likelihood graph. rv_value - The input variable for the log-likelihood graph. If `rv_value` is - a transformed variable, its transformations will be applied. - If no value is provided, `rv_var.tag.value_var` will be checked and, - when available, used. + The variable that represents the value of `rv_var` in its + log-likelihood. If no value is provided, `rv_var.tag.value_var` will + be checked and, when available, used. jacobian Whether or not to include the Jacobian term. scaling A scaling term to apply to the generated log-likelihood graph. transformed Apply transforms. + cdf + Return the log cumulative distribution. + sum + Sum the log-likelihood. 
""" rv_var, rv_value_var = extract_rv_and_value_vars(rv_var) if rv_value is None: + + if rv_value_var is None: + raise ValueError(f"No value variable specified or associated with {rv_var}") + rv_value = rv_value_var else: rv_value = aet.as_tensor(rv_value) - if rv_value_var is None: - rv_value_var = rv_value + # Make sure that the value is compatible with the random variable + rv_value = rv_var.type.filter_variable(rv_value.astype(rv_var.dtype)) + + if rv_value_var is None: + rv_value_var = rv_value rv_node = rv_var.owner if not rv_node: - raise TypeError("rv_var must be the output of a RandomVariable Op") + return aet.zeros_like(rv_var) if not isinstance(rv_node.op, RandomVariable): + return _logp(rv_node.op, rv_value, rv_node.inputs) - # This will probably need another generic function... - if isinstance(rv_node.op, (Subtensor, AdvancedSubtensor, AdvancedSubtensor1)): - - raise NotImplementedError("Missing value support is incomplete") - - # # "Flatten" and sum an array of indexed RVs' log-likelihoods - # rv_var, missing_values = rv_node.inputs - # - # missing_values = missing_values.data - # logp_var = aet.sum( - # [ - # logpt( - # rv_var, - # ) - # for idx, missing in zip( - # np.ndindex(missing_values.shape), missing_values.flatten() - # ) - # if missing - # ] - # ) - # return logp_var - - return aet.zeros_like(rv_var) - - # This case should be reached when `rv_var` is the output of a - # `RandomVariable` `Op` rng, size, dtype, *dist_params = rv_node.inputs - dist_params, replacements = sample_to_measure_vars(dist_params) - - transform = getattr(rv_value_var.tag, "transform", None) + # Here, we plug the actual random variable into the log-likelihood graph, + # because we want a log-likelihood graph that only contains + # random variables. This is important, because a random variable's + # parameters can contain random variables themselves. + # Ultimately, with a graph containing only random variables and + # "deterministics", we can simply replace all the random variables with + # their value variables and be done. 
+ if not cdf: + logp_var = _logp(rv_node.op, rv_var, *dist_params, **kwargs) + else: + logp_var = _logcdf(rv_node.op, rv_var, *dist_params, **kwargs) - # If any of the measure vars are transformed measure-space variables - # (signified by having a `transform` value in their tags), then we apply - # the their transforms and add their Jacobians (when enabled) - if transform and transformed: - logp_var = _logp(rv_node.op, transform.backward(rv_var, rv_value), *dist_params, **kwargs) + transform = getattr(rv_value_var.tag, "transform", None) if rv_value_var else None - logp_var = transform_logp( - logp_var, - tuple(replacements.values()), - ) + if transform and transformed and not cdf: + (logp_var,), _ = apply_transforms((logp_var,)) if jacobian: transformed_jacobian = transform.jacobian_det(rv_var, rv_value) @@ -320,45 +351,33 @@ def logpt( if logp_var.ndim > transformed_jacobian.ndim: logp_var = logp_var.sum(axis=-1) logp_var += transformed_jacobian - else: - logp_var = _logp(rv_node.op, rv_value, *dist_params, **kwargs) + + # Replace random variables with their value variables + (logp_var,), replaced = rvs_to_value_vars((logp_var,), {rv_var: rv_value}) + + if rv_value_var != rv_value: + (logp_var,) = clone_replace((logp_var,), replace={rv_value_var: rv_value}) + + if sum: + logp_var = aet.sum(logp_var) if scaling: logp_var *= _get_scaling( - getattr(rv_var.tag, "total_size", None), rv_value_var.shape, rv_value_var.ndim + getattr(rv_var.tag, "total_size", None), rv_value.shape, rv_value.ndim ) + # Recompute test values for the changes introduced by the replacements + # above. + if config.compute_test_value != "off": + for node in io_toposort(graph_inputs((logp_var,)), (logp_var,)): + compute_test_value(node) + if rv_var.name is not None: logp_var.name = "__logp_%s" % rv_var.name return logp_var -def transform_logp(logp_var: TensorVariable, inputs: List[TensorVariable]) -> TensorVariable: - """Transform the inputs of a log-likelihood graph.""" - trans_replacements = {} - for measure_var in inputs: - - transform = getattr(measure_var.tag, "transform", None) - rv_var = getattr(measure_var.tag, "rv_var", None) - - if transform is not None and rv_var is None: - warnings.warn( - f"A transform was found for {measure_var} but not a corresponding random variable" - ) - - if transform is None or rv_var is None: - continue - - trans_rv_value = transform.backward(rv_var, measure_var) - trans_replacements[measure_var] = trans_rv_value - - if trans_replacements: - (logp_var,) = clone_replace([logp_var], trans_replacements) - - return logp_var - - @singledispatch def _logp(op: Op, value: TensorVariable, *dist_params, **kwargs): """Create a log-likelihood graph. @@ -371,26 +390,35 @@ def _logp(op: Op, value: TensorVariable, *dist_params, **kwargs): return aet.zeros_like(value) -def logcdf( - rv_var: TensorVariable, rv_value: Optional[TensorVariable], jacobian: bool = True, **kwargs -): - """Create a log-CDF graph.""" - - rv_var, _ = extract_rv_and_value_vars(rv_var) - rv_node = rv_var.owner - - if not rv_node: - raise TypeError() +@_logp.register(Subtensor) +@_logp.register(AdvancedSubtensor) +@_logp.register(AdvancedSubtensor1) +def subtensor_logp(op, value, *inputs, **kwargs): - rv_value = aet.as_tensor(rv_value) - - rng, size, dtype, *dist_params = rv_node.inputs - - dist_params, replacements = sample_to_measure_vars(dist_params) - - logp_var = _logcdf(rv_node.op, rv_value, *dist_params, **kwargs) + # TODO: Compute the log-likelihood for a subtensor/index operation. 
+ raise NotImplementedError() - return logp_var + # "Flatten" and sum an array of indexed RVs' log-likelihoods + # rv_var, missing_values = + # + # missing_values = missing_values.data + # logp_var = aet.sum( + # [ + # logpt( + # rv_var, + # ) + # for idx, missing in zip( + # np.ndindex(missing_values.shape), missing_values.flatten() + # ) + # if missing + # ] + # ) + # return logp_var + + +def logcdf(*args, **kwargs): + """Create a log-CDF graph.""" + return logpt(*args, cdf=True, **kwargs) @singledispatch @@ -405,16 +433,15 @@ def _logcdf(op, value, *args, **kwargs): raise NotImplementedError() -def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, **kwargs): +def logpt_sum(*args, **kwargs): """Return the sum of the logp values for the given observations. Subclasses can use this to improve the speed of logp evaluations if only the sum of the logp values is needed. """ - return aet.sum(logpt(rv_var, rv_value, **kwargs)) + return logpt(*args, sum=True, **kwargs) -from pymc3.distributions import shape_utils, timeseries, transforms from pymc3.distributions.bart import BART from pymc3.distributions.bound import Bound from pymc3.distributions.continuous import ( diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 268d46c87be..2b7d48a6c15 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -26,7 +26,7 @@ from aesara.tensor.random.op import RandomVariable -from pymc3.distributions import _logcdf, _logp, logp_transform +from pymc3.distributions import _logcdf, _logp if TYPE_CHECKING: from typing import Optional, Callable @@ -111,12 +111,12 @@ def logp(op, value, *dist_params, **kwargs): def logcdf(op, value, *dist_params, **kwargs): return class_logcdf(value, *dist_params, **kwargs) - class_transform = clsdict.get("transform") - if class_transform: - - @logp_transform.register(rv_type) - def transform(op, *args, **kwargs): - return class_transform(*args, **kwargs) + # class_transform = clsdict.get("transform") + # if class_transform: + # + # @logp_transform.register(rv_type) + # def transform(op, *args, **kwargs): + # return class_transform(*args, **kwargs) # Register the Aesara `RandomVariable` type as a subclass of this # `Distribution` type. @@ -328,26 +328,17 @@ def _distr_parameters_for_repr(self): class Discrete(Distribution): """Base class for discrete distributions""" - def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs): - if dtype is None: - if aesara.config.floatX == "float32": - dtype = "int16" - else: - dtype = "int64" - if dtype != "int16" and dtype != "int64": - raise TypeError("Discrete classes expect dtype to be int16 or int64.") + def __new__(cls, name, *args, **kwargs): - super().__init__(shape, dtype, defaults=defaults, *args, **kwargs) + if kwargs.get("transform", None): + raise ValueError("Transformations for discrete distributions") + + return super().__new__(cls, name, *args, **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, shape=(), dtype=None, defaults=("median", "mean", "mode"), *args, **kwargs): - if dtype is None: - dtype = aesara.config.floatX - super().__init__(shape, dtype, defaults=defaults, *args, **kwargs) - class DensityDist(Distribution): """Distribution based on a given log density function. 
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index a11366423ba..fc2833f9fd1 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -387,7 +387,7 @@ class Dirichlet(Continuous): rv_op = dirichlet @classmethod - def dist(cls, a, **kwargs): + def dist(cls, a, transform=transforms.stick_breaking, **kwargs): a = aet.as_tensor_variable(a) # mean = a / aet.sum(a) @@ -418,15 +418,6 @@ def logp(value, a): broadcast_conditions=False, ) - def transform(rv_var): - - if rv_var.ndim == 1 or rv_var.broadcastable[-1]: - # If this variable is just a bunch of scalars/degenerate - # Dirichlets, we can't transform it - return None - - return transforms.stick_breaking - def _distr_parameters_for_repr(self): return ["a"] diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 98c7a15ebd4..7dbbc7afd09 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -218,7 +218,7 @@ def jacobian_det(self, rv_var, rv_value): s = aet.nnet.softplus(-rv_value) return aet.log(b - a) - 2 * s - rv_value else: - return rv_value + return aet.ones_like(rv_value) interval = Interval @@ -286,6 +286,11 @@ class StickBreaking(Transform): name = "stickbreaking" def forward(self, rv_var, rv_value): + if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + # If this variable is just a bunch of scalars/degenerate + # Dirichlets, we can't transform it + return rv_value + x = rv_value.T n = x.shape[0] lx = aet.log(x) @@ -294,6 +299,11 @@ def forward(self, rv_var, rv_value): return floatX(y.T) def backward(self, rv_var, rv_value): + if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + # If this variable is just a bunch of scalars/degenerate + # Dirichlets, we can't transform it + return rv_value + y = rv_value.T y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)]) # "softmax" with vector support and no deprication warning: @@ -302,6 +312,11 @@ def backward(self, rv_var, rv_value): return floatX(x.T) def jacobian_det(self, rv_var, rv_value): + if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + # If this variable is just a bunch of scalars/degenerate + # Dirichlets, we can't transform it + return aet.ones_like(rv_value) + y = rv_value.T Km1 = y.shape[0] + 1 sy = aet.sum(y, 0, keepdims=True) diff --git a/pymc3/model.py b/pymc3/model.py index 9925a9948a8..1d27efec3e4 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -240,6 +240,8 @@ def build_named_node_tree(graphs): T = TypeVar("T", bound="ContextMeta") +no_transform_object = object() + class ContextMeta(type): """Functionality for objects that put themselves in a context using @@ -1047,7 +1049,9 @@ def add_coords(self, coords): else: self.coords[name] = coords[name] - def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, transform=None): + def register_rv( + self, rv_var, name, data=None, total_size=None, dims=None, transform=no_transform_object + ): """Register an (un)observed random variable with the model. 
Parameters @@ -1104,13 +1108,14 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans if aesara.config.compute_test_value != "off": value_var.tag.test_value = rv_var.tag.test_value - value_var.name = f"{rv_var.name}_value" + value_var.name = rv_var.name rv_var.tag.value_var = value_var # Make the value variable a transformed value variable, # if there's an applicable transform - transform = transform or logp_transform(rv_var.owner.op) + if transform is no_transform_object: + transform = logp_transform(rv_var.owner.op) if transform is not None: value_var.tag.transform = transform @@ -1118,10 +1123,6 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans if aesara.config.compute_test_value != "off": value_var.tag.test_value = transform.forward(rv_var, value_var).tag.test_value - # The transformed variable needs to be a named variable in the - # model, too - self.named_vars[value_var.name] = value_var - self.add_random_variable(rv_var, dims) return rv_var @@ -1173,7 +1174,7 @@ def __getitem__(self, key): except KeyError: raise e - def makefn(self, outs, mode=None, transformed=True, *args, **kwargs): + def makefn(self, outs, mode=None, *args, **kwargs): """Compiles a Aesara function which returns ``outs`` and takes the variable ancestors of ``outs`` as inputs. @@ -1187,11 +1188,8 @@ def makefn(self, outs, mode=None, transformed=True, *args, **kwargs): Compiled Aesara function """ with self: - vars = [ - v if not transformed else getattr(v.tag, "transformed_var", v) for v in self.vars - ] return aesara.function( - vars, + self.vars, outs, allow_input_downcast=True, on_unused_input="ignore", @@ -1324,7 +1322,10 @@ def check_test_point(self, test_point=None, round_vals=2): return Series( { - rv.name: np.round(self.fn(logpt_sum(rv))(test_point), round_vals) + rv.name: np.round( + self.fn(logpt_sum(rv, getattr(rv.tag, "observations", None)))(test_point), + round_vals, + ) for rv in self.basic_RVs }, name="Log-probability of test_point", diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 8d1b222e417..d35d3d248db 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1723,7 +1723,9 @@ def sample_posterior_predictive( return {} if not hasattr(_trace, "varnames"): - inputs_and_names = [(i, i.name) for i in rv_ancestors(vars_to_sample)] + inputs_and_names = [ + (rv, rv.name) for rv in rv_ancestors(vars_to_sample, walk_past_rvs=True) + ] inputs, input_names = zip(*inputs_and_names) else: input_names = _trace.varnames diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 990bb4cd567..15fd860981c 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -97,6 +97,7 @@ ZeroInflatedBinomial, ZeroInflatedNegativeBinomial, ZeroInflatedPoisson, + change_rv_size, continuous, logcdf, logpt, @@ -560,9 +561,9 @@ def test_hierarchical_logpt(): y = pm.Uniform("y", lower=0, upper=x) logpt_ancestors = list(ancestors([m.logpt])) - ancestors_with_owner = [a for a in logpt_ancestors if a.owner] - assert len(ancestors_with_owner) > 0 - assert not any(isinstance(v.owner.op, RandomVariable) for v in ancestors_with_owner) + ops = {a.owner.op for a in logpt_ancestors if a.owner} + assert len(ops) > 0 + assert not any(isinstance(o, RandomVariable) for o in ops) assert x.tag.value_var in logpt_ancestors assert y.tag.value_var in logpt_ancestors @@ -571,10 +572,13 @@ def test_hierarchical_obs_logpt(): obs = np.array([0.5, 0.4, 5, 2]) with pm.Model() as model: - x = pm.Normal("x", 0, 1, 
observed=obs) - pm.Normal("y", x, 1, observed=obs) + x = pm.Uniform("x", 0, 1, observed=obs) + pm.Uniform("y", x, 2, observed=obs) - model.logp(model.test_point) + logpt_ancestors = list(ancestors([model.logpt])) + ops = {a.owner.op for a in logpt_ancestors if a.owner} + assert len(ops) > 0 + assert not any(isinstance(o, RandomVariable) for o in ops) class TestMatchesScipy: @@ -737,9 +741,10 @@ def check_logcdf( params = dict(pt) scipy_cdf = scipy_logcdf(**params) value = params.pop("value") - dist = pymc3_dist.dist(**params) + with Model() as m: + dist = pymc3_dist("y", **params) params["value"] = value # for displaying in err_msg - with aesara.config.change_flags(mode=Mode("py")): + with aesara.config.change_flags(on_opt_error="raise", mode=Mode("py")): assert_almost_equal( logcdf(dist, value).eval(), scipy_cdf, @@ -804,14 +809,8 @@ def check_logcdf( ) # Test that method works with multiple values or raises informative TypeError - try: - with aesara.config.change_flags(mode=Mode("py")): - logcdf(valid_dist, np.array([valid_value, valid_value])).eval() - except TypeError as err: - if not str(err).endswith( - ".logcdf expects a scalar value but received a 1-dimensional object." - ): - raise + with pytest.raises(TypeError), aesara.config.change_flags(mode=Mode("py")): + logcdf(valid_dist, np.array([valid_value, valid_value])).eval() def check_selfconsistency_discrete_logcdf( self, distribution, domain, paramdomains, decimal=None, n_samples=100 @@ -828,10 +827,13 @@ def check_selfconsistency_discrete_logcdf( value = params.pop("value") values = np.arange(domain.lower, value + 1) dist = distribution.dist(**params) + # This only works for scalar random variables + assert dist.owner.op.ndim_supp == 0 + values_dist = change_rv_size(dist, values.shape) with aesara.config.change_flags(mode=Mode("py")): assert_almost_equal( logcdf(dist, value).eval(), - logsumexp(logpt(dist, values), keepdims=False).eval(), + logsumexp(logpt(values_dist, values), keepdims=False).eval(), decimal=decimal, err_msg=str(pt), ) @@ -864,8 +866,8 @@ def test_uniform(self): invalid_dist = Uniform.dist(lower=1, upper=0) with aesara.config.change_flags(mode=Mode("py")): - assert logpt(invalid_dist, 0.5).eval() == -np.inf - assert logcdf(invalid_dist, 2).eval() == -np.inf + assert logpt(invalid_dist, np.array(0.5)).eval() == -np.inf + assert logcdf(invalid_dist, np.array(2.0)).eval() == -np.inf @pytest.mark.xfail(reason="Distribution not refactored yet") def test_triangular(self): @@ -1475,13 +1477,22 @@ def test_beta_binomial(self): {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, ) + @pytest.mark.xfail(reason="Bernoulli logit_p not refactored yet") + def test_bernoulli_logit_p(self): + self.check_logp( + Bernoulli, + Bool, + {"logit_p": R}, + lambda value, logit_p: sp.bernoulli.logpmf(value, scipy.special.expit(logit_p)), + ) + self.check_logcdf( + Bernoulli, + Bool, + {"logit_p": R}, + lambda value, logit_p: sp.bernoulli.logcdf(value, scipy.special.expit(logit_p)), + ) + def test_bernoulli(self): - # self.check_logp( - # Bernoulli, - # Bool, - # {"logit_p": R}, - # lambda value, logit_p: sp.bernoulli.logpmf(value, scipy.special.expit(logit_p)), - # ) self.check_logp( Bernoulli, Bool, @@ -1494,12 +1505,6 @@ def test_bernoulli(self): {"p": Unit}, lambda value, p: sp.bernoulli.logcdf(value, p), ) - # self.check_logcdf( - # Bernoulli, - # Bool, - # {"logit_p": R}, - # lambda value, logit_p: sp.bernoulli.logcdf(value, scipy.special.expit(logit_p)), - # ) self.check_selfconsistency_discrete_logcdf( Bernoulli, Bool, @@ -1904,23 
+1909,24 @@ def test_multinomial(self, n): Multinomial, Vector(Nat, n), {"p": Simplex(n), "n": Nat}, multinomial_logpdf ) - # @pytest.mark.parametrize( - # "p,n", - # [ - # [[0.25, 0.25, 0.25, 0.25], 1], - # [[0.3, 0.6, 0.05, 0.05], 2], - # [[0.3, 0.6, 0.05, 0.05], 10], - # ], - # ) - # def test_multinomial_mode(self, p, n): - # _p = np.array(p) - # with Model() as model: - # m = Multinomial("m", n, _p, _p.shape) - # assert_allclose(m.distribution.mode.eval().sum(), n) - # _p = np.array([p, p]) - # with Model() as model: - # m = Multinomial("m", n, _p, _p.shape) - # assert_allclose(m.distribution.mode.eval().sum(axis=-1), n) + @pytest.mark.skip(reason="Moment calculations have not been refactored yet") + @pytest.mark.parametrize( + "p,n", + [ + [[0.25, 0.25, 0.25, 0.25], 1], + [[0.3, 0.6, 0.05, 0.05], 2], + [[0.3, 0.6, 0.05, 0.05], 10], + ], + ) + def test_multinomial_mode(self, p, n): + _p = np.array(p) + with Model() as model: + m = Multinomial("m", n, _p, _p.shape) + assert_allclose(m.distribution.mode.eval().sum(), n) + _p = np.array([p, p]) + with Model() as model: + m = Multinomial("m", n, _p, _p.shape) + assert_allclose(m.distribution.mode.eval().sum(axis=-1), n) @pytest.mark.parametrize( "p, size, n", @@ -1949,12 +1955,13 @@ def test_multinomial_random(self, p, size, n): assert m.eval().shape == size + p.shape - # def test_multinomial_mode_with_shape(self): - # n = [1, 10] - # p = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]]) - # with Model() as model: - # m = Multinomial("m", n=n, p=p, size=(2, 4)) - # assert_allclose(m.distribution.mode.eval().sum(axis=-1), n) + @pytest.mark.skip(reason="Moment calculations have not been refactored yet") + def test_multinomial_mode_with_shape(self): + n = [1, 10] + p = np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]]) + with Model() as model: + m = Multinomial("m", n=n, p=p, size=(2, 4)) + assert_allclose(m.distribution.mode.eval().sum(axis=-1), n) def test_multinomial_vec(self): vals = np.array([[2, 4, 4], [3, 3, 4]]) @@ -2198,12 +2205,14 @@ def test_batch_dirichlet_multinomial(self): sample = dist.random(size=2) assert_allclose(sample, np.stack([vals, vals], axis=0)) + @aesara.config.change_flags(compute_test_value="raise") def test_categorical_bounds(self): with Model(): x = Categorical("x", p=np.array([0.2, 0.3, 0.5])) assert np.isinf(logpt(x, -1).tag.test_value) assert np.isinf(logpt(x, 3).tag.test_value) + @aesara.config.change_flags(compute_test_value="raise") def test_categorical_valid_p(self): with Model(): x = Categorical("x", p=np.array([-0.2, 0.3, 0.5])) @@ -2670,11 +2679,7 @@ def test_str(self): assert str_repr in model_str -@pytest.mark.xfail(reason="Distribution not refactored yet") def test_discrete_trafo(): - with pytest.raises(ValueError) as err: - Binomial.dist(n=5, p=0.5, transform="log") - err.match("Transformations for discrete distributions") with Model(): with pytest.raises(ValueError) as err: Binomial("a", n=5, p=0.5, transform="log") From 58b2f11c02700d300739f46c0128719eb45d1110 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 22 Mar 2021 00:12:47 -0500 Subject: [PATCH 08/10] Disable use of Arviz in pymc3.tests.test_data_container --- pymc3/tests/test_data_container.py | 61 ++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index c0fe7e82f30..353a31a64aa 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -70,7 +70,13 @@ def test_sample_posterior_predictive_after_set_data(self): y = pm.Data("y", [1.0, 2.0, 3.0]) beta = pm.Normal("beta", 0, 10.0) pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y) - trace = pm.sample(1000, tune=1000, chains=1) + trace = pm.sample( + 1000, + tune=1000, + chains=1, + return_inferencedata=False, + compute_convergence_checks=False, + ) # Predict on new data. with model: x_test = [5, 6, 9] @@ -86,13 +92,27 @@ def test_sample_after_set_data(self): y = pm.Data("y", [1.0, 2.0, 3.0]) beta = pm.Normal("beta", 0, 10.0) pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y) - pm.sample(1000, init=None, tune=1000, chains=1) + pm.sample( + 1000, + init=None, + tune=1000, + chains=1, + return_inferencedata=False, + compute_convergence_checks=False, + ) # Predict on new data. new_x = [5.0, 6.0, 9.0] new_y = [5.0, 6.0, 9.0] with model: pm.set_data(new_data={"x": new_x, "y": new_y}) - new_trace = pm.sample(1000, init=None, tune=1000, chains=1) + new_trace = pm.sample( + 1000, + init=None, + tune=1000, + chains=1, + return_inferencedata=False, + compute_convergence_checks=False, + ) pp_trace = pm.sample_posterior_predictive(new_trace, 1000) assert pp_trace["obs"].shape == (1000, 3) @@ -110,7 +130,14 @@ def test_shared_data_as_index(self): pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y) prior_trace = pm.sample_prior_predictive(1000, var_names=["alpha"]) - trace = pm.sample(1000, init=None, tune=1000, chains=1) + trace = pm.sample( + 1000, + init=None, + tune=1000, + chains=1, + return_inferencedata=False, + compute_convergence_checks=False, + ) # Predict on new data new_index = np.array([0, 1, 2]) @@ -132,14 +159,18 @@ def test_shared_data_as_rv_input(self): with pm.Model() as m: x = pm.Data("x", [1.0, 2.0, 3.0]) _ = pm.Normal("y", mu=x, size=3) - trace = pm.sample(chains=1) + trace = pm.sample( + chains=1, return_inferencedata=False, compute_convergence_checks=False + ) np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), x.get_value(), atol=1e-1) np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), trace["y"].mean(0), atol=1e-1) with m: pm.set_data({"x": np.array([2.0, 4.0, 6.0])}) - trace = pm.sample(chains=1) + trace = pm.sample( + chains=1, return_inferencedata=False, compute_convergence_checks=False + ) np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), x.get_value(), atol=1e-1) np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), trace["y"].mean(0), atol=1e-1) @@ -175,7 +206,14 @@ def test_set_data_to_non_data_container_variables(self): y = np.array([1.0, 2.0, 3.0]) beta = pm.Normal("beta", 0, 10.0) pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y) - pm.sample(1000, init=None, tune=1000, chains=1) + pm.sample( + 1000, + init=None, + tune=1000, + chains=1, + return_inferencedata=False, + compute_convergence_checks=False, + ) with pytest.raises(TypeError) as error: pm.set_data({"beta": [1.1, 2.2, 3.3]}, model=model) error.match("defined as `pymc3.Data` inside the model") @@ -188,7 +226,14 @@ def test_model_to_graphviz_for_model_with_data_container(self): beta = pm.Normal("beta", 0, 10.0) obs_sigma = 
floatX(np.sqrt(1e-2)) pm.Normal("obs", beta * x, obs_sigma, observed=y) - pm.sample(1000, init=None, tune=1000, chains=1) + pm.sample( + 1000, + init=None, + tune=1000, + chains=1, + return_inferencedata=False, + compute_convergence_checks=False, + ) for formatting in {"latex", "latex_with_params"}: with pytest.raises(ValueError, match="Unsupported formatting"): From 80d189fc23406bba1ef411526f7709d942da6b80 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 22 Mar 2021 00:35:59 -0500 Subject: [PATCH 09/10] Set model seed correctly in pymc3.tests.test_ndarray_backend --- pymc3/tests/test_ndarray_backend.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pymc3/tests/test_ndarray_backend.py b/pymc3/tests/test_ndarray_backend.py index 75e027d2443..b4744373ed1 100644 --- a/pymc3/tests/test_ndarray_backend.py +++ b/pymc3/tests/test_ndarray_backend.py @@ -267,14 +267,12 @@ def test_sample_posterior_predictive(self, tmpdir_factory): assert save_dir == directory - seed = 10 - np.random.seed(seed) - with TestSaveLoad.model(): + with TestSaveLoad.model() as model: + model.default_rng.get_value(borrow=True).seed(10) ppc = pm.sample_posterior_predictive(self.trace) - seed = 10 - np.random.seed(seed) - with TestSaveLoad.model(): + with TestSaveLoad.model() as model: + model.default_rng.get_value(borrow=True).seed(10) trace2 = pm.load_trace(directory) ppc2 = pm.sample_posterior_predictive(trace2) ppc2f = pm.sample_posterior_predictive(trace2) From 049c5f8c098eebd462a39c55de554f08ce62f35b Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 22 Mar 2021 19:32:01 -0500 Subject: [PATCH 10/10] Prevent Model from turning on test value computations --- pymc3/model.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pymc3/model.py b/pymc3/model.py index 1d27efec3e4..d6f3e3f82b7 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -809,10 +809,7 @@ def __new__(cls, *args, **kwargs): instance._parent = kwargs.get("model") else: instance._parent = cls.get_context(error_if_none=False) - aesara_config = kwargs.get("aesara_config", None) - if aesara_config is None or "compute_test_value" not in aesara_config: - aesara_config = {"compute_test_value": "ignore"} - instance._aesara_config = aesara_config + instance._aesara_config = kwargs.get("aesara_config", {}) return instance def __init__(self, name="", model=None, aesara_config=None, coords=None, check_bounds=True): @@ -1007,7 +1004,20 @@ def independent_vars(self): @property def test_point(self): """Test point used to check that the model doesn't generate errors""" - return Point(((var, var.tag.test_value) for var in self.vars), model=self) + points = [] + for var in self.free_RVs: + var_value = getattr(var.tag, "test_value", None) + + if var_value is None: + try: + var_value = var.eval() + var.tag.test_value = var_value + except Exception: + raise Exception(f"Couldn't generate an initial value for {var}") + + points.append((getattr(var.tag, "value_var", var), var_value)) + + return Point(points, model=self) @property def disc_vars(self): @@ -1594,11 +1604,11 @@ def make_obs_var(rv_var: TensorVariable, data: Union[np.ndarray]) -> TensorVaria else: new_size = data.shape - test_value = getattr(rv_var.tag, "test_value", None) - rv_var = change_rv_size(rv_var, new_size) if aesara.config.compute_test_value != "off": + test_value = getattr(rv_var.tag, "test_value", None) + if test_value is not None: # We try to reuse the old test value rv_var.tag.test_value = 
np.broadcast_to(test_value, rv_var.tag.test_value.shape)
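
# A minimal end-to-end sketch of the observed-variable handling after this
# series (hedged; the data and model below are illustrative assumptions, not
# part of the patches above):
import numpy as np
import pymc3 as pm

data = np.random.normal(size=10)
with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    y = pm.Normal("y", mu, 1.0, observed=data)

# The observed data now lives on the RV's tag instead of behind an `Observed`
# op, and the data term of the model log-likelihood is built via
# `logpt(y, y.tag.observations)`:
obs_data = y.tag.observations
data_logp = model.datalogpt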