diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 58b4ae09478..fdad4344ff9 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -14,7 +14,7 @@ from . import transforms from .dist_math import bound, logpow, gammaln, betaln, std_cdf, i0, i1 -from .distribution import Continuous, draw_values, generate_samples +from .distribution import Univariate, Continuous, draw_values, generate_samples __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace', 'StudentT', 'Cauchy', 'HalfCauchy', 'Gamma', 'Weibull', @@ -23,20 +23,25 @@ 'VonMises', 'SkewNormal'] -class PositiveContinuous(Continuous): - """Base class for positive continuous distributions""" +class PositiveUnivariateContinuous(Univariate, Continuous): + """Base class for positive univariate continuous distributions""" - def __init__(self, transform=transforms.log, *args, **kwargs): - super(PositiveContinuous, self).__init__( - transform=transform, *args, **kwargs) + def __init__(self, dist_params, *args, **kwargs): + transform = kwargs.get('transform', transforms.log) + super(PositiveUnivariateContinuous, self).__init__(dist_params, + transform=transform, + *args, **kwargs) -class UnitContinuous(Continuous): - """Base class for continuous distributions on [0,1]""" +class UnitUnivariateContinuous(Univariate, Continuous): + """Base class for univariate continuous distributions in [0,1]""" + + def __init__(self, dist_params, *args, **kwargs): + transform = kwargs.get('transform', transforms.logodds) + super(UnitUnivariateContinuous, self).__init__(dist_params, + transform=transform, + *args, **kwargs) - def __init__(self, transform=transforms.logodds, *args, **kwargs): - super(UnitContinuous, self).__init__( - transform=transform, *args, **kwargs) def assert_negative_support(var, label, distname, value=-1e-6): # Checks for evidence of positive support for a variable @@ -44,8 +49,7 @@ def assert_negative_support(var, label, distname, value=-1e-6): return try: # Transformed distribution - support = np.isfinite(var.transformed.distribution.dist - .logp(value).tag.test_value) + support = np.isfinite(var.transformed.distribution.dist.logp(value).tag.test_value) except AttributeError: try: # Untransformed distribution @@ -61,7 +65,7 @@ def assert_negative_support(var, label, distname, value=-1e-6): def get_tau_sd(tau=None, sd=None): - """ + r""" Find precision and standard deviation .. math:: @@ -98,10 +102,10 @@ def get_tau_sd(tau=None, sd=None): tau = 1. * tau sd = 1. * sd - return (tau, sd) + return (tt.as_tensor_variable(tau), tt.as_tensor_variable(sd)) -class Uniform(Continuous): +class Uniform(Univariate, Continuous): R""" Continuous uniform log-likelihood. @@ -123,15 +127,18 @@ class Uniform(Continuous): Upper limit. """ - def __init__(self, lower=0, upper=1, transform='interval', - *args, **kwargs): - super(Uniform, self).__init__(*args, **kwargs) + def __init__(self, lower=0, upper=1, transform='interval', *args, **kwargs): + lower = tt.as_tensor_variable(lower) + upper = tt.as_tensor_variable(upper) self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. 
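For reference, the relationship that get_tau_sd implements above is simply tau = 1/sd**2 in both directions. A minimal plain-Python sketch of that logic follows; the name tau_sd_from_either is hypothetical and not part of the patch, and the patched helper additionally wraps both results with tt.as_tensor_variable before returning them.

def tau_sd_from_either(tau=None, sd=None):
    # Sketch of get_tau_sd's conversion: precision and standard deviation
    # determine each other through tau = 1 / sd**2.
    if tau is None and sd is None:
        tau, sd = 1.0, 1.0      # defaults to unit precision and scale
    elif tau is None:
        tau = sd ** -2.0        # derive precision from standard deviation
    elif sd is None:
        sd = tau ** -0.5        # derive standard deviation from precision
    else:
        raise ValueError("Can't pass both tau and sd")
    return tau, sd

print(tau_sd_from_either(sd=0.5))   # -> (4.0, 0.5)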
self.median = self.mean + dist_params = (self.lower, self.upper) + super(Uniform, self).__init__(dist_params, *args, **kwargs) + if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -147,18 +154,21 @@ def logp(self, value): lower = self.lower upper = self.upper return bound(-tt.log(upper - lower), - value >= lower, value <= upper) + value >= lower, value <= upper) -class Flat(Continuous): +class Flat(Univariate, Continuous): """ Uninformative log-likelihood that returns 0 regardless of the passed value. """ def __init__(self, *args, **kwargs): - super(Flat, self).__init__(*args, **kwargs) - self.median = 0 + + self.median = tt.as_tensor_variable(0.) + dist_params = (self.median,) + + super(Flat, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') @@ -167,7 +177,7 @@ def logp(self, value): return tt.zeros_like(value) -class Normal(Continuous): +class Normal(Univariate, Continuous): R""" Univariate normal log-likelihood. @@ -201,28 +211,19 @@ class Normal(Continuous): Precision (tau > 0). """ - def __init__(self, *args, **kwargs): + def __init__(self, mu=0.0, sd=None, tau=None, *args, **kwargs): + # FIXME In order to catch the case where Normal('x', 0, .1) is # called to display a warning we have to fetch the args and # kwargs manually. After a certain period we should revert # back to the old calling signature. - if len(args) == 1: - mu = args[0] - sd = kwargs.pop('sd', None) - tau = kwargs.pop('tau', None) - elif len(args) == 2: - warnings.warn(('The order of positional arguments to Normal()' - 'has changed. The new signature is:' - 'Normal(name, mu, sd) instead of Normal(name, mu, tau).'), - DeprecationWarning) - mu, sd = args - tau = kwargs.pop('tau', None) - else: - mu = kwargs.pop('mu', 0.) - sd = kwargs.pop('sd', None) - tau = kwargs.pop('tau', None) + warnings.warn(('The order of positional arguments to Normal()' + 'has changed. The new signature is:' + 'Normal(name, mu, sd) instead of Normal(name, mu, tau).'), + DeprecationWarning) + mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.variance = 1. / self.tau @@ -230,7 +231,9 @@ def __init__(self, *args, **kwargs): assert_negative_support(sd, 'sd', 'Normal') assert_negative_support(tau, 'tau', 'Normal') - super(Normal, self).__init__(**kwargs) + dist_params = (self.mu, self.tau) + + super(Normal, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -247,7 +250,7 @@ def logp(self, value): sd > 0) -class HalfNormal(PositiveContinuous): +class HalfNormal(PositiveUnivariateContinuous): R""" Half-normal log-likelihood. @@ -271,8 +274,8 @@ class HalfNormal(PositiveContinuous): Precision (tau > 0). """ - def __init__(self, sd=None, tau=None, *args, **kwargs): - super(HalfNormal, self).__init__(*args, **kwargs) + def __init__(self, tau=None, sd=None, *args, **kwargs): + self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. 
- 2 / np.pi) / self.tau @@ -280,6 +283,10 @@ def __init__(self, sd=None, tau=None, *args, **kwargs): assert_negative_support(tau, 'tau', 'HalfNormal') assert_negative_support(sd, 'sd', 'HalfNormal') + dist_params = (self.tau,) + + super(HalfNormal, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): sd = draw_values([self.sd], point=point) return generate_samples(stats.halfnorm.rvs, loc=0., scale=sd, @@ -294,7 +301,7 @@ def logp(self, value): tau > 0, sd > 0) -class Wald(PositiveContinuous): +class Wald(PositiveUnivariateContinuous): R""" Wald log-likelihood. @@ -351,7 +358,7 @@ class Wald(PositiveContinuous): """ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): - super(Wald, self).__init__(*args, **kwargs) + self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha @@ -363,22 +370,29 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): assert_negative_support(mu, 'mu', 'Wald') assert_negative_support(lam, 'lam', 'Wald') + dist_params = (self.mu, self.lam, self.alpha) + + super(Wald, self).__init__(dist_params, *args, **kwargs) + def get_mu_lam_phi(self, mu, lam, phi): + res = None if mu is None: if lam is not None and phi is not None: - return lam / phi, lam, phi + res = (lam / phi, lam, phi) else: if lam is None: if phi is None: - return mu, 1., 1. / mu + res = (mu, 1., 1. / mu) else: - return mu, mu * phi, phi + res = (mu, mu * phi, phi) else: if phi is None: - return mu, lam, lam / mu + res = (mu, lam, lam / mu) - raise ValueError('Wald distribution must specify either mu only, ' - 'mu and lam, mu and phi, or lam and phi.') + if res is None: + raise ValueError('Wald distribution must specify either mu only, ' + 'mu and lam, mu and phi, or lam and phi.') + return map(tt.as_tensor_variable, res) def _random(self, mu, lam, alpha, size=None): v = np.random.normal(size=size)**2 @@ -402,16 +416,15 @@ def logp(self, value): lam = self.lam alpha = self.alpha # value *must* be iid. Otherwise this is wrong. - return bound(logpow(lam / (2. * np.pi), 0.5) - - logpow(value - alpha, 1.5) - - (0.5 * lam / (value - alpha) - * ((value - alpha - mu) / mu)**2), + return bound(logpow(lam / (2. * np.pi), 0.5) - + logpow(value - alpha, 1.5) - + (0.5 * lam / (value - alpha) * ((value - alpha - mu) / + mu)**2), # XXX these two are redundant. Please, check. - value > 0, value - alpha > 0, - mu > 0, lam > 0, alpha >= 0) + value > 0, value - alpha > 0, mu > 0, lam > 0, alpha >= 0) -class Beta(UnitContinuous): +class Beta(UnitUnivariateContinuous): R""" Beta log-likelihood. @@ -454,10 +467,7 @@ class Beta(UnitContinuous): the binomial distribution. 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): - super(Beta, self).__init__(*args, **kwargs) - + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -468,6 +478,10 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, assert_negative_support(alpha, 'alpha', 'Beta') assert_negative_support(beta, 'beta', 'Beta') + dist_params = (self.alpha, self.beta) + + super(Beta, self).__init__(dist_params, *args, **kwargs) + def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass @@ -479,7 +493,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): raise ValueError('Incompatible parameterization. Either use alpha ' 'and beta, or mu and sd to specify distribution.') - return alpha, beta + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -492,13 +506,12 @@ def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - - betaln(alpha, beta), - value >= 0, value <= 1, + return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - + betaln(alpha, beta), value >= 0, value <= 1, alpha > 0, beta > 0) -class Exponential(PositiveContinuous): +class Exponential(PositiveUnivariateContinuous): R""" Exponential log-likelihood. @@ -519,15 +532,16 @@ class Exponential(PositiveContinuous): """ def __init__(self, lam, *args, **kwargs): - super(Exponential, self).__init__(*args, **kwargs) self.lam = lam self.mean = 1. / lam self.median = self.mean * tt.log(2) self.mode = 0 + dist_params = (self.lam,) + self.variance = lam**-2 - assert_negative_support(lam, 'lam', 'Exponential') + super(Exponential, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): lam = draw_values([self.lam], point=point) @@ -540,7 +554,7 @@ def logp(self, value): return bound(tt.log(lam) - lam * value, value > 0, lam > 0) -class Laplace(Continuous): +class Laplace(Univariate, Continuous): R""" Laplace log-likelihood. @@ -564,14 +578,17 @@ class Laplace(Continuous): """ def __init__(self, mu, b, *args, **kwargs): - super(Laplace, self).__init__(*args, **kwargs) - self.b = b - self.mean = self.median = self.mode = self.mu = mu - + self.b = tt.as_tensor_variable(b) + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( + mu) self.variance = 2 * b**2 assert_negative_support(b, 'b', 'Laplace') + dist_params = (self.b, self.mu) + + super(Laplace, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) return generate_samples(np.random.laplace, mu, b, @@ -585,7 +602,7 @@ def logp(self, value): return -tt.log(2 * b) - abs(value - mu) / b -class Lognormal(PositiveContinuous): +class Lognormal(PositiveUnivariateContinuous): R""" Log-normal log-likelihood. @@ -614,20 +631,22 @@ class Lognormal(PositiveContinuous): Scale parameter (tau > 0). """ - def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): - super(Lognormal, self).__init__(*args, **kwargs) - - self.mu = mu - self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) - - self.mean = tt.exp(mu + 1. / (2 * self.tau)) - self.median = tt.exp(mu) - self.mode = tt.exp(mu - 1. 
/ self.tau) - self.variance = (tt.exp(1. / self.tau) - 1) * tt.exp(2 * mu + 1. / self.tau) + def __init__(self, mu=0, tau=1, *args, + **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + self.mean = tt.exp(self.mu + 1. / (2. * self.tau)) + self.median = tt.exp(self.mu) + self.mode = tt.exp(self.mu - 1. / self.tau) + self.variance = (tt.exp(1. / self.tau) - 1.) * tt.exp(2 * self.mu + 1. / self.tau) assert_negative_support(tau, 'tau', 'Lognormal') assert_negative_support(sd, 'sd', 'Lognormal') + dist_params = (self.mu, self.tau) + + super(Lognormal, self).__init__(dist_params, *args, **kwargs) + def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) return np.exp(mu + (tau**-0.5) * samples) @@ -647,8 +666,8 @@ def logp(self, value): tau > 0) -class StudentT(Continuous): - R""" +class StudentT(Univariate, Continuous): + r""" Non-central Student's T log-likelihood. Describes a normal variable whose precision is gamma distributed. @@ -675,9 +694,8 @@ class StudentT(Continuous): lam : float Scale parameter (lam > 0). """ - - def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): - super(StudentT, self).__init__(*args, **kwargs) + def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu @@ -689,6 +707,9 @@ def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): assert_negative_support(lam, 'lam (sd)', 'StudentT') assert_negative_support(nu, 'nu', 'StudentT') + dist_params = (self.nu, self.mu, self.lam) + super(StudentT, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point) @@ -709,7 +730,7 @@ def logp(self, value): lam > 0, nu > 0, sd > 0) -class Pareto(PositiveContinuous): +class Pareto(PositiveUnivariateContinuous): R""" Pareto log-likelihood. @@ -736,20 +757,21 @@ class Pareto(PositiveContinuous): """ def __init__(self, alpha, m, *args, **kwargs): - super(Pareto, self).__init__(*args, **kwargs) - self.alpha = alpha - self.m = m - self.mean = tt.switch(tt.gt(alpha, 1), alpha * - m / (alpha - 1.), np.inf) - self.median = m * 2.**(1. / alpha) + self.alpha = tt.as_tensor_variable(alpha) + self.m = tt.as_tensor_variable(m) + self.mean = tt.switch(tt.gt(self.alpha, 1), self.alpha * self.m / (self.alpha - 1.), np.inf) + self.median = self.m * 2.**(1. / self.alpha) self.variance = tt.switch( - tt.gt(alpha, 2), - (alpha * m**2) / ((alpha - 2.) * (alpha - 1.)**2), + tt.gt(self.alpha, 2), + (self.alpha * self.m**2) / ((self.alpha - 2.) * (self.alpha - 1.)**2), np.inf) assert_negative_support(alpha, 'alpha', 'Pareto') assert_negative_support(m, 'm', 'Pareto') + dist_params = (self.alpha, self.m) + + super(Pareto, self).__init__(dist_params, *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -770,7 +792,7 @@ def logp(self, value): value >= m, alpha > 0, m > 0) -class Cauchy(Continuous): +class Cauchy(Univariate, Continuous): R""" Cauchy log-likelihood. 
@@ -797,9 +819,12 @@ class Cauchy(Continuous): """ def __init__(self, alpha, beta, *args, **kwargs): - super(Cauchy, self).__init__(*args, **kwargs) - self.median = self.mode = self.alpha = alpha - self.beta = beta + self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + + dist_params = (self.alpha, self.beta) + + super(Cauchy, self).__init__(dist_params, *args, **kwargs) assert_negative_support(beta, 'beta', 'Cauchy') @@ -822,7 +847,7 @@ def logp(self, value): beta > 0) -class HalfCauchy(PositiveContinuous): +class HalfCauchy(PositiveUnivariateContinuous): R""" Half-Cauchy log-likelihood. @@ -844,10 +869,13 @@ class HalfCauchy(PositiveContinuous): """ def __init__(self, beta, *args, **kwargs): - super(HalfCauchy, self).__init__(*args, **kwargs) - self.mode = 0 + self.mode = tt.as_tensor_variable(0.) + self.beta = tt.as_tensor_variable(beta) self.median = beta - self.beta = beta + + dist_params = (self.beta,) + + super(HalfCauchy, self).__init__(dist_params, *args, **kwargs) assert_negative_support(beta, 'beta', 'HalfCauchy') @@ -868,7 +896,7 @@ def logp(self, value): value >= 0, beta > 0) -class Gamma(PositiveContinuous): +class Gamma(PositiveUnivariateContinuous): R""" Gamma log-likelihood. @@ -907,19 +935,21 @@ class Gamma(PositiveContinuous): Alternative scale parameter (sd > 0). """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): - super(Gamma, self).__init__(*args, **kwargs) + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta - self.mean = alpha / beta - self.mode = tt.maximum((alpha - 1) / beta, 0) - self.variance = alpha / beta**2 + self.mean = self.alpha / self.beta + self.mode = tt.maximum((self.alpha - 1) / self.beta, 0) + self.variance = self.alpha / self.beta**2 assert_negative_support(alpha, 'alpha', 'Gamma') assert_negative_support(beta, 'beta', 'Gamma') + dist_params = (self.alpha, self.beta) + + super(Gamma, self).__init__(dist_params, *args, **kwargs) + def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass @@ -931,7 +961,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): 'alpha and beta, or mu and sd to specify ' 'distribution.') - return alpha, beta + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -952,7 +982,7 @@ def logp(self, value): beta > 0) -class InverseGamma(PositiveContinuous): +class InverseGamma(PositiveUnivariateContinuous): R""" Inverse gamma log-likelihood, the reciprocal of the gamma distribution. @@ -976,12 +1006,10 @@ class InverseGamma(PositiveContinuous): beta : float Scale parameter (beta > 0). """ - - def __init__(self, alpha, beta=1, *args, **kwargs): - super(InverseGamma, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta - self.mean = self._calculate_mean() + def __init__(self, alpha, beta=1., *args, **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.mean = (alpha > 1) * beta / (alpha - 1.) or np.inf self.mode = beta / (alpha + 1.) 
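Gamma.get_alpha_beta (whose mu/sd branch is elided in this hunk) accepts either (alpha, beta) or (mu, sd). The conversion is ordinary moment matching, assuming mean = alpha/beta and variance = alpha/beta**2; the sketch below is a hypothetical standalone function rather than the method itself, which now returns tensor variables.

def gamma_alpha_beta(mu, sd):
    # Moment matching for Gamma(alpha, beta): mean = alpha / beta and
    # variance = alpha / beta**2, so beta = mu / sd**2 and alpha = mu * beta.
    beta = mu / sd ** 2
    alpha = mu * beta
    return alpha, beta

alpha, beta = gamma_alpha_beta(mu=2.0, sd=1.0)
print(alpha, beta)                        # 4.0 2.0
print(alpha / beta, alpha / beta ** 2)    # recovers the mean 2.0 and variance 1.0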
self.variance = tt.switch(tt.gt(alpha, 2), (beta**2) / (alpha * (alpha - 1.)**2), @@ -989,6 +1017,10 @@ def __init__(self, alpha, beta=1, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'InverseGamma') assert_negative_support(beta, 'beta', 'InverseGamma') + dist_params = (self.alpha, self.beta) + + super(InverseGamma, self).__init__(dist_params, *args, **kwargs) + def _calculate_mean(self): m = self.beta / (self.alpha - 1.) try: @@ -1007,8 +1039,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value - + logpow(value, -alpha - 1), + return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value + + logpow(value, -alpha - 1), value > 0, alpha > 0, beta > 0) @@ -1033,12 +1065,13 @@ class ChiSquared(Gamma): """ def __init__(self, nu, *args, **kwargs): - self.nu = nu - super(ChiSquared, self).__init__(alpha=nu / 2., beta=0.5, + self.nu = tt.as_tensor_variable(nu) + super(ChiSquared, self).__init__(alpha=self.nu / 2., + beta=tt.as_tensor_variable(0.5), *args, **kwargs) -class Weibull(PositiveContinuous): +class Weibull(PositiveUnivariateContinuous): R""" Weibull log-likelihood. @@ -1061,11 +1094,9 @@ class Weibull(PositiveContinuous): beta : float Scale parameter (beta > 0). """ - def __init__(self, alpha, beta, *args, **kwargs): - super(Weibull, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. / alpha) self.variance = (beta**2) * \ @@ -1074,6 +1105,10 @@ def __init__(self, alpha, beta, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'Weibull') assert_negative_support(beta, 'beta', 'Weibull') + dist_params = (self.alpha, self.beta) + + super(Weibull, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], point=point) @@ -1094,7 +1129,7 @@ def logp(self, value): value >= 0, alpha > 0, beta > 0) -class Bounded(Continuous): +class Bounded(Univariate, Continuous): R""" An upper, lower or upper+lower bounded distribution @@ -1204,13 +1239,14 @@ def dist(cls, *args, **kwargs): def StudentTpos(*args, **kwargs): warnings.warn("StudentTpos has been deprecated. In future, use HalfStudentT instead.", - DeprecationWarning) + DeprecationWarning) return HalfStudentT(*args, **kwargs) + HalfStudentT = Bound(StudentT, lower=0) -class ExGaussian(Continuous): +class ExGaussian(Univariate, Continuous): R""" Exponentially modified Gaussian log-likelihood. 
@@ -1256,16 +1292,19 @@ class ExGaussian(Continuous): """ def __init__(self, mu, sigma, nu, *args, **kwargs): - super(ExGaussian, self).__init__(*args, **kwargs) - self.mu = mu - self.sigma = sigma - self.nu = nu - self.mean = mu + nu - self.variance = (sigma**2) + (nu**2) + self.mu = tt.as_tensor_variable(mu) + self.sigma = tt.as_tensor_variable(sigma) + self.nu = tt.as_tensor_variable(nu) + self.mean = self.mu + self.nu + self.variance = self.sigma**2 + self.nu**2 assert_negative_support(sigma, 'sigma', 'ExGaussian') assert_negative_support(nu, 'nu', 'ExGaussian') + dist_params = (self.mu, self.sigma, self.nu) + + super(ExGaussian, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], point=point) @@ -1292,7 +1331,7 @@ def logp(self, value): return bound(lp, sigma > 0., nu > 0.) -class VonMises(Continuous): +class VonMises(Univariate, Continuous): R""" Univariate VonMises log-likelihood. @@ -1316,12 +1355,14 @@ class VonMises(Continuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', - *args, **kwargs): - super(VonMises, self).__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.mu = mu - self.kappa = kappa - self.variance = 1 - i1(kappa) / i0(kappa) + def __init__(self, mu=0.0, kappa=None, transform='circular', *args, **kwargs): + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) + self.kappa = tt.as_tensor_variable(kappa) + self.variance = 1. - i1(self.kappa) / i0(self.kappa) + + dist_params = (self.mu, self.kappa) + + super(VonMises, self).__init__(dist_params, *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() @@ -1338,7 +1379,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu kappa = self.kappa - return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0) + return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), + value >= -np.pi, value <= np.pi, kappa >= 0) class SkewNormal(Continuous): @@ -1380,8 +1422,9 @@ class SkewNormal(Continuous): approaching plus/minus infinite we get a half-normal distribution. 
""" + def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs): - super(SkewNormal, self).__init__(*args, **kwargs) + super(SkewNormal, self).__init__(ndim=1, *args, **kwargs) self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.alpha = alpha @@ -1405,8 +1448,7 @@ def logp(self, value): mu = self.mu alpha = self.alpha return bound( - tt.log(1 + - tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + (-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2., tau > 0, sd > 0) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index edfdfaecb96..b78a7995f7a 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1,4 +1,5 @@ from functools import partial +import warnings import numpy as np import theano @@ -6,15 +7,20 @@ from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow -from .distribution import Discrete, draw_values, generate_samples - -__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', +from .distribution import ( + Univariate, + Discrete, + draw_values, + generate_samples, + has_const_inputs) + +__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', 'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial', 'DiscreteUniform', 'Geometric', 'Categorical'] -class Binomial(Discrete): +class Binomial(Univariate, Discrete): R""" Binomial log-likelihood. @@ -39,10 +45,14 @@ class Binomial(Discrete): """ def __init__(self, n, p, *args, **kwargs): - super(Binomial, self).__init__(*args, **kwargs) - self.n = n - self.p = p - self.mode = tt.cast(tt.round(n * p), self.dtype) + self.n = tt.as_tensor_variable(n) + self.p = tt.as_tensor_variable(p) + + self.mode = tt.cast(tt.round(n * p), 'int32') + + dist_params = (self.n, self.p) + + super(Binomial, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) @@ -60,7 +70,7 @@ def logp(self, value): 0 <= p, p <= 1) -class BetaBinomial(Discrete): +class BetaBinomial(Univariate, Discrete): R""" Beta-binomial log-likelihood. 
@@ -90,11 +100,16 @@ class BetaBinomial(Discrete): """ def __init__(self, alpha, beta, n, *args, **kwargs): - super(BetaBinomial, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta - self.n = n - self.mode = tt.cast(tt.round(alpha / (alpha + beta)), 'int8') + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.n = tt.as_tensor_variable(n) + + self.mode = tt.cast(tt.round(self.alpha / (self.alpha + self.beta)), + 'int8') + + dist_params = (self.alpha, self.beta, self.n) + + super(BetaBinomial, self).__init__(dist_params, *args, **kwargs) def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -125,7 +140,7 @@ def logp(self, value): alpha > 0, beta > 0) -class Bernoulli(Discrete): +class Bernoulli(Univariate, Discrete): R"""Bernoulli log-likelihood The Bernoulli distribution describes the probability of successes @@ -146,10 +161,14 @@ class Bernoulli(Discrete): """ def __init__(self, p, *args, **kwargs): - super(Bernoulli, self).__init__(*args, **kwargs) - self.p = p + self.p = tt.as_tensor_variable(p) + + dist_params = (self.p,) + self.mode = tt.cast(tt.round(p), 'int8') + super(Bernoulli, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) return generate_samples(stats.bernoulli.rvs, p, @@ -164,7 +183,7 @@ def logp(self, value): p >= 0, p <= 1) -class Poisson(Discrete): +class Poisson(Univariate, Discrete): R""" Poisson log-likelihood. @@ -192,10 +211,14 @@ class Poisson(Discrete): """ def __init__(self, mu, *args, **kwargs): - super(Poisson, self).__init__(*args, **kwargs) - self.mu = mu + self.mu = tt.as_tensor_variable(mu) + self.mode = tt.floor(mu).astype('int32') + dist_params = (self.mu,) + + super(Poisson, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) return generate_samples(stats.poisson.rvs, mu, @@ -212,7 +235,7 @@ def logp(self, value): 0, log_prob) -class NegativeBinomial(Discrete): +class NegativeBinomial(Univariate, Discrete): R""" Negative binomial log-likelihood. @@ -239,11 +262,16 @@ class NegativeBinomial(Discrete): """ def __init__(self, mu, alpha, *args, **kwargs): - super(NegativeBinomial, self).__init__(*args, **kwargs) - self.mu = mu - self.alpha = alpha + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.mode = tt.floor(mu).astype('int32') + dist_params = (self.mu, self.alpha) + + super(NegativeBinomial, self).__init__(dist_params, + *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha, @@ -266,7 +294,7 @@ def logp(self, value): negbinom) -class Geometric(Discrete): +class Geometric(Univariate, Discrete): R""" Geometric log-likelihood. 
@@ -288,9 +316,14 @@ class Geometric(Discrete): """ def __init__(self, p, *args, **kwargs): - super(Geometric, self).__init__(*args, **kwargs) - self.p = p - self.mode = 1 + self.p = tt.as_tensor_variable(p) + + self.mode = tt.as_tensor_variable(1) + + dist_params = (self.p,) + + super(Geometric, self).__init__( + dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -304,7 +337,7 @@ def logp(self, value): 0 <= p, p <= 1, value >= 1) -class DiscreteUniform(Discrete): +class DiscreteUniform(Univariate, Discrete): R""" Discrete uniform distribution. @@ -325,12 +358,16 @@ class DiscreteUniform(Discrete): """ def __init__(self, lower, upper, *args, **kwargs): - super(DiscreteUniform, self).__init__(*args, **kwargs) self.lower = tt.floor(lower).astype('int32') self.upper = tt.floor(upper).astype('int32') + self.mode = tt.maximum( tt.floor((upper - lower) / 2.).astype('int32'), self.lower) + dist_params = (self.lower, self.upper) + + super(DiscreteUniform, self).__init__(dist_params, *args, **kwargs) + def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper # as array-like. @@ -352,7 +389,7 @@ def logp(self, value): lower <= value, value <= upper) -class Categorical(Discrete): +class Categorical(Univariate, Discrete): R""" Categorical log-likelihood. @@ -372,15 +409,23 @@ class Categorical(Discrete): """ def __init__(self, p, *args, **kwargs): - super(Categorical, self).__init__(*args, **kwargs) - try: - self.k = tt.shape(p)[-1].tag.test_value - except AttributeError: - self.k = tt.shape(p)[-1] + + self.k = tt.shape(p)[-1] + + if has_const_inputs(self.k): + # FIXME: This feels like a hack. Seems like it would be better to + # evaluate somewhere else (e.g. exactly where a value is needed). + self.k = self.k.eval() + self.p = tt.as_tensor_variable(p) self.p = (p.T / tt.sum(p, -1)).T + self.mode = tt.argmax(p) + dist_params = (self.p,) + + super(Categorical, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): def random_choice(k, *args, **kwargs): if len(kwargs['p'].shape) > 1: @@ -413,7 +458,7 @@ def logp(self, value): sumto1) -class Constant(Discrete): +class Constant(Univariate, Discrete): """ Constant log-likelihood. @@ -424,8 +469,10 @@ class Constant(Discrete): """ def __init__(self, c, *args, **kwargs): - super(Constant, self).__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.c = c + self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) + dist_params = (self.c,) + + super(Constant, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -441,13 +488,14 @@ def logp(self, value): c = self.c return bound(0, tt.eq(value, c)) + def ConstantDist(*args, **kwargs): warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.", - DeprecationWarning) + DeprecationWarning) return Constant(*args, **kwargs) -class ZeroInflatedPoisson(Discrete): +class ZeroInflatedPoisson(Univariate, Discrete): R""" Zero-inflated Poisson log-likelihood. 
@@ -478,12 +526,15 @@ class ZeroInflatedPoisson(Discrete): """ def __init__(self, theta, psi, *args, **kwargs): - super(ZeroInflatedPoisson, self).__init__(*args, **kwargs) - self.theta = theta - self.psi = psi + self.theta = tt.as_tensor_variable(theta) + self.psi = tt.as_tensor_variable(psi) self.pois = Poisson.dist(theta) self.mode = self.pois.mode + dist_params = (self.theta, self.psi) + + super(ZeroInflatedPoisson, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): theta, psi = draw_values([self.theta, self.psi], point=point) g = generate_samples(stats.poisson.rvs, theta, @@ -497,7 +548,7 @@ def logp(self, value): tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta))) -class ZeroInflatedNegativeBinomial(Discrete): +class ZeroInflatedNegativeBinomial(Univariate, Discrete): R""" Zero-Inflated Negative binomial log-likelihood. @@ -529,13 +580,18 @@ class ZeroInflatedNegativeBinomial(Discrete): """ def __init__(self, mu, alpha, psi, *args, **kwargs): - super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs) - self.mu = mu - self.alpha = alpha - self.psi = psi + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.psi = tt.as_tensor_variable(psi) + self.nb = NegativeBinomial.dist(mu, alpha) self.mode = self.nb.mode + dist_params = (self.mu, self.alpha, self.psi) + + super(ZeroInflatedNegativeBinomial, self).__init__( + dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, alpha, psi = draw_values( [self.mu, self.alpha, self.psi], point=point) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 61570ceaa38..60e187c0774 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,18 +1,71 @@ +import collections +import warnings + import numpy as np import theano.tensor as tt from theano import function +from theano.gof import Constant +from theano.tensor.raw_random import _infer_ndim_bcast from ..memoize import memoize from ..model import Model, get_named_nodes from ..vartypes import string_types -__all__ = ['DensityDist', 'Distribution', 'Continuous', - 'Discrete', 'NoDistribution', 'TensorType', 'draw_values'] +__all__ = ['DensityDist', 'Distribution', + 'Continuous', 'Discrete', 'NoDistribution', 'TensorType', + 'Multivariate'] + class _Unpickling(object): pass + +def _as_tensor_shape_variable(var): + r""" Just some useful shape object parsing steps. + Mostly copied from `_infer_ndim_bcast`. + """ + + if var is None: + return tt.constant([], dtype='int64') + + res = var + if isinstance(res, (tuple, list)): + if len(res) == 0: + return tt.constant([], dtype='int64') + res = tt.as_tensor_variable(res, ndim=1) + + else: + if res.ndim != 1: + raise TypeError("shape must be a vector or list of scalar, got\ + '%s'" % res) + + if (not (res.dtype.startswith('int') or + res.dtype.startswith('uint'))): + + raise TypeError('shape must be an integer vector or list', + res.dtype) + return res + + +def has_const_inputs(nodes): + r"""Checks that nodes have only constant inputs for + their Ops. Useful for justifying one-time evals. 
+ """ + if not isinstance(nodes, collections.Iterable): + nodes = [nodes] + + for node in nodes: + owner = getattr(node, 'owner', None) + if owner is not None: + if not has_const_inputs(owner.inputs): + return False + elif not isinstance(node, Constant): + return False + + return True + + class Distribution(object): """Statistical distribution""" def __new__(cls, name, *args, **kwargs): @@ -41,37 +94,155 @@ def dist(cls, *args, **kwargs): dist.__init__(*args, **kwargs) return dist - def __init__(self, shape, dtype, testval=None, defaults=[], transform=None): - self.shape = np.atleast_1d(shape) - if False in (np.floor(self.shape) == self.shape): - raise TypeError("Expected int elements in shape") + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=None, transform=None, *args, **kwargs): + r""" + Distributions are specified in terms of the shape of their support, the shape + of the space of independent instances and the shape of the space of replications. + The "total" shape of the distribution is the concatenation of each of these + spaces, i.e. `dist.shape = shape_reps + shape_ind + shape_supp`. + + We're able to specify the number of dimensions for the + support of a concrete distribution type (e.g. scalar distributions have + `ndim_supp=0` and a multivariate normal has vector support, + so `ndim_supp=1`) and have the exact sizes of each dimension symbolic. + To actually instantiate a distribution, + we at least need a list/tuple/vector (`ndim_supp` in + length) containing symbolic scalars, `shape_supp`, representing the + exact size of each dimension. In the case that `shape_supp` has an + unknown length at the graph building stage (e.g. it is a generic Theano + vector or tensor), we must provide `ndim_supp`. + + The symbolic scalars `shape_supp` must either be required by a + distribution's constructor, or inferred through its required + parameters. Since most distributions are either scalar, or + have parameters within the space of their support (e.g. the + multivariate normal's mean parameter) inference can be + straight-forward. In the latter case, we refer to the parameters + as "informative". + + We also attempt to handle the specification of a collections of independent-- + but not identical instances of the base distribution (each with support as above). + These have a space with shape `shape_ind`. Generally, `shape_ind` will be + implicitly given by the distribution's parameters. For instance, + if a multivariate normal distribution is instantiated with a matrix mean + parameter `mu`, we can assume that each row specifies the mean for an + independent distribution. In this case the covariance parameter would either + have to be an `ndim=3` tensor, for which the last two dimensions specify each + covariance matrix, or a single matrix that is to apply to each independent + variate implied by the matrix `mu`. + + Here are a few ways of inferring shapes: + * When a distribution is scalar, then `shape_supp = ()` + * and has an informative parameter, e.g. `mu`, then `shape_ind = tt.shape(mu)`. + * When a distribution is multivariate + * and has an informative parameter, e.g. `mu`, then + `shape_supp = tt.shape(mu)[-ndim_supp:]` and `shape_ind = tt.shape(mu)[-ndim_supp:]`. + In all remaining cases the shapes must be provided by the caller. + `shape_reps` is always provided by the caller. + + + Parameters + ---------- + shape_supp + tuple + Shape of the support for this distribution type. 
+ shape_ind + tuple + Dimension of independent, but not necessarily identical, copies of + this distribution type. + shape_reps + tuple + Dimension of independent and identical copies of + this distribution type. + bcast + A tuple of boolean values. + Broadcast dimensions. + dtype + The data type. + testval + Test value to be added to the Theano variable's tag.test_value. + defaults + List of string names for attributes that can be used as this + distribution's default value. + transform + A transform function + """ + + self.shape_supp = _as_tensor_shape_variable(shape_supp) + self.ndim_supp = tt.get_vector_length(self.shape_supp) + self.shape_ind = _as_tensor_shape_variable(shape_ind) + self.ndim_ind = tt.get_vector_length(self.shape_ind) + self.shape_reps = _as_tensor_shape_variable(shape_reps) + self.ndim_reps = tt.get_vector_length(self.shape_reps) + + self.bcast = bcast self.dtype = dtype - self.type = TensorType(self.dtype, self.shape) - self.testval = testval + + ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps + if ndim_sum == 0: + self.shape = tt.constant([], dtype='int32') + else: + self.shape = tuple(self.shape_reps) +\ + tuple(self.shape_ind) +\ + tuple(self.shape_supp) + self.shape = tt.as_tensor_variable(self.shape) + + if has_const_inputs(self.shape): + # FIXME: This feels like a hack. Seems like it would be better to + # evaluate somewhere else (e.g. exactly where a value is needed). + self.shape = self.shape.eval() + + self.ndim = tt.get_vector_length(self.shape) self.defaults = defaults self.transform = transform + if testval is None: + testval = self.get_test_value(defaults=self.defaults) + + self.testval = testval + self.type = tt.TensorType(str(dtype), self.bcast) + def default(self): - return self.get_test_val(self.testval, self.defaults) + return self.get_test_value(self.testval, self.defaults) - def get_test_val(self, val, defaults): + def get_test_value(self, val=None, defaults=None): + test_val = None if val is None: for v in defaults: - if hasattr(self, v) and np.all(np.isfinite(self.getattr_value(v))): - return self.getattr_value(v) + the_attr = getattr(self, v, None) + + if the_attr is not None and np.all(np.isfinite(self.getattr_value(v))): + test_val = self.getattr_value(v) + break else: - return self.getattr_value(val) + test_val = self.getattr_value(val) - if val is None: - raise AttributeError(str(self) + " has no finite default value to use, checked: " + - str(defaults) + " pass testval argument or adjust so value is finite.") + if test_val is not None: + if self.ndim_reps > 0 and hasattr(self.shape_reps, 'value'): + bcast_shape = self.getattr_value(self.shape) + test_val = np.broadcast_to(test_val, bcast_shape) + + return test_val + + raise AttributeError(str(self) + " has no finite default value to use, checked: " + + str(defaults) + " pass testval argument or adjust so value is finite.") def getattr_value(self, val): + """ Attempts to obtain a non-symbolic value for an attribute + (potentially given in str form) + """ if isinstance(val, string_types): val = getattr(self, val) - if isinstance(val, tt.TensorVariable): + # Could use Theano's: + # val = theano.gof.op.get_test_value(val) + if isinstance(val, tt.sharedvar.SharedVariable): + return val.get_value() + elif isinstance(val, tt.TensorVariable): return val.tag.test_value + elif isinstance(val, tt.TensorConstant): + return val.value if isinstance(val, tt.TensorConstant): return val.value @@ -85,10 +256,14 @@ def TensorType(dtype, shape): class NoDistribution(Distribution): - def 
__init__(self, shape, dtype, testval=None, defaults=[], transform=None, parent_dist=None, *args, **kwargs): - super(NoDistribution, self).__init__(shape=shape, dtype=dtype, - testval=testval, defaults=defaults, - *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=[], transform=None, parent_dist=None, + *args, **kwargs): + + super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, testval=testval, + defaults=defaults, *args, + **kwargs) self.parent_dist = parent_dist def __getattr__(self, name): @@ -103,38 +278,130 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - - def __init__(self, shape=(), dtype='int64', defaults=['mode'], *args, **kwargs): + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype='int64', + defaults=['mode'], *args, **kwargs): if dtype != 'int64': raise TypeError('Discrete classes expect dtype to be int64.') - super(Discrete, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + super(Discrete, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, *args, + **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, shape=(), dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): - super(Continuous, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, + dtype='float64', defaults=['median', 'mean', 'mode'], *args, + **kwargs): + super(Continuous, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, + *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, shape=(), dtype='float64', testval=0, *args, **kwargs): - super(DensityDist, self).__init__( - shape, dtype, testval, *args, **kwargs) + def __init__(self, logp, shape_supp=None, shape_ind=None, shape_reps=None, + bcast=None, dtype='float64', *args, **kwargs): self.logp = logp + # TODO: Could add some generic handling like this in + # `Distribution.__init__`, just to handle deprecated use of `shape`. + if (shape_supp is None) or (shape_ind is None) or (shape_reps is None): + + # If we only got the old `shape` parameter, assume it's only + # specifying replications. + old_shape = kwargs.get('shape', None) + if old_shape is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_supp = tuple() + shape_ind = tuple() + shape_reps = old_shape + + bcast += tuple(True if s_ == 1 else False + for s_ in old_shape) + else: + raise ValueError("shapes and bcast must be specified.") + + super(DensityDist, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, *args, + **kwargs) + + +class Univariate(Distribution): + + def __init__(self, dist_params, ndim=None, size=None, + shape=None, dtype=None, + *args, **kwargs): + r"""This constructor automates some of the shape determination, since + univariate distributions are simple in that regard. + + Parameters + ---------- + dist_params: tuple + A tuple containing the distributions parameters. These parameters + are checked for shape compatibility(/"broadcastibility"), so make + sure their dimensions line up. 
For example, if a the distribution + for a scalar random variable has parameters `(a, b)`, where `a` is + scalar and `b` is a vector, if we get an array of `a` values, then + `b` should be given an extra dimension to broadcast along those + independent variates implied by `a`. This function will try + to figure that stuff out, but no promises. + ndim: int + A hint for the number of dimensions. + (Not currently used, but could be useful in cases for which + the shape dimensions aren't easily assessed.) + size: tuple (int) + Shape of replications. + shape: tuple (int) + Deprecated; use ``size``. + dtype: string + Name of primitive numeric type. + """ + + if shape is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + if size is None: + size = shape + + self.dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) + + # Parameters need to match in shape; we use the following to + # determine what the [independent terms'] ultimate shape is. + ndim, ind_size, bcast = _infer_ndim_bcast(*((ndim, None) + + self.dist_params)) + + # We have to be careful with `as_tensor_variable`; it will produce + # empty collections with dtype=floatX, which violates our expectations + # for a shape object. + if size is None or np.alen(size) == 0: + size = np.array((), dtype=np.int) + elif np.shape(size) == (): + size = np.asarray((size,), dtype=np.int) + else: + size = np.asarray(size, dtype=np.int) -class MultivariateContinuous(Continuous): + # Add broadcast info from replication dimensions. + bcast += tuple(True if s_ == 1 else False + for s_ in size) - pass + size = tt.as_tensor_variable(size, ndim=1) + if dtype is None: + dtype = tt.scal.upcast(*((tt.config.floatX,) + + tuple(x.dtype for x in self.dist_params))) -class MultivariateDiscrete(Discrete): + super(Univariate, self).__init__( + tuple(), ind_size, size, bcast, *args, **kwargs) + +class Multivariate(Distribution): + r"""TODO: Automate some of the multivariate shape determination? + """ pass diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 456f40be591..96961d4b140 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -12,9 +12,14 @@ import pymc3 as pm from . import transforms -from .distribution import Continuous, Discrete, draw_values, generate_samples from ..model import Deterministic from .continuous import ChiSquared, Normal +from .distribution import ( + Multivariate, + Continuous, + Discrete, + draw_values, + generate_samples) from .special import gammaln, multigammaln from .dist_math import bound, logpow, factln @@ -58,8 +63,9 @@ def get_tau_cov(mu, tau=None, cov=None): return (tau, cov) -class MvNormal(Continuous): - R""" + +class MvNormal(Multivariate, Continuous): + r""" Multivariate normal log-likelihood. .. math:: @@ -82,21 +88,45 @@ class MvNormal(Continuous): Covariance matrix. tau : array, optional Precision matrix. - """ + ndim : int + TODO + size : tuple of ints + TODO - def __init__(self, mu, cov=None, tau=None, *args, **kwargs): - super(MvNormal, self).__init__(*args, **kwargs) + """ + def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, + **kwargs): warnings.warn(('The calling signature of MvNormal() has changed. The new signature is:\n' 'MvNormal(name, mu, cov) instead of MvNormal(name, mu, tau).' 
'You do not need to explicitly invert the covariance matrix anymore.'), DeprecationWarning) - self.mean = self.median = self.mode = self.mu = mu - self.tau, self.cov = get_tau_cov(mu, tau=tau, cov=cov) + # TODO: Need to apply replications/size. + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + + self.dist_params = (self.mu, self.tau) + + self.median = self.mode = self.mean = self.mu + # TODO: How do we want to use ndim? + shape_supp = (self.mu.shape[-1],) + shape_ind = tuple(self.mu.shape[:-1]) + + if self.mu.ndim > 0: + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + else: + bcast = (False,) + + if dtype is None: + dtype = self.mu.dtype + + super(MvNormal, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def random(self, point=None, size=None): mu, cov = draw_values([self.mu, self.cov], point=point) def _random(mean, cov, size=None): + # FIXME: cov here is actually precision? return stats.multivariate_normal.rvs( mean, cov, None if size == mean.shape else size) @@ -119,9 +149,9 @@ def logp(self, value): return -1 / 2. * result -class MvStudentT(Continuous): - R""" - Multivariate Student-T log-likelihood. +class MvStudentT(Multivariate, Continuous): + """ + Multivariate Student T log-likelihood. .. math:: f(\mathbf{x}| \nu,\mu,\Sigma) = @@ -145,15 +175,28 @@ class MvStudentT(Continuous): mu : array Vector of means. """ + def __init__(self, nu, Sigma, mu=None, ndim=None, size=None, dtype=None, *args, **kwargs): + # TODO: Need to apply replications/size. + self.nu = tt.as_tensor_variable(nu) + self.Sigma = tt.as_tensor_variable(Sigma) + + # TODO: How do we want to use ndim? + shape_supp = self.Sigma.shape[0] + shape_ind = self.Sigma.shape[:-2] - def __init__(self, nu, Sigma, mu=None, *args, **kwargs): - super(MvStudentT, self).__init__(*args, **kwargs) - self.nu = nu - self.mu = np.zeros(Sigma.shape[0]) if mu is None else mu - self.Sigma = Sigma + self.mu = tt.as_tensor_variable(tt.zeros(shape_supp) if mu is None else mu) + self.dist_params = (self.nu, self.mu, self.Sigma) self.mean = self.median = self.mode = self.mu = mu + # FIXME: this isn't correct/ideal + bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) + if dtype is None: + dtype = self.mu.dtype + + super(MvStudentT, self).__init__(shape_supp, shape_ind, (), ndim, + bcast, dtype, *args, **kwargs) + def random(self, point=None, size=None): chi2 = np.random.chisquare mvn = np.random.multivariate_normal @@ -170,6 +213,7 @@ def logp(self, value): mu = self.mu d = S.shape[0] + n = value.shape[0] X = value - mu @@ -182,7 +226,7 @@ def logp(self, value): return log_pdf -class Dirichlet(Continuous): +class Dirichlet(Multivariate, Continuous): R""" Dirichlet log-likelihood. @@ -212,19 +256,36 @@ class Dirichlet(Continuous): as a parent of Multinomial and Categorical nevertheless. 
""" - def __init__(self, a, transform=transforms.stick_breaking, + def __init__(self, a, dtype=tt.config.floatX, + transform=transforms.stick_breaking, *args, **kwargs): - shape = a.shape[-1] - kwargs.setdefault("shape", shape) - super(Dirichlet, self).__init__(transform=transform, *args, **kwargs) + self.a = tt.as_tensor_variable(a) + self.mean = self.a / tt.sum(self.a) + self.mode = tt.switch(tt.all(self.a > 1), + (self.a - 1) / tt.sum(self.a - 1), + np.nan) - self.k = shape - self.a = a - self.mean = a / tt.sum(a) + self.dist_params = (self.a,) - self.mode = tt.switch(tt.all(a > 1), - (a - 1) / tt.sum(a - 1), - np.nan) + shape_supp = (self.a.shape[-1],) + shape_ind = tuple(self.a.shape[:-1]) + + shape_reps = kwargs.pop('shape', None) + if shape_reps is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_reps = kwargs.pop('size', ()) + + # FIXME: this isn't correct/ideal + if self.a.ndim > 0: + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + else: + bcast = (False,) + + super(Dirichlet, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, transform=transform, + *args, **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -233,12 +294,12 @@ def _random(a, size=None): return stats.dirichlet.rvs(a, None if size == a.shape else size) samples = generate_samples(_random, a, - dist_shape=self.shape, + dist_shape=self.shape_supp, size=size) return samples def logp(self, value): - k = self.k + k = self.shape_supp a = self.a # only defined for sum(value) == 1 @@ -249,7 +310,7 @@ def logp(self, value): broadcast_conditions=False) -class Multinomial(Discrete): +class Multinomial(Multivariate, Discrete): R""" Multinomial log-likelihood. @@ -281,26 +342,42 @@ class Multinomial(Discrete): rescaled otherwise. """ - def __init__(self, n, p, *args, **kwargs): - super(Multinomial, self).__init__(*args, **kwargs) + def __init__(self, n, p, ndim=None, size=None, dtype='int32', + *args, **kwargs): + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p) + self.p = self.p / tt.sum(self.p, axis=-1, keepdims=True) + + self.mean = self.n * self.p + self.mode = tt.cast(tt.round(self.mean), dtype) - p = p / tt.sum(p, axis=-1, keepdims=True) + self.dist_params = (self.n, self.p) - if len(self.shape) == 2: - try: - assert n.shape == (self.shape[0],) - except AttributeError: - # this occurs when n is a scalar Python int or float - n *= tt.ones(self.shape[0]) + # XXX, FIXME: Is this really multivariate? + shape_supp = (1,) + shape_ind = () + # shape_supp = (self.p.shape[-1],) + # shape_ind = tuple(self.p.shape[:-1]) - self.n = tt.shape_padright(n) - self.p = p if p.ndim == 2 else tt.shape_padleft(p) - else: - self.n = n - self.p = p + shape_reps = kwargs.pop('shape', None) + if shape_reps is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_reps = kwargs.pop('size', ()) - self.mean = self.n * self.p - self.mode = tt.cast(tt.round(self.mean), 'int32') + # XXX, FIXME: Is this really multivariate? 
+ bcast = (False,) + # if self.p.ndim > 0: + # bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + # else: + # bcast = (False,) + + if dtype is None: + dtype = self.p.dtype + + super(Multinomial, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, *args, **kwargs) def _random(self, n, p, size=None): if size == p.shape: @@ -378,7 +455,7 @@ def __str__(self): matrix_pos_def = PosDefMatrix() -class Wishart(Continuous): +class Wishart(Multivariate, Continuous): R""" Wishart log-likelihood. @@ -415,8 +492,8 @@ class Wishart(Continuous): use WishartBartlett or LKJCorr. """ - def __init__(self, n, V, *args, **kwargs): - super(Wishart, self).__init__(*args, **kwargs) + def __init__(self, n, V, ndim=None, size=None, dtype=None, + *args, **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -424,17 +501,35 @@ def __init__(self, n, V, *args, **kwargs): 'For more information on the issues surrounding the ' 'Wishart see here: https://github.com/pymc-devs/pymc3/issues/538.', UserWarning) - self.n = n - self.p = p = V.shape[0] - self.V = V - self.mean = n * V - self.mode = tt.switch(1 * (n >= p + 1), - (n - p - 1) * V, + # TODO: Need to apply replications/size. + self.n = tt.as_tensor_variable(n, ndim=0) + self.V = tt.as_tensor_variable(V) + + self.dist_params = (self.n, self.V) + + # TODO: How do we want to use ndim? + shape_supp = (self.V.shape[-1],) + shape_ind = () + + self.mode = tt.switch(1 * (self.n >= shape_supp + 1), + (self.n - shape_supp - 1) * self.V, np.nan) + self.mean = self.n * self.V + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.V.dtype + + self.mode = tt.switch(1 * (self.n >= self.shape_supp + 1), + (self.n - self.shape_supp - 1) * self.V, np.nan) + super(Wishart, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) + def logp(self, X): n = self.n - p = self.p + p = self.shape_supp V = self.V IVI = det(V) @@ -447,8 +542,7 @@ def logp(self, X): matrix_pos_def(X), tt.eq(X, X.T), n > (p - 1), - broadcast_conditions=False - ) + broadcast_conditions=False) def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None): @@ -529,7 +623,7 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) -class LKJCorr(Continuous): +class LKJCorr(Multivariate, Continuous): R""" The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood. @@ -569,16 +663,34 @@ class LKJCorr(Continuous): 100(9), pp.1989-2001. """ - def __init__(self, n, p, *args, **kwargs): - self.n = n - self.p = p - n_elem = int(p * (p - 1) / 2) - self.mean = np.zeros(n_elem) - super(LKJCorr, self).__init__(shape=n_elem, *args, **kwargs) + def __init__(self, n, p, ndim=None, size=None, dtype=tt.config.floatX, + *args, **kwargs): + # TODO: Need to apply replications/size. + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p, ndim=0) + + self.dist_params = (self.n, self.p) - self.tri_index = np.zeros([p, p], dtype=int) - self.tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem) - self.tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem) + # TODO: How do we want to use ndim? + n_elem = (self.p * (self.p - 1) / 2) + shape_supp = (n_elem,) + self.mean = tt.zeros(n_elem) + + # FIXME: triu, bcast, etc. 
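The symbolic tri_index construction above is flagged FIXME because Theano tensors do not support this kind of in-place fancy assignment. The removed numpy version behaved like the sketch below, mapping each off-diagonal position of a p x p matrix to an entry of the length p*(p-1)/2 correlation vector (the unit diagonal is handled separately).

import numpy as np

# Sketch of the original numpy tri_index: indexing a flattened vector of
# upper-triangle correlations with this matrix rebuilds the symmetric
# off-diagonal structure of a p x p correlation matrix.
p = 3
n_elem = p * (p - 1) // 2
tri_index = np.zeros((p, p), dtype=int)
tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem)

x = np.array([0.1, 0.2, 0.3])      # flattened upper-triangle correlations
print(x[tri_index])
# [[0.1 0.1 0.2]
#  [0.1 0.1 0.3]
#  [0.2 0.3 0.1]]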
+ self.tri_index = tt.zeros((self.p, self.p), dtype=int) + self.tri_index[tt.triu(self.p, k=1)] = tt.arange(n_elem) + self.tri_index[tt.triu(self.p, k=1)[::-1]] = tt.arange(n_elem) + + shape_ind = () + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.n.dtype + + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), + bcast, dtype, + *args, **kwargs) def _normalizing_constant(self, n, p): if n == 1: @@ -611,5 +723,4 @@ def logp(self, x): tt.all(X <= 1), tt.all(X >= -1), matrix_pos_def(X), n > 0, - broadcast_conditions=False - ) + broadcast_conditions=False) diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 8dd2e282fbc..94fc95bb34d 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -20,7 +20,7 @@ class AR1(Continuous): """ def __init__(self, k, tau_e, *args, **kwargs): - super(AR1, self).__init__(*args, **kwargs) + super(AR1, self).__init__(ndim=1, size=(), *args, **kwargs) self.k = k self.tau_e = tau_e self.tau = tau_e * (1 - k ** 2) @@ -56,7 +56,7 @@ class GaussianRandomWalk(Continuous): def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0., *args, **kwargs): - super(GaussianRandomWalk, self).__init__(*args, **kwargs) + super(GaussianRandomWalk, self).__init__(ndim=1, size=(), *args, **kwargs) self.tau = tau self.sd = sd self.mu = mu @@ -100,7 +100,7 @@ class GARCH11(Continuous): def __init__(self, omega=None, alpha_1=None, beta_1=None, initial_vol=None, *args, **kwargs): - super(GARCH11, self).__init__(*args, **kwargs) + super(GARCH11, self).__init__(ndim=1, size=(), *args, **kwargs) self.omega = omega self.alpha_1 = alpha_1 diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 4234d4530b1..c02779e07c2 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -46,13 +46,14 @@ class TransformedDistribution(Distribution): """A distribution that has been transformed from one space into another.""" def __init__(self, dist, transform, *args, **kwargs): - """ + r""" Parameters ---------- dist : Distribution + TODO transform : Transform - args, kwargs - arguments to Distribution""" + TODO + """ forward = transform.forward testval = forward(dist.default()) @@ -61,9 +62,18 @@ def __init__(self, dist, transform, *args, **kwargs): v = forward(FreeRV(name='v', distribution=dist)) self.type = v.type + # We can get the transformed support shape from a single dummy var in + # only the support (i.e. without the independent or replication dimensions). + shape_supp = forward(tt.alloc(1, *dist.shape_supp)).shape + + # XXX: We assume these two shapes don't change under a transform. + shape_ind = dist.shape_ind + shape_reps = dist.shape_reps + super(TransformedDistribution, self).__init__( - v.shape.tag.test_value, v.dtype, - testval, dist.defaults, *args, **kwargs) + shape_supp, shape_ind, shape_reps, + v.broadcastable, v.dtype, + testval.tag.test_value, dist.defaults, *args, **kwargs) if transform.name == 'stickbreaking': b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) @@ -193,7 +203,7 @@ class StickBreaking(Transform): Parameters ---------- eps : float, positive value - A small value for numerical stability in invlogit. + A small value for numerical stability in invlogit. 
""" name = "stickbreaking" @@ -250,7 +260,7 @@ def backward(self, y): def forward(self, x): return x - + def jacobian_det(self, x): return 0 diff --git a/pymc3/model.py b/pymc3/model.py index d28f2340e05..12d657cef25 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -183,7 +183,6 @@ def __init__(self): self.potentials = [] self.missing_values = [] self.model = self - @property @memoize def bijection(self): @@ -243,6 +242,11 @@ def unobserved_RVs(self): """List of all random variable, including deterministic ones.""" return self.vars + self.deterministics + @property + def untransformed_RVs(self): + """All variables except those transformed for MCMC""" + return [v for v in self.vars if v not in self.hidden_RVs] + self.deterministics + @property def test_point(self): """Test point used to check that the model doesn't generate errors""" @@ -494,17 +498,34 @@ def __init__(self, type=None, owner=None, index=None, name=None, owner : theano owner (optional) name : str distribution : Distribution - model : Model""" + model : Model + """ if type is None: type = distribution.type super(FreeRV, self).__init__(type, owner, index, name) if distribution is not None: - self.dshape = tuple(distribution.shape) - self.dsize = int(np.prod(distribution.shape)) + self.dshape = distribution.shape + + const_dsize = 1 + const_shape = tuple() + for s in distribution.shape: + try: + shape_val = tt.get_scalar_constant_value(s) + const_dsize *= shape_val + const_shape += shape_val + except tt.NotScalarConstantError: + const_dsize = None + const_shape = None + break + + if const_dsize is None: + self.dsize = tt.prod(distribution.shape) + else: + self.dsize = const_dsize + self.distribution = distribution - self.tag.test_value = np.ones( - distribution.shape, distribution.dtype) * distribution.default() + self.tag.test_value = distribution.testval self.logp_elemwiset = distribution.logp(self) self.model = model @@ -536,17 +557,44 @@ def as_tensor(data, name, model, distribution): dtype = distribution.dtype data = pandas_to_array(data).astype(dtype) + missing_in_dims = 0 if hasattr(data, 'mask'): + missing_in_dims = np.ma.count_masked(data, axis=0) + + if not np.all(missing_in_dims == 0): from .distributions import NoDistribution - testval = distribution.testval or data.mean().astype(dtype) - fakedist = NoDistribution.dist(shape=data.mask.sum(), dtype=dtype, - testval=testval, parent_dist=distribution) + + # XXX: There was an `or` between these two values; was this a + # condition on None values (as interpreted below), or some kind of + # bitwise operation on the values? + testval = distribution.testval + if testval is None: + testval = data.mean() + testval = testval.astype(dtype) + + # FIXME: What are the correct values for these? Does it + # really matter? + shape_supp = tuple() + shape_ind = tuple() + missing_count = np.sum(missing_in_dims) + shape_reps = tt.as_tensor_variable(np.atleast_1d(missing_count)) + + bcast = (False,) + + # XXX: If these missing value distributions are being used + # directly, then the result will be incorrect when + # the parent distribution is multivariate and the missing values were + # found about the support and/or independent dimensions. 
+ fakedist = NoDistribution.dist(shape_supp, shape_ind, shape_reps, + bcast, dtype, + testval=testval[data.mask.nonzero()], + parent_dist=distribution) missing_values = FreeRV(name=name + '_missing', distribution=fakedist, model=model) constant = tt.as_tensor_variable(data.filled()) - dataTensor = tt.set_subtensor( - constant[data.mask.nonzero()], missing_values) + dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], + missing_values) dataTensor.missing_values = missing_values return dataTensor else: @@ -579,7 +627,6 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None, super(TensorVariable, self).__init__(type, None, None, name) if distribution is not None: - data = as_tensor(data, name, model, distribution) self.missing_values = data.missing_values diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py index e70b480422c..549c402a643 100644 --- a/pymc3/tests/test_distribution_defaults.py +++ b/pymc3/tests/test_distribution_defaults.py @@ -1,19 +1,21 @@ from __future__ import division from ..model import Model -from ..distributions import DiscreteUniform, Continuous +from ..distributions import DiscreteUniform, Univariate, Continuous import numpy as np from nose.tools import raises -class DistTest(Continuous): +class DistTest(Univariate, Continuous): def __init__(self, a, b, *args, **kwargs): - super(DistTest, self).__init__(*args, **kwargs) self.a = a self.b = b + dist_params = (self.a, self.b) + super(DistTest, self).__init__(dist_params, *args, **kwargs) + def logp(self, v): return 0 diff --git a/pymc3/tests/test_distribution_shapes.py b/pymc3/tests/test_distribution_shapes.py new file mode 100644 index 00000000000..d7637952be2 --- /dev/null +++ b/pymc3/tests/test_distribution_shapes.py @@ -0,0 +1,146 @@ +from __future__ import division + +from ..model import Model +from ..distributions import Normal + +import numpy as np +from nose.tools import raises + + +def test_univariate_shape_info(): + with Model() as model: + # Test scalar case with replications. + x = Normal('x', 0, 1, size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), ()) + assert x.distribution.ndim_ind == 0 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test scalar case with empty size input. + x = Normal('x', 0, 1, size=()) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, ()) + np.testing.assert_array_equal(np.shape(x.tag.test_value), ()) + # FIXME: `Distribution.random` adds an unnecessary dimension. + # np.testing.assert_array_equal(np.shape(x.random()), ()) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, ()) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), ()) + assert x.distribution.ndim_ind == 0 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), ()) + assert x.distribution.ndim_reps == 0 + + with Model() as model: + # Test independent terms alone. 
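+        # Here mu and tau each supply two values, so the draw picks up a
+        # single independent dimension of length 2 while the support stays
+        # scalar and there are no replications.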
+ x = Normal('x', mu=np.arange(0, 2), tau=np.arange(1, 3)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (2,)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (2,)) + np.testing.assert_array_equal(np.shape(x.random()), (2,)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, (False,)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), ()) + assert x.distribution.ndim_reps == 0 + + with Model() as model: + # Test independent terms and replication. + x = Normal('x', mu=np.arange(0, 2), tau=np.arange(1, 3), + size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test broadcasting among the independent terms. + x = Normal('x', mu=np.arange(0, 2), tau=1, size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test broadcasting among the independent terms, where independent + # terms are determined by a non-default test value parameter. 
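+        # As in the previous cases, the overall shape is the concatenation
+        # shape_reps + shape_ind + shape_supp, i.e. (3, 2) + (2,) + ()
+        # giving (3, 2, 2); here the independent dimension comes from tau
+        # rather than mu.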
+ x = Normal('x', mu=0, tau=np.r_[1, 2], size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + +# TODO +# def test_multivariate_shape_info(): +# with Model() as model: +# x = MvNormal('x', np.ones((3,)), np.zeros((3, 3))) +# +# with Model() as model: +# x = MvNormal('x', np.ones((3,)), np.zeros((3,)), size=(3, 2)) +# +# with Model() as model: +# x = MvNormal('x', np.ones((3, 2)), np.zeros((3, 3))) +# +# with Model() as model: +# x = Dirichlet('x', np.r_[1, 1, 1], size=(3, 2)) +# +# with Model() as model: +# x = Multinomial('x', 2, np.r_[0.5, 0.5], size=(3, 2)) +# +# with Model() as model: +# x = Multinomial('x', np.r_[2, 2], np.r_[0.5, 0.5]) +# +# with Model() as model: +# x = Multinomial('x', 2, np.r_[[0.5, 0.5], [0.1, 0.9]]) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 7f18ea0958d..d81cdefd68c 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -118,19 +118,19 @@ def build_model(distfam, valuedomain, vardomains, extra_args={}): def integrate_nd(f, domain, shape, dtype): - if shape == () or shape == (1,): + if np.array_equal(shape, ()) or np.array_equal(shape, (1,)): if dtype in continuous_types: return integrate.quad(f, domain.lower, domain.upper, epsabs=1e-8)[0] else: return sum(f(j) for j in range(domain.lower, domain.upper + 1)) - elif shape == (2,): + elif np.array_equal(shape, (2,)): def f2(a, b): return f([a, b]) return integrate.dblquad(f2, domain.lower[0], domain.upper[0], lambda _: domain.lower[1], lambda _: domain.upper[1])[0] - elif shape == (3,): + elif np.array_equal(shape, (3,)): def f3(a, b, c): return f([a, b, c]) diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 67889529b0c..879df2d72ee 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -208,11 +208,11 @@ def logp_(input): r = MRG_RandomStreams(seed=random_seed) if n_mcsamples == 1: - n = r.normal(size=inarray.tag.test_value.shape) + n = r.normal(size=np.shape(inarray.tag.test_value)) q = n * tt.exp(w) + u elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi)) else: - n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0])) + n = r.normal(size=(n_mcsamples, np.shape(u.tag.test_value)[0])) qs = n * tt.exp(w) + u logps, _ = theano.scan(fn=lambda q: logp_(q), outputs_info=None, @@ -255,7 +255,7 @@ def optimizer(loss, param): i_int = i.astype('int64') value = param_.get_value(borrow=True) accu = theano.shared( - np.zeros(value.shape + (n_win,), dtype=value.dtype)) + np.zeros(np.shape(value) + (n_win,), dtype=value.dtype)) grad = tt.grad(loss, param_) # Append squared gradient vector to accu_new @@ -324,17 +324,17 @@ def rvs(x): for v in global_RVs: u = theano.shared(vparams['means'][str(v)]).ravel() w = theano.shared(vparams['stds'][str(v)]).ravel() - n = r.normal(size=u.tag.test_value.shape) - updates.update({v: (n * w + u).reshape(v.tag.test_value.shape)}) + n 
= r.normal(size=np.shape(u.tag.test_value)) + updates.update({v: (n * w + u).reshape(np.shape(v.tag.test_value))}) if local_RVs is not None: for v_, (uw, _) in local_RVs.items(): v = get_transformed(v_) u = uw[0].ravel() w = uw[1].ravel() - n = r.normal(size=u.tag.test_value.shape) + n = r.normal(size=np.shape(u.tag.test_value)) updates.update( - {v: (n * tt.exp(w) + u).reshape(v.tag.test_value.shape)}) + {v: (n * tt.exp(w) + u).reshape(np.shape(v.tag.test_value))}) # Replace some nodes of the graph with variational distributions vars = model.free_RVs
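
The `np.shape(...)` substitutions in this hunk behave like the `.shape`
attribute for arrays while also tolerating plain Python scalars and other
array-likes; a minimal illustration follows (the exact motivation is an
assumption here, not stated in the patch):

    import numpy as np

    np.shape(np.ones((3, 2)))  # (3, 2), same as the .shape attribute
    np.shape(2.5)              # (), whereas (2.5).shape raises AttributeError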