From 884f5142140e073eda5e4935db97efb29b16ce3b Mon Sep 17 00:00:00 2001 From: brandon willard Date: Sat, 21 May 2016 19:23:00 -0500 Subject: [PATCH] initial distribution shape refactoring --- pymc3/distributions/continuous.py | 294 ++++++++++++++++------------ pymc3/distributions/discrete.py | 123 +++++++----- pymc3/distributions/distribution.py | 88 ++++++--- pymc3/distributions/multivariate.py | 156 ++++++++------- pymc3/model.py | 21 +- 5 files changed, 388 insertions(+), 294 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 23c732ebf2e..e4a2968dccd 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -8,7 +8,7 @@ from __future__ import division import numpy as np -import theano.tensor as T +import theano.tensor as tt from scipy import stats from . import transforms @@ -26,7 +26,8 @@ class PositiveUnivariateContinuous(UnivariateContinuous): def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.log) - super(PositiveUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + super(PositiveUnivariateContinuous, self).__init__(transform=transform, + *args, **kwargs) class UnitUnivariateContinuous(UnivariateContinuous): @@ -34,7 +35,8 @@ class UnitUnivariateContinuous(UnivariateContinuous): def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.logodds) - super(UnitUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + super(UnitUnivariateContinuous, self).__init__(transform=transform, + *args, **kwargs) def get_tau_sd(tau=None, sd=None): @@ -75,7 +77,7 @@ def get_tau_sd(tau=None, sd=None): tau = 1. * tau sd = 1. * sd - return (T.as_tensor_variable(tau), T.as_tensor_variable(sd)) + return (tt.as_tensor_variable(tau), tt.as_tensor_variable(sd)) class Uniform(UnivariateContinuous): @@ -100,17 +102,19 @@ class Uniform(UnivariateContinuous): Upper limit. """ - def __init__(self, lower=0, upper=1, transform='interval', size=None, ndim=None, dtype=None, *args, **kwargs): + def __init__(self, lower=0, upper=1, transform='interval', size=None, + ndim=None, dtype=None, *args, **kwargs): - lower = T.as_tensor_variable(lower) - upper = T.as_tensor_variable(upper) + lower = tt.as_tensor_variable(lower) + upper = tt.as_tensor_variable(upper) self.dist_params = (lower, upper) self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. self.median = self.mean - super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -126,7 +130,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): lower = self.lower upper = self.upper - return bound(-T.log(upper - lower), + return bound(-tt.log(upper - lower), value >= lower, value <= upper) @@ -138,16 +142,17 @@ class Flat(UnivariateContinuous): def __init__(self, ndim=None, size=None, dtype=None, *args, **kwargs): - self.median = T.as_tensor_variable(0.) + self.median = tt.as_tensor_variable(0.) 
self.dist_params = (self.median,) - super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') def logp(self, value): - return T.zeros_like(value) + return tt.zeros_like(value) class Normal(UnivariateContinuous): @@ -184,16 +189,18 @@ class Normal(UnivariateContinuous): Standard deviation (sd > 0). """ - def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): - mu = T.as_tensor_variable(mu) + mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.variance = 1. / self.tau self.dist_params = (self.mu, self.tau) - super(Normal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Normal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -206,7 +213,7 @@ def logp(self, value): tau = self.tau sd = self.sd mu = self.mu - return bound((-tau * (value - mu)**2 + T.log(tau / np.pi / 2.)) / 2., + return bound((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2., tau > 0, sd > 0) @@ -234,15 +241,17 @@ class HalfNormal(PositiveUnivariateContinuous): Standard deviation (sd > 0). """ - def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, + *args, **kwargs): self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) - self.mean = T.sqrt(2 / (np.pi * self.tau)) + self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. - 2 / np.pi) / self.tau self.dist_params = (self.tau,) - super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): tau = draw_values([self.tau], point=point) @@ -253,7 +262,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): tau = self.tau sd = self.sd - return bound(-0.5 * tau * value**2 + 0.5 * T.log(tau * 2. / np.pi), + return bound(-0.5 * tau * value**2 + 0.5 * tt.log(tau * 2. / np.pi), value >= 0, tau > 0, sd > 0) @@ -314,18 +323,20 @@ class Wald(PositiveUnivariateContinuous): The American Statistician, Vol. 30, No. 2, pp. 88-90 """ - def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, + size=None, dtype=None, *args, **kwargs): self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha - self.mode = self.mu * (T.sqrt(1. + (1.5 * self.mu / self.lam)**2) + self.mode = self.mu * (tt.sqrt(1. 
+ (1.5 * self.mu / self.lam)**2) - 1.5 * self.mu / self.lam) + alpha self.variance = (self.mu**3) / self.lam self.dist_params = (self.mu, self.lam, self.alpha) - super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_mu_lam_phi(self, mu, lam, phi): res = None @@ -345,7 +356,7 @@ def get_mu_lam_phi(self, mu, lam, phi): if res is None: raise ValueError('Wald distribution must specify either mu only, ' 'mu and lam, mu and phi, or lam and phi.') - return map(T.as_tensor_variable, res) + return map(tt.as_tensor_variable, res) def _random(self, mu, lam, alpha, size=None): v = np.random.normal(size=size)**2 @@ -369,13 +380,12 @@ def logp(self, value): lam = self.lam alpha = self.alpha # value *must* be iid. Otherwise this is wrong. - return bound(logpow(lam / (2. * np.pi), 0.5) - - logpow(value - alpha, 1.5) - - (0.5 * lam / (value - alpha) - * ((value - alpha - mu) / mu)**2), + return bound(logpow(lam / (2. * np.pi), 0.5) - + logpow(value - alpha, 1.5) - + (0.5 * lam / (value - alpha) * ((value - alpha - mu) / + mu)**2), # XXX these two are redundant. Please, check. - value > 0, value - alpha > 0, - mu > 0, lam > 0, alpha >= 0) + value > 0, value - alpha > 0, mu > 0, lam > 0, alpha >= 0) class Beta(UnitUnivariateContinuous): @@ -421,7 +431,8 @@ class Beta(UnitUnivariateContinuous): the binomial distribution. """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -431,7 +442,8 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None self.dist_params = (self.alpha, self.beta) - super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -444,7 +456,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): raise ValueError('Incompatible parameterization. Either use alpha ' 'and beta, or mu and sd to specify distribution.') - return T.as_tensor_variable(alpha), T.as_tensor_variable(beta) + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -457,9 +469,8 @@ def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - - betaln(alpha, beta), - value >= 0, value <= 1, + return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - + betaln(alpha, beta), value >= 0, value <= 1, alpha > 0, beta > 0) @@ -486,14 +497,15 @@ class Exponential(PositiveUnivariateContinuous): def __init__(self, lam, ndim=None, size=None, dtype=None, *args, **kwargs): self.lam = lam self.mean = 1. 
/ lam - self.median = self.mean * T.log(2) + self.median = self.mean * tt.log(2) self.mode = 0 self.dist_params = (self.lam,) self.variance = lam**-2 - super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): lam = draw_values([self.lam], point=point) @@ -503,7 +515,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): lam = self.lam - return bound(T.log(lam) - lam * value, value > 0, lam > 0) + return bound(tt.log(lam) - lam * value, value > 0, lam > 0) class Laplace(UnivariateContinuous): @@ -529,15 +541,17 @@ class Laplace(UnivariateContinuous): Scale parameter (b > 0). """ - def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, **kwargs): - self.b = T.as_tensor_variable(b) - self.mean = self.median = self.mode = self.mu = T.as_tensor_variable( + def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.b = tt.as_tensor_variable(b) + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( mu) self.variance = 2 * b**2 self.dist_params = (self.b, self.mu) - super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) @@ -549,7 +563,7 @@ def logp(self, value): mu = self.mu b = self.b - return -T.log(2 * b) - abs(value - mu) / b + return -tt.log(2 * b) - abs(value - mu) / b class Lognormal(PositiveUnivariateContinuous): @@ -581,18 +595,20 @@ class Lognormal(PositiveUnivariateContinuous): Scale parameter (tau > 0). """ - def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, + **kwargs): - self.mu = T.as_tensor_variable(mu) - self.tau = T.as_tensor_variable(tau) - self.mean = T.exp(mu + 1. / (2. * tau)) - self.median = T.exp(mu) - self.mode = T.exp(mu - 1. / tau) - self.variance = (T.exp(1. / tau) - 1.) * T.exp(2 * mu + 1. / tau) + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + self.mean = tt.exp(mu + 1. / (2. * tau)) + self.median = tt.exp(mu) + self.mode = tt.exp(mu - 1. / tau) + self.variance = (tt.exp(1. / tau) - 1.) * tt.exp(2 * mu + 1. / tau) self.dist_params = (self.mu, self.tau) - super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) @@ -607,10 +623,9 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu tau = self.tau - return bound(-0.5 * tau * (T.log(value) - mu)**2 - + 0.5 * T.log(tau / (2. * np.pi)) - - T.log(value), - tau > 0) + return bound(-0.5 * tau * (tt.log(value) - mu)**2 + + 0.5 * tt.log(tau / (2. * np.pi)) - + tt.log(value), tau > 0) class StudentT(UnivariateContinuous): @@ -642,18 +657,20 @@ class StudentT(UnivariateContinuous): Scale parameter (lam > 0). 
""" - def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): - self.nu = nu = T.as_tensor_variable(nu) + def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): + self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu - self.variance = T.switch((nu > 2) * 1, - (1 / self.lam) * (nu / (nu - 2)), - np.inf) + self.variance = tt.switch((nu > 2) * 1, + (1 / self.lam) * (nu / (nu - 2)), + np.inf) self.dist_params = (self.nu, self.mu, self.lam) - super(StudentT, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(StudentT, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): nu, mu, lam = draw_values([self.nu, self.mu, self.lam], @@ -668,10 +685,10 @@ def logp(self, value): lam = self.lam sd = self.sd - return bound(gammaln((nu + 1.0) / 2.0) - + .5 * T.log(lam / (nu * np.pi)) - - gammaln(nu / 2.0) - - (nu + 1.0) / 2.0 * T.log1p(lam * (value - mu)**2 / nu), + return bound(gammaln((nu + 1.0) / 2.0) + + .5 * tt.log(lam / (nu * np.pi)) - + gammaln(nu / 2.0) - + (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu)**2 / nu), lam > 0, nu > 0, sd > 0) @@ -701,19 +718,22 @@ class Pareto(PositiveUnivariateContinuous): Scale parameter (m > 0). """ - def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.m = T.as_tensor_variable(m) - self.mean = T.switch(T.gt(alpha, 1), alpha * m / (alpha - 1.), np.inf) + def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.m = tt.as_tensor_variable(m) + self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.), + np.inf) self.median = m * 2.**(1. / alpha) - self.variance = T.switch( - T.gt(alpha, 2), + self.variance = tt.switch( + tt.gt(alpha, 2), (alpha * m**2) / ((alpha - 2.) 
* (alpha - 1.)**2), np.inf) self.dist_params = (self.alpha, self.m) - super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -729,8 +749,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha m = self.m - return bound(T.log(alpha) + logpow(m, alpha) - - logpow(value, alpha + 1), + return bound(tt.log(alpha) + logpow(m, alpha) - + logpow(value, alpha + 1), value >= m, alpha > 0, m > 0) @@ -760,13 +780,15 @@ class Cauchy(UnivariateContinuous): Scale parameter > 0 """ - def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): - self.median = self.mode = self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) + def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.dist_params = (self.alpha, self.beta) - super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, alpha, beta, size=None): u = np.random.uniform(size=size) @@ -782,8 +804,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(- T.log(np.pi) - T.log(beta) - - T.log1p(((value - alpha) / beta)**2), + return bound(- tt.log(np.pi) - tt.log(beta) - + tt.log1p(((value - alpha) / beta)**2), beta > 0) @@ -808,14 +830,16 @@ class HalfCauchy(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, beta, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mode = T.as_tensor_variable(0.) - self.beta = T.as_tensor_variable(beta) + def __init__(self, beta, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.mode = tt.as_tensor_variable(0.) + self.beta = tt.as_tensor_variable(beta) self.median = beta self.dist_params = (self.beta,) - super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, beta, size=None): u = np.random.uniform(size=size) @@ -829,8 +853,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): beta = self.beta - return bound(T.log(2) - T.log(np.pi) - T.log(beta) - - T.log1p((value / beta)**2), + return bound(tt.log(2) - tt.log(np.pi) - tt.log(beta) - + tt.log1p((value / beta)**2), value >= 0, beta > 0) @@ -873,17 +897,19 @@ class Gamma(PositiveUnivariateContinuous): Alternative scale parameter (sd > 0). 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta self.mean = alpha / beta - self.mode = T.maximum((alpha - 1) / beta, 0) + self.mode = tt.maximum((alpha - 1) / beta, 0) self.variance = alpha / beta**2 self.dist_params = (self.alpha, self.beta) - super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -896,7 +922,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): 'alpha and beta, or mu and sd to specify ' 'distribution.') - return T.as_tensor_variable(alpha), T.as_tensor_variable(beta) + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -942,18 +968,20 @@ class InverseGamma(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) + def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, + **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.mean = (alpha > 1) * beta / (alpha - 1.) or np.inf self.mode = beta / (alpha + 1.) - self.variance = T.switch(T.gt(alpha, 2), - (beta**2) / (alpha * (alpha - 1.)**2), - np.inf) + self.variance = tt.switch(tt.gt(alpha, 2), + (beta**2) / (alpha * (alpha - 1.)**2), + np.inf) self.dist_params = (self.alpha, self.beta) - super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -965,8 +993,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value - + logpow(value, -alpha - 1), + return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value + + logpow(value, -alpha - 1), value > 0, alpha > 0, beta > 0) @@ -991,8 +1019,10 @@ class ChiSquared(Gamma): """ def __init__(self, nu, *args, **kwargs): - self.nu = T.as_tensor_variable(nu) - super(ChiSquared, self).__init__(alpha=self.nu / 2., beta=T.as_tensor_variable(0.5), *args, **kwargs) + self.nu = tt.as_tensor_variable(nu) + super(ChiSquared, self).__init__(alpha=self.nu / 2., + beta=tt.as_tensor_variable(0.5), + *args, **kwargs) class Weibull(PositiveUnivariateContinuous): @@ -1019,17 +1049,19 @@ class Weibull(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) - self.mean = beta * T.exp(gammaln(1 + 1. / alpha)) - self.median = beta * T.exp(gammaln(T.log(2)))**(1. 
/ alpha) + def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) + self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. / alpha) self.variance = (beta**2) * \ - T.exp(gammaln(1 + 2. / alpha - self.mean**2)) + tt.exp(gammaln(1 + 2. / alpha - self.mean**2)) self.dist_params = (self.alpha, self.beta) - super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -1045,9 +1077,9 @@ def _random(a, b, size=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(T.log(alpha) - T.log(beta) - + (alpha - 1) * T.log(value / beta) - - (value / beta)**alpha, + return bound(tt.log(alpha) - tt.log(beta) + + (alpha - 1) * tt.log(value / beta) - + (value / beta)**alpha, value >= 0, alpha > 0, beta > 0) @@ -1155,16 +1187,18 @@ class ExGaussian(UnivariateContinuous): Vol. 4, No. 1, pp 35-45. """ - def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = T.as_tensor_variable(mu) - self.sigma = T.as_tensor_variable(sigma) - self.nu = T.as_tensor_variable(nu) + def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.sigma = tt.as_tensor_variable(sigma) + self.nu = tt.as_tensor_variable(nu) self.mean = mu + nu self.variance = (sigma**2) + (nu**2) self.dist_params = (self.mu, self.sigma, self.nu) - super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], @@ -1184,11 +1218,12 @@ def logp(self, value): nu = self.nu # This condition suggested by exGAUS.R from gamlss - lp = T.switch(T.gt(nu, 0.05 * sigma), - - T.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu)**2 - + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.), - - T.log(sigma * T.sqrt(2 * np.pi)) - - 0.5 * ((value - mu) / sigma)**2) + lp = tt.switch(tt.gt(nu, 0.05 * sigma), + -tt.log(nu) + (mu - value) / nu + + 0.5 * (sigma / nu)**2 + + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.), + -tt.log(sigma * tt.sqrt(2 * np.pi)) + + 0.5 * ((value - mu) / sigma)**2) return bound(lp, sigma > 0., nu > 0.) @@ -1214,15 +1249,16 @@ class VonMises(UnivariateContinuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.mu = T.as_tensor_variable( - mu) - self.kappa = T.as_tensor_variable(kappa) + def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, + size=None, dtype=None, *args, **kwargs): + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) + self.kappa = tt.as_tensor_variable(kappa) self.variance = 1. 
- i1(kappa) / i0(kappa) self.dist_params = (self.mu, self.kappa) - super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() @@ -1237,4 +1273,6 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu kappa = self.kappa - return bound(kappa * T.cos(mu - value) - T.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0) + return bound(kappa * tt.cos(mu - value) - + tt.log(2 * np.pi * i0(kappa)), + value >= -np.pi, value <= np.pi, kappa >= 0) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 1015208c768..bfc62ebd18b 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -2,7 +2,7 @@ import numpy as np import theano -import theano.tensor as T +import theano.tensor as tt from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow @@ -36,14 +36,16 @@ class Binomial(UnivariateDiscrete): p : float Probability of success in each trial (0 < p < 1). """ + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.n = T.as_tensor_variable(n) - self.p = T.as_tensor_variable(p) - self.mode = T.cast(T.round(n * p), self.dtype) + self.n = tt.as_tensor_variable(n) + self.p = tt.as_tensor_variable(p) + self.mode = tt.cast(tt.round(n * p), self.dtype) self.dist_params = (self.n, self.p) - super(Binomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Binomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) @@ -89,15 +91,17 @@ class BetaBinomial(UnivariateDiscrete): beta : float beta > 0. """ + def __init__(self, alpha, beta, n, ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) - self.n = T.as_tensor_variable(n) - self.mode = T.cast(T.round(alpha / (alpha + beta)), 'int8') + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.n = tt.as_tensor_variable(n) + self.mode = tt.cast(tt.round(alpha / (alpha + beta)), 'int8') self.dist_params = (self.alpha, self.beta, self.n) - super(BetaBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(BetaBinomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -147,13 +151,15 @@ class Bernoulli(UnivariateDiscrete): p : float Probability of success (0 < p < 1). """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.p = T.as_tensor_variable(p) - self.mode = T.cast(T.round(p), 'int8') + self.p = tt.as_tensor_variable(p) + self.mode = tt.cast(tt.round(p), 'int8') self.dist_params = (self.p,) # FIXME: bcast, etc. 
- super(Bernoulli, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Bernoulli, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -164,7 +170,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): p = self.p return bound( - T.switch(value, T.log(p), T.log(1 - p)), + tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1) @@ -195,12 +201,14 @@ class Poisson(UnivariateDiscrete): The Poisson distribution can be derived as a limiting case of the binomial distribution. """ + def __init__(self, mu, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = T.as_tensor_variable(mu) - self.mode = T.floor(mu).astype('int32') + self.mu = tt.as_tensor_variable(mu) + self.mode = tt.floor(mu).astype('int32') self.dist_params = (self.mu,) - super(Poisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Poisson, self).__init__(self.dist_params, + ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) @@ -240,13 +248,15 @@ class NegativeBinomial(UnivariateDiscrete): alpha : float Gamma distribution parameter (alpha > 0). """ + def __init__(self, mu, alpha, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = T.as_tensor_variable(mu) - self.alpha = T.as_tensor_variable(alpha) - self.mode = T.floor(mu).astype('int32') + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.mode = tt.floor(mu).astype('int32') self.dist_params = (self.mu, self.alpha) - super(NegativeBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(NegativeBinomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) @@ -265,9 +275,9 @@ def logp(self, value): value >= 0, mu > 0, alpha > 0) # Return Poisson when alpha gets very large. - return T.switch(1 * (alpha > 1e10), - Poisson.dist(self.mu).logp(value), - negbinom) + return tt.switch(1 * (alpha > 1e10), + Poisson.dist(self.mu).logp(value), + negbinom) class Geometric(UnivariateDiscrete): @@ -290,12 +300,14 @@ class Geometric(UnivariateDiscrete): p : float Probability of success on an individual trial (0 < p <= 1). """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.p = T.as_tensor_variable(p) - self.mode = T.as_tensor_variable(1) + self.p = tt.as_tensor_variable(p) + self.mode = tt.as_tensor_variable(1) self.dist_params = (self.p,) - super(Geometric, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Geometric, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -305,7 +317,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): p = self.p - return bound(T.log(p) + logpow(1 - p, value - 1), + return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) @@ -328,13 +340,15 @@ class DiscreteUniform(UnivariateDiscrete): upper : int Upper limit (upper > lower). 
""" + def __init__(self, lower, upper, ndim=None, size=None, dtype=None, *args, **kwargs): - self.lower = T.floor(lower).astype('int32') - self.upper = T.floor(upper).astype('int32') - self.mode = T.floor((upper - lower) / 2.).astype('int32') + self.lower = tt.floor(lower).astype('int32') + self.upper = tt.floor(upper).astype('int32') + self.mode = tt.floor((upper - lower) / 2.).astype('int32') self.dist_params = (self.lower, self.upper) - super(DiscreteUniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(DiscreteUniform, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -353,7 +367,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): upper = self.upper lower = self.lower - return bound(-T.log(upper - lower + 1), + return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper) @@ -374,16 +388,17 @@ class Categorical(UnivariateDiscrete): p : float p > 0 and the elements of p must sum to 1. """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.p = T.as_tensor_variable(p) - self.shape_support = T.shape(p)[-1] - self.mode = T.argmax(p) + self.p = tt.as_tensor_variable(p) + self.shape_support = tt.shape(p)[-1] + self.mode = tt.argmax(p) self.dist_params = (self.p,) # FIXME: The stated support is univariate? - super(Categorical, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) - + super(Categorical, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p, k = draw_values([self.p, self.shape_support], point=point) @@ -397,11 +412,12 @@ def logp(self, value): p = self.p k = self.k - sumto1 = theano.gradient.zero_grad(T.le(abs(T.sum(p, axis=-1) - 1), 1e-5)) + sumto1 = theano.gradient.zero_grad( + tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5)) if p.ndim > 1: - a = T.log(p[T.arange(p.shape[0]), value]) + a = tt.log(p[tt.arange(p.shape[0]), value]) else: - a = T.log(p[value]) + a = tt.log(p[value]) return bound(a, value >= 0, value <= (k - 1), sumto1) @@ -416,10 +432,12 @@ class ConstantDist(UnivariateDiscrete): value : float or int Constant parameter. 
""" + def __init__(self, c, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.c = T.as_tensor_variable(c) + self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) self.dist_params = (self.c,) - super(ConstantDist, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ConstantDist, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -433,27 +451,30 @@ def _random(c, dtype=dtype, size=None): def logp(self, value): c = self.c - return bound(0, T.eq(value, c)) + return bound(0, tt.eq(value, c)) class ZeroInflatedPoisson(UnivariateDiscrete): + def __init__(self, theta, z, ndim=None, size=None, dtype=None, *args, **kwargs): - self.theta = T.as_tensor_variable(theta) - self.z = T.as_tensor_variable(z) + self.theta = tt.as_tensor_variable(theta) + self.z = tt.as_tensor_variable(z) self.pois = Poisson.dist(theta) self.const = ConstantDist.dist(0) self.mode = self.pois.mode self.dist_params = (self.theta, self.z) - super(ZeroInflatedPoisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ZeroInflatedPoisson, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): #theta = draw_values([self.theta], point=point) # TODO: Finish me - raise NotImplementedError("zero inflated poisson sampling not implemented!") + raise NotImplementedError( + "zero inflated poisson sampling not implemented!") def logp(self, value): - return T.switch(self.z, - self.pois.logp(value), - self.const.logp(value)) + return tt.switch(self.z, + self.pois.logp(value), + self.const.logp(value)) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index d8ac0eb044e..68e7cab8f4c 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,5 +1,5 @@ import numpy as np -import theano.tensor as T +import theano.tensor as tt from theano import function from theano.tensor.raw_random import _infer_ndim_bcast @@ -7,9 +7,12 @@ from ..model import Model, get_named_nodes -__all__ = ['DensityDist', 'Distribution', - 'Continuous', 'Discrete', 'MultivariateContinuous', 'UnivariateContinuous', - 'MultivariateDiscrete', 'UnivariateDiscrete', 'NoDistribution'] +#__all__ = ['DensityDist', 'Distribution', +# 'Continuous', 'Discrete', 'MultivariateContinuous', 'UnivariateContinuous', +# 'MultivariateDiscrete', 'UnivariateDiscrete', 'NoDistribution'] + +__all__ = ['DensityDist', 'Distribution', 'Continuous', + 'Discrete', 'NoDistribution', 'TensorType'] def _as_tensor_shape_variable(var): @@ -17,13 +20,13 @@ def _as_tensor_shape_variable(var): `_infer_ndim_bcast` """ if var is None: - return T.constant([], dtype='int64') + return tt.constant([], dtype='int64') res = var if isinstance(res, (tuple, list)): if len(res) == 0: - return T.constant([], dtype='int64') - res = T.as_tensor_variable(res, ndim=1) + return tt.constant([], dtype='int64') + res = tt.as_tensor_variable(res, ndim=1) else: if res.ndim != 1: @@ -106,10 +109,10 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, Here are a few ways of inferring shapes: * When a distribution is scalar, then `shape_supp = ()` - * and has an informative parameter, e.g. `mu`, then `shape_ind = T.shape(mu)`. + * and has an informative parameter, e.g. `mu`, then `shape_ind = tt.shape(mu)`. 
* When a distribution is multivariate * and has an informative parameter, e.g. `mu`, then - `shape_supp = T.shape(mu)[-ndim_supp:]` and `shape_ind = T.shape(mu)[-ndim_supp:]`. + `shape_supp = tt.shape(mu)[-ndim_supp:]` and `shape_ind = tt.shape(mu)[-ndim_supp:]`. In all remaining cases the shapes must be provided by the caller. `shape_reps` is always provided by the caller. @@ -142,15 +145,15 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, """ self.shape_supp = _as_tensor_shape_variable(shape_supp) - self.ndim_supp = T.get_vector_length(self.shape_supp) + self.ndim_supp = tt.get_vector_length(self.shape_supp) self.shape_ind = _as_tensor_shape_variable(shape_ind) - self.ndim_ind = T.get_vector_length(self.shape_ind) + self.ndim_ind = tt.get_vector_length(self.shape_ind) self.shape_reps = _as_tensor_shape_variable(shape_reps) - self.ndim_reps = T.get_vector_length(self.shape_reps) + self.ndim_reps = tt.get_vector_length(self.shape_reps) ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps if ndim_sum == 0: - self.shape = T.constant([], dtype='int64') + self.shape = tt.constant([], dtype='int64') else: self.shape = tuple(self.shape_reps) +\ tuple(self.shape_ind) +\ @@ -158,16 +161,16 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, if testval is None: if ndim_sum == 0: - testval = T.constant(0, dtype=dtype) + testval = tt.constant(0, dtype=dtype) else: - testval = T.zeros(self.shape) + testval = tt.zeros(self.shape) - self.ndim = T.get_vector_length(self.shape) + self.ndim = tt.get_vector_length(self.shape) self.testval = testval self.defaults = defaults self.transform = transform - self.type = T.TensorType(str(dtype), bcast) + self.type = tt.TensorType(str(dtype), bcast) def default(self): return self.get_test_val(self.testval, self.defaults) @@ -192,11 +195,11 @@ def getattr_value(self, val): if isinstance(val, str): val = getattr(self, val) - if isinstance(val, T.sharedvar.SharedVariable): + if isinstance(val, tt.sharedvar.SharedVariable): return val.get_value() - elif isinstance(val, T.TensorVariable): + elif isinstance(val, tt.TensorVariable): return val.tag.test_value - elif isinstance(val, T.TensorConstant): + elif isinstance(val, tt.TensorConstant): return val.value return val @@ -204,6 +207,22 @@ def getattr_value(self, val): class NoDistribution(Distribution): + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=[], transform=None, parent_dist=None, + *args, **kwargs): + super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype=dtype, + testval=testval, + defaults=defaults, *args, + **kwargs) + self.parent_dist = parent_dist + + def __getattr__(self, name): + try: + self.__dict__[name] + except KeyError: + return getattr(self.parent_dist, name) + def logp(self, x): return 0 @@ -211,32 +230,38 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - def __init__(self, ndim_support, ndim, size, bcast, dtype='int64', defaults=['mode'], *args, **kwargs): - super(Discrete, self).__init__(ndim_support, ndim, size, - bcast, dtype, defaults=defaults, *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype='int64', + defaults=['mode'], *args, **kwargs): + super(Discrete, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, *args, + **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" def __init__(self, shape_supp, shape_ind, shape_reps, 
bcast, - dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): + dtype='float64', defaults=['median', 'mean', 'mode'], *args, + **kwargs): super(Continuous, self).__init__(shape_supp, shape_ind, shape_reps, - bcast, dtype, defaults=defaults, *args, **kwargs) + bcast, dtype, defaults=defaults, + *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', *args, **kwargs): - super(DensityDist, self).__init__( - ndim, size, bcast, dtype, *args, **kwargs) + def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', + *args, **kwargs): + super(DensityDist, self).__init__(ndim, size, bcast, dtype, *args, + **kwargs) self.logp = logp class UnivariateContinuous(Continuous): - def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *args, **kwargs): + def __init__(self, dist_params, ndim=None, size=None, dtype=None, + bcast=None, *args, **kwargs): """ Parameters @@ -248,11 +273,12 @@ def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *a """ self.shape_supp = () - dist_params = tuple(T.as_tensor_variable(x) for x in dist_params) + dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) ndim, size, bcast = _infer_ndim_bcast(*(ndim, size) + dist_params) if dtype is None: - dtype = T.scal.upcast(*(T.config.floatX,) + tuple(x.dtype for x in dist_params)) + dtype = tt.scal.upcast(*(tt.config.floatX,) + + tuple(x.dtype for x in dist_params)) # We just assume super(UnivariateContinuous, self).__init__( diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 06271d5b304..e89bebe1f49 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -1,11 +1,11 @@ import warnings import numpy as np -import theano.tensor as T +import theano.tensor as tt import theano from scipy import stats -from theano.tensor.nlinalg import det, matrix_inverse, trace, eigh +from theano.tensor.nlinalg import det, matrix_inverse, trace from . import transforms from .distribution import MultivariateContinuous, MultivariateDiscrete, draw_values, generate_samples @@ -45,9 +45,9 @@ class MvNormal(MultivariateContinuous): """ def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.mu = T.as_tensor_variable( + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( mu) - self.tau = T.as_tensor_variable(tau) + self.tau = tt.as_tensor_variable(tau) self.dist_params = (self.mu, self.tau) @@ -56,15 +56,15 @@ def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): shape_ind = self.mu.shape[:-1] if self.mu.ndim > 0: - bcast = (False,) * (1 + T.get_vector_length(shape_ind)) + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) else: bcast = (False,) if dtype is None: dtype = self.mu.dtype - super(MvNormal, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + super(MvNormal, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def random(self, point=None, size=None): @@ -76,13 +76,14 @@ def random(self, point=None, size=None): # def random(rng, point=[], size=None): # # if size is None: - # size = T.constant([], dtype='int64') + # size = tt.constant([], dtype='int64') # type = self.type # else: - # size = T.as_tensor_variable(size) + # size = tt.as_tensor_variable(size) # type = ... 
# op = RandomFunction('multivariate_normal', type) - # rand_op = lambda random_state: op(random_state, size, self.mu, T.nlinalg.matrix_invers(self.tau)) + # rand_op = lambda random_state: op(random_state, size, self.mu, + # tt.nlinalg.matrix_invers(self.tau)) # rand_sym_res = rng.gen(rand_op) # return function(point, rand_sym_res)() # @@ -118,30 +119,33 @@ def _random(mean, tau, size=None): return samples def logp(self, value): - mu = T.as_tensor_variable(self.mu) - tau = T.as_tensor_variable(self.tau) + mu = tt.as_tensor_variable(self.mu) + tau = tt.as_tensor_variable(self.tau) - reps_shape_prod = T.prod(self.shape_reps, keepdims=True) + reps_shape_prod = tt.prod(self.shape_reps, keepdims=True) # collapse reps dimensions - flat_supp_shape = T.concatenate((reps_shape_prod, self.shape_supp)) + flat_supp_shape = tt.concatenate((reps_shape_prod, self.shape_supp)) mus_collapsed = mu.reshape(flat_supp_shape, ndim=2) - taus_collapsed = tau.reshape(T.concatenate((reps_shape_prod, - self.shape_supp, self.shape_supp)), ndim=3) + taus_collapsed = tau.reshape(tt.concatenate((reps_shape_prod, + self.shape_supp, + self.shape_supp)), ndim=3) # force value to conform to reps_shape - value_reshape = T.ones_like(mu) * value + value_reshape = tt.ones_like(mu) * value values_collapsed = value_reshape.reshape(flat_supp_shape, ndim=2) def single_logl(_mu, _tau, _value, k): delta = _value - _mu - result = k * T.log(2 * np.pi) + T.log(det(_tau)) - result += T.square(delta.dot(_tau)).sum(axis=-1) + result = k * tt.log(2 * np.pi) + tt.log(det(_tau)) + result += tt.square(delta.dot(_tau)).sum(axis=-1) return -result / 2 from theano import scan - res, _ = scan(fn=single_logl, sequences=[mus_collapsed, taus_collapsed, values_collapsed], non_sequences=[self.shape_supp], strict=True - ) + res, _ = scan(fn=single_logl, + sequences=[mus_collapsed, taus_collapsed, + values_collapsed], + non_sequences=[self.shape_supp], strict=True) return res.sum() @@ -176,10 +180,10 @@ class Dirichlet(MultivariateContinuous): """ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, dtype=None, *args, **kwargs): - self.a = T.as_tensor_variable(a) - self.mean = self.a / T.sum(self.a) - self.mode = T.switch(T.all(self.a > 1), - (self.a - 1) / T.sum(self.a - 1), + self.a = tt.as_tensor_variable(a) + self.mean = self.a / tt.sum(self.a) + self.mode = tt.switch(tt.all(self.a > 1), + (self.a - 1) / tt.sum(self.a - 1), np.nan) self.dist_params = (self.a,) @@ -190,15 +194,16 @@ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, # FIXME: this isn't correct/ideal if self.a.ndim > 0: - bcast = (False,) * (1 + T.get_vector_length(shape_ind)) + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) else: bcast = (False,) if dtype is None: dtype = self.mu.dtype - super(Dirichlet, self).__init__(shape_supp, shape_ind, (), - bcast, dtype, transform=transform, *args, **kwargs) + super(Dirichlet, self).__init__(shape_supp, shape_ind, (), bcast, + dtype, transform=transform, *args, + **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -216,10 +221,9 @@ def logp(self, value): a = self.a # only defined for sum(value) == 1 - return bound(T.sum(logpow(value, a - 1) - gammaln(a), axis=0) - + gammaln(T.sum(a, axis=0)), - T.all(value >= 0), T.all(value <= 1), - k > 1, T.all(a > 0)) + return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=0) + + gammaln(tt.sum(a, axis=0)), tt.all(value >= 0), + tt.all(value <= 1), k > 1, tt.all(a > 0)) class 
Multinomial(MultivariateDiscrete): @@ -253,12 +257,13 @@ class Multinomial(MultivariateDiscrete): be non-negative and sum to 1. """ - def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, + **kwargs): # TODO: allow tensor of independent n's. - self.n = T.as_tensor_variable(n, ndim=0) - self.p = T.as_tensor_variable(p) + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p) self.mean = n * p - self.mode = T.cast(T.round(n * p), 'int32') + self.mode = tt.cast(tt.round(n * p), 'int32') self.dist_params = (self.n, self.p) @@ -273,8 +278,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): dtype = self.p.dtype # FIXME: n.shape == p.shape? bcast, etc. - super(Multinomial, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + super(Multinomial, self).__init__(shape_supp, shape_ind, (), bcast, + dtype, *args, **kwargs) def _random(self, n, p, size=None): if size == p.shape: @@ -293,8 +298,8 @@ def logp(self, x): p = self.p # only defined for sum(p) == 1 return bound( - factln(n) + T.sum(x * T.log(p) - factln(x)), - T.all(x >= 0), T.all(x <= n), T.eq(T.sum(x), n), + factln(n) + tt.sum(x * tt.log(p) - factln(x)), + tt.all(x >= 0), tt.all(x <= n), tt.eq(tt.sum(x), n), n >= 0) @@ -320,7 +325,7 @@ class PosDefMatrix(theano.Op): def make_node(self, x): x = theano.tensor.as_tensor_variable(x) assert x.ndim == 2 - o = T.TensorType(dtype='int8', broadcastable=[])() + o = tt.TensorType(dtype='int8', broadcastable=[])() return theano.Apply(self, [x], [o]) # Python implementation: @@ -381,7 +386,8 @@ class Wishart(MultivariateContinuous): p x p positive definite matrix. """ - def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, + **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -390,12 +396,9 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): 'https://github.com/pymc-devs/pymc3/issues/538.', UserWarning) # TODO: allow tensor of independent n's. 
- self.n = T.as_tensor_variable(n, ndim=0) - self.V = T.as_tensor_variable(V) - self.mean = n * V - self.mode = T.switch(1 * (n >= self.shape_support + 1), - (n - self.shape_support - 1) * V, - np.nan) + self.n = tt.as_tensor_variable(n, ndim=0) + self.V = tt.as_tensor_variable(V) + self.mean = self.n * self.V self.dist_params = (self.n, self.V) @@ -408,8 +411,12 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): if dtype is None: dtype = self.V.dtype - super(Wishart, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + self.mode = tt.switch(1 * (self.n >= self.shape_supp + 1), + (self.n - self.shape_supp - 1) * self.V, + np.nan) + + super(Wishart, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def logp(self, X): n = self.n @@ -419,12 +426,12 @@ def logp(self, X): IVI = det(V) IXI = det(X) - return bound(((n - p - 1) * T.log(IXI) - - trace(matrix_inverse(V).dot(X)) - - n * p * T.log(2) - n * T.log(IVI) - - 2 * multigammaln(n / 2., p)) / 2, + return bound(((n - p - 1) * tt.log(IXI) - + trace(matrix_inverse(V).dot(X)) - + n * p * tt.log(2) - n * tt.log(IVI) - + 2 * multigammaln(n / 2., p)) / 2, matrix_pos_def(X), - T.eq(X, X.T), + tt.eq(X, X.T), n > (p - 1)) @@ -468,18 +475,19 @@ class LKJCorr(MultivariateContinuous): 100(9), pp.1989-2001. """ - def __init__(self, n, p, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, + **kwargs): # TODO: allow tensor of independent n's. - self.n = T.as_tensor_variable(n, ndim=0) - self.p = T.as_tensor_variable(p, ndim=0) + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p, ndim=0) n_elem = (p * (p - 1) / 2) - self.mean = T.zeros(n_elem) + self.mean = tt.zeros(n_elem) self.shape_support = n_elem # FIXME: triu, bcast, etc. - self.tri_index = T.zeros((p, p), dtype=int) - self.tri_index[T.triu(p, k=1)] = T.arange(n_elem) - self.tri_index[T.triu(p, k=1)[::-1]] = T.arange(n_elem) + self.tri_index = tt.zeros((p, p), dtype=int) + self.tri_index[tt.triu(p, k=1)] = tt.arange(n_elem) + self.tri_index[tt.triu(p, k=1)[::-1]] = tt.arange(n_elem) self.dist_params = (self.n, self.p) @@ -492,24 +500,24 @@ def __init__(self, n, p, *args, **kwargs): if dtype is None: dtype = self.n.dtype - super(LKJCorr, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def _normalizing_constant(self, n, p): if n == 1: - result = gammaln(2. * T.arange(1, int((p - 1) / 2) + 1)).sum() + result = gammaln(2. * tt.arange(1, int((p - 1) / 2) + 1)).sum() if p % 2 == 1: - result += (0.25 * (p ** 2 - 1) * T.log(np.pi) - - 0.25 * (p - 1) ** 2 * T.log(2.) + result += (0.25 * (p ** 2 - 1) * tt.log(np.pi) + - 0.25 * (p - 1) ** 2 * tt.log(2.) - (p - 1) * gammaln(int((p + 1) / 2))) else: - result += (0.25 * p * (p - 2) * T.log(np.pi) - + 0.25 * (3 * p ** 2 - 4 * p) * T.log(2.) + result += (0.25 * p * (p - 2) * tt.log(np.pi) + + 0.25 * (3 * p ** 2 - 4 * p) * tt.log(2.) + p * gammaln(p / 2) - (p - 1) * gammaln(p)) else: result = -(p - 1) * gammaln(n + 0.5 * (p - 1)) - k = T.arange(1, p) - result += (0.5 * k * T.log(np.pi) + k = tt.arange(1, p) + result += (0.5 * k * tt.log(np.pi) + gammaln(n + 0.5 * (p - 1 - k))).sum() return result @@ -518,11 +526,11 @@ def logp(self, x): p = self.p X = x[self.tri_index] - X = T.fill_diagonal(X, 1) + X = tt.fill_diagonal(X, 1) result = self._normalizing_constant(n, p) - result += (n - 1.) 
* T.log(det(X)) + result += (n - 1.) * tt.log(det(X)) return bound(result, - T.all(X <= 1), T.all(X >= -1), + tt.all(X <= 1), tt.all(X >= -1), matrix_pos_def(X), n > 0) diff --git a/pymc3/model.py b/pymc3/model.py index cc14d298d6b..06061f7cfb4 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1,6 +1,6 @@ import numpy as np import theano -import theano.tensor as T +import theano.tensor as tt from theano.tensor.var import TensorVariable from .memoize import memoize @@ -163,7 +163,7 @@ def fastd2logp(self, vars=None): @property def logpt(self): """Theano scalar of log-probability of the model""" - return T.sum(self.logp_elemwiset) + return tt.sum(self.logp_elemwiset) class Model(Context, Factor): @@ -192,14 +192,14 @@ def __init__(self, verbose=1): def logpt(self): """Theano scalar of log-probability of the model""" factors = [var.logpt for var in self.basic_RVs] + self.potentials - return T.add(*map(T.sum, factors)) + return tt.add(*map(tt.sum, factors)) @property def varlogpt(self): """Theano scalar of log-probability of the unobserved random variables (excluding deterministic).""" factors = [var.logpt for var in self.vars] - return T.add(*map(T.sum, factors)) + return tt.add(*map(tt.sum, factors)) @property def vars(self): @@ -477,13 +477,14 @@ def __init__(self, type=None, owner=None, index=None, name=None, if distribution is not None: self.dshape = distribution.shape - self.dsize = T.prod(distribution.shape) + self.dsize = tt.prod(distribution.shape) self.distribution = distribution # TODO: why should we do this? shouldn't the default/test_val # of the distribution be enough? - #dist_test_val = distribution.default() - #self.tag.test_value = np.ones( - # distribution.shape.tag.test_value, distribution.dtype) * dist_test_val + # dist_test_val = distribution.default() + # self.tag.test_value = np.ones( + # distribution.shape.tag.test_value, distribution.dtype) * + # dist_test_val self.tag.test_value = distribution.default() self.logp_elemwiset = distribution.logp(self) self.model = model @@ -522,14 +523,14 @@ def as_tensor(data, name, model, dtype): missing_values = FreeRV(name=name + '_missing', distribution=fakedist, model=model) - constant = T.as_tensor_variable(data.filled()) + constant = tt.as_tensor_variable(data.filled()) dataTensor = theano.tensor.set_subtensor(constant[data.mask.nonzero()], missing_values) dataTensor.missing_values = missing_values return dataTensor else: - data = T.as_tensor_variable(data, name=name) + data = tt.as_tensor_variable(data, name=name) data.missing_values = None return data
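
Note on the shape decomposition introduced in pymc3/distributions/distribution.py:
a variable's overall shape is assembled from three pieces -- the support shape
(`shape_supp`), the shape of the independent-but-not-identical dimensions
(`shape_ind`), and the shape of the replications (`shape_reps`) -- concatenated
as reps + ind + supp in `Distribution.__init__` above. A minimal plain-Python
sketch of that composition (illustrative only, not code from the patch; the
helper name `full_shape` is made up for the example):

    def full_shape(shape_supp, shape_ind, shape_reps):
        # Combine the three shape pieces in the same order Distribution.__init__
        # uses: replications first, then independent dims, then support dims.
        return tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

    # A 3-dimensional multivariate support (shape_supp == (3,)) with a 2 x 4
    # array of independent parameters (shape_ind == (2, 4)), replicated 5 times:
    assert full_shape((3,), (2, 4), (5,)) == (5, 2, 4, 3)

    # A scalar univariate variable with no independent or repetition
    # dimensions has an empty shape:
    assert full_shape((), (), ()) == ()

The univariate constructors delegate the `ndim`/`size`/`dtype` handling to
Theano's `_infer_ndim_bcast` and `tt.scal.upcast`, as seen in
`UnivariateContinuous.__init__` above.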