From 884f5142140e073eda5e4935db97efb29b16ce3b Mon Sep 17 00:00:00 2001 From: brandon willard Date: Sat, 21 May 2016 19:23:00 -0500 Subject: [PATCH] initial distribution shape refactoring --- pymc3/distributions/continuous.py | 294 ++++++++++++++++------------ pymc3/distributions/discrete.py | 123 +++++++----- pymc3/distributions/distribution.py | 88 ++++++--- pymc3/distributions/multivariate.py | 156 ++++++++------- pymc3/model.py | 21 +- 5 files changed, 388 insertions(+), 294 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 23c732ebf2e..e4a2968dccd 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -8,7 +8,7 @@ from __future__ import division import numpy as np -import theano.tensor as T +import theano.tensor as tt from scipy import stats from . import transforms @@ -26,7 +26,8 @@ class PositiveUnivariateContinuous(UnivariateContinuous): def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.log) - super(PositiveUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + super(PositiveUnivariateContinuous, self).__init__(transform=transform, + *args, **kwargs) class UnitUnivariateContinuous(UnivariateContinuous): @@ -34,7 +35,8 @@ class UnitUnivariateContinuous(UnivariateContinuous): def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.logodds) - super(UnitUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + super(UnitUnivariateContinuous, self).__init__(transform=transform, + *args, **kwargs) def get_tau_sd(tau=None, sd=None): @@ -75,7 +77,7 @@ def get_tau_sd(tau=None, sd=None): tau = 1. * tau sd = 1. * sd - return (T.as_tensor_variable(tau), T.as_tensor_variable(sd)) + return (tt.as_tensor_variable(tau), tt.as_tensor_variable(sd)) class Uniform(UnivariateContinuous): @@ -100,17 +102,19 @@ class Uniform(UnivariateContinuous): Upper limit. """ - def __init__(self, lower=0, upper=1, transform='interval', size=None, ndim=None, dtype=None, *args, **kwargs): + def __init__(self, lower=0, upper=1, transform='interval', size=None, + ndim=None, dtype=None, *args, **kwargs): - lower = T.as_tensor_variable(lower) - upper = T.as_tensor_variable(upper) + lower = tt.as_tensor_variable(lower) + upper = tt.as_tensor_variable(upper) self.dist_params = (lower, upper) self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. self.median = self.mean - super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -126,7 +130,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): lower = self.lower upper = self.upper - return bound(-T.log(upper - lower), + return bound(-tt.log(upper - lower), value >= lower, value <= upper) @@ -138,16 +142,17 @@ class Flat(UnivariateContinuous): def __init__(self, ndim=None, size=None, dtype=None, *args, **kwargs): - self.median = T.as_tensor_variable(0.) + self.median = tt.as_tensor_variable(0.) 
self.dist_params = (self.median,) - super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') def logp(self, value): - return T.zeros_like(value) + return tt.zeros_like(value) class Normal(UnivariateContinuous): @@ -184,16 +189,18 @@ class Normal(UnivariateContinuous): Standard deviation (sd > 0). """ - def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): - mu = T.as_tensor_variable(mu) + mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.variance = 1. / self.tau self.dist_params = (self.mu, self.tau) - super(Normal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Normal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -206,7 +213,7 @@ def logp(self, value): tau = self.tau sd = self.sd mu = self.mu - return bound((-tau * (value - mu)**2 + T.log(tau / np.pi / 2.)) / 2., + return bound((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2., tau > 0, sd > 0) @@ -234,15 +241,17 @@ class HalfNormal(PositiveUnivariateContinuous): Standard deviation (sd > 0). """ - def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, + *args, **kwargs): self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) - self.mean = T.sqrt(2 / (np.pi * self.tau)) + self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. - 2 / np.pi) / self.tau self.dist_params = (self.tau,) - super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): tau = draw_values([self.tau], point=point) @@ -253,7 +262,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): tau = self.tau sd = self.sd - return bound(-0.5 * tau * value**2 + 0.5 * T.log(tau * 2. / np.pi), + return bound(-0.5 * tau * value**2 + 0.5 * tt.log(tau * 2. / np.pi), value >= 0, tau > 0, sd > 0) @@ -314,18 +323,20 @@ class Wald(PositiveUnivariateContinuous): The American Statistician, Vol. 30, No. 2, pp. 88-90 """ - def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, + size=None, dtype=None, *args, **kwargs): self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha - self.mode = self.mu * (T.sqrt(1. + (1.5 * self.mu / self.lam)**2) + self.mode = self.mu * (tt.sqrt(1. 
+ (1.5 * self.mu / self.lam)**2) - 1.5 * self.mu / self.lam) + alpha self.variance = (self.mu**3) / self.lam self.dist_params = (self.mu, self.lam, self.alpha) - super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_mu_lam_phi(self, mu, lam, phi): res = None @@ -345,7 +356,7 @@ def get_mu_lam_phi(self, mu, lam, phi): if res is None: raise ValueError('Wald distribution must specify either mu only, ' 'mu and lam, mu and phi, or lam and phi.') - return map(T.as_tensor_variable, res) + return map(tt.as_tensor_variable, res) def _random(self, mu, lam, alpha, size=None): v = np.random.normal(size=size)**2 @@ -369,13 +380,12 @@ def logp(self, value): lam = self.lam alpha = self.alpha # value *must* be iid. Otherwise this is wrong. - return bound(logpow(lam / (2. * np.pi), 0.5) - - logpow(value - alpha, 1.5) - - (0.5 * lam / (value - alpha) - * ((value - alpha - mu) / mu)**2), + return bound(logpow(lam / (2. * np.pi), 0.5) - + logpow(value - alpha, 1.5) - + (0.5 * lam / (value - alpha) * ((value - alpha - mu) / + mu)**2), # XXX these two are redundant. Please, check. - value > 0, value - alpha > 0, - mu > 0, lam > 0, alpha >= 0) + value > 0, value - alpha > 0, mu > 0, lam > 0, alpha >= 0) class Beta(UnitUnivariateContinuous): @@ -421,7 +431,8 @@ class Beta(UnitUnivariateContinuous): the binomial distribution. """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -431,7 +442,8 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None self.dist_params = (self.alpha, self.beta) - super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -444,7 +456,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): raise ValueError('Incompatible parameterization. Either use alpha ' 'and beta, or mu and sd to specify distribution.') - return T.as_tensor_variable(alpha), T.as_tensor_variable(beta) + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -457,9 +469,8 @@ def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - - betaln(alpha, beta), - value >= 0, value <= 1, + return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - + betaln(alpha, beta), value >= 0, value <= 1, alpha > 0, beta > 0) @@ -486,14 +497,15 @@ class Exponential(PositiveUnivariateContinuous): def __init__(self, lam, ndim=None, size=None, dtype=None, *args, **kwargs): self.lam = lam self.mean = 1. 
/ lam - self.median = self.mean * T.log(2) + self.median = self.mean * tt.log(2) self.mode = 0 self.dist_params = (self.lam,) self.variance = lam**-2 - super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): lam = draw_values([self.lam], point=point) @@ -503,7 +515,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): lam = self.lam - return bound(T.log(lam) - lam * value, value > 0, lam > 0) + return bound(tt.log(lam) - lam * value, value > 0, lam > 0) class Laplace(UnivariateContinuous): @@ -529,15 +541,17 @@ class Laplace(UnivariateContinuous): Scale parameter (b > 0). """ - def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, **kwargs): - self.b = T.as_tensor_variable(b) - self.mean = self.median = self.mode = self.mu = T.as_tensor_variable( + def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.b = tt.as_tensor_variable(b) + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( mu) self.variance = 2 * b**2 self.dist_params = (self.b, self.mu) - super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) @@ -549,7 +563,7 @@ def logp(self, value): mu = self.mu b = self.b - return -T.log(2 * b) - abs(value - mu) / b + return -tt.log(2 * b) - abs(value - mu) / b class Lognormal(PositiveUnivariateContinuous): @@ -581,18 +595,20 @@ class Lognormal(PositiveUnivariateContinuous): Scale parameter (tau > 0). """ - def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, + **kwargs): - self.mu = T.as_tensor_variable(mu) - self.tau = T.as_tensor_variable(tau) - self.mean = T.exp(mu + 1. / (2. * tau)) - self.median = T.exp(mu) - self.mode = T.exp(mu - 1. / tau) - self.variance = (T.exp(1. / tau) - 1.) * T.exp(2 * mu + 1. / tau) + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + self.mean = tt.exp(mu + 1. / (2. * tau)) + self.median = tt.exp(mu) + self.mode = tt.exp(mu - 1. / tau) + self.variance = (tt.exp(1. / tau) - 1.) * tt.exp(2 * mu + 1. / tau) self.dist_params = (self.mu, self.tau) - super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) @@ -607,10 +623,9 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu tau = self.tau - return bound(-0.5 * tau * (T.log(value) - mu)**2 - + 0.5 * T.log(tau / (2. * np.pi)) - - T.log(value), - tau > 0) + return bound(-0.5 * tau * (tt.log(value) - mu)**2 + + 0.5 * tt.log(tau / (2. * np.pi)) - + tt.log(value), tau > 0) class StudentT(UnivariateContinuous): @@ -642,18 +657,20 @@ class StudentT(UnivariateContinuous): Scale parameter (lam > 0). 
""" - def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): - self.nu = nu = T.as_tensor_variable(nu) + def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): + self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu - self.variance = T.switch((nu > 2) * 1, - (1 / self.lam) * (nu / (nu - 2)), - np.inf) + self.variance = tt.switch((nu > 2) * 1, + (1 / self.lam) * (nu / (nu - 2)), + np.inf) self.dist_params = (self.nu, self.mu, self.lam) - super(StudentT, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(StudentT, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): nu, mu, lam = draw_values([self.nu, self.mu, self.lam], @@ -668,10 +685,10 @@ def logp(self, value): lam = self.lam sd = self.sd - return bound(gammaln((nu + 1.0) / 2.0) - + .5 * T.log(lam / (nu * np.pi)) - - gammaln(nu / 2.0) - - (nu + 1.0) / 2.0 * T.log1p(lam * (value - mu)**2 / nu), + return bound(gammaln((nu + 1.0) / 2.0) + + .5 * tt.log(lam / (nu * np.pi)) - + gammaln(nu / 2.0) - + (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu)**2 / nu), lam > 0, nu > 0, sd > 0) @@ -701,19 +718,22 @@ class Pareto(PositiveUnivariateContinuous): Scale parameter (m > 0). """ - def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.m = T.as_tensor_variable(m) - self.mean = T.switch(T.gt(alpha, 1), alpha * m / (alpha - 1.), np.inf) + def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.m = tt.as_tensor_variable(m) + self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.), + np.inf) self.median = m * 2.**(1. / alpha) - self.variance = T.switch( - T.gt(alpha, 2), + self.variance = tt.switch( + tt.gt(alpha, 2), (alpha * m**2) / ((alpha - 2.) 
* (alpha - 1.)**2), np.inf) self.dist_params = (self.alpha, self.m) - super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -729,8 +749,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha m = self.m - return bound(T.log(alpha) + logpow(m, alpha) - - logpow(value, alpha + 1), + return bound(tt.log(alpha) + logpow(m, alpha) - + logpow(value, alpha + 1), value >= m, alpha > 0, m > 0) @@ -760,13 +780,15 @@ class Cauchy(UnivariateContinuous): Scale parameter > 0 """ - def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): - self.median = self.mode = self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) + def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.dist_params = (self.alpha, self.beta) - super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, alpha, beta, size=None): u = np.random.uniform(size=size) @@ -782,8 +804,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(- T.log(np.pi) - T.log(beta) - - T.log1p(((value - alpha) / beta)**2), + return bound(- tt.log(np.pi) - tt.log(beta) - + tt.log1p(((value - alpha) / beta)**2), beta > 0) @@ -808,14 +830,16 @@ class HalfCauchy(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, beta, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mode = T.as_tensor_variable(0.) - self.beta = T.as_tensor_variable(beta) + def __init__(self, beta, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.mode = tt.as_tensor_variable(0.) + self.beta = tt.as_tensor_variable(beta) self.median = beta self.dist_params = (self.beta,) - super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, beta, size=None): u = np.random.uniform(size=size) @@ -829,8 +853,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): beta = self.beta - return bound(T.log(2) - T.log(np.pi) - T.log(beta) - - T.log1p((value / beta)**2), + return bound(tt.log(2) - tt.log(np.pi) - tt.log(beta) - + tt.log1p((value / beta)**2), value >= 0, beta > 0) @@ -873,17 +897,19 @@ class Gamma(PositiveUnivariateContinuous): Alternative scale parameter (sd > 0). 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta self.mean = alpha / beta - self.mode = T.maximum((alpha - 1) / beta, 0) + self.mode = tt.maximum((alpha - 1) / beta, 0) self.variance = alpha / beta**2 self.dist_params = (self.alpha, self.beta) - super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -896,7 +922,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): 'alpha and beta, or mu and sd to specify ' 'distribution.') - return T.as_tensor_variable(alpha), T.as_tensor_variable(beta) + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -942,18 +968,20 @@ class InverseGamma(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) + def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, + **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.mean = (alpha > 1) * beta / (alpha - 1.) or np.inf self.mode = beta / (alpha + 1.) - self.variance = T.switch(T.gt(alpha, 2), - (beta**2) / (alpha * (alpha - 1.)**2), - np.inf) + self.variance = tt.switch(tt.gt(alpha, 2), + (beta**2) / (alpha * (alpha - 1.)**2), + np.inf) self.dist_params = (self.alpha, self.beta) - super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -965,8 +993,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value - + logpow(value, -alpha - 1), + return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value + + logpow(value, -alpha - 1), value > 0, alpha > 0, beta > 0) @@ -991,8 +1019,10 @@ class ChiSquared(Gamma): """ def __init__(self, nu, *args, **kwargs): - self.nu = T.as_tensor_variable(nu) - super(ChiSquared, self).__init__(alpha=self.nu / 2., beta=T.as_tensor_variable(0.5), *args, **kwargs) + self.nu = tt.as_tensor_variable(nu) + super(ChiSquared, self).__init__(alpha=self.nu / 2., + beta=tt.as_tensor_variable(0.5), + *args, **kwargs) class Weibull(PositiveUnivariateContinuous): @@ -1019,17 +1049,19 @@ class Weibull(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) - self.mean = beta * T.exp(gammaln(1 + 1. / alpha)) - self.median = beta * T.exp(gammaln(T.log(2)))**(1. 
/ alpha) + def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) + self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. / alpha) self.variance = (beta**2) * \ - T.exp(gammaln(1 + 2. / alpha - self.mean**2)) + tt.exp(gammaln(1 + 2. / alpha - self.mean**2)) self.dist_params = (self.alpha, self.beta) - super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -1045,9 +1077,9 @@ def _random(a, b, size=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(T.log(alpha) - T.log(beta) - + (alpha - 1) * T.log(value / beta) - - (value / beta)**alpha, + return bound(tt.log(alpha) - tt.log(beta) + + (alpha - 1) * tt.log(value / beta) - + (value / beta)**alpha, value >= 0, alpha > 0, beta > 0) @@ -1155,16 +1187,18 @@ class ExGaussian(UnivariateContinuous): Vol. 4, No. 1, pp 35-45. """ - def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = T.as_tensor_variable(mu) - self.sigma = T.as_tensor_variable(sigma) - self.nu = T.as_tensor_variable(nu) + def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.sigma = tt.as_tensor_variable(sigma) + self.nu = tt.as_tensor_variable(nu) self.mean = mu + nu self.variance = (sigma**2) + (nu**2) self.dist_params = (self.mu, self.sigma, self.nu) - super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], @@ -1184,11 +1218,12 @@ def logp(self, value): nu = self.nu # This condition suggested by exGAUS.R from gamlss - lp = T.switch(T.gt(nu, 0.05 * sigma), - - T.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu)**2 - + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.), - - T.log(sigma * T.sqrt(2 * np.pi)) - - 0.5 * ((value - mu) / sigma)**2) + lp = tt.switch(tt.gt(nu, 0.05 * sigma), + -tt.log(nu) + (mu - value) / nu + + 0.5 * (sigma / nu)**2 + + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.), + -tt.log(sigma * tt.sqrt(2 * np.pi)) + + 0.5 * ((value - mu) / sigma)**2) return bound(lp, sigma > 0., nu > 0.) @@ -1214,15 +1249,16 @@ class VonMises(UnivariateContinuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.mu = T.as_tensor_variable( - mu) - self.kappa = T.as_tensor_variable(kappa) + def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, + size=None, dtype=None, *args, **kwargs): + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) + self.kappa = tt.as_tensor_variable(kappa) self.variance = 1. 
- i1(kappa) / i0(kappa) self.dist_params = (self.mu, self.kappa) - super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() @@ -1237,4 +1273,6 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu kappa = self.kappa - return bound(kappa * T.cos(mu - value) - T.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0) + return bound(kappa * tt.cos(mu - value) - + tt.log(2 * np.pi * i0(kappa)), + value >= -np.pi, value <= np.pi, kappa >= 0) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 1015208c768..bfc62ebd18b 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -2,7 +2,7 @@ import numpy as np import theano -import theano.tensor as T +import theano.tensor as tt from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow @@ -36,14 +36,16 @@ class Binomial(UnivariateDiscrete): p : float Probability of success in each trial (0 < p < 1). """ + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.n = T.as_tensor_variable(n) - self.p = T.as_tensor_variable(p) - self.mode = T.cast(T.round(n * p), self.dtype) + self.n = tt.as_tensor_variable(n) + self.p = tt.as_tensor_variable(p) + self.mode = tt.cast(tt.round(n * p), self.dtype) self.dist_params = (self.n, self.p) - super(Binomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Binomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) @@ -89,15 +91,17 @@ class BetaBinomial(UnivariateDiscrete): beta : float beta > 0. """ + def __init__(self, alpha, beta, n, ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = T.as_tensor_variable(alpha) - self.beta = T.as_tensor_variable(beta) - self.n = T.as_tensor_variable(n) - self.mode = T.cast(T.round(alpha / (alpha + beta)), 'int8') + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.n = tt.as_tensor_variable(n) + self.mode = tt.cast(tt.round(alpha / (alpha + beta)), 'int8') self.dist_params = (self.alpha, self.beta, self.n) - super(BetaBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(BetaBinomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -147,13 +151,15 @@ class Bernoulli(UnivariateDiscrete): p : float Probability of success (0 < p < 1). """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.p = T.as_tensor_variable(p) - self.mode = T.cast(T.round(p), 'int8') + self.p = tt.as_tensor_variable(p) + self.mode = tt.cast(tt.round(p), 'int8') self.dist_params = (self.p,) # FIXME: bcast, etc. 
- super(Bernoulli, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Bernoulli, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -164,7 +170,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): p = self.p return bound( - T.switch(value, T.log(p), T.log(1 - p)), + tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1) @@ -195,12 +201,14 @@ class Poisson(UnivariateDiscrete): The Poisson distribution can be derived as a limiting case of the binomial distribution. """ + def __init__(self, mu, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = T.as_tensor_variable(mu) - self.mode = T.floor(mu).astype('int32') + self.mu = tt.as_tensor_variable(mu) + self.mode = tt.floor(mu).astype('int32') self.dist_params = (self.mu,) - super(Poisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Poisson, self).__init__(self.dist_params, + ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) @@ -240,13 +248,15 @@ class NegativeBinomial(UnivariateDiscrete): alpha : float Gamma distribution parameter (alpha > 0). """ + def __init__(self, mu, alpha, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = T.as_tensor_variable(mu) - self.alpha = T.as_tensor_variable(alpha) - self.mode = T.floor(mu).astype('int32') + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.mode = tt.floor(mu).astype('int32') self.dist_params = (self.mu, self.alpha) - super(NegativeBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(NegativeBinomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) @@ -265,9 +275,9 @@ def logp(self, value): value >= 0, mu > 0, alpha > 0) # Return Poisson when alpha gets very large. - return T.switch(1 * (alpha > 1e10), - Poisson.dist(self.mu).logp(value), - negbinom) + return tt.switch(1 * (alpha > 1e10), + Poisson.dist(self.mu).logp(value), + negbinom) class Geometric(UnivariateDiscrete): @@ -290,12 +300,14 @@ class Geometric(UnivariateDiscrete): p : float Probability of success on an individual trial (0 < p <= 1). """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.p = T.as_tensor_variable(p) - self.mode = T.as_tensor_variable(1) + self.p = tt.as_tensor_variable(p) + self.mode = tt.as_tensor_variable(1) self.dist_params = (self.p,) - super(Geometric, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Geometric, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -305,7 +317,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): p = self.p - return bound(T.log(p) + logpow(1 - p, value - 1), + return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) @@ -328,13 +340,15 @@ class DiscreteUniform(UnivariateDiscrete): upper : int Upper limit (upper > lower). 
""" + def __init__(self, lower, upper, ndim=None, size=None, dtype=None, *args, **kwargs): - self.lower = T.floor(lower).astype('int32') - self.upper = T.floor(upper).astype('int32') - self.mode = T.floor((upper - lower) / 2.).astype('int32') + self.lower = tt.floor(lower).astype('int32') + self.upper = tt.floor(upper).astype('int32') + self.mode = tt.floor((upper - lower) / 2.).astype('int32') self.dist_params = (self.lower, self.upper) - super(DiscreteUniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(DiscreteUniform, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -353,7 +367,7 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): upper = self.upper lower = self.lower - return bound(-T.log(upper - lower + 1), + return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper) @@ -374,16 +388,17 @@ class Categorical(UnivariateDiscrete): p : float p > 0 and the elements of p must sum to 1. """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - self.p = T.as_tensor_variable(p) - self.shape_support = T.shape(p)[-1] - self.mode = T.argmax(p) + self.p = tt.as_tensor_variable(p) + self.shape_support = tt.shape(p)[-1] + self.mode = tt.argmax(p) self.dist_params = (self.p,) # FIXME: The stated support is univariate? - super(Categorical, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) - + super(Categorical, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p, k = draw_values([self.p, self.shape_support], point=point) @@ -397,11 +412,12 @@ def logp(self, value): p = self.p k = self.k - sumto1 = theano.gradient.zero_grad(T.le(abs(T.sum(p, axis=-1) - 1), 1e-5)) + sumto1 = theano.gradient.zero_grad( + tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5)) if p.ndim > 1: - a = T.log(p[T.arange(p.shape[0]), value]) + a = tt.log(p[tt.arange(p.shape[0]), value]) else: - a = T.log(p[value]) + a = tt.log(p[value]) return bound(a, value >= 0, value <= (k - 1), sumto1) @@ -416,10 +432,12 @@ class ConstantDist(UnivariateDiscrete): value : float or int Constant parameter. 
""" + def __init__(self, c, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.c = T.as_tensor_variable(c) + self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) self.dist_params = (self.c,) - super(ConstantDist, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ConstantDist, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -433,27 +451,30 @@ def _random(c, dtype=dtype, size=None): def logp(self, value): c = self.c - return bound(0, T.eq(value, c)) + return bound(0, tt.eq(value, c)) class ZeroInflatedPoisson(UnivariateDiscrete): + def __init__(self, theta, z, ndim=None, size=None, dtype=None, *args, **kwargs): - self.theta = T.as_tensor_variable(theta) - self.z = T.as_tensor_variable(z) + self.theta = tt.as_tensor_variable(theta) + self.z = tt.as_tensor_variable(z) self.pois = Poisson.dist(theta) self.const = ConstantDist.dist(0) self.mode = self.pois.mode self.dist_params = (self.theta, self.z) - super(ZeroInflatedPoisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ZeroInflatedPoisson, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): #theta = draw_values([self.theta], point=point) # TODO: Finish me - raise NotImplementedError("zero inflated poisson sampling not implemented!") + raise NotImplementedError( + "zero inflated poisson sampling not implemented!") def logp(self, value): - return T.switch(self.z, - self.pois.logp(value), - self.const.logp(value)) + return tt.switch(self.z, + self.pois.logp(value), + self.const.logp(value)) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index d8ac0eb044e..68e7cab8f4c 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,5 +1,5 @@ import numpy as np -import theano.tensor as T +import theano.tensor as tt from theano import function from theano.tensor.raw_random import _infer_ndim_bcast @@ -7,9 +7,12 @@ from ..model import Model, get_named_nodes -__all__ = ['DensityDist', 'Distribution', - 'Continuous', 'Discrete', 'MultivariateContinuous', 'UnivariateContinuous', - 'MultivariateDiscrete', 'UnivariateDiscrete', 'NoDistribution'] +#__all__ = ['DensityDist', 'Distribution', +# 'Continuous', 'Discrete', 'MultivariateContinuous', 'UnivariateContinuous', +# 'MultivariateDiscrete', 'UnivariateDiscrete', 'NoDistribution'] + +__all__ = ['DensityDist', 'Distribution', 'Continuous', + 'Discrete', 'NoDistribution', 'TensorType'] def _as_tensor_shape_variable(var): @@ -17,13 +20,13 @@ def _as_tensor_shape_variable(var): `_infer_ndim_bcast` """ if var is None: - return T.constant([], dtype='int64') + return tt.constant([], dtype='int64') res = var if isinstance(res, (tuple, list)): if len(res) == 0: - return T.constant([], dtype='int64') - res = T.as_tensor_variable(res, ndim=1) + return tt.constant([], dtype='int64') + res = tt.as_tensor_variable(res, ndim=1) else: if res.ndim != 1: @@ -106,10 +109,10 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, Here are a few ways of inferring shapes: * When a distribution is scalar, then `shape_supp = ()` - * and has an informative parameter, e.g. `mu`, then `shape_ind = T.shape(mu)`. + * and has an informative parameter, e.g. `mu`, then `shape_ind = tt.shape(mu)`. 
* When a distribution is multivariate * and has an informative parameter, e.g. `mu`, then - `shape_supp = T.shape(mu)[-ndim_supp:]` and `shape_ind = T.shape(mu)[-ndim_supp:]`. + `shape_supp = tt.shape(mu)[-ndim_supp:]` and `shape_ind = tt.shape(mu)[-ndim_supp:]`. In all remaining cases the shapes must be provided by the caller. `shape_reps` is always provided by the caller. @@ -142,15 +145,15 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, """ self.shape_supp = _as_tensor_shape_variable(shape_supp) - self.ndim_supp = T.get_vector_length(self.shape_supp) + self.ndim_supp = tt.get_vector_length(self.shape_supp) self.shape_ind = _as_tensor_shape_variable(shape_ind) - self.ndim_ind = T.get_vector_length(self.shape_ind) + self.ndim_ind = tt.get_vector_length(self.shape_ind) self.shape_reps = _as_tensor_shape_variable(shape_reps) - self.ndim_reps = T.get_vector_length(self.shape_reps) + self.ndim_reps = tt.get_vector_length(self.shape_reps) ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps if ndim_sum == 0: - self.shape = T.constant([], dtype='int64') + self.shape = tt.constant([], dtype='int64') else: self.shape = tuple(self.shape_reps) +\ tuple(self.shape_ind) +\ @@ -158,16 +161,16 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, if testval is None: if ndim_sum == 0: - testval = T.constant(0, dtype=dtype) + testval = tt.constant(0, dtype=dtype) else: - testval = T.zeros(self.shape) + testval = tt.zeros(self.shape) - self.ndim = T.get_vector_length(self.shape) + self.ndim = tt.get_vector_length(self.shape) self.testval = testval self.defaults = defaults self.transform = transform - self.type = T.TensorType(str(dtype), bcast) + self.type = tt.TensorType(str(dtype), bcast) def default(self): return self.get_test_val(self.testval, self.defaults) @@ -192,11 +195,11 @@ def getattr_value(self, val): if isinstance(val, str): val = getattr(self, val) - if isinstance(val, T.sharedvar.SharedVariable): + if isinstance(val, tt.sharedvar.SharedVariable): return val.get_value() - elif isinstance(val, T.TensorVariable): + elif isinstance(val, tt.TensorVariable): return val.tag.test_value - elif isinstance(val, T.TensorConstant): + elif isinstance(val, tt.TensorConstant): return val.value return val @@ -204,6 +207,22 @@ def getattr_value(self, val): class NoDistribution(Distribution): + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=[], transform=None, parent_dist=None, + *args, **kwargs): + super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype=dtype, + testval=testval, + defaults=defaults, *args, + **kwargs) + self.parent_dist = parent_dist + + def __getattr__(self, name): + try: + self.__dict__[name] + except KeyError: + return getattr(self.parent_dist, name) + def logp(self, x): return 0 @@ -211,32 +230,38 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - def __init__(self, ndim_support, ndim, size, bcast, dtype='int64', defaults=['mode'], *args, **kwargs): - super(Discrete, self).__init__(ndim_support, ndim, size, - bcast, dtype, defaults=defaults, *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype='int64', + defaults=['mode'], *args, **kwargs): + super(Discrete, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, *args, + **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" def __init__(self, shape_supp, shape_ind, shape_reps, 
bcast, - dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): + dtype='float64', defaults=['median', 'mean', 'mode'], *args, + **kwargs): super(Continuous, self).__init__(shape_supp, shape_ind, shape_reps, - bcast, dtype, defaults=defaults, *args, **kwargs) + bcast, dtype, defaults=defaults, + *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', *args, **kwargs): - super(DensityDist, self).__init__( - ndim, size, bcast, dtype, *args, **kwargs) + def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', + *args, **kwargs): + super(DensityDist, self).__init__(ndim, size, bcast, dtype, *args, + **kwargs) self.logp = logp class UnivariateContinuous(Continuous): - def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *args, **kwargs): + def __init__(self, dist_params, ndim=None, size=None, dtype=None, + bcast=None, *args, **kwargs): """ Parameters @@ -248,11 +273,12 @@ def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *a """ self.shape_supp = () - dist_params = tuple(T.as_tensor_variable(x) for x in dist_params) + dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) ndim, size, bcast = _infer_ndim_bcast(*(ndim, size) + dist_params) if dtype is None: - dtype = T.scal.upcast(*(T.config.floatX,) + tuple(x.dtype for x in dist_params)) + dtype = tt.scal.upcast(*(tt.config.floatX,) + + tuple(x.dtype for x in dist_params)) # We just assume super(UnivariateContinuous, self).__init__( diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 06271d5b304..e89bebe1f49 100644 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -1,11 +1,11 @@ import warnings import numpy as np -import theano.tensor as T +import theano.tensor as tt import theano from scipy import stats -from theano.tensor.nlinalg import det, matrix_inverse, trace, eigh +from theano.tensor.nlinalg import det, matrix_inverse, trace from . import transforms from .distribution import MultivariateContinuous, MultivariateDiscrete, draw_values, generate_samples @@ -45,9 +45,9 @@ class MvNormal(MultivariateContinuous): """ def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.mu = T.as_tensor_variable( + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( mu) - self.tau = T.as_tensor_variable(tau) + self.tau = tt.as_tensor_variable(tau) self.dist_params = (self.mu, self.tau) @@ -56,15 +56,15 @@ def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): shape_ind = self.mu.shape[:-1] if self.mu.ndim > 0: - bcast = (False,) * (1 + T.get_vector_length(shape_ind)) + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) else: bcast = (False,) if dtype is None: dtype = self.mu.dtype - super(MvNormal, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + super(MvNormal, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def random(self, point=None, size=None): @@ -76,13 +76,14 @@ def random(self, point=None, size=None): # def random(rng, point=[], size=None): # # if size is None: - # size = T.constant([], dtype='int64') + # size = tt.constant([], dtype='int64') # type = self.type # else: - # size = T.as_tensor_variable(size) + # size = tt.as_tensor_variable(size) # type = ... 
# op = RandomFunction('multivariate_normal', type) - # rand_op = lambda random_state: op(random_state, size, self.mu, T.nlinalg.matrix_invers(self.tau)) + # rand_op = lambda random_state: op(random_state, size, self.mu, + # tt.nlinalg.matrix_invers(self.tau)) # rand_sym_res = rng.gen(rand_op) # return function(point, rand_sym_res)() # @@ -118,30 +119,33 @@ def _random(mean, tau, size=None): return samples def logp(self, value): - mu = T.as_tensor_variable(self.mu) - tau = T.as_tensor_variable(self.tau) + mu = tt.as_tensor_variable(self.mu) + tau = tt.as_tensor_variable(self.tau) - reps_shape_prod = T.prod(self.shape_reps, keepdims=True) + reps_shape_prod = tt.prod(self.shape_reps, keepdims=True) # collapse reps dimensions - flat_supp_shape = T.concatenate((reps_shape_prod, self.shape_supp)) + flat_supp_shape = tt.concatenate((reps_shape_prod, self.shape_supp)) mus_collapsed = mu.reshape(flat_supp_shape, ndim=2) - taus_collapsed = tau.reshape(T.concatenate((reps_shape_prod, - self.shape_supp, self.shape_supp)), ndim=3) + taus_collapsed = tau.reshape(tt.concatenate((reps_shape_prod, + self.shape_supp, + self.shape_supp)), ndim=3) # force value to conform to reps_shape - value_reshape = T.ones_like(mu) * value + value_reshape = tt.ones_like(mu) * value values_collapsed = value_reshape.reshape(flat_supp_shape, ndim=2) def single_logl(_mu, _tau, _value, k): delta = _value - _mu - result = k * T.log(2 * np.pi) + T.log(det(_tau)) - result += T.square(delta.dot(_tau)).sum(axis=-1) + result = k * tt.log(2 * np.pi) + tt.log(det(_tau)) + result += tt.square(delta.dot(_tau)).sum(axis=-1) return -result / 2 from theano import scan - res, _ = scan(fn=single_logl, sequences=[mus_collapsed, taus_collapsed, values_collapsed], non_sequences=[self.shape_supp], strict=True - ) + res, _ = scan(fn=single_logl, + sequences=[mus_collapsed, taus_collapsed, + values_collapsed], + non_sequences=[self.shape_supp], strict=True) return res.sum() @@ -176,10 +180,10 @@ class Dirichlet(MultivariateContinuous): """ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, dtype=None, *args, **kwargs): - self.a = T.as_tensor_variable(a) - self.mean = self.a / T.sum(self.a) - self.mode = T.switch(T.all(self.a > 1), - (self.a - 1) / T.sum(self.a - 1), + self.a = tt.as_tensor_variable(a) + self.mean = self.a / tt.sum(self.a) + self.mode = tt.switch(tt.all(self.a > 1), + (self.a - 1) / tt.sum(self.a - 1), np.nan) self.dist_params = (self.a,) @@ -190,15 +194,16 @@ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, # FIXME: this isn't correct/ideal if self.a.ndim > 0: - bcast = (False,) * (1 + T.get_vector_length(shape_ind)) + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) else: bcast = (False,) if dtype is None: dtype = self.mu.dtype - super(Dirichlet, self).__init__(shape_supp, shape_ind, (), - bcast, dtype, transform=transform, *args, **kwargs) + super(Dirichlet, self).__init__(shape_supp, shape_ind, (), bcast, + dtype, transform=transform, *args, + **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -216,10 +221,9 @@ def logp(self, value): a = self.a # only defined for sum(value) == 1 - return bound(T.sum(logpow(value, a - 1) - gammaln(a), axis=0) - + gammaln(T.sum(a, axis=0)), - T.all(value >= 0), T.all(value <= 1), - k > 1, T.all(a > 0)) + return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=0) + + gammaln(tt.sum(a, axis=0)), tt.all(value >= 0), + tt.all(value <= 1), k > 1, tt.all(a > 0)) class 
Multinomial(MultivariateDiscrete): @@ -253,12 +257,13 @@ class Multinomial(MultivariateDiscrete): be non-negative and sum to 1. """ - def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, + **kwargs): # TODO: allow tensor of independent n's. - self.n = T.as_tensor_variable(n, ndim=0) - self.p = T.as_tensor_variable(p) + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p) self.mean = n * p - self.mode = T.cast(T.round(n * p), 'int32') + self.mode = tt.cast(tt.round(n * p), 'int32') self.dist_params = (self.n, self.p) @@ -273,8 +278,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): dtype = self.p.dtype # FIXME: n.shape == p.shape? bcast, etc. - super(Multinomial, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + super(Multinomial, self).__init__(shape_supp, shape_ind, (), bcast, + dtype, *args, **kwargs) def _random(self, n, p, size=None): if size == p.shape: @@ -293,8 +298,8 @@ def logp(self, x): p = self.p # only defined for sum(p) == 1 return bound( - factln(n) + T.sum(x * T.log(p) - factln(x)), - T.all(x >= 0), T.all(x <= n), T.eq(T.sum(x), n), + factln(n) + tt.sum(x * tt.log(p) - factln(x)), + tt.all(x >= 0), tt.all(x <= n), tt.eq(tt.sum(x), n), n >= 0) @@ -320,7 +325,7 @@ class PosDefMatrix(theano.Op): def make_node(self, x): x = theano.tensor.as_tensor_variable(x) assert x.ndim == 2 - o = T.TensorType(dtype='int8', broadcastable=[])() + o = tt.TensorType(dtype='int8', broadcastable=[])() return theano.Apply(self, [x], [o]) # Python implementation: @@ -381,7 +386,8 @@ class Wishart(MultivariateContinuous): p x p positive definite matrix. """ - def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, + **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -390,12 +396,9 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): 'https://github.com/pymc-devs/pymc3/issues/538.', UserWarning) # TODO: allow tensor of independent n's. 
- self.n = T.as_tensor_variable(n, ndim=0) - self.V = T.as_tensor_variable(V) - self.mean = n * V - self.mode = T.switch(1 * (n >= self.shape_support + 1), - (n - self.shape_support - 1) * V, - np.nan) + self.n = tt.as_tensor_variable(n, ndim=0) + self.V = tt.as_tensor_variable(V) + self.mean = self.n * self.V self.dist_params = (self.n, self.V) @@ -408,8 +411,12 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): if dtype is None: dtype = self.V.dtype - super(Wishart, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + self.mode = tt.switch(1 * (self.n >= self.shape_supp + 1), + (self.n - self.shape_supp - 1) * self.V, + np.nan) + + super(Wishart, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def logp(self, X): n = self.n @@ -419,12 +426,12 @@ def logp(self, X): IVI = det(V) IXI = det(X) - return bound(((n - p - 1) * T.log(IXI) - - trace(matrix_inverse(V).dot(X)) - - n * p * T.log(2) - n * T.log(IVI) - - 2 * multigammaln(n / 2., p)) / 2, + return bound(((n - p - 1) * tt.log(IXI) - + trace(matrix_inverse(V).dot(X)) - + n * p * tt.log(2) - n * tt.log(IVI) - + 2 * multigammaln(n / 2., p)) / 2, matrix_pos_def(X), - T.eq(X, X.T), + tt.eq(X, X.T), n > (p - 1)) @@ -468,18 +475,19 @@ class LKJCorr(MultivariateContinuous): 100(9), pp.1989-2001. """ - def __init__(self, n, p, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, + **kwargs): # TODO: allow tensor of independent n's. - self.n = T.as_tensor_variable(n, ndim=0) - self.p = T.as_tensor_variable(p, ndim=0) + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p, ndim=0) n_elem = (p * (p - 1) / 2) - self.mean = T.zeros(n_elem) + self.mean = tt.zeros(n_elem) self.shape_support = n_elem # FIXME: triu, bcast, etc. - self.tri_index = T.zeros((p, p), dtype=int) - self.tri_index[T.triu(p, k=1)] = T.arange(n_elem) - self.tri_index[T.triu(p, k=1)[::-1]] = T.arange(n_elem) + self.tri_index = tt.zeros((p, p), dtype=int) + self.tri_index[tt.triu(p, k=1)] = tt.arange(n_elem) + self.tri_index[tt.triu(p, k=1)[::-1]] = tt.arange(n_elem) self.dist_params = (self.n, self.p) @@ -492,24 +500,24 @@ def __init__(self, n, p, *args, **kwargs): if dtype is None: dtype = self.n.dtype - super(LKJCorr, self).__init__(shape_supp, - shape_ind, (), bcast, dtype, *args, **kwargs) + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def _normalizing_constant(self, n, p): if n == 1: - result = gammaln(2. * T.arange(1, int((p - 1) / 2) + 1)).sum() + result = gammaln(2. * tt.arange(1, int((p - 1) / 2) + 1)).sum() if p % 2 == 1: - result += (0.25 * (p ** 2 - 1) * T.log(np.pi) - - 0.25 * (p - 1) ** 2 * T.log(2.) + result += (0.25 * (p ** 2 - 1) * tt.log(np.pi) + - 0.25 * (p - 1) ** 2 * tt.log(2.) - (p - 1) * gammaln(int((p + 1) / 2))) else: - result += (0.25 * p * (p - 2) * T.log(np.pi) - + 0.25 * (3 * p ** 2 - 4 * p) * T.log(2.) + result += (0.25 * p * (p - 2) * tt.log(np.pi) + + 0.25 * (3 * p ** 2 - 4 * p) * tt.log(2.) + p * gammaln(p / 2) - (p - 1) * gammaln(p)) else: result = -(p - 1) * gammaln(n + 0.5 * (p - 1)) - k = T.arange(1, p) - result += (0.5 * k * T.log(np.pi) + k = tt.arange(1, p) + result += (0.5 * k * tt.log(np.pi) + gammaln(n + 0.5 * (p - 1 - k))).sum() return result @@ -518,11 +526,11 @@ def logp(self, x): p = self.p X = x[self.tri_index] - X = T.fill_diagonal(X, 1) + X = tt.fill_diagonal(X, 1) result = self._normalizing_constant(n, p) - result += (n - 1.) 
* T.log(det(X)) + result += (n - 1.) * tt.log(det(X)) return bound(result, - T.all(X <= 1), T.all(X >= -1), + tt.all(X <= 1), tt.all(X >= -1), matrix_pos_def(X), n > 0) diff --git a/pymc3/model.py b/pymc3/model.py index cc14d298d6b..06061f7cfb4 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1,6 +1,6 @@ import numpy as np import theano -import theano.tensor as T +import theano.tensor as tt from theano.tensor.var import TensorVariable from .memoize import memoize @@ -163,7 +163,7 @@ def fastd2logp(self, vars=None): @property def logpt(self): """Theano scalar of log-probability of the model""" - return T.sum(self.logp_elemwiset) + return tt.sum(self.logp_elemwiset) class Model(Context, Factor): @@ -192,14 +192,14 @@ def __init__(self, verbose=1): def logpt(self): """Theano scalar of log-probability of the model""" factors = [var.logpt for var in self.basic_RVs] + self.potentials - return T.add(*map(T.sum, factors)) + return tt.add(*map(tt.sum, factors)) @property def varlogpt(self): """Theano scalar of log-probability of the unobserved random variables (excluding deterministic).""" factors = [var.logpt for var in self.vars] - return T.add(*map(T.sum, factors)) + return tt.add(*map(tt.sum, factors)) @property def vars(self): @@ -477,13 +477,14 @@ def __init__(self, type=None, owner=None, index=None, name=None, if distribution is not None: self.dshape = distribution.shape - self.dsize = T.prod(distribution.shape) + self.dsize = tt.prod(distribution.shape) self.distribution = distribution # TODO: why should we do this? shouldn't the default/test_val # of the distribution be enough? - #dist_test_val = distribution.default() - #self.tag.test_value = np.ones( - # distribution.shape.tag.test_value, distribution.dtype) * dist_test_val + # dist_test_val = distribution.default() + # self.tag.test_value = np.ones( + # distribution.shape.tag.test_value, distribution.dtype) * + # dist_test_val self.tag.test_value = distribution.default() self.logp_elemwiset = distribution.logp(self) self.model = model @@ -522,14 +523,14 @@ def as_tensor(data, name, model, dtype): missing_values = FreeRV(name=name + '_missing', distribution=fakedist, model=model) - constant = T.as_tensor_variable(data.filled()) + constant = tt.as_tensor_variable(data.filled()) dataTensor = theano.tensor.set_subtensor(constant[data.mask.nonzero()], missing_values) dataTensor.missing_values = missing_values return dataTensor else: - data = T.as_tensor_variable(data, name=name) + data = tt.as_tensor_variable(data, name=name) data.missing_values = None return data
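
Note on the shape decomposition introduced in pymc3/distributions/distribution.py:
a variable's overall shape is assembled from three pieces -- the support shape
(`shape_supp`), the shape of the independent-but-not-identical dimensions
(`shape_ind`), and the shape of the replications (`shape_reps`) -- concatenated
as reps + ind + supp in `Distribution.__init__` above. A minimal plain-Python
sketch of that composition (illustrative only, not code from the patch; the
helper name `full_shape` is made up for the example):

    def full_shape(shape_supp, shape_ind, shape_reps):
        # Combine the three shape pieces in the same order Distribution.__init__
        # uses: replications first, then independent dims, then support dims.
        return tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

    # A 3-dimensional multivariate support (shape_supp == (3,)) with a 2 x 4
    # array of independent parameters (shape_ind == (2, 4)), replicated 5 times:
    assert full_shape((3,), (2, 4), (5,)) == (5, 2, 4, 3)

    # A scalar univariate variable with no independent or repetition
    # dimensions has an empty shape:
    assert full_shape((), (), ()) == ()

The univariate constructors delegate the `ndim`/`size`/`dtype` handling to
Theano's `_infer_ndim_bcast` and `tt.scal.upcast`, as seen in
`UnivariateContinuous.__init__` above.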