diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 58b4ae09478..fdad4344ff9 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -14,7 +14,7 @@ from . import transforms from .dist_math import bound, logpow, gammaln, betaln, std_cdf, i0, i1 -from .distribution import Continuous, draw_values, generate_samples +from .distribution import Univariate, Continuous, draw_values, generate_samples __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace', 'StudentT', 'Cauchy', 'HalfCauchy', 'Gamma', 'Weibull', @@ -23,20 +23,25 @@ 'VonMises', 'SkewNormal'] -class PositiveContinuous(Continuous): - """Base class for positive continuous distributions""" +class PositiveUnivariateContinuous(Univariate, Continuous): + """Base class for positive univariate continuous distributions""" - def __init__(self, transform=transforms.log, *args, **kwargs): - super(PositiveContinuous, self).__init__( - transform=transform, *args, **kwargs) + def __init__(self, dist_params, *args, **kwargs): + transform = kwargs.get('transform', transforms.log) + super(PositiveUnivariateContinuous, self).__init__(dist_params, + transform=transform, + *args, **kwargs) -class UnitContinuous(Continuous): - """Base class for continuous distributions on [0,1]""" +class UnitUnivariateContinuous(Univariate, Continuous): + """Base class for univariate continuous distributions in [0,1]""" + + def __init__(self, dist_params, *args, **kwargs): + transform = kwargs.get('transform', transforms.logodds) + super(UnitUnivariateContinuous, self).__init__(dist_params, + transform=transform, + *args, **kwargs) - def __init__(self, transform=transforms.logodds, *args, **kwargs): - super(UnitContinuous, self).__init__( - transform=transform, *args, **kwargs) def assert_negative_support(var, label, distname, value=-1e-6): # Checks for evidence of positive support for a variable @@ -44,8 +49,7 @@ def assert_negative_support(var, label, distname, value=-1e-6): return try: # Transformed distribution - support = np.isfinite(var.transformed.distribution.dist - .logp(value).tag.test_value) + support = np.isfinite(var.transformed.distribution.dist.logp(value).tag.test_value) except AttributeError: try: # Untransformed distribution @@ -61,7 +65,7 @@ def assert_negative_support(var, label, distname, value=-1e-6): def get_tau_sd(tau=None, sd=None): - """ + r""" Find precision and standard deviation .. math:: @@ -98,10 +102,10 @@ def get_tau_sd(tau=None, sd=None): tau = 1. * tau sd = 1. * sd - return (tau, sd) + return (tt.as_tensor_variable(tau), tt.as_tensor_variable(sd)) -class Uniform(Continuous): +class Uniform(Univariate, Continuous): R""" Continuous uniform log-likelihood. @@ -123,15 +127,18 @@ class Uniform(Continuous): Upper limit. """ - def __init__(self, lower=0, upper=1, transform='interval', - *args, **kwargs): - super(Uniform, self).__init__(*args, **kwargs) + def __init__(self, lower=0, upper=1, transform='interval', *args, **kwargs): + lower = tt.as_tensor_variable(lower) + upper = tt.as_tensor_variable(upper) self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. 
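For reference, the relationship that get_tau_sd implements above is simply tau = 1/sd**2 in both directions. A minimal plain-Python sketch of that logic follows; the name tau_sd_from_either is hypothetical and not part of the patch, and the patched helper additionally wraps both results with tt.as_tensor_variable before returning them.

def tau_sd_from_either(tau=None, sd=None):
    # Sketch of get_tau_sd's conversion: precision and standard deviation
    # determine each other through tau = 1 / sd**2.
    if tau is None and sd is None:
        tau, sd = 1.0, 1.0      # defaults to unit precision and scale
    elif tau is None:
        tau = sd ** -2.0        # derive precision from standard deviation
    elif sd is None:
        sd = tau ** -0.5        # derive standard deviation from precision
    else:
        raise ValueError("Can't pass both tau and sd")
    return tau, sd

print(tau_sd_from_either(sd=0.5))   # -> (4.0, 0.5)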
self.median = self.mean + dist_params = (self.lower, self.upper) + super(Uniform, self).__init__(dist_params, *args, **kwargs) + if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -147,18 +154,21 @@ def logp(self, value): lower = self.lower upper = self.upper return bound(-tt.log(upper - lower), - value >= lower, value <= upper) + value >= lower, value <= upper) -class Flat(Continuous): +class Flat(Univariate, Continuous): """ Uninformative log-likelihood that returns 0 regardless of the passed value. """ def __init__(self, *args, **kwargs): - super(Flat, self).__init__(*args, **kwargs) - self.median = 0 + + self.median = tt.as_tensor_variable(0.) + dist_params = (self.median,) + + super(Flat, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') @@ -167,7 +177,7 @@ def logp(self, value): return tt.zeros_like(value) -class Normal(Continuous): +class Normal(Univariate, Continuous): R""" Univariate normal log-likelihood. @@ -201,28 +211,19 @@ class Normal(Continuous): Precision (tau > 0). """ - def __init__(self, *args, **kwargs): + def __init__(self, mu=0.0, sd=None, tau=None, *args, **kwargs): + # FIXME In order to catch the case where Normal('x', 0, .1) is # called to display a warning we have to fetch the args and # kwargs manually. After a certain period we should revert # back to the old calling signature. - if len(args) == 1: - mu = args[0] - sd = kwargs.pop('sd', None) - tau = kwargs.pop('tau', None) - elif len(args) == 2: - warnings.warn(('The order of positional arguments to Normal()' - 'has changed. The new signature is:' - 'Normal(name, mu, sd) instead of Normal(name, mu, tau).'), - DeprecationWarning) - mu, sd = args - tau = kwargs.pop('tau', None) - else: - mu = kwargs.pop('mu', 0.) - sd = kwargs.pop('sd', None) - tau = kwargs.pop('tau', None) + warnings.warn(('The order of positional arguments to Normal()' + 'has changed. The new signature is:' + 'Normal(name, mu, sd) instead of Normal(name, mu, tau).'), + DeprecationWarning) + mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.variance = 1. / self.tau @@ -230,7 +231,9 @@ def __init__(self, *args, **kwargs): assert_negative_support(sd, 'sd', 'Normal') assert_negative_support(tau, 'tau', 'Normal') - super(Normal, self).__init__(**kwargs) + dist_params = (self.mu, self.tau) + + super(Normal, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -247,7 +250,7 @@ def logp(self, value): sd > 0) -class HalfNormal(PositiveContinuous): +class HalfNormal(PositiveUnivariateContinuous): R""" Half-normal log-likelihood. @@ -271,8 +274,8 @@ class HalfNormal(PositiveContinuous): Precision (tau > 0). """ - def __init__(self, sd=None, tau=None, *args, **kwargs): - super(HalfNormal, self).__init__(*args, **kwargs) + def __init__(self, tau=None, sd=None, *args, **kwargs): + self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. 
- 2 / np.pi) / self.tau @@ -280,6 +283,10 @@ def __init__(self, sd=None, tau=None, *args, **kwargs): assert_negative_support(tau, 'tau', 'HalfNormal') assert_negative_support(sd, 'sd', 'HalfNormal') + dist_params = (self.tau,) + + super(HalfNormal, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): sd = draw_values([self.sd], point=point) return generate_samples(stats.halfnorm.rvs, loc=0., scale=sd, @@ -294,7 +301,7 @@ def logp(self, value): tau > 0, sd > 0) -class Wald(PositiveContinuous): +class Wald(PositiveUnivariateContinuous): R""" Wald log-likelihood. @@ -351,7 +358,7 @@ class Wald(PositiveContinuous): """ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): - super(Wald, self).__init__(*args, **kwargs) + self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha @@ -363,22 +370,29 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): assert_negative_support(mu, 'mu', 'Wald') assert_negative_support(lam, 'lam', 'Wald') + dist_params = (self.mu, self.lam, self.alpha) + + super(Wald, self).__init__(dist_params, *args, **kwargs) + def get_mu_lam_phi(self, mu, lam, phi): + res = None if mu is None: if lam is not None and phi is not None: - return lam / phi, lam, phi + res = (lam / phi, lam, phi) else: if lam is None: if phi is None: - return mu, 1., 1. / mu + res = (mu, 1., 1. / mu) else: - return mu, mu * phi, phi + res = (mu, mu * phi, phi) else: if phi is None: - return mu, lam, lam / mu + res = (mu, lam, lam / mu) - raise ValueError('Wald distribution must specify either mu only, ' - 'mu and lam, mu and phi, or lam and phi.') + if res is None: + raise ValueError('Wald distribution must specify either mu only, ' + 'mu and lam, mu and phi, or lam and phi.') + return map(tt.as_tensor_variable, res) def _random(self, mu, lam, alpha, size=None): v = np.random.normal(size=size)**2 @@ -402,16 +416,15 @@ def logp(self, value): lam = self.lam alpha = self.alpha # value *must* be iid. Otherwise this is wrong. - return bound(logpow(lam / (2. * np.pi), 0.5) - - logpow(value - alpha, 1.5) - - (0.5 * lam / (value - alpha) - * ((value - alpha - mu) / mu)**2), + return bound(logpow(lam / (2. * np.pi), 0.5) - + logpow(value - alpha, 1.5) - + (0.5 * lam / (value - alpha) * ((value - alpha - mu) / + mu)**2), # XXX these two are redundant. Please, check. - value > 0, value - alpha > 0, - mu > 0, lam > 0, alpha >= 0) + value > 0, value - alpha > 0, mu > 0, lam > 0, alpha >= 0) -class Beta(UnitContinuous): +class Beta(UnitUnivariateContinuous): R""" Beta log-likelihood. @@ -454,10 +467,7 @@ class Beta(UnitContinuous): the binomial distribution. 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): - super(Beta, self).__init__(*args, **kwargs) - + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -468,6 +478,10 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, assert_negative_support(alpha, 'alpha', 'Beta') assert_negative_support(beta, 'beta', 'Beta') + dist_params = (self.alpha, self.beta) + + super(Beta, self).__init__(dist_params, *args, **kwargs) + def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass @@ -479,7 +493,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): raise ValueError('Incompatible parameterization. Either use alpha ' 'and beta, or mu and sd to specify distribution.') - return alpha, beta + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -492,13 +506,12 @@ def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - - betaln(alpha, beta), - value >= 0, value <= 1, + return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - + betaln(alpha, beta), value >= 0, value <= 1, alpha > 0, beta > 0) -class Exponential(PositiveContinuous): +class Exponential(PositiveUnivariateContinuous): R""" Exponential log-likelihood. @@ -519,15 +532,16 @@ class Exponential(PositiveContinuous): """ def __init__(self, lam, *args, **kwargs): - super(Exponential, self).__init__(*args, **kwargs) self.lam = lam self.mean = 1. / lam self.median = self.mean * tt.log(2) self.mode = 0 + dist_params = (self.lam,) + self.variance = lam**-2 - assert_negative_support(lam, 'lam', 'Exponential') + super(Exponential, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): lam = draw_values([self.lam], point=point) @@ -540,7 +554,7 @@ def logp(self, value): return bound(tt.log(lam) - lam * value, value > 0, lam > 0) -class Laplace(Continuous): +class Laplace(Univariate, Continuous): R""" Laplace log-likelihood. @@ -564,14 +578,17 @@ class Laplace(Continuous): """ def __init__(self, mu, b, *args, **kwargs): - super(Laplace, self).__init__(*args, **kwargs) - self.b = b - self.mean = self.median = self.mode = self.mu = mu - + self.b = tt.as_tensor_variable(b) + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( + mu) self.variance = 2 * b**2 assert_negative_support(b, 'b', 'Laplace') + dist_params = (self.b, self.mu) + + super(Laplace, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) return generate_samples(np.random.laplace, mu, b, @@ -585,7 +602,7 @@ def logp(self, value): return -tt.log(2 * b) - abs(value - mu) / b -class Lognormal(PositiveContinuous): +class Lognormal(PositiveUnivariateContinuous): R""" Log-normal log-likelihood. @@ -614,20 +631,22 @@ class Lognormal(PositiveContinuous): Scale parameter (tau > 0). """ - def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): - super(Lognormal, self).__init__(*args, **kwargs) - - self.mu = mu - self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) - - self.mean = tt.exp(mu + 1. / (2 * self.tau)) - self.median = tt.exp(mu) - self.mode = tt.exp(mu - 1. 
/ self.tau) - self.variance = (tt.exp(1. / self.tau) - 1) * tt.exp(2 * mu + 1. / self.tau) + def __init__(self, mu=0, tau=1, *args, + **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + self.mean = tt.exp(self.mu + 1. / (2. * self.tau)) + self.median = tt.exp(self.mu) + self.mode = tt.exp(self.mu - 1. / self.tau) + self.variance = (tt.exp(1. / self.tau) - 1.) * tt.exp(2 * self.mu + 1. / self.tau) assert_negative_support(tau, 'tau', 'Lognormal') assert_negative_support(sd, 'sd', 'Lognormal') + dist_params = (self.mu, self.tau) + + super(Lognormal, self).__init__(dist_params, *args, **kwargs) + def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) return np.exp(mu + (tau**-0.5) * samples) @@ -647,8 +666,8 @@ def logp(self, value): tau > 0) -class StudentT(Continuous): - R""" +class StudentT(Univariate, Continuous): + r""" Non-central Student's T log-likelihood. Describes a normal variable whose precision is gamma distributed. @@ -675,9 +694,8 @@ class StudentT(Continuous): lam : float Scale parameter (lam > 0). """ - - def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): - super(StudentT, self).__init__(*args, **kwargs) + def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu @@ -689,6 +707,9 @@ def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): assert_negative_support(lam, 'lam (sd)', 'StudentT') assert_negative_support(nu, 'nu', 'StudentT') + dist_params = (self.nu, self.mu, self.lam) + super(StudentT, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point) @@ -709,7 +730,7 @@ def logp(self, value): lam > 0, nu > 0, sd > 0) -class Pareto(PositiveContinuous): +class Pareto(PositiveUnivariateContinuous): R""" Pareto log-likelihood. @@ -736,20 +757,21 @@ class Pareto(PositiveContinuous): """ def __init__(self, alpha, m, *args, **kwargs): - super(Pareto, self).__init__(*args, **kwargs) - self.alpha = alpha - self.m = m - self.mean = tt.switch(tt.gt(alpha, 1), alpha * - m / (alpha - 1.), np.inf) - self.median = m * 2.**(1. / alpha) + self.alpha = tt.as_tensor_variable(alpha) + self.m = tt.as_tensor_variable(m) + self.mean = tt.switch(tt.gt(self.alpha, 1), self.alpha * self.m / (self.alpha - 1.), np.inf) + self.median = self.m * 2.**(1. / self.alpha) self.variance = tt.switch( - tt.gt(alpha, 2), - (alpha * m**2) / ((alpha - 2.) * (alpha - 1.)**2), + tt.gt(self.alpha, 2), + (self.alpha * self.m**2) / ((self.alpha - 2.) * (self.alpha - 1.)**2), np.inf) assert_negative_support(alpha, 'alpha', 'Pareto') assert_negative_support(m, 'm', 'Pareto') + dist_params = (self.alpha, self.m) + + super(Pareto, self).__init__(dist_params, *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -770,7 +792,7 @@ def logp(self, value): value >= m, alpha > 0, m > 0) -class Cauchy(Continuous): +class Cauchy(Univariate, Continuous): R""" Cauchy log-likelihood. 
@@ -797,9 +819,12 @@ class Cauchy(Continuous): """ def __init__(self, alpha, beta, *args, **kwargs): - super(Cauchy, self).__init__(*args, **kwargs) - self.median = self.mode = self.alpha = alpha - self.beta = beta + self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + + dist_params = (self.alpha, self.beta) + + super(Cauchy, self).__init__(dist_params, *args, **kwargs) assert_negative_support(beta, 'beta', 'Cauchy') @@ -822,7 +847,7 @@ def logp(self, value): beta > 0) -class HalfCauchy(PositiveContinuous): +class HalfCauchy(PositiveUnivariateContinuous): R""" Half-Cauchy log-likelihood. @@ -844,10 +869,13 @@ class HalfCauchy(PositiveContinuous): """ def __init__(self, beta, *args, **kwargs): - super(HalfCauchy, self).__init__(*args, **kwargs) - self.mode = 0 + self.mode = tt.as_tensor_variable(0.) + self.beta = tt.as_tensor_variable(beta) self.median = beta - self.beta = beta + + dist_params = (self.beta,) + + super(HalfCauchy, self).__init__(dist_params, *args, **kwargs) assert_negative_support(beta, 'beta', 'HalfCauchy') @@ -868,7 +896,7 @@ def logp(self, value): value >= 0, beta > 0) -class Gamma(PositiveContinuous): +class Gamma(PositiveUnivariateContinuous): R""" Gamma log-likelihood. @@ -907,19 +935,21 @@ class Gamma(PositiveContinuous): Alternative scale parameter (sd > 0). """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): - super(Gamma, self).__init__(*args, **kwargs) + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta - self.mean = alpha / beta - self.mode = tt.maximum((alpha - 1) / beta, 0) - self.variance = alpha / beta**2 + self.mean = self.alpha / self.beta + self.mode = tt.maximum((self.alpha - 1) / self.beta, 0) + self.variance = self.alpha / self.beta**2 assert_negative_support(alpha, 'alpha', 'Gamma') assert_negative_support(beta, 'beta', 'Gamma') + dist_params = (self.alpha, self.beta) + + super(Gamma, self).__init__(dist_params, *args, **kwargs) + def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass @@ -931,7 +961,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): 'alpha and beta, or mu and sd to specify ' 'distribution.') - return alpha, beta + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -952,7 +982,7 @@ def logp(self, value): beta > 0) -class InverseGamma(PositiveContinuous): +class InverseGamma(PositiveUnivariateContinuous): R""" Inverse gamma log-likelihood, the reciprocal of the gamma distribution. @@ -976,12 +1006,10 @@ class InverseGamma(PositiveContinuous): beta : float Scale parameter (beta > 0). """ - - def __init__(self, alpha, beta=1, *args, **kwargs): - super(InverseGamma, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta - self.mean = self._calculate_mean() + def __init__(self, alpha, beta=1., *args, **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.mean = (alpha > 1) * beta / (alpha - 1.) or np.inf self.mode = beta / (alpha + 1.) 
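Gamma.get_alpha_beta (whose mu/sd branch is elided in this hunk) accepts either (alpha, beta) or (mu, sd). The conversion is ordinary moment matching, assuming mean = alpha/beta and variance = alpha/beta**2; the sketch below is a hypothetical standalone function rather than the method itself, which now returns tensor variables.

def gamma_alpha_beta(mu, sd):
    # Moment matching for Gamma(alpha, beta): mean = alpha / beta and
    # variance = alpha / beta**2, so beta = mu / sd**2 and alpha = mu * beta.
    beta = mu / sd ** 2
    alpha = mu * beta
    return alpha, beta

alpha, beta = gamma_alpha_beta(mu=2.0, sd=1.0)
print(alpha, beta)                        # 4.0 2.0
print(alpha / beta, alpha / beta ** 2)    # recovers the mean 2.0 and variance 1.0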
self.variance = tt.switch(tt.gt(alpha, 2), (beta**2) / (alpha * (alpha - 1.)**2), @@ -989,6 +1017,10 @@ def __init__(self, alpha, beta=1, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'InverseGamma') assert_negative_support(beta, 'beta', 'InverseGamma') + dist_params = (self.alpha, self.beta) + + super(InverseGamma, self).__init__(dist_params, *args, **kwargs) + def _calculate_mean(self): m = self.beta / (self.alpha - 1.) try: @@ -1007,8 +1039,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value - + logpow(value, -alpha - 1), + return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value + + logpow(value, -alpha - 1), value > 0, alpha > 0, beta > 0) @@ -1033,12 +1065,13 @@ class ChiSquared(Gamma): """ def __init__(self, nu, *args, **kwargs): - self.nu = nu - super(ChiSquared, self).__init__(alpha=nu / 2., beta=0.5, + self.nu = tt.as_tensor_variable(nu) + super(ChiSquared, self).__init__(alpha=self.nu / 2., + beta=tt.as_tensor_variable(0.5), *args, **kwargs) -class Weibull(PositiveContinuous): +class Weibull(PositiveUnivariateContinuous): R""" Weibull log-likelihood. @@ -1061,11 +1094,9 @@ class Weibull(PositiveContinuous): beta : float Scale parameter (beta > 0). """ - def __init__(self, alpha, beta, *args, **kwargs): - super(Weibull, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. / alpha) self.variance = (beta**2) * \ @@ -1074,6 +1105,10 @@ def __init__(self, alpha, beta, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'Weibull') assert_negative_support(beta, 'beta', 'Weibull') + dist_params = (self.alpha, self.beta) + + super(Weibull, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], point=point) @@ -1094,7 +1129,7 @@ def logp(self, value): value >= 0, alpha > 0, beta > 0) -class Bounded(Continuous): +class Bounded(Univariate, Continuous): R""" An upper, lower or upper+lower bounded distribution @@ -1204,13 +1239,14 @@ def dist(cls, *args, **kwargs): def StudentTpos(*args, **kwargs): warnings.warn("StudentTpos has been deprecated. In future, use HalfStudentT instead.", - DeprecationWarning) + DeprecationWarning) return HalfStudentT(*args, **kwargs) + HalfStudentT = Bound(StudentT, lower=0) -class ExGaussian(Continuous): +class ExGaussian(Univariate, Continuous): R""" Exponentially modified Gaussian log-likelihood. 
@@ -1256,16 +1292,19 @@ class ExGaussian(Continuous): """ def __init__(self, mu, sigma, nu, *args, **kwargs): - super(ExGaussian, self).__init__(*args, **kwargs) - self.mu = mu - self.sigma = sigma - self.nu = nu - self.mean = mu + nu - self.variance = (sigma**2) + (nu**2) + self.mu = tt.as_tensor_variable(mu) + self.sigma = tt.as_tensor_variable(sigma) + self.nu = tt.as_tensor_variable(nu) + self.mean = self.mu + self.nu + self.variance = self.sigma**2 + self.nu**2 assert_negative_support(sigma, 'sigma', 'ExGaussian') assert_negative_support(nu, 'nu', 'ExGaussian') + dist_params = (self.mu, self.sigma, self.nu) + + super(ExGaussian, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], point=point) @@ -1292,7 +1331,7 @@ def logp(self, value): return bound(lp, sigma > 0., nu > 0.) -class VonMises(Continuous): +class VonMises(Univariate, Continuous): R""" Univariate VonMises log-likelihood. @@ -1316,12 +1355,14 @@ class VonMises(Continuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', - *args, **kwargs): - super(VonMises, self).__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.mu = mu - self.kappa = kappa - self.variance = 1 - i1(kappa) / i0(kappa) + def __init__(self, mu=0.0, kappa=None, transform='circular', *args, **kwargs): + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) + self.kappa = tt.as_tensor_variable(kappa) + self.variance = 1. - i1(self.kappa) / i0(self.kappa) + + dist_params = (self.mu, self.kappa) + + super(VonMises, self).__init__(dist_params, *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() @@ -1338,7 +1379,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu kappa = self.kappa - return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0) + return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), + value >= -np.pi, value <= np.pi, kappa >= 0) class SkewNormal(Continuous): @@ -1380,8 +1422,9 @@ class SkewNormal(Continuous): approaching plus/minus infinite we get a half-normal distribution. 
""" + def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs): - super(SkewNormal, self).__init__(*args, **kwargs) + super(SkewNormal, self).__init__(ndim=1, *args, **kwargs) self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.alpha = alpha @@ -1405,8 +1448,7 @@ def logp(self, value): mu = self.mu alpha = self.alpha return bound( - tt.log(1 + - tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + (-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2., tau > 0, sd > 0) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index edfdfaecb96..b78a7995f7a 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1,4 +1,5 @@ from functools import partial +import warnings import numpy as np import theano @@ -6,15 +7,20 @@ from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow -from .distribution import Discrete, draw_values, generate_samples - -__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', +from .distribution import ( + Univariate, + Discrete, + draw_values, + generate_samples, + has_const_inputs) + +__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', 'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial', 'DiscreteUniform', 'Geometric', 'Categorical'] -class Binomial(Discrete): +class Binomial(Univariate, Discrete): R""" Binomial log-likelihood. @@ -39,10 +45,14 @@ class Binomial(Discrete): """ def __init__(self, n, p, *args, **kwargs): - super(Binomial, self).__init__(*args, **kwargs) - self.n = n - self.p = p - self.mode = tt.cast(tt.round(n * p), self.dtype) + self.n = tt.as_tensor_variable(n) + self.p = tt.as_tensor_variable(p) + + self.mode = tt.cast(tt.round(n * p), 'int32') + + dist_params = (self.n, self.p) + + super(Binomial, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) @@ -60,7 +70,7 @@ def logp(self, value): 0 <= p, p <= 1) -class BetaBinomial(Discrete): +class BetaBinomial(Univariate, Discrete): R""" Beta-binomial log-likelihood. 
@@ -90,11 +100,16 @@ class BetaBinomial(Discrete): """ def __init__(self, alpha, beta, n, *args, **kwargs): - super(BetaBinomial, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta - self.n = n - self.mode = tt.cast(tt.round(alpha / (alpha + beta)), 'int8') + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.n = tt.as_tensor_variable(n) + + self.mode = tt.cast(tt.round(self.alpha / (self.alpha + self.beta)), + 'int8') + + dist_params = (self.alpha, self.beta, self.n) + + super(BetaBinomial, self).__init__(dist_params, *args, **kwargs) def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -125,7 +140,7 @@ def logp(self, value): alpha > 0, beta > 0) -class Bernoulli(Discrete): +class Bernoulli(Univariate, Discrete): R"""Bernoulli log-likelihood The Bernoulli distribution describes the probability of successes @@ -146,10 +161,14 @@ class Bernoulli(Discrete): """ def __init__(self, p, *args, **kwargs): - super(Bernoulli, self).__init__(*args, **kwargs) - self.p = p + self.p = tt.as_tensor_variable(p) + + dist_params = (self.p,) + self.mode = tt.cast(tt.round(p), 'int8') + super(Bernoulli, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) return generate_samples(stats.bernoulli.rvs, p, @@ -164,7 +183,7 @@ def logp(self, value): p >= 0, p <= 1) -class Poisson(Discrete): +class Poisson(Univariate, Discrete): R""" Poisson log-likelihood. @@ -192,10 +211,14 @@ class Poisson(Discrete): """ def __init__(self, mu, *args, **kwargs): - super(Poisson, self).__init__(*args, **kwargs) - self.mu = mu + self.mu = tt.as_tensor_variable(mu) + self.mode = tt.floor(mu).astype('int32') + dist_params = (self.mu,) + + super(Poisson, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) return generate_samples(stats.poisson.rvs, mu, @@ -212,7 +235,7 @@ def logp(self, value): 0, log_prob) -class NegativeBinomial(Discrete): +class NegativeBinomial(Univariate, Discrete): R""" Negative binomial log-likelihood. @@ -239,11 +262,16 @@ class NegativeBinomial(Discrete): """ def __init__(self, mu, alpha, *args, **kwargs): - super(NegativeBinomial, self).__init__(*args, **kwargs) - self.mu = mu - self.alpha = alpha + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.mode = tt.floor(mu).astype('int32') + dist_params = (self.mu, self.alpha) + + super(NegativeBinomial, self).__init__(dist_params, + *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha, @@ -266,7 +294,7 @@ def logp(self, value): negbinom) -class Geometric(Discrete): +class Geometric(Univariate, Discrete): R""" Geometric log-likelihood. 
@@ -288,9 +316,14 @@ class Geometric(Discrete): """ def __init__(self, p, *args, **kwargs): - super(Geometric, self).__init__(*args, **kwargs) - self.p = p - self.mode = 1 + self.p = tt.as_tensor_variable(p) + + self.mode = tt.as_tensor_variable(1) + + dist_params = (self.p,) + + super(Geometric, self).__init__( + dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -304,7 +337,7 @@ def logp(self, value): 0 <= p, p <= 1, value >= 1) -class DiscreteUniform(Discrete): +class DiscreteUniform(Univariate, Discrete): R""" Discrete uniform distribution. @@ -325,12 +358,16 @@ class DiscreteUniform(Discrete): """ def __init__(self, lower, upper, *args, **kwargs): - super(DiscreteUniform, self).__init__(*args, **kwargs) self.lower = tt.floor(lower).astype('int32') self.upper = tt.floor(upper).astype('int32') + self.mode = tt.maximum( tt.floor((upper - lower) / 2.).astype('int32'), self.lower) + dist_params = (self.lower, self.upper) + + super(DiscreteUniform, self).__init__(dist_params, *args, **kwargs) + def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper # as array-like. @@ -352,7 +389,7 @@ def logp(self, value): lower <= value, value <= upper) -class Categorical(Discrete): +class Categorical(Univariate, Discrete): R""" Categorical log-likelihood. @@ -372,15 +409,23 @@ class Categorical(Discrete): """ def __init__(self, p, *args, **kwargs): - super(Categorical, self).__init__(*args, **kwargs) - try: - self.k = tt.shape(p)[-1].tag.test_value - except AttributeError: - self.k = tt.shape(p)[-1] + + self.k = tt.shape(p)[-1] + + if has_const_inputs(self.k): + # FIXME: This feels like a hack. Seems like it would be better to + # evaluate somewhere else (e.g. exactly where a value is needed). + self.k = self.k.eval() + self.p = tt.as_tensor_variable(p) self.p = (p.T / tt.sum(p, -1)).T + self.mode = tt.argmax(p) + dist_params = (self.p,) + + super(Categorical, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): def random_choice(k, *args, **kwargs): if len(kwargs['p'].shape) > 1: @@ -413,7 +458,7 @@ def logp(self, value): sumto1) -class Constant(Discrete): +class Constant(Univariate, Discrete): """ Constant log-likelihood. @@ -424,8 +469,10 @@ class Constant(Discrete): """ def __init__(self, c, *args, **kwargs): - super(Constant, self).__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.c = c + self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) + dist_params = (self.c,) + + super(Constant, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -441,13 +488,14 @@ def logp(self, value): c = self.c return bound(0, tt.eq(value, c)) + def ConstantDist(*args, **kwargs): warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.", - DeprecationWarning) + DeprecationWarning) return Constant(*args, **kwargs) -class ZeroInflatedPoisson(Discrete): +class ZeroInflatedPoisson(Univariate, Discrete): R""" Zero-inflated Poisson log-likelihood. 
@@ -478,12 +526,15 @@ class ZeroInflatedPoisson(Discrete): """ def __init__(self, theta, psi, *args, **kwargs): - super(ZeroInflatedPoisson, self).__init__(*args, **kwargs) - self.theta = theta - self.psi = psi + self.theta = tt.as_tensor_variable(theta) + self.psi = tt.as_tensor_variable(psi) self.pois = Poisson.dist(theta) self.mode = self.pois.mode + dist_params = (self.theta, self.psi) + + super(ZeroInflatedPoisson, self).__init__(dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): theta, psi = draw_values([self.theta, self.psi], point=point) g = generate_samples(stats.poisson.rvs, theta, @@ -497,7 +548,7 @@ def logp(self, value): tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta))) -class ZeroInflatedNegativeBinomial(Discrete): +class ZeroInflatedNegativeBinomial(Univariate, Discrete): R""" Zero-Inflated Negative binomial log-likelihood. @@ -529,13 +580,18 @@ class ZeroInflatedNegativeBinomial(Discrete): """ def __init__(self, mu, alpha, psi, *args, **kwargs): - super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs) - self.mu = mu - self.alpha = alpha - self.psi = psi + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.psi = tt.as_tensor_variable(psi) + self.nb = NegativeBinomial.dist(mu, alpha) self.mode = self.nb.mode + dist_params = (self.mu, self.alpha, self.psi) + + super(ZeroInflatedNegativeBinomial, self).__init__( + dist_params, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, alpha, psi = draw_values( [self.mu, self.alpha, self.psi], point=point) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 61570ceaa38..60e187c0774 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,18 +1,71 @@ +import collections +import warnings + import numpy as np import theano.tensor as tt from theano import function +from theano.gof import Constant +from theano.tensor.raw_random import _infer_ndim_bcast from ..memoize import memoize from ..model import Model, get_named_nodes from ..vartypes import string_types -__all__ = ['DensityDist', 'Distribution', 'Continuous', - 'Discrete', 'NoDistribution', 'TensorType', 'draw_values'] +__all__ = ['DensityDist', 'Distribution', + 'Continuous', 'Discrete', 'NoDistribution', 'TensorType', + 'Multivariate'] + class _Unpickling(object): pass + +def _as_tensor_shape_variable(var): + r""" Just some useful shape object parsing steps. + Mostly copied from `_infer_ndim_bcast`. + """ + + if var is None: + return tt.constant([], dtype='int64') + + res = var + if isinstance(res, (tuple, list)): + if len(res) == 0: + return tt.constant([], dtype='int64') + res = tt.as_tensor_variable(res, ndim=1) + + else: + if res.ndim != 1: + raise TypeError("shape must be a vector or list of scalar, got\ + '%s'" % res) + + if (not (res.dtype.startswith('int') or + res.dtype.startswith('uint'))): + + raise TypeError('shape must be an integer vector or list', + res.dtype) + return res + + +def has_const_inputs(nodes): + r"""Checks that nodes have only constant inputs for + their Ops. Useful for justifying one-time evals. 
+ """ + if not isinstance(nodes, collections.Iterable): + nodes = [nodes] + + for node in nodes: + owner = getattr(node, 'owner', None) + if owner is not None: + if not has_const_inputs(owner.inputs): + return False + elif not isinstance(node, Constant): + return False + + return True + + class Distribution(object): """Statistical distribution""" def __new__(cls, name, *args, **kwargs): @@ -41,37 +94,155 @@ def dist(cls, *args, **kwargs): dist.__init__(*args, **kwargs) return dist - def __init__(self, shape, dtype, testval=None, defaults=[], transform=None): - self.shape = np.atleast_1d(shape) - if False in (np.floor(self.shape) == self.shape): - raise TypeError("Expected int elements in shape") + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=None, transform=None, *args, **kwargs): + r""" + Distributions are specified in terms of the shape of their support, the shape + of the space of independent instances and the shape of the space of replications. + The "total" shape of the distribution is the concatenation of each of these + spaces, i.e. `dist.shape = shape_reps + shape_ind + shape_supp`. + + We're able to specify the number of dimensions for the + support of a concrete distribution type (e.g. scalar distributions have + `ndim_supp=0` and a multivariate normal has vector support, + so `ndim_supp=1`) and have the exact sizes of each dimension symbolic. + To actually instantiate a distribution, + we at least need a list/tuple/vector (`ndim_supp` in + length) containing symbolic scalars, `shape_supp`, representing the + exact size of each dimension. In the case that `shape_supp` has an + unknown length at the graph building stage (e.g. it is a generic Theano + vector or tensor), we must provide `ndim_supp`. + + The symbolic scalars `shape_supp` must either be required by a + distribution's constructor, or inferred through its required + parameters. Since most distributions are either scalar, or + have parameters within the space of their support (e.g. the + multivariate normal's mean parameter) inference can be + straight-forward. In the latter case, we refer to the parameters + as "informative". + + We also attempt to handle the specification of a collections of independent-- + but not identical instances of the base distribution (each with support as above). + These have a space with shape `shape_ind`. Generally, `shape_ind` will be + implicitly given by the distribution's parameters. For instance, + if a multivariate normal distribution is instantiated with a matrix mean + parameter `mu`, we can assume that each row specifies the mean for an + independent distribution. In this case the covariance parameter would either + have to be an `ndim=3` tensor, for which the last two dimensions specify each + covariance matrix, or a single matrix that is to apply to each independent + variate implied by the matrix `mu`. + + Here are a few ways of inferring shapes: + * When a distribution is scalar, then `shape_supp = ()` + * and has an informative parameter, e.g. `mu`, then `shape_ind = tt.shape(mu)`. + * When a distribution is multivariate + * and has an informative parameter, e.g. `mu`, then + `shape_supp = tt.shape(mu)[-ndim_supp:]` and `shape_ind = tt.shape(mu)[-ndim_supp:]`. + In all remaining cases the shapes must be provided by the caller. + `shape_reps` is always provided by the caller. + + + Parameters + ---------- + shape_supp + tuple + Shape of the support for this distribution type. 
+ shape_ind + tuple + Dimension of independent, but not necessarily identical, copies of + this distribution type. + shape_reps + tuple + Dimension of independent and identical copies of + this distribution type. + bcast + A tuple of boolean values. + Broadcast dimensions. + dtype + The data type. + testval + Test value to be added to the Theano variable's tag.test_value. + defaults + List of string names for attributes that can be used as this + distribution's default value. + transform + A transform function + """ + + self.shape_supp = _as_tensor_shape_variable(shape_supp) + self.ndim_supp = tt.get_vector_length(self.shape_supp) + self.shape_ind = _as_tensor_shape_variable(shape_ind) + self.ndim_ind = tt.get_vector_length(self.shape_ind) + self.shape_reps = _as_tensor_shape_variable(shape_reps) + self.ndim_reps = tt.get_vector_length(self.shape_reps) + + self.bcast = bcast self.dtype = dtype - self.type = TensorType(self.dtype, self.shape) - self.testval = testval + + ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps + if ndim_sum == 0: + self.shape = tt.constant([], dtype='int32') + else: + self.shape = tuple(self.shape_reps) +\ + tuple(self.shape_ind) +\ + tuple(self.shape_supp) + self.shape = tt.as_tensor_variable(self.shape) + + if has_const_inputs(self.shape): + # FIXME: This feels like a hack. Seems like it would be better to + # evaluate somewhere else (e.g. exactly where a value is needed). + self.shape = self.shape.eval() + + self.ndim = tt.get_vector_length(self.shape) self.defaults = defaults self.transform = transform + if testval is None: + testval = self.get_test_value(defaults=self.defaults) + + self.testval = testval + self.type = tt.TensorType(str(dtype), self.bcast) + def default(self): - return self.get_test_val(self.testval, self.defaults) + return self.get_test_value(self.testval, self.defaults) - def get_test_val(self, val, defaults): + def get_test_value(self, val=None, defaults=None): + test_val = None if val is None: for v in defaults: - if hasattr(self, v) and np.all(np.isfinite(self.getattr_value(v))): - return self.getattr_value(v) + the_attr = getattr(self, v, None) + + if the_attr is not None and np.all(np.isfinite(self.getattr_value(v))): + test_val = self.getattr_value(v) + break else: - return self.getattr_value(val) + test_val = self.getattr_value(val) - if val is None: - raise AttributeError(str(self) + " has no finite default value to use, checked: " + - str(defaults) + " pass testval argument or adjust so value is finite.") + if test_val is not None: + if self.ndim_reps > 0 and hasattr(self.shape_reps, 'value'): + bcast_shape = self.getattr_value(self.shape) + test_val = np.broadcast_to(test_val, bcast_shape) + + return test_val + + raise AttributeError(str(self) + " has no finite default value to use, checked: " + + str(defaults) + " pass testval argument or adjust so value is finite.") def getattr_value(self, val): + """ Attempts to obtain a non-symbolic value for an attribute + (potentially given in str form) + """ if isinstance(val, string_types): val = getattr(self, val) - if isinstance(val, tt.TensorVariable): + # Could use Theano's: + # val = theano.gof.op.get_test_value(val) + if isinstance(val, tt.sharedvar.SharedVariable): + return val.get_value() + elif isinstance(val, tt.TensorVariable): return val.tag.test_value + elif isinstance(val, tt.TensorConstant): + return val.value if isinstance(val, tt.TensorConstant): return val.value @@ -85,10 +256,14 @@ def TensorType(dtype, shape): class NoDistribution(Distribution): - def 
__init__(self, shape, dtype, testval=None, defaults=[], transform=None, parent_dist=None, *args, **kwargs): - super(NoDistribution, self).__init__(shape=shape, dtype=dtype, - testval=testval, defaults=defaults, - *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=[], transform=None, parent_dist=None, + *args, **kwargs): + + super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, testval=testval, + defaults=defaults, *args, + **kwargs) self.parent_dist = parent_dist def __getattr__(self, name): @@ -103,38 +278,130 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - - def __init__(self, shape=(), dtype='int64', defaults=['mode'], *args, **kwargs): + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype='int64', + defaults=['mode'], *args, **kwargs): if dtype != 'int64': raise TypeError('Discrete classes expect dtype to be int64.') - super(Discrete, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + super(Discrete, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, *args, + **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, shape=(), dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): - super(Continuous, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, + dtype='float64', defaults=['median', 'mean', 'mode'], *args, + **kwargs): + super(Continuous, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, + *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, shape=(), dtype='float64', testval=0, *args, **kwargs): - super(DensityDist, self).__init__( - shape, dtype, testval, *args, **kwargs) + def __init__(self, logp, shape_supp=None, shape_ind=None, shape_reps=None, + bcast=None, dtype='float64', *args, **kwargs): self.logp = logp + # TODO: Could add some generic handling like this in + # `Distribution.__init__`, just to handle deprecated use of `shape`. + if (shape_supp is None) or (shape_ind is None) or (shape_reps is None): + + # If we only got the old `shape` parameter, assume it's only + # specifying replications. + old_shape = kwargs.get('shape', None) + if old_shape is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_supp = tuple() + shape_ind = tuple() + shape_reps = old_shape + + bcast += tuple(True if s_ == 1 else False + for s_ in old_shape) + else: + raise ValueError("shapes and bcast must be specified.") + + super(DensityDist, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, *args, + **kwargs) + + +class Univariate(Distribution): + + def __init__(self, dist_params, ndim=None, size=None, + shape=None, dtype=None, + *args, **kwargs): + r"""This constructor automates some of the shape determination, since + univariate distributions are simple in that regard. + + Parameters + ---------- + dist_params: tuple + A tuple containing the distributions parameters. These parameters + are checked for shape compatibility(/"broadcastibility"), so make + sure their dimensions line up. 
For example, if a the distribution + for a scalar random variable has parameters `(a, b)`, where `a` is + scalar and `b` is a vector, if we get an array of `a` values, then + `b` should be given an extra dimension to broadcast along those + independent variates implied by `a`. This function will try + to figure that stuff out, but no promises. + ndim: int + A hint for the number of dimensions. + (Not currently used, but could be useful in cases for which + the shape dimensions aren't easily assessed.) + size: tuple (int) + Shape of replications. + shape: tuple (int) + Deprecated; use ``size``. + dtype: string + Name of primitive numeric type. + """ + + if shape is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + if size is None: + size = shape + + self.dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) + + # Parameters need to match in shape; we use the following to + # determine what the [independent terms'] ultimate shape is. + ndim, ind_size, bcast = _infer_ndim_bcast(*((ndim, None) + + self.dist_params)) + + # We have to be careful with `as_tensor_variable`; it will produce + # empty collections with dtype=floatX, which violates our expectations + # for a shape object. + if size is None or np.alen(size) == 0: + size = np.array((), dtype=np.int) + elif np.shape(size) == (): + size = np.asarray((size,), dtype=np.int) + else: + size = np.asarray(size, dtype=np.int) -class MultivariateContinuous(Continuous): + # Add broadcast info from replication dimensions. + bcast += tuple(True if s_ == 1 else False + for s_ in size) - pass + size = tt.as_tensor_variable(size, ndim=1) + if dtype is None: + dtype = tt.scal.upcast(*((tt.config.floatX,) + + tuple(x.dtype for x in self.dist_params))) -class MultivariateDiscrete(Discrete): + super(Univariate, self).__init__( + tuple(), ind_size, size, bcast, *args, **kwargs) + +class Multivariate(Distribution): + r"""TODO: Automate some of the multivariate shape determination? + """ pass diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 456f40be591..96961d4b140 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -12,9 +12,14 @@ import pymc3 as pm from . import transforms -from .distribution import Continuous, Discrete, draw_values, generate_samples from ..model import Deterministic from .continuous import ChiSquared, Normal +from .distribution import ( + Multivariate, + Continuous, + Discrete, + draw_values, + generate_samples) from .special import gammaln, multigammaln from .dist_math import bound, logpow, factln @@ -58,8 +63,9 @@ def get_tau_cov(mu, tau=None, cov=None): return (tau, cov) -class MvNormal(Continuous): - R""" + +class MvNormal(Multivariate, Continuous): + r""" Multivariate normal log-likelihood. .. math:: @@ -82,21 +88,45 @@ class MvNormal(Continuous): Covariance matrix. tau : array, optional Precision matrix. - """ + ndim : int + TODO + size : tuple of ints + TODO - def __init__(self, mu, cov=None, tau=None, *args, **kwargs): - super(MvNormal, self).__init__(*args, **kwargs) + """ + def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, + **kwargs): warnings.warn(('The calling signature of MvNormal() has changed. The new signature is:\n' 'MvNormal(name, mu, cov) instead of MvNormal(name, mu, tau).' 
'You do not need to explicitly invert the covariance matrix anymore.'), DeprecationWarning) - self.mean = self.median = self.mode = self.mu = mu - self.tau, self.cov = get_tau_cov(mu, tau=tau, cov=cov) + # TODO: Need to apply replications/size. + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + + self.dist_params = (self.mu, self.tau) + + self.median = self.mode = self.mean = self.mu + # TODO: How do we want to use ndim? + shape_supp = (self.mu.shape[-1],) + shape_ind = tuple(self.mu.shape[:-1]) + + if self.mu.ndim > 0: + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + else: + bcast = (False,) + + if dtype is None: + dtype = self.mu.dtype + + super(MvNormal, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def random(self, point=None, size=None): mu, cov = draw_values([self.mu, self.cov], point=point) def _random(mean, cov, size=None): + # FIXME: cov here is actually precision? return stats.multivariate_normal.rvs( mean, cov, None if size == mean.shape else size) @@ -119,9 +149,9 @@ def logp(self, value): return -1 / 2. * result -class MvStudentT(Continuous): - R""" - Multivariate Student-T log-likelihood. +class MvStudentT(Multivariate, Continuous): + """ + Multivariate Student T log-likelihood. .. math:: f(\mathbf{x}| \nu,\mu,\Sigma) = @@ -145,15 +175,28 @@ class MvStudentT(Continuous): mu : array Vector of means. """ + def __init__(self, nu, Sigma, mu=None, ndim=None, size=None, dtype=None, *args, **kwargs): + # TODO: Need to apply replications/size. + self.nu = tt.as_tensor_variable(nu) + self.Sigma = tt.as_tensor_variable(Sigma) + + # TODO: How do we want to use ndim? + shape_supp = self.Sigma.shape[0] + shape_ind = self.Sigma.shape[:-2] - def __init__(self, nu, Sigma, mu=None, *args, **kwargs): - super(MvStudentT, self).__init__(*args, **kwargs) - self.nu = nu - self.mu = np.zeros(Sigma.shape[0]) if mu is None else mu - self.Sigma = Sigma + self.mu = tt.as_tensor_variable(tt.zeros(shape_supp) if mu is None else mu) + self.dist_params = (self.nu, self.mu, self.Sigma) self.mean = self.median = self.mode = self.mu = mu + # FIXME: this isn't correct/ideal + bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) + if dtype is None: + dtype = self.mu.dtype + + super(MvStudentT, self).__init__(shape_supp, shape_ind, (), ndim, + bcast, dtype, *args, **kwargs) + def random(self, point=None, size=None): chi2 = np.random.chisquare mvn = np.random.multivariate_normal @@ -170,6 +213,7 @@ def logp(self, value): mu = self.mu d = S.shape[0] + n = value.shape[0] X = value - mu @@ -182,7 +226,7 @@ def logp(self, value): return log_pdf -class Dirichlet(Continuous): +class Dirichlet(Multivariate, Continuous): R""" Dirichlet log-likelihood. @@ -212,19 +256,36 @@ class Dirichlet(Continuous): as a parent of Multinomial and Categorical nevertheless. 
""" - def __init__(self, a, transform=transforms.stick_breaking, + def __init__(self, a, dtype=tt.config.floatX, + transform=transforms.stick_breaking, *args, **kwargs): - shape = a.shape[-1] - kwargs.setdefault("shape", shape) - super(Dirichlet, self).__init__(transform=transform, *args, **kwargs) + self.a = tt.as_tensor_variable(a) + self.mean = self.a / tt.sum(self.a) + self.mode = tt.switch(tt.all(self.a > 1), + (self.a - 1) / tt.sum(self.a - 1), + np.nan) - self.k = shape - self.a = a - self.mean = a / tt.sum(a) + self.dist_params = (self.a,) - self.mode = tt.switch(tt.all(a > 1), - (a - 1) / tt.sum(a - 1), - np.nan) + shape_supp = (self.a.shape[-1],) + shape_ind = tuple(self.a.shape[:-1]) + + shape_reps = kwargs.pop('shape', None) + if shape_reps is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_reps = kwargs.pop('size', ()) + + # FIXME: this isn't correct/ideal + if self.a.ndim > 0: + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + else: + bcast = (False,) + + super(Dirichlet, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, transform=transform, + *args, **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -233,12 +294,12 @@ def _random(a, size=None): return stats.dirichlet.rvs(a, None if size == a.shape else size) samples = generate_samples(_random, a, - dist_shape=self.shape, + dist_shape=self.shape_supp, size=size) return samples def logp(self, value): - k = self.k + k = self.shape_supp a = self.a # only defined for sum(value) == 1 @@ -249,7 +310,7 @@ def logp(self, value): broadcast_conditions=False) -class Multinomial(Discrete): +class Multinomial(Multivariate, Discrete): R""" Multinomial log-likelihood. @@ -281,26 +342,42 @@ class Multinomial(Discrete): rescaled otherwise. """ - def __init__(self, n, p, *args, **kwargs): - super(Multinomial, self).__init__(*args, **kwargs) + def __init__(self, n, p, ndim=None, size=None, dtype='int32', + *args, **kwargs): + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p) + self.p = self.p / tt.sum(self.p, axis=-1, keepdims=True) + + self.mean = self.n * self.p + self.mode = tt.cast(tt.round(self.mean), dtype) - p = p / tt.sum(p, axis=-1, keepdims=True) + self.dist_params = (self.n, self.p) - if len(self.shape) == 2: - try: - assert n.shape == (self.shape[0],) - except AttributeError: - # this occurs when n is a scalar Python int or float - n *= tt.ones(self.shape[0]) + # XXX, FIXME: Is this really multivariate? + shape_supp = (1,) + shape_ind = () + # shape_supp = (self.p.shape[-1],) + # shape_ind = tuple(self.p.shape[:-1]) - self.n = tt.shape_padright(n) - self.p = p if p.ndim == 2 else tt.shape_padleft(p) - else: - self.n = n - self.p = p + shape_reps = kwargs.pop('shape', None) + if shape_reps is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_reps = kwargs.pop('size', ()) - self.mean = self.n * self.p - self.mode = tt.cast(tt.round(self.mean), 'int32') + # XXX, FIXME: Is this really multivariate? 
+ bcast = (False,) + # if self.p.ndim > 0: + # bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + # else: + # bcast = (False,) + + if dtype is None: + dtype = self.p.dtype + + super(Multinomial, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, *args, **kwargs) def _random(self, n, p, size=None): if size == p.shape: @@ -378,7 +455,7 @@ def __str__(self): matrix_pos_def = PosDefMatrix() -class Wishart(Continuous): +class Wishart(Multivariate, Continuous): R""" Wishart log-likelihood. @@ -415,8 +492,8 @@ class Wishart(Continuous): use WishartBartlett or LKJCorr. """ - def __init__(self, n, V, *args, **kwargs): - super(Wishart, self).__init__(*args, **kwargs) + def __init__(self, n, V, ndim=None, size=None, dtype=None, + *args, **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -424,17 +501,35 @@ def __init__(self, n, V, *args, **kwargs): 'For more information on the issues surrounding the ' 'Wishart see here: https://github.com/pymc-devs/pymc3/issues/538.', UserWarning) - self.n = n - self.p = p = V.shape[0] - self.V = V - self.mean = n * V - self.mode = tt.switch(1 * (n >= p + 1), - (n - p - 1) * V, + # TODO: Need to apply replications/size. + self.n = tt.as_tensor_variable(n, ndim=0) + self.V = tt.as_tensor_variable(V) + + self.dist_params = (self.n, self.V) + + # TODO: How do we want to use ndim? + shape_supp = (self.V.shape[-1],) + shape_ind = () + + self.mode = tt.switch(1 * (self.n >= shape_supp + 1), + (self.n - shape_supp - 1) * self.V, np.nan) + self.mean = self.n * self.V + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.V.dtype + + self.mode = tt.switch(1 * (self.n >= self.shape_supp + 1), + (self.n - self.shape_supp - 1) * self.V, np.nan) + super(Wishart, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) + def logp(self, X): n = self.n - p = self.p + p = self.shape_supp V = self.V IVI = det(V) @@ -447,8 +542,7 @@ def logp(self, X): matrix_pos_def(X), tt.eq(X, X.T), n > (p - 1), - broadcast_conditions=False - ) + broadcast_conditions=False) def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None): @@ -529,7 +623,7 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) -class LKJCorr(Continuous): +class LKJCorr(Multivariate, Continuous): R""" The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood. @@ -569,16 +663,34 @@ class LKJCorr(Continuous): 100(9), pp.1989-2001. """ - def __init__(self, n, p, *args, **kwargs): - self.n = n - self.p = p - n_elem = int(p * (p - 1) / 2) - self.mean = np.zeros(n_elem) - super(LKJCorr, self).__init__(shape=n_elem, *args, **kwargs) + def __init__(self, n, p, ndim=None, size=None, dtype=tt.config.floatX, + *args, **kwargs): + # TODO: Need to apply replications/size. + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p, ndim=0) + + self.dist_params = (self.n, self.p) - self.tri_index = np.zeros([p, p], dtype=int) - self.tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem) - self.tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem) + # TODO: How do we want to use ndim? + n_elem = (self.p * (self.p - 1) / 2) + shape_supp = (n_elem,) + self.mean = tt.zeros(n_elem) + + # FIXME: triu, bcast, etc. 
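The symbolic tri_index construction above is flagged FIXME because Theano tensors do not support this kind of in-place fancy assignment. The removed numpy version behaved like the sketch below, mapping each off-diagonal position of a p x p matrix to an entry of the length p*(p-1)/2 correlation vector (the unit diagonal is handled separately).

import numpy as np

# Sketch of the original numpy tri_index: indexing a flattened vector of
# upper-triangle correlations with this matrix rebuilds the symmetric
# off-diagonal structure of a p x p correlation matrix.
p = 3
n_elem = p * (p - 1) // 2
tri_index = np.zeros((p, p), dtype=int)
tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem)

x = np.array([0.1, 0.2, 0.3])      # flattened upper-triangle correlations
print(x[tri_index])
# [[0.1 0.1 0.2]
#  [0.1 0.1 0.3]
#  [0.2 0.3 0.1]]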
+ self.tri_index = tt.zeros((self.p, self.p), dtype=int) + self.tri_index[tt.triu(self.p, k=1)] = tt.arange(n_elem) + self.tri_index[tt.triu(self.p, k=1)[::-1]] = tt.arange(n_elem) + + shape_ind = () + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.n.dtype + + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), + bcast, dtype, + *args, **kwargs) def _normalizing_constant(self, n, p): if n == 1: @@ -611,5 +723,4 @@ def logp(self, x): tt.all(X <= 1), tt.all(X >= -1), matrix_pos_def(X), n > 0, - broadcast_conditions=False - ) + broadcast_conditions=False) diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 8dd2e282fbc..94fc95bb34d 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -20,7 +20,7 @@ class AR1(Continuous): """ def __init__(self, k, tau_e, *args, **kwargs): - super(AR1, self).__init__(*args, **kwargs) + super(AR1, self).__init__(ndim=1, size=(), *args, **kwargs) self.k = k self.tau_e = tau_e self.tau = tau_e * (1 - k ** 2) @@ -56,7 +56,7 @@ class GaussianRandomWalk(Continuous): def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0., *args, **kwargs): - super(GaussianRandomWalk, self).__init__(*args, **kwargs) + super(GaussianRandomWalk, self).__init__(ndim=1, size=(), *args, **kwargs) self.tau = tau self.sd = sd self.mu = mu @@ -100,7 +100,7 @@ class GARCH11(Continuous): def __init__(self, omega=None, alpha_1=None, beta_1=None, initial_vol=None, *args, **kwargs): - super(GARCH11, self).__init__(*args, **kwargs) + super(GARCH11, self).__init__(ndim=1, size=(), *args, **kwargs) self.omega = omega self.alpha_1 = alpha_1 diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 4234d4530b1..c02779e07c2 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -46,13 +46,14 @@ class TransformedDistribution(Distribution): """A distribution that has been transformed from one space into another.""" def __init__(self, dist, transform, *args, **kwargs): - """ + r""" Parameters ---------- dist : Distribution + TODO transform : Transform - args, kwargs - arguments to Distribution""" + TODO + """ forward = transform.forward testval = forward(dist.default()) @@ -61,9 +62,18 @@ def __init__(self, dist, transform, *args, **kwargs): v = forward(FreeRV(name='v', distribution=dist)) self.type = v.type + # We can get the transformed support shape from a single dummy var in + # only the support (i.e. without the independent or replication dimensions). + shape_supp = forward(tt.alloc(1, *dist.shape_supp)).shape + + # XXX: We assume these two shapes don't change under a transform. + shape_ind = dist.shape_ind + shape_reps = dist.shape_reps + super(TransformedDistribution, self).__init__( - v.shape.tag.test_value, v.dtype, - testval, dist.defaults, *args, **kwargs) + shape_supp, shape_ind, shape_reps, + v.broadcastable, v.dtype, + testval.tag.test_value, dist.defaults, *args, **kwargs) if transform.name == 'stickbreaking': b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) @@ -193,7 +203,7 @@ class StickBreaking(Transform): Parameters ---------- eps : float, positive value - A small value for numerical stability in invlogit. + A small value for numerical stability in invlogit. 
""" name = "stickbreaking" @@ -250,7 +260,7 @@ def backward(self, y): def forward(self, x): return x - + def jacobian_det(self, x): return 0 diff --git a/pymc3/model.py b/pymc3/model.py index d28f2340e05..12d657cef25 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -183,7 +183,6 @@ def __init__(self): self.potentials = [] self.missing_values = [] self.model = self - @property @memoize def bijection(self): @@ -243,6 +242,11 @@ def unobserved_RVs(self): """List of all random variable, including deterministic ones.""" return self.vars + self.deterministics + @property + def untransformed_RVs(self): + """All variables except those transformed for MCMC""" + return [v for v in self.vars if v not in self.hidden_RVs] + self.deterministics + @property def test_point(self): """Test point used to check that the model doesn't generate errors""" @@ -494,17 +498,34 @@ def __init__(self, type=None, owner=None, index=None, name=None, owner : theano owner (optional) name : str distribution : Distribution - model : Model""" + model : Model + """ if type is None: type = distribution.type super(FreeRV, self).__init__(type, owner, index, name) if distribution is not None: - self.dshape = tuple(distribution.shape) - self.dsize = int(np.prod(distribution.shape)) + self.dshape = distribution.shape + + const_dsize = 1 + const_shape = tuple() + for s in distribution.shape: + try: + shape_val = tt.get_scalar_constant_value(s) + const_dsize *= shape_val + const_shape += shape_val + except tt.NotScalarConstantError: + const_dsize = None + const_shape = None + break + + if const_dsize is None: + self.dsize = tt.prod(distribution.shape) + else: + self.dsize = const_dsize + self.distribution = distribution - self.tag.test_value = np.ones( - distribution.shape, distribution.dtype) * distribution.default() + self.tag.test_value = distribution.testval self.logp_elemwiset = distribution.logp(self) self.model = model @@ -536,17 +557,44 @@ def as_tensor(data, name, model, distribution): dtype = distribution.dtype data = pandas_to_array(data).astype(dtype) + missing_in_dims = 0 if hasattr(data, 'mask'): + missing_in_dims = np.ma.count_masked(data, axis=0) + + if not np.all(missing_in_dims == 0): from .distributions import NoDistribution - testval = distribution.testval or data.mean().astype(dtype) - fakedist = NoDistribution.dist(shape=data.mask.sum(), dtype=dtype, - testval=testval, parent_dist=distribution) + + # XXX: There was an `or` between these two values; was this a + # condition on None values (as interpreted below), or some kind of + # bitwise operation on the values? + testval = distribution.testval + if testval is None: + testval = data.mean() + testval = testval.astype(dtype) + + # FIXME: What are the correct values for these? Does it + # really matter? + shape_supp = tuple() + shape_ind = tuple() + missing_count = np.sum(missing_in_dims) + shape_reps = tt.as_tensor_variable(np.atleast_1d(missing_count)) + + bcast = (False,) + + # XXX: If these missing value distributions are being used + # directly, then the result will be incorrect when + # the parent distribution is multivariate and the missing values were + # found about the support and/or independent dimensions. 
+ fakedist = NoDistribution.dist(shape_supp, shape_ind, shape_reps, + bcast, dtype, + testval=testval[data.mask.nonzero()], + parent_dist=distribution) missing_values = FreeRV(name=name + '_missing', distribution=fakedist, model=model) constant = tt.as_tensor_variable(data.filled()) - dataTensor = tt.set_subtensor( - constant[data.mask.nonzero()], missing_values) + dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], + missing_values) dataTensor.missing_values = missing_values return dataTensor else: @@ -579,7 +627,6 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None, super(TensorVariable, self).__init__(type, None, None, name) if distribution is not None: - data = as_tensor(data, name, model, distribution) self.missing_values = data.missing_values diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py index e70b480422c..549c402a643 100644 --- a/pymc3/tests/test_distribution_defaults.py +++ b/pymc3/tests/test_distribution_defaults.py @@ -1,19 +1,21 @@ from __future__ import division from ..model import Model -from ..distributions import DiscreteUniform, Continuous +from ..distributions import DiscreteUniform, Univariate, Continuous import numpy as np from nose.tools import raises -class DistTest(Continuous): +class DistTest(Univariate, Continuous): def __init__(self, a, b, *args, **kwargs): - super(DistTest, self).__init__(*args, **kwargs) self.a = a self.b = b + dist_params = (self.a, self.b) + super(DistTest, self).__init__(dist_params, *args, **kwargs) + def logp(self, v): return 0 diff --git a/pymc3/tests/test_distribution_shapes.py b/pymc3/tests/test_distribution_shapes.py new file mode 100644 index 00000000000..d7637952be2 --- /dev/null +++ b/pymc3/tests/test_distribution_shapes.py @@ -0,0 +1,146 @@ +from __future__ import division + +from ..model import Model +from ..distributions import Normal + +import numpy as np +from nose.tools import raises + + +def test_univariate_shape_info(): + with Model() as model: + # Test scalar case with replications. + x = Normal('x', 0, 1, size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), ()) + assert x.distribution.ndim_ind == 0 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test scalar case with empty size input. + x = Normal('x', 0, 1, size=()) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, ()) + np.testing.assert_array_equal(np.shape(x.tag.test_value), ()) + # FIXME: `Distribution.random` adds an unnecessary dimension. + # np.testing.assert_array_equal(np.shape(x.random()), ()) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, ()) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), ()) + assert x.distribution.ndim_ind == 0 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), ()) + assert x.distribution.ndim_reps == 0 + + with Model() as model: + # Test independent terms alone. 
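+        # Here mu and tau each supply two values, so the draw picks up a
+        # single independent dimension of length 2 while the support stays
+        # scalar and there are no replications.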
+ x = Normal('x', mu=np.arange(0, 2), tau=np.arange(1, 3)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (2,)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (2,)) + np.testing.assert_array_equal(np.shape(x.random()), (2,)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, (False,)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), ()) + assert x.distribution.ndim_reps == 0 + + with Model() as model: + # Test independent terms and replication. + x = Normal('x', mu=np.arange(0, 2), tau=np.arange(1, 3), + size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test broadcasting among the independent terms. + x = Normal('x', mu=np.arange(0, 2), tau=1, size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test broadcasting among the independent terms, where independent + # terms are determined by a non-default test value parameter. 
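+        # As in the previous cases, the overall shape is the concatenation
+        # shape_reps + shape_ind + shape_supp, i.e. (3, 2) + (2,) + ()
+        # giving (3, 2, 2); here the independent dimension comes from tau
+        # rather than mu.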
+ x = Normal('x', mu=0, tau=np.r_[1, 2], size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + +# TODO +# def test_multivariate_shape_info(): +# with Model() as model: +# x = MvNormal('x', np.ones((3,)), np.zeros((3, 3))) +# +# with Model() as model: +# x = MvNormal('x', np.ones((3,)), np.zeros((3,)), size=(3, 2)) +# +# with Model() as model: +# x = MvNormal('x', np.ones((3, 2)), np.zeros((3, 3))) +# +# with Model() as model: +# x = Dirichlet('x', np.r_[1, 1, 1], size=(3, 2)) +# +# with Model() as model: +# x = Multinomial('x', 2, np.r_[0.5, 0.5], size=(3, 2)) +# +# with Model() as model: +# x = Multinomial('x', np.r_[2, 2], np.r_[0.5, 0.5]) +# +# with Model() as model: +# x = Multinomial('x', 2, np.r_[[0.5, 0.5], [0.1, 0.9]]) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 7f18ea0958d..d81cdefd68c 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -118,19 +118,19 @@ def build_model(distfam, valuedomain, vardomains, extra_args={}): def integrate_nd(f, domain, shape, dtype): - if shape == () or shape == (1,): + if np.array_equal(shape, ()) or np.array_equal(shape, (1,)): if dtype in continuous_types: return integrate.quad(f, domain.lower, domain.upper, epsabs=1e-8)[0] else: return sum(f(j) for j in range(domain.lower, domain.upper + 1)) - elif shape == (2,): + elif np.array_equal(shape, (2,)): def f2(a, b): return f([a, b]) return integrate.dblquad(f2, domain.lower[0], domain.upper[0], lambda _: domain.lower[1], lambda _: domain.upper[1])[0] - elif shape == (3,): + elif np.array_equal(shape, (3,)): def f3(a, b, c): return f([a, b, c]) diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 67889529b0c..879df2d72ee 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -208,11 +208,11 @@ def logp_(input): r = MRG_RandomStreams(seed=random_seed) if n_mcsamples == 1: - n = r.normal(size=inarray.tag.test_value.shape) + n = r.normal(size=np.shape(inarray.tag.test_value)) q = n * tt.exp(w) + u elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi)) else: - n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0])) + n = r.normal(size=(n_mcsamples, np.shape(u.tag.test_value)[0])) qs = n * tt.exp(w) + u logps, _ = theano.scan(fn=lambda q: logp_(q), outputs_info=None, @@ -255,7 +255,7 @@ def optimizer(loss, param): i_int = i.astype('int64') value = param_.get_value(borrow=True) accu = theano.shared( - np.zeros(value.shape + (n_win,), dtype=value.dtype)) + np.zeros(np.shape(value) + (n_win,), dtype=value.dtype)) grad = tt.grad(loss, param_) # Append squared gradient vector to accu_new @@ -324,17 +324,17 @@ def rvs(x): for v in global_RVs: u = theano.shared(vparams['means'][str(v)]).ravel() w = theano.shared(vparams['stds'][str(v)]).ravel() - n = r.normal(size=u.tag.test_value.shape) - updates.update({v: (n * w + u).reshape(v.tag.test_value.shape)}) + n 
= r.normal(size=np.shape(u.tag.test_value)) + updates.update({v: (n * w + u).reshape(np.shape(v.tag.test_value))}) if local_RVs is not None: for v_, (uw, _) in local_RVs.items(): v = get_transformed(v_) u = uw[0].ravel() w = uw[1].ravel() - n = r.normal(size=u.tag.test_value.shape) + n = r.normal(size=np.shape(u.tag.test_value)) updates.update( - {v: (n * tt.exp(w) + u).reshape(v.tag.test_value.shape)}) + {v: (n * tt.exp(w) + u).reshape(np.shape(v.tag.test_value))}) # Replace some nodes of the graph with variational distributions vars = model.free_RVs
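
The `np.shape(...)` substitutions in this hunk behave like the `.shape`
attribute for arrays while also tolerating plain Python scalars and other
array-likes; a minimal illustration follows (the exact motivation is an
assumption here, not stated in the patch):

    import numpy as np

    np.shape(np.ones((3, 2)))  # (3, 2), same as the .shape attribute
    np.shape(2.5)              # (), whereas (2.5).shape raises AttributeError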