Sample conditional returns mean and var as well as samples, and generates more than one sample (GPflow#836)
st-- authored and awav committed Sep 12, 2018
1 parent 2475891 commit 460afc7
Showing 3 changed files with 115 additions and 40 deletions.
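
For context, a hypothetical shape-check written in the style of tests/test_multioutput.py below, illustrating the new return signature and the num_samples argument. This sketch is not part of the commit; it assumes the session_tf fixture from gpflow.test_util, the default float64 settings, and arbitrary sizes M, D, P, N, S.

import numpy as np
import tensorflow as tf
from gpflow.conditionals import sample_conditional
from gpflow.features import InducingPoints
from gpflow.kernels import RBF
from gpflow.test_util import session_tf  # pytest fixture used throughout the GPflow test suite


def test_sample_conditional_shapes(session_tf):
    M, D, P, N, S = 5, 2, 3, 7, 10
    Z = np.random.randn(M, D)                                              # M x D inducing inputs
    q_mu = np.random.randn(M, P)                                           # M x P
    q_sqrt = np.array([np.tril(np.random.randn(M, M)) for _ in range(P)])  # P x M x M
    Xnew = np.random.randn(N, D)                                           # N x D test inputs

    feature = InducingPoints(Z)
    kernel = RBF(D)

    # After this commit, sample_conditional returns (samples, mean, cov)
    # and can draw num_samples samples in one call.
    samples, mean, cov = sample_conditional(
        tf.constant(Xnew), feature, kernel, tf.constant(q_mu),
        q_sqrt=tf.constant(q_sqrt), white=True, num_samples=S)

    s, m, v = session_tf.run([samples, mean, cov])
    assert s.shape == (S, N, P)   # [num_samples, N, P]
    assert m.shape == (N, P)
    assert v.shape == (N, P)      # full_cov=False, full_output_cov=False (defaults)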
88 changes: 67 additions & 21 deletions gpflow/conditionals.py
@@ -131,29 +131,58 @@ def _conditional(Xnew, X, kern, f, *, full_cov=False, q_sqrt=None, white=False):

@sample_conditional.register(object, InducingFeature, Kernel, object)
@name_scope("sample_conditional")
def _sample_conditional(Xnew, feat, kern, f, *, full_output_cov=False, q_sqrt=None, white=False):
def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None):
"""
`sample_conditional` will return a sample from the conditional distribution.
In most cases this means calculating the conditional mean m and variance v and then
returning m + sqrt(v) * eps, with eps ~ N(0, 1).
However, for some combinations of Mok and Mof, more efficient sampling routines exist.
The dispatcher will make sure that we use the most efficient one.
:return: N x P (full_output_cov = False) or N x P x P (full_output_cov = True)
:return: samples, mean, cov
samples has shape [num_samples, N, P] or [N, P] if num_samples is None
mean and cov as for conditional()
"""
if full_cov and full_output_cov:
raise NotImplementedError("The combination of both full_cov and full_output_cov is not "
"implemented for sample_conditional.")

logger.debug("sample conditional: InducingFeature Kernel")
mean, var = conditional(Xnew, feat, kern, f, full_cov=False, full_output_cov=full_output_cov,
q_sqrt=q_sqrt, white=white) # N x P, N x P (x P)
cov_structure = "full" if full_output_cov else "diag"
return _sample_mvn(mean, var, cov_structure)
mean, cov = conditional(Xnew, feat, kern, f, q_sqrt=q_sqrt, white=white,
full_cov=full_cov, full_output_cov=full_output_cov)
if full_cov:
# mean: N x P
# cov: P x N x N
mean = tf.matrix_transpose(mean) # now P x N
samples = _sample_mvn(mean, cov, 'full', num_samples=num_samples) # (S x) P x N
samples = tf.matrix_transpose(samples) # now (S x) N x P

else:
cov_structure = "full" if full_output_cov else "diag"
samples = _sample_mvn(mean, cov, cov_structure, num_samples=num_samples) # [(S,), N, P]

return samples, mean, cov


@sample_conditional.register(object, object, Kernel, object)
@name_scope("sample_conditional")
def _sample_conditional(Xnew, X, kern, f, *, q_sqrt=None, white=False):
def _sample_conditional(Xnew, X, kern, f, *, q_sqrt=None, white=False, full_cov=False, full_output_cov=False, num_samples=None):
if full_cov and full_output_cov:
raise NotImplementedError("The combination of both full_cov and full_output_cov is not "
"implemented for sample_conditional.")

logger.debug("sample conditional: Kernel")
mean, var = conditional(Xnew, X, kern, f, q_sqrt=q_sqrt, white=white, full_cov=False) # N x P, N x P
return _sample_mvn(mean, var, "diag") # N x P
if full_output_cov:
raise NotImplementedError("full_output_cov is not implemented")

mean, cov = conditional(Xnew, X, kern, f, q_sqrt=q_sqrt, white=white, full_cov=full_cov)
if full_cov:
mean = tf.matrix_transpose(mean)
cov_structure = "full" if full_cov else "diag"
samples = _sample_mvn(mean, cov, cov_structure, num_samples=num_samples)
if full_cov:
samples = tf.matrix_transpose(samples)
return samples, mean, cov


# ----------------------------------------------------------------------------
@@ -207,7 +236,7 @@ def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=Fal
if q_sqrt.get_shape().ndims == 2:
LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2) # R x M x N
elif q_sqrt.get_shape().ndims == 3:
L = tf.matrix_band_part(q_sqrt, -1, 0) # R x M x M
L = q_sqrt
A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
LTA = tf.matmul(L, A_tiled, transpose_a=True) # R x M x N
else: # pragma: no cover
@@ -334,7 +363,7 @@ def uncertain_conditional(Xnew_mu, Xnew_var, feat, kern, q_mu, q_sqrt, *,
########################## HELPERS ##############################
# ---------------------------------------------------------------

def _sample_mvn(mean, cov, cov_structure):
def _sample_mvn(mean, cov, cov_structure=None, num_samples=None):
"""
Returns a sample from a D-dimensional Multivariate Normal distribution
:param mean: N x D
@@ -344,17 +373,34 @@ def _sample_mvn(mean, cov, cov_structure):
- "full": cov holds the full covariance matrix (without jitter)
:return: sample from the MVN of shape N x D
"""
eps = tf.random_normal(tf.shape(mean), dtype=settings.float_type) # N x P
if cov_structure == "diag":
sample = mean + tf.sqrt(cov) * eps # N x P
elif cov_structure == "full":
cov = cov + (tf.eye(tf.shape(mean)[1], dtype=settings.float_type) * settings.numerics.jitter_level)[None, ...] # N x P x P
chol = tf.cholesky(cov) # N x P x P
return mean + (tf.matmul(chol, eps[..., None])[..., 0]) # N x P
else:
raise NotImplementedError # pragma: no cover
mean_shape = tf.shape(mean)
cov_shape = tf.shape(cov)
N, D = mean_shape[0], mean_shape[1]
S = num_samples if num_samples is not None else 1
# assert shape(cov) == (N, D) or (N, D, D)
with tf.control_dependencies([
tf.Assert(tf.equal(cov_shape[0], N) & tf.reduce_all(tf.equal(cov_shape[1:], D)),
data=[mean_shape, cov_shape])
]):

if cov_structure == "diag":
with tf.control_dependencies([tf.assert_equal(tf.rank(mean), tf.rank(cov))]):
eps = tf.random_normal([S, N, D], dtype=settings.float_type) # S x N x D
samples = mean + tf.sqrt(cov) * eps # S x N x D
elif cov_structure == "full":
with tf.control_dependencies([tf.assert_equal(tf.rank(mean) + 1, tf.rank(cov))]):
jittermat = settings.numerics.jitter_level * \
tf.eye(D, batch_shape=[N], dtype=settings.float_type) # N x D x D
eps = tf.random_normal([N, D, S], dtype=settings.float_type) # N x D x S
chol = tf.cholesky(cov + jittermat) # N x D x D
samples = mean[..., None] + tf.matmul(chol, eps) # N x D x S
samples = tf.transpose(samples, [2, 0, 1]) # S x N x D
else:
raise NotImplementedError # pragma: no cover

return sample # N x P
if num_samples is None:
return samples[0] # N x D
return samples # S x N x D

def _expand_independent_outputs(fvar, full_cov, full_output_cov):
"""
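For reference, a minimal sketch of how the reworked _sample_mvn helper behaves with and without num_samples (illustrative only, not part of the commit; it assumes the shapes documented above and the default float64 settings):

import tensorflow as tf
from gpflow import settings
from gpflow.conditionals import _sample_mvn

N, D, S = 4, 3, 1000
mean = tf.zeros((N, D), dtype=settings.float_type)

# "diag": cov holds per-dimension variances, shape N x D
diag_cov = tf.ones((N, D), dtype=settings.float_type)
samples_diag = _sample_mvn(mean, diag_cov, "diag", num_samples=S)   # S x N x D

# "full": cov holds full covariance matrices, shape N x D x D
full_cov = tf.eye(D, batch_shape=[N], dtype=settings.float_type)
samples_full = _sample_mvn(mean, full_cov, "full")                  # num_samples=None -> N x D

with tf.Session() as sess:
    a, b = sess.run([samples_diag, samples_full])
    print(a.shape, b.shape)  # (1000, 4, 3) (4, 3)
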
17 changes: 14 additions & 3 deletions gpflow/multioutput/conditionals.py
@@ -258,7 +258,7 @@ def _conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False,

@sample_conditional.register(object, (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok, object)
@name_scope("sample_conditional")
def _sample_conditional(Xnew, feat, kern, f, *, full_output_cov=False, q_sqrt=None, white=False):
def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None):
"""
`sample_conditional` will return a sample from the conditional distribution.
In most cases this means calculating the conditional mean m and variance v and then
@@ -269,13 +269,24 @@ def _sample_conditional(Xnew, feat, kern, f, *, full_output_cov=False, q_sqrt=No
:return: N x P (full_output_cov = False) or N x P x P (full_output_cov = True)
"""
logger.debug("sample conditional: (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok")
if full_cov:
raise NotImplementedError("full_cov not yet implemented")
if full_output_cov:
raise NotImplementedError("full_output_cov not yet implemented")
independent_cond = conditional.dispatch(object, SeparateIndependentMof, SeparateIndependentMok, object)
g_mu, g_var = independent_cond(Xnew, feat, kern, f, white=white, q_sqrt=q_sqrt,
full_output_cov=False, full_cov=False) # N x L, N x L
g_sample = _sample_mvn(g_mu, g_var, "diag") # N x L
g_sample = _sample_mvn(g_mu, g_var, "diag", num_samples=num_samples) # N x L
with params_as_tensors_for(kern):
f_sample = tf.einsum("pl,nl->np", kern.W, g_sample)
return f_sample
f_mu = tf.einsum("pl,nl->np", kern.W, g_mu)
# W g_var W.T
# [P, L] @ [L, L] @ [L, P]
# \sum_l,l' W_pl g_var_ll' W_p'l'
# \sum_l W_pl g_var_nl W_p'l
# ->
f_var = tf.einsum("pl,nl,pl->np", kern.W, g_var, kern.W)
return f_sample, f_mu, f_var


# -----------------
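The mixing step above computes f_mu = W g_mu and, because the latent GPs are independent, f_var[n] is the diagonal of W diag(g_var[n]) W.T; a small numpy check of that identity (illustrative only, arbitrary shapes):

import numpy as np

P, L, N = 3, 2, 5
W = np.random.randn(P, L)
g_mu = np.random.randn(N, L)
g_var = np.random.rand(N, L)  # per-latent (diagonal) variances

f_mu = np.einsum("pl,nl->np", W, g_mu)
f_var = np.einsum("pl,nl,pl->np", W, g_var, W)

# The same quantity computed explicitly per data point
for n in range(N):
    expected = np.diag(W @ np.diag(g_var[n]) @ W.T)
    np.testing.assert_allclose(f_var[n], expected)
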
50 changes: 34 additions & 16 deletions tests/test_multioutput.py
@@ -175,7 +175,8 @@ class DataMixedKernel(Data):


@pytest.mark.parametrize("cov_structure", ["full", "diag"])
def test_sample_mvn(session_tf, cov_structure):
@pytest.mark.parametrize("num_samples", [None, 1, 10])
def test_sample_mvn(session_tf, cov_structure, num_samples):
"""
Draws 10,000 samples from a distribution
with known mean and covariance. The test checks
@@ -190,8 +191,15 @@ def test_sample_mvn(session_tf, cov_structure):
elif cov_structure == "diag":
covs = tf.ones((N, D), dtype=float_type)

samples = _sample_mvn(means, covs, cov_structure)
samples = _sample_mvn(means, covs, cov_structure, num_samples=num_samples)
value = session_tf.run(samples)

if num_samples is None:
assert value.shape == (N, D)
else:
assert value.shape == (num_samples, N, D)
value = value.reshape(-1, D)

samples_mean = np.mean(value, axis=0)
samples_cov = np.cov(value, rowvar=False)
np.testing.assert_array_almost_equal(samples_mean, [1., 1.], decimal=1)
@@ -206,11 +214,12 @@ def _create_feed_dict(placeholders_dict, value_dict):


@pytest.mark.parametrize("whiten", [True, False])
def test_sample_conditional(session_tf, whiten):
@pytest.mark.parametrize("full_cov,full_output_cov", [(False, False), (False, True), (True, False)])
def test_sample_conditional(session_tf, whiten, full_cov, full_output_cov):
q_mu = np.random.randn(Data.M , Data.P) # M x P
q_sqrt = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)]) # P x M x M
Z = Data.X[:Data.M, ...] # M x D
Xs = np.ones((int(10e5), Data.D), dtype=float_type)
Xs = np.ones((Data.N, Data.D), dtype=float_type)

feature = InducingPoints(Z.copy())
kernel = RBF(Data.D)
@@ -220,20 +229,29 @@ def test_sample_conditional(session_tf, whiten):
feed_dict = _create_feed_dict(placeholders, values)

# Path 1
sample = sample_conditional(placeholders["Xnew"], placeholders["Z"], kernel,
placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=whiten)
value = session_tf.run(sample, feed_dict=feed_dict)
sample_f = sample_conditional(placeholders["Xnew"], feature, kernel,
placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=whiten,
full_cov=full_cov, full_output_cov=full_output_cov, num_samples=int(1e5))
value_f, mean_f, var_f = session_tf.run(sample_f, feed_dict=feed_dict)
value_f = value_f.reshape((-1,) + value_f.shape[2:])

# Path 2
sample2 = sample_conditional(placeholders["Xnew"], feature, kernel,
placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=whiten)
value2 = session_tf.run(sample2, feed_dict=feed_dict)
if full_output_cov:
pytest.skip("sample_conditional with X instead of feature does not support full_output_cov")

sample_x = sample_conditional(placeholders["Xnew"], placeholders["Z"], kernel,
placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=whiten,
full_cov=full_cov, full_output_cov=full_output_cov, num_samples=int(1e5))
value_x, mean_x, var_x = session_tf.run(sample_x, feed_dict=feed_dict)
value_x = value_x.reshape((-1,) + value_x.shape[2:])

# check if mean and covariance of samples are similar
np.testing.assert_array_almost_equal(np.mean(value, axis=0),
np.mean(value2, axis=0), decimal=1)
np.testing.assert_array_almost_equal(np.cov(value, rowvar=False),
np.cov(value2, rowvar=False), decimal=1)
np.testing.assert_array_almost_equal(np.mean(value_x, axis=0),
np.mean(value_f, axis=0), decimal=1)
np.testing.assert_array_almost_equal(np.cov(value_x, rowvar=False),
np.cov(value_f, rowvar=False), decimal=1)
np.testing.assert_allclose(mean_x, mean_f)
np.testing.assert_allclose(var_x, var_f)


def test_sample_conditional_mixedkernel(session_tf):
@@ -255,15 +273,15 @@ def test_sample_conditional_mixedkernel(session_tf):

sample = sample_conditional(placeholders["Xnew"], mixed_feature, mixed_kernel,
placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True)
value = session_tf.run(sample, feed_dict=feed_dict)
value, mean, var = session_tf.run(sample, feed_dict=feed_dict)


# Path 2: independent kernels, mixed later
separate_kernel = mk.SeparateIndependentMok([RBF(Data.D) for _ in range(Data.L)])
shared_feature = mf.SharedIndependentMof(InducingPoints(Z.copy()))
sample2 = sample_conditional(placeholders["Xnew"], shared_feature, separate_kernel,
placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True)
value2 = session_tf.run(sample2, feed_dict=feed_dict)
value2, mean2, var2 = session_tf.run(sample2, feed_dict=feed_dict)
value2 = np.matmul(value2, W.T)
# check if mean and covariance of samples are similar
np.testing.assert_array_almost_equal(np.mean(value, axis=0),
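The moment checks in these tests flatten the sample and batch axes before comparing the empirical mean and covariance; a condensed, self-contained numpy version of that check (synthetic data, not from the commit):

import numpy as np

rng = np.random.RandomState(0)
value = rng.randn(10000, 4, 2) + 1.0       # stand-in for a (num_samples, N, D) output with mean 1, identity covariance

flat = value.reshape(-1, value.shape[-1])  # collapse sample and batch axes
np.testing.assert_array_almost_equal(flat.mean(axis=0), [1., 1.], decimal=1)
np.testing.assert_array_almost_equal(np.cov(flat, rowvar=False), np.eye(2), decimal=1)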
