Commit fd6beaa
Support broadcasting (GPflow#829)
Modifies the kernels and conditionals to support broadcasting over (arbitrarily many) extra leading dimensions, similar to most TensorFlow ops. For example, `kern.K(X)` now accepts `X` of shape `[S, ..., T, N, D]` and returns a tensor of shape `[S, ..., T, N, N]`; see the usage sketch below.
hughsalimbeni authored and awav committed Aug 22, 2018
1 parent af89b6d commit fd6beaa
Showing 2 changed files with 137 additions and 25 deletions.
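
A minimal sketch of the new behaviour (GPflow 1.x / TF 1.x graph mode; the batch size `S` and all names below are illustrative, not part of the commit):

```python
import numpy as np
import tensorflow as tf

import gpflow

S, N, M, D = 5, 4, 3, 2                     # S: extra leading (batch) dimension
X = tf.constant(np.random.randn(S, N, D))   # [S, N, D]
X2 = tf.constant(np.random.randn(S, M, D))  # [S, M, D]

kern = gpflow.kernels.RBF(D)
K_sym = kern.K(X)        # [S, N, N]: one N x N Gram matrix per leading index
K_cross = kern.K(X, X2)  # [S, N, M]
K_diag = kern.Kdiag(X)   # [S, N]
```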
62 changes: 37 additions & 25 deletions gpflow/kernels.py
@@ -122,14 +122,15 @@ def _slice(self, X, X2):
         :return: Sliced X, X2, (Nxself.input_dim).
         """
         if isinstance(self.active_dims, slice):
-            X = X[:, self.active_dims]
+            X = X[..., self.active_dims]
             if X2 is not None:
-                X2 = X2[:, self.active_dims]
+                X2 = X2[..., self.active_dims]
         else:
-            X = tf.transpose(tf.gather(tf.transpose(X), self.active_dims))
+            X = tf.gather(X, self.active_dims, axis=-1)
             if X2 is not None:
-                X2 = tf.transpose(tf.gather(tf.transpose(X2), self.active_dims))
-        input_dim_shape = tf.shape(X)[1]
+                X2 = tf.gather(X2, self.active_dims, axis=-1)
+
+        input_dim_shape = tf.shape(X)[-1]
         input_dim = tf.convert_to_tensor(self.input_dim, dtype=settings.tf_int)
         with tf.control_dependencies([tf.assert_equal(input_dim_shape, input_dim)]):
             X = tf.identity(X)
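
The ellipsis indexing and `axis=-1` gather above are what make `_slice` rank-agnostic. A quick NumPy analogue (shapes illustrative, not from the commit):

```python
import numpy as np

X = np.random.randn(5, 4, 3)  # [S, N, D]

# slice-style active_dims: works for any number of leading dims
assert X[..., 0:2].shape == (5, 4, 2)

# index-style active_dims: gather along the last axis
assert np.take(X, [0, 2], axis=-1).shape == (5, 4, 2)
```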
@@ -178,7 +179,7 @@ def __init__(self, input_dim, variance=1.0, active_dims=None, name=None):

     @params_as_tensors
     def Kdiag(self, X):
-        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
 
 
 class White(Static):
@@ -189,10 +190,12 @@ class White(Static):
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
         if X2 is None:
-            d = tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+            d = tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
             return tf.matrix_diag(d)
         else:
-            shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
+            shape = tf.concat([tf.shape(X)[:-2],
+                               tf.reshape(tf.shape(X)[-2], [1]),
+                               tf.reshape(tf.shape(X2)[-2], [1])], 0)
             return tf.zeros(shape, settings.float_type)


@@ -204,9 +207,10 @@ class Constant(Static):
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
         if X2 is None:
-            shape = tf.stack([tf.shape(X)[0], tf.shape(X)[0]])
-        else:
-            shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
+            X2 = X
+        shape = tf.concat([tf.shape(X)[:-2],
+                           tf.reshape(tf.shape(X)[-2], [1]),
+                           tf.reshape(tf.shape(X2)[-2], [1])], 0)
         return tf.fill(shape, tf.squeeze(self.variance))
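
The `tf.concat` shape arithmetic used by `White` and `Constant` simply appends `[N, M]` to the leading dims of `X`; in NumPy terms (illustrative shapes):

```python
import numpy as np

X = np.random.randn(5, 4, 2)   # [..., N, D]
X2 = np.random.randn(5, 3, 2)  # [..., M, D]

shape = X.shape[:-2] + (X.shape[-2], X2.shape[-2])
assert shape == (5, 4, 3)      # [..., N, M]
```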


@@ -259,17 +263,17 @@ def _scaled_square_dist(self, X, X2):
         close to each other.
         """
         X = X / self.lengthscales
-        Xs = tf.reduce_sum(tf.square(X), axis=1)
+        Xs = tf.reduce_sum(tf.square(X), axis=-1, keepdims=True)
 
         if X2 is None:
             dist = -2 * tf.matmul(X, X, transpose_b=True)
-            dist += tf.reshape(Xs, (-1, 1)) + tf.reshape(Xs, (1, -1))
+            dist += Xs + tf.matrix_transpose(Xs)
             return dist
 
         X2 = X2 / self.lengthscales
-        X2s = tf.reduce_sum(tf.square(X2), axis=1)
+        X2s = tf.reduce_sum(tf.square(X2), axis=-1, keepdims=True)
         dist = -2 * tf.matmul(X, X2, transpose_b=True)
-        dist += tf.reshape(Xs, (-1, 1)) + tf.reshape(X2s, (1, -1))
+        dist += Xs + tf.matrix_transpose(X2s)
         return dist
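
For reference, `_scaled_square_dist` uses the standard expansion

$$\lVert x - x' \rVert^2 = \lVert x \rVert^2 - 2\,x^\top x' + \lVert x' \rVert^2,$$

and keeping the `keepdims=True` unit axis means `Xs + tf.matrix_transpose(X2s)` adds a `[..., N, 1]` tensor to a `[..., 1, M]` tensor, broadcasting to the full `[..., N, M]` grid of norm terms.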


@@ -299,7 +303,7 @@ def scaled_euclid_dist(self, X, X2):  # pragma: no cover

     @params_as_tensors
     def Kdiag(self, X, presliced=False):
-        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
 
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
@@ -401,7 +405,7 @@ def K(self, X, X2=None, presliced=False):
     def Kdiag(self, X, presliced=False):
         if not presliced:
             X, _ = self._slice(X, None)
-        return tf.reduce_sum(tf.square(X) * self.variance, 1)
+        return tf.reduce_sum(tf.square(X) * self.variance, -1)
 
 
 class Polynomial(Linear):
@@ -545,7 +549,7 @@ def __init__(self, input_dim,
     @params_as_tensors
     def _weighted_product(self, X, X2=None):
         if X2 is None:
-            return tf.reduce_sum(self.weight_variances * tf.square(X), axis=1) + self.bias_variance
+            return tf.reduce_sum(self.weight_variances * tf.square(X), axis=-1) + self.bias_variance
         return tf.matmul((self.weight_variances * X), X2, transpose_b=True) + self.bias_variance
 
     def _J(self, theta):
@@ -574,13 +578,15 @@ def K(self, X, X2=None, presliced=False):
             X2_denominator = tf.sqrt(self._weighted_product(X2))
 
         numerator = self._weighted_product(X, X2)
-        cos_theta = numerator / X_denominator[:, None] / X2_denominator[None, :]
+        X_denominator = tf.expand_dims(X_denominator, -1)
+        X2_denominator = tf.matrix_transpose(tf.expand_dims(X2_denominator, -1))
+        cos_theta = numerator / X_denominator / X2_denominator
         jitter = 1e-15
         theta = tf.acos(jitter + (1 - 2 * jitter) * cos_theta)
 
         return self.variance * (1. / np.pi) * self._J(theta) * \
-               X_denominator[:, None] ** self.order * \
-               X2_denominator[None, :] ** self.order
+               X_denominator ** self.order * \
+               X2_denominator ** self.order
 
     @params_as_tensors
     def Kdiag(self, X, presliced=False):
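
The denominator reshaping above is the same broadcasting trick: the `[..., N]` and `[..., M]` vectors become `[..., N, 1]` and `[..., 1, M]`, so the division spans the `[..., N, M]` numerator. A hypothetical NumPy check (names are stand-ins, not from the commit):

```python
import numpy as np

S, N, M = 5, 4, 3
numerator = np.random.rand(S, N, M)
dX = np.random.rand(S, N)   # stands in for X_denominator
dX2 = np.random.rand(S, M)  # stands in for X2_denominator

cos_theta = numerator / dX[..., :, None] / dX2[..., None, :]
assert cos_theta.shape == (S, N, M)
```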
@@ -622,7 +628,7 @@ def __init__(self, input_dim, period=1.0, variance=1.0,

     @params_as_tensors
     def Kdiag(self, X, presliced=False):
-        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
 
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
@@ -632,11 +638,17 @@ def K(self, X, X2=None, presliced=False):
             X2 = X
 
         # Introduce dummy dimension so we can use broadcasting
-        f = tf.expand_dims(X, 1)  # now N x 1 x D
-        f2 = tf.expand_dims(X2, 0)  # now 1 x M x D
+        f = tf.expand_dims(X, -2)  # ... x N x 1 x D
+        f2 = tf.expand_dims(X2, -2)  # ... x M x 1 x D
+        K = tf.rank(f2)  # 3, or 4 if broadcasting
+        perm = tf.concat([tf.reshape(tf.range(K-3), [K-3]),  # [], or [0] if broadcasting
+                          tf.reshape(K-2, [1]),  # [1], or [2] if broadcasting
+                          tf.reshape(K-3, [1]),  # [0], or [1] if broadcasting
+                          tf.reshape(K-1, [1])], 0)  # [2], or [3] if broadcasting
+        f2 = tf.transpose(f2, perm)  # ... x 1 x M x D
 
         r = np.pi * (f - f2) / self.period
-        r = tf.reduce_sum(tf.square(tf.sin(r) / self.lengthscales), 2)
+        r = tf.reduce_sum(tf.square(tf.sin(r) / self.lengthscales), -1)
 
         return self.variance * tf.exp(-0.5 * r)
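
A standalone NumPy sanity check of the permutation above (not part of the commit): with one batch dimension, `f2` has rank `K = 4` and `perm = [0, 2, 1, 3]`, which moves the `M` axis into place so that `f - f2` broadcasts:

```python
import numpy as np

S, N, M, D = 5, 4, 3, 2
f = np.random.randn(S, N, 1, D)   # X after expand_dims at axis -2
f2 = np.random.randn(S, M, 1, D)  # X2 after expand_dims at axis -2

f2 = np.transpose(f2, (0, 2, 1, 3))  # [S, 1, M, D]
r = f - f2                           # broadcasts to [S, N, M, D]
assert r.shape == (S, N, M, D)
```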

100 changes: 100 additions & 0 deletions tests/test_broadcasting.py
@@ -0,0 +1,100 @@
# Copyright 2017 the GPflow authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow as tf

import numpy as np
from numpy.testing import assert_allclose
import pytest

import gpflow
from gpflow.test_util import session_tf
from gpflow import settings
from gpflow import kernels

# TODO: kernels.Coregion

Kerns = [
# Static
kernels.White,
kernels.Constant,

# Stationary
kernels.RBF,
kernels.RationalQuadratic,
kernels.Exponential,
kernels.Matern12,
kernels.Matern32,
kernels.Matern52,
kernels.Cosine,

kernels.Linear,
kernels.Polynomial,
kernels.ArcCosine,
kernels.Periodic,
]

def _test_no_active_dims(Kern, sess):
S, N, M, D = 5, 4, 3, 2
X1 = tf.identity(np.random.randn(S, N, D))
X2 = tf.identity(np.random.randn(S, M, D))
kern = Kern(D) + gpflow.kernels.White(2)

compare_vs_map(X1, X2, kern, sess)

def _test_slice_active_dims(Kern, sess):
S, N, M, D = 5, 4, 3, 4
d = 2
X1 = tf.identity(np.random.randn(S, N, D))
X2 = tf.identity(np.random.randn(S, M, D))
kern = Kern(d, active_dims=slice(1, 1+d))

compare_vs_map(X1, X2, kern, sess)

def _test_indices_active_dims(Kern, sess):
S, N, M, D = 5, 4, 3, 4

X1 = tf.identity(np.random.randn(S, N, D))
X2 = tf.identity(np.random.randn(S, M, D))
kern = Kern(2, active_dims=[1, 3])

compare_vs_map(X1, X2, kern, sess)


def compare_vs_map(X1, X2, kern, sess):
    # Ground truth: apply the kernel to each leading-dimension slice in turn
    # with tf.map_fn, then check that the single broadcast call agrees.
    K12_map = tf.map_fn(lambda x: kern.K(x[0], x[1]), [X1, X2], dtype=settings.float_type)
    K12_native = kern.K(X1, X2)
    assert_allclose(*sess.run([K12_map, K12_native]))

    K_map = tf.map_fn(kern.K, X1, dtype=settings.float_type)
    K_native = kern.K(X1)
    assert_allclose(*sess.run([K_map, K_native]))

    Kdiag_map = tf.map_fn(kern.Kdiag, X1, dtype=settings.float_type)
    Kdiag_native = kern.Kdiag(X1)
    assert_allclose(*sess.run([Kdiag_map, Kdiag_native]))

def test_rbf_no_active_dims(session_tf):
_test_no_active_dims(gpflow.kernels.RBF, session_tf)

def test_rbf_slice_active_dims(session_tf):
_test_slice_active_dims(gpflow.kernels.RBF, session_tf)

def test_rbf_indices_active_dims(session_tf):
_test_indices_active_dims(gpflow.kernels.RBF, session_tf)

@pytest.mark.parametrize("Kern", Kerns)
def test_all_no_active_dims(session_tf, Kern):
_test_no_active_dims(Kern, session_tf)
