Commit fd6beaa
Support broadcasting (GPflow#829)
Modifies the kernels and conditionals to support broadcasting over (arbitrarily many) extra leading dimensions, similar to most TensorFlow ops. For example, `kern.K(X)` now accepts `X` of shape `[S, ..., T, N, D]` and returns a tensor of shape `[S, ..., T, N, N]`; see the usage sketch below.
hughsalimbeni authored and awav committed Aug 22, 2018
1 parent af89b6d commit fd6beaa
Showing 2 changed files with 137 additions and 25 deletions.
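
A minimal sketch of the new behaviour (GPflow 1.x / TF 1.x graph mode; the batch size `S` and all names below are illustrative, not part of the commit):

```python
import numpy as np
import tensorflow as tf

import gpflow

S, N, M, D = 5, 4, 3, 2                     # S: extra leading (batch) dimension
X = tf.constant(np.random.randn(S, N, D))   # [S, N, D]
X2 = tf.constant(np.random.randn(S, M, D))  # [S, M, D]

kern = gpflow.kernels.RBF(D)
K_sym = kern.K(X)        # [S, N, N]: one N x N Gram matrix per leading index
K_cross = kern.K(X, X2)  # [S, N, M]
K_diag = kern.Kdiag(X)   # [S, N]
```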
62 changes: 37 additions & 25 deletions gpflow/kernels.py
@@ -122,14 +122,15 @@ def _slice(self, X, X2):
         :return: Sliced X, X2, (Nxself.input_dim).
         """
         if isinstance(self.active_dims, slice):
-            X = X[:, self.active_dims]
+            X = X[..., self.active_dims]
             if X2 is not None:
-                X2 = X2[:, self.active_dims]
+                X2 = X2[..., self.active_dims]
         else:
-            X = tf.transpose(tf.gather(tf.transpose(X), self.active_dims))
+            X = tf.gather(X, self.active_dims, axis=-1)
             if X2 is not None:
-                X2 = tf.transpose(tf.gather(tf.transpose(X2), self.active_dims))
-        input_dim_shape = tf.shape(X)[1]
+                X2 = tf.gather(X2, self.active_dims, axis=-1)
+
+        input_dim_shape = tf.shape(X)[-1]
         input_dim = tf.convert_to_tensor(self.input_dim, dtype=settings.tf_int)
         with tf.control_dependencies([tf.assert_equal(input_dim_shape, input_dim)]):
             X = tf.identity(X)
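
The ellipsis indexing and `axis=-1` gather above are what make `_slice` rank-agnostic. A quick NumPy analogue (shapes illustrative, not from the commit):

```python
import numpy as np

X = np.random.randn(5, 4, 3)  # [S, N, D]

# slice-style active_dims: works for any number of leading dims
assert X[..., 0:2].shape == (5, 4, 2)

# index-style active_dims: gather along the last axis
assert np.take(X, [0, 2], axis=-1).shape == (5, 4, 2)
```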
@@ -178,7 +179,7 @@ def __init__(self, input_dim, variance=1.0, active_dims=None, name=None):

     @params_as_tensors
     def Kdiag(self, X):
-        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
 
 
 class White(Static):
@@ -189,10 +190,12 @@ class White(Static):
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
         if X2 is None:
-            d = tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+            d = tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
             return tf.matrix_diag(d)
         else:
-            shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
+            shape = tf.concat([tf.shape(X)[:-2],
+                               tf.reshape(tf.shape(X)[-2], [1]),
+                               tf.reshape(tf.shape(X2)[-2], [1])], 0)
             return tf.zeros(shape, settings.float_type)


@@ -204,9 +207,10 @@ class Constant(Static):
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
         if X2 is None:
-            shape = tf.stack([tf.shape(X)[0], tf.shape(X)[0]])
-        else:
-            shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
+            X2 = X
+        shape = tf.concat([tf.shape(X)[:-2],
+                           tf.reshape(tf.shape(X)[-2], [1]),
+                           tf.reshape(tf.shape(X2)[-2], [1])], 0)
         return tf.fill(shape, tf.squeeze(self.variance))
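
The `tf.concat` shape arithmetic used by `White` and `Constant` simply appends `[N, M]` to the leading dims of `X`; in NumPy terms (illustrative shapes):

```python
import numpy as np

X = np.random.randn(5, 4, 2)   # [..., N, D]
X2 = np.random.randn(5, 3, 2)  # [..., M, D]

shape = X.shape[:-2] + (X.shape[-2], X2.shape[-2])
assert shape == (5, 4, 3)      # [..., N, M]
```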


@@ -259,17 +263,17 @@ def _scaled_square_dist(self, X, X2):
         close to each other.
         """
         X = X / self.lengthscales
-        Xs = tf.reduce_sum(tf.square(X), axis=1)
+        Xs = tf.reduce_sum(tf.square(X), axis=-1, keepdims=True)
 
         if X2 is None:
             dist = -2 * tf.matmul(X, X, transpose_b=True)
-            dist += tf.reshape(Xs, (-1, 1)) + tf.reshape(Xs, (1, -1))
+            dist += Xs + tf.matrix_transpose(Xs)
             return dist
 
         X2 = X2 / self.lengthscales
-        X2s = tf.reduce_sum(tf.square(X2), axis=1)
+        X2s = tf.reduce_sum(tf.square(X2), axis=-1, keepdims=True)
         dist = -2 * tf.matmul(X, X2, transpose_b=True)
-        dist += tf.reshape(Xs, (-1, 1)) + tf.reshape(X2s, (1, -1))
+        dist += Xs + tf.matrix_transpose(X2s)
         return dist
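
For reference, `_scaled_square_dist` uses the standard expansion

$$\lVert x - x' \rVert^2 = \lVert x \rVert^2 - 2\,x^\top x' + \lVert x' \rVert^2,$$

and keeping the `keepdims=True` unit axis means `Xs + tf.matrix_transpose(X2s)` adds a `[..., N, 1]` tensor to a `[..., 1, M]` tensor, broadcasting to the full `[..., N, M]` grid of norm terms.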


@@ -299,7 +303,7 @@ def scaled_euclid_dist(self, X, X2):  # pragma: no cover

     @params_as_tensors
     def Kdiag(self, X, presliced=False):
-        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
 
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
@@ -401,7 +405,7 @@ def K(self, X, X2=None, presliced=False):
     def Kdiag(self, X, presliced=False):
         if not presliced:
             X, _ = self._slice(X, None)
-        return tf.reduce_sum(tf.square(X) * self.variance, 1)
+        return tf.reduce_sum(tf.square(X) * self.variance, -1)
 
 
 class Polynomial(Linear):
@@ -545,7 +549,7 @@ def __init__(self, input_dim,
     @params_as_tensors
     def _weighted_product(self, X, X2=None):
         if X2 is None:
-            return tf.reduce_sum(self.weight_variances * tf.square(X), axis=1) + self.bias_variance
+            return tf.reduce_sum(self.weight_variances * tf.square(X), axis=-1) + self.bias_variance
         return tf.matmul((self.weight_variances * X), X2, transpose_b=True) + self.bias_variance
 
     def _J(self, theta):
@@ -574,13 +578,15 @@ def K(self, X, X2=None, presliced=False):
             X2_denominator = tf.sqrt(self._weighted_product(X2))
 
         numerator = self._weighted_product(X, X2)
-        cos_theta = numerator / X_denominator[:, None] / X2_denominator[None, :]
+        X_denominator = tf.expand_dims(X_denominator, -1)
+        X2_denominator = tf.matrix_transpose(tf.expand_dims(X2_denominator, -1))
+        cos_theta = numerator / X_denominator / X2_denominator
         jitter = 1e-15
         theta = tf.acos(jitter + (1 - 2 * jitter) * cos_theta)
 
         return self.variance * (1. / np.pi) * self._J(theta) * \
-               X_denominator[:, None] ** self.order * \
-               X2_denominator[None, :] ** self.order
+               X_denominator ** self.order * \
+               X2_denominator ** self.order
 
     @params_as_tensors
     def Kdiag(self, X, presliced=False):
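
The denominator reshaping above is the same broadcasting trick: the `[..., N]` and `[..., M]` vectors become `[..., N, 1]` and `[..., 1, M]`, so the division spans the `[..., N, M]` numerator. A hypothetical NumPy check (names are stand-ins, not from the commit):

```python
import numpy as np

S, N, M = 5, 4, 3
numerator = np.random.rand(S, N, M)
dX = np.random.rand(S, N)   # stands in for X_denominator
dX2 = np.random.rand(S, M)  # stands in for X2_denominator

cos_theta = numerator / dX[..., :, None] / dX2[..., None, :]
assert cos_theta.shape == (S, N, M)
```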
@@ -622,7 +628,7 @@ def __init__(self, input_dim, period=1.0, variance=1.0,

     @params_as_tensors
     def Kdiag(self, X, presliced=False):
-        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
 
     @params_as_tensors
     def K(self, X, X2=None, presliced=False):
@@ -632,11 +638,17 @@ def K(self, X, X2=None, presliced=False):
             X2 = X
 
         # Introduce dummy dimension so we can use broadcasting
-        f = tf.expand_dims(X, 1)  # now N x 1 x D
-        f2 = tf.expand_dims(X2, 0)  # now 1 x M x D
+        f = tf.expand_dims(X, -2)  # ... x N x 1 x D
+        f2 = tf.expand_dims(X2, -2)  # ... x M x 1 x D
+        K = tf.rank(f2)  # 3, or 4 if broadcasting
+        perm = tf.concat([tf.reshape(tf.range(K-3), [K-3]),  # [], or [0] if broadcasting
+                          tf.reshape(K-2, [1]),  # [1], or [2] if broadcasting
+                          tf.reshape(K-3, [1]),  # [0], or [1] if broadcasting
+                          tf.reshape(K-1, [1])], 0)  # [2], or [3] if broadcasting
+        f2 = tf.transpose(f2, perm)  # ... x 1 x M x D
 
         r = np.pi * (f - f2) / self.period
-        r = tf.reduce_sum(tf.square(tf.sin(r) / self.lengthscales), 2)
+        r = tf.reduce_sum(tf.square(tf.sin(r) / self.lengthscales), -1)
 
         return self.variance * tf.exp(-0.5 * r)
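
A standalone NumPy sanity check of the permutation above (not part of the commit): with one batch dimension, `f2` has rank `K = 4` and `perm = [0, 2, 1, 3]`, which moves the `M` axis into place so that `f - f2` broadcasts:

```python
import numpy as np

S, N, M, D = 5, 4, 3, 2
f = np.random.randn(S, N, 1, D)   # X after expand_dims at axis -2
f2 = np.random.randn(S, M, 1, D)  # X2 after expand_dims at axis -2

f2 = np.transpose(f2, (0, 2, 1, 3))  # [S, 1, M, D]
r = f - f2                           # broadcasts to [S, N, M, D]
assert r.shape == (S, N, M, D)
```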

100 changes: 100 additions & 0 deletions tests/test_broadcasting.py
@@ -0,0 +1,100 @@
# Copyright 2017 the GPflow authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow as tf

import numpy as np
from numpy.testing import assert_allclose
import pytest

import gpflow
from gpflow.test_util import session_tf
from gpflow import settings
from gpflow import kernels

# TODO: kernels.Coregion

Kerns = [
# Static
kernels.White,
kernels.Constant,

# Stationary
kernels.RBF,
kernels.RationalQuadratic,
kernels.Exponential,
kernels.Matern12,
kernels.Matern32,
kernels.Matern52,
kernels.Cosine,

kernels.Linear,
kernels.Polynomial,
kernels.ArcCosine,
kernels.Periodic,
]

def _test_no_active_dims(Kern, sess):
S, N, M, D = 5, 4, 3, 2
X1 = tf.identity(np.random.randn(S, N, D))
X2 = tf.identity(np.random.randn(S, M, D))
kern = Kern(D) + gpflow.kernels.White(2)

compare_vs_map(X1, X2, kern, sess)

def _test_slice_active_dims(Kern, sess):
S, N, M, D = 5, 4, 3, 4
d = 2
X1 = tf.identity(np.random.randn(S, N, D))
X2 = tf.identity(np.random.randn(S, M, D))
kern = Kern(d, active_dims=slice(1, 1+d))

compare_vs_map(X1, X2, kern, sess)

def _test_indices_active_dims(Kern, sess):
S, N, M, D = 5, 4, 3, 4

X1 = tf.identity(np.random.randn(S, N, D))
X2 = tf.identity(np.random.randn(S, M, D))
kern = Kern(2, active_dims=[1, 3])

compare_vs_map(X1, X2, kern, sess)


def compare_vs_map(X1, X2, kern, sess):
    # Ground truth: apply the kernel to each leading-dimension slice in turn
    # with tf.map_fn, then check that the single broadcast call agrees.
    K12_map = tf.map_fn(lambda x: kern.K(x[0], x[1]), [X1, X2], dtype=settings.float_type)
    K12_native = kern.K(X1, X2)
    assert_allclose(*sess.run([K12_map, K12_native]))

    K_map = tf.map_fn(kern.K, X1, dtype=settings.float_type)
    K_native = kern.K(X1)
    assert_allclose(*sess.run([K_map, K_native]))

    Kdiag_map = tf.map_fn(kern.Kdiag, X1, dtype=settings.float_type)
    Kdiag_native = kern.Kdiag(X1)
    assert_allclose(*sess.run([Kdiag_map, Kdiag_native]))

def test_rbf_no_active_dims(session_tf):
_test_no_active_dims(gpflow.kernels.RBF, session_tf)

def test_rbf_slice_active_dims(session_tf):
_test_slice_active_dims(gpflow.kernels.RBF, session_tf)

def test_rbf_indices_active_dims(session_tf):
_test_indices_active_dims(gpflow.kernels.RBF, session_tf)

@pytest.mark.parametrize("Kern", Kerns)
def test_all_no_active_dims(session_tf, Kern):
_test_no_active_dims(Kern, session_tf)
