
Merge pull request #788 from SheffieldML/devel
Release 1.9.9
zhenwendai authored Oct 17, 2019
2 parents e4848ce + 2c8245d commit 92f2e87
Showing 60 changed files with 846 additions and 145 deletions.
2 changes: 1 addition & 1 deletion GPy/__version__.py
@@ -1 +1 @@
__version__ = "1.9.8"
__version__ = "1.9.9"
53 changes: 53 additions & 0 deletions GPy/examples/regression.py
@@ -581,3 +581,56 @@ def warped_gp_cubic_sine(max_iters=100):
    m.plot(title="Standard GP")
    warp_m.plot_warping()
    pb.show()
    return warp_m


def multioutput_gp_with_derivative_observations():
    def plot_gp_vs_real(m, x, yreal, size_inputs, title, fixed_input=1, xlim=[0, 11], ylim=[-1.5, 3]):
        fig, ax = pb.subplots()
        ax.set_title(title)
        pb.plot(x, yreal, "r", label='Real function')
        rows = slice(0, size_inputs[0]) if fixed_input == 0 else slice(size_inputs[0], size_inputs[0] + size_inputs[1])
        m.plot(fixed_inputs=[(1, fixed_input)], which_data_rows=rows, xlim=xlim, ylim=ylim, ax=ax)

    f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
    fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
    N = 10        # Number of observations
    M = 10        # Number of derivative observations
    Npred = 100   # Number of prediction points
    sigma = 0.05      # Noise of observations
    sigma_der = 0.05  # Noise of derivative observations
    x = np.array([np.linspace(1, 10, N)]).T
    y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))

    xd = np.array([np.linspace(2, 8, M)]).T
    yd = fd(xd) + np.array(sigma_der*np.random.normal(0, 1, (M, 1)))

    xpred = np.array([np.linspace(0, 11, Npred)]).T
    ypred_true = f(xpred)
    ydpred_true = fd(xpred)

    # Squared exponential kernel:
    se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
    # A separate kernel is needed for the derivative observations; the kernel created above is given as an input:
    se_der = GPy.kern.DiffKern(se, 0)

    # Likelihoods for the regular observations and the derivative observations:
    gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
    gauss_der = GPy.likelihoods.Gaussian(variance=sigma_der**2)

    # Create the model. Everything is given as lists; the order of the inputs indicates the order of the outputs.
    # Here the regular observations come first and the derivative observations second, so the kernels and
    # the likelihoods must follow the same order. Cross-covariances are taken care of automatically.
    m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, gauss_der])

    # Optimize the model
    m.optimize(messages=0, ipython_notebook=False)

    # Plot the model; the syntax is the same as for multioutput models:
    plot_gp_vs_real(m, xpred, ydpred_true, [x.shape[0], xd.shape[0]], title='Latent function derivatives', fixed_input=1, xlim=[0, 11], ylim=[-1.5, 3])
    plot_gp_vs_real(m, xpred, ypred_true, [x.shape[0], xd.shape[0]], title='Latent function', fixed_input=0, xlim=[0, 11], ylim=[-1.5, 3])

    # Make noiseless predictions for the latent function values:
    mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0, 1))])

    return m
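For reference, the new example can be invoked directly; a minimal sketch, assuming matplotlib is available for the plotting calls inside the example (the seed below is only added here to make the noisy toy data reproducible):

import numpy as np
from GPy.examples.regression import multioutput_gp_with_derivative_observations

np.random.seed(0)  # hypothetical seed for reproducibility, not part of the example itself
m = multioutput_gp_with_derivative_observations()
print(m)  # inspect the optimized MultioutputGP model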
4 changes: 3 additions & 1 deletion GPy/kern/__init__.py
@@ -43,4 +43,6 @@
from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian
from .src.multioutput_kern import MultioutputKern
from .src.multioutput_kern import MultioutputKern
from .src.multioutput_derivative_kern import MultioutputDerivativeKern
from .src.diff_kern import DiffKern
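With these imports in place, the new kernel classes are exposed directly under GPy.kern; a minimal sketch of where they now live:

import GPy

# Classes newly exported from GPy.kern in this release:
print(GPy.kern.MultioutputKern)
print(GPy.kern.MultioutputDerivativeKern)
print(GPy.kern.DiffKern)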
2 changes: 1 addition & 1 deletion GPy/kern/src/ODE_UY.py
@@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from .kern import Kern
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
2 changes: 1 addition & 1 deletion GPy/kern/src/ODE_UYC.py
@@ -5,7 +5,7 @@
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices

class ODE_UYC(Kern):
    def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias=1., active_dims=None, name='ode_uyc'):
2 changes: 1 addition & 1 deletion GPy/kern/src/ODE_st.py
@@ -4,7 +4,7 @@
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices


class ODE_st(Kern):
2 changes: 1 addition & 1 deletion GPy/kern/src/ODE_t.py
@@ -2,7 +2,7 @@
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices


class ODE_t(Kern):
88 changes: 88 additions & 0 deletions GPy/kern/src/diff_kern.py
@@ -0,0 +1,88 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern
import numpy as np
from paramz.caching import Cache_this

class DiffKern(Kern):
    """
    DiffKern is a thin wrapper for using partial derivatives of kernels as kernels. E.g. in combination with a
    Multioutput kernel this allows the user to train GPs with observations of the latent function and of latent
    function derivatives. NOTE: DiffKern only works when used with a Multioutput kernel. Do not use the kernel standalone.
    The parameters the kernel needs are:
    - 'base_kern': a member of the Kern class that is used for the observations
    - 'dimension': integer that indicates in which dimension the partial derivative observations are
    """
    def __init__(self, base_kern, dimension):
        super(DiffKern, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='DiffKern')
        self.base_kern = base_kern
        self.dimension = dimension

    def parameters_changed(self):
        self.base_kern.parameters_changed()

    @Cache_this(limit=3, ignore_args=())
    def K(self, X, X2=None, dimX2=None):  # X in dimension self.dimension
        if X2 is None:
            X2 = X
        if dimX2 is None:
            dimX2 = self.dimension
        return self.base_kern.dK2_dXdX2(X, X2, self.dimension, dimX2)

    @Cache_this(limit=3, ignore_args=())
    def Kdiag(self, X):
        return np.diag(self.base_kern.dK2_dXdX2(X, X, self.dimension, self.dimension))

    @Cache_this(limit=3, ignore_args=())
    def dK_dX_wrap(self, X, X2):  # X in dimension self.dimension
        return self.base_kern.dK_dX(X, X2, self.dimension)

    @Cache_this(limit=3, ignore_args=())
    def dK_dX2_wrap(self, X, X2):  # X in dimension self.dimension
        return self.base_kern.dK_dX2(X, X2, self.dimension)

    def reset_gradients(self):
        self.base_kern.reset_gradients()

    @property
    def gradient(self):
        return self.base_kern.gradient

    @gradient.setter
    def gradient(self, gradient):
        self.base_kern.gradient = gradient

    def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
        if dimX2 is None:
            dimX2 = self.dimension
        gradients = self.base_kern.dgradients2_dXdX2(X, X2, self.dimension, dimX2)
        self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])

    def update_gradients_diag(self, dL_dK_diag, X):
        gradients = self.base_kern.dgradients2_dXdX2(X, X, self.dimension, self.dimension)
        self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])

    def update_gradients_dK_dX(self, dL_dK, X, X2=None):
        if X2 is None:
            X2 = X
        gradients = self.base_kern.dgradients_dX(X, X2, self.dimension)
        self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])

    def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
        gradients = self.base_kern.dgradients_dX2(X, X2, self.dimension)
        self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])

    def gradients_X(self, dL_dK, X, X2):
        tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, :, self.dimension]
        return np.sum(tmp, axis=1)

    def gradients_X2(self, dL_dK, X, X2):
        tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
        return np.sum(tmp, axis=1)

    def _convert_gradients(self, l, g, f=lambda x: x):
        if type(g) is np.ndarray:
            return np.sum(f(l)*f(g))
        else:
            return np.array([np.sum(f(l)*f(gi)) for gi in g])
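As the docstring notes, DiffKern is not meant to be used standalone but wrapped into a multioutput model. A minimal sketch of the intended usage, mirroring the regression example above (the toy data here are made up for illustration):

import numpy as np
import GPy

# Base kernel shared by the function values and the derivative observations.
se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
# DiffKern wraps the base kernel; the second argument is the input dimension
# in which the partial derivative observations are taken.
se_der = GPy.kern.DiffKern(se, 0)

# Toy data: noisy values of sin(x) and noisy values of its derivative cos(x).
x = np.linspace(1, 10, 10)[:, None]
y = np.sin(x) + 0.05*np.random.randn(10, 1)
xd = np.linspace(2, 8, 10)[:, None]
yd = np.cos(xd) + 0.05*np.random.randn(10, 1)

gauss = GPy.likelihoods.Gaussian(variance=0.05**2)
gauss_der = GPy.likelihoods.Gaussian(variance=0.05**2)

# Function values first, derivative observations second; kernels and
# likelihoods follow the same ordering as the inputs.
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd],
                             kernel_list=[se, se_der],
                             likelihood_list=[gauss, gauss_der])
m.optimize(messages=False)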
28 changes: 1 addition & 27 deletions GPy/kern/src/independent_outputs.py
@@ -5,34 +5,8 @@
from .kern import CombinationKernel
import numpy as np
import itertools
from ...util.multioutput import index_to_slices

def index_to_slices(index):
    """
    Take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
    e.g.
    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
    returns
    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
    or, a more complicated example
    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
    returns
    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
    """
    if len(index) == 0:
        return []

    # construct the return structure
    ind = np.asarray(index, dtype=np.int)
    ret = [[] for i in range(ind.max()+1)]

    # find the switchpoints
    ind_ = np.hstack((ind, ind[0]+ind[-1]+1))
    switchpoints = np.nonzero(ind_ - np.roll(ind_, +1))[0]

    [ret[ind_i].append(slice(*indexes_i)) for ind_i, indexes_i in zip(ind[switchpoints[:-1]], zip(switchpoints, switchpoints[1:]))]
    return ret

class IndependentOutputs(CombinationKernel):
"""
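The helper index_to_slices now lives in GPy.util.multioutput and is imported from there by the kernels touched in this release. A minimal sketch of its behaviour, taken from the docstring of the removed copy above:

import numpy as np
from GPy.util.multioutput import index_to_slices  # new location of the helper

index = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
print(index_to_slices(index))
# expected: [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]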
6 changes: 6 additions & 0 deletions GPy/kern/src/kern.py
@@ -116,6 +116,12 @@ def _slice_X(self, X):
        except:
            return X[:, self._all_dims_active]

    def _project_dim(self, dim):
        try:
            return np.where(self._all_dims_active == dim)[0][0]
        except:
            return None

    def K(self, X, X2):
        """
        Compute the kernel function.
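The new _project_dim helper maps an absolute input dimension to its position within the kernel's active dimensions, returning None when the kernel does not act on that dimension. A minimal sketch of the expected behaviour, using a hypothetical kernel restricted to active_dims=[1, 3]:

import GPy

# Hypothetical kernel acting only on input dimensions 1 and 3 of a larger input space.
k = GPy.kern.RBF(input_dim=2, active_dims=[1, 3])

print(k._project_dim(3))  # -> 1: dimension 3 is the second active dimension
print(k._project_dim(0))  # -> None: dimension 0 is not active for this kernel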