diff --git a/doc/source/notebooks/advanced_usage.ipynb b/doc/source/notebooks/advanced_usage.ipynb new file mode 100644 index 000000000..d47ed7cf1 --- /dev/null +++ b/doc/source/notebooks/advanced_usage.ipynb @@ -0,0 +1,638 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T10:53:12.620472Z", + "start_time": "2018-06-20T10:53:11.541346Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from matplotlib import pyplot as plt\n", + "import gpflow\n", + "from gpflow.test_util import notebook_niter, is_continuous_integration\n", + "from scipy.cluster.vq import kmeans2\n", + "\n", + "float_type = gpflow.settings.float_type \n", + "\n", + "ITERATIONS = notebook_niter(1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Mixing tensorflow models with gpflow\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: a convnet inside a gpflow model\n", + "Here we'll use the gpflow functionality, but we'll put a non-gpflow model inside the kernel" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T10:53:16.316761Z", + "start_time": "2018-06-20T10:53:12.621686Z" + } + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"./data/MNIST_data/\", one_hot=False)\n", + "\n", + "class Mnist:\n", + " input_dim = 784\n", + " Nclasses = 10\n", + " X = mnist.train.images.astype(float)\n", + " Y = mnist.train.labels.astype(float)[:, None]\n", + " Xtest = mnist.test.images.astype(float)\n", + " Ytest = mnist.test.labels.astype(float)[:, None]\n", + "\n", + "if is_continuous_integration():\n", + " mask = (Mnist.Y <= 1).squeeze()\n", + " Mnist.X = Mnist.X[mask][:105, 300:305]\n", + " Mnist.Y = Mnist.Y[mask][:105]\n", + " mask = (Mnist.Ytest <= 
1).squeeze()\n", + " Mnist.Xtest = Mnist.Xtest[mask][:10, 300:305]\n", + " Mnist.Ytest = Mnist.Ytest[mask][:10]\n", + " Mnist.input_dim = 5\n", + " Mnist.Nclasses = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T10:53:16.335949Z", + "start_time": "2018-06-20T10:53:16.317967Z" + } + }, + "outputs": [], + "source": [ + "# a vanilla conv net\n", + "# this gets 97.3% accuracy on MNIST when used on its own (+ final linear layer) after 20K iterations\n", + "def cnn_fn(x, output_dim):\n", + " \"\"\"\n", + " Adapted from https://www.tensorflow.org/tutorials/layers\n", + " \"\"\"\n", + " conv1 = tf.layers.conv2d(\n", + " inputs=tf.reshape(x, [-1, 28, 28, 1]),\n", + " filters=32,\n", + " kernel_size=[5, 5],\n", + " padding=\"same\",\n", + " activation=tf.nn.relu)\n", + "\n", + " pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)\n", + "\n", + " conv2 = tf.layers.conv2d(\n", + " inputs=pool1,\n", + " filters=64,\n", + " kernel_size=[5, 5],\n", + " padding=\"same\",\n", + " activation=tf.nn.relu)\n", + " \n", + " pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)\n", + "\n", + " pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])\n", + " return tf.layers.dense(inputs=pool2_flat, units=output_dim, activation=tf.nn.relu)\n", + "\n", + "if is_continuous_integration():\n", + " def cnn_fn(x, output_dim):\n", + " return tf.layers.dense(inputs=tf.reshape(x, [-1, Mnist.input_dim]), units=output_dim)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T10:53:16.475537Z", + "start_time": "2018-06-20T10:53:16.337305Z" + } + }, + "outputs": [], + "source": [ + "class KernelWithNN(gpflow.kernels.Kernel):\n", + " \"\"\"\n", + " This kernel class allows for easily adding a NN (or other function) to a GP model.\n", + " The kernel does not actually do anything with the NN.\n", + " \"\"\"\n", + " \n", + " 
def __init__(self, kern, f):\n", + " \"\"\"\n", + " kern.input_dim needs to be consistent with the output dimension of f\n", + " \"\"\"\n", + " super().__init__(kern.input_dim)\n", + " self.kern = kern\n", + " self._f = f\n", + " \n", + " def f(self, X):\n", + " if X is not None:\n", + " with tf.variable_scope('forward', reuse=tf.AUTO_REUSE):\n", + " return self._f(X)\n", + " \n", + " def _get_f_vars(self):\n", + " return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='forward')\n", + "\n", + " @gpflow.autoflow([gpflow.settings.float_type, [None,None]])\n", + " def compute_f(self, X):\n", + " return self.f(X)\n", + " \n", + " def K(self, X, X2=None):\n", + " return self.kern.K(X, X2)\n", + " \n", + " def Kdiag(self, X):\n", + " return self.kern.Kdiag(X)\n", + "\n", + "class KernelSpaceInducingPoints(gpflow.features.InducingPointsBase):\n", + " pass\n", + "\n", + "# same Kuu as regular inducing points\n", + "gpflow.features.Kuu.register(KernelSpaceInducingPoints, KernelWithNN)(\n", + " gpflow.features.Kuu.dispatch(gpflow.features.InducingPoints, gpflow.kernels.Kernel)\n", + ")\n", + "\n", + "# Kuf is in NN output space\n", + "@gpflow.features.dispatch(KernelSpaceInducingPoints, KernelWithNN, object)\n", + "def Kuf(feat, kern, Xnew):\n", + " with gpflow.params_as_tensors_for(feat):\n", + " return kern.K(feat.Z, kern.f(Xnew))\n", + "\n", + "class NNComposedKernel(KernelWithNN):\n", + " \"\"\"\n", + " This kernel class applies f() to X before calculating K\n", + " \"\"\"\n", + " \n", + " def K(self, X, X2=None):\n", + " return super().K(self.f(X), self.f(X2))\n", + " \n", + " def Kdiag(self, X):\n", + " return super().Kdiag(self.f(X))\n", + " \n", + "# we need to add these extra functions to the model so the tensorflow variables get picked up\n", + "class NN_SVGP(gpflow.models.SVGP):\n", + " @property\n", + " def trainable_tensors(self):\n", + " return super().trainable_tensors + self.kern._get_f_vars()\n", + "\n", + " @property\n", + " def 
initializables(self):\n", + " return super().initializables + self.kern._get_f_vars()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:10:46.858524Z", + "start_time": "2018-06-20T10:53:16.477021Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "accuracy is 98.6800%\n" + ] + } + ], + "source": [ + "def ex1():\n", + " fX_dim = 5 \n", + " M = 100\n", + "\n", + " # annoyingly only float32 and lower is supported by the conv layers \n", + " f = lambda x: tf.cast(cnn_fn(tf.cast(x, tf.float32), fX_dim), float_type)\n", + " kern = NNComposedKernel(gpflow.kernels.Matern32(fX_dim), f)\n", + "\n", + " # build the model \n", + "\n", + " lik = gpflow.likelihoods.MultiClass(Mnist.Nclasses)\n", + "\n", + " Z = kmeans2(Mnist.X, M, minit='points')[0]\n", + "\n", + " model = NN_SVGP(Mnist.X, Mnist.Y, kern, lik, Z=Z, num_latent=Mnist.Nclasses, minibatch_size=1000)\n", + "\n", + " # use gpflow wrappers to train. 
NB all session handling is done for us\n", + " gpflow.training.AdamOptimizer(0.001).minimize(model, maxiter=ITERATIONS)\n", + "\n", + " # predictions\n", + " m, v = model.predict_y(Mnist.Xtest)\n", + " preds = np.argmax(m, 1).reshape(Mnist.Ytest.shape)\n", + " correct = preds == Mnist.Ytest.astype(int)\n", + " acc = np.average(correct.astype(float)) * 100.\n", + "\n", + " print('accuracy is {:.4f}%'.format(acc))\n", + "\n", + "gpflow.reset_default_graph_and_session()\n", + "ex1()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:25:46.165332Z", + "start_time": "2018-06-20T11:10:46.860361Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "accuracy is 97.5600%\n" + ] + } + ], + "source": [ + "def ex2b():\n", + " fX_dim = 5 \n", + " minibatch_size = notebook_niter(1000, test_n=10)\n", + " M = notebook_niter(100, test_n=5)\n", + "\n", + " # annoyingly only float32 and lower is supported by the conv layers \n", + " f = lambda x: tf.cast(cnn_fn(tf.cast(x, tf.float32), fX_dim), float_type)\n", + " kern = KernelWithNN(gpflow.kernels.Matern32(fX_dim), f)\n", + " \n", + " ## reset the inducing points (they live in a different space from X, so we need to be careful with this)\n", + " ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)\n", + " \n", + " # currently we need a hack due to model initialization: Z starts out as an uninitialized array and is overwritten with Z_0 below\n", + " feat = KernelSpaceInducingPoints(np.empty((M, fX_dim)))\n", + " #feat = FFeature(Z_0) # ideally, we could move the calculation of Z_0\n", + " \n", + " # build the model \n", + "\n", + " lik = gpflow.likelihoods.MultiClass(Mnist.Nclasses)\n", + "\n", + " #Z = kmeans2(Mnist.X, M, minit='points')[0]\n", + "\n", + " model = NN_SVGP(Mnist.X, Mnist.Y, kern, lik, feat=feat, num_latent=Mnist.Nclasses, minibatch_size=minibatch_size)\n", + "\n", + " fZ = model.kern.compute_f(Mnist.X[ind])\n", + " # Z_0 = kmeans2(fZ, M)[0] might fail\n", + " Z_0 = 
fZ[np.random.choice(len(fZ), M, replace=False)]\n", + " model.feature.Z = Z_0\n", + "\n", + " # use gpflow wrappers to train. NB all session handling is done for us\n", + " gpflow.training.AdamOptimizer(0.001).minimize(model, maxiter=ITERATIONS)\n", + "\n", + " # predictions\n", + " m, v = model.predict_y(Mnist.Xtest)\n", + " preds = np.argmax(m, 1).reshape(Mnist.Ytest.shape)\n", + " correct = preds == Mnist.Ytest.astype(int)\n", + " acc = np.average(correct.astype(float)) * 100.\n", + "\n", + " print('accuracy is {:.4f}%'.format(acc))\n", + "\n", + "gpflow.reset_default_graph_and_session()\n", + "ex2b()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: a gpflow model on top of a tensorflow model\n", + "Now we'll do things the other way round: we'll take a model implemented in pure tensorflow and show how we can put a gpflow model on top of it." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:21.347439Z", + "start_time": "2018-06-20T11:25:46.167693Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "acc is 98.5900\n" + ] + } + ], + "source": [ + "def ex2():\n", + " minibatch_size = notebook_niter(1000, test_n=10)\n", + " gp_dim = 5\n", + " M = notebook_niter(100, test_n=5)\n", + "\n", + " ## placeholders\n", + " X = tf.placeholder(tf.float32, [minibatch_size, Mnist.input_dim]) # fixed shape so num_data works in SVGP\n", + " Y = tf.placeholder(tf.float32, [minibatch_size, 1])\n", + " Xtest = tf.placeholder(tf.float32, [None, Mnist.input_dim])\n", + "\n", + " ## build graph\n", + "\n", + " with tf.variable_scope('cnn'):\n", + " f_X = tf.cast(cnn_fn(X, gp_dim), dtype=float_type)\n", + "\n", + " with tf.variable_scope('cnn', reuse=True):\n", + " f_Xtest = tf.cast(cnn_fn(Xtest, gp_dim), dtype=float_type)\n", + "\n", + " gp_model = gpflow.models.SVGP(f_X, tf.cast(Y, dtype=float_type), \n", + " gpflow.kernels.RBF(gp_dim), 
gpflow.likelihoods.MultiClass(Mnist.Nclasses), \n", + " Z=np.zeros((M, gp_dim)), # we'll set this later\n", + " num_latent=Mnist.Nclasses)\n", + "\n", + " loss = -gp_model.likelihood_tensor\n", + "\n", + " m, v = gp_model._build_predict(f_Xtest)\n", + " my, yv = gp_model.likelihood.predict_mean_and_var(m, v)\n", + "\n", + " with tf.variable_scope('adam'):\n", + " opt_step = tf.train.AdamOptimizer(0.001).minimize(loss)\n", + "\n", + " tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='adam')\n", + " tf_vars += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='cnn')\n", + "\n", + " ## initialize\n", + " sess = tf.Session()\n", + " sess.run(tf.variables_initializer(var_list=tf_vars))\n", + " gp_model.initialize(session=sess)\n", + " \n", + " ## reset the inducing points (they live in a different space from X, so we need to be careful with this)\n", + " ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)\n", + "\n", + " fZ = sess.run(f_X, feed_dict={X:Mnist.X[ind]})\n", + " # Z_0 = kmeans2(fZ, M)[0] might fail\n", + " Z_0 = fZ[np.random.choice(len(fZ), M, replace=False)]\n", + "\n", + " def set_gp_param(param, value):\n", + " sess.run(tf.assign(param.unconstrained_tensor, param.transform.backward(value)))\n", + "\n", + " set_gp_param(gp_model.feature.Z, Z_0)\n", + "\n", + " ## train\n", + " for i in range(ITERATIONS):\n", + " ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)\n", + " sess.run(opt_step, feed_dict={X:Mnist.X[ind], Y:Mnist.Y[ind]})\n", + "\n", + " ## predict\n", + " preds = np.argmax(sess.run(my, feed_dict={Xtest:Mnist.Xtest}), 1).reshape(Mnist.Ytest.shape)\n", + " correct = preds == Mnist.Ytest.astype(int)\n", + " acc = np.average(correct.astype(float)) * 100.\n", + " print('acc is {:.4f}'.format(acc))\n", + "\n", + "gpflow.reset_default_graph_and_session()\n", + "ex2()\n", + "\n", + "gpflow.reset_default_graph_and_session()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using 
feed dicts with autoflow\n", + "\n", + "We'll recreate something like Fig. 5.5 from GPML (Rasmussen & Williams, Gaussian Processes for Machine Learning), which is a plot of the marginal likelihood against hyperparameter configurations. We want to loop over a grid of hyperparameters, but it turns out gpflow is very slow at this when each configuration is set using assign. We can get around this by using a feed dict with autoflow.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:21.510889Z", + "start_time": "2018-06-20T11:40:21.349272Z" + } + }, + "outputs": [], + "source": [ + "X = np.array((-6, -4, -2.1, -2, 2, 5.5, 6)).reshape(-1, 1)\n", + "Y = np.array((-0.5, -0.5, 1.7, 1.6, 1, 2, 1.9)).reshape(-1, 1)\n", + "\n", + "model = gpflow.models.GPR(X, Y, gpflow.kernels.RBF(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:35.875839Z", + "start_time": "2018-06-20T11:40:21.512123Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.78 ms ± 159 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "# this is how long it takes to do the calculation with fixed parameters\n", + "model.compute_log_likelihood()\n", + "pass" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:43.636125Z", + "start_time": "2018-06-20T11:40:35.877115Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100 ms ± 14.1 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "# the assign method\n", + "model.kern.lengthscales = 1.\n", + "model.likelihood.variance = 0.1\n", + "model.compute_log_likelihood()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:43.640682Z", + "start_time": "2018-06-20T11:40:43.637867Z" + } + }, + "outputs": [], + "source": [ + "def make_feed(param, value):\n", + " return {param.unconstrained_tensor : param.transform.backward(value)}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:56.972382Z", + "start_time": "2018-06-20T11:40:43.642145Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.64 ms ± 25.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "# the feed dict method\n", + "feed_dict = {}\n", + "feed_dict.update(make_feed(model.kern.lengthscales, 1.))\n", + "feed_dict.update(make_feed(model.likelihood.variance, 0.1))\n", + "model.compute_log_likelihood(feed_dict=feed_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:56.977340Z", + "start_time": "2018-06-20T11:40:56.973737Z" + } + }, + "outputs": [], + "source": [ + "def evaluate(func, param_feed_dict):\n", + " tensor_feed_dict = {}\n", + " for param, value in param_feed_dict.items():\n", + " tensor_feed_dict.update(make_feed(param, value))\n", + " return func(feed_dict=tensor_feed_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:40:58.371053Z", + "start_time": "2018-06-20T11:40:56.978641Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.66 ms ± 50.3 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "evaluate(model.compute_log_likelihood, {model.kern.lengthscales: 1., model.likelihood.variance: 0.1})" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2018-06-20T11:41:02.717145Z", + "start_time": "2018-06-20T11:40:58.372035Z" + } + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# with the feed dict method this example is feasible\n", + "\n", + "log_noises = np.linspace(np.log(0.001), np.log(2.), 50)\n", + "log_lengthscales = np.linspace(np.log(0.1), np.log(20), 50)\n", + "\n", + "xx, yy = np.meshgrid(log_lengthscales, log_noises)\n", + "zz = []\n", + "\n", + "for lengthscale, noise in zip(np.exp(xx.flatten()), np.exp(yy.flatten())):\n", + " feed_dict = {}\n", + " feed_dict.update(make_feed(model.kern.lengthscales, lengthscale))\n", + " feed_dict.update(make_feed(model.likelihood.variance, noise))\n", + " # adapt GPflow so that we can use the following instead:\n", + " #zz.append(model.evaluate(model.compute_log_likelihood,\n", + " # {model.kern.lengthscales: lengthscale,\n", + " # model.likelihood.variance: noise}\n", + " # ))\n", + " # to pass arguments e.g. 
for predict_f(XX), could use\n", + " # model.evaluate(functools.partial(model.predict_f, XX), {...: ..., ...})\n", + " zz.append(model.compute_log_likelihood(feed_dict=feed_dict))\n", + " \n", + "plt.contour(xx, yy, np.array(zz).reshape(xx.shape),\n", + " levels = np.linspace(np.max(zz)-4, np.max(zz), 25))\n", + "plt.colorbar()\n", + "plt.show()\n", + "\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/notebooks/natural_gradients.ipynb b/doc/source/notebooks/natural_gradients.ipynb index b63c87011..5864765d5 100644 --- a/doc/source/notebooks/natural_gradients.ipynb +++ b/doc/source/notebooks/natural_gradients.ipynb @@ -76,7 +76,12 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:57.799754Z", + "start_time": "2018-04-30T09:59:57.547423Z" + } + }, "outputs": [], "source": [ "vgp = VGP(X, Y, make_matern_kernel(), gpflow.likelihoods.Gaussian())\n", @@ -93,7 +98,12 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:57.826302Z", + "start_time": "2018-04-30T09:59:57.800812Z" + } + }, "outputs": [ { "data": { @@ -114,13 +124,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* VGP likelihood is before natural gradient step:" + "* VGP likelihood before natural gradient step:" ] }, { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:57.863144Z", + "start_time": "2018-04-30T09:59:57.827383Z" + } + }, "outputs": [ { "data": 
{ @@ -147,7 +162,12 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:58.254414Z", + "start_time": "2018-04-30T09:59:57.864617Z" + } + }, "outputs": [ { "data": { @@ -184,7 +204,12 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:58.259075Z", + "start_time": "2018-04-30T09:59:58.255558Z" + } + }, "outputs": [], "source": [ "def run_adam(model, lr, iterations, callback=None):\n", @@ -198,13 +223,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Method for running Adam and Natural gradients optimizationon VGP. The hyperparameters at the end should match the GPR model." + "Method for running Adam and Natural gradients optimization on VGP. The hyperparameters at the end should match the GPR model." ] }, { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:58.270623Z", + "start_time": "2018-04-30T09:59:58.260311Z" + } + }, "outputs": [], "source": [ "def run_nat_grads_with_adam(model, lr, gamma, iterations, var_list=None, callback=None):\n", @@ -236,7 +266,12 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:58.664235Z", + "start_time": "2018-04-30T09:59:58.271714Z" + } + }, "outputs": [ { "name": "stdout", @@ -264,7 +299,12 @@ { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:59.501684Z", + "start_time": "2018-04-30T09:59:58.665452Z" + } + }, "outputs": [ { "name": "stdout", @@ -292,7 +332,12 @@ { "cell_type": "code", "execution_count": 10, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:59.505965Z", + "start_time": "2018-04-30T09:59:59.503129Z" + } + }, "outputs": [ { "data": { @@ -306,7 +351,9 @@ } ], "source": [ - 
"\"GPR lengthscales = {0:.4f}, VGP lengthscales = {1:.4f}\".format(float(gpr.kern.lengthscales.read_value()), float(vgp.kern.lengthscales.read_value()))" + "\"GPR lengthscales = {:.4f}, VGP lengthscales = {:.4f}\".format(\n", + " gpr.kern.lengthscales.read_value()[()],\n", + " vgp.kern.lengthscales.read_value()[()])" ] }, { @@ -320,7 +367,12 @@ { "cell_type": "code", "execution_count": 11, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:59.946016Z", + "start_time": "2018-04-30T09:59:59.507143Z" + } + }, "outputs": [], "source": [ "svgp = SVGP(X, Y, make_matern_kernel(), gpflow.likelihoods.Gaussian(), Z=Z)\n", @@ -340,7 +392,12 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T09:59:59.994121Z", + "start_time": "2018-04-30T09:59:59.947360Z" + } + }, "outputs": [ { "data": { @@ -367,7 +424,12 @@ { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T10:00:00.052847Z", + "start_time": "2018-04-30T09:59:59.995545Z" + } + }, "outputs": [ { "data": { @@ -394,7 +456,12 @@ { "cell_type": "code", "execution_count": 14, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T10:00:00.691979Z", + "start_time": "2018-04-30T10:00:00.053823Z" + } + }, "outputs": [ { "data": { @@ -423,7 +490,12 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T10:00:01.667254Z", + "start_time": "2018-04-30T10:00:00.692973Z" + } + }, "outputs": [], "source": [ "svgp = SVGP(X, Y, make_matern_kernel(), gpflow.likelihoods.Gaussian(), Z=Z, minibatch_size=50)\n", @@ -442,7 +514,12 @@ { "cell_type": "code", "execution_count": 16, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T10:00:03.742541Z", + "start_time": "2018-04-30T10:00:01.668715Z" + } + }, "outputs": [ { "data": { @@ -469,13 +546,18 
@@ "\n", "Compared with doing SVGP with ordinary gradients with minibatches, the natural gradient optimizer is much faster in the Gaussian case. \n", "\n", - "Here we'll do hyperparameter learning together optimization of the variational parameters, comparing the interleaved nat grad approach and using ordinary gradients for the hyperparameters and variational parameters jointly " + "Here we'll do hyperparameter learning together with optimization of the variational parameters, comparing the interleaved nat grad approach and using ordinary gradients for the hyperparameters and variational parameters jointly." ] }, { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2018-04-30T10:00:07.102187Z", + "start_time": "2018-04-30T10:00:03.743831Z" + } + }, "outputs": [], "source": [ "gpflow.reset_default_graph_and_session()" @@ -658,7 +740,7 @@ "metadata": {}, "source": [ "We can also choose to run natural gradients in another parameterization. The \n", - "sensible choice might is the model parameters (q_mu, q_sqrt), which is already in gpflow." + "sensible choice is the model parameters (q_mu, q_sqrt), which is already in gpflow." ] }, { @@ -710,7 +792,7 @@ "metadata": {}, "source": [ "With sufficiently small steps, it shouldn't make a difference which transform is used, but for large \n", - "step this can make a difference in practice." + "steps this can make a difference in practice." 
] } ], @@ -730,7 +812,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.5.2" } }, "nbformat": 4, diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index c541e1031..b3d822e83 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -26,26 +26,26 @@ from nbconvert.preprocessors.execute import CellExecutionError NOTEBOOK_FILES = [ + "advanced_usage.ipynb", "classification.ipynb", "coreg_demo.ipynb", + "FITCvsVFE.ipynb", + "GPLVM.ipynb", "kernels.ipynb", "mcmc.ipynb", - "ordinal.ipynb", - "Sanity_check.ipynb", - "settings.ipynb", - "SGPR_notes.ipynb", - "vgp_notes.ipynb", "models.ipynb", + "monitor-tensorboard.ipynb", "multiclass.ipynb", - "classification.ipynb", "multioutput.ipynb", - "monitor-tensorboard.ipynb", - "FITCvsVFE.ipynb", "natural_gradients.ipynb", - "upper_bound.ipynb", - "GPLVM.ipynb", + "ordinal.ipynb", "regression.ipynb", + "Sanity_check.ipynb", + "settings.ipynb", + "SGPR_notes.ipynb", "svi_test.ipynb", + "upper_bound.ipynb", + "vgp_notes.ipynb", ] BLACKLISTED_NOTEBOOKS = [