From 7a12f89f0731f4a302795016cdc2d3394167f016 Mon Sep 17 00:00:00 2001 From: ouyang jin Date: Fri, 6 Aug 2021 15:20:15 +0800 Subject: [PATCH] Fix embedding variable tutorial notebook format error --- .../embedding_variable_tutorial.ipynb | 904 +++++++++--------- 1 file changed, 446 insertions(+), 458 deletions(-) diff --git a/docs/tutorials/embedding_variable_tutorial.ipynb b/docs/tutorials/embedding_variable_tutorial.ipynb index cff7327ad..a903923a3 100644 --- a/docs/tutorials/embedding_variable_tutorial.ipynb +++ b/docs/tutorials/embedding_variable_tutorial.ipynb @@ -1,461 +1,449 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2021 The TensorFlow Authors." - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "05cf26dc9b18" + }, + "source": [ + "# TFRA - EmbeddingVariable Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dc653218a95f" + }, + "source": [ + "## Eager Mode" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ac41b69489ae" + }, + "source": [ + "### 1. Import tensorflow and tensorflow_recommenders_addons" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0ec04b6192ff" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow.keras.layers import Dense\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_recommenders_addons as tfra" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d85a1d4753ff" + }, + "source": [ + "### 2. Preparing the dataset\n", + "This tutorial uses movies reviews provided by the MovieLens 100K dataset, a classic dataset from the GroupLens research group at the University of Minnesota. In order to facilitate processing, we convert the data type of movie_id and user_id into int64." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "99940220fef6" + }, + "outputs": [], + "source": [ + "ratings = tfds.load(\"movielens/100k-ratings\", split=\"train\")\n", + "\n", + "ratings = ratings.map(lambda x: {\n", + " \"movie_id\": tf.strings.to_number(x[\"movie_id\"], tf.int64),\n", + " \"user_id\": tf.strings.to_number(x[\"user_id\"], tf.int64),\n", + " \"user_rating\": x[\"user_rating\"]\n", + "})\n", + "\n", + "tf.random.set_seed(2021)\n", + "shuffled = ratings.shuffle(100_000, seed=2021, reshuffle_each_iteration=False)\n", + "\n", + "dataset_train = shuffled.take(100_000).batch(256)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6d237ed66cb7" + }, + "source": [ + "### 3. 
Implementing a model\n",
+        "The NCFModel we implemented is very similar to the conventional one, and the main difference lies in the embedding layer. We specify the variable of embedding layer by tfra.embedding_variable.EmbeddingVariable."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "3887a87a2372"
+      },
+      "outputs": [],
+      "source": [
+        "class NCFModel(tf.keras.Model):\n",
+        "\n",
+        "  def __init__(self):\n",
+        "    super(NCFModel, self).__init__()\n",
+        "    self.embedding_size = 32\n",
+        "    self.d0 = Dense(\n",
+        "        256,\n",
+        "        activation='relu',\n",
+        "        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n",
+        "        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n",
+        "    self.d1 = Dense(\n",
+        "        64,\n",
+        "        activation='relu',\n",
+        "        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n",
+        "        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n",
+        "    self.d2 = Dense(\n",
+        "        1,\n",
+        "        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n",
+        "        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n",
+        "    self.user_embeddings = tfra.embedding_variable.EmbeddingVariable(\n",
+        "        name=\"user_dynamic_embeddings\",\n",
+        "        embedding_dim=self.embedding_size,\n",
+        "        initializer=tf.keras.initializers.RandomNormal(-1, 1))\n",
+        "    self.movie_embeddings = tfra.embedding_variable.EmbeddingVariable(\n",
+        "        name=\"moive_dynamic_embeddings\",\n",
+        "        embedding_dim=self.embedding_size,\n",
+        "        initializer=tf.keras.initializers.RandomNormal(-1, 1))\n",
+        "    self.loss = tf.keras.losses.MeanSquaredError()\n",
+        "\n",
+        "  def call(self, batch):\n",
+        "    movie_id = batch[\"movie_id\"]\n",
+        "    user_id = batch[\"user_id\"]\n",
+        "    rating = batch[\"user_rating\"]\n",
+        "\n",
+        "    user_id_val, user_id_idx = tf.unique(user_id)\n",
+        "    user_id_weights = tf.nn.embedding_lookup(\n",
+        "        params=self.user_embeddings,\n",
+        "        ids=user_id_val,\n",
+        "        name=\"user-id-weights\")\n",
+        "    
user_id_weights = tf.gather(user_id_weights, user_id_idx)\n", + "\n", + " movie_id_val, movie_id_idx = tf.unique(movie_id)\n", + " movie_id_weights = tf.nn.embedding_lookup(\n", + " params=self.movie_embeddings,\n", + " ids=movie_id_val,\n", + " name=\"movie-id-weights\")\n", + " movie_id_weights = tf.gather(movie_id_weights, movie_id_idx)\n", + "\n", + " embeddings = tf.concat([user_id_weights, movie_id_weights], axis=1)\n", + " dnn = self.d0(embeddings)\n", + " dnn = self.d1(dnn)\n", + " dnn = self.d2(dnn)\n", + " out = tf.reshape(dnn, shape=[-1])\n", + " loss = self.loss(rating, out)\n", + " return loss\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cb07cc348aea" + }, + "source": [ + "Let's instantiate the model, and specify the optimizer.\n", + "TF Optimizer(GradientDescent/Adagrad/Adam) are wrapped by tfra, use tfra.embedding_variable.Optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "e294e730c794" + }, + "outputs": [], + "source": [ + "model = NCFModel()\n", + "optimizer = tfra.embedding_variable.AdamOptimizer(learning_rate=0.001)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6376d2a17bb5" + }, + "source": [ + "### 4. Training the model\n", + "After defining the model, we can train the model and observe the change of loss." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "9288e8ec77cb" + }, + "outputs": [], + "source": [ + "def train(epoch=1):\n", + " for i in range(epoch):\n", + " total_loss = np.array([])\n", + " for (_, batch) in enumerate(dataset_train):\n", + " with tf.GradientTape() as tape:\n", + " loss = model(batch)\n", + " total_loss = np.append(total_loss, loss)\n", + " grads = tape.gradient(loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", + " print(\"epoch:\", i, \"mean_squared_error:\", np.mean(total_loss))\n", + "\n", + "if __name__==\"__main__\":\n", + " train(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6fb127e74992" + }, + "source": [ + "### Graph Mode" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a6495a6f963f" + }, + "source": [ + "#### 1. Import tensorflow and tensorflow_recommenders_addons\n", + "disable_eager_execution" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "5218cb658f44" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow.keras.layers import Dense\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_recommenders_addons as tfra\n", + "\n", + "tf.compat.v1.disable_eager_execution()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3d349f3bfc07" + }, + "source": [ + "#### 2. Preparing the dataset\n", + "This tutorial uses movies reviews provided by the MovieLens 100K dataset, a classic dataset from the GroupLens research group at the University of Minnesota. In order to facilitate processing, we convert the data type of movie_id and user_id into int64." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b19e0f30dfb1" + }, + "outputs": [], + "source": [ + "ratings = tfds.load(\"movielens/100k-ratings\", split=\"train\")\n", + "\n", + "ratings = ratings.map(lambda x: {\n", + " \"movie_id\": tf.strings.to_number(x[\"movie_id\"], tf.int64),\n", + " \"user_id\": tf.strings.to_number(x[\"user_id\"], tf.int64),\n", + " \"user_rating\": x[\"user_rating\"]\n", + "})\n", + "\n", + "tf.random.set_seed(2021)\n", + "shuffled = ratings.shuffle(100_000, seed=2021, reshuffle_each_iteration=False)\n", + "\n", + "dataset_train = shuffled.take(100_000).batch(256)\n", + "iterator = tf.compat.v1.data.make_one_shot_iterator(dataset_train)\n", + "dataset_train = iterator.get_next()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d0031b8e091a" + }, + "source": [ + "### 3. Implementing a model\n", + "The NCFModel we implemented is very similar to the conventional one, and the main difference lies in the embedding layer. We specify the variable of embedding layer by tfra.embedding_variable.EmbeddingVariable." 
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "1b5619f1a319"
+      },
+      "outputs": [],
+      "source": [
+        "class NCFModel(tf.keras.Model):\n",
+        "\n",
+        "  def __init__(self):\n",
+        "    super(NCFModel, self).__init__()\n",
+        "    self.embedding_size = 32\n",
+        "    self.d0 = Dense(\n",
+        "        256,\n",
+        "        activation='relu',\n",
+        "        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n",
+        "        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n",
+        "    self.d1 = Dense(\n",
+        "        64,\n",
+        "        activation='relu',\n",
+        "        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n",
+        "        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n",
+        "    self.d2 = Dense(\n",
+        "        1,\n",
+        "        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n",
+        "        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n",
+        "    self.user_embeddings = tfra.embedding_variable.EmbeddingVariable(\n",
+        "        name=\"user_dynamic_embeddings\",\n",
+        "        embedding_dim=self.embedding_size,\n",
+        "        ktype=tf.int64,\n",
+        "        initializer=tf.keras.initializers.RandomNormal(-1, 1))\n",
+        "    self.movie_embeddings = tfra.embedding_variable.EmbeddingVariable(\n",
+        "        name=\"moive_dynamic_embeddings\",\n",
+        "        embedding_dim=self.embedding_size,\n",
+        "        ktype=tf.int64,\n",
+        "        initializer=tf.keras.initializers.RandomNormal(-1, 1))\n",
+        "    self.loss = tf.keras.losses.MeanSquaredError()\n",
+        "\n",
+        "  def call(self, batch):\n",
+        "    movie_id = batch[\"movie_id\"]\n",
+        "    user_id = batch[\"user_id\"]\n",
+        "    rating = batch[\"user_rating\"]\n",
+        "\n",
+        "    user_id_val, user_id_idx = tf.unique(user_id)\n",
+        "    user_id_weights = tf.nn.embedding_lookup(\n",
+        "        params=self.user_embeddings,\n",
+        "        ids=user_id_val,\n",
+        "        name=\"user-id-weights\")\n",
+        "    user_id_weights = tf.gather(user_id_weights, user_id_idx)\n",
+        "\n",
+        "    movie_id_val, movie_id_idx = tf.unique(movie_id)\n",
+        "    movie_id_weights = 
tf.nn.embedding_lookup(\n", + " params=self.movie_embeddings,\n", + " ids=movie_id_val,\n", + " name=\"movie-id-weights\")\n", + " movie_id_weights = tf.gather(movie_id_weights, movie_id_idx)\n", + "\n", + " embeddings = tf.concat([user_id_weights, movie_id_weights], axis=1)\n", + " dnn = self.d0(embeddings)\n", + " dnn = self.d1(dnn)\n", + " dnn = self.d2(dnn)\n", + " out = tf.reshape(dnn, shape=[-1])\n", + " loss = self.loss(rating, out)\n", + " return loss\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0c27d0bbbd00" + }, + "source": [ + "Let's instantiate the model, and specify the optimizer.\n", + "TF Optimizer(GradientDescent/Adagrad/Adam) are wrapped by tfra, use tfra.embedding_variable.Optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "4df6f03efbda" + }, + "outputs": [], + "source": [ + "model = NCFModel()\n", + "loss = model(dataset_train)\n", + "optimizer = tfra.embedding_variable.AdamOptimizer(learning_rate=0.001)\n", + "train_op = optimizer.minimize(loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b8d8074c46c2" + }, + "source": [ + "### 4. Training the model\n", + "After defining the model, we can train the model and observe the change of loss." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "95883928d2a3" + }, + "outputs": [], + "source": [ + "epoch = 10\n", + "with tf.compat.v1.Session() as sess:\n", + " for i in range(epoch):\n", + " loss_t, _ = sess.run([loss, train_op])\n", + " print(\"epoch:\", i, \"loss:\", loss_t)" + ] + } + ], + "metadata": { + "colab": { + "name": "embedding_variable_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "05cf26dc9b18" - }, - "source": [ - "# TFRA - EmbeddingVariable Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dc653218a95f" - }, - "source": [ - "## Eager Mode" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ac41b69489ae" - }, - "source": [ - "### 1. 
Import tensorflow and tensorflow_recommenders_addons" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "0ec04b6192ff" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.keras.layers import Dense\n", - "\n", - "import tensorflow_datasets as tfds\n", - "import tensorflow_recommenders_addons as tfra" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d85a1d4753ff" - }, - "source": [ - "### 2. Preparing the dataset\n", - "This tutorial uses movies reviews provided by the MovieLens 100K dataset, a classic dataset from the GroupLens research group at the University of Minnesota. In order to facilitate processing, we convert the data type of movie_id and user_id into int64." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "99940220fef6" - }, - "outputs": [], - "source": [ - "ratings = tfds.load(\"movielens/100k-ratings\", split=\"train\")\n", - "\n", - "ratings = ratings.map(lambda x: {\n", - " \"movie_id\": tf.strings.to_number(x[\"movie_id\"], tf.int64),\n", - " \"user_id\": tf.strings.to_number(x[\"user_id\"], tf.int64),\n", - " \"user_rating\": x[\"user_rating\"]\n", - "})\n", - "\n", - "tf.random.set_seed(2021)\n", - "shuffled = ratings.shuffle(100_000, seed=2021, reshuffle_each_iteration=False)\n", - "\n", - "dataset_train = shuffled.take(100_000).batch(256)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6d237ed66cb7" - }, - "source": [ - "### 3. Implementing a model\n", - "The NCFModel we implemented is very similar to the conventional one, and the main difference lies in the embedding layer. We specify the variable of embedding layer by tfra.embedding_variable.EmbeddingVariable." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "3887a87a2372" - }, - "outputs": [], - "source": [ - "class NCFModel(tf.keras.Model):\n", - "\n", - " def __init__(self):\n", - " super(NCFModel, self).__init__()\n", - " self.embedding_size = 32\n", - " self.d0 = Dense(\n", - " 256,\n", - " activation='relu',\n", - " kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n", - " bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n", - " self.d1 = Dense(\n", - " 64,\n", - " activation='relu',\n", - " kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n", - " bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n", - " self.d2 = Dense(\n", - " 1,\n", - " kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n", - " bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n", - " self.user_embeddings = tfra.embedding_variable.EmbeddingVariable(\n", - " name=\"user_dynamic_embeddings\",\n", - " embedding_dim=self.embedding_size,\n", - " initializer=tf.keras.initializers.RandomNormal(-1, 1))\n", - " self.movie_embeddings = tfra.embedding_variable.EmbeddingVariable(\n", - " name=\"moive_dynamic_embeddings\",\n", - " embedding_dim=self.embedding_size,\n", - " initializer=tf.keras.initializers.RandomNormal(-1, 1))\n", - " self.loss = tf.keras.losses.MeanSquaredError()\n", - "\n", - " def call(self, batch):\n", - " movie_id = batch[\"movie_id\"]\n", - " user_id = batch[\"user_id\"]\n", - " rating = batch[\"user_rating\"]\n", - "\n", - " user_id_val, user_id_idx = np.unique(user_id, return_inverse=True)\n", - " user_id_weights = tf.nn.embedding_lookup(\n", - " params=self.user_embeddings,\n", - " ids=user_id_val,\n", - " name=\"user-id-weights\")\n", - " user_id_weights = tf.gather(user_id_weights, user_id_idx)\n", - "\n", - " movie_id_val, movie_id_idx = np.unique(movie_id, return_inverse=True)\n", - " movie_id_weights = tf.nn.embedding_lookup(\n", - " 
params=self.movie_embeddings,\n", - " ids=movie_id_val,\n", - " name=\"movie-id-weights\")\n", - " movie_id_weights = tf.gather(movie_id_weights, movie_id_idx)\n", - "\n", - " embeddings = tf.concat([user_id_weights, movie_id_weights], axis=1)\n", - " dnn = self.d0(embeddings)\n", - " dnn = self.d1(dnn)\n", - " dnn = self.d2(dnn)\n", - " out = tf.reshape(dnn, shape=[-1])\n", - " loss = self.loss(rating, out)\n", - " return loss\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cb07cc348aea" - }, - "source": [ - "Let's instantiate the model, and specify the optimizer.\n", - "TF Optimizer(GradientDescent/Adagrad/Adam) are wrapped by tfra, use tfra.embedding_variable.Optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "e294e730c794" - }, - "outputs": [], - "source": [ - "model = NCFModel()\n", - "optimizer = tfra.embedding_variable.AdamOptimizer(learning_rate=0.001)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6376d2a17bb5" - }, - "source": [ - "### 4. Training the model\n", - "After defining the model, we can train the model and observe the change of loss." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "9288e8ec77cb" - }, - "outputs": [], - "source": [ - "def train(epoch=1):\n", - " for i in range(epoch):\n", - " total_loss = np.array([])\n", - " for (_, batch) in enumerate(dataset_train):\n", - " with tf.GradientTape() as tape:\n", - " loss = model(batch)\n", - " total_loss = np.append(total_loss, loss)\n", - " grads = tape.gradient(loss, model.trainable_variables)\n", - " optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", - " print(\"epoch:\", i, \"mean_squared_error:\", np.mean(total_loss))\n", - "\n", - "if __name__==\"__main__\":\n", - " train(10)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6fb127e74992" - }, - "source": [ - "### Graph Mode" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a6495a6f963f" - }, - "source": [ - "#### 1. Import tensorflow and tensorflow_recommenders_addons" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "5218cb658f44" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.keras.layers import Dense\n", - "\n", - "import tensorflow_datasets as tfds\n", - "import tensorflow_recommenders_addons as tfra\n", - "\n", - "tf.compat.v1.disable_eager_execution()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3d349f3bfc07" - }, - "source": [ - "#### 2. Preparing the dataset\n", - "This tutorial uses movies reviews provided by the MovieLens 100K dataset, a classic dataset from the GroupLens research group at the University of Minnesota. In order to facilitate processing, we convert the data type of movie_id and user_id into int64." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "b19e0f30dfb1" - }, - "outputs": [], - "source": [ - "ratings = tfds.load(\"movielens/100k-ratings\", split=\"train\")\n", - "\n", - "ratings = ratings.map(lambda x: {\n", - " \"movie_id\": tf.strings.to_number(x[\"movie_id\"], tf.int64),\n", - " \"user_id\": tf.strings.to_number(x[\"user_id\"], tf.int64),\n", - " \"user_rating\": x[\"user_rating\"]\n", - "})\n", - "\n", - "tf.random.set_seed(2021)\n", - "shuffled = ratings.shuffle(100_000, seed=2021, reshuffle_each_iteration=False)\n", - "\n", - "dataset_train = shuffled.take(100_000).batch(256)\n", - "iterator = tf.compat.v1.data.make_one_shot_iterator(dataset_train)\n", - "dataset_train = iterator.get_next()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d0031b8e091a" - }, - "source": [ - "### 3. Implementing a model\n", - "The NCFModel we implemented is very similar to the conventional one, and the main difference lies in the embedding layer. We specify the variable of embedding layer by tfra.embedding_variable.EmbeddingVariable." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "1b5619f1a319" - }, - "outputs": [], - "source": [ - "class NCFModel(tf.keras.Model):\n", - "\n", - " def __init__(self):\n", - " super(NCFModel, self).__init__()\n", - " self.embedding_size = 32\n", - " self.d0 = Dense(\n", - " 256,\n", - " activation='relu',\n", - " kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n", - " bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n", - " self.d1 = Dense(\n", - " 64,\n", - " activation='relu',\n", - " kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n", - " bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n", - " self.d2 = Dense(\n", - " 1,\n", - " kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),\n", - " bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))\n", - " self.user_embeddings = tfra.embedding_variable.EmbeddingVariable(\n", - " name=\"user_dynamic_embeddings\",\n", - " embedding_dim=self.embedding_size,\n", - " ktype=tf.int64,\n", - " initializer=tf.keras.initializers.RandomNormal(-1, 1))\n", - " self.movie_embeddings = tfra.embedding_variable.EmbeddingVariable(\n", - " name=\"moive_dynamic_embeddings\",\n", - " embedding_dim=self.embedding_size,\n", - " ktype=tf.int64,\n", - " initializer=tf.keras.initializers.RandomNormal(-1, 1))\n", - " self.loss = tf.keras.losses.MeanSquaredError()\n", - "\n", - " def call(self, batch):\n", - " movie_id = batch[\"movie_id\"]\n", - " user_id = batch[\"user_id\"]\n", - " rating = batch[\"user_rating\"]\n", - "\n", - " user_id_val, user_id_idx = tf.unique(user_id)\n", - " user_id_weights = tf.nn.embedding_lookup(\n", - " params=self.user_embeddings,\n", - " ids=user_id_val,\n", - " name=\"user-id-weights\")\n", - " user_id_weights = tf.gather(user_id_weights, user_id_idx)\n", - "\n", - " movie_id_val, movie_id_idx = tf.unique(movie_id)\n", - " movie_id_weights = tf.nn.embedding_lookup(\n", - " 
params=self.movie_embeddings,\n", - " ids=movie_id_val,\n", - " name=\"movie-id-weights\")\n", - " movie_id_weights = tf.gather(movie_id_weights, movie_id_idx)\n", - "\n", - " embeddings = tf.concat([user_id_weights, movie_id_weights], axis=1)\n", - " dnn = self.d0(embeddings)\n", - " dnn = self.d1(dnn)\n", - " dnn = self.d2(dnn)\n", - " out = tf.reshape(dnn, shape=[-1])\n", - " loss = self.loss(rating, out)\n", - " return loss\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0c27d0bbbd00" - }, - "source": [ - "Let's instantiate the model, and specify the optimizer.\n", - "TF Optimizer(GradientDescent/Adagrad/Adam) are wrapped by tfra, use tfra.embedding_variable.Optimizer" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "4df6f03efbda" - }, - "outputs": [], - "source": [ - "model = NCFModel()\n", - "loss = model(dataset_train)\n", - "optimizer = tfra.embedding_variable.AdamOptimizer(learning_rate=0.001)\n", - "train_op = optimizer.minimize(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b8d8074c46c2" - }, - "source": [ - "### 4. Training the model\n", - "After defining the model, we can train the model and observe the change of loss." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "95883928d2a3" - }, - "outputs": [], - "source": [ - "epoch = 10\n", - "with tf.compat.v1.Session() as sess:\n", - " for i in range(epoch):\n", - " loss_t, _ = sess.run([loss, train_op])\n", - " print(\"epoch:\", i, \"loss:\", loss_t)" - ] - } - ], - "metadata": { - "colab": { - "name": "embedding_variable_tutorial.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 1 + "nbformat": 4, + "nbformat_minor": 0 }