From 1082162581070a2bbe3720cf9c8d49a728dc2776 Mon Sep 17 00:00:00 2001
From: Christoph Deil
Date: Wed, 4 Dec 2019 19:59:23 +0100
Subject: [PATCH] Remove .ipynb_checkpoints temp folder

---
 ...earning Edges and Corners-checkpoint.ipynb |  491 ----
 ... that Understand Language-checkpoint.ipynb |  585 -----
 ... Predicting the Next Word-checkpoint.ipynb |  619 -----
 ...A Deep Learning Framework-checkpoint.ipynb | 2334 -----------------
 ...loding Gradients Examples-checkpoint.ipynb |  140 -
 ...to Write Like Shakespeare-checkpoint.ipynb | 1034 --------
 ...to Write Like Shakespeare-checkpoint.ipynb |  950 -------
 ...p Learning on Unseen Data-checkpoint.ipynb | 1047 --------
 ...ntro to Neural Prediction-checkpoint.ipynb |  572 ----
 ... Intro to Neural Learning-checkpoint.ipynb | 1752 -------------
 ...ultiple Weights at a Time-checkpoint.ipynb |  464 ----
 ...First DEEP Neural Network-checkpoint.ipynb |  731 ------
 ...Signal and Ignoring Noise-checkpoint.ipynb |  485 ----
 ... - Modeling Probabilities-checkpoint.ipynb |  155 --
 14 files changed, 11359 deletions(-)
 delete mode 100644 .ipynb_checkpoints/Chapter10 - Intro to Convolutional Neural Networks - Learning Edges and Corners-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter11 - Intro to Word Embeddings - Neural Networks that Understand Language-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter12 - Intro to Recurrence - Predicting the Next Word-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter13 - Intro to Automatic Differentiation - Let's Build A Deep Learning Framework-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter14 - Exploding Gradients Examples-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter14 - Intro to LSTMs - Learn to Write Like Shakespeare-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter14 - Intro to LSTMs - Part 2 - Learn to Write Like Shakespeare-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter15 - Intro to Federated Learning - Deep Learning on Unseen Data-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter3 - Forward Propagation - Intro to Neural Prediction-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter4 - Gradient Descent - Intro to Neural Learning-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter5 - Generalizing Gradient Descent - Learning Multiple Weights at a Time-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter6 - Intro to Backpropagation - Building Your First DEEP Neural Network-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter8 - Intro to Regularization - Learning Signal and Ignoring Noise-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/Chapter9 - Intro to Activation Functions - Modeling Probabilities-checkpoint.ipynb
" self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op == \"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return Tensor(self.data.dot(x.data))\n", - " \n", - " def __repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - " \n", - "a = Tensor([1,2,3,4,5], autograd=True)\n", - "b = Tensor([2,2,2,2,2], 
autograd=True)\n", - "c = Tensor([5,4,3,2,1], autograd=True)\n", - "\n", - "d = a + b\n", - "e = b + c\n", - "f = d + e\n", - "\n", - "f.backward(Tensor(np.array([1,1,1,1,1])))\n", - "\n", - "print(b.grad.data == np.array([2,2,2,2,2]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A few Notes on Sum and Expand" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "x = Tensor(np.array([[1,2,3],\n", - " [4,5,6]]))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([5, 7, 9])" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x.sum(0)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 6, 15])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x.sum(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[1, 1, 1, 1],\n", - " [2, 2, 2, 2],\n", - " [3, 3, 3, 3]],\n", - "\n", - " [[4, 4, 4, 4],\n", - " [5, 5, 5, 5],\n", - " [6, 6, 6, 6]]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x.expand(dim=2, copies=4)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 7: Use Autograd to Train a Neural Network" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Previously we would train a model like this" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5.066439994622395\n", - "0.4959907791902342\n", - "0.4180671892167177\n", - "0.35298133007809646\n", - "0.2972549636567377\n", - "0.2492326038163328\n", - "0.20785392075862477\n", - "0.17231260916265176\n", - "0.14193744536652986\n", - "0.11613979792168384\n" - ] - } - ], - "source": [ - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = np.array([[0,0],[0,1],[1,0],[1,1]])\n", - "target = np.array([[0],[1],[0],[1]])\n", - "\n", - "weights_0_1 = np.random.rand(2,3)\n", - "weights_1_2 = np.random.rand(3,1)\n", - "\n", - "for i in range(10):\n", - " \n", - " # Predict\n", - " layer_1 = data.dot(weights_0_1)\n", - " layer_2 = layer_1.dot(weights_1_2)\n", - " \n", - " # Compare\n", - " diff = (layer_2 - target)\n", - " sqdiff = (diff * diff)\n", - " loss = sqdiff.sum(0) # mean squared error loss\n", - "\n", - " # Learn: this is the backpropagation piece\n", - " layer_1_grad = diff.dot(weights_1_2.transpose())\n", - " weight_1_2_update = layer_1.transpose().dot(diff)\n", - " weight_0_1_update = data.transpose().dot(layer_1_grad)\n", - " \n", - " weights_1_2 -= weight_1_2_update * 0.1\n", - " weights_0_1 -= weight_0_1_update * 0.1\n", - " print(loss[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.58128304]\n", - "[0.48988149]\n", - "[0.41375111]\n", - "[0.34489412]\n", - "[0.28210124]\n", - "[0.2254484]\n", - "[0.17538853]\n", - "[0.1324231]\n", - "[0.09682769]\n", - "[0.06849361]\n" - ] - } - ], - "source": [ - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), 
autograd=True)\n", - "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n", - "\n", - "w = list()\n", - "w.append(Tensor(np.random.rand(2,3), autograd=True))\n", - "w.append(Tensor(np.random.rand(3,1), autograd=True))\n", - "\n", - "for i in range(10):\n", - "\n", - " # Predict\n", - " pred = data.mm(w[0]).mm(w[1])\n", - " \n", - " # Compare\n", - " loss = ((pred - target)*(pred - target)).sum(0)\n", - " \n", - " # Learn\n", - " loss.backward(Tensor(np.ones_like(loss.data)))\n", - "\n", - " for w_ in w:\n", - " w_.data -= w_.grad.data * 0.1\n", - " w_.grad.data *= 0\n", - "\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 8: Adding Automatic Optimization" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "class SGD(object):\n", - " \n", - " def __init__(self, parameters, alpha=0.1):\n", - " self.parameters = parameters\n", - " self.alpha = alpha\n", - " \n", - " def zero(self):\n", - " for p in self.parameters:\n", - " p.grad.data *= 0\n", - " \n", - " def step(self, zero=True):\n", - " \n", - " for p in self.parameters:\n", - " \n", - " p.data -= p.grad.data * self.alpha\n", - " \n", - " if(zero):\n", - " p.grad.data *= 0" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.58128304]\n", - "[0.48988149]\n", - "[0.41375111]\n", - "[0.34489412]\n", - "[0.28210124]\n", - "[0.2254484]\n", - "[0.17538853]\n", - "[0.1324231]\n", - "[0.09682769]\n", - "[0.06849361]\n" - ] - } - ], - "source": [ - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n", - "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n", - "\n", - "w = list()\n", - "w.append(Tensor(np.random.rand(2,3), autograd=True))\n", - "w.append(Tensor(np.random.rand(3,1), autograd=True))\n", - "\n", - "optim = SGD(parameters=w, alpha=0.1)\n", - "\n", - "for i in range(10):\n", - "\n", - " # Predict\n", - " pred = data.mm(w[0]).mm(w[1])\n", - " \n", - " # Compare\n", - " loss = ((pred - target)*(pred - target)).sum(0)\n", - " \n", - " # Learn\n", - " loss.backward(Tensor(np.ones_like(loss.data)))\n", - " optim.step()\n", - "\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 9: Adding Support for Layer Types" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "class Layer(object):\n", - " \n", - " def __init__(self):\n", - " self.parameters = list()\n", - " \n", - " def get_parameters(self):\n", - " return self.parameters\n", - "\n", - "\n", - "class Linear(Layer):\n", - "\n", - " def __init__(self, n_inputs, n_outputs):\n", - " super().__init__()\n", - " W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0/(n_inputs))\n", - " self.weight = Tensor(W, autograd=True)\n", - " self.bias = Tensor(np.zeros(n_outputs), autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " self.parameters.append(self.bias)\n", - "\n", - " def forward(self, input):\n", - " return input.mm(self.weight)+self.bias.expand(0,len(input.data))\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 10: Layers Which Contain Layers" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "[2.33428272]\n", - "[0.06743796]\n", - "[0.0521849]\n", - "[0.04079507]\n", - "[0.03184365]\n", - "[0.02479336]\n", - "[0.01925443]\n", - "[0.01491699]\n", - "[0.01153118]\n", - "[0.00889602]\n" - ] - } - ], - "source": [ - "\n", - "class Sequential(Layer):\n", - " \n", - " def __init__(self, layers=list()):\n", - " super().__init__()\n", - " \n", - " self.layers = layers\n", - " \n", - " def add(self, layer):\n", - " self.layers.append(layer)\n", - " \n", - " def forward(self, input):\n", - " for layer in self.layers:\n", - " input = layer.forward(input)\n", - " return input\n", - " \n", - " def get_parameters(self):\n", - " params = list()\n", - " for l in self.layers:\n", - " params += l.get_parameters()\n", - " return params\n", - " \n", - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n", - "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n", - "\n", - "model = Sequential([Linear(2,3), Linear(3,1)])\n", - "\n", - "optim = SGD(parameters=model.get_parameters(), alpha=0.05)\n", - "\n", - "for i in range(10):\n", - " \n", - " # Predict\n", - " pred = model.forward(data)\n", - " \n", - " # Compare\n", - " loss = ((pred - target)*(pred - target)).sum(0)\n", - " \n", - " # Learn\n", - " loss.backward(Tensor(np.ones_like(loss.data)))\n", - " optim.step()\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 11: Loss Function Layers" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2.33428272]\n", - "[0.06743796]\n", - "[0.0521849]\n", - "[0.04079507]\n", - "[0.03184365]\n", - "[0.02479336]\n", - "[0.01925443]\n", - "[0.01491699]\n", - "[0.01153118]\n", - "[0.00889602]\n" - ] - } - ], - "source": [ - "class MSELoss(Layer):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, pred, target):\n", - " return ((pred - target)*(pred - target)).sum(0)\n", - " \n", - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n", - "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n", - "\n", - "model = Sequential([Linear(2,3), Linear(3,1)])\n", - "criterion = MSELoss()\n", - "\n", - "optim = SGD(parameters=model.get_parameters(), alpha=0.05)\n", - "\n", - "for i in range(10):\n", - " \n", - " # Predict\n", - " pred = model.forward(data)\n", - " \n", - " # Compare\n", - " loss = criterion.forward(pred, target)\n", - " \n", - " # Learn\n", - " loss.backward(Tensor(np.ones_like(loss.data)))\n", - " optim.step()\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 12: Non-linearity Layers" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class Tensor (object):\n", - " \n", - " def __init__(self,data,\n", - " autograd=False,\n", - " creators=None,\n", - " creation_op=None,\n", - " id=None):\n", - " \n", - " self.data = np.array(data)\n", - " self.autograd = autograd\n", - " self.grad = None\n", - " if(id is None):\n", - " self.id = np.random.randint(0,100000)\n", - " else:\n", - " self.id = id\n", - " \n", - " self.creators = creators\n", - " self.creation_op = creation_op\n", - " self.children = {}\n", - " \n", - " if(creators is not None):\n", - " for c in creators:\n", - " if(self.id not in 
c.children):\n", - " c.children[self.id] = 1\n", - " else:\n", - " c.children[self.id] += 1\n", - "\n", - " def all_children_grads_accounted_for(self):\n", - " for id,cnt in self.children.items():\n", - " if(cnt != 0):\n", - " return False\n", - " return True \n", - " \n", - " def backward(self,grad=None, grad_origin=None):\n", - " if(self.autograd):\n", - " \n", - " if(grad is None):\n", - " grad = Tensor(np.ones_like(self.data))\n", - "\n", - " if(grad_origin is not None):\n", - " if(self.children[grad_origin.id] == 0):\n", - " raise Exception(\"cannot backprop more than once\")\n", - " else:\n", - " self.children[grad_origin.id] -= 1\n", - "\n", - " if(self.grad is None):\n", - " self.grad = grad\n", - " else:\n", - " self.grad += grad\n", - " \n", - " # grads must not have grads of their own\n", - " assert grad.autograd == False\n", - " \n", - " # only continue backpropping if there's something to\n", - " # backprop into and if all gradients (from children)\n", - " # are accounted for override waiting for children if\n", - " # \"backprop\" was called on this variable directly\n", - " if(self.creators is not None and \n", - " (self.all_children_grads_accounted_for() or \n", - " grad_origin is None)):\n", - "\n", - " if(self.creation_op == \"add\"):\n", - " self.creators[0].backward(self.grad, self)\n", - " self.creators[1].backward(self.grad, self)\n", - " \n", - " if(self.creation_op == \"sub\"):\n", - " self.creators[0].backward(Tensor(self.grad.data), self)\n", - " self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op == \"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " if(self.creation_op == \"sigmoid\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (self * (ones - self)))\n", - " \n", - " if(self.creation_op == \"tanh\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (ones - (self * self)))\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return 
Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return Tensor(self.data.dot(x.data))\n", - " \n", - " def sigmoid(self):\n", - " if(self.autograd):\n", - " return Tensor(1 / (1 + np.exp(-self.data)),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sigmoid\")\n", - " return Tensor(1 / (1 + np.exp(-self.data)))\n", - "\n", - " def tanh(self):\n", - " if(self.autograd):\n", - " return Tensor(np.tanh(self.data),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"tanh\")\n", - " return Tensor(np.tanh(self.data))\n", - " \n", - " \n", - " def __repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - " \n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - " \n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1.06372865]\n", - "[0.75148144]\n", - "[0.57384259]\n", - "[0.39574294]\n", - "[0.2482279]\n", - "[0.15515294]\n", - "[0.10423398]\n", - "[0.07571169]\n", - "[0.05837623]\n", - "[0.04700013]\n" - ] - } - ], - "source": [ - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)\n", - "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n", - "\n", - "model = Sequential([Linear(2,3), Tanh(), Linear(3,1), Sigmoid()])\n", - "criterion = MSELoss()\n", - "\n", - "optim = SGD(parameters=model.get_parameters(), alpha=1)\n", - "\n", - "for i in range(10):\n", - " \n", - " # Predict\n", - " pred = model.forward(data)\n", - " \n", - " # Compare\n", - " loss = criterion.forward(pred, target)\n", - " \n", - " # Learn\n", - " 
loss.backward(Tensor(np.ones_like(loss.data)))\n", - " optim.step()\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 13: The Embedding Layer" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "class Embedding(Layer):\n", - " \n", - " def __init__(self, vocab_size, dim):\n", - " super().__init__()\n", - " \n", - " self.vocab_size = vocab_size\n", - " self.dim = dim\n", - " \n", - " # this random initialiation style is just a convention from word2vec\n", - " self.weight = (np.random.rand(vocab_size, dim) - 0.5) / dim\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 14: Add Indexing to Autograd" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class Tensor (object):\n", - " \n", - " def __init__(self,data,\n", - " autograd=False,\n", - " creators=None,\n", - " creation_op=None,\n", - " id=None):\n", - " \n", - " self.data = np.array(data)\n", - " self.autograd = autograd\n", - " self.grad = None\n", - " if(id is None):\n", - " self.id = np.random.randint(0,100000)\n", - " else:\n", - " self.id = id\n", - " \n", - " self.creators = creators\n", - " self.creation_op = creation_op\n", - " self.children = {}\n", - " \n", - " if(creators is not None):\n", - " for c in creators:\n", - " if(self.id not in c.children):\n", - " c.children[self.id] = 1\n", - " else:\n", - " c.children[self.id] += 1\n", - "\n", - " def all_children_grads_accounted_for(self):\n", - " for id,cnt in self.children.items():\n", - " if(cnt != 0):\n", - " return False\n", - " return True \n", - " \n", - " def backward(self,grad=None, grad_origin=None):\n", - " if(self.autograd):\n", - " \n", - " if(grad is None):\n", - " grad = Tensor(np.ones_like(self.data))\n", - "\n", - " if(grad_origin is not None):\n", - " if(self.children[grad_origin.id] == 0):\n", - " raise Exception(\"cannot backprop more than once\")\n", - " else:\n", - " self.children[grad_origin.id] -= 1\n", - "\n", - " if(self.grad is None):\n", - " self.grad = grad\n", - " else:\n", - " self.grad += grad\n", - " \n", - " # grads must not have grads of their own\n", - " assert grad.autograd == False\n", - " \n", - " # only continue backpropping if there's something to\n", - " # backprop into and if all gradients (from children)\n", - " # are accounted for override waiting for children if\n", - " # \"backprop\" was called on this variable directly\n", - " if(self.creators is not None and \n", - " (self.all_children_grads_accounted_for() or \n", - " grad_origin is None)):\n", - "\n", - " if(self.creation_op == \"add\"):\n", - " self.creators[0].backward(self.grad, self)\n", - " self.creators[1].backward(self.grad, self)\n", - " \n", - " if(self.creation_op == \"sub\"):\n", - " self.creators[0].backward(Tensor(self.grad.data), self)\n", - " self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op 
== \"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " if(self.creation_op == \"sigmoid\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (self * (ones - self)))\n", - " \n", - " if(self.creation_op == \"tanh\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (ones - (self * self)))\n", - " \n", - " if(self.creation_op == \"index_select\"):\n", - " new_grad = np.zeros_like(self.creators[0].data)\n", - " indices_ = self.index_select_indices.data.flatten()\n", - " grad_ = grad.data.reshape(len(indices_), -1)\n", - " for i in range(len(indices_)):\n", - " new_grad[indices_[i]] += grad_[i]\n", - " self.creators[0].backward(Tensor(new_grad))\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return Tensor(self.data.dot(x.data))\n", - " \n", - " def sigmoid(self):\n", - " if(self.autograd):\n", - " return Tensor(1 / (1 + np.exp(-self.data)),\n", - " 
autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sigmoid\")\n", - " return Tensor(1 / (1 + np.exp(-self.data)))\n", - "\n", - " def tanh(self):\n", - " if(self.autograd):\n", - " return Tensor(np.tanh(self.data),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"tanh\")\n", - " return Tensor(np.tanh(self.data))\n", - " \n", - " def index_select(self, indices):\n", - "\n", - " if(self.autograd):\n", - " new = Tensor(self.data[indices.data],\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"index_select\")\n", - " new.index_select_indices = indices\n", - " return new\n", - " return Tensor(self.data[indices.data])\n", - " \n", - " def __repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - " \n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - " \n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0. 0. 0. 0.]\n", - " [1. 1. 1. 1. 1.]\n", - " [2. 2. 2. 2. 2.]\n", - " [2. 2. 2. 2. 2.]\n", - " [1. 1. 1. 1. 1.]]\n" - ] - } - ], - "source": [ - "x = Tensor(np.eye(5), autograd=True)\n", - "x.index_select(Tensor([[1,2,3],[2,3,4]])).backward()\n", - "print(x.grad)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 15: The Embedding Layer (revisited)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "class Embedding(Layer):\n", - " \n", - " def __init__(self, vocab_size, dim):\n", - " super().__init__()\n", - " \n", - " self.vocab_size = vocab_size\n", - " self.dim = dim\n", - " \n", - " # this random initialiation style is just a convention from word2vec\n", - " self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " \n", - " def forward(self, input):\n", - " return self.weight.index_select(input)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.98874126]\n", - "[0.6658868]\n", - "[0.45639889]\n", - "[0.31608168]\n", - "[0.2260925]\n", - "[0.16877423]\n", - "[0.13120515]\n", - "[0.10555487]\n", - "[0.08731868]\n", - "[0.07387834]\n" - ] - } - ], - "source": [ - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "data = Tensor(np.array([1,2,1,2]), autograd=True)\n", - "target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)\n", - "\n", - "embed = Embedding(5,3)\n", - "model = Sequential([embed, Tanh(), Linear(3,1), Sigmoid()])\n", - "criterion = MSELoss()\n", - "\n", - "optim = SGD(parameters=model.get_parameters(), alpha=0.5)\n", - "\n", - "for i in range(10):\n", - " \n", - " # Predict\n", - " pred = model.forward(data)\n", - " \n", - " # Compare\n", - " loss = criterion.forward(pred, target)\n", - " \n", - " # Learn\n", - " loss.backward(Tensor(np.ones_like(loss.data)))\n", - " optim.step()\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 16: The Cross Entropy Layer" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": 
{}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class Tensor (object):\n", - " \n", - " def __init__(self,data,\n", - " autograd=False,\n", - " creators=None,\n", - " creation_op=None,\n", - " id=None):\n", - " \n", - " self.data = np.array(data)\n", - " self.autograd = autograd\n", - " self.grad = None\n", - " if(id is None):\n", - " self.id = np.random.randint(0,100000)\n", - " else:\n", - " self.id = id\n", - " \n", - " self.creators = creators\n", - " self.creation_op = creation_op\n", - " self.children = {}\n", - " \n", - " if(creators is not None):\n", - " for c in creators:\n", - " if(self.id not in c.children):\n", - " c.children[self.id] = 1\n", - " else:\n", - " c.children[self.id] += 1\n", - "\n", - " def all_children_grads_accounted_for(self):\n", - " for id,cnt in self.children.items():\n", - " if(cnt != 0):\n", - " return False\n", - " return True \n", - " \n", - " def backward(self,grad=None, grad_origin=None):\n", - " if(self.autograd):\n", - " \n", - " if(grad is None):\n", - " grad = Tensor(np.ones_like(self.data))\n", - "\n", - " if(grad_origin is not None):\n", - " if(self.children[grad_origin.id] == 0):\n", - " raise Exception(\"cannot backprop more than once\")\n", - " else:\n", - " self.children[grad_origin.id] -= 1\n", - "\n", - " if(self.grad is None):\n", - " self.grad = grad\n", - " else:\n", - " self.grad += grad\n", - " \n", - " # grads must not have grads of their own\n", - " assert grad.autograd == False\n", - " \n", - " # only continue backpropping if there's something to\n", - " # backprop into and if all gradients (from children)\n", - " # are accounted for override waiting for children if\n", - " # \"backprop\" was called on this variable directly\n", - " if(self.creators is not None and \n", - " (self.all_children_grads_accounted_for() or \n", - " grad_origin is None)):\n", - "\n", - " if(self.creation_op == \"add\"):\n", - " self.creators[0].backward(self.grad, self)\n", - " self.creators[1].backward(self.grad, self)\n", - " \n", - " if(self.creation_op == \"sub\"):\n", - " self.creators[0].backward(Tensor(self.grad.data), self)\n", - " self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op == \"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " if(self.creation_op == \"sigmoid\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (self * (ones - self)))\n", - " \n", - " if(self.creation_op == \"tanh\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " 
self.creators[0].backward(self.grad * (ones - (self * self)))\n", - " \n", - " if(self.creation_op == \"index_select\"):\n", - " new_grad = np.zeros_like(self.creators[0].data)\n", - " indices_ = self.index_select_indices.data.flatten()\n", - " grad_ = grad.data.reshape(len(indices_), -1)\n", - " for i in range(len(indices_)):\n", - " new_grad[indices_[i]] += grad_[i]\n", - " self.creators[0].backward(Tensor(new_grad))\n", - " \n", - " if(self.creation_op == \"cross_entropy\"):\n", - " dx = self.softmax_output - self.target_dist\n", - " self.creators[0].backward(Tensor(dx))\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return Tensor(self.data.dot(x.data))\n", - " \n", - " def sigmoid(self):\n", - " if(self.autograd):\n", - " return Tensor(1 / (1 + np.exp(-self.data)),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sigmoid\")\n", - " return Tensor(1 / (1 + np.exp(-self.data)))\n", - "\n", - " def tanh(self):\n", - " if(self.autograd):\n", - " return Tensor(np.tanh(self.data),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"tanh\")\n", - " return Tensor(np.tanh(self.data))\n", - " \n", - " def index_select(self, indices):\n", - "\n", - " if(self.autograd):\n", - " new = Tensor(self.data[indices.data],\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"index_select\")\n", - " new.index_select_indices = indices\n", - " return new\n", - " return 
Tensor(self.data[indices.data])\n", - " \n", - " def cross_entropy(self, target_indices):\n", - "\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " \n", - " t = target_indices.data.flatten()\n", - " p = softmax_output.reshape(len(t),-1)\n", - " target_dist = np.eye(p.shape[1])[t]\n", - " loss = -(np.log(p) * (target_dist)).sum(1).mean()\n", - " \n", - " if(self.autograd):\n", - " out = Tensor(loss,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"cross_entropy\")\n", - " out.softmax_output = softmax_output\n", - " out.target_dist = target_dist\n", - " return out\n", - "\n", - " return Tensor(loss)\n", - " \n", - " \n", - " def __repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - " \n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - " \n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "class CrossEntropyLoss(object):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input, target):\n", - " return input.cross_entropy(target)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.3885032434928422\n", - "0.9558181509266037\n", - "0.6823083585795604\n", - "0.5095259967493119\n", - "0.39574491472895856\n", - "0.31752527285348264\n", - "0.2617222861964216\n", - "0.22061283923954234\n", - "0.18946427334830068\n", - "0.16527389263866668\n" - ] - } - ], - "source": [ - "import numpy\n", - "np.random.seed(0)\n", - "\n", - "# data indices\n", - "data = Tensor(np.array([1,2,1,2]), autograd=True)\n", - "\n", - "# target indices\n", - "target = Tensor(np.array([0,1,0,1]), autograd=True)\n", - "\n", - "model = Sequential([Embedding(3,3), Tanh(), Linear(3,4)])\n", - "criterion = CrossEntropyLoss()\n", - "\n", - "optim = SGD(parameters=model.get_parameters(), alpha=0.1)\n", - "\n", - "for i in range(10):\n", - " \n", - " # Predict\n", - " pred = model.forward(data)\n", - " \n", - " # Compare\n", - " loss = criterion.forward(pred, target)\n", - " \n", - " # Learn\n", - " loss.backward(Tensor(np.ones_like(loss.data)))\n", - " optim.step()\n", - " print(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 17: The Recurrent Neural Network Layer" - ] - }, - { - "cell_type": "code", - "execution_count": 400, - "metadata": {}, - "outputs": [], - "source": [ - "class RNNCell(Layer):\n", - " \n", - " def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):\n", - " super().__init__()\n", - "\n", - " self.n_inputs = n_inputs\n", - " self.n_hidden = n_hidden\n", - " self.n_output = n_output\n", - " \n", - " if(activation == 'sigmoid'):\n", - " self.activation = Sigmoid()\n", - " elif(activation == 'tanh'):\n", - " self.activation == Tanh()\n", - " else:\n", - " raise Exception(\"Non-linearity not found\")\n", - "\n", - " self.w_ih = Linear(n_inputs, n_hidden)\n", - " self.w_hh = Linear(n_hidden, n_hidden)\n", - " self.w_ho = Linear(n_hidden, n_output)\n", - " \n", - " self.parameters += 
self.w_ih.get_parameters()\n", - " self.parameters += self.w_hh.get_parameters()\n", - " self.parameters += self.w_ho.get_parameters() \n", - " \n", - " def forward(self, input, hidden):\n", - " from_prev_hidden = self.w_hh.forward(hidden)\n", - " combined = self.w_ih.forward(input) + from_prev_hidden\n", - " new_hidden = self.activation.forward(combined)\n", - " output = self.w_ho.forward(new_hidden)\n", - " return output, new_hidden\n", - " \n", - " def init_hidden(self, batch_size=1):\n", - " return Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 401, - "metadata": {}, - "outputs": [], - "source": [ - "import sys,random,math\n", - "from collections import Counter\n", - "import numpy as np\n", - "\n", - "f = open('tasksv11/en/qa1_single-supporting-fact_train.txt','r')\n", - "raw = f.readlines()\n", - "f.close()\n", - "\n", - "tokens = list()\n", - "for line in raw[0:1000]:\n", - " tokens.append(line.lower().replace(\"\\n\",\"\").split(\" \")[1:])\n", - "\n", - "new_tokens = list()\n", - "for line in tokens:\n", - " new_tokens.append(['-'] * (6 - len(line)) + line)\n", - "\n", - "tokens = new_tokens\n", - "\n", - "vocab = set()\n", - "for sent in tokens:\n", - " for word in sent:\n", - " vocab.add(word)\n", - "\n", - "vocab = list(vocab)\n", - "\n", - "word2index = {}\n", - "for i,word in enumerate(vocab):\n", - " word2index[word]=i\n", - " \n", - "def words2indices(sentence):\n", - " idx = list()\n", - " for word in sentence:\n", - " idx.append(word2index[word])\n", - " return idx\n", - "\n", - "indices = list()\n", - "for line in tokens:\n", - " idx = list()\n", - " for w in line:\n", - " idx.append(word2index[w])\n", - " indices.append(idx)\n", - "\n", - "data = np.array(indices)" - ] - }, - { - "cell_type": "code", - "execution_count": 402, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 403, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 404, - "metadata": {}, - "outputs": [], - "source": [ - "embed = Embedding(vocab_size=len(vocab),dim=16)\n", - "model = RNNCell(n_inputs=16, n_hidden=16, n_output=len(vocab))\n", - "\n", - "criterion = CrossEntropyLoss()\n", - "optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 405, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loss: 0.47631100976371393 % Correct: 0.01\n", - "Loss: 0.17189538896184856 % Correct: 0.28\n", - "Loss: 0.1460940222788725 % Correct: 0.37\n", - "Loss: 0.13845863915406884 % Correct: 0.37\n", - "Loss: 0.135574472565278 % Correct: 0.37\n" - ] - } - ], - "source": [ - "for iter in range(1000):\n", - " batch_size = 100\n", - " total_loss = 0\n", - " \n", - " hidden = model.init_hidden(batch_size=batch_size)\n", - "\n", - " for t in range(5):\n", - " input = Tensor(data[0:batch_size,t], autograd=True)\n", - " rnn_input = embed.forward(input=input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - "\n", - " target = Tensor(data[0:batch_size,t+1], autograd=True) \n", - " loss = criterion.forward(output, target)\n", - " loss.backward()\n", - " optim.step()\n", - " 
total_loss += loss.data\n", - " if(iter % 200 == 0):\n", - " p_correct = (target.data == np.argmax(output.data,axis=1)).mean()\n", - " print(\"Loss:\",total_loss / (len(data)/batch_size),\"% Correct:\",p_correct)" - ] - }, - { - "cell_type": "code", - "execution_count": 362, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 406, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Context: - mary moved to the \n", - "True: bathroom.\n", - "Pred: office.\n" - ] - } - ], - "source": [ - "batch_size = 1\n", - "hidden = model.init_hidden(batch_size=batch_size)\n", - "for t in range(5):\n", - " input = Tensor(data[0:batch_size,t], autograd=True)\n", - " rnn_input = embed.forward(input=input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - "\n", - "target = Tensor(data[0:batch_size,t+1], autograd=True) \n", - "loss = criterion.forward(output, target)\n", - "\n", - "ctx = \"\"\n", - "for idx in data[0:batch_size][0][0:-1]:\n", - " ctx += vocab[idx] + \" \"\n", - "print(\"Context:\",ctx)\n", - "print(\"True:\",vocab[target.data[0]])\n", - "print(\"Pred:\", vocab[output.data.argmax()])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter14 - Exploding Gradients Examples-checkpoint.ipynb b/.ipynb_checkpoints/Chapter14 - Exploding Gradients Examples-checkpoint.ipynb deleted file mode 100644 index 507fe35..0000000 --- a/.ipynb_checkpoints/Chapter14 - Exploding Gradients Examples-checkpoint.ipynb +++ /dev/null @@ -1,140 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 158, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Activations\n", - "[0.93940638 0.96852968]\n", - "[0.9919462 0.99121735]\n", - "[0.99301385 0.99302901]\n", - "[0.9930713 0.99307098]\n", - "[0.99307285 0.99307285]\n", - "[0.99307291 0.99307291]\n", - "[0.99307291 0.99307291]\n", - "[0.99307291 0.99307291]\n", - "[0.99307291 0.99307291]\n", - "[0.99307291 0.99307291]\n", - "\n", - "Gradients\n", - "[0.03439552 0.03439552]\n", - "[0.00118305 0.00118305]\n", - "[4.06916726e-05 4.06916726e-05]\n", - "[1.39961115e-06 1.39961115e-06]\n", - "[4.81403643e-08 4.81403637e-08]\n", - "[1.65582672e-09 1.65582765e-09]\n", - "[5.69682675e-11 5.69667160e-11]\n", - "[1.97259346e-12 1.97517920e-12]\n", - "[8.45387597e-14 8.02306381e-14]\n", - "[1.45938177e-14 2.16938983e-14]\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "sigmoid = lambda x:1/(1 + np.exp(-x))\n", - "relu = lambda x:(x>0).astype(float)*x\n", - "\n", - "weights = np.array([[1,4],[4,1]])\n", - "activation = sigmoid(np.array([1,0.01]))\n", - "\n", - "print(\"Activations\")\n", - "activations = list()\n", - "for iter in range(10):\n", - " activation = sigmoid(activation.dot(weights))\n", - " activations.append(activation)\n", - " print(activation)\n", - "print(\"\\nGradients\")\n", - "gradient = np.ones_like(activation)\n", - "for activation in 
reversed(activations):\n", - " gradient = (activation * (1 - activation) * gradient)\n", - " gradient = gradient.dot(weights.transpose())\n", - " print(gradient)" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Relu Activations\n", - "[23.71814585 23.98025559]\n", - "[119.63916823 118.852839 ]\n", - "[595.05052421 597.40951192]\n", - "[2984.68857188 2977.61160877]\n", - "[14895.13500696 14916.36589628]\n", - "[74560.59859209 74496.90592414]\n", - "[372548.22228863 372739.30029248]\n", - "[1863505.42345854 1862932.18944699]\n", - "[9315234.18124649 9316953.88328115]\n", - "[46583049.71437107 46577890.60826711]\n", - "\n", - "Relu Gradients\n", - "[5. 5.]\n", - "[25. 25.]\n", - "[125. 125.]\n", - "[625. 625.]\n", - "[3125. 3125.]\n", - "[15625. 15625.]\n", - "[78125. 78125.]\n", - "[390625. 390625.]\n", - "[1953125. 1953125.]\n", - "[9765625. 9765625.]\n" - ] - } - ], - "source": [ - "print(\"Relu Activations\")\n", - "activations = list()\n", - "for iter in range(10):\n", - " activation = relu(activation.dot(weights))\n", - " activations.append(activation)\n", - " print(activation)\n", - "\n", - "print(\"\\nRelu Gradients\")\n", - "gradient = np.ones_like(activation)\n", - "for activation in reversed(activations):\n", - " gradient = ((activation > 0) * gradient).dot(weights.transpose())\n", - " print(gradient)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter14 - Intro to LSTMs - Learn to Write Like Shakespeare-checkpoint.ipynb b/.ipynb_checkpoints/Chapter14 - Intro to LSTMs - Learn to Write Like Shakespeare-checkpoint.ipynb deleted file mode 100644 index 83edff0..0000000 --- a/.ipynb_checkpoints/Chapter14 - Intro to LSTMs - Learn to Write Like Shakespeare-checkpoint.ipynb +++ /dev/null @@ -1,1034 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class Tensor (object):\n", - " \n", - " def __init__(self,data,\n", - " autograd=False,\n", - " creators=None,\n", - " creation_op=None,\n", - " id=None):\n", - " \n", - " self.data = np.array(data)\n", - " self.autograd = autograd\n", - " self.grad = None\n", - "\n", - " if(id is None):\n", - " self.id = np.random.randint(0,1000000000)\n", - " else:\n", - " self.id = id\n", - " \n", - " self.creators = creators\n", - " self.creation_op = creation_op\n", - " self.children = {}\n", - " \n", - " if(creators is not None):\n", - " for c in creators:\n", - " if(self.id not in c.children):\n", - " c.children[self.id] = 1\n", - " else:\n", - " c.children[self.id] += 1\n", - "\n", - " def all_children_grads_accounted_for(self):\n", - " for id,cnt in self.children.items():\n", - " if(cnt != 0):\n", - " return False\n", - " return True \n", - " \n", - " def backward(self,grad=None, grad_origin=None):\n", - " if(self.autograd):\n", - " \n", - " if(grad is None):\n", - " grad = Tensor(np.ones_like(self.data))\n", - "\n", - 
" if(grad_origin is not None):\n", - " if(self.children[grad_origin.id] == 0):\n", - " return\n", - " print(self.id)\n", - " print(self.creation_op)\n", - " print(len(self.creators))\n", - " for c in self.creators:\n", - " print(c.creation_op)\n", - " raise Exception(\"cannot backprop more than once\")\n", - " else:\n", - " self.children[grad_origin.id] -= 1\n", - "\n", - " if(self.grad is None):\n", - " self.grad = grad\n", - " else:\n", - " self.grad += grad\n", - " \n", - " # grads must not have grads of their own\n", - " assert grad.autograd == False\n", - " \n", - " # only continue backpropping if there's something to\n", - " # backprop into and if all gradients (from children)\n", - " # are accounted for override waiting for children if\n", - " # \"backprop\" was called on this variable directly\n", - " if(self.creators is not None and \n", - " (self.all_children_grads_accounted_for() or \n", - " grad_origin is None)):\n", - "\n", - " if(self.creation_op == \"add\"):\n", - " self.creators[0].backward(self.grad, self)\n", - " self.creators[1].backward(self.grad, self)\n", - " \n", - " if(self.creation_op == \"sub\"):\n", - " self.creators[0].backward(Tensor(self.grad.data), self)\n", - " self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op == \"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " if(self.creation_op == \"sigmoid\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (self * (ones - self)))\n", - " \n", - " if(self.creation_op == \"tanh\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (ones - (self * self)))\n", - " \n", - " if(self.creation_op == \"index_select\"):\n", - " new_grad = np.zeros_like(self.creators[0].data)\n", - " indices_ = self.index_select_indices.data.flatten()\n", - " grad_ = grad.data.reshape(len(indices_), -1)\n", - " for i in range(len(indices_)):\n", - " new_grad[indices_[i]] += grad_[i]\n", - " self.creators[0].backward(Tensor(new_grad))\n", - " \n", - " if(self.creation_op == \"cross_entropy\"):\n", - " dx = self.softmax_output - self.target_dist\n", - " self.creators[0].backward(Tensor(dx))\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return 
Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return Tensor(self.data.dot(x.data))\n", - " \n", - " def sigmoid(self):\n", - " if(self.autograd):\n", - " return Tensor(1 / (1 + np.exp(-self.data)),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sigmoid\")\n", - " return Tensor(1 / (1 + np.exp(-self.data)))\n", - "\n", - " def tanh(self):\n", - " if(self.autograd):\n", - " return Tensor(np.tanh(self.data),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"tanh\")\n", - " return Tensor(np.tanh(self.data))\n", - " \n", - " def index_select(self, indices):\n", - "\n", - " if(self.autograd):\n", - " new = Tensor(self.data[indices.data],\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"index_select\")\n", - " new.index_select_indices = indices\n", - " return new\n", - " return Tensor(self.data[indices.data])\n", - " \n", - " def softmax(self):\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " return softmax_output\n", - " \n", - " def cross_entropy(self, target_indices):\n", - "\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " \n", - " t = target_indices.data.flatten()\n", - " p = softmax_output.reshape(len(t),-1)\n", - " target_dist = np.eye(p.shape[1])[t]\n", - " loss = -(np.log(p) * (target_dist)).sum(1).mean()\n", - " \n", - " if(self.autograd):\n", - " out = Tensor(loss,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"cross_entropy\")\n", - " out.softmax_output = softmax_output\n", - " out.target_dist = target_dist\n", - " return out\n", - "\n", - " return Tensor(loss)\n", - " \n", - " \n", - " def 
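# Quick usage sketch for the Tensor autograd defined above (the values are illustrative).
# A small graph is built from three leaf tensors and a gradient of ones is pushed back;
# b receives gradient along both paths, so its grad accumulates to [2 2 2].
import numpy as np

a = Tensor([1, 2, 3], autograd=True)
b = Tensor([4, 5, 6], autograd=True)
c = Tensor([7, 8, 9], autograd=True)

d = a + b                 # creation_op "add"
e = b + c
f = d + e

f.backward(Tensor(np.array([1, 1, 1])))
print(b.grad)             # [2 2 2] -> b contributes to f through both d and e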
__repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - "\n", - "class Layer(object):\n", - " \n", - " def __init__(self):\n", - " self.parameters = list()\n", - " \n", - " def get_parameters(self):\n", - " return self.parameters\n", - "\n", - " \n", - "class SGD(object):\n", - " \n", - " def __init__(self, parameters, alpha=0.1):\n", - " self.parameters = parameters\n", - " self.alpha = alpha\n", - " \n", - " def zero(self):\n", - " for p in self.parameters:\n", - " p.grad.data *= 0\n", - " \n", - " def step(self, zero=True):\n", - " \n", - " for p in self.parameters:\n", - " \n", - " p.data -= p.grad.data * self.alpha\n", - " \n", - " if(zero):\n", - " p.grad.data *= 0\n", - "\n", - "\n", - "class Linear(Layer):\n", - "\n", - " def __init__(self, n_inputs, n_outputs, bias=True):\n", - " super().__init__()\n", - " \n", - " self.use_bias = bias\n", - " \n", - " W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0/(n_inputs))\n", - " self.weight = Tensor(W, autograd=True)\n", - " if(self.use_bias):\n", - " self.bias = Tensor(np.zeros(n_outputs), autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " \n", - " if(self.use_bias): \n", - " self.parameters.append(self.bias)\n", - "\n", - " def forward(self, input):\n", - " if(self.use_bias):\n", - " return input.mm(self.weight)+self.bias.expand(0,len(input.data))\n", - " return input.mm(self.weight)\n", - "\n", - "\n", - "class Sequential(Layer):\n", - " \n", - " def __init__(self, layers=list()):\n", - " super().__init__()\n", - " \n", - " self.layers = layers\n", - " \n", - " def add(self, layer):\n", - " self.layers.append(layer)\n", - " \n", - " def forward(self, input):\n", - " for layer in self.layers:\n", - " input = layer.forward(input)\n", - " return input\n", - " \n", - " def get_parameters(self):\n", - " params = list()\n", - " for l in self.layers:\n", - " params += l.get_parameters()\n", - " return params\n", - "\n", - "\n", - "class Embedding(Layer):\n", - " \n", - " def __init__(self, vocab_size, dim):\n", - " super().__init__()\n", - " \n", - " self.vocab_size = vocab_size\n", - " self.dim = dim\n", - " \n", - " # this random initialiation style is just a convention from word2vec\n", - " self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " \n", - " def forward(self, input):\n", - " return self.weight.index_select(input)\n", - "\n", - "\n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - "\n", - "\n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()\n", - " \n", - "\n", - "class CrossEntropyLoss(object):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input, target):\n", - " return input.cross_entropy(target)\n", - "\n", - " \n", - "class RNNCell(Layer):\n", - " \n", - " def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):\n", - " super().__init__()\n", - "\n", - " self.n_inputs = n_inputs\n", - " self.n_hidden = n_hidden\n", - " self.n_output = n_output\n", - " \n", - " if(activation == 'sigmoid'):\n", - " self.activation = Sigmoid()\n", - " elif(activation == 'tanh'):\n", - " self.activation == Tanh()\n", - " else:\n", - " raise Exception(\"Non-linearity not 
found\")\n", - "\n", - " self.w_ih = Linear(n_inputs, n_hidden)\n", - " self.w_hh = Linear(n_hidden, n_hidden)\n", - " self.w_ho = Linear(n_hidden, n_output)\n", - " \n", - " self.parameters += self.w_ih.get_parameters()\n", - " self.parameters += self.w_hh.get_parameters()\n", - " self.parameters += self.w_ho.get_parameters() \n", - " \n", - " def forward(self, input, hidden):\n", - " from_prev_hidden = self.w_hh.forward(hidden)\n", - " combined = self.w_ih.forward(input) + from_prev_hidden\n", - " new_hidden = self.activation.forward(combined)\n", - " output = self.w_ho.forward(new_hidden)\n", - " return output, new_hidden\n", - " \n", - " def init_hidden(self, batch_size=1):\n", - " return Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)\n", - " \n", - "class LSTMCell(Layer):\n", - " \n", - " def __init__(self, n_inputs, n_hidden, n_output):\n", - " super().__init__()\n", - "\n", - " self.n_inputs = n_inputs\n", - " self.n_hidden = n_hidden\n", - " self.n_output = n_output\n", - "\n", - " self.xf = Linear(n_inputs, n_hidden)\n", - " self.xi = Linear(n_inputs, n_hidden)\n", - " self.xo = Linear(n_inputs, n_hidden) \n", - " self.xc = Linear(n_inputs, n_hidden) \n", - " \n", - " self.hf = Linear(n_hidden, n_hidden, bias=False)\n", - " self.hi = Linear(n_hidden, n_hidden, bias=False)\n", - " self.ho = Linear(n_hidden, n_hidden, bias=False)\n", - " self.hc = Linear(n_hidden, n_hidden, bias=False) \n", - " \n", - " self.w_ho = Linear(n_hidden, n_output, bias=False)\n", - " \n", - " self.parameters += self.xf.get_parameters()\n", - " self.parameters += self.xi.get_parameters()\n", - " self.parameters += self.xo.get_parameters()\n", - " self.parameters += self.xc.get_parameters()\n", - "\n", - " self.parameters += self.hf.get_parameters()\n", - " self.parameters += self.hi.get_parameters() \n", - " self.parameters += self.ho.get_parameters() \n", - " self.parameters += self.hc.get_parameters() \n", - " \n", - " self.parameters += self.w_ho.get_parameters() \n", - " \n", - " def forward(self, input, hidden):\n", - " \n", - " prev_hidden = hidden[0] \n", - " prev_cell = hidden[1]\n", - " \n", - " f = (self.xf.forward(input) + self.hf.forward(prev_hidden)).sigmoid()\n", - " i = (self.xi.forward(input) + self.hi.forward(prev_hidden)).sigmoid()\n", - " o = (self.xo.forward(input) + self.ho.forward(prev_hidden)).sigmoid() \n", - " g = (self.xc.forward(input) + self.hc.forward(prev_hidden)).tanh() \n", - " c = (f * prev_cell) + (i * g)\n", - "\n", - " h = o * c.tanh()\n", - " \n", - " output = self.w_ho.forward(h)\n", - " return output, (h, c)\n", - " \n", - " def init_hidden(self, batch_size=1):\n", - " init_hidden = Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)\n", - " init_cell = Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)\n", - " init_hidden.data[:,0] += 1\n", - " init_cell.data[:,0] += 1\n", - " return (init_hidden, init_cell)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 1: RNN Character Language Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import sys,random,math\n", - "from collections import Counter\n", - "import numpy as np\n", - "import sys\n", - "\n", - "np.random.seed(0)\n", - "\n", - "# dataset from 
http://karpathy.github.io/2015/05/21/rnn-effectiveness/\n", - "f = open('shakespear.txt','r')\n", - "raw = f.read()\n", - "f.close()\n", - "\n", - "vocab = list(set(raw))\n", - "word2index = {}\n", - "for i,word in enumerate(vocab):\n", - " word2index[word]=i\n", - "indices = np.array(list(map(lambda x:word2index[x], raw)))\n", - "\n", - "embed = Embedding(vocab_size=len(vocab),dim=512)\n", - "model = LSTMCell(n_inputs=512, n_hidden=512, n_output=len(vocab))\n", - "model.w_ho.weight.data *= 0\n", - "\n", - "criterion = CrossEntropyLoss()\n", - "optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)\n", - "\n", - "def generate_sample(n=30, init_char=' '):\n", - " s = \"\"\n", - " hidden = model.init_hidden(batch_size=1)\n", - " input = Tensor(np.array([word2index[init_char]]))\n", - " for i in range(n):\n", - " rnn_input = embed.forward(input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - "# output.data *= 25\n", - "# temp_dist = output.softmax()\n", - "# temp_dist /= temp_dist.sum()\n", - "\n", - "# m = (temp_dist > np.random.rand()).argmax()\n", - " m = output.data.argmax()\n", - " c = vocab[m]\n", - " input = Tensor(np.array([m]))\n", - " s += c\n", - " return s\n", - "\n", - "batch_size = 16\n", - "bptt = 25\n", - "n_batches = int((indices.shape[0] / (batch_size)))\n", - "\n", - "trimmed_indices = indices[:n_batches*batch_size]\n", - "batched_indices = trimmed_indices.reshape(batch_size, n_batches).transpose()\n", - "\n", - "input_batched_indices = batched_indices[0:-1]\n", - "target_batched_indices = batched_indices[1:]\n", - "\n", - "n_bptt = int(((n_batches-1) / bptt))\n", - "input_batches = input_batched_indices[:n_bptt*bptt].reshape(n_bptt,bptt,batch_size)\n", - "target_batches = target_batched_indices[:n_bptt*bptt].reshape(n_bptt, bptt, batch_size)\n", - "min_loss = 1000" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def train(iterations=400):\n", - " for iter in range(iterations):\n", - " total_loss = 0\n", - " n_loss = 0\n", - "\n", - " hidden = model.init_hidden(batch_size=batch_size)\n", - " batches_to_train = len(input_batches)\n", - " # batches_to_train = 32\n", - " for batch_i in range(batches_to_train):\n", - "\n", - " hidden = (Tensor(hidden[0].data, autograd=True), Tensor(hidden[1].data, autograd=True))\n", - "\n", - " losses = list()\n", - " for t in range(bptt):\n", - " input = Tensor(input_batches[batch_i][t], autograd=True)\n", - " rnn_input = embed.forward(input=input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - "\n", - " target = Tensor(target_batches[batch_i][t], autograd=True) \n", - " batch_loss = criterion.forward(output, target)\n", - "\n", - " if(t == 0):\n", - " losses.append(batch_loss)\n", - " else:\n", - " losses.append(batch_loss + losses[-1])\n", - "\n", - " loss = losses[-1]\n", - "\n", - " loss.backward()\n", - " optim.step()\n", - " total_loss += loss.data / bptt\n", - "\n", - " epoch_loss = np.exp(total_loss / (batch_i+1))\n", - " if(epoch_loss < min_loss):\n", - " min_loss = epoch_loss\n", - " print()\n", - "\n", - " log = \"\\r Iter:\" + str(iter)\n", - " log += \" - Alpha:\" + str(optim.alpha)[0:5]\n", - " log += \" - Batch \"+str(batch_i+1)+\"/\"+str(len(input_batches))\n", - " log += \" - Min Loss:\" + str(min_loss)[0:5]\n", - " log += \" - Loss:\" + str(epoch_loss)\n", - " if(batch_i == 0):\n", - " log += \" - \" + generate_sample(n=70, init_char='T').replace(\"\\n\",\" \")\n", - " 
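# Worked miniature of the batching set up above, with assumed toy sizes
# (26 tokens, batch_size=4, bptt=3) so the reshapes are easy to follow by hand.
import numpy as np

toy_indices = np.arange(26)                                 # stand-in token ids 0..25
toy_batch_size, toy_bptt = 4, 3
toy_n_batches = toy_indices.shape[0] // toy_batch_size      # 6 steps per stream
trimmed = toy_indices[:toy_n_batches * toy_batch_size]      # drop the ragged tail
batched = trimmed.reshape(toy_batch_size, toy_n_batches).T  # (6, 4): row t = step t of 4 streams

inp, tgt = batched[:-1], batched[1:]                 # target = next token of each stream
toy_n_bptt = (toy_n_batches - 1) // toy_bptt         # one truncated-BPTT window of length 3
toy_inputs = inp[:toy_n_bptt * toy_bptt].reshape(toy_n_bptt, toy_bptt, toy_batch_size)
toy_targets = tgt[:toy_n_bptt * toy_bptt].reshape(toy_n_bptt, toy_bptt, toy_batch_size)

print(toy_inputs[0, 0])    # [ 0  6 12 18] -> first token of each of the 4 streams
print(toy_targets[0, 0])   # [ 1  7 13 19] -> what each stream should predict next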
if(batch_i % 1 == 0):\n", - " sys.stdout.write(log)\n", - " optim.alpha *= 0.99\n", - " # print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " Iter:0 - Alpha:0.05 - Batch 1/249 - Min Loss:62.00 - Loss:62.000000000000064 - eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\n", - " Iter:0 - Alpha:0.05 - Batch 2/249 - Min Loss:61.99 - Loss:61.999336055802885\n", - " Iter:0 - Alpha:0.05 - Batch 3/249 - Min Loss:61.98 - Loss:61.989983546689196\n", - " Iter:0 - Alpha:0.05 - Batch 4/249 - Min Loss:61.97 - Loss:61.972948235165255\n", - " Iter:0 - Alpha:0.05 - Batch 5/249 - Min Loss:61.94 - Loss:61.941383549495384\n", - " Iter:0 - Alpha:0.05 - Batch 6/249 - Min Loss:61.88 - Loss:61.88023671827271\n", - " Iter:0 - Alpha:0.05 - Batch 7/249 - Min Loss:61.77 - Loss:61.77690827437837\n", - " Iter:0 - Alpha:0.05 - Batch 8/249 - Min Loss:61.52 - Loss:61.52953899883961\n", - " Iter:0 - Alpha:0.05 - Batch 9/249 - Min Loss:61.00 - Loss:61.00486153547285\n", - " Iter:0 - Alpha:0.05 - Batch 10/249 - Min Loss:60.23 - Loss:60.236912186726684\n", - " Iter:0 - Alpha:0.05 - Batch 11/249 - Min Loss:58.70 - Loss:58.7055559369767\n", - " Iter:0 - Alpha:0.05 - Batch 12/249 - Min Loss:56.73 - Loss:56.73775220158473\n", - " Iter:0 - Alpha:0.05 - Batch 13/249 - Min Loss:54.10 - Loss:54.10996106485584\n", - " Iter:0 - Alpha:0.05 - Batch 14/249 - Min Loss:52.75 - Loss:52.75637293050057\n", - " Iter:0 - Alpha:0.05 - Batch 15/249 - Min Loss:51.07 - Loss:51.078681882080105\n", - " Iter:0 - Alpha:0.05 - Batch 16/249 - Min Loss:49.37 - Loss:49.37743406427449\n", - " Iter:0 - Alpha:0.05 - Batch 17/249 - Min Loss:47.81 - Loss:47.81006661764188\n", - " Iter:0 - Alpha:0.05 - Batch 18/249 - Min Loss:46.68 - Loss:46.68131330399904\n", - " Iter:0 - Alpha:0.05 - Batch 19/249 - Min Loss:45.76 - Loss:45.76135529411921\n", - " Iter:0 - Alpha:0.05 - Batch 20/249 - Min Loss:44.63 - Loss:44.63742967139992\n", - " Iter:0 - Alpha:0.05 - Batch 21/249 - Min Loss:43.43 - Loss:43.43315342999167\n", - " Iter:0 - Alpha:0.05 - Batch 22/249 - Min Loss:43.13 - Loss:43.133727315170454\n", - " Iter:0 - Alpha:0.05 - Batch 23/249 - Min Loss:43.08 - Loss:43.08924458053491\n", - " Iter:0 - Alpha:0.05 - Batch 24/249 - Min Loss:42.48 - Loss:42.48625785761426\n", - " Iter:0 - Alpha:0.05 - Batch 25/249 - Min Loss:41.59 - Loss:41.59564764008973\n", - " Iter:0 - Alpha:0.05 - Batch 26/249 - Min Loss:40.64 - Loss:40.64633262212879\n", - " Iter:0 - Alpha:0.05 - Batch 27/249 - Min Loss:40.08 - Loss:40.08437857978491\n", - " Iter:0 - Alpha:0.05 - Batch 28/249 - Min Loss:39.38 - Loss:39.38197568983813\n", - " Iter:0 - Alpha:0.05 - Batch 29/249 - Min Loss:38.85 - Loss:38.85036603038319\n", - " Iter:0 - Alpha:0.05 - Batch 30/249 - Min Loss:38.32 - Loss:38.32050246588233\n", - " Iter:0 - Alpha:0.05 - Batch 31/249 - Min Loss:38.02 - Loss:38.028742643067304\n", - " Iter:0 - Alpha:0.05 - Batch 32/249 - Min Loss:37.57 - Loss:37.579230715808585\n", - " Iter:0 - Alpha:0.05 - Batch 33/249 - Min Loss:37.15 - Loss:37.1513332533316\n", - " Iter:0 - Alpha:0.05 - Batch 34/249 - Min Loss:36.72 - Loss:36.72716819545398\n", - " Iter:0 - Alpha:0.05 - Batch 35/249 - Min Loss:36.50 - Loss:36.505523013835905\n", - " Iter:0 - Alpha:0.05 - Batch 36/249 - Min Loss:36.26 - Loss:36.264791172196766\n", - " Iter:0 - Alpha:0.05 - Batch 37/249 - Min Loss:35.93 - Loss:35.93241785924657\n", - " Iter:0 - Alpha:0.05 - Batch 39/249 - Min Loss:35.69 - 
Loss:35.69162009265761\n", - " Iter:0 - Alpha:0.05 - Batch 40/249 - Min Loss:35.39 - Loss:35.391766263709975\n", - " Iter:0 - Alpha:0.05 - Batch 41/249 - Min Loss:35.24 - Loss:35.24024995428248\n", - " Iter:0 - Alpha:0.05 - Batch 42/249 - Min Loss:35.16 - Loss:35.16636943540858\n", - " Iter:0 - Alpha:0.05 - Batch 43/249 - Min Loss:34.82 - Loss:34.82115954562641\n", - " Iter:0 - Alpha:0.05 - Batch 44/249 - Min Loss:34.60 - Loss:34.60065020684661\n", - " Iter:0 - Alpha:0.05 - Batch 45/249 - Min Loss:34.54 - Loss:34.549754104397785\n", - " Iter:0 - Alpha:0.05 - Batch 46/249 - Min Loss:34.32 - Loss:34.32999305867251\n", - " Iter:0 - Alpha:0.05 - Batch 47/249 - Min Loss:34.11 - Loss:34.117257032452486\n", - " Iter:0 - Alpha:0.05 - Batch 48/249 - Min Loss:33.90 - Loss:33.90487349892798\n", - " Iter:0 - Alpha:0.05 - Batch 49/249 - Min Loss:33.75 - Loss:33.75966234624244\n", - " Iter:0 - Alpha:0.05 - Batch 50/249 - Min Loss:33.61 - Loss:33.61016131907992\n", - " Iter:0 - Alpha:0.05 - Batch 51/249 - Min Loss:33.31 - Loss:33.31167842936299\n", - " Iter:0 - Alpha:0.05 - Batch 52/249 - Min Loss:33.11 - Loss:33.11904817623289\n", - " Iter:0 - Alpha:0.05 - Batch 53/249 - Min Loss:33.00 - Loss:33.004345599234625\n", - " Iter:0 - Alpha:0.05 - Batch 54/249 - Min Loss:32.81 - Loss:32.817426265009786\n", - " Iter:0 - Alpha:0.05 - Batch 55/249 - Min Loss:32.60 - Loss:32.60854105028041\n", - " Iter:0 - Alpha:0.05 - Batch 56/249 - Min Loss:32.42 - Loss:32.42050431188535\n", - " Iter:0 - Alpha:0.05 - Batch 57/249 - Min Loss:32.19 - Loss:32.198334721563576\n", - " Iter:0 - Alpha:0.05 - Batch 58/249 - Min Loss:32.02 - Loss:32.027256645802886\n", - " Iter:0 - Alpha:0.05 - Batch 59/249 - Min Loss:31.81 - Loss:31.818082530542316\n", - " Iter:0 - Alpha:0.05 - Batch 60/249 - Min Loss:31.63 - Loss:31.631415472551268\n", - " Iter:0 - Alpha:0.05 - Batch 61/249 - Min Loss:31.39 - Loss:31.393959746851287\n", - " Iter:0 - Alpha:0.05 - Batch 62/249 - Min Loss:31.19 - Loss:31.19942305047541\n", - " Iter:0 - Alpha:0.05 - Batch 63/249 - Min Loss:30.95 - Loss:30.95343987653838\n", - " Iter:0 - Alpha:0.05 - Batch 64/249 - Min Loss:30.74 - Loss:30.7489265136333\n", - " Iter:0 - Alpha:0.05 - Batch 65/249 - Min Loss:30.66 - Loss:30.665132078887083\n", - " Iter:0 - Alpha:0.05 - Batch 66/249 - Min Loss:30.60 - Loss:30.605053974596405\n", - " Iter:0 - Alpha:0.05 - Batch 67/249 - Min Loss:30.45 - Loss:30.456479778353064\n", - " Iter:0 - Alpha:0.05 - Batch 68/249 - Min Loss:30.24 - Loss:30.241772045820696\n", - " Iter:0 - Alpha:0.05 - Batch 69/249 - Min Loss:30.14 - Loss:30.14494883095973\n", - " Iter:0 - Alpha:0.05 - Batch 70/249 - Min Loss:30.00 - Loss:30.004543678506863\n", - " Iter:0 - Alpha:0.05 - Batch 71/249 - Min Loss:29.89 - Loss:29.899002156894124\n", - " Iter:0 - Alpha:0.05 - Batch 72/249 - Min Loss:29.85 - Loss:29.85125401656389\n", - " Iter:0 - Alpha:0.05 - Batch 73/249 - Min Loss:29.74 - Loss:29.742558662511755\n", - " Iter:0 - Alpha:0.05 - Batch 74/249 - Min Loss:29.55 - Loss:29.554013392531395\n", - " Iter:0 - Alpha:0.05 - Batch 75/249 - Min Loss:29.41 - Loss:29.413451221065877\n", - " Iter:0 - Alpha:0.05 - Batch 76/249 - Min Loss:29.30 - Loss:29.300279999145584\n", - " Iter:0 - Alpha:0.05 - Batch 77/249 - Min Loss:29.16 - Loss:29.162816380218032\n", - " Iter:0 - Alpha:0.05 - Batch 78/249 - Min Loss:29.09 - Loss:29.09592033129947\n", - " Iter:0 - Alpha:0.05 - Batch 79/249 - Min Loss:28.96 - Loss:28.969066622778954\n", - " Iter:0 - Alpha:0.05 - Batch 80/249 - Min Loss:28.84 - Loss:28.847024497772598\n", - " Iter:0 - 
Alpha:0.05 - Batch 81/249 - Min Loss:28.68 - Loss:28.682792440948468\n", - " Iter:0 - Alpha:0.05 - Batch 82/249 - Min Loss:28.59 - Loss:28.598158009843733\n", - " Iter:0 - Alpha:0.05 - Batch 86/249 - Min Loss:28.54 - Loss:28.672844929166207\n", - " Iter:0 - Alpha:0.05 - Batch 87/249 - Min Loss:28.50 - Loss:28.5084865620548\n", - " Iter:0 - Alpha:0.05 - Batch 88/249 - Min Loss:28.35 - Loss:28.352102311265327\n", - " Iter:0 - Alpha:0.05 - Batch 89/249 - Min Loss:28.19 - Loss:28.192980003869685\n", - " Iter:0 - Alpha:0.05 - Batch 90/249 - Min Loss:28.05 - Loss:28.052399362314123\n", - " Iter:0 - Alpha:0.05 - Batch 91/249 - Min Loss:27.92 - Loss:27.928541406577803\n", - " Iter:0 - Alpha:0.05 - Batch 92/249 - Min Loss:27.82 - Loss:27.826934205543306\n", - " Iter:0 - Alpha:0.05 - Batch 93/249 - Min Loss:27.67 - Loss:27.67999675592469\n", - " Iter:0 - Alpha:0.05 - Batch 94/249 - Min Loss:27.54 - Loss:27.549420657967516\n", - " Iter:0 - Alpha:0.05 - Batch 95/249 - Min Loss:27.40 - Loss:27.40841828220945\n", - " Iter:0 - Alpha:0.05 - Batch 96/249 - Min Loss:27.31 - Loss:27.318381171485257\n", - " Iter:0 - Alpha:0.05 - Batch 97/249 - Min Loss:27.19 - Loss:27.199553368220013\n", - " Iter:0 - Alpha:0.05 - Batch 98/249 - Min Loss:27.08 - Loss:27.085005756069428\n", - " Iter:0 - Alpha:0.05 - Batch 99/249 - Min Loss:26.92 - Loss:26.920652799695258\n", - " Iter:0 - Alpha:0.05 - Batch 100/249 - Min Loss:26.77 - Loss:26.778338171170603\n", - " Iter:0 - Alpha:0.05 - Batch 101/249 - Min Loss:26.63 - Loss:26.63444492542303\n", - " Iter:0 - Alpha:0.05 - Batch 102/249 - Min Loss:26.50 - Loss:26.503214805264342\n", - " Iter:0 - Alpha:0.05 - Batch 103/249 - Min Loss:26.38 - Loss:26.38627303021265\n", - " Iter:0 - Alpha:0.05 - Batch 104/249 - Min Loss:26.28 - Loss:26.28984072160501\n", - " Iter:0 - Alpha:0.05 - Batch 105/249 - Min Loss:26.19 - Loss:26.197356511158755\n", - " Iter:0 - Alpha:0.05 - Batch 106/249 - Min Loss:26.11 - Loss:26.110568826540085\n", - " Iter:0 - Alpha:0.05 - Batch 107/249 - Min Loss:26.08 - Loss:26.08702025964796\n", - " Iter:0 - Alpha:0.05 - Batch 108/249 - Min Loss:25.98 - Loss:25.98179026707607\n", - " Iter:0 - Alpha:0.05 - Batch 109/249 - Min Loss:25.89 - Loss:25.891771314535525\n", - " Iter:0 - Alpha:0.05 - Batch 110/249 - Min Loss:25.80 - Loss:25.802742883596974\n", - " Iter:0 - Alpha:0.05 - Batch 111/249 - Min Loss:25.71 - Loss:25.717937269876025\n", - " Iter:0 - Alpha:0.05 - Batch 112/249 - Min Loss:25.64 - Loss:25.641765977541738\n", - " Iter:0 - Alpha:0.05 - Batch 113/249 - Min Loss:25.57 - Loss:25.57325789739905\n", - " Iter:0 - Alpha:0.05 - Batch 114/249 - Min Loss:25.49 - Loss:25.499436282313912\n", - " Iter:0 - Alpha:0.05 - Batch 115/249 - Min Loss:25.44 - Loss:25.442407943785195\n", - " Iter:0 - Alpha:0.05 - Batch 116/249 - Min Loss:25.35 - Loss:25.35542832928056\n", - " Iter:0 - Alpha:0.05 - Batch 117/249 - Min Loss:25.27 - Loss:25.276678269744394\n", - " Iter:0 - Alpha:0.05 - Batch 118/249 - Min Loss:25.17 - Loss:25.175983940560986\n", - " Iter:0 - Alpha:0.05 - Batch 119/249 - Min Loss:25.10 - Loss:25.10534037752\n", - " Iter:0 - Alpha:0.05 - Batch 120/249 - Min Loss:25.01 - Loss:25.010115025991002\n", - " Iter:0 - Alpha:0.05 - Batch 121/249 - Min Loss:24.92 - Loss:24.920317590856904\n", - " Iter:0 - Alpha:0.05 - Batch 122/249 - Min Loss:24.82 - Loss:24.822277409798907\n", - " Iter:0 - Alpha:0.05 - Batch 123/249 - Min Loss:24.72 - Loss:24.72203946090018\n", - " Iter:0 - Alpha:0.05 - Batch 124/249 - Min Loss:24.62 - Loss:24.626004292971295\n", - " Iter:0 - Alpha:0.05 - Batch 
125/249 - Min Loss:24.54 - Loss:24.548408835881084\n", - " Iter:0 - Alpha:0.05 - Batch 126/249 - Min Loss:24.48 - Loss:24.481874816478964\n", - " Iter:0 - Alpha:0.05 - Batch 127/249 - Min Loss:24.39 - Loss:24.397815037786064\n", - " Iter:0 - Alpha:0.05 - Batch 128/249 - Min Loss:24.29 - Loss:24.29141790917974\n", - " Iter:0 - Alpha:0.05 - Batch 129/249 - Min Loss:24.19 - Loss:24.19678204070675\n", - " Iter:0 - Alpha:0.05 - Batch 130/249 - Min Loss:24.12 - Loss:24.126267171636325\n", - " Iter:0 - Alpha:0.05 - Batch 131/249 - Min Loss:24.03 - Loss:24.031656923161755\n", - " Iter:0 - Alpha:0.05 - Batch 132/249 - Min Loss:23.93 - Loss:23.93408720034178\n", - " Iter:0 - Alpha:0.05 - Batch 133/249 - Min Loss:23.85 - Loss:23.850310797547163\n", - " Iter:0 - Alpha:0.05 - Batch 134/249 - Min Loss:23.76 - Loss:23.76517754738187\n", - " Iter:0 - Alpha:0.05 - Batch 135/249 - Min Loss:23.71 - Loss:23.715494372742555\n", - " Iter:0 - Alpha:0.05 - Batch 136/249 - Min Loss:23.70 - Loss:23.701413122242627\n", - " Iter:0 - Alpha:0.05 - Batch 137/249 - Min Loss:23.62 - Loss:23.628566884474214\n", - " Iter:0 - Alpha:0.05 - Batch 138/249 - Min Loss:23.53 - Loss:23.536622255870185\n", - " Iter:0 - Alpha:0.05 - Batch 140/249 - Min Loss:23.51 - Loss:23.553200160956592\n", - " Iter:0 - Alpha:0.05 - Batch 141/249 - Min Loss:23.49 - Loss:23.4954391983157\n", - " Iter:0 - Alpha:0.05 - Batch 142/249 - Min Loss:23.40 - Loss:23.407151066893466\n", - " Iter:0 - Alpha:0.05 - Batch 143/249 - Min Loss:23.32 - Loss:23.322406347920353\n", - " Iter:0 - Alpha:0.05 - Batch 144/249 - Min Loss:23.23 - Loss:23.23392038507518\n", - " Iter:0 - Alpha:0.05 - Batch 145/249 - Min Loss:23.15 - Loss:23.158962886202442\n", - " Iter:0 - Alpha:0.05 - Batch 146/249 - Min Loss:23.13 - Loss:23.13027389905198\n", - " Iter:0 - Alpha:0.05 - Batch 147/249 - Min Loss:23.08 - Loss:23.08432984727065\n", - " Iter:0 - Alpha:0.05 - Batch 148/249 - Min Loss:23.05 - Loss:23.052812278291672\n", - " Iter:0 - Alpha:0.05 - Batch 149/249 - Min Loss:22.97 - Loss:22.977617993036645\n", - " Iter:0 - Alpha:0.05 - Batch 150/249 - Min Loss:22.90 - Loss:22.90860982682122\n", - " Iter:0 - Alpha:0.05 - Batch 151/249 - Min Loss:22.86 - Loss:22.86285029247915\n", - " Iter:0 - Alpha:0.05 - Batch 152/249 - Min Loss:22.79 - Loss:22.798324855724506\n", - " Iter:0 - Alpha:0.05 - Batch 153/249 - Min Loss:22.71 - Loss:22.714156980919036\n", - " Iter:0 - Alpha:0.05 - Batch 154/249 - Min Loss:22.64 - Loss:22.649942295215556\n", - " Iter:0 - Alpha:0.05 - Batch 155/249 - Min Loss:22.60 - Loss:22.60987787211344\n", - " Iter:0 - Alpha:0.05 - Batch 156/249 - Min Loss:22.58 - Loss:22.586330875896408\n", - " Iter:0 - Alpha:0.05 - Batch 157/249 - Min Loss:22.53 - Loss:22.539866221935924\n", - " Iter:0 - Alpha:0.05 - Batch 158/249 - Min Loss:22.48 - Loss:22.481459540414978\n", - " Iter:0 - Alpha:0.05 - Batch 159/249 - Min Loss:22.44 - Loss:22.44458654448001\n", - " Iter:0 - Alpha:0.05 - Batch 160/249 - Min Loss:22.38 - Loss:22.383503615633796\n", - " Iter:0 - Alpha:0.05 - Batch 161/249 - Min Loss:22.31 - Loss:22.317076639885897\n", - " Iter:0 - Alpha:0.05 - Batch 162/249 - Min Loss:22.25 - Loss:22.259430027034902\n", - " Iter:0 - Alpha:0.05 - Batch 163/249 - Min Loss:22.19 - Loss:22.19904509067437\n", - " Iter:0 - Alpha:0.05 - Batch 164/249 - Min Loss:22.14 - Loss:22.148098444656362\n", - " Iter:0 - Alpha:0.05 - Batch 165/249 - Min Loss:22.10 - Loss:22.101528454497767\n", - " Iter:0 - Alpha:0.05 - Batch 166/249 - Min Loss:22.04 - Loss:22.04778889335714\n", - " Iter:0 - Alpha:0.05 - Batch 
167/249 - Min Loss:22.01 - Loss:22.015539256206182\n", - " Iter:0 - Alpha:0.05 - Batch 168/249 - Min Loss:21.97 - Loss:21.97610738320577\n", - " Iter:0 - Alpha:0.05 - Batch 169/249 - Min Loss:21.91 - Loss:21.91886403464624\n", - " Iter:0 - Alpha:0.05 - Batch 170/249 - Min Loss:21.87 - Loss:21.87209957981593\n", - " Iter:0 - Alpha:0.05 - Batch 171/249 - Min Loss:21.83 - Loss:21.83197231706187\n", - " Iter:0 - Alpha:0.05 - Batch 172/249 - Min Loss:21.79 - Loss:21.797138291435964\n", - " Iter:0 - Alpha:0.05 - Batch 173/249 - Min Loss:21.75 - Loss:21.752929637839053\n", - " Iter:0 - Alpha:0.05 - Batch 174/249 - Min Loss:21.71 - Loss:21.71451452173225\n", - " Iter:0 - Alpha:0.05 - Batch 175/249 - Min Loss:21.68 - Loss:21.68635181976672\n", - " Iter:0 - Alpha:0.05 - Batch 176/249 - Min Loss:21.64 - Loss:21.643368363210932\n", - " Iter:0 - Alpha:0.05 - Batch 177/249 - Min Loss:21.59 - Loss:21.59547234826887\n", - " Iter:0 - Alpha:0.05 - Batch 178/249 - Min Loss:21.54 - Loss:21.548969384961\n", - " Iter:0 - Alpha:0.05 - Batch 179/249 - Min Loss:21.52 - Loss:21.528790503616882\n", - " Iter:0 - Alpha:0.05 - Batch 180/249 - Min Loss:21.48 - Loss:21.484217083500187\n", - " Iter:0 - Alpha:0.05 - Batch 181/249 - Min Loss:21.43 - Loss:21.43585185733848\n", - " Iter:0 - Alpha:0.05 - Batch 182/249 - Min Loss:21.38 - Loss:21.386494316884\n", - " Iter:0 - Alpha:0.05 - Batch 183/249 - Min Loss:21.33 - Loss:21.333293821976987\n", - " Iter:0 - Alpha:0.05 - Batch 184/249 - Min Loss:21.29 - Loss:21.292714546692963\n", - " Iter:0 - Alpha:0.05 - Batch 185/249 - Min Loss:21.24 - Loss:21.2479883334045\n", - " Iter:0 - Alpha:0.05 - Batch 186/249 - Min Loss:21.21 - Loss:21.21454412091733\n", - " Iter:0 - Alpha:0.05 - Batch 187/249 - Min Loss:21.18 - Loss:21.18709730607852\n", - " Iter:0 - Alpha:0.05 - Batch 188/249 - Min Loss:21.18 - Loss:21.18500879669473\n", - " Iter:0 - Alpha:0.05 - Batch 189/249 - Min Loss:21.14 - Loss:21.149192169258725\n", - " Iter:0 - Alpha:0.05 - Batch 190/249 - Min Loss:21.12 - Loss:21.121492290739525\n", - " Iter:0 - Alpha:0.05 - Batch 191/249 - Min Loss:21.09 - Loss:21.090529064322375\n", - " Iter:0 - Alpha:0.05 - Batch 192/249 - Min Loss:21.06 - Loss:21.061615675909103\n", - " Iter:0 - Alpha:0.05 - Batch 193/249 - Min Loss:21.00 - Loss:21.000099027990963\n", - " Iter:0 - Alpha:0.05 - Batch 194/249 - Min Loss:20.98 - Loss:20.98468231590407\n", - " Iter:0 - Alpha:0.05 - Batch 195/249 - Min Loss:20.98 - Loss:20.983618463178825\n", - " Iter:0 - Alpha:0.05 - Batch 196/249 - Min Loss:20.94 - Loss:20.941073976275163\n", - " Iter:0 - Alpha:0.05 - Batch 197/249 - Min Loss:20.90 - Loss:20.909982209463195\n", - " Iter:0 - Alpha:0.05 - Batch 198/249 - Min Loss:20.87 - Loss:20.8792320482417\n", - " Iter:0 - Alpha:0.05 - Batch 199/249 - Min Loss:20.83 - Loss:20.836144396820785\n", - " Iter:0 - Alpha:0.05 - Batch 200/249 - Min Loss:20.78 - Loss:20.786702940643394\n", - " Iter:0 - Alpha:0.05 - Batch 201/249 - Min Loss:20.75 - Loss:20.755678930297545\n", - " Iter:0 - Alpha:0.05 - Batch 202/249 - Min Loss:20.72 - Loss:20.72621131450987\n", - " Iter:0 - Alpha:0.05 - Batch 203/249 - Min Loss:20.68 - Loss:20.68893807384813\n", - " Iter:0 - Alpha:0.05 - Batch 204/249 - Min Loss:20.65 - Loss:20.650964182346172\n", - " Iter:0 - Alpha:0.05 - Batch 205/249 - Min Loss:20.61 - Loss:20.617254141780762\n", - " Iter:0 - Alpha:0.05 - Batch 206/249 - Min Loss:20.57 - Loss:20.573633574910954\n", - " Iter:0 - Alpha:0.05 - Batch 207/249 - Min Loss:20.53 - Loss:20.53542759736034\n", - " Iter:0 - Alpha:0.05 - Batch 208/249 - 
Min Loss:20.49 - Loss:20.49739996174571\n", - " Iter:0 - Alpha:0.05 - Batch 209/249 - Min Loss:20.44 - Loss:20.449686230569824\n", - " Iter:0 - Alpha:0.05 - Batch 210/249 - Min Loss:20.40 - Loss:20.403291221330196\n", - " Iter:0 - Alpha:0.05 - Batch 211/249 - Min Loss:20.34 - Loss:20.3475100925815\n", - " Iter:0 - Alpha:0.05 - Batch 212/249 - Min Loss:20.30 - Loss:20.301789206440752\n", - " Iter:0 - Alpha:0.05 - Batch 213/249 - Min Loss:20.26 - Loss:20.26542625947414\n", - " Iter:0 - Alpha:0.05 - Batch 214/249 - Min Loss:20.23 - Loss:20.23111001746059\n", - " Iter:0 - Alpha:0.05 - Batch 215/249 - Min Loss:20.18 - Loss:20.18481394544832\n", - " Iter:0 - Alpha:0.05 - Batch 216/249 - Min Loss:20.14 - Loss:20.144828981387178\n", - " Iter:0 - Alpha:0.05 - Batch 217/249 - Min Loss:20.11 - Loss:20.117436374376467\n", - " Iter:0 - Alpha:0.05 - Batch 218/249 - Min Loss:20.07 - Loss:20.07285790792156\n", - " Iter:0 - Alpha:0.05 - Batch 219/249 - Min Loss:20.01 - Loss:20.019440903545878\n", - " Iter:0 - Alpha:0.05 - Batch 220/249 - Min Loss:19.99 - Loss:19.995961465663378\n", - " Iter:0 - Alpha:0.05 - Batch 221/249 - Min Loss:19.98 - Loss:19.986751087464363\n", - " Iter:0 - Alpha:0.05 - Batch 222/249 - Min Loss:19.95 - Loss:19.95413004376577\n", - " Iter:0 - Alpha:0.05 - Batch 223/249 - Min Loss:19.92 - Loss:19.920535628027963\n", - " Iter:0 - Alpha:0.05 - Batch 224/249 - Min Loss:19.88 - Loss:19.88982739248521\n", - " Iter:0 - Alpha:0.05 - Batch 225/249 - Min Loss:19.85 - Loss:19.85661224385163\n", - " Iter:0 - Alpha:0.05 - Batch 226/249 - Min Loss:19.81 - Loss:19.818270498440558\n", - " Iter:0 - Alpha:0.05 - Batch 227/249 - Min Loss:19.78 - Loss:19.78879746898553\n", - " Iter:0 - Alpha:0.05 - Batch 228/249 - Min Loss:19.77 - Loss:19.773987163273635\n", - " Iter:0 - Alpha:0.05 - Batch 229/249 - Min Loss:19.74 - Loss:19.74321893462951\n", - " Iter:0 - Alpha:0.05 - Batch 230/249 - Min Loss:19.71 - Loss:19.714114028548735\n", - " Iter:0 - Alpha:0.05 - Batch 231/249 - Min Loss:19.69 - Loss:19.696300464252033\n", - " Iter:0 - Alpha:0.05 - Batch 232/249 - Min Loss:19.66 - Loss:19.664281554171573\n", - " Iter:0 - Alpha:0.05 - Batch 233/249 - Min Loss:19.64 - Loss:19.64497900030178\n", - " Iter:0 - Alpha:0.05 - Batch 234/249 - Min Loss:19.61 - Loss:19.61390018795743\n", - " Iter:0 - Alpha:0.05 - Batch 235/249 - Min Loss:19.57 - Loss:19.578057993323174\n", - " Iter:0 - Alpha:0.05 - Batch 236/249 - Min Loss:19.53 - Loss:19.539290922887076\n", - " Iter:0 - Alpha:0.05 - Batch 237/249 - Min Loss:19.51 - Loss:19.51336152702854\n", - " Iter:0 - Alpha:0.05 - Batch 238/249 - Min Loss:19.49 - Loss:19.494216997205427\n", - " Iter:0 - Alpha:0.05 - Batch 239/249 - Min Loss:19.47 - Loss:19.474255434858627\n", - " Iter:0 - Alpha:0.05 - Batch 240/249 - Min Loss:19.45 - Loss:19.45452512691269\n", - " Iter:0 - Alpha:0.05 - Batch 241/249 - Min Loss:19.41 - Loss:19.41567509465724\n", - " Iter:0 - Alpha:0.05 - Batch 242/249 - Min Loss:19.38 - Loss:19.384809146622928\n", - " Iter:0 - Alpha:0.05 - Batch 244/249 - Min Loss:19.35 - Loss:19.35866317796798\n", - " Iter:0 - Alpha:0.05 - Batch 245/249 - Min Loss:19.33 - Loss:19.33615864113177\n", - " Iter:0 - Alpha:0.05 - Batch 246/249 - Min Loss:19.31 - Loss:19.311764056907894\n", - " Iter:0 - Alpha:0.05 - Batch 247/249 - Min Loss:19.28 - Loss:19.28639653162862\n", - " Iter:0 - Alpha:0.05 - Batch 248/249 - Min Loss:19.25 - Loss:19.259355873841894\n", - " Iter:0 - Alpha:0.05 - Batch 249/249 - Min Loss:19.23 - Loss:19.233214769358476\n", - " Iter:1 - Alpha:0.049 - Batch 1/249 - Min 
Loss:13.06 - Loss:13.063830116471486 - her t tere t tere t tere t tere t tere t tere t tere t tere t tere t t\n", - " Iter:1 - Alpha:0.049 - Batch 3/249 - Min Loss:12.94 - Loss:13.045405787590937\n", - " Iter:1 - Alpha:0.049 - Batch 4/249 - Min Loss:12.93 - Loss:12.931715871054474\n", - " Iter:1 - Alpha:0.049 - Batch 249/249 - Min Loss:12.88 - Loss:13.297767268945559\n", - " Iter:2 - Alpha:0.049 - Batch 249/249 - Min Loss:11.98 - Loss:12.467682706214898 hen theren theren therer then theren theren therer then theren theren \n", - " Iter:3 - Alpha:0.048 - Batch 2/249 - Min Loss:11.43 - Loss:11.463986229895673 - hen theres thes thes thes thes thes thes thes thes thes thes thes thes\n", - " Iter:3 - Alpha:0.048 - Batch 3/249 - Min Loss:11.43 - Loss:11.433608994379455\n", - " Iter:3 - Alpha:0.048 - Batch 4/249 - Min Loss:11.29 - Loss:11.292592685693808\n", - " Iter:5 - Alpha:0.047 - Batch 55/249 - Min Loss:11.19 - Loss:11.211233778075991- hend seates, and seates, and seates, and seates, and seates, and seate\n", - " Iter:5 - Alpha:0.047 - Batch 56/249 - Min Loss:11.17 - Loss:11.172951972803446\n", - " Iter:5 - Alpha:0.047 - Batch 100/249 - Min Loss:11.14 - Loss:11.15109606209378\n", - " Iter:5 - Alpha:0.047 - Batch 101/249 - Min Loss:11.13 - Loss:11.13802901054302\n", - " Iter:5 - Alpha:0.047 - Batch 102/249 - Min Loss:11.11 - Loss:11.119285037049702\n", - " Iter:5 - Alpha:0.047 - Batch 103/249 - Min Loss:11.11 - Loss:11.112100526728414\n", - " Iter:5 - Alpha:0.047 - Batch 104/249 - Min Loss:11.10 - Loss:11.104394523353339\n", - " Iter:5 - Alpha:0.047 - Batch 105/249 - Min Loss:11.09 - Loss:11.099332194264191\n", - " Iter:5 - Alpha:0.047 - Batch 107/249 - Min Loss:11.08 - Loss:11.084563410952635\n", - " Iter:5 - Alpha:0.047 - Batch 128/249 - Min Loss:11.07 - Loss:11.080755098118637\n", - " Iter:5 - Alpha:0.047 - Batch 129/249 - Min Loss:11.07 - Loss:11.074532810524575\n", - " Iter:5 - Alpha:0.047 - Batch 130/249 - Min Loss:11.07 - Loss:11.070308307744044\n", - " Iter:5 - Alpha:0.047 - Batch 131/249 - Min Loss:11.05 - Loss:11.053205895772722\n", - " Iter:5 - Alpha:0.047 - Batch 133/249 - Min Loss:11.03 - Loss:11.041628774766117\n", - " Iter:5 - Alpha:0.047 - Batch 135/249 - Min Loss:11.03 - Loss:11.035707041019423\n", - " Iter:5 - Alpha:0.047 - Batch 137/249 - Min Loss:11.02 - Loss:11.024606766677806\n", - " Iter:5 - Alpha:0.047 - Batch 138/249 - Min Loss:11.00 - Loss:11.007025857860137\n", - " Iter:5 - Alpha:0.047 - Batch 144/249 - Min Loss:11.00 - Loss:11.004993617647228\n", - " Iter:5 - Alpha:0.047 - Batch 152/249 - Min Loss:11.00 - Loss:11.003699896686872\n", - " Iter:5 - Alpha:0.047 - Batch 153/249 - Min Loss:10.99 - Loss:10.996498599690339\n", - " Iter:5 - Alpha:0.047 - Batch 208/249 - Min Loss:10.98 - Loss:10.995703693907218\n", - " Iter:5 - Alpha:0.047 - Batch 209/249 - Min Loss:10.98 - Loss:10.987160856768003\n", - " Iter:5 - Alpha:0.047 - Batch 210/249 - Min Loss:10.97 - Loss:10.979813712335138\n", - " Iter:5 - Alpha:0.047 - Batch 211/249 - Min Loss:10.96 - Loss:10.961275932129482\n", - " Iter:5 - Alpha:0.047 - Batch 212/249 - Min Loss:10.95 - Loss:10.954692498871907\n", - " Iter:5 - Alpha:0.047 - Batch 213/249 - Min Loss:10.94 - Loss:10.948507347452539\n", - " Iter:5 - Alpha:0.047 - Batch 214/249 - Min Loss:10.94 - Loss:10.947598863066297\n", - " Iter:5 - Alpha:0.047 - Batch 215/249 - Min Loss:10.93 - Loss:10.937779129758908\n", - " Iter:5 - Alpha:0.047 - Batch 217/249 - Min Loss:10.93 - Loss:10.933565142331665\n", - " Iter:5 - Alpha:0.047 - Batch 218/249 - Min Loss:10.92 - 
Loss:10.927695951351394\n", - " Iter:5 - Alpha:0.047 - Batch 223/249 - Min Loss:10.91 - Loss:10.920012740008673\n", - " Iter:5 - Alpha:0.047 - Batch 224/249 - Min Loss:10.91 - Loss:10.915941202659539\n", - " Iter:5 - Alpha:0.047 - Batch 225/249 - Min Loss:10.91 - Loss:10.911412416260603\n", - " Iter:5 - Alpha:0.047 - Batch 226/249 - Min Loss:10.90 - Loss:10.908265904635522\n", - " Iter:5 - Alpha:0.047 - Batch 235/249 - Min Loss:10.90 - Loss:10.905597365624304\n", - " Iter:5 - Alpha:0.047 - Batch 236/249 - Min Loss:10.89 - Loss:10.898794557204706\n", - " Iter:5 - Alpha:0.047 - Batch 237/249 - Min Loss:10.89 - Loss:10.890970313767603\n", - " Iter:5 - Alpha:0.047 - Batch 240/249 - Min Loss:10.88 - Loss:10.88534610730771\n", - " Iter:5 - Alpha:0.047 - Batch 241/249 - Min Loss:10.87 - Loss:10.876534714857252\n", - " Iter:5 - Alpha:0.047 - Batch 242/249 - Min Loss:10.87 - Loss:10.87459117637956\n", - " Iter:5 - Alpha:0.047 - Batch 248/249 - Min Loss:10.86 - Loss:10.872254639188432\n", - " Iter:5 - Alpha:0.047 - Batch 249/249 - Min Loss:10.86 - Loss:10.86373688723013\n", - " Iter:6 - Alpha:0.047 - Batch 1/249 - Min Loss:10.69 - Loss:10.690402702580894 - hen theres, and theres, and theres, and theres, and theres, and theres\n", - " Iter:7 - Alpha:0.046 - Batch 140/249 - Min Loss:10.55 - Loss:10.736922784153954heres, and seent thees, and seent thees, and seent thees, and seent th" - ] - } - ], - "source": [ - "train(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Iter:91 - Alpha:0.016 - Batch 176/249 - Min Loss:9.900 - Loss:11.975722569949843\n" - ] - } - ], - "source": [ - "train(100)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Intestay thee.\n", - "\n", - "SIR:\n", - "It thou my thar the sentastar the see the see:\n", - "Imentary take the subloud I\n", - "Stall my thentaring fook the senternight pead me, the gakentlenternot they day them.\n", - "\n", - "KENNOR:\n", - "I stay the see talk :\n", - "Non the seady!\n", - "\n", - "Sustar thou shour in the suble the see the senternow the antently the see the seaventlace peake,\n", - "I sentlentony my thent:\n", - "I the sentastar thamy this not thame.\n", - "\n", - "From the stay the sentastar star the see the senternce thentlent\n", - "stay you, he shad be his say the senterny astak\n" - ] - } - ], - "source": [ - "def generate_sample(n=30, init_char=' '):\n", - " s = \"\"\n", - " hidden = model.init_hidden(batch_size=1)\n", - " input = Tensor(np.array([word2index[init_char]]))\n", - " for i in range(n):\n", - " rnn_input = embed.forward(input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - " output.data *= 15\n", - " temp_dist = output.softmax()\n", - " temp_dist /= temp_dist.sum()\n", - "\n", - "# m = (temp_dist > np.random.rand()).argmax() # sample from predictions\n", - " m = output.data.argmax() # take the max prediction\n", - " c = vocab[m]\n", - " input = Tensor(np.array([m]))\n", - " s += c\n", - " return s\n", - "print(generate_sample(n=500, init_char='\\n'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": 
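# Hedged sketch of the temperature trick in generate_sample above: scaling the output
# logits (output.data *= 15) before softmax sharpens the distribution, so sampling
# behaves almost like argmax. The logits below are assumed example values, and
# np.random.choice stands in for the commented-out cumulative-threshold sampling.
import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

logits = np.array([1.0, 0.5, 0.2, -0.3])
for scale in (1, 15):                                  # 15 matches the notebook cell
    print(scale, softmax(logits * scale).round(3))     # larger scale -> mass piles on the argmax

rng = np.random.RandomState(0)
next_char_index = rng.choice(len(logits), p=softmax(logits * 15))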
"python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter14 - Intro to LSTMs - Part 2 - Learn to Write Like Shakespeare-checkpoint.ipynb b/.ipynb_checkpoints/Chapter14 - Intro to LSTMs - Part 2 - Learn to Write Like Shakespeare-checkpoint.ipynb deleted file mode 100644 index 418069b..0000000 --- a/.ipynb_checkpoints/Chapter14 - Intro to LSTMs - Part 2 - Learn to Write Like Shakespeare-checkpoint.ipynb +++ /dev/null @@ -1,950 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class Tensor (object):\n", - " \n", - " def __init__(self,data,\n", - " autograd=False,\n", - " creators=None,\n", - " creation_op=None,\n", - " id=None):\n", - " \n", - " self.data = np.array(data)\n", - " self.autograd = autograd\n", - " self.grad = None\n", - " if(id is None):\n", - " self.id = np.random.randint(0,100000)\n", - " else:\n", - " self.id = id\n", - " \n", - " self.creators = creators\n", - " self.creation_op = creation_op\n", - " self.children = {}\n", - " \n", - " if(creators is not None):\n", - " for c in creators:\n", - " if(self.id not in c.children):\n", - " c.children[self.id] = 1\n", - " else:\n", - " c.children[self.id] += 1\n", - "\n", - " def all_children_grads_accounted_for(self):\n", - " for id,cnt in self.children.items():\n", - " if(cnt != 0):\n", - " return False\n", - " return True \n", - " \n", - " def backward(self,grad=None, grad_origin=None):\n", - " if(self.autograd):\n", - " \n", - " if(grad is None):\n", - " grad = Tensor(np.ones_like(self.data))\n", - "\n", - " if(grad_origin is not None):\n", - " if(self.children[grad_origin.id] == 0):\n", - " raise Exception(\"cannot backprop more than once\")\n", - " else:\n", - " self.children[grad_origin.id] -= 1\n", - "\n", - " if(self.grad is None):\n", - " self.grad = grad\n", - " else:\n", - " self.grad += grad\n", - " \n", - " # grads must not have grads of their own\n", - " assert grad.autograd == False\n", - " \n", - " # only continue backpropping if there's something to\n", - " # backprop into and if all gradients (from children)\n", - " # are accounted for override waiting for children if\n", - " # \"backprop\" was called on this variable directly\n", - " if(self.creators is not None and \n", - " (self.all_children_grads_accounted_for() or \n", - " grad_origin is None)):\n", - "\n", - " if(self.creation_op == \"add\"):\n", - " self.creators[0].backward(self.grad, self)\n", - " self.creators[1].backward(self.grad, self)\n", - " \n", - " if(self.creation_op == \"sub\"):\n", - " self.creators[0].backward(Tensor(self.grad.data), self)\n", - " self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op == 
\"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " if(self.creation_op == \"sigmoid\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (self * (ones - self)))\n", - " \n", - " if(self.creation_op == \"tanh\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (ones - (self * self)))\n", - " \n", - " if(self.creation_op == \"index_select\"):\n", - " new_grad = np.zeros_like(self.creators[0].data)\n", - " indices_ = self.index_select_indices.data.flatten()\n", - " grad_ = grad.data.reshape(len(indices_), -1)\n", - " for i in range(len(indices_)):\n", - " new_grad[indices_[i]] += grad_[i]\n", - " self.creators[0].backward(Tensor(new_grad))\n", - " \n", - " if(self.creation_op == \"cross_entropy\"):\n", - " dx = self.softmax_output - self.target_dist\n", - " self.creators[0].backward(Tensor(dx))\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return 
Tensor(self.data.dot(x.data))\n", - " \n", - " def sigmoid(self):\n", - " if(self.autograd):\n", - " return Tensor(1 / (1 + np.exp(-self.data)),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sigmoid\")\n", - " return Tensor(1 / (1 + np.exp(-self.data)))\n", - "\n", - " def tanh(self):\n", - " if(self.autograd):\n", - " return Tensor(np.tanh(self.data),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"tanh\")\n", - " return Tensor(np.tanh(self.data))\n", - " \n", - " def index_select(self, indices):\n", - "\n", - " if(self.autograd):\n", - " new = Tensor(self.data[indices.data],\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"index_select\")\n", - " new.index_select_indices = indices\n", - " return new\n", - " return Tensor(self.data[indices.data])\n", - "\n", - " def softmax(self):\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " return softmax_output \n", - " \n", - " def cross_entropy(self, target_indices):\n", - "\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " \n", - " t = target_indices.data.flatten()\n", - " p = softmax_output.reshape(len(t),-1)\n", - " target_dist = np.eye(p.shape[1])[t]\n", - " loss = -(np.log(p) * (target_dist)).sum(1).mean()\n", - " \n", - " if(self.autograd):\n", - " out = Tensor(loss,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"cross_entropy\")\n", - " out.softmax_output = softmax_output\n", - " out.target_dist = target_dist\n", - " return out\n", - "\n", - " return Tensor(loss)\n", - " \n", - " \n", - " def __repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - " \n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - " \n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()\n", - "\n", - "class Layer(object):\n", - " \n", - " def __init__(self):\n", - " self.parameters = list()\n", - " \n", - " def get_parameters(self):\n", - " return self.parameters\n", - "\n", - " \n", - "class SGD(object):\n", - " \n", - " def __init__(self, parameters, alpha=0.1):\n", - " self.parameters = parameters\n", - " self.alpha = alpha\n", - " \n", - " def zero(self):\n", - " for p in self.parameters:\n", - " p.grad.data *= 0\n", - " \n", - " def step(self, zero=True):\n", - " \n", - " for p in self.parameters:\n", - " \n", - " p.data -= p.grad.data * self.alpha\n", - " \n", - " if(zero):\n", - " p.grad.data *= 0\n", - "\n", - "\n", - "class Linear(Layer):\n", - "\n", - " def __init__(self, n_inputs, n_outputs):\n", - " super().__init__()\n", - " W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0/(n_inputs))\n", - " self.weight = Tensor(W, autograd=True)\n", - " self.bias = Tensor(np.zeros(n_outputs), autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " self.parameters.append(self.bias)\n", - "\n", - " def forward(self, input):\n", - " return input.mm(self.weight)+self.bias.expand(0,len(input.data))\n", - "\n", - "\n", - "class Sequential(Layer):\n", - " \n", - " def __init__(self, layers=list()):\n", - " super().__init__()\n", - " \n", - " self.layers = layers\n", - " \n", - " def 
add(self, layer):\n", - " self.layers.append(layer)\n", - " \n", - " def forward(self, input):\n", - " for layer in self.layers:\n", - " input = layer.forward(input)\n", - " return input\n", - " \n", - " def get_parameters(self):\n", - " params = list()\n", - " for l in self.layers:\n", - " params += l.get_parameters()\n", - " return params\n", - "\n", - "\n", - "class Embedding(Layer):\n", - " \n", - " def __init__(self, vocab_size, dim):\n", - " super().__init__()\n", - " \n", - " self.vocab_size = vocab_size\n", - " self.dim = dim\n", - " \n", - " # this random initialiation style is just a convention from word2vec\n", - " self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " \n", - " def forward(self, input):\n", - " return self.weight.index_select(input)\n", - "\n", - "\n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - "\n", - "\n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()\n", - " \n", - "\n", - "class CrossEntropyLoss(object):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input, target):\n", - " return input.cross_entropy(target)\n", - "\n", - " \n", - "class RNNCell(Layer):\n", - " \n", - " def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):\n", - " super().__init__()\n", - "\n", - " self.n_inputs = n_inputs\n", - " self.n_hidden = n_hidden\n", - " self.n_output = n_output\n", - " \n", - " if(activation == 'sigmoid'):\n", - " self.activation = Sigmoid()\n", - " elif(activation == 'tanh'):\n", - " self.activation == Tanh()\n", - " else:\n", - " raise Exception(\"Non-linearity not found\")\n", - "\n", - " self.w_ih = Linear(n_inputs, n_hidden)\n", - " self.w_hh = Linear(n_hidden, n_hidden)\n", - " self.w_ho = Linear(n_hidden, n_output)\n", - " \n", - " self.parameters += self.w_ih.get_parameters()\n", - " self.parameters += self.w_hh.get_parameters()\n", - " self.parameters += self.w_ho.get_parameters() \n", - " \n", - " def forward(self, input, hidden):\n", - " from_prev_hidden = self.w_hh.forward(hidden)\n", - " combined = self.w_ih.forward(input) + from_prev_hidden\n", - " new_hidden = self.activation.forward(combined)\n", - " output = self.w_ho.forward(new_hidden)\n", - " return output, new_hidden\n", - " \n", - " def init_hidden(self, batch_size=1):\n", - " return Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 1: RNN Character Language Model" - ] - }, - { - "cell_type": "code", - "execution_count": 132, - "metadata": {}, - "outputs": [], - "source": [ - "import sys,random,math\n", - "from collections import Counter\n", - "import numpy as np\n", - "import sys\n", - "\n", - "np.random.seed(0)\n", - "# dataset from http://karpathy.github.io/2015/05/21/rnn-effectiveness/\n", - "f = open('shakespear.txt','r')\n", - "raw = f.read()\n", - "f.close()\n", - "\n", - "vocab = list(set(raw))\n", - "word2index = {}\n", - "for i,word in enumerate(vocab):\n", - " word2index[word]=i\n", - "indices = np.array(list(map(lambda x:word2index[x], raw)))\n", - "\n", - "embed = Embedding(vocab_size=len(vocab),dim=512)\n", - "model = RNNCell(n_inputs=512, n_hidden=512, n_output=len(vocab))\n", - "\n", - "criterion = 
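# Part 2 trains a plain RNNCell rather than the LSTM. A minimal NumPy sketch of the
# step its forward() computes, with assumed toy sizes and random weights (biases
# omitted). Note the same typo as in Part 1: the tanh branch of RNNCell.__init__
# should assign (`self.activation = Tanh()`), not compare.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(0)
n_in, n_hid, n_out = 3, 4, 2
W_ih = rng.randn(n_in, n_hid) * 0.1     # input -> hidden
W_hh = rng.randn(n_hid, n_hid) * 0.1    # hidden -> hidden (the recurrence)
W_ho = rng.randn(n_hid, n_out) * 0.1    # hidden -> output logits

x = rng.randn(1, n_in)
h = np.zeros((1, n_hid))                        # init_hidden
h = sigmoid(x.dot(W_ih) + h.dot(W_hh))          # new hidden state
y = h.dot(W_ho)                                 # output for this step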
CrossEntropyLoss()\n", - "optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)\n", - "\n", - "batch_size = 32\n", - "bptt = 16\n", - "n_batches = int((indices.shape[0] / (batch_size)))\n", - "\n", - "trimmed_indices = indices[:n_batches*batch_size]\n", - "batched_indices = trimmed_indices.reshape(batch_size, n_batches).transpose()\n", - "\n", - "input_batched_indices = batched_indices[0:-1]\n", - "target_batched_indices = batched_indices[1:]\n", - "\n", - "n_bptt = int(((n_batches-1) / bptt))\n", - "input_batches = input_batched_indices[:n_bptt*bptt].reshape(n_bptt,bptt,batch_size)\n", - "target_batches = target_batched_indices[:n_bptt*bptt].reshape(n_bptt, bptt, batch_size)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'That,'" - ] - }, - "execution_count": 143, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw[0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([30, 4, 37, 42, 52])" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "indices[0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[30, 23, 7, 31, 50, 4, 30, 0, 0, 37, 37, 9, 50, 0, 52, 21,\n", - " 0, 61, 4, 7, 9, 37, 0, 0, 0, 33, 0, 33, 33, 0, 0, 8],\n", - " [ 4, 21, 21, 31, 26, 9, 37, 53, 61, 8, 59, 9, 59, 42, 0, 21,\n", - " 27, 50, 50, 21, 59, 1, 27, 57, 4, 8, 13, 20, 8, 20, 58, 0],\n", - " [37, 21, 14, 9, 9, 37, 59, 51, 50, 0, 59, 7, 57, 4, 16, 55,\n", - " 9, 53, 59, 21, 57, 9, 35, 9, 9, 40, 50, 52, 0, 50, 50, 27],\n", - " [42, 17, 30, 35, 0, 35, 27, 0, 53, 8, 0, 0, 23, 9, 9, 43,\n", - " 9, 52, 61, 39, 51, 0, 50, 9, 35, 59, 53, 21, 11, 35, 20, 50],\n", - " [52, 43, 33, 14, 16, 0, 50, 35, 0, 50, 16, 36, 21, 0, 0, 38,\n", - " 8, 0, 0, 50, 23, 61, 16, 26, 52, 37, 59, 43, 53, 9, 9, 61]])" - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "batched_indices[0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 145, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[30, 23, 7, 31, 50, 4, 30, 0, 0, 37, 37, 9, 50, 0, 52, 21,\n", - " 0, 61, 4, 7, 9, 37, 0, 0, 0, 33, 0, 33, 33, 0, 0, 8],\n", - " [ 4, 21, 21, 31, 26, 9, 37, 53, 61, 8, 59, 9, 59, 42, 0, 21,\n", - " 27, 50, 50, 21, 59, 1, 27, 57, 4, 8, 13, 20, 8, 20, 58, 0],\n", - " [37, 21, 14, 9, 9, 37, 59, 51, 50, 0, 59, 7, 57, 4, 16, 55,\n", - " 9, 53, 59, 21, 57, 9, 35, 9, 9, 40, 50, 52, 0, 50, 50, 27],\n", - " [42, 17, 30, 35, 0, 35, 27, 0, 53, 8, 0, 0, 23, 9, 9, 43,\n", - " 9, 52, 61, 39, 51, 0, 50, 9, 35, 59, 53, 21, 11, 35, 20, 50],\n", - " [52, 43, 33, 14, 16, 0, 50, 35, 0, 50, 16, 36, 21, 0, 0, 38,\n", - " 8, 0, 0, 50, 23, 61, 16, 26, 52, 37, 59, 43, 53, 9, 9, 61]])" - ] - }, - "execution_count": 145, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "input_batches[0][0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 4, 21, 21, 31, 26, 9, 37, 53, 61, 8, 59, 9, 59, 42, 0, 21,\n", - " 27, 50, 50, 21, 59, 1, 27, 57, 4, 8, 13, 20, 8, 20, 58, 0],\n", - " [37, 21, 14, 9, 9, 37, 59, 51, 50, 0, 59, 7, 57, 4, 16, 55,\n", - " 9, 53, 59, 21, 57, 9, 35, 9, 9, 40, 50, 52, 0, 50, 50, 
27],\n", - " [42, 17, 30, 35, 0, 35, 27, 0, 53, 8, 0, 0, 23, 9, 9, 43,\n", - " 9, 52, 61, 39, 51, 0, 50, 9, 35, 59, 53, 21, 11, 35, 20, 50],\n", - " [52, 43, 33, 14, 16, 0, 50, 35, 0, 50, 16, 36, 21, 0, 0, 38,\n", - " 8, 0, 0, 50, 23, 61, 16, 26, 52, 37, 59, 43, 53, 9, 9, 61],\n", - " [ 0, 48, 51, 57, 33, 9, 42, 9, 16, 0, 9, 4, 21, 57, 16, 3,\n", - " 0, 20, 27, 51, 21, 50, 0, 0, 0, 16, 57, 8, 31, 0, 52, 51]])" - ] - }, - "execution_count": 146, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_batches[0][0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def train(iterations=400):\n", - " for iter in range(iterations):\n", - " total_loss = 0\n", - " n_loss = 0\n", - "\n", - " hidden = model.init_hidden(batch_size=batch_size)\n", - " for batch_i in range(len(input_batches)):\n", - "\n", - " hidden = Tensor(hidden.data, autograd=True)\n", - " loss = None\n", - " losses = list()\n", - " for t in range(bptt):\n", - " input = Tensor(input_batches[batch_i][t], autograd=True)\n", - " rnn_input = embed.forward(input=input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - "\n", - " target = Tensor(target_batches[batch_i][t], autograd=True) \n", - " batch_loss = criterion.forward(output, target)\n", - " losses.append(batch_loss)\n", - " if(t == 0):\n", - " loss = batch_loss\n", - " else:\n", - " loss = loss + batch_loss\n", - " for loss in losses:\n", - " \"\"\n", - " loss.backward()\n", - " optim.step()\n", - " total_loss += loss.data\n", - " log = \"\\r Iter:\" + str(iter)\n", - " log += \" - Batch \"+str(batch_i+1)+\"/\"+str(len(input_batches))\n", - " log += \" - Loss:\" + str(np.exp(total_loss / (batch_i+1)))\n", - " if(batch_i == 0):\n", - " log += \" - \" + generate_sample(n=70, init_char='\\n').replace(\"\\n\",\" \")\n", - " if(batch_i % 10 == 0 or batch_i-1 == len(input_batches)):\n", - " sys.stdout.write(log)\n", - " optim.alpha *= 0.99\n", - " print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Iter:0 - Batch 191/195 - Loss:148.00388828554404 \n", - " Iter:1 - Batch 191/195 - Loss:20.588816924127116 mhnethet tttttt t t t thett ttth thetttt thetth t tt t tttheth t t ttt\n", - " Iter:2 - Batch 191/195 - Loss:15.282461756020384 h th the the the th the the thet the the the the the the the t the th\n", - " Iter:3 - Batch 191/195 - Loss:13.048394405821716dh th the the the t the t the the the t the th the the the the the the\n", - " Iter:4 - Batch 191/195 - Loss:11.774988723385185dh the the t th the t the the the the the the th th the t the th th th\n", - " Iter:5 - Batch 191/195 - Loss:10.989391522842189dh the the the the the the t the the the the the the t the the the \n", - " Iter:6 - Batch 191/195 - Loss:10.400423045063138dh the the the t the t the the the the the the the the the the the the\n", - " Iter:7 - Batch 191/195 - Loss:9.9208775761205764da th the the the the the t the the the the the the the the the the th\n", - " Iter:8 - Batch 191/195 - Loss:9.511516020913637 da the the the the the the the the the the the the the the the the the\n", - " Iter:9 - Batch 191/195 - Loss:9.148623407454805 th the the the the the the the the the the the the t the the the the \n", - " Iter:10 - Batch 191/195 - Loss:8.821849800958601th the the the the the the the the the the the the the the the th the \n", - " Iter:11 - Batch 191/195 - 
Loss:8.523034059325719 th the the the the the the the the the the the the the the the the th \n", - " Iter:12 - Batch 191/195 - Loss:8.253664350983296 the th the the t the the the the the the the the the the the the the t\n", - " Iter:13 - Batch 191/195 - Loss:8.0069672337493755th the the the the the the the the the th the the the the the the the \n", - " Iter:14 - Batch 191/195 - Loss:7.7739372955274885the the to the the the the the the the the the the the the the the th\n", - " Iter:15 - Batch 191/195 - Loss:7.5511986438587625 the th the the the the the the the the the the the the the the the the\n", - " Iter:16 - Batch 191/195 - Loss:7.3390047318671625the the th the the the the the the the the the the the the the the ha\n", - " Iter:17 - Batch 191/195 - Loss:7.1373636919747545the the the the th the the the he the the the the the t the the the \n", - " Iter:18 - Batch 191/195 - Loss:6.9446057248731465the the the the the the the the h the the the the the the the the he\n", - " Iter:19 - Batch 191/195 - Loss:6.759464262811306 the the the the the the he the the the the the the to the the the th\n", - " Iter:20 - Batch 191/195 - Loss:6.5803850681516345the the the the the the the the the the the hich the the the the the\n", - " Iter:21 - Batch 191/195 - Loss:6.4063886200003965t wh we a to the the the w the the the the the ha the hace the th\n", - " Iter:22 - Batch 191/195 - Loss:6.2365373926845556were the the h the the hace the the w the the the the the the th\n", - " Iter:23 - Batch 191/195 - Loss:6.0703683905820465the with the the the the t the ha the ha the the the the the the \n", - " Iter:24 - Batch 191/195 - Loss:5.9101324074886685the with the the to the the the the the h the the the the h the \n", - " Iter:25 - Batch 191/195 - Loss:5.749468001942508- to the have the the the he t the with himst the the the the the\n", - " Iter:26 - Batch 191/195 - Loss:5.5937695794009965o the the w the the w the the the t the w t ha s h the have the w\n", - " Iter:27 - Batch 191/195 - Loss:5.4456949428981775to the w t t the the the the the h the the the the the himst the \n", - " Iter:28 - Batch 191/195 - Loss:5.2906332130566365to the we a face the the t to the the the the the hace the wast the\n", - " Iter:29 - Batch 191/195 - Loss:5.142299805001576 to the the was wa say whe ha hace the t hath a s and the was he\n", - " Iter:30 - Batch 191/195 - Loss:4.9942134181372375to the the h the to the the the wa sam the to the the the the the \n", - " Iter:31 - Batch 191/195 - Loss:4.8489935397557655to the t to the the the the the the the t the the the the the t t th\n", - " Iter:32 - Batch 191/195 - Loss:4.7071025984761385to the the the the the was the the to the was the the the was the th\n", - " Iter:33 - Batch 191/195 - Loss:4.570672395786387 to the was the the the the the the the have the have the the the t \n", - " Iter:34 - Batch 191/195 - Loss:4.4383705299373585 to the was the the t t the we a say ha we a say well the w hath \n", - " Iter:35 - Batch 191/195 - Loss:4.3076334964831915to the the t the h the t the the the and the the the the t and th\n", - " Iter:36 - Batch 191/195 - Loss:4.178814890600767 to the the the the w woth a shall the the and the word the w ha \n", - " Iter:37 - Batch 191/195 - Loss:4.0426158644820305to the was was was well the h the well the the and the have the \n", - " Iter:38 - Batch 191/195 - Loss:3.9240497331706576 to the the w woth the t the the we a s and the the the and the h\n", - " Iter:39 - Batch 191/195 - Loss:3.7904568222256065to the the the the the the himst the the 
the wo the w wother s and \n", - " Iter:40 - Batch 191/195 - Loss:3.6623246602453072 to the the the the the w ha w w the to the the the the the the the \n", - " Iter:41 - Batch 161/195 - Loss:3.5868070218013654to the won speake and reping the the the restains wone was " - ] - } - ], - "source": [ - "train(100)" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Iter:73 - Batch 133/195 - Loss:1.4278971135025023\n" - ] - } - ], - "source": [ - "train(100)" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Iter:0 - Batch 191/195 - Loss:1.5069461425416464I of ke hadon, and kidains: r pren a suco most this thuse are, sir, \n", - " Iter:1 - Batch 191/195 - Loss:1.4231989098544708 I hot thi pomimm. CHENER which thy weick the man's and w rege, To t\n", - " Iter:2 - Batch 191/195 - Loss:1.3957566042826986 I was aga now, I am the s eakn man's and s as h the seq man's and say \n", - " Iter:3 - Batch 191/195 - Loss:1.3711573215374844 I wase, and sa day wear and sa day wear and saked in the chas to the \n", - " Iter:4 - Batch 191/195 - Loss:1.3063271397979614 I was ageing on the shak to the eakn mine th every sted and say wend \n", - " Iter:5 - Batch 191/195 - Loss:1.2782573171839362 I and eytif the stakn my hath somet mest and say we say, sich her, my \n", - " Iter:6 - Batch 191/195 - Loss:1.2572971763852931 I was eakn m of the strich e ha bucthis sty which thy t upof thy to\n", - " Iter:7 - Batch 191/195 - Loss:1.2334132737824353 I hot this stod age; And so not agabe sich her, my good lorget the t\n", - " Iter:8 - Batch 191/195 - Loss:1.2115008935021373 I am ever a speakt? ANG HENO P w part doth ared face. CHENERNE: I a\n", - " Iter:9 - Batch 191/195 - Loss:1.2267139408557375I am in me himmand? ANG HENO P hath will seem. CHENE: I all the ch\n", - " Iter:10 - Batch 191/195 - Loss:1.2364342251533027 I and ey B wi hort offould sich her, my wear a speake that I overy\n", - " Iter:11 - Batch 191/195 - Loss:1.2156765978429473 I a pent me h as I seem. MALV: I hake of the strich e ouns arbl s\n", - " Iter:12 - Batch 191/195 - Loss:1.1859873698816483I was namids, and e to eaknge, This, and I ho make h to eak to you \n", - " Iter:13 - Batch 191/195 - Loss:1.1631807879803502 I am andak: Hak: Ha not ded aids, agats unt and sa day wear and the m\n", - " Iter:14 - Batch 191/195 - Loss:1.1926253572249608 I war and h love, and say werr, be gent and semingmbee, and their hit\n", - " Iter:15 - Batch 191/195 - Loss:1.2492829050499197 I was it good an the staknd eyes, an the st and emanot a day wear are \n", - " Iter:16 - Batch 191/195 - Loss:1.1733306502007356 I hot this seq me I have e have stold a ag poor untruck wear andre\n", - " Iter:17 - Batch 191/195 - Loss:1.2099463980632899I af hold the sir, this st in thee, Whought the man pr would have tha\n", - " Iter:18 - Batch 191/195 - Loss:1.2007235850050733 I out Gind ey lorget I no your agh! Ghou are diar as poto your j p \n", - " Iter:19 - Batch 191/195 - Loss:1.1451445876574398 I am it wot untruck wear and I have t untruck w off cha cont would hav\n", - " Iter:20 - Batch 191/195 - Loss:1.1292903594165675 I am andak: Hak: f of my hath rept thou m this, and such her flick \n", - " Iter:21 - Batch 191/195 - Loss:1.1084157630708358 I a pent me himm. 
CHENRE: I seeavon i go her, Yo make him sich thin\n", - " Iter:22 - Batch 191/195 - Loss:1.1019740170386951 I af kis I at th to the tamn thy too, sir, sich thin the rent the tam\n", - " Iter:23 - Batch 191/195 - Loss:1.0904913027135066 I af light to the rent that I will rent the tamn the sea he rentle\n", - " Iter:24 - Batch 191/195 - Loss:1.0860704762748256 I af here it theight th to the mands; and thy too, sir, sich think in\n", - " Iter:25 - Batch 191/195 - Loss:1.0777115519702094 I af and the man! Thich thy the tamnous s wear a good and thy too, sir\n", - " Iter:26 - Batch 191/195 - Loss:1.0733095018722293 lace. CHENRE: I all the chast hath s as I some. BENTIO: Why LOSPHi\n", - " Iter:27 - Batch 191/195 - Loss:1.0665165094326634I af the mands; and thy too, s eath are, met' outshat. somet res and \n", - " Iter:28 - Batch 191/195 - Loss:1.0625091525035777 I af here? CHEOPIO: This a pent me him the contre commort who it wot\n", - " Iter:29 - Batch 191/195 - Loss:1.0591559773670037 I a pent me maid wear as ear and I have my hath in the chaids; and th\n", - " Iter:30 - Batch 61/195 - Loss:1.0533843281265225 I af the mands your spooolding resentleming from this, and I have my" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mlosses\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbptt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0minput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_batches\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbatch_i\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mautograd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mrnn_input\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrnn_input\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhidden\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, autograd, creators, creation_op, id)\u001b[0m\n\u001b[1;32m 264\u001b[0m id=None):\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 266\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 267\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautograd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "train(100)" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "I war ded abdons would.\n", - "\n", - "CHENRO:\n", - "Why, speed no virth to her,\n", - "Plirt, goth Plish love,\n", - "Befion\n", - " hath if be fe woulds is feally your hir, the confectife to the nightion\n", - "As rent Ron my hath iom\n", - "the worse, my goth Plish love,\n", - "Befion\n", - "Ass untrucerty of my fernight this we namn?\n", - "\n", - "ANG, makes:\n", - "That's bond confect fe comes not commonour would be forch the conflill, the confectiffould b off your day, sind it sequns, be gent Rour jus confectife to the nightion\n", - "As poing from your jus eep of m look o perves, the worse, my goth Plis rept ffough we name:\n", - "Thould be good lorges ever word.\n", - "\n", - "DESS:\n", - "Where exbinder: if not conflill, the confectife to the nightion\n", - "As co move, sir, this we namn?\n", - "\n", - "ANG VINE PAET:\n", - "There was courter hower how, my goth Plish lo res\n", - "Toures\n", - "ever wo formall, have abon, with a good lorges ever word.\n", - "\n", - "DESS:\n", - "Where exbinder: if not conflill, the confectife to the nightion\n", - "As co mo not?\n", - "\n", - "ANG:\n", - "I horses ever with gent may. Thour hot never wear.\n", - "\n", - "PAGTI by him,\n", - "And conflill, the confectif you le.\n", - "\n", - "CALV\n", - "BENTI SIII:\n", - "Sou looks no virther\n", - "But kill be he'l of the worse, my goth Plish love,\n", - "Befion\n", - "Ass upof me threet, such her,\n", - "I speak:\n", - " hath if be fe woulds is feally your h offent, such her,\n", - "I speak:\n", - "Seco my same\n", - "And kidon, with gent and the of Ser prent me n may. 
Thou my plest near:\n", - "Sou looks no virther\n", - "But kill be he'l of the worse, my goth Plish love,\n", - "Befion\n", - "Ass untrucerty of my fernight this we namn?\n", - "\n", - "ANG, makes:\n", - "That's bond confect fe comes,\n", - "Sour confectife to the nightion\n", - "As rent Ron my list forch the confectife to the nightion\n", - "As co mo not?\n", - "\n", - "ANG:\n", - "I wo face;\n", - "The conflind is thee\n", - "ter's same I have my lorges ever word.\n", - "\n", - "DESS:\n", - "Where exbinder:\n", - "Yot you le.\n", - "\n", - "CALV CHENRE:\n", - "I will be her,\n", - " oo by the confectife to the nightion\n", - "As rent Ron my hath iom\n", - "the worse, my goth P would not?\n", - "\n", - "ANG:\n", - "I h don: my fereigrts, word.\n", - "\n", - "DESS:\n", - "Where exbinder: if not conflill, the confectife to the nightion\n", - "As comest this?\n", - "\n", - "ANGOR I lot comest thou my plest near:\n", - "Sou looks no virther\n", - "But kill be he'l\n" - ] - } - ], - "source": [ - "def generate_sample(n=30, init_char=' '):\n", - " s = \"\"\n", - " hidden = model.init_hidden(batch_size=1)\n", - " input = Tensor(np.array([word2index[init_char]]))\n", - " for i in range(n):\n", - " rnn_input = embed.forward(input)\n", - " output, hidden = model.forward(input=rnn_input, hidden=hidden)\n", - " output.data *= 10\n", - " temp_dist = output.softmax()\n", - " temp_dist /= temp_dist.sum()\n", - "\n", - " m = (temp_dist > np.random.rand()).argmax()\n", - "# m = output.data.argmax()\n", - " c = vocab[m]\n", - " input = Tensor(np.array([m]))\n", - " s += c\n", - " return s\n", - "print(generate_sample(n=2000, init_char='\\n'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter15 - Intro to Federated Learning - Deep Learning on Unseen Data-checkpoint.ipynb b/.ipynb_checkpoints/Chapter15 - Intro to Federated Learning - Deep Learning on Unseen Data-checkpoint.ipynb deleted file mode 100644 index 14bbbef..0000000 --- a/.ipynb_checkpoints/Chapter15 - Intro to Federated Learning - Deep Learning on Unseen Data-checkpoint.ipynb +++ /dev/null @@ -1,1047 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "class Tensor (object):\n", - " \n", - " def __init__(self,data,\n", - " autograd=False,\n", - " creators=None,\n", - " creation_op=None,\n", - " id=None):\n", - " \n", - " self.data = np.array(data)\n", - " self.autograd = autograd\n", - " self.grad = None\n", - "\n", - " if(id is None):\n", - " self.id = np.random.randint(0,1000000000)\n", - " 
else:\n", - " self.id = id\n", - " \n", - " self.creators = creators\n", - " self.creation_op = creation_op\n", - " self.children = {}\n", - " \n", - " if(creators is not None):\n", - " for c in creators:\n", - " if(self.id not in c.children):\n", - " c.children[self.id] = 1\n", - " else:\n", - " c.children[self.id] += 1\n", - "\n", - " def all_children_grads_accounted_for(self):\n", - " for id,cnt in self.children.items():\n", - " if(cnt != 0):\n", - " return False\n", - " return True \n", - " \n", - " def backward(self,grad=None, grad_origin=None):\n", - " if(self.autograd):\n", - " \n", - " if(grad is None):\n", - " grad = Tensor(np.ones_like(self.data))\n", - "\n", - " if(grad_origin is not None):\n", - " if(self.children[grad_origin.id] == 0):\n", - " return\n", - " print(self.id)\n", - " print(self.creation_op)\n", - " print(len(self.creators))\n", - " for c in self.creators:\n", - " print(c.creation_op)\n", - " raise Exception(\"cannot backprop more than once\")\n", - " else:\n", - " self.children[grad_origin.id] -= 1\n", - "\n", - " if(self.grad is None):\n", - " self.grad = grad\n", - " else:\n", - " self.grad += grad\n", - " \n", - " # grads must not have grads of their own\n", - " assert grad.autograd == False\n", - " \n", - " # only continue backpropping if there's something to\n", - " # backprop into and if all gradients (from children)\n", - " # are accounted for override waiting for children if\n", - " # \"backprop\" was called on this variable directly\n", - " if(self.creators is not None and \n", - " (self.all_children_grads_accounted_for() or \n", - " grad_origin is None)):\n", - "\n", - " if(self.creation_op == \"add\"):\n", - " self.creators[0].backward(self.grad, self)\n", - " self.creators[1].backward(self.grad, self)\n", - " \n", - " if(self.creation_op == \"sub\"):\n", - " self.creators[0].backward(Tensor(self.grad.data), self)\n", - " self.creators[1].backward(Tensor(self.grad.__neg__().data), self)\n", - "\n", - " if(self.creation_op == \"mul\"):\n", - " new = self.grad * self.creators[1]\n", - " self.creators[0].backward(new , self)\n", - " new = self.grad * self.creators[0]\n", - " self.creators[1].backward(new, self) \n", - " \n", - " if(self.creation_op == \"mm\"):\n", - " c0 = self.creators[0]\n", - " c1 = self.creators[1]\n", - " new = self.grad.mm(c1.transpose())\n", - " c0.backward(new)\n", - " new = self.grad.transpose().mm(c0).transpose()\n", - " c1.backward(new)\n", - " \n", - " if(self.creation_op == \"transpose\"):\n", - " self.creators[0].backward(self.grad.transpose())\n", - "\n", - " if(\"sum\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.expand(dim,\n", - " self.creators[0].data.shape[dim]))\n", - "\n", - " if(\"expand\" in self.creation_op):\n", - " dim = int(self.creation_op.split(\"_\")[1])\n", - " self.creators[0].backward(self.grad.sum(dim))\n", - " \n", - " if(self.creation_op == \"neg\"):\n", - " self.creators[0].backward(self.grad.__neg__())\n", - " \n", - " if(self.creation_op == \"sigmoid\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (self * (ones - self)))\n", - " \n", - " if(self.creation_op == \"tanh\"):\n", - " ones = Tensor(np.ones_like(self.grad.data))\n", - " self.creators[0].backward(self.grad * (ones - (self * self)))\n", - " \n", - " if(self.creation_op == \"index_select\"):\n", - " new_grad = np.zeros_like(self.creators[0].data)\n", - " indices_ = self.index_select_indices.data.flatten()\n", - " 
grad_ = grad.data.reshape(len(indices_), -1)\n", - " for i in range(len(indices_)):\n", - " new_grad[indices_[i]] += grad_[i]\n", - " self.creators[0].backward(Tensor(new_grad))\n", - " \n", - " if(self.creation_op == \"cross_entropy\"):\n", - " dx = self.softmax_output - self.target_dist\n", - " self.creators[0].backward(Tensor(dx))\n", - " \n", - " def __add__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data + other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"add\")\n", - " return Tensor(self.data + other.data)\n", - "\n", - " def __neg__(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data * -1,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"neg\")\n", - " return Tensor(self.data * -1)\n", - " \n", - " def __sub__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data - other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"sub\")\n", - " return Tensor(self.data - other.data)\n", - " \n", - " def __mul__(self, other):\n", - " if(self.autograd and other.autograd):\n", - " return Tensor(self.data * other.data,\n", - " autograd=True,\n", - " creators=[self,other],\n", - " creation_op=\"mul\")\n", - " return Tensor(self.data * other.data) \n", - "\n", - " def sum(self, dim):\n", - " if(self.autograd):\n", - " return Tensor(self.data.sum(dim),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sum_\"+str(dim))\n", - " return Tensor(self.data.sum(dim))\n", - " \n", - " def expand(self, dim,copies):\n", - "\n", - " trans_cmd = list(range(0,len(self.data.shape)))\n", - " trans_cmd.insert(dim,len(self.data.shape))\n", - " new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)\n", - " \n", - " if(self.autograd):\n", - " return Tensor(new_data,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"expand_\"+str(dim))\n", - " return Tensor(new_data)\n", - " \n", - " def transpose(self):\n", - " if(self.autograd):\n", - " return Tensor(self.data.transpose(),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"transpose\")\n", - " \n", - " return Tensor(self.data.transpose())\n", - " \n", - " def mm(self, x):\n", - " if(self.autograd):\n", - " return Tensor(self.data.dot(x.data),\n", - " autograd=True,\n", - " creators=[self,x],\n", - " creation_op=\"mm\")\n", - " return Tensor(self.data.dot(x.data))\n", - " \n", - " def sigmoid(self):\n", - " if(self.autograd):\n", - " return Tensor(1 / (1 + np.exp(-self.data)),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"sigmoid\")\n", - " return Tensor(1 / (1 + np.exp(-self.data)))\n", - "\n", - " def tanh(self):\n", - " if(self.autograd):\n", - " return Tensor(np.tanh(self.data),\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"tanh\")\n", - " return Tensor(np.tanh(self.data))\n", - " \n", - " def index_select(self, indices):\n", - "\n", - " if(self.autograd):\n", - " new = Tensor(self.data[indices.data],\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"index_select\")\n", - " new.index_select_indices = indices\n", - " return new\n", - " return Tensor(self.data[indices.data])\n", - " \n", - " def softmax(self):\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " return softmax_output\n", - " \n", - " def cross_entropy(self, 
target_indices):\n", - "\n", - " temp = np.exp(self.data)\n", - " softmax_output = temp / np.sum(temp,\n", - " axis=len(self.data.shape)-1,\n", - " keepdims=True)\n", - " \n", - " t = target_indices.data.flatten()\n", - " p = softmax_output.reshape(len(t),-1)\n", - " target_dist = np.eye(p.shape[1])[t]\n", - " loss = -(np.log(p) * (target_dist)).sum(1).mean()\n", - " \n", - " if(self.autograd):\n", - " out = Tensor(loss,\n", - " autograd=True,\n", - " creators=[self],\n", - " creation_op=\"cross_entropy\")\n", - " out.softmax_output = softmax_output\n", - " out.target_dist = target_dist\n", - " return out\n", - "\n", - " return Tensor(loss)\n", - " \n", - " \n", - " def __repr__(self):\n", - " return str(self.data.__repr__())\n", - " \n", - " def __str__(self):\n", - " return str(self.data.__str__()) \n", - "\n", - "class Layer(object):\n", - " \n", - " def __init__(self):\n", - " self.parameters = list()\n", - " \n", - " def get_parameters(self):\n", - " return self.parameters\n", - "\n", - " \n", - "class SGD(object):\n", - " \n", - " def __init__(self, parameters, alpha=0.1):\n", - " self.parameters = parameters\n", - " self.alpha = alpha\n", - " \n", - " def zero(self):\n", - " for p in self.parameters:\n", - " p.grad.data *= 0\n", - " \n", - " def step(self, zero=True):\n", - " \n", - " for p in self.parameters:\n", - " \n", - " p.data -= p.grad.data * self.alpha\n", - " \n", - " if(zero):\n", - " p.grad.data *= 0\n", - "\n", - "\n", - "class Linear(Layer):\n", - "\n", - " def __init__(self, n_inputs, n_outputs, bias=True):\n", - " super().__init__()\n", - " \n", - " self.use_bias = bias\n", - " \n", - " W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0/(n_inputs))\n", - " self.weight = Tensor(W, autograd=True)\n", - " if(self.use_bias):\n", - " self.bias = Tensor(np.zeros(n_outputs), autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " \n", - " if(self.use_bias): \n", - " self.parameters.append(self.bias)\n", - "\n", - " def forward(self, input):\n", - " if(self.use_bias):\n", - " return input.mm(self.weight)+self.bias.expand(0,len(input.data))\n", - " return input.mm(self.weight)\n", - "\n", - "\n", - "class Sequential(Layer):\n", - " \n", - " def __init__(self, layers=list()):\n", - " super().__init__()\n", - " \n", - " self.layers = layers\n", - " \n", - " def add(self, layer):\n", - " self.layers.append(layer)\n", - " \n", - " def forward(self, input):\n", - " for layer in self.layers:\n", - " input = layer.forward(input)\n", - " return input\n", - " \n", - " def get_parameters(self):\n", - " params = list()\n", - " for l in self.layers:\n", - " params += l.get_parameters()\n", - " return params\n", - "\n", - "\n", - "class Embedding(Layer):\n", - " \n", - " def __init__(self, vocab_size, dim):\n", - " super().__init__()\n", - " \n", - " self.vocab_size = vocab_size\n", - " self.dim = dim\n", - " \n", - " # this random initialiation style is just a convention from word2vec\n", - " self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)\n", - " \n", - " self.parameters.append(self.weight)\n", - " \n", - " def forward(self, input):\n", - " return self.weight.index_select(input)\n", - "\n", - "\n", - "class Tanh(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.tanh()\n", - "\n", - "\n", - "class Sigmoid(Layer):\n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input):\n", - " return input.sigmoid()\n", - " \n", - 
"\n", - "class CrossEntropyLoss(object):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input, target):\n", - " return input.cross_entropy(target)\n", - "\n", - "class MSELoss(object):\n", - " \n", - " def __init__(self):\n", - " super().__init__()\n", - " \n", - " def forward(self, input, target):\n", - " dif = input - target\n", - " return (dif * dif).sum(0)\n", - " \n", - "class RNNCell(Layer):\n", - " \n", - " def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):\n", - " super().__init__()\n", - "\n", - " self.n_inputs = n_inputs\n", - " self.n_hidden = n_hidden\n", - " self.n_output = n_output\n", - " \n", - " if(activation == 'sigmoid'):\n", - " self.activation = Sigmoid()\n", - " elif(activation == 'tanh'):\n", - " self.activation == Tanh()\n", - " else:\n", - " raise Exception(\"Non-linearity not found\")\n", - "\n", - " self.w_ih = Linear(n_inputs, n_hidden)\n", - " self.w_hh = Linear(n_hidden, n_hidden)\n", - " self.w_ho = Linear(n_hidden, n_output)\n", - " \n", - " self.parameters += self.w_ih.get_parameters()\n", - " self.parameters += self.w_hh.get_parameters()\n", - " self.parameters += self.w_ho.get_parameters() \n", - " \n", - " def forward(self, input, hidden):\n", - " from_prev_hidden = self.w_hh.forward(hidden)\n", - " combined = self.w_ih.forward(input) + from_prev_hidden\n", - " new_hidden = self.activation.forward(combined)\n", - " output = self.w_ho.forward(new_hidden)\n", - " return output, new_hidden\n", - " \n", - " def init_hidden(self, batch_size=1):\n", - " return Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)\n", - " \n", - "class LSTMCell(Layer):\n", - " \n", - " def __init__(self, n_inputs, n_hidden, n_output):\n", - " super().__init__()\n", - "\n", - " self.n_inputs = n_inputs\n", - " self.n_hidden = n_hidden\n", - " self.n_output = n_output\n", - "\n", - " self.xf = Linear(n_inputs, n_hidden)\n", - " self.xi = Linear(n_inputs, n_hidden)\n", - " self.xo = Linear(n_inputs, n_hidden) \n", - " self.xc = Linear(n_inputs, n_hidden) \n", - " \n", - " self.hf = Linear(n_hidden, n_hidden, bias=False)\n", - " self.hi = Linear(n_hidden, n_hidden, bias=False)\n", - " self.ho = Linear(n_hidden, n_hidden, bias=False)\n", - " self.hc = Linear(n_hidden, n_hidden, bias=False) \n", - " \n", - " self.w_ho = Linear(n_hidden, n_output, bias=False)\n", - " \n", - " self.parameters += self.xf.get_parameters()\n", - " self.parameters += self.xi.get_parameters()\n", - " self.parameters += self.xo.get_parameters()\n", - " self.parameters += self.xc.get_parameters()\n", - "\n", - " self.parameters += self.hf.get_parameters()\n", - " self.parameters += self.hi.get_parameters() \n", - " self.parameters += self.ho.get_parameters() \n", - " self.parameters += self.hc.get_parameters() \n", - " \n", - " self.parameters += self.w_ho.get_parameters() \n", - " \n", - " def forward(self, input, hidden):\n", - " \n", - " prev_hidden = hidden[0] \n", - " prev_cell = hidden[1]\n", - " \n", - " f = (self.xf.forward(input) + self.hf.forward(prev_hidden)).sigmoid()\n", - " i = (self.xi.forward(input) + self.hi.forward(prev_hidden)).sigmoid()\n", - " o = (self.xo.forward(input) + self.ho.forward(prev_hidden)).sigmoid() \n", - " g = (self.xc.forward(input) + self.hc.forward(prev_hidden)).tanh() \n", - " c = (f * prev_cell) + (i * g)\n", - "\n", - " h = o * c.tanh()\n", - " \n", - " output = self.w_ho.forward(h)\n", - " return output, (h, c)\n", - " \n", - " def init_hidden(self, batch_size=1):\n", - " init_hidden = 
Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)\n", - " init_cell = Tensor(np.zeros((batch_size,self.n_hidden)), autograd=True)\n", - " init_hidden.data[:,0] += 1\n", - " init_cell.data[:,0] += 1\n", - " return (init_hidden, init_cell)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Step 1: Plan Ole Fashioned Deep Learning (Email Spam Detection)" - ] - }, - { - "cell_type": "code", - "execution_count": 442, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from collections import Counter\n", - "import random\n", - "import sys\n", - "np.random.seed(12345)\n", - "\n", - "# dataset from http://www2.aueb.gr/users/ion/data/enron-spam/\n", - "\n", - "import codecs\n", - "with codecs.open('spam.txt', \"r\",encoding='utf-8', errors='ignore') as fdata:\n", - " raw = fdata.readlines()\n", - "\n", - "vocab = set()\n", - " \n", - "spam = list()\n", - "for row in raw:\n", - " spam.append(set(row[:-2].split(\" \")))\n", - " for word in spam[-1]:\n", - " vocab.add(word)\n", - " \n", - "import codecs\n", - "with codecs.open('ham.txt', \"r\",encoding='utf-8', errors='ignore') as fdata:\n", - " raw = fdata.readlines()\n", - "\n", - "ham = list()\n", - "for row in raw:\n", - " ham.append(set(row[:-2].split(\" \")))\n", - " for word in ham[-1]:\n", - " vocab.add(word)\n", - " \n", - "vocab.add(\"\")\n", - "\n", - "vocab = list(vocab)\n", - "w2i = {}\n", - "for i,w in enumerate(vocab):\n", - " w2i[w] = i\n", - " \n", - "def to_indices(input, l=500):\n", - " indices = list()\n", - " for line in input:\n", - " if(len(line) < l):\n", - " line = list(line) + [\"\"] * (l - len(line))\n", - " idxs = list()\n", - " for word in line:\n", - " idxs.append(w2i[word])\n", - " indices.append(idxs)\n", - " return indices\n", - " \n", - "spam_idx = to_indices(spam)\n", - "ham_idx = to_indices(ham)\n", - "\n", - "train_spam_idx = spam_idx[0:-1000]\n", - "train_ham_idx = ham_idx[0:-1000]\n", - "\n", - "test_spam_idx = spam_idx[-1000:]\n", - "test_ham_idx = ham_idx[-1000:]\n", - "\n", - "train_data = list()\n", - "train_target = list()\n", - "\n", - "test_data = list()\n", - "test_target = list()\n", - "\n", - "for i in range(max(len(train_spam_idx),len(train_ham_idx))):\n", - " train_data.append(train_spam_idx[i%len(train_spam_idx)])\n", - " train_target.append([1])\n", - " \n", - " train_data.append(train_ham_idx[i%len(train_ham_idx)])\n", - " train_target.append([0])\n", - " \n", - "for i in range(max(len(test_spam_idx),len(test_ham_idx))):\n", - " test_data.append(test_spam_idx[i%len(test_spam_idx)])\n", - " test_target.append([1])\n", - " \n", - " test_data.append(test_ham_idx[i%len(test_ham_idx)])\n", - " test_target.append([0])" - ] - }, - { - "cell_type": "code", - "execution_count": 457, - "metadata": {}, - "outputs": [], - "source": [ - "def train(model, input_data, target_data, batch_size=500, iterations=5):\n", - " \n", - " criterion = MSELoss()\n", - " optim = SGD(parameters=model.get_parameters(), alpha=0.01)\n", - " \n", - " n_batches = int(len(input_data) / batch_size)\n", - " for iter in range(iterations):\n", - " iter_loss = 0\n", - " for b_i in range(n_batches):\n", - "\n", - " # padding token should stay at 0\n", - " model.weight.data[w2i['']] *= 0 \n", - " input = Tensor(input_data[b_i*bs:(b_i+1)*bs], autograd=True)\n", - " target = Tensor(target_data[b_i*bs:(b_i+1)*bs], autograd=True)\n", - "\n", - " pred = model.forward(input).sum(1).sigmoid()\n", - " loss = criterion.forward(pred,target)\n", - " loss.backward()\n", - " optim.step()\n", 
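
For reference, the spam-detection training cell being removed here boils down to: zero the padding weight, slice a batch, forward pass (sum of per-word weights pushed through a sigmoid), MSE loss, backward, SGD step. A minimal stand-alone NumPy sketch of that same loop follows; it is illustrative only — the vocabulary size, padding index, learning rate and toy batch are assumptions, and it uses a single batch-size name (the deleted cell slices with `bs` while its signature defines `batch_size`).

    import numpy as np

    np.random.seed(0)
    vocab_size, pad_idx, alpha = 10, 0, 0.01       # illustrative assumptions
    weights = np.zeros((vocab_size, 1))            # one scalar weight per word

    def forward(batch):
        # batch: (n, seq_len) array of word indices; logit = sum of the words' weights
        return 1 / (1 + np.exp(-weights[batch].sum(axis=1)))

    def train_step(batch, target):
        pred = forward(batch)                                   # (n, 1)
        grad = (pred - target) * pred * (1 - pred)              # d(squared error)/d(logit), up to a constant
        per_word = np.repeat(grad, batch.shape[1], axis=0)      # one gradient entry per word position
        np.add.at(weights, batch.ravel(), -alpha * per_word)    # scatter-add updates (duplicates accumulate)
        weights[pad_idx] *= 0                                   # padding weight stays at zero
        return float(((pred - target) ** 2).mean())

    # toy batch: word 3 marks "spam", word 5 marks "ham", 0 is padding
    X = np.array([[3, 3, 0], [5, 5, 0]])
    y = np.array([[1.0], [0.0]])
    for _ in range(200):
        loss = train_step(X, y)
    print(round(loss, 3), (forward(X) > 0.5).astype(int).ravel())

Zeroing the padding row after every update mirrors the "padding token should stay at 0" step in the deleted cell, so padded positions never influence the prediction.
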
- "\n", - " iter_loss += loss.data[0] / bs\n", - "\n", - " sys.stdout.write(\"\\r\\tLoss:\" + str(iter_loss / (b_i+1)))\n", - " print()\n", - " return model" - ] - }, - { - "cell_type": "code", - "execution_count": 458, - "metadata": {}, - "outputs": [], - "source": [ - "def test(model, test_input, test_output):\n", - " \n", - " model.weight.data[w2i['']] *= 0 \n", - " \n", - " input = Tensor(test_input, autograd=True)\n", - " target = Tensor(test_output, autograd=True)\n", - "\n", - " pred = model.forward(input).sum(1).sigmoid()\n", - " return ((pred.data > 0.5) == target.data).mean()" - ] - }, - { - "cell_type": "code", - "execution_count": 459, - "metadata": {}, - "outputs": [], - "source": [ - "model = Embedding(vocab_size=len(vocab), dim=1)\n", - "model.weight.data *= 0\n", - "criterion = MSELoss()\n", - "optim = SGD(parameters=model.get_parameters(), alpha=0.01)" - ] - }, - { - "cell_type": "code", - "execution_count": 446, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(3):\n", - " model = train(model, train_data, train_target, iterations=1)\n", - " print(\"% Correct on Test Set: \" + str(test(model, test_data, test_target)*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Basic Federated Learning" - ] - }, - { - "cell_type": "code", - "execution_count": 464, - "metadata": {}, - "outputs": [], - "source": [ - "bob = (train_data[0:1000], train_target[0:1000])\n", - "alice = (train_data[1000:2000], train_target[1000:2000])\n", - "sue = (train_data[2000:], train_target[2000:])" - ] - }, - { - "cell_type": "code", - "execution_count": 465, - "metadata": {}, - "outputs": [], - "source": [ - "model = Embedding(vocab_size=len(vocab), dim=1)\n", - "model.weight.data *= 0" - ] - }, - { - "cell_type": "code", - "execution_count": 466, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.21908166249699718\n", - "\n", - "\tStep 2: send the model to Alice\n", - "\tLoss:0.2937106899184867\n", - "\n", - "\tStep 3: Send the model to Sue\n", - "\tLoss:0.033339966977175894\n", - "\n", - "\tAverage Everyone's New Models\n", - "\t% Correct on Test Set: 84.05\n", - "\n", - "Repeat!!\n", - "\n", - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.06625367483630413\n", - "\n", - "\tStep 2: send the model to Alice\n", - "\tLoss:0.09595374225556821\n", - "\n", - "\tStep 3: Send the model to Sue\n", - "\tLoss:0.020290247881140743\n", - "\n", - "\tAverage Everyone's New Models\n", - "\t% Correct on Test Set: 92.25\n", - "\n", - "Repeat!!\n", - "\n", - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.030819682914453833\n", - "\n", - "\tStep 2: send the model to Alice\n", - "\tLoss:0.03580324891736099\n", - "\n", - "\tStep 3: Send the model to Sue\n", - "\tLoss:0.015368461608470256\n", - "\n", - "\tAverage Everyone's New Models\n", - "\t% Correct on Test Set: 98.8\n", - "\n", - "Repeat!!\n", - "\n" - ] - } - ], - "source": [ - "for i in range(3):\n", - " print(\"Starting Training Round...\")\n", - " print(\"\\tStep 1: send the model to Bob\")\n", - " bob_model = train(copy.deepcopy(model), bob[0], bob[1], iterations=1)\n", - " \n", - " print(\"\\n\\tStep 2: send the model to Alice\")\n", - " alice_model = train(copy.deepcopy(model), alice[0], alice[1], iterations=1)\n", - " \n", - " print(\"\\n\\tStep 3: Send the model to Sue\")\n", - " sue_model = 
train(copy.deepcopy(model), sue[0], sue[1], iterations=1)\n", - " \n", - " print(\"\\n\\tAverage Everyone's New Models\")\n", - " model.weight.data = (bob_model.weight.data + \\\n", - " alice_model.weight.data + \\\n", - " sue_model.weight.data)/3\n", - " \n", - " print(\"\\t% Correct on Test Set: \" + \\\n", - " str(test(model, test_data, test_target)*100))\n", - " \n", - " print(\"\\nRepeat!!\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hacking Federated Learning" - ] - }, - { - "cell_type": "code", - "execution_count": 468, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\tLoss:0.0005\n" - ] - } - ], - "source": [ - "import copy\n", - "\n", - "bobs_email = [\"my\", \"computer\", \"password\", \"is\", \"pizza\"]\n", - "\n", - "bob_input = np.array([[w2i[x] for x in bobs_email]])\n", - "bob_target = np.array([[0]])\n", - "\n", - "model = Embedding(vocab_size=len(vocab), dim=1)\n", - "model.weight.data *= 0\n", - "\n", - "bobs_model = train(copy.deepcopy(model), bob_input, bob_target, iterations=1, batch_size=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 469, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "is\n", - "pizza\n", - "computer\n", - "password\n", - "my\n" - ] - } - ], - "source": [ - "for i, v in enumerate(bobs_model.weight.data - model.weight.data):\n", - " if(v != 0):\n", - " print(vocab[i])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Homomorphic Encryption" - ] - }, - { - "cell_type": "code", - "execution_count": 485, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The Answer: 8\n" - ] - } - ], - "source": [ - "import phe\n", - "\n", - "public_key, private_key = phe.generate_paillier_keypair(n_length=1024)\n", - "\n", - "# encrypt the number \"5\"\n", - "x = public_key.encrypt(5)\n", - "\n", - "# encrypt the number \"3\"\n", - "y = public_key.encrypt(3)\n", - "\n", - "# add the two encrypted values\n", - "z = x + y\n", - "\n", - "# decrypt the result\n", - "z_ = private_key.decrypt(z)\n", - "print(\"The Answer: \" + str(z_))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Secure Aggregation" - ] - }, - { - "cell_type": "code", - "execution_count": 567, - "metadata": {}, - "outputs": [], - "source": [ - "model = Embedding(vocab_size=len(vocab), dim=1)\n", - "model.weight.data *= 0\n", - "\n", - "# note that in production the n_length should be at least 1024\n", - "public_key, private_key = phe.generate_paillier_keypair(n_length=128)\n", - "\n", - "def train_and_encrypt(model, input, target, pubkey):\n", - " new_model = train(copy.deepcopy(model), input, target, iterations=1)\n", - "\n", - " encrypted_weights = list()\n", - " for val in new_model.weight.data[:,0]:\n", - " encrypted_weights.append(public_key.encrypt(val))\n", - " ew = np.array(encrypted_weights).reshape(new_model.weight.data.shape)\n", - " \n", - " return ew" - ] - }, - { - "cell_type": "code", - "execution_count": 568, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 569, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.21908166249699718\n", - "\n", - "\tStep 2: send the model to Alice\n", - "\tLoss:0.2937106899184867\n", - "\n", - "\tStep 3: Send the 
model to Sue\n", - "\tLoss:0.033339966977175894\n", - "\n", - "\tStep 4: Bob, Alice, and Sue send their\n", - "\tencrypted models to each other.\n", - "\n", - "\tStep 5: only the aggregated model\n", - "\n", - "\tis sent back to the model owner who\n", - "\n", - "\t can decrypt it.\n", - "\t% Correct on Test Set: 98.75\n", - "\n", - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.063664140530356044\n", - "\n", - "\tStep 2: send the model to Alice\n", - "\tLoss:0.06100035791351306\n", - "\n", - "\tStep 3: Send the model to Sue\n", - "\tLoss:0.025483920416627266\n", - "\n", - "\tStep 4: Bob, Alice, and Sue send their\n", - "\tencrypted models to each other.\n", - "\n", - "\tStep 5: only the aggregated model\n", - "\n", - "\tis sent back to the model owner who\n", - "\n", - "\t can decrypt it.\n", - "\t% Correct on Test Set: 99.05000000000001\n", - "\n", - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.058477976535441636\n", - "\n", - "\tStep 2: send the model to Alice\n", - "\tLoss:0.05987976552444443\n", - "\n", - "\tStep 3: Send the model to Sue\n", - "\tLoss:0.024763428511034746\n", - "\n", - "\tStep 4: Bob, Alice, and Sue send their\n", - "\tencrypted models to each other.\n", - "\n", - "\tStep 5: only the aggregated model\n", - "\n", - "\tis sent back to the model owner who\n", - "\n", - "\t can decrypt it.\n", - "\t% Correct on Test Set: 99.15\n", - "\n", - "Starting Training Round...\n", - "\tStep 1: send the model to Bob\n", - "\tLoss:0.0579450413900613\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\tStep 1: send the model to Bob\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m bob_encrypted_model = train_and_encrypt(copy.deepcopy(model), \n\u001b[0;32m----> 5\u001b[0;31m bob[0], bob[1], public_key)\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n\\tStep 2: send the model to Alice\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mtrain_and_encrypt\u001b[0;34m(model, input, target, pubkey)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mencrypted_weights\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mnew_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mencrypted_weights\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpublic_key\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencrypt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mew\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencrypted_weights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atrask/anaconda/lib/python3.6/site-packages/phe-1.3.0-py3.6.egg/phe/paillier.py\u001b[0m in \u001b[0;36mencrypt\u001b[0;34m(self, value, precision, r_value)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[0mencoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mEncodedNumber\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecision\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 173\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencrypt_encoded\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 174\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mencrypt_encoded\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atrask/anaconda/lib/python3.6/site-packages/phe-1.3.0-py3.6.egg/phe/paillier.py\u001b[0m in \u001b[0;36mencrypt_encoded\u001b[0;34m(self, encoding, r_value)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0mencrypted_number\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mEncryptedNumber\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mciphertext\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexponent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 190\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mr_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 191\u001b[0;31m \u001b[0mencrypted_number\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobfuscate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 192\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mencrypted_number\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atrask/anaconda/lib/python3.6/site-packages/phe-1.3.0-py3.6.egg/phe/paillier.py\u001b[0m in \u001b[0;36mobfuscate\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 860\u001b[0m \u001b[0msend_to_nsa\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproduct\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# NSA can't deduce 2.718 by bruteforce attack\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \"\"\"\n\u001b[0;32m--> 862\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpublic_key\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_random_lt_n\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 863\u001b[0m \u001b[0mr_pow_n\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpowmod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpublic_key\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpublic_key\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnsquare\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 864\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__ciphertext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__ciphertext\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mr_pow_n\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpublic_key\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnsquare\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atrask/anaconda/lib/python3.6/site-packages/phe-1.3.0-py3.6.egg/phe/paillier.py\u001b[0m in \u001b[0;36mget_random_lt_n\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_random_lt_n\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;34m\"\"\"Return a cryptographically random number less than :attr:`n`\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSystemRandom\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mencrypt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprecision\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atrask/anaconda/lib/python3.6/random.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mVERSION\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m3\u001b[0m \u001b[0;31m# used by getstate/setstate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 87\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 88\u001b[0m \"\"\"Initialize an instance.\n\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "for i in range(3):\n", - " print(\"\\nStarting Training Round...\")\n", - " print(\"\\tStep 1: send the model to Bob\")\n", - " bob_encrypted_model = 
train_and_encrypt(copy.deepcopy(model), \n", - " bob[0], bob[1], public_key)\n", - "\n", - " print(\"\\n\\tStep 2: send the model to Alice\")\n", - " alice_encrypted_model = train_and_encrypt(copy.deepcopy(model), \n", - " alice[0], alice[1], public_key)\n", - "\n", - " print(\"\\n\\tStep 3: Send the model to Sue\")\n", - " sue_encrypted_model = train_and_encrypt(copy.deepcopy(model), \n", - " sue[0], sue[1], public_key)\n", - "\n", - " print(\"\\n\\tStep 4: Bob, Alice, and Sue send their\")\n", - " print(\"\\tencrypted models to each other.\")\n", - " aggregated_model = bob_encrypted_model + \\\n", - " alice_encrypted_model + \\\n", - " sue_encrypted_model\n", - "\n", - " print(\"\\n\\tStep 5: only the aggregated model\")\n", - " print(\"\\tis sent back to the model owner who\")\n", - " print(\"\\t can decrypt it.\")\n", - " raw_values = list()\n", - " for val in sue_encrypted_model.flatten():\n", - " raw_values.append(private_key.decrypt(val))\n", - " model.weight.data = np.array(raw_values).reshape(model.weight.data.shape)/3\n", - "\n", - " print(\"\\t% Correct on Test Set: \" + \\\n", - " str(test(model, test_data, test_target)*100))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# def train_and_encrypt()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter3 - Forward Propagation - Intro to Neural Prediction-checkpoint.ipynb b/.ipynb_checkpoints/Chapter3 - Forward Propagation - Intro to Neural Prediction-checkpoint.ipynb deleted file mode 100644 index 6be2a59..0000000 --- a/.ipynb_checkpoints/Chapter3 - Forward Propagation - Intro to Neural Prediction-checkpoint.ipynb +++ /dev/null @@ -1,572 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A Simple Neural Network Making a Prediction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is a Neural Network?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8500000000000001\n" - ] - } - ], - "source": [ - "# The network:\n", - "\n", - "weight = 0.1 \n", - "def neural_network(input, weight):\n", - " prediction = input * weight\n", - " return prediction\n", - "\n", - "# How we use the network to predict something:\n", - "\n", - "number_of_toes = [8.5, 9.5, 10, 9]\n", - "input = number_of_toes[0]\n", - "pred = neural_network(input,weight)\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Making a Prediction with Multiple Inputs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Complete Runnable Code" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9800000000000001\n" - ] - } - ], - "source": [ - "def w_sum(a,b):\n", - " assert(len(a) == len(b))\n", - " output = 0\n", - " for i in range(len(a)):\n", - " output += (a[i] * b[i])\n", - " return output\n", - "\n", - "weights = [0.1, 0.2, 0] \n", - " \n", - "def neural_network(input, weights):\n", - " pred = w_sum(input,weights)\n", - " return pred\n", - "\n", - "# This dataset is the current\n", - "# status at the beginning of\n", - "# each game for the first 4 games\n", - "# in a season.\n", - "\n", - "# toes = current number of toes\n", - "# wlrec = current games won (percent)\n", - "# nfans = fan count (in millions)\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65, 0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "# Input corresponds to every entry\n", - "# for the first game of the season.\n", - "\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "pred = neural_network(input,weights)\n", - "\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### NumPy Code" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9800000000000001\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "weights = np.array([0.1, 0.2, 0])\n", - "def neural_network(input, weights):\n", - " pred = input.dot(weights)\n", - " return pred\n", - " \n", - "toes = np.array([8.5, 9.5, 9.9, 9.0])\n", - "wlrec = np.array([0.65, 0.8, 0.8, 0.9])\n", - "nfans = np.array([1.2, 1.3, 0.5, 1.0])\n", - "\n", - "# Input corresponds to every entry\n", - "# for the first game of the season.\n", - "\n", - "input = np.array([toes[0],wlrec[0],nfans[0]])\n", - "pred = neural_network(input,weights)\n", - "\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Making a Prediction with Multiple Outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.195, 0.13, 0.5850000000000001]\n" - ] - } - ], - "source": [ - "# Instead of predicting just \n", - "# whether the team won or lost, \n", - "# now we're also predicting whether\n", - "# they are happy/sad AND the percentage\n", - "# of the team that is hurt. 
We are\n", - "# making this prediction using only\n", - "# the current win/loss record.\n", - "\n", - "def ele_mul(number,vector):\n", - " output = [0,0,0]\n", - " assert(len(output) == len(vector))\n", - " for i in range(len(vector)):\n", - " output[i] = number * vector[i]\n", - " return output\n", - "\n", - "weights = [0.3, 0.2, 0.9] \n", - "\n", - "def neural_network(input, weights):\n", - " pred = ele_mul(input,weights)\n", - " return pred\n", - " \n", - "wlrec = [0.65, 0.8, 0.8, 0.9]\n", - "input = wlrec[0]\n", - "pred = neural_network(input,weights)\n", - "\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Predicting with Multiple Inputs & Outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.555, 0.9800000000000001, 0.9650000000000001]\n" - ] - } - ], - "source": [ - " #toes %win #fans\n", - "weights = [ [0.1, 0.1, -0.3], #hurt?\n", - " [0.1, 0.2, 0.0], #win?\n", - " [0.0, 1.3, 0.1] ] #sad?\n", - "\n", - "def w_sum(a,b):\n", - " assert(len(a) == len(b))\n", - " output = 0\n", - " for i in range(len(a)):\n", - " output += (a[i] * b[i])\n", - " return output\n", - "\n", - "def vect_mat_mul(vect,matrix):\n", - " assert(len(vect) == len(matrix))\n", - " output = [0,0,0]\n", - " for i in range(len(vect)):\n", - " output[i] = w_sum(vect,matrix[i])\n", - " return output\n", - "\n", - "def neural_network(input, weights):\n", - " pred = vect_mat_mul(input,weights)\n", - " return pred\n", - "\n", - "# This dataset is the current\n", - "# status at the beginning of\n", - "# each game for the first 4 games\n", - "# in a season.\n", - "\n", - "# toes = current number of toes\n", - "# wlrec = current games won (percent)\n", - "# nfans = fan count (in millions)\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65,0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "# Input corresponds to every entry\n", - "# for the first game of the season.\n", - "\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "pred = neural_network(input,weights)\n", - "\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Predicting on Predictions" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.21350000000000002, 0.14500000000000002, 0.5065]\n" - ] - } - ], - "source": [ - " #toes %win #fans\n", - "ih_wgt = [ [0.1, 0.2, -0.1], #hid[0]\n", - " [-0.1,0.1, 0.9], #hid[1]\n", - " [0.1, 0.4, 0.1] ] #hid[2]\n", - "\n", - " #hid[0] hid[1] hid[2]\n", - "hp_wgt = [ [0.3, 1.1, -0.3], #hurt?\n", - " [0.1, 0.2, 0.0], #win?\n", - " [0.0, 1.3, 0.1] ] #sad?\n", - "\n", - "weights = [ih_wgt, hp_wgt]\n", - "\n", - "def neural_network(input, weights):\n", - " hid = vect_mat_mul(input,weights[0])\n", - " pred = vect_mat_mul(hid,weights[1])\n", - " return pred\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65,0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "# Input corresponds to every entry\n", - "# for the first game of the season.\n", - "\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "pred = neural_network(input,weights)\n", - "\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# NumPy Version" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "[0.2135 0.145 0.5065]\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "#toes %win #fans\n", - "ih_wgt = np.array([ \n", - " [0.1, 0.2, -0.1], #hid[0]\n", - " [-0.1,0.1, 0.9], #hid[1]\n", - " [0.1, 0.4, 0.1]]).T #hid[2]\n", - "\n", - "\n", - "# hid[0] hid[1] hid[2]\n", - "hp_wgt = np.array([ \n", - " [0.3, 1.1, -0.3], #hurt?\n", - " [0.1, 0.2, 0.0], #win?\n", - " [0.0, 1.3, 0.1] ]).T #sad?\n", - "\n", - "weights = [ih_wgt, hp_wgt]\n", - "\n", - "def neural_network(input, weights):\n", - "\n", - " hid = input.dot(weights[0])\n", - " pred = hid.dot(weights[1])\n", - " return pred\n", - "\n", - "\n", - "toes = np.array([8.5, 9.5, 9.9, 9.0])\n", - "wlrec = np.array([0.65,0.8, 0.8, 0.9])\n", - "nfans = np.array([1.2, 1.3, 0.5, 1.0])\n", - "\n", - "input = np.array([toes[0],wlrec[0],nfans[0]])\n", - "\n", - "pred = neural_network(input,weights)\n", - "print(pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A Quick Primer on NumPy" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0 1 2 3]\n", - "[4 5 6 7]\n", - "[[0 1 2 3]\n", - " [4 5 6 7]]\n", - "[[0. 0. 0. 0.]\n", - " [0. 0. 0. 0.]]\n", - "[[0.40221396 0.5714968 0.68579318 0.73326444 0.42793703]\n", - " [0.19555759 0.20401945 0.21708259 0.95738529 0.42907317]]\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "a = np.array([0,1,2,3]) # a vector\n", - "b = np.array([4,5,6,7]) # another vector\n", - "c = np.array([[0,1,2,3], # a matrix\n", - " [4,5,6,7]])\n", - "\n", - "d = np.zeros((2,4)) # (2x4 matrix of zeros)\n", - "e = np.random.rand(2,5) # random 2x5\n", - "# matrix with all numbers between 0 and 1\n", - "\n", - "print(a)\n", - "print(b)\n", - "print(c)\n", - "print(d)\n", - "print(e)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0. 0. 0.]]\n", - "[[0. 0. 
0.]]\n" - ] - }, - { - "ename": "ValueError", - "evalue": "operands could not be broadcast together with shapes (1,4) (4,3) ", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# multiplies every number in matrix \"c\" by 0.2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# multiplies elementwise between a and b (columns paired up)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# elementwise multiplication then multiplied by 0.2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (1,4) (4,3) " - ] - } - ], - "source": [ - "print(a * 0.1) # multiplies every number in vector \"a\" by 0.1\n", - " \n", - "print(c * 0.2) # multiplies every number in matrix \"c\" by 0.2\n", - " \n", - "print(a * b) # multiplies elementwise between a and b (columns paired up)\n", - " \n", - "print(a * b * 0.2) # elementwise multiplication then multiplied by 0.2\n", - " \n", - "print(a * c) # since c has the same number of columns as a, this performs\n", - "# elementwise multiplication on every row of the matrix \"c\"\n", - "\n", - "print(a * e) # since a and e don't have the same number of columns, this\n", - "# throws a \"Value Error: operands could not be broadcast together with..\"" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1, 3)\n" - ] - } - ], - "source": [ - "a = np.zeros((1,4)) # vector of length 4\n", - "b = np.zeros((4,3)) # matrix with 4 rows & 3 columns\n", - "\n", - "c = a.dot(b)\n", - "print(c.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(2, 3)\n", - "(2, 3)\n", - "(4, 6)\n" - ] - }, - { - "ename": "ValueError", - "evalue": "shapes (5,4) and (5,6) not aligned: 4 (dim 1) != 5 (dim 0)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mh\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# matrix with 5 rows and 4 columns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# matrix with 5 rows & 6 columns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# throws an error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: shapes (5,4) and (5,6) not aligned: 4 (dim 1) != 5 (dim 0)" - ] - } - ], - "source": [ - "a = np.zeros((2,4)) # matrix with 2 rows and 4 columns\n", - "b = np.zeros((4,3)) # matrix with 4 rows & 3 columns\n", - "\n", - "c = a.dot(b)\n", - "print(c.shape) # outputs (2,3)\n", - "\n", - "e = np.zeros((2,1)) # matrix with 2 rows and 1 columns\n", - "f = np.zeros((1,3)) # matrix with 1 row & 3 columns\n", - "\n", - "g = e.dot(f)\n", - "print(g.shape) # outputs (2,3)\n", - "\n", - "h = np.zeros((5,4)).T # matrix with 4 rows and 5 columns\n", - "i = np.zeros((5,6)) # matrix with 6 rows & 5 columns\n", - "\n", - "j = h.dot(i)\n", - "print(j.shape) # outputs (4,6)\n", - "\n", - "h = np.zeros((5,4)) # matrix with 5 rows and 4 columns\n", - "i = np.zeros((5,6)) # matrix with 5 rows & 6 columns\n", - "j = h.dot(i)\n", - "print(j.shape) # throws an error" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter4 - Gradient Descent - Intro to Neural Learning-checkpoint.ipynb b/.ipynb_checkpoints/Chapter4 - Gradient Descent - Intro to Neural Learning-checkpoint.ipynb deleted file mode 100644 index 7213f40..0000000 --- a/.ipynb_checkpoints/Chapter4 - Gradient Descent - Intro to Neural Learning-checkpoint.ipynb +++ /dev/null @@ -1,1752 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compare: Does our network make good predictions?" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.30250000000000005\n" - ] - } - ], - "source": [ - "knob_weight = 0.5\n", - "input = 0.5\n", - "goal_pred = 0.8\n", - "\n", - "pred = input * knob_weight\n", - "error = (pred - goal_pred) ** 2\n", - "print(error)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# What's the Simplest Form of Neural Learning?" 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Learning using the Hot and Cold Method" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.022499999999999975\n" - ] - } - ], - "source": [ - "# 1) An Empty Network\n", - "\n", - "weight = 0.1 \n", - "lr = 0.01\n", - "\n", - "def neural_network(input, weight):\n", - " prediction = input * weight\n", - " return prediction\n", - "\n", - "\n", - "# 2) PREDICT: Making A Prediction And Evaluating Error\n", - "\n", - "number_of_toes = [8.5]\n", - "win_or_lose_binary = [1] #(won!!!)\n", - "\n", - "input = number_of_toes[0]\n", - "true = win_or_lose_binary[0]\n", - "\n", - "pred = neural_network(input,weight)\n", - "error = (pred - true) ** 2\n", - "print(error)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.004224999999999993\n" - ] - } - ], - "source": [ - "# 3) COMPARE: Making A Prediction With a *Higher* Weight And Evaluating Error\n", - "\n", - "weight = 0.1 \n", - "\n", - "def neural_network(input, weight):\n", - " prediction = input * weight\n", - " return prediction\n", - "\n", - "number_of_toes = [8.5]\n", - "win_or_lose_binary = [1] #(won!!!)\n", - "\n", - "input = number_of_toes[0]\n", - "true = win_or_lose_binary[0]\n", - "\n", - "lr = 0.01\n", - "p_up = neural_network(input,weight+lr)\n", - "e_up = (p_up - true) ** 2\n", - "print(e_up)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.05522499999999994\n" - ] - } - ], - "source": [ - "# 4) COMPARE: Making A Prediction With a *Lower* Weight And Evaluating Error\n", - "\n", - "weight = 0.1 \n", - "\n", - "def neural_network(input, weight):\n", - " prediction = input * weight\n", - " return prediction\n", - "\n", - "number_of_toes = [8.5]\n", - "win_or_lose_binary = [1] #(won!!!)\n", - "\n", - "input = number_of_toes[0]\n", - "true = win_or_lose_binary[0]\n", - "\n", - "lr = 0.01\n", - "p_dn = neural_network(input,weight-lr)\n", - "e_dn = (p_dn - true) ** 2\n", - "print(e_dn)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hot and Cold Learning" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.30250000000000005 Prediction:0.25\n", - "Error:0.3019502500000001 Prediction:0.2505\n", - "Error:0.30140100000000003 Prediction:0.251\n", - "Error:0.30085225 Prediction:0.2515\n", - "Error:0.30030400000000007 Prediction:0.252\n", - "Error:0.2997562500000001 Prediction:0.2525\n", - "Error:0.29920900000000006 Prediction:0.253\n", - "Error:0.29866224999999996 Prediction:0.2535\n", - "Error:0.29811600000000005 Prediction:0.254\n", - "Error:0.2975702500000001 Prediction:0.2545\n", - "Error:0.29702500000000004 Prediction:0.255\n", - "Error:0.29648025 Prediction:0.2555\n", - "Error:0.29593600000000003 Prediction:0.256\n", - "Error:0.2953922500000001 Prediction:0.2565\n", - "Error:0.294849 Prediction:0.257\n", - "Error:0.29430625 Prediction:0.2575\n", - "Error:0.293764 Prediction:0.258\n", - "Error:0.2932222500000001 Prediction:0.2585\n", - "Error:0.292681 Prediction:0.259\n", - "Error:0.29214025 Prediction:0.2595\n", - "Error:0.2916 Prediction:0.26\n", - "Error:0.2910602500000001 
Prediction:0.2605\n", - "Error:0.29052100000000003 Prediction:0.261\n", - "Error:0.28998225 Prediction:0.2615\n", - "Error:0.28944400000000003 Prediction:0.262\n", - "Error:0.2889062500000001 Prediction:0.2625\n", - "Error:0.28836900000000004 Prediction:0.263\n", - "Error:0.28783224999999996 Prediction:0.2635\n", - "Error:0.28729600000000005 Prediction:0.264\n", - "Error:0.2867602500000001 Prediction:0.2645\n", - "Error:0.286225 Prediction:0.265\n", - "Error:0.28569025 Prediction:0.2655\n", - "Error:0.285156 Prediction:0.266\n", - "Error:0.2846222500000001 Prediction:0.2665\n", - "Error:0.28408900000000004 Prediction:0.267\n", - "Error:0.28355624999999995 Prediction:0.2675\n", - "Error:0.28302400000000005 Prediction:0.268\n", - "Error:0.2824922500000001 Prediction:0.2685\n", - "Error:0.281961 Prediction:0.269\n", - "Error:0.28143025 Prediction:0.2695\n", - "Error:0.28090000000000004 Prediction:0.27\n", - "Error:0.2803702500000001 Prediction:0.2705\n", - "Error:0.279841 Prediction:0.271\n", - "Error:0.27931225 Prediction:0.2715\n", - "Error:0.27878400000000003 Prediction:0.272\n", - "Error:0.2782562500000001 Prediction:0.2725\n", - "Error:0.277729 Prediction:0.273\n", - "Error:0.27720225 Prediction:0.2735\n", - "Error:0.27667600000000003 Prediction:0.274\n", - "Error:0.2761502500000001 Prediction:0.2745\n", - "Error:0.275625 Prediction:0.275\n", - "Error:0.27510025 Prediction:0.2755\n", - "Error:0.27457600000000004 Prediction:0.276\n", - "Error:0.27405225000000005 Prediction:0.2765\n", - "Error:0.273529 Prediction:0.277\n", - "Error:0.27300624999999995 Prediction:0.2775\n", - "Error:0.272484 Prediction:0.278\n", - "Error:0.27196225000000007 Prediction:0.2785\n", - "Error:0.27144100000000004 Prediction:0.279\n", - "Error:0.27092025 Prediction:0.2795\n", - "Error:0.27040000000000003 Prediction:0.28\n", - "Error:0.2698802500000001 Prediction:0.2805\n", - "Error:0.269361 Prediction:0.281\n", - "Error:0.26884224999999995 Prediction:0.28150000000000003\n", - "Error:0.268324 Prediction:0.28200000000000003\n", - "Error:0.2678062500000001 Prediction:0.28250000000000003\n", - "Error:0.267289 Prediction:0.28300000000000003\n", - "Error:0.26677224999999993 Prediction:0.28350000000000003\n", - "Error:0.266256 Prediction:0.28400000000000003\n", - "Error:0.26574025000000007 Prediction:0.28450000000000003\n", - "Error:0.265225 Prediction:0.28500000000000003\n", - "Error:0.26471025 Prediction:0.28550000000000003\n", - "Error:0.264196 Prediction:0.28600000000000003\n", - "Error:0.26368225000000006 Prediction:0.28650000000000003\n", - "Error:0.263169 Prediction:0.28700000000000003\n", - "Error:0.26265625 Prediction:0.28750000000000003\n", - "Error:0.262144 Prediction:0.28800000000000003\n", - "Error:0.26163225000000007 Prediction:0.28850000000000003\n", - "Error:0.261121 Prediction:0.28900000000000003\n", - "Error:0.26061024999999993 Prediction:0.28950000000000004\n", - "Error:0.2601 Prediction:0.29000000000000004\n", - "Error:0.2595902500000001 Prediction:0.29050000000000004\n", - "Error:0.259081 Prediction:0.29100000000000004\n", - "Error:0.25857224999999995 Prediction:0.29150000000000004\n", - "Error:0.258064 Prediction:0.29200000000000004\n", - "Error:0.25755625000000004 Prediction:0.29250000000000004\n", - "Error:0.257049 Prediction:0.29300000000000004\n", - "Error:0.25654224999999997 Prediction:0.29350000000000004\n", - "Error:0.256036 Prediction:0.29400000000000004\n", - "Error:0.25553025000000007 Prediction:0.29450000000000004\n", - "Error:0.255025 Prediction:0.29500000000000004\n", - 
"Error:0.25452024999999995 Prediction:0.29550000000000004\n", - "Error:0.254016 Prediction:0.29600000000000004\n", - "Error:0.25351225000000005 Prediction:0.29650000000000004\n", - "Error:0.253009 Prediction:0.29700000000000004\n", - "Error:0.25250624999999993 Prediction:0.29750000000000004\n", - "Error:0.252004 Prediction:0.29800000000000004\n", - "Error:0.25150225000000004 Prediction:0.29850000000000004\n", - "Error:0.251001 Prediction:0.29900000000000004\n", - "Error:0.2505002499999999 Prediction:0.29950000000000004\n", - "Error:0.25 Prediction:0.30000000000000004\n", - "Error:0.24950025 Prediction:0.30050000000000004\n", - "Error:0.249001 Prediction:0.30100000000000005\n", - "Error:0.24850225 Prediction:0.30150000000000005\n", - "Error:0.248004 Prediction:0.30200000000000005\n", - "Error:0.24750625 Prediction:0.30250000000000005\n", - "Error:0.247009 Prediction:0.30300000000000005\n", - "Error:0.24651225 Prediction:0.30350000000000005\n", - "Error:0.24601599999999998 Prediction:0.30400000000000005\n", - "Error:0.24552025 Prediction:0.30450000000000005\n", - "Error:0.245025 Prediction:0.30500000000000005\n", - "Error:0.24453025 Prediction:0.30550000000000005\n", - "Error:0.244036 Prediction:0.30600000000000005\n", - "Error:0.24354225 Prediction:0.30650000000000005\n", - "Error:0.243049 Prediction:0.30700000000000005\n", - "Error:0.24255625 Prediction:0.30750000000000005\n", - "Error:0.242064 Prediction:0.30800000000000005\n", - "Error:0.24157225 Prediction:0.30850000000000005\n", - "Error:0.241081 Prediction:0.30900000000000005\n", - "Error:0.24059025 Prediction:0.30950000000000005\n", - "Error:0.24009999999999998 Prediction:0.31000000000000005\n", - "Error:0.23961025 Prediction:0.31050000000000005\n", - "Error:0.239121 Prediction:0.31100000000000005\n", - "Error:0.23863225 Prediction:0.31150000000000005\n", - "Error:0.238144 Prediction:0.31200000000000006\n", - "Error:0.23765624999999999 Prediction:0.31250000000000006\n", - "Error:0.237169 Prediction:0.31300000000000006\n", - "Error:0.23668224999999998 Prediction:0.31350000000000006\n", - "Error:0.236196 Prediction:0.31400000000000006\n", - "Error:0.23571024999999998 Prediction:0.31450000000000006\n", - "Error:0.235225 Prediction:0.31500000000000006\n", - "Error:0.23474024999999998 Prediction:0.31550000000000006\n", - "Error:0.234256 Prediction:0.31600000000000006\n", - "Error:0.23377225 Prediction:0.31650000000000006\n", - "Error:0.233289 Prediction:0.31700000000000006\n", - "Error:0.23280625 Prediction:0.31750000000000006\n", - "Error:0.23232399999999997 Prediction:0.31800000000000006\n", - "Error:0.23184224999999997 Prediction:0.31850000000000006\n", - "Error:0.23136099999999998 Prediction:0.31900000000000006\n", - "Error:0.23088024999999998 Prediction:0.31950000000000006\n", - "Error:0.2304 Prediction:0.32000000000000006\n", - "Error:0.22992025 Prediction:0.32050000000000006\n", - "Error:0.22944099999999998 Prediction:0.32100000000000006\n", - "Error:0.22896224999999998 Prediction:0.32150000000000006\n", - "Error:0.228484 Prediction:0.32200000000000006\n", - "Error:0.22800625 Prediction:0.32250000000000006\n", - "Error:0.22752899999999998 Prediction:0.32300000000000006\n", - "Error:0.22705224999999998 Prediction:0.32350000000000007\n", - "Error:0.22657599999999997 Prediction:0.32400000000000007\n", - "Error:0.22610024999999997 Prediction:0.32450000000000007\n", - "Error:0.225625 Prediction:0.32500000000000007\n", - "Error:0.22515024999999997 Prediction:0.32550000000000007\n", - "Error:0.224676 Prediction:0.32600000000000007\n", - 
"Error:0.22420224999999996 Prediction:0.32650000000000007\n", - "Error:0.22372899999999998 Prediction:0.32700000000000007\n", - "Error:0.22325625 Prediction:0.32750000000000007\n", - "Error:0.22278399999999998 Prediction:0.32800000000000007\n", - "Error:0.22231225 Prediction:0.32850000000000007\n", - "Error:0.22184099999999998 Prediction:0.32900000000000007\n", - "Error:0.22137024999999996 Prediction:0.32950000000000007\n", - "Error:0.22089999999999999 Prediction:0.33000000000000007\n", - "Error:0.22043024999999997 Prediction:0.33050000000000007\n", - "Error:0.21996099999999996 Prediction:0.33100000000000007\n", - "Error:0.21949224999999997 Prediction:0.3315000000000001\n", - "Error:0.21902399999999997 Prediction:0.3320000000000001\n", - "Error:0.21855624999999998 Prediction:0.3325000000000001\n", - "Error:0.21808899999999998 Prediction:0.3330000000000001\n", - "Error:0.21762224999999996 Prediction:0.3335000000000001\n", - "Error:0.21715599999999996 Prediction:0.3340000000000001\n", - "Error:0.21669024999999997 Prediction:0.3345000000000001\n", - "Error:0.21622499999999997 Prediction:0.3350000000000001\n", - "Error:0.21576024999999996 Prediction:0.3355000000000001\n", - "Error:0.21529599999999996 Prediction:0.3360000000000001\n", - "Error:0.21483224999999997 Prediction:0.3365000000000001\n", - "Error:0.21436899999999998 Prediction:0.3370000000000001\n", - "Error:0.21390624999999996 Prediction:0.3375000000000001\n", - "Error:0.21344399999999997 Prediction:0.3380000000000001\n", - "Error:0.21298224999999996 Prediction:0.3385000000000001\n", - "Error:0.21252099999999996 Prediction:0.3390000000000001\n", - "Error:0.21206024999999998 Prediction:0.3395000000000001\n", - "Error:0.21159999999999995 Prediction:0.3400000000000001\n", - "Error:0.21114024999999997 Prediction:0.3405000000000001\n", - "Error:0.21068099999999998 Prediction:0.3410000000000001\n", - "Error:0.21022224999999997 Prediction:0.3415000000000001\n", - "Error:0.20976399999999998 Prediction:0.3420000000000001\n", - "Error:0.20930624999999997 Prediction:0.3425000000000001\n", - "Error:0.20884899999999995 Prediction:0.3430000000000001\n", - "Error:0.20839224999999997 Prediction:0.3435000000000001\n", - "Error:0.20793599999999995 Prediction:0.3440000000000001\n", - "Error:0.20748024999999998 Prediction:0.3445000000000001\n", - "Error:0.20702499999999996 Prediction:0.3450000000000001\n", - "Error:0.20657024999999996 Prediction:0.3455000000000001\n", - "Error:0.20611599999999997 Prediction:0.3460000000000001\n", - "Error:0.20566224999999996 Prediction:0.3465000000000001\n", - "Error:0.20520899999999997 Prediction:0.3470000000000001\n", - "Error:0.20475624999999997 Prediction:0.3475000000000001\n", - "Error:0.20430399999999996 Prediction:0.3480000000000001\n", - "Error:0.20385224999999996 Prediction:0.3485000000000001\n", - "Error:0.20340099999999997 Prediction:0.3490000000000001\n", - "Error:0.20295024999999997 Prediction:0.3495000000000001\n", - "Error:0.20249999999999996 Prediction:0.3500000000000001\n", - "Error:0.20205024999999996 Prediction:0.3505000000000001\n", - "Error:0.20160099999999995 Prediction:0.3510000000000001\n", - "Error:0.20115224999999995 Prediction:0.3515000000000001\n", - "Error:0.20070399999999997 Prediction:0.3520000000000001\n", - "Error:0.20025624999999997 Prediction:0.3525000000000001\n", - "Error:0.19980899999999996 Prediction:0.3530000000000001\n", - "Error:0.19936224999999996 Prediction:0.3535000000000001\n", - "Error:0.19891599999999995 Prediction:0.3540000000000001\n", - "Error:0.19847024999999996 
Prediction:0.3545000000000001\n", - "Error:0.19802499999999995 Prediction:0.3550000000000001\n", - "Error:0.19758024999999996 Prediction:0.3555000000000001\n", - "Error:0.19713599999999995 Prediction:0.3560000000000001\n", - "Error:0.19669224999999996 Prediction:0.3565000000000001\n", - "Error:0.19624899999999995 Prediction:0.3570000000000001\n", - "Error:0.19580624999999996 Prediction:0.3575000000000001\n", - "Error:0.19536399999999995 Prediction:0.3580000000000001\n", - "Error:0.19492224999999996 Prediction:0.3585000000000001\n", - "Error:0.19448099999999996 Prediction:0.3590000000000001\n", - "Error:0.19404024999999994 Prediction:0.3595000000000001\n", - "Error:0.19359999999999997 Prediction:0.3600000000000001\n", - "Error:0.19316024999999995 Prediction:0.3605000000000001\n", - "Error:0.19272099999999995 Prediction:0.3610000000000001\n", - "Error:0.19228224999999996 Prediction:0.3615000000000001\n", - "Error:0.19184399999999996 Prediction:0.3620000000000001\n", - "Error:0.19140624999999994 Prediction:0.3625000000000001\n", - "Error:0.19096899999999994 Prediction:0.3630000000000001\n", - "Error:0.19053224999999996 Prediction:0.3635000000000001\n", - "Error:0.19009599999999996 Prediction:0.3640000000000001\n", - "Error:0.18966024999999995 Prediction:0.3645000000000001\n", - "Error:0.18922499999999995 Prediction:0.3650000000000001\n", - "Error:0.18879024999999994 Prediction:0.3655000000000001\n", - "Error:0.18835599999999994 Prediction:0.3660000000000001\n", - "Error:0.18792224999999996 Prediction:0.3665000000000001\n", - "Error:0.18748899999999996 Prediction:0.3670000000000001\n", - "Error:0.18705624999999995 Prediction:0.3675000000000001\n", - "Error:0.18662399999999996 Prediction:0.3680000000000001\n", - "Error:0.18619224999999995 Prediction:0.3685000000000001\n", - "Error:0.18576099999999995 Prediction:0.3690000000000001\n", - "Error:0.18533024999999995 Prediction:0.3695000000000001\n", - "Error:0.18489999999999995 Prediction:0.3700000000000001\n", - "Error:0.18447024999999995 Prediction:0.3705000000000001\n", - "Error:0.18404099999999995 Prediction:0.3710000000000001\n", - "Error:0.18361224999999995 Prediction:0.3715000000000001\n", - "Error:0.18318399999999996 Prediction:0.3720000000000001\n", - "Error:0.18275624999999995 Prediction:0.3725000000000001\n", - "Error:0.18232899999999994 Prediction:0.3730000000000001\n", - "Error:0.18190224999999993 Prediction:0.3735000000000001\n", - "Error:0.18147599999999994 Prediction:0.3740000000000001\n", - "Error:0.18105024999999994 Prediction:0.3745000000000001\n", - "Error:0.18062499999999995 Prediction:0.3750000000000001\n", - "Error:0.18020024999999995 Prediction:0.3755000000000001\n", - "Error:0.17977599999999994 Prediction:0.3760000000000001\n", - "Error:0.17935224999999994 Prediction:0.3765000000000001\n", - "Error:0.17892899999999995 Prediction:0.3770000000000001\n", - "Error:0.17850624999999995 Prediction:0.3775000000000001\n", - "Error:0.17808399999999994 Prediction:0.3780000000000001\n", - "Error:0.17766224999999994 Prediction:0.3785000000000001\n", - "Error:0.17724099999999995 Prediction:0.3790000000000001\n", - "Error:0.17682024999999993 Prediction:0.3795000000000001\n", - "Error:0.17639999999999995 Prediction:0.3800000000000001\n", - "Error:0.17598024999999995 Prediction:0.3805000000000001\n", - "Error:0.17556099999999994 Prediction:0.3810000000000001\n", - "Error:0.17514224999999994 Prediction:0.3815000000000001\n", - "Error:0.17472399999999993 Prediction:0.3820000000000001\n", - "Error:0.17430624999999994 
Prediction:0.3825000000000001\n", - "Error:0.17388899999999993 Prediction:0.3830000000000001\n", - "Error:0.17347224999999994 Prediction:0.3835000000000001\n", - "Error:0.17305599999999993 Prediction:0.3840000000000001\n", - "Error:0.17264024999999994 Prediction:0.3845000000000001\n", - "Error:0.17222499999999993 Prediction:0.3850000000000001\n", - "Error:0.17181024999999994 Prediction:0.3855000000000001\n", - "Error:0.17139599999999994 Prediction:0.3860000000000001\n", - "Error:0.17098224999999995 Prediction:0.3865000000000001\n", - "Error:0.17056899999999994 Prediction:0.3870000000000001\n", - "Error:0.17015624999999993 Prediction:0.3875000000000001\n", - "Error:0.16974399999999992 Prediction:0.3880000000000001\n", - "Error:0.16933224999999993 Prediction:0.3885000000000001\n", - "Error:0.16892099999999993 Prediction:0.3890000000000001\n", - "Error:0.16851024999999994 Prediction:0.3895000000000001\n", - "Error:0.16809999999999994 Prediction:0.3900000000000001\n", - "Error:0.16769024999999993 Prediction:0.3905000000000001\n", - "Error:0.16728099999999993 Prediction:0.3910000000000001\n", - "Error:0.16687224999999994 Prediction:0.3915000000000001\n", - "Error:0.16646399999999995 Prediction:0.3920000000000001\n", - "Error:0.16605624999999993 Prediction:0.3925000000000001\n", - "Error:0.16564899999999994 Prediction:0.3930000000000001\n", - "Error:0.16524224999999992 Prediction:0.3935000000000001\n", - "Error:0.16483599999999993 Prediction:0.39400000000000013\n", - "Error:0.16443024999999994 Prediction:0.39450000000000013\n", - "Error:0.16402499999999992 Prediction:0.39500000000000013\n", - "Error:0.16362024999999994 Prediction:0.39550000000000013\n", - "Error:0.16321599999999994 Prediction:0.39600000000000013\n", - "Error:0.16281224999999994 Prediction:0.39650000000000013\n", - "Error:0.16240899999999994 Prediction:0.39700000000000013\n", - "Error:0.16200624999999994 Prediction:0.39750000000000013\n", - "Error:0.16160399999999994 Prediction:0.39800000000000013\n", - "Error:0.16120224999999994 Prediction:0.39850000000000013\n", - "Error:0.16080099999999992 Prediction:0.39900000000000013\n", - "Error:0.16040024999999994 Prediction:0.39950000000000013\n", - "Error:0.15999999999999992 Prediction:0.40000000000000013\n", - "Error:0.15960024999999992 Prediction:0.40050000000000013\n", - "Error:0.15920099999999993 Prediction:0.40100000000000013\n", - "Error:0.15880224999999992 Prediction:0.40150000000000013\n", - "Error:0.15840399999999993 Prediction:0.40200000000000014\n", - "Error:0.15800624999999993 Prediction:0.40250000000000014\n", - "Error:0.15760899999999992 Prediction:0.40300000000000014\n", - "Error:0.15721224999999991 Prediction:0.40350000000000014\n", - "Error:0.15681599999999993 Prediction:0.40400000000000014\n", - "Error:0.15642024999999993 Prediction:0.40450000000000014\n", - "Error:0.1560249999999999 Prediction:0.40500000000000014\n", - "Error:0.15563024999999991 Prediction:0.40550000000000014\n", - "Error:0.15523599999999993 Prediction:0.40600000000000014\n", - "Error:0.15484224999999993 Prediction:0.40650000000000014\n", - "Error:0.15444899999999992 Prediction:0.40700000000000014\n", - "Error:0.15405624999999992 Prediction:0.40750000000000014\n", - "Error:0.1536639999999999 Prediction:0.40800000000000014\n", - "Error:0.15327224999999992 Prediction:0.40850000000000014\n", - "Error:0.15288099999999993 Prediction:0.40900000000000014\n", - "Error:0.1524902499999999 Prediction:0.40950000000000014\n", - "Error:0.15209999999999993 Prediction:0.41000000000000014\n", - 
"Error:0.15171024999999994 Prediction:0.41050000000000014\n", - "Error:0.15132099999999993 Prediction:0.41100000000000014\n", - "Error:0.15093224999999993 Prediction:0.41150000000000014\n", - "Error:0.15054399999999993 Prediction:0.41200000000000014\n", - "Error:0.15015624999999994 Prediction:0.41250000000000014\n", - "Error:0.14976899999999993 Prediction:0.41300000000000014\n", - "Error:0.1493822499999999 Prediction:0.41350000000000015\n", - "Error:0.14899599999999993 Prediction:0.41400000000000015\n", - "Error:0.14861024999999992 Prediction:0.41450000000000015\n", - "Error:0.1482249999999999 Prediction:0.41500000000000015\n", - "Error:0.14784024999999992 Prediction:0.41550000000000015\n", - "Error:0.14745599999999992 Prediction:0.41600000000000015\n", - "Error:0.14707224999999993 Prediction:0.41650000000000015\n", - "Error:0.14668899999999993 Prediction:0.41700000000000015\n", - "Error:0.14630624999999992 Prediction:0.41750000000000015\n", - "Error:0.14592399999999991 Prediction:0.41800000000000015\n", - "Error:0.14554224999999993 Prediction:0.41850000000000015\n", - "Error:0.14516099999999993 Prediction:0.41900000000000015\n", - "Error:0.14478024999999992 Prediction:0.41950000000000015\n", - "Error:0.14439999999999992 Prediction:0.42000000000000015\n", - "Error:0.1440202499999999 Prediction:0.42050000000000015\n", - "Error:0.1436409999999999 Prediction:0.42100000000000015\n", - "Error:0.14326224999999992 Prediction:0.42150000000000015\n", - "Error:0.14288399999999993 Prediction:0.42200000000000015\n", - "Error:0.14250624999999992 Prediction:0.42250000000000015\n", - "Error:0.14212899999999992 Prediction:0.42300000000000015\n", - "Error:0.1417522499999999 Prediction:0.42350000000000015\n", - "Error:0.14137599999999992 Prediction:0.42400000000000015\n", - "Error:0.1410002499999999 Prediction:0.42450000000000015\n", - "Error:0.14062499999999992 Prediction:0.42500000000000016\n", - "Error:0.1402502499999999 Prediction:0.42550000000000016\n", - "Error:0.13987599999999992 Prediction:0.42600000000000016\n", - "Error:0.1395022499999999 Prediction:0.42650000000000016\n", - "Error:0.13912899999999992 Prediction:0.42700000000000016\n", - "Error:0.13875624999999991 Prediction:0.42750000000000016\n", - "Error:0.13838399999999992 Prediction:0.42800000000000016\n", - "Error:0.13801224999999992 Prediction:0.42850000000000016\n", - "Error:0.1376409999999999 Prediction:0.42900000000000016\n", - "Error:0.13727024999999993 Prediction:0.42950000000000016\n", - "Error:0.1368999999999999 Prediction:0.43000000000000016\n", - "Error:0.1365302499999999 Prediction:0.43050000000000016\n", - "Error:0.13616099999999992 Prediction:0.43100000000000016\n", - "Error:0.13579224999999992 Prediction:0.43150000000000016\n", - "Error:0.1354239999999999 Prediction:0.43200000000000016\n", - "Error:0.1350562499999999 Prediction:0.43250000000000016\n", - "Error:0.13468899999999992 Prediction:0.43300000000000016\n", - "Error:0.13432224999999992 Prediction:0.43350000000000016\n", - "Error:0.1339559999999999 Prediction:0.43400000000000016\n", - "Error:0.1335902499999999 Prediction:0.43450000000000016\n", - "Error:0.1332249999999999 Prediction:0.43500000000000016\n", - "Error:0.1328602499999999 Prediction:0.43550000000000016\n", - "Error:0.13249599999999992 Prediction:0.43600000000000017\n", - "Error:0.13213224999999992 Prediction:0.43650000000000017\n", - "Error:0.13176899999999991 Prediction:0.43700000000000017\n", - "Error:0.13140624999999992 Prediction:0.43750000000000017\n", - "Error:0.1310439999999999 
Prediction:0.43800000000000017\n", - "Error:0.13068224999999992 Prediction:0.43850000000000017\n", - "Error:0.1303209999999999 Prediction:0.43900000000000017\n", - "Error:0.12996024999999992 Prediction:0.43950000000000017\n", - "Error:0.1295999999999999 Prediction:0.44000000000000017\n", - "Error:0.12924024999999992 Prediction:0.44050000000000017\n", - "Error:0.1288809999999999 Prediction:0.44100000000000017\n", - "Error:0.12852224999999992 Prediction:0.44150000000000017\n", - "Error:0.12816399999999992 Prediction:0.44200000000000017\n", - "Error:0.1278062499999999 Prediction:0.44250000000000017\n", - "Error:0.1274489999999999 Prediction:0.44300000000000017\n", - "Error:0.1270922499999999 Prediction:0.44350000000000017\n", - "Error:0.1267359999999999 Prediction:0.4440000000000002\n", - "Error:0.12638024999999992 Prediction:0.4445000000000002\n", - "Error:0.12602499999999991 Prediction:0.4450000000000002\n", - "Error:0.1256702499999999 Prediction:0.4455000000000002\n", - "Error:0.1253159999999999 Prediction:0.4460000000000002\n", - "Error:0.12496224999999991 Prediction:0.4465000000000002\n", - "Error:0.12460899999999991 Prediction:0.4470000000000002\n", - "Error:0.1242562499999999 Prediction:0.4475000000000002\n", - "Error:0.1239039999999999 Prediction:0.4480000000000002\n", - "Error:0.1235522499999999 Prediction:0.4485000000000002\n", - "Error:0.12320099999999991 Prediction:0.4490000000000002\n", - "Error:0.12285024999999991 Prediction:0.4495000000000002\n", - "Error:0.1224999999999999 Prediction:0.4500000000000002\n", - "Error:0.1221502499999999 Prediction:0.4505000000000002\n", - "Error:0.12180099999999991 Prediction:0.4510000000000002\n", - "Error:0.1214522499999999 Prediction:0.4515000000000002\n", - "Error:0.1211039999999999 Prediction:0.4520000000000002\n", - "Error:0.12075624999999991 Prediction:0.4525000000000002\n", - "Error:0.1204089999999999 Prediction:0.4530000000000002\n", - "Error:0.12006224999999991 Prediction:0.4535000000000002\n", - "Error:0.1197159999999999 Prediction:0.4540000000000002\n", - "Error:0.1193702499999999 Prediction:0.4545000000000002\n", - "Error:0.11902499999999991 Prediction:0.4550000000000002\n", - "Error:0.1186802499999999 Prediction:0.4555000000000002\n", - "Error:0.1183359999999999 Prediction:0.4560000000000002\n", - "Error:0.11799224999999991 Prediction:0.4565000000000002\n", - "Error:0.1176489999999999 Prediction:0.4570000000000002\n", - "Error:0.1173062499999999 Prediction:0.4575000000000002\n", - "Error:0.1169639999999999 Prediction:0.4580000000000002\n", - "Error:0.1166222499999999 Prediction:0.4585000000000002\n", - "Error:0.1162809999999999 Prediction:0.4590000000000002\n", - "Error:0.1159402499999999 Prediction:0.4595000000000002\n", - "Error:0.1155999999999999 Prediction:0.4600000000000002\n", - "Error:0.1152602499999999 Prediction:0.4605000000000002\n", - "Error:0.1149209999999999 Prediction:0.4610000000000002\n", - "Error:0.1145822499999999 Prediction:0.4615000000000002\n", - "Error:0.1142439999999999 Prediction:0.4620000000000002\n", - "Error:0.1139062499999999 Prediction:0.4625000000000002\n", - "Error:0.1135689999999999 Prediction:0.4630000000000002\n", - "Error:0.1132322499999999 Prediction:0.4635000000000002\n", - "Error:0.1128959999999999 Prediction:0.4640000000000002\n", - "Error:0.1125602499999999 Prediction:0.4645000000000002\n", - "Error:0.11222499999999991 Prediction:0.4650000000000002\n", - "Error:0.1118902499999999 Prediction:0.4655000000000002\n", - "Error:0.1115559999999999 Prediction:0.4660000000000002\n", - 
"Error:0.1112222499999999 Prediction:0.4665000000000002\n", - "Error:0.1108889999999999 Prediction:0.4670000000000002\n", - "Error:0.1105562499999999 Prediction:0.4675000000000002\n", - "Error:0.1102239999999999 Prediction:0.4680000000000002\n", - "Error:0.1098922499999999 Prediction:0.4685000000000002\n", - "Error:0.1095609999999999 Prediction:0.4690000000000002\n", - "Error:0.1092302499999999 Prediction:0.4695000000000002\n", - "Error:0.1088999999999999 Prediction:0.4700000000000002\n", - "Error:0.1085702499999999 Prediction:0.4705000000000002\n", - "Error:0.1082409999999999 Prediction:0.4710000000000002\n", - "Error:0.1079122499999999 Prediction:0.4715000000000002\n", - "Error:0.1075839999999999 Prediction:0.4720000000000002\n", - "Error:0.1072562499999999 Prediction:0.4725000000000002\n", - "Error:0.1069289999999999 Prediction:0.4730000000000002\n", - "Error:0.1066022499999999 Prediction:0.4735000000000002\n", - "Error:0.1062759999999999 Prediction:0.4740000000000002\n", - "Error:0.1059502499999999 Prediction:0.4745000000000002\n", - "Error:0.1056249999999999 Prediction:0.4750000000000002\n", - "Error:0.1053002499999999 Prediction:0.4755000000000002\n", - "Error:0.1049759999999999 Prediction:0.4760000000000002\n", - "Error:0.1046522499999999 Prediction:0.4765000000000002\n", - "Error:0.1043289999999999 Prediction:0.4770000000000002\n", - "Error:0.1040062499999999 Prediction:0.4775000000000002\n", - "Error:0.1036839999999999 Prediction:0.4780000000000002\n", - "Error:0.10336224999999989 Prediction:0.4785000000000002\n", - "Error:0.1030409999999999 Prediction:0.4790000000000002\n", - "Error:0.1027202499999999 Prediction:0.4795000000000002\n", - "Error:0.1023999999999999 Prediction:0.4800000000000002\n", - "Error:0.1020802499999999 Prediction:0.4805000000000002\n", - "Error:0.1017609999999999 Prediction:0.4810000000000002\n", - "Error:0.1014422499999999 Prediction:0.4815000000000002\n", - "Error:0.1011239999999999 Prediction:0.4820000000000002\n", - "Error:0.1008062499999999 Prediction:0.4825000000000002\n", - "Error:0.1004889999999999 Prediction:0.4830000000000002\n", - "Error:0.1001722499999999 Prediction:0.4835000000000002\n", - "Error:0.0998559999999999 Prediction:0.4840000000000002\n", - "Error:0.0995402499999999 Prediction:0.4845000000000002\n", - "Error:0.0992249999999999 Prediction:0.4850000000000002\n", - "Error:0.0989102499999999 Prediction:0.4855000000000002\n", - "Error:0.09859599999999989 Prediction:0.4860000000000002\n", - "Error:0.09828224999999989 Prediction:0.4865000000000002\n", - "Error:0.09796899999999989 Prediction:0.4870000000000002\n", - "Error:0.09765624999999989 Prediction:0.4875000000000002\n", - "Error:0.09734399999999989 Prediction:0.4880000000000002\n", - "Error:0.09703224999999989 Prediction:0.4885000000000002\n", - "Error:0.09672099999999989 Prediction:0.4890000000000002\n", - "Error:0.09641024999999989 Prediction:0.4895000000000002\n", - "Error:0.0960999999999999 Prediction:0.4900000000000002\n", - "Error:0.0957902499999999 Prediction:0.4905000000000002\n", - "Error:0.0954809999999999 Prediction:0.4910000000000002\n", - "Error:0.09517224999999989 Prediction:0.4915000000000002\n", - "Error:0.09486399999999989 Prediction:0.4920000000000002\n", - "Error:0.0945562499999999 Prediction:0.4925000000000002\n", - "Error:0.09424899999999989 Prediction:0.4930000000000002\n", - "Error:0.0939422499999999 Prediction:0.4935000000000002\n", - "Error:0.0936359999999999 Prediction:0.4940000000000002\n", - "Error:0.09333024999999989 Prediction:0.4945000000000002\n", - 
"Error:0.0930249999999999 Prediction:0.4950000000000002\n", - "Error:0.09272024999999989 Prediction:0.4955000000000002\n", - "Error:0.0924159999999999 Prediction:0.4960000000000002\n", - "Error:0.0921122499999999 Prediction:0.4965000000000002\n", - "Error:0.09180899999999989 Prediction:0.4970000000000002\n", - "Error:0.0915062499999999 Prediction:0.4975000000000002\n", - "Error:0.0912039999999999 Prediction:0.4980000000000002\n", - "Error:0.09090224999999989 Prediction:0.4985000000000002\n", - "Error:0.09060099999999989 Prediction:0.4990000000000002\n", - "Error:0.09030024999999989 Prediction:0.4995000000000002\n", - "Error:0.0899999999999999 Prediction:0.5000000000000002\n", - "Error:0.08970024999999993 Prediction:0.5005000000000002\n", - "Error:0.08940099999999995 Prediction:0.5010000000000001\n", - "Error:0.08910225 Prediction:0.5015000000000001\n", - "Error:0.08880400000000002 Prediction:0.502\n", - "Error:0.08850625000000006 Prediction:0.5025\n", - "Error:0.08820900000000009 Prediction:0.5029999999999999\n", - "Error:0.08791225000000012 Prediction:0.5034999999999998\n", - "Error:0.08761600000000015 Prediction:0.5039999999999998\n", - "Error:0.08732025000000018 Prediction:0.5044999999999997\n", - "Error:0.08702500000000021 Prediction:0.5049999999999997\n", - "Error:0.08673025000000026 Prediction:0.5054999999999996\n", - "Error:0.08643600000000029 Prediction:0.5059999999999996\n", - "Error:0.08614225000000032 Prediction:0.5064999999999995\n", - "Error:0.08584900000000034 Prediction:0.5069999999999995\n", - "Error:0.08555625000000038 Prediction:0.5074999999999994\n", - "Error:0.08526400000000041 Prediction:0.5079999999999993\n", - "Error:0.08497225000000044 Prediction:0.5084999999999993\n", - "Error:0.08468100000000048 Prediction:0.5089999999999992\n", - "Error:0.0843902500000005 Prediction:0.5094999999999992\n", - "Error:0.08410000000000054 Prediction:0.5099999999999991\n", - "Error:0.08381025000000057 Prediction:0.5104999999999991\n", - "Error:0.0835210000000006 Prediction:0.510999999999999\n", - "Error:0.08323225000000063 Prediction:0.511499999999999\n", - "Error:0.08294400000000066 Prediction:0.5119999999999989\n", - "Error:0.0826562500000007 Prediction:0.5124999999999988\n", - "Error:0.08236900000000072 Prediction:0.5129999999999988\n", - "Error:0.08208225000000074 Prediction:0.5134999999999987\n", - "Error:0.08179600000000078 Prediction:0.5139999999999987\n", - "Error:0.08151025000000081 Prediction:0.5144999999999986\n", - "Error:0.08122500000000084 Prediction:0.5149999999999986\n", - "Error:0.08094025000000087 Prediction:0.5154999999999985\n", - "Error:0.0806560000000009 Prediction:0.5159999999999985\n", - "Error:0.08037225000000094 Prediction:0.5164999999999984\n", - "Error:0.08008900000000096 Prediction:0.5169999999999983\n", - "Error:0.079806250000001 Prediction:0.5174999999999983\n", - "Error:0.07952400000000102 Prediction:0.5179999999999982\n", - "Error:0.07924225000000104 Prediction:0.5184999999999982\n", - "Error:0.07896100000000107 Prediction:0.5189999999999981\n", - "Error:0.0786802500000011 Prediction:0.5194999999999981\n", - "Error:0.07840000000000114 Prediction:0.519999999999998\n", - "Error:0.07812025000000117 Prediction:0.520499999999998\n", - "Error:0.07784100000000119 Prediction:0.5209999999999979\n", - "Error:0.07756225000000122 Prediction:0.5214999999999979\n", - "Error:0.07728400000000125 Prediction:0.5219999999999978\n", - "Error:0.07700625000000128 Prediction:0.5224999999999977\n", - "Error:0.07672900000000131 Prediction:0.5229999999999977\n", - 
"Error:0.07645225000000133 Prediction:0.5234999999999976\n", - "Error:0.07617600000000137 Prediction:0.5239999999999976\n", - "Error:0.07590025000000139 Prediction:0.5244999999999975\n", - "Error:0.07562500000000141 Prediction:0.5249999999999975\n", - "Error:0.07535025000000145 Prediction:0.5254999999999974\n", - "Error:0.07507600000000147 Prediction:0.5259999999999974\n", - "Error:0.0748022500000015 Prediction:0.5264999999999973\n", - "Error:0.07452900000000152 Prediction:0.5269999999999972\n", - "Error:0.07425625000000155 Prediction:0.5274999999999972\n", - "Error:0.07398400000000158 Prediction:0.5279999999999971\n", - "Error:0.0737122500000016 Prediction:0.5284999999999971\n", - "Error:0.07344100000000163 Prediction:0.528999999999997\n", - "Error:0.07317025000000166 Prediction:0.529499999999997\n", - "Error:0.07290000000000169 Prediction:0.5299999999999969\n", - "Error:0.07263025000000171 Prediction:0.5304999999999969\n", - "Error:0.07236100000000174 Prediction:0.5309999999999968\n", - "Error:0.07209225000000177 Prediction:0.5314999999999968\n", - "Error:0.07182400000000179 Prediction:0.5319999999999967\n", - "Error:0.07155625000000182 Prediction:0.5324999999999966\n", - "Error:0.07128900000000185 Prediction:0.5329999999999966\n", - "Error:0.07102225000000187 Prediction:0.5334999999999965\n", - "Error:0.0707560000000019 Prediction:0.5339999999999965\n", - "Error:0.07049025000000192 Prediction:0.5344999999999964\n", - "Error:0.07022500000000195 Prediction:0.5349999999999964\n", - "Error:0.06996025000000197 Prediction:0.5354999999999963\n", - "Error:0.069696000000002 Prediction:0.5359999999999963\n", - "Error:0.06943225000000203 Prediction:0.5364999999999962\n", - "Error:0.06916900000000205 Prediction:0.5369999999999961\n", - "Error:0.06890625000000207 Prediction:0.5374999999999961\n", - "Error:0.0686440000000021 Prediction:0.537999999999996\n", - "Error:0.06838225000000213 Prediction:0.538499999999996\n", - "Error:0.06812100000000215 Prediction:0.5389999999999959\n", - "Error:0.06786025000000218 Prediction:0.5394999999999959\n", - "Error:0.0676000000000022 Prediction:0.5399999999999958\n", - "Error:0.06734025000000222 Prediction:0.5404999999999958\n", - "Error:0.06708100000000225 Prediction:0.5409999999999957\n", - "Error:0.06682225000000228 Prediction:0.5414999999999957\n", - "Error:0.0665640000000023 Prediction:0.5419999999999956\n", - "Error:0.06630625000000231 Prediction:0.5424999999999955\n", - "Error:0.06604900000000234 Prediction:0.5429999999999955\n", - "Error:0.06579225000000237 Prediction:0.5434999999999954\n", - "Error:0.06553600000000238 Prediction:0.5439999999999954\n", - "Error:0.06528025000000241 Prediction:0.5444999999999953\n", - "Error:0.06502500000000244 Prediction:0.5449999999999953\n", - "Error:0.06477025000000246 Prediction:0.5454999999999952\n", - "Error:0.06451600000000249 Prediction:0.5459999999999952\n", - "Error:0.0642622500000025 Prediction:0.5464999999999951\n", - "Error:0.06400900000000254 Prediction:0.546999999999995\n", - "Error:0.06375625000000255 Prediction:0.547499999999995\n", - "Error:0.06350400000000257 Prediction:0.5479999999999949\n", - "Error:0.06325225000000259 Prediction:0.5484999999999949\n", - "Error:0.06300100000000262 Prediction:0.5489999999999948\n", - "Error:0.06275025000000264 Prediction:0.5494999999999948\n", - "Error:0.06250000000000266 Prediction:0.5499999999999947\n", - "Error:0.062250250000002685 Prediction:0.5504999999999947\n", - "Error:0.06200100000000271 Prediction:0.5509999999999946\n", - "Error:0.06175225000000273 
Prediction:0.5514999999999946\n", - "Error:0.06150400000000275 Prediction:0.5519999999999945\n", - "Error:0.061256250000002774 Prediction:0.5524999999999944\n", - "Error:0.0610090000000028 Prediction:0.5529999999999944\n", - "Error:0.060762250000002814 Prediction:0.5534999999999943\n", - "Error:0.06051600000000284 Prediction:0.5539999999999943\n", - "Error:0.06027025000000286 Prediction:0.5544999999999942\n", - "Error:0.06002500000000288 Prediction:0.5549999999999942\n", - "Error:0.0597802500000029 Prediction:0.5554999999999941\n", - "Error:0.05953600000000292 Prediction:0.555999999999994\n", - "Error:0.05929225000000295 Prediction:0.556499999999994\n", - "Error:0.05904900000000297 Prediction:0.5569999999999939\n", - "Error:0.05880625000000299 Prediction:0.5574999999999939\n", - "Error:0.058564000000003 Prediction:0.5579999999999938\n", - "Error:0.058322250000003024 Prediction:0.5584999999999938\n", - "Error:0.05808100000000305 Prediction:0.5589999999999937\n", - "Error:0.05784025000000307 Prediction:0.5594999999999937\n", - "Error:0.057600000000003086 Prediction:0.5599999999999936\n", - "Error:0.0573602500000031 Prediction:0.5604999999999936\n", - "Error:0.05712100000000313 Prediction:0.5609999999999935\n", - "Error:0.056882250000003146 Prediction:0.5614999999999934\n", - "Error:0.056644000000003164 Prediction:0.5619999999999934\n", - "Error:0.05640625000000318 Prediction:0.5624999999999933\n", - "Error:0.0561690000000032 Prediction:0.5629999999999933\n", - "Error:0.05593225000000322 Prediction:0.5634999999999932\n", - "Error:0.05569600000000324 Prediction:0.5639999999999932\n", - "Error:0.055460250000003264 Prediction:0.5644999999999931\n", - "Error:0.05522500000000328 Prediction:0.5649999999999931\n", - "Error:0.0549902500000033 Prediction:0.565499999999993\n", - "Error:0.054756000000003316 Prediction:0.565999999999993\n", - "Error:0.05452225000000334 Prediction:0.5664999999999929\n", - "Error:0.054289000000003355 Prediction:0.5669999999999928\n", - "Error:0.05405625000000337 Prediction:0.5674999999999928\n", - "Error:0.05382400000000339 Prediction:0.5679999999999927\n", - "Error:0.05359225000000341 Prediction:0.5684999999999927\n", - "Error:0.053361000000003427 Prediction:0.5689999999999926\n", - "Error:0.053130250000003446 Prediction:0.5694999999999926\n", - "Error:0.052900000000003465 Prediction:0.5699999999999925\n", - "Error:0.052670250000003485 Prediction:0.5704999999999925\n", - "Error:0.0524410000000035 Prediction:0.5709999999999924\n", - "Error:0.05221225000000352 Prediction:0.5714999999999923\n", - "Error:0.051984000000003534 Prediction:0.5719999999999923\n", - "Error:0.05175625000000355 Prediction:0.5724999999999922\n", - "Error:0.05152900000000357 Prediction:0.5729999999999922\n", - "Error:0.05130225000000359 Prediction:0.5734999999999921\n", - "Error:0.051076000000003605 Prediction:0.5739999999999921\n", - "Error:0.05085025000000362 Prediction:0.574499999999992\n", - "Error:0.05062500000000364 Prediction:0.574999999999992\n", - "Error:0.05040025000000365 Prediction:0.5754999999999919\n", - "Error:0.05017600000000367 Prediction:0.5759999999999919\n", - "Error:0.04995225000000369 Prediction:0.5764999999999918\n", - "Error:0.0497290000000037 Prediction:0.5769999999999917\n", - "Error:0.04950625000000372 Prediction:0.5774999999999917\n", - "Error:0.049284000000003735 Prediction:0.5779999999999916\n", - "Error:0.04906225000000375 Prediction:0.5784999999999916\n", - "Error:0.04884100000000377 Prediction:0.5789999999999915\n", - "Error:0.048620250000003785 
Prediction:0.5794999999999915\n", - "Error:0.0484000000000038 Prediction:0.5799999999999914\n", - "Error:0.04818025000000382 Prediction:0.5804999999999914\n", - "Error:0.04796100000000383 Prediction:0.5809999999999913\n", - "Error:0.047742250000003844 Prediction:0.5814999999999912\n", - "Error:0.04752400000000386 Prediction:0.5819999999999912\n", - "Error:0.04730625000000387 Prediction:0.5824999999999911\n", - "Error:0.04708900000000389 Prediction:0.5829999999999911\n", - "Error:0.046872250000003904 Prediction:0.583499999999991\n", - "Error:0.04665600000000392 Prediction:0.583999999999991\n", - "Error:0.04644025000000394 Prediction:0.5844999999999909\n", - "Error:0.04622500000000395 Prediction:0.5849999999999909\n", - "Error:0.046010250000003965 Prediction:0.5854999999999908\n", - "Error:0.04579600000000398 Prediction:0.5859999999999908\n", - "Error:0.045582250000003995 Prediction:0.5864999999999907\n", - "Error:0.045369000000004 Prediction:0.5869999999999906\n", - "Error:0.04515625000000402 Prediction:0.5874999999999906\n", - "Error:0.044944000000004036 Prediction:0.5879999999999905\n", - "Error:0.04473225000000405 Prediction:0.5884999999999905\n", - "Error:0.044521000000004064 Prediction:0.5889999999999904\n", - "Error:0.044310250000004076 Prediction:0.5894999999999904\n", - "Error:0.04410000000000409 Prediction:0.5899999999999903\n", - "Error:0.0438902500000041 Prediction:0.5904999999999903\n", - "Error:0.04368100000000411 Prediction:0.5909999999999902\n", - "Error:0.043472250000004126 Prediction:0.5914999999999901\n", - "Error:0.04326400000000414 Prediction:0.5919999999999901\n", - "Error:0.043056250000004154 Prediction:0.59249999999999\n", - "Error:0.04284900000000417 Prediction:0.59299999999999\n", - "Error:0.04264225000000418 Prediction:0.5934999999999899\n", - "Error:0.04243600000000419 Prediction:0.5939999999999899\n", - "Error:0.0422302500000042 Prediction:0.5944999999999898\n", - "Error:0.04202500000000422 Prediction:0.5949999999999898\n", - "Error:0.04182025000000423 Prediction:0.5954999999999897\n", - "Error:0.04161600000000424 Prediction:0.5959999999999896\n", - "Error:0.04141225000000425 Prediction:0.5964999999999896\n", - "Error:0.04120900000000426 Prediction:0.5969999999999895\n", - "Error:0.041006250000004275 Prediction:0.5974999999999895\n", - "Error:0.04080400000000429 Prediction:0.5979999999999894\n", - "Error:0.0406022500000043 Prediction:0.5984999999999894\n", - "Error:0.04040100000000431 Prediction:0.5989999999999893\n", - "Error:0.04020025000000432 Prediction:0.5994999999999893\n", - "Error:0.04000000000000434 Prediction:0.5999999999999892\n", - "Error:0.039800250000004346 Prediction:0.6004999999999892\n", - "Error:0.039601000000004355 Prediction:0.6009999999999891\n", - "Error:0.039402250000004364 Prediction:0.601499999999989\n", - "Error:0.03920400000000438 Prediction:0.601999999999989\n", - "Error:0.03900625000000439 Prediction:0.6024999999999889\n", - "Error:0.0388090000000044 Prediction:0.6029999999999889\n", - "Error:0.03861225000000441 Prediction:0.6034999999999888\n", - "Error:0.03841600000000442 Prediction:0.6039999999999888\n", - "Error:0.03822025000000443 Prediction:0.6044999999999887\n", - "Error:0.038025000000004444 Prediction:0.6049999999999887\n", - "Error:0.03783025000000445 Prediction:0.6054999999999886\n", - "Error:0.03763600000000446 Prediction:0.6059999999999885\n", - "Error:0.03744225000000447 Prediction:0.6064999999999885\n", - "Error:0.03724900000000448 Prediction:0.6069999999999884\n", - "Error:0.03705625000000449 
Prediction:0.6074999999999884\n", - "Error:0.0368640000000045 Prediction:0.6079999999999883\n", - "Error:0.03667225000000451 Prediction:0.6084999999999883\n", - "Error:0.03648100000000452 Prediction:0.6089999999999882\n", - "Error:0.03629025000000453 Prediction:0.6094999999999882\n", - "Error:0.03610000000000454 Prediction:0.6099999999999881\n", - "Error:0.03591025000000454 Prediction:0.610499999999988\n", - "Error:0.035721000000004555 Prediction:0.610999999999988\n", - "Error:0.03553225000000456 Prediction:0.6114999999999879\n", - "Error:0.03534400000000457 Prediction:0.6119999999999879\n", - "Error:0.03515625000000458 Prediction:0.6124999999999878\n", - "Error:0.03496900000000459 Prediction:0.6129999999999878\n", - "Error:0.034782250000004594 Prediction:0.6134999999999877\n", - "Error:0.0345960000000046 Prediction:0.6139999999999877\n", - "Error:0.03441025000000461 Prediction:0.6144999999999876\n", - "Error:0.03422500000000462 Prediction:0.6149999999999876\n", - "Error:0.03404025000000463 Prediction:0.6154999999999875\n", - "Error:0.03385600000000464 Prediction:0.6159999999999874\n", - "Error:0.03367225000000464 Prediction:0.6164999999999874\n", - "Error:0.033489000000004654 Prediction:0.6169999999999873\n", - "Error:0.03330625000000466 Prediction:0.6174999999999873\n", - "Error:0.033124000000004664 Prediction:0.6179999999999872\n", - "Error:0.032942250000004676 Prediction:0.6184999999999872\n", - "Error:0.03276100000000468 Prediction:0.6189999999999871\n", - "Error:0.03258025000000469 Prediction:0.6194999999999871\n", - "Error:0.032400000000004696 Prediction:0.619999999999987\n", - "Error:0.0322202500000047 Prediction:0.620499999999987\n", - "Error:0.032041000000004705 Prediction:0.6209999999999869\n", - "Error:0.03186225000000471 Prediction:0.6214999999999868\n", - "Error:0.03168400000000472 Prediction:0.6219999999999868\n", - "Error:0.031506250000004725 Prediction:0.6224999999999867\n", - "Error:0.031329000000004735 Prediction:0.6229999999999867\n", - "Error:0.03115225000000474 Prediction:0.6234999999999866\n", - "Error:0.030976000000004746 Prediction:0.6239999999999866\n", - "Error:0.03080025000000475 Prediction:0.6244999999999865\n", - "Error:0.030625000000004756 Prediction:0.6249999999999865\n", - "Error:0.03045025000000476 Prediction:0.6254999999999864\n", - "Error:0.030276000000004768 Prediction:0.6259999999999863\n", - "Error:0.03010225000000477 Prediction:0.6264999999999863\n", - "Error:0.029929000000004778 Prediction:0.6269999999999862\n", - "Error:0.029756250000004782 Prediction:0.6274999999999862\n", - "Error:0.029584000000004787 Prediction:0.6279999999999861\n", - "Error:0.029412250000004792 Prediction:0.6284999999999861\n", - "Error:0.029241000000004798 Prediction:0.628999999999986\n", - "Error:0.029070250000004804 Prediction:0.629499999999986\n", - "Error:0.028900000000004807 Prediction:0.6299999999999859\n", - "Error:0.02873025000000481 Prediction:0.6304999999999858\n", - "Error:0.028561000000004815 Prediction:0.6309999999999858\n", - "Error:0.02839225000000482 Prediction:0.6314999999999857\n", - "Error:0.028224000000004825 Prediction:0.6319999999999857\n", - "Error:0.02805625000000483 Prediction:0.6324999999999856\n", - "Error:0.027889000000004834 Prediction:0.6329999999999856\n", - "Error:0.027722250000004837 Prediction:0.6334999999999855\n", - "Error:0.02755600000000484 Prediction:0.6339999999999855\n", - "Error:0.027390250000004845 Prediction:0.6344999999999854\n", - "Error:0.02722500000000485 Prediction:0.6349999999999854\n", - "Error:0.02706025000000485 
Prediction:0.6354999999999853\n", - "Error:0.026896000000004854 Prediction:0.6359999999999852\n", - "Error:0.026732250000004856 Prediction:0.6364999999999852\n", - "Error:0.02656900000000486 Prediction:0.6369999999999851\n", - "Error:0.026406250000004863 Prediction:0.6374999999999851\n", - "Error:0.026244000000004868 Prediction:0.637999999999985\n", - "Error:0.02608225000000487 Prediction:0.638499999999985\n", - "Error:0.02592100000000487 Prediction:0.6389999999999849\n", - "Error:0.025760250000004873 Prediction:0.6394999999999849\n", - "Error:0.025600000000004876 Prediction:0.6399999999999848\n", - "Error:0.02544025000000488 Prediction:0.6404999999999847\n", - "Error:0.025281000000004883 Prediction:0.6409999999999847\n", - "Error:0.025122250000004884 Prediction:0.6414999999999846\n", - "Error:0.024964000000004885 Prediction:0.6419999999999846\n", - "Error:0.024806250000004887 Prediction:0.6424999999999845\n", - "Error:0.02464900000000489 Prediction:0.6429999999999845\n", - "Error:0.024492250000004892 Prediction:0.6434999999999844\n", - "Error:0.024336000000004892 Prediction:0.6439999999999844\n", - "Error:0.024180250000004896 Prediction:0.6444999999999843\n", - "Error:0.024025000000004897 Prediction:0.6449999999999843\n", - "Error:0.023870250000004898 Prediction:0.6454999999999842\n", - "Error:0.023716000000004896 Prediction:0.6459999999999841\n", - "Error:0.0235622500000049 Prediction:0.6464999999999841\n", - "Error:0.023409000000004898 Prediction:0.646999999999984\n", - "Error:0.0232562500000049 Prediction:0.647499999999984\n", - "Error:0.023104000000004902 Prediction:0.6479999999999839\n", - "Error:0.022952250000004903 Prediction:0.6484999999999839\n", - "Error:0.0228010000000049 Prediction:0.6489999999999838\n", - "Error:0.022650250000004903 Prediction:0.6494999999999838\n", - "Error:0.0225000000000049 Prediction:0.6499999999999837\n", - "Error:0.022350250000004904 Prediction:0.6504999999999836\n", - "Error:0.022201000000004904 Prediction:0.6509999999999836\n", - "Error:0.0220522500000049 Prediction:0.6514999999999835\n", - "Error:0.021904000000004902 Prediction:0.6519999999999835\n", - "Error:0.021756250000004904 Prediction:0.6524999999999834\n", - "Error:0.021609000000004902 Prediction:0.6529999999999834\n", - "Error:0.0214622500000049 Prediction:0.6534999999999833\n", - "Error:0.0213160000000049 Prediction:0.6539999999999833\n", - "Error:0.0211702500000049 Prediction:0.6544999999999832\n", - "Error:0.021025000000004897 Prediction:0.6549999999999832\n", - "Error:0.0208802500000049 Prediction:0.6554999999999831\n", - "Error:0.020736000000004896 Prediction:0.655999999999983\n", - "Error:0.020592250000004895 Prediction:0.656499999999983\n", - "Error:0.020449000000004894 Prediction:0.6569999999999829\n", - "Error:0.020306250000004893 Prediction:0.6574999999999829\n", - "Error:0.02016400000000489 Prediction:0.6579999999999828\n", - "Error:0.02002225000000489 Prediction:0.6584999999999828\n", - "Error:0.019881000000004888 Prediction:0.6589999999999827\n", - "Error:0.019740250000004886 Prediction:0.6594999999999827\n", - "Error:0.019600000000004884 Prediction:0.6599999999999826\n", - "Error:0.019460250000004883 Prediction:0.6604999999999825\n", - "Error:0.01932100000000488 Prediction:0.6609999999999825\n", - "Error:0.01918225000000488 Prediction:0.6614999999999824\n", - "Error:0.019044000000004876 Prediction:0.6619999999999824\n", - "Error:0.018906250000004874 Prediction:0.6624999999999823\n", - "Error:0.01876900000000487 Prediction:0.6629999999999823\n", - "Error:0.018632250000004867 
Prediction:0.6634999999999822\n", - "Error:0.018496000000004866 Prediction:0.6639999999999822\n", - "Error:0.018360250000004862 Prediction:0.6644999999999821\n", - "Error:0.01822500000000486 Prediction:0.664999999999982\n", - "Error:0.018090250000004856 Prediction:0.665499999999982\n", - "Error:0.017956000000004853 Prediction:0.6659999999999819\n", - "Error:0.017822250000004848 Prediction:0.6664999999999819\n", - "Error:0.017689000000004847 Prediction:0.6669999999999818\n", - "Error:0.017556250000004842 Prediction:0.6674999999999818\n", - "Error:0.01742400000000484 Prediction:0.6679999999999817\n", - "Error:0.017292250000004835 Prediction:0.6684999999999817\n", - "Error:0.01716100000000483 Prediction:0.6689999999999816\n", - "Error:0.017030250000004826 Prediction:0.6694999999999816\n", - "Error:0.01690000000000482 Prediction:0.6699999999999815\n", - "Error:0.016770250000004816 Prediction:0.6704999999999814\n", - "Error:0.01664100000000481 Prediction:0.6709999999999814\n", - "Error:0.016512250000004808 Prediction:0.6714999999999813\n", - "Error:0.016384000000004804 Prediction:0.6719999999999813\n", - "Error:0.016256250000004798 Prediction:0.6724999999999812\n", - "Error:0.016129000000004796 Prediction:0.6729999999999812\n", - "Error:0.01600225000000479 Prediction:0.6734999999999811\n", - "Error:0.015876000000004786 Prediction:0.6739999999999811\n", - "Error:0.015750250000004778 Prediction:0.674499999999981\n", - "Error:0.015625000000004774 Prediction:0.674999999999981\n", - "Error:0.015500250000004769 Prediction:0.6754999999999809\n", - "Error:0.015376000000004763 Prediction:0.6759999999999808\n", - "Error:0.015252250000004757 Prediction:0.6764999999999808\n", - "Error:0.015129000000004751 Prediction:0.6769999999999807\n", - "Error:0.015006250000004747 Prediction:0.6774999999999807\n", - "Error:0.01488400000000474 Prediction:0.6779999999999806\n", - "Error:0.014762250000004733 Prediction:0.6784999999999806\n", - "Error:0.014641000000004728 Prediction:0.6789999999999805\n", - "Error:0.014520250000004722 Prediction:0.6794999999999805\n", - "Error:0.014400000000004715 Prediction:0.6799999999999804\n", - "Error:0.01428025000000471 Prediction:0.6804999999999803\n", - "Error:0.014161000000004703 Prediction:0.6809999999999803\n", - "Error:0.014042250000004695 Prediction:0.6814999999999802\n", - "Error:0.013924000000004688 Prediction:0.6819999999999802\n", - "Error:0.013806250000004681 Prediction:0.6824999999999801\n", - "Error:0.013689000000004675 Prediction:0.6829999999999801\n", - "Error:0.013572250000004667 Prediction:0.68349999999998\n", - "Error:0.01345600000000466 Prediction:0.68399999999998\n", - "Error:0.013340250000004652 Prediction:0.6844999999999799\n", - "Error:0.013225000000004644 Prediction:0.6849999999999798\n", - "Error:0.013110250000004637 Prediction:0.6854999999999798\n", - "Error:0.01299600000000463 Prediction:0.6859999999999797\n", - "Error:0.012882250000004623 Prediction:0.6864999999999797\n", - "Error:0.012769000000004615 Prediction:0.6869999999999796\n", - "Error:0.012656250000004607 Prediction:0.6874999999999796\n", - "Error:0.012544000000004598 Prediction:0.6879999999999795\n", - "Error:0.01243225000000459 Prediction:0.6884999999999795\n", - "Error:0.012321000000004582 Prediction:0.6889999999999794\n", - "Error:0.012210250000004573 Prediction:0.6894999999999794\n", - "Error:0.012100000000004564 Prediction:0.6899999999999793\n", - "Error:0.011990250000004556 Prediction:0.6904999999999792\n", - "Error:0.011881000000004548 Prediction:0.6909999999999792\n", - 
"Error:0.011772250000004538 Prediction:0.6914999999999791\n", - "Error:0.011664000000004528 Prediction:0.6919999999999791\n", - "Error:0.01155625000000452 Prediction:0.692499999999979\n", - "Error:0.011449000000004511 Prediction:0.692999999999979\n", - "Error:0.011342250000004502 Prediction:0.6934999999999789\n", - "Error:0.011236000000004492 Prediction:0.6939999999999789\n", - "Error:0.011130250000004482 Prediction:0.6944999999999788\n", - "Error:0.011025000000004472 Prediction:0.6949999999999787\n", - "Error:0.010920250000004463 Prediction:0.6954999999999787\n", - "Error:0.010816000000004452 Prediction:0.6959999999999786\n", - "Error:0.010712250000004442 Prediction:0.6964999999999786\n", - "Error:0.010609000000004433 Prediction:0.6969999999999785\n", - "Error:0.010506250000004422 Prediction:0.6974999999999785\n", - "Error:0.010404000000004411 Prediction:0.6979999999999784\n", - "Error:0.010302250000004402 Prediction:0.6984999999999784\n", - "Error:0.01020100000000439 Prediction:0.6989999999999783\n", - "Error:0.01010025000000438 Prediction:0.6994999999999783\n", - "Error:0.01000000000000437 Prediction:0.6999999999999782\n", - "Error:0.009900250000004359 Prediction:0.7004999999999781\n", - "Error:0.009801000000004348 Prediction:0.7009999999999781\n", - "Error:0.009702250000004338 Prediction:0.701499999999978\n", - "Error:0.009604000000004326 Prediction:0.701999999999978\n", - "Error:0.009506250000004315 Prediction:0.7024999999999779\n", - "Error:0.009409000000004303 Prediction:0.7029999999999779\n", - "Error:0.009312250000004291 Prediction:0.7034999999999778\n", - "Error:0.00921600000000428 Prediction:0.7039999999999778\n", - "Error:0.009120250000004267 Prediction:0.7044999999999777\n", - "Error:0.009025000000004255 Prediction:0.7049999999999776\n", - "Error:0.008930250000004244 Prediction:0.7054999999999776\n", - "Error:0.008836000000004231 Prediction:0.7059999999999775\n", - "Error:0.008742250000004219 Prediction:0.7064999999999775\n", - "Error:0.008649000000004207 Prediction:0.7069999999999774\n", - "Error:0.008556250000004194 Prediction:0.7074999999999774\n", - "Error:0.008464000000004182 Prediction:0.7079999999999773\n", - "Error:0.00837225000000417 Prediction:0.7084999999999773\n", - "Error:0.008281000000004157 Prediction:0.7089999999999772\n", - "Error:0.008190250000004144 Prediction:0.7094999999999771\n", - "Error:0.008100000000004132 Prediction:0.7099999999999771\n", - "Error:0.008010250000004118 Prediction:0.710499999999977\n", - "Error:0.007921000000004105 Prediction:0.710999999999977\n", - "Error:0.007832250000004091 Prediction:0.7114999999999769\n", - "Error:0.007744000000004078 Prediction:0.7119999999999769\n", - "Error:0.007656250000004064 Prediction:0.7124999999999768\n", - "Error:0.007569000000004051 Prediction:0.7129999999999768\n", - "Error:0.007482250000004037 Prediction:0.7134999999999767\n", - "Error:0.0073960000000040235 Prediction:0.7139999999999767\n", - "Error:0.007310250000004009 Prediction:0.7144999999999766\n", - "Error:0.007225000000003996 Prediction:0.7149999999999765\n", - "Error:0.007140250000003981 Prediction:0.7154999999999765\n", - "Error:0.007056000000003967 Prediction:0.7159999999999764\n", - "Error:0.006972250000003953 Prediction:0.7164999999999764\n", - "Error:0.006889000000003938 Prediction:0.7169999999999763\n", - "Error:0.006806250000003923 Prediction:0.7174999999999763\n", - "Error:0.006724000000003908 Prediction:0.7179999999999762\n", - "Error:0.006642250000003893 Prediction:0.7184999999999762\n", - "Error:0.006561000000003879 
Prediction:0.7189999999999761\n", - "Error:0.006480250000003863 Prediction:0.719499999999976\n", - "Error:0.006400000000003848 Prediction:0.719999999999976\n", - "Error:0.006320250000003833 Prediction:0.7204999999999759\n", - "Error:0.006241000000003817 Prediction:0.7209999999999759\n", - "Error:0.006162250000003802 Prediction:0.7214999999999758\n", - "Error:0.006084000000003786 Prediction:0.7219999999999758\n", - "Error:0.006006250000003771 Prediction:0.7224999999999757\n", - "Error:0.005929000000003755 Prediction:0.7229999999999757\n", - "Error:0.005852250000003739 Prediction:0.7234999999999756\n", - "Error:0.005776000000003723 Prediction:0.7239999999999756\n", - "Error:0.005700250000003707 Prediction:0.7244999999999755\n", - "Error:0.00562500000000369 Prediction:0.7249999999999754\n", - "Error:0.005550250000003674 Prediction:0.7254999999999754\n", - "Error:0.005476000000003658 Prediction:0.7259999999999753\n", - "Error:0.005402250000003641 Prediction:0.7264999999999753\n", - "Error:0.005329000000003624 Prediction:0.7269999999999752\n", - "Error:0.005256250000003607 Prediction:0.7274999999999752\n", - "Error:0.00518400000000359 Prediction:0.7279999999999751\n", - "Error:0.005112250000003573 Prediction:0.7284999999999751\n", - "Error:0.0050410000000035565 Prediction:0.728999999999975\n", - "Error:0.004970250000003539 Prediction:0.729499999999975\n", - "Error:0.004900000000003521 Prediction:0.7299999999999749\n", - "Error:0.004830250000003504 Prediction:0.7304999999999748\n", - "Error:0.004761000000003486 Prediction:0.7309999999999748\n", - "Error:0.004692250000003469 Prediction:0.7314999999999747\n", - "Error:0.004624000000003451 Prediction:0.7319999999999747\n", - "Error:0.0045562500000034326 Prediction:0.7324999999999746\n", - "Error:0.004489000000003415 Prediction:0.7329999999999746\n", - "Error:0.0044222500000033966 Prediction:0.7334999999999745\n", - "Error:0.004356000000003378 Prediction:0.7339999999999745\n", - "Error:0.00429025000000336 Prediction:0.7344999999999744\n", - "Error:0.0042250000000033415 Prediction:0.7349999999999743\n", - "Error:0.004160250000003323 Prediction:0.7354999999999743\n", - "Error:0.0040960000000033045 Prediction:0.7359999999999742\n", - "Error:0.004032250000003285 Prediction:0.7364999999999742\n", - "Error:0.003969000000003267 Prediction:0.7369999999999741\n", - "Error:0.003906250000003247 Prediction:0.7374999999999741\n", - "Error:0.003844000000003228 Prediction:0.737999999999974\n", - "Error:0.003782250000003209 Prediction:0.738499999999974\n", - "Error:0.0037210000000031896 Prediction:0.7389999999999739\n", - "Error:0.00366025000000317 Prediction:0.7394999999999738\n", - "Error:0.0036000000000031506 Prediction:0.7399999999999738\n", - "Error:0.0035402500000031307 Prediction:0.7404999999999737\n", - "Error:0.003481000000003111 Prediction:0.7409999999999737\n", - "Error:0.0034222500000030912 Prediction:0.7414999999999736\n", - "Error:0.003364000000003071 Prediction:0.7419999999999736\n", - "Error:0.003306250000003051 Prediction:0.7424999999999735\n", - "Error:0.0032490000000030307 Prediction:0.7429999999999735\n", - "Error:0.0031922500000030104 Prediction:0.7434999999999734\n", - "Error:0.0031360000000029897 Prediction:0.7439999999999733\n", - "Error:0.003080250000002969 Prediction:0.7444999999999733\n", - "Error:0.0030250000000029485 Prediction:0.7449999999999732\n", - "Error:0.0029702500000029276 Prediction:0.7454999999999732\n", - "Error:0.0029160000000029067 Prediction:0.7459999999999731\n", - "Error:0.002862250000002886 
Prediction:0.7464999999999731\n", - "Error:0.0028090000000028648 Prediction:0.746999999999973\n", - "Error:0.0027562500000028437 Prediction:0.747499999999973\n", - "Error:0.002704000000002822 Prediction:0.7479999999999729\n", - "Error:0.002652250000002801 Prediction:0.7484999999999729\n", - "Error:0.002601000000002779 Prediction:0.7489999999999728\n", - "Error:0.0025502500000027573 Prediction:0.7494999999999727\n", - "Error:0.0025000000000027357 Prediction:0.7499999999999727\n", - "Error:0.0024502500000027137 Prediction:0.7504999999999726\n", - "Error:0.0024010000000026918 Prediction:0.7509999999999726\n", - "Error:0.0023522500000026695 Prediction:0.7514999999999725\n", - "Error:0.0023040000000026472 Prediction:0.7519999999999725\n", - "Error:0.002256250000002625 Prediction:0.7524999999999724\n", - "Error:0.0022090000000026025 Prediction:0.7529999999999724\n", - "Error:0.00216225000000258 Prediction:0.7534999999999723\n", - "Error:0.0021160000000025572 Prediction:0.7539999999999722\n", - "Error:0.0020702500000025345 Prediction:0.7544999999999722\n", - "Error:0.0020250000000025118 Prediction:0.7549999999999721\n", - "Error:0.0019802500000024887 Prediction:0.7554999999999721\n", - "Error:0.0019360000000024655 Prediction:0.755999999999972\n", - "Error:0.0018922500000024423 Prediction:0.756499999999972\n", - "Error:0.0018490000000024188 Prediction:0.7569999999999719\n", - "Error:0.0018062500000023956 Prediction:0.7574999999999719\n", - "Error:0.001764000000002372 Prediction:0.7579999999999718\n", - "Error:0.0017222500000023482 Prediction:0.7584999999999718\n", - "Error:0.0016810000000023245 Prediction:0.7589999999999717\n", - "Error:0.0016402500000023007 Prediction:0.7594999999999716\n", - "Error:0.0016000000000022767 Prediction:0.7599999999999716\n", - "Error:0.0015602500000022525 Prediction:0.7604999999999715\n", - "Error:0.0015210000000022283 Prediction:0.7609999999999715\n", - "Error:0.001482250000002204 Prediction:0.7614999999999714\n", - "Error:0.0014440000000021794 Prediction:0.7619999999999714\n", - "Error:0.001406250000002155 Prediction:0.7624999999999713\n", - "Error:0.0013690000000021302 Prediction:0.7629999999999713\n", - "Error:0.0013322500000021056 Prediction:0.7634999999999712\n", - "Error:0.0012960000000020806 Prediction:0.7639999999999711\n", - "Error:0.0012602500000020557 Prediction:0.7644999999999711\n", - "Error:0.0012250000000020305 Prediction:0.764999999999971\n", - "Error:0.0011902500000020052 Prediction:0.765499999999971\n", - "Error:0.00115600000000198 Prediction:0.7659999999999709\n", - "Error:0.0011222500000019546 Prediction:0.7664999999999709\n", - "Error:0.0010890000000019291 Prediction:0.7669999999999708\n", - "Error:0.0010562500000019033 Prediction:0.7674999999999708\n", - "Error:0.0010240000000018776 Prediction:0.7679999999999707\n", - "Error:0.0009922500000018517 Prediction:0.7684999999999707\n", - "Error:0.0009610000000018258 Prediction:0.7689999999999706\n", - "Error:0.0009302500000017998 Prediction:0.7694999999999705\n", - "Error:0.0009000000000017735 Prediction:0.7699999999999705\n", - "Error:0.0008702500000017472 Prediction:0.7704999999999704\n", - "Error:0.0008410000000017208 Prediction:0.7709999999999704\n", - "Error:0.0008122500000016942 Prediction:0.7714999999999703\n", - "Error:0.0007840000000016676 Prediction:0.7719999999999703\n", - "Error:0.0007562500000016409 Prediction:0.7724999999999702\n", - "Error:0.000729000000001614 Prediction:0.7729999999999702\n", - "Error:0.000702250000001587 Prediction:0.7734999999999701\n", - 
"Error:0.0006760000000015599 Prediction:0.77399999999997\n", - "Error:0.0006502500000015327 Prediction:0.77449999999997\n", - "Error:0.0006250000000015054 Prediction:0.7749999999999699\n", - "Error:0.0006002500000014781 Prediction:0.7754999999999699\n", - "Error:0.0005760000000014506 Prediction:0.7759999999999698\n", - "Error:0.0005522500000014229 Prediction:0.7764999999999698\n", - "Error:0.0005290000000013951 Prediction:0.7769999999999697\n", - "Error:0.0005062500000013673 Prediction:0.7774999999999697\n", - "Error:0.00048400000000133937 Prediction:0.7779999999999696\n", - "Error:0.0004622500000013113 Prediction:0.7784999999999695\n", - "Error:0.0004410000000012831 Prediction:0.7789999999999695\n", - "Error:0.0004202500000012548 Prediction:0.7794999999999694\n", - "Error:0.0004000000000012264 Prediction:0.7799999999999694\n", - "Error:0.0003802500000011979 Prediction:0.7804999999999693\n", - "Error:0.00036100000000116925 Prediction:0.7809999999999693\n", - "Error:0.0003422500000011405 Prediction:0.7814999999999692\n", - "Error:0.0003240000000011117 Prediction:0.7819999999999692\n", - "Error:0.00030625000000108273 Prediction:0.7824999999999691\n", - "Error:0.00028900000000105366 Prediction:0.782999999999969\n", - "Error:0.0002722500000010245 Prediction:0.783499999999969\n", - "Error:0.00025600000000099523 Prediction:0.7839999999999689\n", - "Error:0.00024025000000096582 Prediction:0.7844999999999689\n", - "Error:0.0002250000000009363 Prediction:0.7849999999999688\n", - "Error:0.0002102500000009067 Prediction:0.7854999999999688\n", - "Error:0.00019600000000087698 Prediction:0.7859999999999687\n", - "Error:0.00018225000000084715 Prediction:0.7864999999999687\n", - "Error:0.0001690000000008172 Prediction:0.7869999999999686\n", - "Error:0.00015625000000078716 Prediction:0.7874999999999686\n", - "Error:0.000144000000000757 Prediction:0.7879999999999685\n", - "Error:0.0001322500000007267 Prediction:0.7884999999999684\n", - "Error:0.00012100000000069633 Prediction:0.7889999999999684\n", - "Error:0.00011025000000066583 Prediction:0.7894999999999683\n", - "Error:0.00010000000000063523 Prediction:0.7899999999999683\n", - "Error:9.025000000060451e-05 Prediction:0.7904999999999682\n", - "Error:8.100000000057368e-05 Prediction:0.7909999999999682\n", - "Error:7.225000000054275e-05 Prediction:0.7914999999999681\n", - "Error:6.40000000005117e-05 Prediction:0.7919999999999681\n", - "Error:5.625000000048055e-05 Prediction:0.792499999999968\n", - "Error:4.9000000000449285e-05 Prediction:0.792999999999968\n", - "Error:4.225000000041791e-05 Prediction:0.7934999999999679\n", - "Error:3.6000000000386424e-05 Prediction:0.7939999999999678\n", - "Error:3.0250000000354826e-05 Prediction:0.7944999999999678\n", - "Error:2.500000000032312e-05 Prediction:0.7949999999999677\n", - "Error:2.0250000000291302e-05 Prediction:0.7954999999999677\n", - "Error:1.6000000000259378e-05 Prediction:0.7959999999999676\n", - "Error:1.225000000022734e-05 Prediction:0.7964999999999676\n", - "Error:9.000000000195194e-06 Prediction:0.7969999999999675\n", - "Error:6.250000000162936e-06 Prediction:0.7974999999999675\n", - "Error:4.000000000130569e-06 Prediction:0.7979999999999674\n", - "Error:2.2500000000980924e-06 Prediction:0.7984999999999673\n", - "Error:1.000000000065505e-06 Prediction:0.7989999999999673\n", - "Error:2.5000000003280753e-07 Prediction:0.7994999999999672\n", - "Error:1.0799505792475652e-27 Prediction:0.7999999999999672\n" - ] - } - ], - "source": [ - "weight = 0.5\n", - "input = 0.5\n", - "goal_prediction = 0.8\n", - "\n", 
- "step_amount = 0.001\n", - "\n", - "for iteration in range(1101):\n", - "\n", - " prediction = input * weight\n", - " error = (prediction - goal_prediction) ** 2\n", - "\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(prediction))\n", - " \n", - " up_prediction = input * (weight + step_amount)\n", - " up_error = (goal_prediction - up_prediction) ** 2\n", - "\n", - " down_prediction = input * (weight - step_amount)\n", - " down_error = (goal_prediction - down_prediction) ** 2\n", - "\n", - " if(down_error < up_error):\n", - " weight = weight - step_amount\n", - " \n", - " if(down_error > up_error):\n", - " weight = weight + step_amount" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Calculating Both Direction and Amount from Error" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.30250000000000005 Prediction:0.25\n", - "Error:0.17015625000000004 Prediction:0.3875\n", - "Error:0.095712890625 Prediction:0.49062500000000003\n", - "Error:0.05383850097656251 Prediction:0.56796875\n", - "Error:0.03028415679931642 Prediction:0.6259765625\n", - "Error:0.0170348381996155 Prediction:0.669482421875\n", - "Error:0.00958209648728372 Prediction:0.70211181640625\n", - "Error:0.005389929274097089 Prediction:0.7265838623046875\n", - "Error:0.0030318352166796153 Prediction:0.7449378967285156\n", - "Error:0.0017054073093822882 Prediction:0.7587034225463867\n", - "Error:0.0009592916115275371 Prediction:0.76902756690979\n", - "Error:0.0005396015314842384 Prediction:0.7767706751823426\n", - "Error:0.000303525861459885 Prediction:0.7825780063867569\n", - "Error:0.00017073329707118678 Prediction:0.7869335047900676\n", - "Error:9.603747960254256e-05 Prediction:0.7902001285925507\n", - "Error:5.402108227642978e-05 Prediction:0.7926500964444131\n", - "Error:3.038685878049206e-05 Prediction:0.7944875723333098\n", - "Error:1.7092608064027242e-05 Prediction:0.7958656792499823\n", - "Error:9.614592036015323e-06 Prediction:0.7968992594374867\n", - "Error:5.408208020258491e-06 Prediction:0.7976744445781151\n" - ] - } - ], - "source": [ - "weight = 0.5\n", - "goal_pred = 0.8\n", - "input = 0.5\n", - "\n", - "for iteration in range(20):\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " direction_and_amount = (pred - goal_pred) * input\n", - " weight = weight - direction_and_amount\n", - "\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# One Iteration of Gradient Descent" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "# 1) An Empty Network\n", - "\n", - "weight = 0.1 \n", - "alpha = 0.01\n", - "\n", - "def neural_network(input, weight):\n", - " prediction = input * weight\n", - " return prediction\n", - "\n", - "# 2) PREDICT: Making A Prediction And Evaluating Error\n", - "\n", - "number_of_toes = [8.5]\n", - "win_or_lose_binary = [1] # (won!!!)\n", - "\n", - "input = number_of_toes[0]\n", - "goal_pred = win_or_lose_binary[0]\n", - "\n", - "pred = neural_network(input,weight)\n", - "error = (pred - goal_pred) ** 2\n", - "\n", - "# 3) COMPARE: Calculating \"Node Delta\" and Putting it on the Output Node\n", - "\n", - "delta = pred - goal_pred\n", - "\n", - "# 4) LEARN: Calculating \"Weight Delta\" and Putting it on the Weight\n", - "\n", - "weight_delta = input * delta\n", - 
"\n", - "# 5) LEARN: Updating the Weight\n", - "\n", - "alpha = 0.01 # fixed before training\n", - "weight -= weight_delta * alpha\n", - "\n", - "weight, goal_pred, input = (0.0, 0.8, 0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning is just Reducing Error" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.6400000000000001 Prediction:0.0\n", - "Error:0.3600000000000001 Prediction:0.2\n", - "Error:0.2025 Prediction:0.35000000000000003\n", - "Error:0.11390625000000001 Prediction:0.4625\n" - ] - } - ], - "source": [ - "for iteration in range(4):\n", - " \n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " delta = pred - goal_pred\n", - " weight_delta = delta * input\n", - " weight = weight - weight_delta\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Let's Watch Several Steps of Learning" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----\n", - "Weight:0.0\n", - "Error:0.6400000000000001 Prediction:0.0\n", - "Delta:-0.8 Weight Delta:-0.8800000000000001\n", - "-----\n", - "Weight:0.8800000000000001\n", - "Error:0.02822400000000005 Prediction:0.9680000000000002\n", - "Delta:0.16800000000000015 Weight Delta:0.1848000000000002\n", - "-----\n", - "Weight:0.6951999999999999\n", - "Error:0.0012446784000000064 Prediction:0.76472\n", - "Delta:-0.03528000000000009 Weight Delta:-0.0388080000000001\n", - "-----\n", - "Weight:0.734008\n", - "Error:5.4890317439999896e-05 Prediction:0.8074088\n", - "Delta:0.007408799999999993 Weight Delta:0.008149679999999992\n" - ] - } - ], - "source": [ - "weight, goal_pred, input = (0.0, 0.8, 1.1)\n", - "\n", - "for iteration in range(4):\n", - " print(\"-----\\nWeight:\" + str(weight))\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " delta = pred - goal_pred\n", - " weight_delta = delta * input\n", - " weight = weight - weight_delta\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))\n", - " print(\"Delta:\" + str(delta) + \" Weight Delta:\" + str(weight_delta))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Why does this work? What really is weight_delta?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.30250000000000005 Prediction:0.25\n", - "Error:0.17015625000000004 Prediction:0.3875\n", - "Error:0.095712890625 Prediction:0.49062500000000003\n", - "Error:0.05383850097656251 Prediction:0.56796875\n", - "Error:0.03028415679931642 Prediction:0.6259765625\n", - "Error:0.0170348381996155 Prediction:0.669482421875\n", - "Error:0.00958209648728372 Prediction:0.70211181640625\n", - "Error:0.005389929274097089 Prediction:0.7265838623046875\n", - "Error:0.0030318352166796153 Prediction:0.7449378967285156\n", - "Error:0.0017054073093822882 Prediction:0.7587034225463867\n", - "Error:0.0009592916115275371 Prediction:0.76902756690979\n", - "Error:0.0005396015314842384 Prediction:0.7767706751823426\n", - "Error:0.000303525861459885 Prediction:0.7825780063867569\n", - "Error:0.00017073329707118678 Prediction:0.7869335047900676\n", - "Error:9.603747960254256e-05 Prediction:0.7902001285925507\n", - "Error:5.402108227642978e-05 Prediction:0.7926500964444131\n", - "Error:3.038685878049206e-05 Prediction:0.7944875723333098\n", - "Error:1.7092608064027242e-05 Prediction:0.7958656792499823\n", - "Error:9.614592036015323e-06 Prediction:0.7968992594374867\n", - "Error:5.408208020258491e-06 Prediction:0.7976744445781151\n" - ] - } - ], - "source": [ - "weight = 0.5\n", - "goal_pred = 0.8\n", - "input = 0.5\n", - "\n", - "for iteration in range(20):\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " direction_and_amount = (pred - goal_pred) * input\n", - " weight = weight - direction_and_amount\n", - "\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to use a Derivative to Learn" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.6400000000000001 Prediction:0.0\n", - "Error:0.02822400000000005 Prediction:0.9680000000000002\n", - "Error:0.0012446784000000064 Prediction:0.76472\n", - "Error:5.4890317439999896e-05 Prediction:0.8074088\n" - ] - } - ], - "source": [ - "weight = 0.0\n", - "goal_pred = 0.8\n", - "input = 1.1\n", - "\n", - "for iteration in range(4):\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " delta = pred - goal_pred\n", - " weight_delta = delta * input\n", - " weight = weight - weight_delta\n", - "\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Breaking Gradient Descent" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.30250000000000005 Prediction:0.25\n", - "Error:0.17015625000000004 Prediction:0.3875\n", - "Error:0.095712890625 Prediction:0.49062500000000003\n", - "Error:0.05383850097656251 Prediction:0.56796875\n", - "Error:0.03028415679931642 Prediction:0.6259765625\n", - "Error:0.0170348381996155 Prediction:0.669482421875\n", - "Error:0.00958209648728372 Prediction:0.70211181640625\n", - "Error:0.005389929274097089 Prediction:0.7265838623046875\n", - "Error:0.0030318352166796153 Prediction:0.7449378967285156\n", - "Error:0.0017054073093822882 Prediction:0.7587034225463867\n", - "Error:0.0009592916115275371 Prediction:0.76902756690979\n", - 
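The "Why does this work? What really is weight_delta?" heading has a one-line answer: weight_delta = input * (pred - goal) is, up to a constant factor of 2, the derivative of the squared error with respect to the weight, so subtracting it moves the weight downhill. A quick finite-difference check of that claim (editorial, not from the notebook):

def squared_error(w, inp=1.1, goal=0.8):
    return (inp * w - goal) ** 2

w, h, inp, goal = 0.3, 1e-6, 1.1, 0.8
numeric = (squared_error(w + h) - squared_error(w - h)) / (2 * h)   # central difference
analytic = 2 * inp * (inp * w - goal)                               # exact derivative
assert abs(numeric - analytic) < 1e-6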
"Error:0.0005396015314842384 Prediction:0.7767706751823426\n", - "Error:0.000303525861459885 Prediction:0.7825780063867569\n", - "Error:0.00017073329707118678 Prediction:0.7869335047900676\n", - "Error:9.603747960254256e-05 Prediction:0.7902001285925507\n", - "Error:5.402108227642978e-05 Prediction:0.7926500964444131\n", - "Error:3.038685878049206e-05 Prediction:0.7944875723333098\n", - "Error:1.7092608064027242e-05 Prediction:0.7958656792499823\n", - "Error:9.614592036015323e-06 Prediction:0.7968992594374867\n", - "Error:5.408208020258491e-06 Prediction:0.7976744445781151\n" - ] - } - ], - "source": [ - "weight = 0.5\n", - "goal_pred = 0.8\n", - "input = 0.5\n", - "\n", - "for iteration in range(20):\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " delta = pred - goal_pred\n", - " weight_delta = input * delta\n", - " weight = weight - weight_delta\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.03999999999999998 Prediction:1.0\n", - "Error:0.3599999999999998 Prediction:0.20000000000000018\n", - "Error:3.2399999999999984 Prediction:2.5999999999999996\n", - "Error:29.159999999999986 Prediction:-4.599999999999999\n", - "Error:262.4399999999999 Prediction:16.999999999999996\n", - "Error:2361.959999999998 Prediction:-47.79999999999998\n", - "Error:21257.639999999978 Prediction:146.59999999999994\n", - "Error:191318.75999999983 Prediction:-436.5999999999998\n", - "Error:1721868.839999999 Prediction:1312.9999999999995\n", - "Error:15496819.559999991 Prediction:-3935.799999999999\n", - "Error:139471376.03999993 Prediction:11810.599999999997\n", - "Error:1255242384.3599997 Prediction:-35428.59999999999\n", - "Error:11297181459.239996 Prediction:106288.99999999999\n", - "Error:101674633133.15994 Prediction:-318863.79999999993\n", - "Error:915071698198.4395 Prediction:956594.5999999997\n", - "Error:8235645283785.954 Prediction:-2869780.599999999\n", - "Error:74120807554073.56 Prediction:8609344.999999996\n", - "Error:667087267986662.1 Prediction:-25828031.799999986\n", - "Error:6003785411879960.0 Prediction:77484098.59999996\n", - "Error:5.403406870691965e+16 Prediction:-232452292.5999999\n" - ] - } - ], - "source": [ - "# Now let's break it:\n", - "\n", - "weight = 0.5\n", - "goal_pred = 0.8\n", - "input = 2\n", - "\n", - "for iteration in range(20):\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " delta = pred - goal_pred\n", - " weight_delta = input * delta\n", - " weight = weight - weight_delta\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.03999999999999998 Prediction:1.0\n", - "Error:0.0144 Prediction:0.92\n", - "Error:0.005183999999999993 Prediction:0.872\n", - "Error:0.0018662400000000014 Prediction:0.8432000000000001\n", - "Error:0.0006718464000000028 Prediction:0.8259200000000001\n", - "Error:0.00024186470400000033 Prediction:0.815552\n", - "Error:8.70712934399997e-05 Prediction:0.8093312\n", - "Error:3.134566563839939e-05 Prediction:0.80559872\n", - "Error:1.1284439629823931e-05 Prediction:0.803359232\n", - "Error:4.062398266736526e-06 Prediction:0.8020155392\n", - "Error:1.4624633760252567e-06 Prediction:0.8012093235200001\n", - 
"Error:5.264868153690924e-07 Prediction:0.8007255941120001\n", - "Error:1.8953525353291194e-07 Prediction:0.8004353564672001\n", - "Error:6.82326912718715e-08 Prediction:0.8002612138803201\n", - "Error:2.456376885786678e-08 Prediction:0.8001567283281921\n", - "Error:8.842956788836216e-09 Prediction:0.8000940369969153\n", - "Error:3.1834644439835434e-09 Prediction:0.8000564221981492\n", - "Error:1.1460471998340758e-09 Prediction:0.8000338533188895\n", - "Error:4.125769919393652e-10 Prediction:0.8000203119913337\n", - "Error:1.485277170987127e-10 Prediction:0.8000121871948003\n" - ] - } - ], - "source": [ - "weight = 0.5\n", - "goal_pred = 0.8\n", - "input = 2\n", - "alpha = 0.1\n", - "\n", - "for iteration in range(20):\n", - " pred = input * weight\n", - " error = (pred - goal_pred) ** 2\n", - " derivative = input * (pred - goal_pred)\n", - " weight = weight - (alpha * derivative)\n", - " \n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(pred))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter5 - Generalizing Gradient Descent - Learning Multiple Weights at a Time-checkpoint.ipynb b/.ipynb_checkpoints/Chapter5 - Generalizing Gradient Descent - Learning Multiple Weights at a Time-checkpoint.ipynb deleted file mode 100644 index e660a01..0000000 --- a/.ipynb_checkpoints/Chapter5 - Generalizing Gradient Descent - Learning Multiple Weights at a Time-checkpoint.ipynb +++ /dev/null @@ -1,464 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Gradient Descent Learning with Multiple Inputs" - ] - }, - { - "cell_type": "code", - "execution_count": 129, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Weights:[0.1119, 0.20091, -0.09832]\n", - "Weight Deltas:[-1.189999999999999, -0.09099999999999994, -0.16799999999999987]\n" - ] - } - ], - "source": [ - "def w_sum(a,b):\n", - " assert(len(a) == len(b))\n", - " output = 0\n", - "\n", - " for i in range(len(a)):\n", - " output += (a[i] * b[i])\n", - "\n", - " return output\n", - "\n", - "weights = [0.1, 0.2, -.1] \n", - "\n", - "def neural_network(input,weights):\n", - " pred = w_sum(input,weights)\n", - " return pred\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65, 0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "win_or_lose_binary = [1, 1, 0, 1]\n", - "\n", - "true = win_or_lose_binary[0]\n", - "\n", - "# Input corresponds to every entry\n", - "# for the first game of the season.\n", - "\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "\n", - "pred = neural_network(input,weights)\n", - "error = (pred - true) ** 2\n", - "delta = pred - true\n", - "\n", - "def ele_mul(number,vector):\n", - " output = [0,0,0]\n", - "\n", - " assert(len(output) == len(vector))\n", - "\n", - " for i in range(len(vector)):\n", - " output[i] = number * vector[i]\n", - "\n", - " return output\n", - "\n", - " \n", - "\n", - "alpha = 0.01\n", - "\n", - "for i in range(len(weights)):\n", - " weights[i] -= alpha * 
weight_deltas[i]\n", - " \n", - "print(\"Weights:\" + str(weights))\n", - "print(\"Weight Deltas:\" + str(weight_deltas))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Let's Watch Several Steps of Learning" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Iteration:1\n", - "Pred:0.8600000000000001\n", - "Error:0.01959999999999997\n", - "Delta:-0.1399999999999999\n", - "Weights:[0.1, 0.2, -0.1]\n", - "Weight_Deltas:\n", - "[-1.189999999999999, -0.09099999999999994, -0.16799999999999987]\n", - "\n", - "Iteration:2\n", - "Pred:0.9637574999999999\n", - "Error:0.0013135188062500048\n", - "Delta:-0.036242500000000066\n", - "Weights:[0.1119, 0.20091, -0.09832]\n", - "Weight_Deltas:\n", - "[-0.30806125000000056, -0.023557625000000044, -0.04349100000000008]\n", - "\n", - "Iteration:3\n", - "Pred:0.9906177228125002\n", - "Error:8.802712522307997e-05\n", - "Delta:-0.009382277187499843\n", - "Weights:[0.11498061250000001, 0.20114557625, -0.09788509000000001]\n", - "Weight_Deltas:\n", - "[-0.07974935609374867, -0.006098480171874899, -0.011258732624999811]\n", - "\n" - ] - } - ], - "source": [ - "def neural_network(input, weights):\n", - " out = 0\n", - " for i in range(len(input)):\n", - " out += (input[i] * weights[i])\n", - " return out\n", - "\n", - "def ele_mul(scalar, vector):\n", - " out = [0,0,0]\n", - " for i in range(len(out)):\n", - " out[i] = vector[i] * scalar\n", - " return out\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65, 0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "win_or_lose_binary = [1, 1, 0, 1]\n", - "true = win_or_lose_binary[0]\n", - "\n", - "alpha = 0.01\n", - "weights = [0.1, 0.2, -.1]\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "\n", - "for iter in range(3):\n", - "\n", - " pred = neural_network(input,weights)\n", - "\n", - " error = (pred - true) ** 2\n", - " delta = pred - true\n", - "\n", - " weight_deltas=ele_mul(delta,input)\n", - "\n", - " print(\"Iteration:\" + str(iter+1))\n", - " print(\"Pred:\" + str(pred))\n", - " print(\"Error:\" + str(error))\n", - " print(\"Delta:\" + str(delta))\n", - " print(\"Weights:\" + str(weights))\n", - " print(\"Weight_Deltas:\")\n", - " print(str(weight_deltas))\n", - " print(\n", - " )\n", - "\n", - " for i in range(len(weights)):\n", - " weights[i]-=alpha*weight_deltas[i]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Freezing One Weight - What Does It Do?" 
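One detail worth flagging in the first Chapter 5 cell above: its update loop reads weight_deltas[i], but the cell never assigns weight_deltas, so a line such as weight_deltas = ele_mul(delta, input) was presumably intended; the printed deltas [-1.19, -0.091, -0.168] are exactly delta times each input. A minimal sketch of the multi-input step with that assumed line restored (list comprehensions stand in for the notebook's helpers):

def w_sum(a, b):
    return sum(x * y for x, y in zip(a, b))

weights = [0.1, 0.2, -0.1]
inputs = [8.5, 0.65, 1.2]     # toes, win record, fans for game 1
goal = 1.0                    # the team won
alpha = 0.01

pred = w_sum(inputs, weights)                        # 0.86
delta = pred - goal                                  # about -0.14
weight_deltas = [delta * x for x in inputs]          # the assumed missing ele_mul(delta, input)
weights = [w - alpha * wd for w, wd in zip(weights, weight_deltas)]
# weights -> roughly [0.1119, 0.20091, -0.09832], matching the printed output above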
- ] - }, - { - "cell_type": "code", - "execution_count": 157, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Iteration:1\n", - "Pred:0.8600000000000001\n", - "Error:0.01959999999999997\n", - "Delta:-0.1399999999999999\n", - "Weights:[0.1, 0.2, -0.1]\n", - "Weight_Deltas:\n", - "[0, -0.09099999999999994, -0.16799999999999987]\n", - "\n", - "Iteration:2\n", - "Pred:0.9382250000000001\n", - "Error:0.003816150624999989\n", - "Delta:-0.06177499999999991\n", - "Weights:[0.1, 0.2273, -0.04960000000000005]\n", - "Weight_Deltas:\n", - "[0, -0.040153749999999946, -0.07412999999999989]\n", - "\n", - "Iteration:3\n", - "Pred:0.97274178125\n", - "Error:0.000743010489422852\n", - "Delta:-0.027258218750000007\n", - "Weights:[0.1, 0.239346125, -0.02736100000000008]\n", - "Weight_Deltas:\n", - "[0, -0.017717842187500006, -0.032709862500000006]\n", - "\n" - ] - } - ], - "source": [ - "def neural_network(input, weights):\n", - " out = 0\n", - " for i in range(len(input)):\n", - " out += (input[i] * weights[i])\n", - " return out\n", - "\n", - "def ele_mul(scalar, vector):\n", - " out = [0,0,0]\n", - " for i in range(len(out)):\n", - " out[i] = vector[i] * scalar\n", - " return out\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65, 0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "win_or_lose_binary = [1, 1, 0, 1]\n", - "true = win_or_lose_binary[0]\n", - "\n", - "alpha = 0.3\n", - "weights = [0.1, 0.2, -.1]\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "\n", - "for iter in range(3):\n", - "\n", - " pred = neural_network(input,weights)\n", - "\n", - " error = (pred - true) ** 2\n", - " delta = pred - true\n", - "\n", - " weight_deltas=ele_mul(delta,input)\n", - " weight_deltas[0] = 0\n", - "\n", - " print(\"Iteration:\" + str(iter+1))\n", - " print(\"Pred:\" + str(pred))\n", - " print(\"Error:\" + str(error))\n", - " print(\"Delta:\" + str(delta))\n", - " print(\"Weights:\" + str(weights))\n", - " print(\"Weight_Deltas:\")\n", - " print(str(weight_deltas))\n", - " print(\n", - " )\n", - "\n", - " for i in range(len(weights)):\n", - " weights[i]-=alpha*weight_deltas[i]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Gradient Descent Learning with Multiple Outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Weights:[0.293825, 0.25655, 0.868475]\n", - "Weight Deltas:[0.061750000000000006, -0.5655, 0.3152500000000001]\n" - ] - } - ], - "source": [ - "# Instead of predicting just \n", - "# whether the team won or lost, \n", - "# now we're also predicting whether\n", - "# they are happy/sad AND the\n", - "# percentage of the team that is\n", - "# hurt. 
We are making this\n", - "# prediction using only\n", - "# the current win/loss record.\n", - "\n", - "weights = [0.3, 0.2, 0.9] \n", - "\n", - "def neural_network(input, weights):\n", - " pred = ele_mul(input,weights)\n", - " return pred\n", - "\n", - "wlrec = [0.65, 1.0, 1.0, 0.9]\n", - "\n", - "hurt = [0.1, 0.0, 0.0, 0.1]\n", - "win = [ 1, 1, 0, 1]\n", - "sad = [0.1, 0.0, 0.1, 0.2]\n", - "\n", - "input = wlrec[0]\n", - "true = [hurt[0], win[0], sad[0]]\n", - "\n", - "pred = neural_network(input,weights)\n", - "\n", - "error = [0, 0, 0] \n", - "delta = [0, 0, 0]\n", - "\n", - "for i in range(len(true)):\n", - " error[i] = (pred[i] - true[i]) ** 2\n", - " delta[i] = pred[i] - true[i]\n", - " \n", - "def scalar_ele_mul(number,vector):\n", - " output = [0,0,0]\n", - "\n", - " assert(len(output) == len(vector))\n", - "\n", - " for i in range(len(vector)):\n", - " output[i] = number * vector[i]\n", - "\n", - " return output\n", - "\n", - "weight_deltas = scalar_ele_mul(input,delta)\n", - "\n", - "alpha = 0.1\n", - "\n", - "for i in range(len(weights)):\n", - " weights[i] -= (weight_deltas[i] * alpha)\n", - " \n", - "print(\"Weights:\" + str(weights))\n", - "print(\"Weight Deltas:\" + str(weight_deltas))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Gradient Descent with Multiple Inputs & Outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - " #toes %win #fans\n", - "weights = [ [0.1, 0.1, -0.3],#hurt?\n", - " [0.1, 0.2, 0.0], #win?\n", - " [0.0, 1.3, 0.1] ]#sad?\n", - "\n", - "def w_sum(a,b):\n", - " assert(len(a) == len(b))\n", - " output = 0\n", - "\n", - " for i in range(len(a)):\n", - " output += (a[i] * b[i])\n", - "\n", - " return output\n", - "\n", - "def vect_mat_mul(vect,matrix):\n", - " assert(len(vect) == len(matrix))\n", - " output = [0,0,0]\n", - " for i in range(len(vect)):\n", - " output[i] = w_sum(vect,matrix[i])\n", - " return output\n", - "\n", - "def neural_network(input, weights):\n", - " pred = vect_mat_mul(input,weights)\n", - " return pred\n", - "\n", - "toes = [8.5, 9.5, 9.9, 9.0]\n", - "wlrec = [0.65,0.8, 0.8, 0.9]\n", - "nfans = [1.2, 1.3, 0.5, 1.0]\n", - "\n", - "hurt = [0.1, 0.0, 0.0, 0.1]\n", - "win = [ 1, 1, 0, 1]\n", - "sad = [0.1, 0.0, 0.1, 0.2]\n", - "\n", - "alpha = 0.01\n", - "\n", - "input = [toes[0],wlrec[0],nfans[0]]\n", - "true = [hurt[0], win[0], sad[0]]\n", - "\n", - "pred = neural_network(input,weights)\n", - "\n", - "error = [0, 0, 0] \n", - "delta = [0, 0, 0]\n", - "\n", - "for i in range(len(true)):\n", - " error[i] = (pred[i] - true[i]) ** 2\n", - " delta[i] = pred[i] - true[i]" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "def outer_prod(a, b):\n", - " \n", - " # just a matrix of zeros\n", - " out = np.zeros((len(a), len(b)))\n", - "\n", - " for i in range(len(a)):\n", - " for j in range(len(b)):\n", - " out[i][j] = a[i] * b[j]\n", - " return out\n", - "\n", - "weight_deltas = outer_prod(input,delta)\n", - "\n", - "for i in range(len(weights)):\n", - " for j in range(len(weights[0])):\n", - " weights[i][j] -= alpha * weight_deltas[i][j]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[0.02264999999999999, 0.10339999999999999, -0.44705],\n", - " [0.094085, 0.20026, -0.011245000000000003],\n", - " [-0.010920000000000001, 1.30048, 0.07924]]" - ] - }, - "execution_count": 16, - 
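With several inputs and several outputs, the weight updates in the last cell form an outer product of the input and delta vectors, which is what the hand-written outer_prod() builds element by element. A short numpy sketch of the same idea (not a line-for-line port of the cell; the ordering here is chosen so that it pairs with pred = weights.dot(input), one row of weights per output):

import numpy as np

weights = np.array([[0.1, 0.1, -0.3],    # hurt?
                    [0.1, 0.2,  0.0],    # win?
                    [0.0, 1.3,  0.1]])   # sad?
inputs = np.array([8.5, 0.65, 1.2])      # toes, win record, fans
goals = np.array([0.1, 1.0, 0.1])        # hurt, win, sad for game 1
alpha = 0.01

pred = weights.dot(inputs)                    # one prediction per output row
delta = pred - goals                          # one delta per output
weights -= alpha * np.outer(delta, inputs)    # delta[i] * inputs[j] updates weights[i][j]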
"metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "weights" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter6 - Intro to Backpropagation - Building Your First DEEP Neural Network-checkpoint.ipynb b/.ipynb_checkpoints/Chapter6 - Intro to Backpropagation - Building Your First DEEP Neural Network-checkpoint.ipynb deleted file mode 100644 index b3b4650..0000000 --- a/.ipynb_checkpoints/Chapter6 - Intro to Backpropagation - Building Your First DEEP Neural Network-checkpoint.ipynb +++ /dev/null @@ -1,731 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Creating a Matrix or Two in Python" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.03999999999999998 Prediction:-0.19999999999999996\n", - "Error:0.025599999999999973 Prediction:-0.15999999999999992\n", - "Error:0.01638399999999997 Prediction:-0.1279999999999999\n", - "Error:0.010485759999999964 Prediction:-0.10239999999999982\n", - "Error:0.006710886399999962 Prediction:-0.08191999999999977\n", - "Error:0.004294967295999976 Prediction:-0.06553599999999982\n", - "Error:0.002748779069439994 Prediction:-0.05242879999999994\n", - "Error:0.0017592186044416036 Prediction:-0.04194304000000004\n", - "Error:0.0011258999068426293 Prediction:-0.03355443200000008\n", - "Error:0.0007205759403792803 Prediction:-0.02684354560000002\n", - "Error:0.0004611686018427356 Prediction:-0.021474836479999926\n", - "Error:0.0002951479051793508 Prediction:-0.01717986918399994\n", - "Error:0.00018889465931478573 Prediction:-0.013743895347199997\n", - "Error:0.00012089258196146188 Prediction:-0.010995116277759953\n", - "Error:7.737125245533561e-05 Prediction:-0.008796093022207963\n", - "Error:4.951760157141604e-05 Prediction:-0.007036874417766459\n", - "Error:3.169126500570676e-05 Prediction:-0.0056294995342132115\n", - "Error:2.028240960365233e-05 Prediction:-0.004503599627370569\n", - "Error:1.298074214633813e-05 Prediction:-0.003602879701896544\n", - "Error:8.307674973656916e-06 Prediction:-0.002882303761517324\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "weights = np.array([0.5,0.48,-0.7])\n", - "alpha = 0.1\n", - "\n", - "streetlights = np.array( [ [ 1, 0, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 0, 0, 1 ],\n", - " [ 1, 1, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 1, 0, 1 ] ] )\n", - "\n", - "walk_vs_stop = np.array( [ 0, 1, 0, 1, 1, 0 ] )\n", - "\n", - "input = streetlights[0] # [1,0,1]\n", - "goal_prediction = walk_vs_stop[0] # equals 0... i.e. 
\"stop\"\n", - "\n", - "for iteration in range(20):\n", - " prediction = input.dot(weights)\n", - " error = (goal_prediction - prediction) ** 2\n", - " delta = prediction - goal_prediction\n", - " weights = weights - (alpha * (input * delta))\t\n", - "\n", - " print(\"Error:\" + str(error) + \" Prediction:\" + str(prediction))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Building Our Neural Network" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0 2 4 3]\n", - "[2 3 4 4]\n", - "[0. 0.5 1. 0.5]\n", - "[0.5 1.5 2.5 1.5]\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "a = np.array([0,1,2,1])\n", - "b = np.array([2,2,2,3])\n", - "\n", - "print(a*b) #elementwise multiplication\n", - "print(a+b) #elementwise addition\n", - "print(a * 0.5) # vector-scalar multiplication\n", - "print(a + 0.5) # vector-scalar addition" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning the whole dataset!" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Prediction:-0.19999999999999996\n", - "Prediction:-0.19999999999999996\n", - "Prediction:-0.5599999999999999\n", - "Prediction:0.6160000000000001\n", - "Prediction:0.17279999999999995\n", - "Prediction:0.17552\n", - "Error:2.6561231104\n", - "\n", - "Prediction:0.14041599999999999\n", - "Prediction:0.3066464\n", - "Prediction:-0.34513824\n", - "Prediction:1.006637344\n", - "Prediction:0.4785034751999999\n", - "Prediction:0.26700416768\n", - "Error:0.9628701776715985\n", - "\n", - "Prediction:0.213603334144\n", - "Prediction:0.5347420299776\n", - "Prediction:-0.26067345110016\n", - "Prediction:1.1319428845096962\n", - "Prediction:0.6274723921901568\n", - "Prediction:0.25433999330650114\n", - "Error:0.5509165866836797\n", - "\n", - "Prediction:0.20347199464520088\n", - "Prediction:0.6561967149569552\n", - "Prediction:-0.221948503950995\n", - "Prediction:1.166258650532124\n", - "Prediction:0.7139004922542389\n", - "Prediction:0.21471099528371604\n", - "Error:0.36445836852222424\n", - "\n", - "Prediction:0.17176879622697283\n", - "Prediction:0.7324724146523222\n", - "Prediction:-0.19966478845083285\n", - "Prediction:1.1697769945341199\n", - "Prediction:0.7719890116601171\n", - "Prediction:0.17297997428859369\n", - "Error:0.2516768662079895\n", - "\n", - "Prediction:0.13838397943087496\n", - "Prediction:0.7864548139561468\n", - "Prediction:-0.1836567869927348\n", - "Prediction:1.163248019006011\n", - "Prediction:0.8148799260629888\n", - "Prediction:0.1362897844408577\n", - "Error:0.17797575048089034\n", - "\n", - "Prediction:0.10903182755268614\n", - "Prediction:0.8273717796510367\n", - "Prediction:-0.17037324196481937\n", - "Prediction:1.1537962739591756\n", - "Prediction:0.8481754931254761\n", - "Prediction:0.1059488041691444\n", - "Error:0.12864460733422164\n", - "\n", - "Prediction:0.0847590433353155\n", - "Prediction:0.859469609749935\n", - "Prediction:-0.1585508402022421\n", - "Prediction:1.1438418857156731\n", - "Prediction:0.8746623946770374\n", - "Prediction:0.08148074110264475\n", - "Error:0.09511036950476208\n", - "\n", - "Prediction:0.06518459288211581\n", - "Prediction:0.8850633823431538\n", - "Prediction:-0.14771905585408038\n", - "Prediction:1.1341830033853888\n", - "Prediction:0.8959860107828534\n", - "Prediction:0.0619780399014222\n", - 
"Error:0.07194564247043436\n", - "\n", - "Prediction:0.04958243192113776\n", - "Prediction:0.9056327614440267\n", - "Prediction:-0.13768337501215525\n", - "Prediction:1.1250605910610996\n", - "Prediction:0.9132624284442169\n", - "Prediction:0.04653264583708144\n", - "Error:0.05564914990717743\n", - "\n", - "Prediction:0.03722611666966513\n", - "Prediction:0.922234066504699\n", - "Prediction:-0.12834662236261596\n", - "Prediction:1.116526024487899\n", - "Prediction:0.9273167105424409\n", - "Prediction:0.03435527296969987\n", - "Error:0.04394763937673939\n", - "\n", - "Prediction:0.027484218375759886\n", - "Prediction:0.9356694192994068\n", - "Prediction:-0.11964712469387503\n", - "Prediction:1.1085678053734553\n", - "Prediction:0.9387866868342218\n", - "Prediction:0.024792915481941458\n", - "Error:0.035357967050948465\n", - "\n", - "Prediction:0.019834332385553155\n", - "Prediction:0.946566624680628\n", - "Prediction:-0.11153724870006754\n", - "Prediction:1.1011550767549563\n", - "Prediction:0.948176009263518\n", - "Prediction:0.017315912033043404\n", - "Error:0.02890700056547436\n", - "\n", - "Prediction:0.013852729626434732\n", - "Prediction:0.9554239432448665\n", - "Prediction:-0.10397589092234266\n", - "Prediction:1.0942524239871314\n", - "Prediction:0.9558862588907013\n", - "Prediction:0.011498267782398985\n", - "Error:0.023951660591138853\n", - "\n", - "Prediction:0.009198614225919194\n", - "Prediction:0.9626393189117293\n", - "Prediction:-0.09692579020989642\n", - "Prediction:1.087824783849832\n", - "Prediction:0.9622390773804066\n", - "Prediction:0.006998674002545002\n", - "Error:0.020063105176016144\n", - "\n", - "Prediction:0.005598939202035996\n", - "Prediction:0.9685315005838672\n", - "Prediction:-0.09035250869077546\n", - "Prediction:1.0818389613301889\n", - "Prediction:0.9674926590701334\n", - "Prediction:0.003544193999268516\n", - "Error:0.016952094519447087\n", - "\n", - "Prediction:0.0028353551994148157\n", - "Prediction:0.9733561723362383\n", - "Prediction:-0.0842239920152223\n", - "Prediction:1.0762639960116431\n", - "Prediction:0.9718545378681842\n", - "Prediction:0.0009168131382832068\n", - "Error:0.014420818295271236\n", - "\n", - "Prediction:0.0007334505106265654\n", - "Prediction:0.9773186039296565\n", - "Prediction:-0.07851033295953944\n", - "Prediction:1.0710711494147542\n", - "Prediction:0.9754916865567282\n", - "Prediction:-0.0010574652271341245\n", - "Error:0.012331739998443648\n", - "\n", - "Prediction:-0.0008459721817072885\n", - "Prediction:0.9805836929862668\n", - "Prediction:-0.07318360881847627\n", - "Prediction:1.066233777045345\n", - "Prediction:0.9785385598617921\n", - "Prediction:-0.0025173975573930946\n", - "Error:0.010587393171639842\n", - "\n", - "Prediction:-0.002013918045914484\n", - "Prediction:0.9832839794497644\n", - "Prediction:-0.06821774801198803\n", - "Prediction:1.0617271739912904\n", - "Prediction:0.9811035235627523\n", - "Prediction:-0.0035735447350425317\n", - "Error:0.009117233405426495\n", - "\n", - "Prediction:-0.002858835788034024\n", - "Prediction:0.9855260569025094\n", - "Prediction:-0.06358841060413677\n", - "Prediction:1.05752842286588\n", - "Prediction:0.9832740020092452\n", - "Prediction:-0.004313918034364962\n", - "Error:0.00786904226904208\n", - "\n", - "Prediction:-0.003451134427491974\n", - "Prediction:0.9873957068535818\n", - "Prediction:-0.059272877470408075\n", - "Prediction:1.0536162524729626\n", - "Prediction:0.9851206027353137\n", - "Prediction:-0.004808501248434842\n", - "Error:0.006803273214640502\n", - "\n", - 
"Prediction:-0.0038468009987478735\n", - "Prediction:0.9889620124129692\n", - "Prediction:-0.05524994626077355\n", - "Prediction:1.049970908776931\n", - "Prediction:0.9867004228010665\n", - "Prediction:-0.005112871449710697\n", - "Error:0.005889303541837786\n", - "\n", - "Prediction:-0.004090297159768559\n", - "Prediction:0.9902806551018011\n", - "Prediction:-0.051499833441728114\n", - "Prediction:1.0465740376293469\n", - "Prediction:0.9880596998997442\n", - "Prediction:-0.0052710974096659285\n", - "Error:0.0051029252561172675\n", - "\n", - "Prediction:-0.004216877927732746\n", - "Prediction:0.9913965574535352\n", - "Prediction:-0.048004082062078055\n", - "Prediction:1.043408578143574\n", - "Prediction:0.9892359385403211\n", - "Prediction:-0.005318059364078823\n", - "Error:0.004424644608684828\n", - "\n", - "Prediction:-0.0042544474912630525\n", - "Prediction:0.992346001517791\n", - "Prediction:-0.044745474990504665\n", - "Prediction:1.0404586655589985\n", - "Prediction:0.9902596156014837\n", - "Prediction:-0.005281305317687134\n", - "Error:0.0038385124412518303\n", - "\n", - "Prediction:-0.0042250442541497055\n", - "Prediction:0.9931583274383705\n", - "Prediction:-0.041707953394155776\n", - "Prediction:1.0377095425371112\n", - "Prediction:0.9911555487826897\n", - "Prediction:-0.005182536193432452\n", - "Error:0.0033313054558089675\n", - "\n", - "Prediction:-0.004146028954745959\n", - "Prediction:0.9938572955409696\n", - "Prediction:-0.03887654022599941\n", - "Prediction:1.0351474779634813\n", - "Prediction:0.9919439948626794\n", - "Prediction:-0.00503879377425797\n", - "Error:0.0028919416227737734\n", - "\n", - "Prediction:-0.004031035019406375\n", - "Prediction:0.9944621787695098\n", - "Prediction:-0.03623726848360008\n", - "Prediction:1.032759692455092\n", - "Prediction:0.9926415313729495\n", - "Prediction:-0.004863410672429416\n", - "Error:0.002511053608117256\n", - "\n", - "Prediction:-0.003890728537943533\n", - "Prediction:0.9949886390193969\n", - "Prediction:-0.03377711399894662\n", - "Prediction:1.0305342898820642\n", - "Prediction:0.9932617646389992\n", - "Prediction:-0.004666769772712614\n", - "Error:0.0021806703520253884\n", - "\n", - "Prediction:-0.003733415818170091\n", - "Prediction:0.9954494302702878\n", - "Prediction:-0.03148393251909879\n", - "Prediction:1.0284601943056741\n", - "Prediction:0.9938158986070053\n", - "Prediction:-0.004456911151490314\n", - "Error:0.0018939739123713475\n", - "\n", - "Prediction:-0.003565528921192253\n", - "Prediction:0.9958549628928723\n", - "Prediction:-0.029346400840475826\n", - "Prediction:1.0265270918125804\n", - "Prediction:0.9943131920358295\n", - "Prediction:-0.004240016908292479\n", - "Error:0.0016451096996342332\n", - "\n", - "Prediction:-0.0033920135266339822\n", - "Prediction:0.9962137566721563\n", - "Prediction:-0.02735396176499221\n", - "Prediction:1.0247253767906936\n", - "Prediction:0.9947613261560856\n", - "Prediction:-0.004020798285770878\n", - "Error:0.0014290353984827077\n", - "\n", - "Prediction:-0.003216638628616701\n", - "Prediction:0.9965328046163073\n", - "Prediction:-0.025496772653362886\n", - "Prediction:1.0230461022472208\n", - "Prediction:0.9951667005089379\n", - "Prediction:-0.0038028045995257484\n", - "Error:0.0012413985592149145\n", - "\n", - "Prediction:-0.003042243679620596\n", - "Prediction:0.996817865235065\n", - "Prediction:-0.023765657359234325\n", - "Prediction:1.0214809338160067\n", - "Prediction:0.995534671160774\n", - "Prediction:-0.0035886696105582767\n", - "Error:0.0010784359268087556\n", - "\n", - 
"Prediction:-0.0028709356884466207\n", - "Prediction:0.9970736974585198\n", - "Prediction:-0.022152061336940452\n", - "Prediction:1.0200221071408409\n", - "Prediction:0.9958697426723416\n", - "Prediction:-0.0033803078583175654\n", - "Error:0.0009368896209360312\n", - "\n", - "Prediction:-0.0027042462866540516\n", - "Prediction:0.9973042495523706\n", - "Prediction:-0.02064800972530455\n", - "Prediction:1.018662388355171\n", - "Prediction:0.9961757229433927\n", - "Prediction:-0.0031790709774033414\n", - "Error:0.0008139366504753339\n", - "\n", - "Prediction:-0.002543256781922673\n", - "Prediction:0.9975128111306469\n", - "Prediction:-0.019246068219762574\n", - "Prediction:1.0173950374076535\n", - "Prediction:0.9964558482449631\n", - "Prediction:-0.0029858720226535913\n", - "Error:0.0007071291752624441\n", - "\n", - "Prediction:-0.002388697618122871\n", - "Prediction:0.9977021355600483\n", - "Prediction:-0.01793930655497516\n", - "Prediction:1.0162137740080082\n", - "Prediction:0.9967128843019345\n", - "Prediction:-0.0028012842268006904\n", - "Error:0.0006143435674831474\n", - "\n", - "Prediction:-0.0022410273814405524\n", - "Prediction:0.9978745386023716\n", - "Prediction:-0.016721264429884947\n", - "Prediction:1.0151127459893812\n", - "Prediction:0.9969492081270097\n", - "Prediction:-0.0026256193329783125\n", - "Error:0.00053373677328488\n", - "\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "weights = np.array([0.5,0.48,-0.7])\n", - "alpha = 0.1\n", - "\n", - "streetlights = np.array( [[ 1, 0, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 0, 0, 1 ],\n", - " [ 1, 1, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 1, 0, 1 ] ] )\n", - "\n", - "walk_vs_stop = np.array( [ 0, 1, 0, 1, 1, 0 ] )\n", - "\n", - "input = streetlights[0] # [1,0,1]\n", - "goal_prediction = walk_vs_stop[0] # equals 0... i.e. 
\"stop\"\n", - "\n", - "for iteration in range(40):\n", - " error_for_all_lights = 0\n", - " for row_index in range(len(walk_vs_stop)):\n", - " input = streetlights[row_index]\n", - " goal_prediction = walk_vs_stop[row_index]\n", - " \n", - " prediction = input.dot(weights)\n", - " \n", - " error = (goal_prediction - prediction) ** 2\n", - " error_for_all_lights += error\n", - " \n", - " delta = prediction - goal_prediction\n", - " weights = weights - (alpha * (input * delta))\t\n", - " print(\"Prediction:\" + str(prediction))\n", - " print(\"Error:\" + str(error_for_all_lights) + \"\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Our First \"Deep\" Neural Network" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "np.random.seed(1)\n", - "\n", - "def relu(x):\n", - " return (x > 0) * x \n", - "\n", - "alpha = 0.2\n", - "hidden_size = 4\n", - "\n", - "streetlights = np.array( [[ 1, 0, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 0, 0, 1 ],\n", - " [ 1, 1, 1 ] ] )\n", - "\n", - "walk_vs_stop = np.array([[ 1, 1, 0, 0]]).T\n", - "\n", - "weights_0_1 = 2*np.random.random((3,hidden_size)) - 1\n", - "weights_1_2 = 2*np.random.random((hidden_size,1)) - 1\n", - "\n", - "layer_0 = streetlights[0]\n", - "layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - "layer_2 = np.dot(layer_1,weights_1_2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Backpropagation in Code" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.6342311598444467\n", - "Error:0.35838407676317513\n", - "Error:0.0830183113303298\n", - "Error:0.006467054957103705\n", - "Error:0.0003292669000750734\n", - "Error:1.5055622665134859e-05\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "np.random.seed(1)\n", - "\n", - "def relu(x):\n", - " return (x > 0) * x # returns x if x > 0\n", - " # return 0 otherwise\n", - "\n", - "def relu2deriv(output):\n", - " return output>0 # returns 1 for input > 0\n", - " # return 0 otherwise\n", - "alpha = 0.2\n", - "hidden_size = 4\n", - "\n", - "weights_0_1 = 2*np.random.random((3,hidden_size)) - 1\n", - "weights_1_2 = 2*np.random.random((hidden_size,1)) - 1\n", - "\n", - "for iteration in range(60):\n", - " layer_2_error = 0\n", - " for i in range(len(streetlights)):\n", - " layer_0 = streetlights[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " layer_2_error += np.sum((layer_2 - walk_vs_stop[i:i+1]) ** 2)\n", - "\n", - " layer_2_delta = (walk_vs_stop[i:i+1] - layer_2)\n", - " layer_1_delta=layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)\n", - "\n", - " weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n", - "\n", - " if(iteration % 10 == 9):\n", - " print(\"Error:\" + str(layer_2_error))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# One Iteration of Backpropagation" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "np.random.seed(1)\n", - "\n", - "def relu(x):\n", - " return (x > 0) * x \n", - "\n", - "def relu2deriv(output):\n", - " return output>0 \n", - "\n", - "lights = np.array( [[ 1, 0, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 0, 0, 1 ],\n", - " [ 1, 1, 1 ] ] 
)\n", - "\n", - "walk_stop = np.array([[ 1, 1, 0, 0]]).T\n", - "\n", - "alpha = 0.2\n", - "hidden_size = 3\n", - "\n", - "weights_0_1 = 2*np.random.random((3,hidden_size)) - 1\n", - "weights_1_2 = 2*np.random.random((hidden_size,1)) - 1\n", - "\n", - "layer_0 = lights[0:1]\n", - "layer_1 = np.dot(layer_0,weights_0_1)\n", - "layer_1 = relu(layer_1)\n", - "layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - "error = (layer_2-walk_stop[0:1])**2\n", - "\n", - "layer_2_delta=(layer_2-walk_stop[0:1])\n", - "\n", - "layer_1_delta=layer_2_delta.dot(weights_1_2.T)\n", - "layer_1_delta *= relu2deriv(layer_1)\n", - "\n", - "weight_delta_1_2 = layer_1.T.dot(layer_2_delta)\n", - "weight_delta_0_1 = layer_0.T.dot(layer_1_delta)\n", - "\n", - "weights_1_2 -= alpha * weight_delta_1_2\n", - "weights_0_1 -= alpha * weight_delta_0_1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Putting it all Together" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error:0.6342311598444467\n", - "Error:0.35838407676317513\n", - "Error:0.0830183113303298\n", - "Error:0.006467054957103705\n", - "Error:0.0003292669000750734\n", - "Error:1.5055622665134859e-05\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "np.random.seed(1)\n", - "\n", - "def relu(x):\n", - " return (x > 0) * x # returns x if x > 0\n", - " # return 0 otherwise\n", - "\n", - "def relu2deriv(output):\n", - " return output>0 # returns 1 for input > 0\n", - " # return 0 otherwise\n", - "\n", - "streetlights = np.array( [[ 1, 0, 1 ],\n", - " [ 0, 1, 1 ],\n", - " [ 0, 0, 1 ],\n", - " [ 1, 1, 1 ] ] )\n", - "\n", - "walk_vs_stop = np.array([[ 1, 1, 0, 0]]).T\n", - " \n", - "alpha = 0.2\n", - "hidden_size = 4\n", - "\n", - "weights_0_1 = 2*np.random.random((3,hidden_size)) - 1\n", - "weights_1_2 = 2*np.random.random((hidden_size,1)) - 1\n", - "\n", - "for iteration in range(60):\n", - " layer_2_error = 0\n", - " for i in range(len(streetlights)):\n", - " layer_0 = streetlights[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " layer_2_error += np.sum((layer_2 - walk_vs_stop[i:i+1]) ** 2)\n", - "\n", - " layer_2_delta = (layer_2 - walk_vs_stop[i:i+1])\n", - " layer_1_delta=layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)\n", - "\n", - " weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)\n", - "\n", - " if(iteration % 10 == 9):\n", - " print(\"Error:\" + str(layer_2_error))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter8 - Intro to Regularization - Learning Signal and Ignoring Noise-checkpoint.ipynb b/.ipynb_checkpoints/Chapter8 - Intro to Regularization - Learning Signal and Ignoring Noise-checkpoint.ipynb deleted file mode 100644 index 8aa7057..0000000 --- a/.ipynb_checkpoints/Chapter8 - Intro to Regularization - Learning Signal and Ignoring 
Noise-checkpoint.ipynb +++ /dev/null @@ -1,485 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 3 Layer Network on MNIST" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " I:349 Train-Err:0.108 Train-Acc:1.0" - ] - } - ], - "source": [ - "import sys, numpy as np\n", - "from keras.datasets import mnist\n", - "\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "\n", - "images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])\n", - "\n", - "one_hot_labels = np.zeros((len(labels),10))\n", - "for i,l in enumerate(labels):\n", - " one_hot_labels[i][l] = 1\n", - "labels = one_hot_labels\n", - "\n", - "test_images = x_test.reshape(len(x_test),28*28) / 255\n", - "test_labels = np.zeros((len(y_test),10))\n", - "for i,l in enumerate(y_test):\n", - " test_labels[i][l] = 1\n", - " \n", - "np.random.seed(1)\n", - "relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise\n", - "relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise\n", - "alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)\n", - "\n", - "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n", - "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n", - "\n", - "for j in range(iterations):\n", - " error, correct_cnt = (0.0, 0)\n", - " \n", - " for i in range(len(images)):\n", - " layer_0 = images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " error += np.sum((labels[i:i+1] - layer_2) ** 2)\n", - " correct_cnt += int(np.argmax(layer_2) == \\\n", - " np.argmax(labels[i:i+1]))\n", - "\n", - " layer_2_delta = (labels[i:i+1] - layer_2)\n", - " layer_1_delta = layer_2_delta.dot(weights_1_2.T)\\\n", - " * relu2deriv(layer_1)\n", - " weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n", - "\n", - " sys.stdout.write(\"\\r I:\"+str(j)+ \\\n", - " \" Train-Err:\" + str(error/float(len(images)))[0:5] +\\\n", - " \" Train-Acc:\" + str(correct_cnt/float(len(images))))" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Test-Err:0.653 Test-Acc:0.7073\n", - "\n" - ] - } - ], - "source": [ - "if(j % 10 == 0 or j == iterations-1):\n", - " error, correct_cnt = (0.0, 0)\n", - "\n", - " for i in range(len(test_images)):\n", - "\n", - " layer_0 = test_images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n", - " correct_cnt += int(np.argmax(layer_2) == \\\n", - " np.argmax(test_labels[i:i+1]))\n", - " sys.stdout.write(\" Test-Err:\" + str(error/float(len(test_images)))[0:5] +\\\n", - " \" Test-Acc:\" + str(correct_cnt/float(len(test_images))) + \"\\n\")\n", - " print()" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " I:0 Train-Err:0.722 Train-Acc:0.537 Test-Err:0.601 Test-Acc:0.6488\n", - " I:10 Train-Err:0.312 Train-Acc:0.901 Test-Err:0.420 Test-Acc:0.8114\n", - " I:20 Train-Err:0.260 Train-Acc:0.93 Test-Err:0.414 Test-Acc:0.8111\n", - " I:30 Train-Err:0.232 Train-Acc:0.946 Test-Err:0.417 
Test-Acc:0.8066\n", - " I:40 Train-Err:0.215 Train-Acc:0.956 Test-Err:0.426 Test-Acc:0.8019\n", - " I:50 Train-Err:0.204 Train-Acc:0.966 Test-Err:0.437 Test-Acc:0.7982\n", - " I:60 Train-Err:0.194 Train-Acc:0.967 Test-Err:0.448 Test-Acc:0.7921\n", - " I:70 Train-Err:0.186 Train-Acc:0.975 Test-Err:0.458 Test-Acc:0.7864\n", - " I:80 Train-Err:0.179 Train-Acc:0.979 Test-Err:0.466 Test-Acc:0.7817\n", - " I:90 Train-Err:0.172 Train-Acc:0.981 Test-Err:0.474 Test-Acc:0.7758\n", - " I:100 Train-Err:0.166 Train-Acc:0.984 Test-Err:0.482 Test-Acc:0.7706\n", - " I:110 Train-Err:0.161 Train-Acc:0.984 Test-Err:0.489 Test-Acc:0.7686\n", - " I:120 Train-Err:0.157 Train-Acc:0.986 Test-Err:0.496 Test-Acc:0.766\n", - " I:130 Train-Err:0.153 Train-Acc:0.99 Test-Err:0.502 Test-Acc:0.7622\n", - " I:140 Train-Err:0.149 Train-Acc:0.991 Test-Err:0.508 Test-Acc:0.758\n", - " I:150 Train-Err:0.145 Train-Acc:0.991 Test-Err:0.513 Test-Acc:0.7558\n", - " I:160 Train-Err:0.141 Train-Acc:0.992 Test-Err:0.518 Test-Acc:0.7553\n", - " I:170 Train-Err:0.138 Train-Acc:0.992 Test-Err:0.524 Test-Acc:0.751\n", - " I:180 Train-Err:0.135 Train-Acc:0.995 Test-Err:0.528 Test-Acc:0.7505\n", - " I:190 Train-Err:0.132 Train-Acc:0.995 Test-Err:0.533 Test-Acc:0.7482\n", - " I:200 Train-Err:0.130 Train-Acc:0.998 Test-Err:0.538 Test-Acc:0.7464\n", - " I:210 Train-Err:0.127 Train-Acc:0.998 Test-Err:0.544 Test-Acc:0.7446\n", - " I:220 Train-Err:0.125 Train-Acc:0.998 Test-Err:0.552 Test-Acc:0.7416\n", - " I:230 Train-Err:0.123 Train-Acc:0.998 Test-Err:0.560 Test-Acc:0.7372\n", - " I:240 Train-Err:0.121 Train-Acc:0.998 Test-Err:0.569 Test-Acc:0.7344\n", - " I:250 Train-Err:0.120 Train-Acc:0.999 Test-Err:0.577 Test-Acc:0.7316\n", - " I:260 Train-Err:0.118 Train-Acc:0.999 Test-Err:0.585 Test-Acc:0.729\n", - " I:270 Train-Err:0.117 Train-Acc:0.999 Test-Err:0.593 Test-Acc:0.7259\n", - " I:280 Train-Err:0.115 Train-Acc:0.999 Test-Err:0.600 Test-Acc:0.723\n", - " I:290 Train-Err:0.114 Train-Acc:0.999 Test-Err:0.607 Test-Acc:0.7196\n", - " I:300 Train-Err:0.113 Train-Acc:0.999 Test-Err:0.614 Test-Acc:0.7183\n", - " I:310 Train-Err:0.112 Train-Acc:0.999 Test-Err:0.622 Test-Acc:0.7165\n", - " I:320 Train-Err:0.111 Train-Acc:0.999 Test-Err:0.629 Test-Acc:0.7133\n", - " I:330 Train-Err:0.110 Train-Acc:0.999 Test-Err:0.637 Test-Acc:0.7125\n", - " I:340 Train-Err:0.109 Train-Acc:1.0 Test-Err:0.645 Test-Acc:0.71\n", - " I:349 Train-Err:0.108 Train-Acc:1.0 Test-Err:0.653 Test-Acc:0.7073\n" - ] - } - ], - "source": [ - "import sys, numpy as np\n", - "from keras.datasets import mnist\n", - "\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "\n", - "images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])\n", - "\n", - "one_hot_labels = np.zeros((len(labels),10))\n", - "for i,l in enumerate(labels):\n", - " one_hot_labels[i][l] = 1\n", - "labels = one_hot_labels\n", - "\n", - "test_images = x_test.reshape(len(x_test),28*28) / 255\n", - "test_labels = np.zeros((len(y_test),10))\n", - "for i,l in enumerate(y_test):\n", - " test_labels[i][l] = 1\n", - "\n", - "np.random.seed(1)\n", - "relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise\n", - "relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise\n", - "alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)\n", - "\n", - "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n", - "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n", - "\n", - "for j in 
range(iterations):\n", - " error, correct_cnt = (0.0, 0)\n", - " \n", - " for i in range(len(images)):\n", - " layer_0 = images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " error += np.sum((labels[i:i+1] - layer_2) ** 2)\n", - " correct_cnt += int(np.argmax(layer_2) == \\\n", - " np.argmax(labels[i:i+1]))\n", - "\n", - " layer_2_delta = (labels[i:i+1] - layer_2)\n", - " layer_1_delta = layer_2_delta.dot(weights_1_2.T)\\\n", - " * relu2deriv(layer_1)\n", - " weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n", - "\n", - " sys.stdout.write(\"\\r I:\"+str(j)+ \\\n", - " \" Train-Err:\" + str(error/float(len(images)))[0:5] +\\\n", - " \" Train-Acc:\" + str(correct_cnt/float(len(images))))\n", - " \n", - " if(j % 10 == 0 or j == iterations-1):\n", - " error, correct_cnt = (0.0, 0)\n", - "\n", - " for i in range(len(test_images)):\n", - "\n", - " layer_0 = test_images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n", - " correct_cnt += int(np.argmax(layer_2) == \\\n", - " np.argmax(test_labels[i:i+1]))\n", - " sys.stdout.write(\" Test-Err:\" + str(error/float(len(test_images)))[0:5] +\\\n", - " \" Test-Acc:\" + str(correct_cnt/float(len(test_images))))\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Dropout In Code" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "i = 0\n", - "layer_0 = images[i:i+1]\n", - "dropout_mask = np.random.randint(2,size=layer_1.shape)\n", - "\n", - "layer_1 *= dropout_mask * 2\n", - "layer_2 = np.dot(layer_1, weights_1_2)\n", - "\n", - "error += np.sum((labels[i:i+1] - layer_2) ** 2)\n", - "\n", - "correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i+i+1]))\n", - "\n", - "layer_2_delta = (labels[i:i+1] - layer_2)\n", - "layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)\n", - "\n", - "layer_1_delta *= dropout_mask\n", - "\n", - "weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - "weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "I:0 Test-Err:0.641 Test-Acc:0.6333 Train-Err:0.891 Train-Acc:0.413\n", - "I:10 Test-Err:0.458 Test-Acc:0.787 Train-Err:0.472 Train-Acc:0.764\n", - "I:20 Test-Err:0.415 Test-Acc:0.8133 Train-Err:0.430 Train-Acc:0.809\n", - "I:30 Test-Err:0.421 Test-Acc:0.8114 Train-Err:0.415 Train-Acc:0.811\n", - "I:40 Test-Err:0.419 Test-Acc:0.8112 Train-Err:0.413 Train-Acc:0.827\n", - "I:50 Test-Err:0.409 Test-Acc:0.8133 Train-Err:0.392 Train-Acc:0.836\n", - "I:60 Test-Err:0.412 Test-Acc:0.8236 Train-Err:0.402 Train-Acc:0.836\n", - "I:70 Test-Err:0.412 Test-Acc:0.8033 Train-Err:0.383 Train-Acc:0.857\n", - "I:80 Test-Err:0.410 Test-Acc:0.8054 Train-Err:0.386 Train-Acc:0.854\n", - "I:90 Test-Err:0.411 Test-Acc:0.8144 Train-Err:0.376 Train-Acc:0.868\n", - "I:100 Test-Err:0.411 Test-Acc:0.7903 Train-Err:0.369 Train-Acc:0.864\n", - "I:110 Test-Err:0.411 Test-Acc:0.8003 Train-Err:0.371 Train-Acc:0.868\n", - "I:120 Test-Err:0.402 Test-Acc:0.8046 Train-Err:0.353 Train-Acc:0.857\n", - "I:130 Test-Err:0.408 Test-Acc:0.8091 Train-Err:0.352 Train-Acc:0.867\n", - "I:140 Test-Err:0.405 Test-Acc:0.8083 
Train-Err:0.355 Train-Acc:0.885\n", - "I:150 Test-Err:0.404 Test-Acc:0.8107 Train-Err:0.342 Train-Acc:0.883\n", - "I:160 Test-Err:0.399 Test-Acc:0.8146 Train-Err:0.361 Train-Acc:0.876\n", - "I:170 Test-Err:0.404 Test-Acc:0.8074 Train-Err:0.344 Train-Acc:0.889\n", - "I:180 Test-Err:0.399 Test-Acc:0.807 Train-Err:0.333 Train-Acc:0.892\n", - "I:190 Test-Err:0.407 Test-Acc:0.8066 Train-Err:0.335 Train-Acc:0.898\n", - "I:200 Test-Err:0.405 Test-Acc:0.8036 Train-Err:0.347 Train-Acc:0.893\n", - "I:210 Test-Err:0.405 Test-Acc:0.8034 Train-Err:0.336 Train-Acc:0.894\n", - "I:220 Test-Err:0.402 Test-Acc:0.8067 Train-Err:0.325 Train-Acc:0.896\n", - "I:230 Test-Err:0.404 Test-Acc:0.8091 Train-Err:0.321 Train-Acc:0.894\n", - "I:240 Test-Err:0.415 Test-Acc:0.8091 Train-Err:0.332 Train-Acc:0.898\n", - "I:250 Test-Err:0.395 Test-Acc:0.8182 Train-Err:0.320 Train-Acc:0.899\n", - "I:260 Test-Err:0.390 Test-Acc:0.8204 Train-Err:0.321 Train-Acc:0.899\n", - "I:270 Test-Err:0.382 Test-Acc:0.8194 Train-Err:0.312 Train-Acc:0.906\n", - "I:280 Test-Err:0.396 Test-Acc:0.8208 Train-Err:0.317 Train-Acc:0.9\n", - "I:290 Test-Err:0.399 Test-Acc:0.8181 Train-Err:0.301 Train-Acc:0.908" - ] - } - ], - "source": [ - "import numpy, sys\n", - "np.random.seed(1)\n", - "def relu(x):\n", - " return (x >= 0) * x # returns x if x > 0\n", - " # returns 0 otherwise\n", - "\n", - "def relu2deriv(output):\n", - " return output >= 0 #returns 1 for input > 0\n", - "\n", - "alpha, iterations, hidden_size = (0.005, 300, 100)\n", - "pixels_per_image, num_labels = (784, 10)\n", - "\n", - "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n", - "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n", - "\n", - "for j in range(iterations):\n", - " error, correct_cnt = (0.0,0)\n", - " for i in range(len(images)):\n", - " layer_0 = images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " dropout_mask = np.random.randint(2, size=layer_1.shape)\n", - " layer_1 *= dropout_mask * 2\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " error += np.sum((labels[i:i+1] - layer_2) ** 2)\n", - " correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))\n", - " layer_2_delta = (labels[i:i+1] - layer_2)\n", - " layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)\n", - " layer_1_delta *= dropout_mask\n", - "\n", - " weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n", - "\n", - " if(j%10 == 0):\n", - " test_error = 0.0\n", - " test_correct_cnt = 0\n", - "\n", - " for i in range(len(test_images)):\n", - " layer_0 = test_images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1, weights_1_2)\n", - "\n", - " test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n", - " test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))\n", - "\n", - " sys.stdout.write(\"\\n\" + \\\n", - " \"I:\" + str(j) + \\\n", - " \" Test-Err:\" + str(test_error/ float(len(test_images)))[0:5] +\\\n", - " \" Test-Acc:\" + str(test_correct_cnt/ float(len(test_images)))+\\\n", - " \" Train-Err:\" + str(error/ float(len(images)))[0:5] +\\\n", - " \" Train-Acc:\" + str(correct_cnt/ float(len(images))))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Batch Gradient Descent" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "I:0 
Test-Err:0.815 Test-Acc:0.3832 Train-Err:1.284 Train-Acc:0.165\n", - "I:10 Test-Err:0.568 Test-Acc:0.7173 Train-Err:0.591 Train-Acc:0.672\n", - "I:20 Test-Err:0.510 Test-Acc:0.7571 Train-Err:0.532 Train-Acc:0.729\n", - "I:30 Test-Err:0.485 Test-Acc:0.7793 Train-Err:0.498 Train-Acc:0.754\n", - "I:40 Test-Err:0.468 Test-Acc:0.7877 Train-Err:0.489 Train-Acc:0.749\n", - "I:50 Test-Err:0.458 Test-Acc:0.793 Train-Err:0.468 Train-Acc:0.775\n", - "I:60 Test-Err:0.452 Test-Acc:0.7995 Train-Err:0.452 Train-Acc:0.799\n", - "I:70 Test-Err:0.446 Test-Acc:0.803 Train-Err:0.453 Train-Acc:0.792\n", - "I:80 Test-Err:0.451 Test-Acc:0.7968 Train-Err:0.457 Train-Acc:0.786\n", - "I:90 Test-Err:0.447 Test-Acc:0.795 Train-Err:0.454 Train-Acc:0.799\n", - "I:100 Test-Err:0.448 Test-Acc:0.793 Train-Err:0.447 Train-Acc:0.796\n", - "I:110 Test-Err:0.441 Test-Acc:0.7943 Train-Err:0.426 Train-Acc:0.816\n", - "I:120 Test-Err:0.442 Test-Acc:0.7966 Train-Err:0.431 Train-Acc:0.813\n", - "I:130 Test-Err:0.441 Test-Acc:0.7906 Train-Err:0.434 Train-Acc:0.816\n", - "I:140 Test-Err:0.447 Test-Acc:0.7874 Train-Err:0.437 Train-Acc:0.822\n", - "I:150 Test-Err:0.443 Test-Acc:0.7899 Train-Err:0.414 Train-Acc:0.823\n", - "I:160 Test-Err:0.438 Test-Acc:0.797 Train-Err:0.427 Train-Acc:0.811\n", - "I:170 Test-Err:0.440 Test-Acc:0.7884 Train-Err:0.418 Train-Acc:0.828\n", - "I:180 Test-Err:0.436 Test-Acc:0.7935 Train-Err:0.407 Train-Acc:0.834\n", - "I:190 Test-Err:0.434 Test-Acc:0.7935 Train-Err:0.410 Train-Acc:0.831\n", - "I:200 Test-Err:0.435 Test-Acc:0.7972 Train-Err:0.416 Train-Acc:0.829\n", - "I:210 Test-Err:0.434 Test-Acc:0.7923 Train-Err:0.409 Train-Acc:0.83\n", - "I:220 Test-Err:0.433 Test-Acc:0.8032 Train-Err:0.396 Train-Acc:0.832\n", - "I:230 Test-Err:0.431 Test-Acc:0.8036 Train-Err:0.393 Train-Acc:0.853\n", - "I:240 Test-Err:0.430 Test-Acc:0.8047 Train-Err:0.397 Train-Acc:0.844\n", - "I:250 Test-Err:0.429 Test-Acc:0.8028 Train-Err:0.386 Train-Acc:0.843\n", - "I:260 Test-Err:0.431 Test-Acc:0.8038 Train-Err:0.394 Train-Acc:0.843\n", - "I:270 Test-Err:0.428 Test-Acc:0.8014 Train-Err:0.384 Train-Acc:0.845\n", - "I:280 Test-Err:0.430 Test-Acc:0.8067 Train-Err:0.401 Train-Acc:0.846\n", - "I:290 Test-Err:0.428 Test-Acc:0.7975 Train-Err:0.383 Train-Acc:0.851" - ] - } - ], - "source": [ - "import numpy as np\n", - "np.random.seed(1)\n", - "\n", - "def relu(x):\n", - " return (x >= 0) * x # returns x if x > 0\n", - "\n", - "def relu2deriv(output):\n", - " return output >= 0 # returns 1 for input > 0\n", - "\n", - "batch_size = 100\n", - "alpha, iterations = (0.001, 300)\n", - "pixels_per_image, num_labels, hidden_size = (784, 10, 100)\n", - "\n", - "weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1\n", - "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n", - "\n", - "for j in range(iterations):\n", - " error, correct_cnt = (0.0, 0)\n", - " for i in range(int(len(images) / batch_size)):\n", - " batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))\n", - "\n", - " layer_0 = images[batch_start:batch_end]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " dropout_mask = np.random.randint(2,size=layer_1.shape)\n", - " layer_1 *= dropout_mask * 2\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)\n", - " for k in range(batch_size):\n", - " correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))\n", - "\n", - " layer_2_delta = 
(labels[batch_start:batch_end]-layer_2)/batch_size\n", - " layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)\n", - " layer_1_delta *= dropout_mask\n", - "\n", - " weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n", - " \n", - " if(j%10 == 0):\n", - " test_error = 0.0\n", - " test_correct_cnt = 0\n", - "\n", - " for i in range(len(test_images)):\n", - " layer_0 = test_images[i:i+1]\n", - " layer_1 = relu(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1, weights_1_2)\n", - "\n", - " test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)\n", - " test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))\n", - "\n", - " sys.stdout.write(\"\\n\" + \\\n", - " \"I:\" + str(j) + \\\n", - " \" Test-Err:\" + str(test_error/ float(len(test_images)))[0:5] +\\\n", - " \" Test-Acc:\" + str(test_correct_cnt/ float(len(test_images)))+\\\n", - " \" Train-Err:\" + str(error/ float(len(images)))[0:5] +\\\n", - " \" Train-Acc:\" + str(correct_cnt/ float(len(images))))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.ipynb_checkpoints/Chapter9 - Intro to Activation Functions - Modeling Probabilities-checkpoint.ipynb b/.ipynb_checkpoints/Chapter9 - Intro to Activation Functions - Modeling Probabilities-checkpoint.ipynb deleted file mode 100644 index 80d323a..0000000 --- a/.ipynb_checkpoints/Chapter9 - Intro to Activation Functions - Modeling Probabilities-checkpoint.ipynb +++ /dev/null @@ -1,155 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Upgrading our MNIST Network" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "I:0 Test-Acc:0.394 Train-Acc:0.156\n", - "I:10 Test-Acc:0.6867 Train-Acc:0.723\n", - "I:20 Test-Acc:0.7025 Train-Acc:0.732\n", - "I:30 Test-Acc:0.734 Train-Acc:0.763\n", - "I:40 Test-Acc:0.7663 Train-Acc:0.794\n", - "I:50 Test-Acc:0.7913 Train-Acc:0.819\n", - "I:60 Test-Acc:0.8102 Train-Acc:0.849\n", - "I:70 Test-Acc:0.8228 Train-Acc:0.864\n", - "I:80 Test-Acc:0.831 Train-Acc:0.867\n", - "I:90 Test-Acc:0.8364 Train-Acc:0.885\n", - "I:100 Test-Acc:0.8407 Train-Acc:0.883\n", - "I:110 Test-Acc:0.845 Train-Acc:0.891\n", - "I:120 Test-Acc:0.8481 Train-Acc:0.901\n", - "I:130 Test-Acc:0.8505 Train-Acc:0.901\n", - "I:140 Test-Acc:0.8526 Train-Acc:0.905\n", - "I:150 Test-Acc:0.8555 Train-Acc:0.914\n", - "I:160 Test-Acc:0.8577 Train-Acc:0.925\n", - "I:170 Test-Acc:0.8596 Train-Acc:0.918\n", - "I:180 Test-Acc:0.8619 Train-Acc:0.933\n", - "I:190 Test-Acc:0.863 Train-Acc:0.933\n", - "I:200 Test-Acc:0.8642 Train-Acc:0.926\n", - "I:210 Test-Acc:0.8653 Train-Acc:0.931\n", - "I:220 Test-Acc:0.8668 Train-Acc:0.93\n", - "I:230 Test-Acc:0.8672 Train-Acc:0.937\n", - "I:240 Test-Acc:0.8681 Train-Acc:0.938\n", - "I:250 Test-Acc:0.8687 Train-Acc:0.937\n", - "I:260 Test-Acc:0.8684 Train-Acc:0.945\n", - "I:270 Test-Acc:0.8703 Train-Acc:0.951\n", - "I:280 
Test-Acc:0.8699 Train-Acc:0.949\n", - "I:290 Test-Acc:0.8701 Train-Acc:0.94" - ] - } - ], - "source": [ - "import numpy as np, sys\n", - "np.random.seed(1)\n", - "\n", - "from keras.datasets import mnist\n", - "\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "\n", - "images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])\n", - "\n", - "one_hot_labels = np.zeros((len(labels),10))\n", - "for i,l in enumerate(labels):\n", - " one_hot_labels[i][l] = 1\n", - "labels = one_hot_labels\n", - "\n", - "test_images = x_test.reshape(len(x_test),28*28) / 255\n", - "test_labels = np.zeros((len(y_test),10))\n", - "for i,l in enumerate(y_test):\n", - " test_labels[i][l] = 1\n", - "\n", - "def tanh(x):\n", - " return np.tanh(x)\n", - "\n", - "def tanh2deriv(output):\n", - " return 1 - (output ** 2)\n", - "\n", - "def softmax(x):\n", - " temp = np.exp(x)\n", - " return temp / np.sum(temp, axis=1, keepdims=True)\n", - "\n", - "alpha, iterations, hidden_size = (2, 300, 100)\n", - "pixels_per_image, num_labels = (784, 10)\n", - "batch_size = 100\n", - "\n", - "weights_0_1 = 0.02*np.random.random((pixels_per_image,hidden_size))-0.01\n", - "weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1\n", - "\n", - "for j in range(iterations):\n", - " correct_cnt = 0\n", - " for i in range(int(len(images) / batch_size)):\n", - " batch_start, batch_end=((i * batch_size),((i+1)*batch_size))\n", - " layer_0 = images[batch_start:batch_end]\n", - " layer_1 = tanh(np.dot(layer_0,weights_0_1))\n", - " dropout_mask = np.random.randint(2,size=layer_1.shape)\n", - " layer_1 *= dropout_mask * 2\n", - " layer_2 = softmax(np.dot(layer_1,weights_1_2))\n", - "\n", - " for k in range(batch_size):\n", - " correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))\n", - "\n", - " layer_2_delta = (labels[batch_start:batch_end]-layer_2) / (batch_size * layer_2.shape[0])\n", - " layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)\n", - " layer_1_delta *= dropout_mask\n", - "\n", - " weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)\n", - " weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)\n", - "\n", - " test_correct_cnt = 0\n", - "\n", - " for i in range(len(test_images)):\n", - "\n", - " layer_0 = test_images[i:i+1]\n", - " layer_1 = tanh(np.dot(layer_0,weights_0_1))\n", - " layer_2 = np.dot(layer_1,weights_1_2)\n", - "\n", - " test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))\n", - " if(j % 10 == 0):\n", - " sys.stdout.write(\"\\n\"+ \\\n", - " \"I:\" + str(j) + \\\n", - " \" Test-Acc:\"+str(test_correct_cnt/float(len(test_images)))+\\\n", - " \" Train-Acc:\" + str(correct_cnt/float(len(images))))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}