diff --git a/notes/Machine-Learning-with-Gaussian-elimination.ipynb b/notes/Machine-Learning-with-Gaussian-elimination.ipynb index 37b2e4f..8e45a02 100644 --- a/notes/Machine-Learning-with-Gaussian-elimination.ipynb +++ b/notes/Machine-Learning-with-Gaussian-elimination.ipynb @@ -9,7 +9,7 @@ "We show that a simple linear neuron can be \"learned\" with Gaussian elimination, and indeed is much\n", "faster and more accurate upon doing so. (Much of machine learning is non-linear.)\n", "\n", - "Our model of the universe is that we have an unknow 3-vector\n", + "Our model of the universe is that we have an unknown 3-vector\n", "\n", "$w = \\left[ \\begin{array}{c} w_1 \\\\ w_2 \\\\ w_3 \\end{array} \\right]$\n", "\n", @@ -23,19 +23,19 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3-element Array{Float64,1}:\n", - " 0.982331\n", - " 0.1774 \n", - " 0.212845" + "3-element Vector{Float64}:\n", + " 0.25180627297687963\n", + " 0.5918222257721607\n", + " 0.641067683043823" ] }, - "execution_count": 76, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -46,19 +46,28 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "using LinearAlgebra" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3-element Array{Float64,1}:\n", - " 0.881336\n", - " 1.0557 \n", - " 0.485883" + "3-element Vector{Float64}:\n", + " 1.0689046339958201\n", + " 1.0755410829151852\n", + " 0.8485464788238004" ] }, - "execution_count": 84, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -76,19 +85,19 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3-element Array{Float64,1}:\n", + "3-element Vector{Float64}:\n", " 0.0\n", " 0.0\n", " 
0.0" ] }, - "execution_count": 81, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -100,19 +109,19 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3-element Array{Float64,1}:\n", - " 0.982331\n", - " 0.1774 \n", - " 0.212845" + "3-element Vector{Float64}:\n", + " 0.2518062729768782\n", + " 0.5918222257721599\n", + " 0.6410676830438242" ] }, - "execution_count": 83, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -124,19 +133,19 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3-element Array{Float64,1}:\n", - " 0.982331\n", - " 0.1774 \n", - " 0.212845" + "3-element Vector{Float64}:\n", + " 0.25180627297687963\n", + " 0.5918222257721607\n", + " 0.641067683043823" ] }, - "execution_count": 85, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -147,10 +156,8 @@ }, { "cell_type": "code", - "execution_count": 115, - "metadata": { - "collapsed": true - }, + "execution_count": 7, + "metadata": {}, "outputs": [], "source": [ "## Recover w with a machine learning package -- 18.06 students might just want to execute as a black box\n", @@ -168,74 +175,138 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[0.982331 0.1774 0.212845] : <== estimate after training\n" + "Float32[0.060145017 0.4654307 0.5881613] : <== estimate after training\n" ] } ], "source": [ "# t ... 
a model to be learned to fit the data\n", "t = Dense(3,1)\n", - "loss(x,y) = Flux.mse(t(x),y)\n", - "opt = ADAM(Flux.params(t)[1:1])\n", - "Flux.train!(loss, Iterators.repeated( (X',y'), 20000), opt) # 20000 steps of training\n", - "println((t.W).data, \" : <== estimate after training\")" + "loss(t,x,y) = Flux.mse(t(x),y)\n", + "opt_state = Flux.setup(ADAM(), t)\n", + "Flux.train!(loss, t, Iterators.repeated( (X',y'), 20000), opt_state) # 20000 steps of training\n", + "println((t.weight), \" : <== estimate after training\")" ] }, { "cell_type": "code", - "execution_count": 102, - "metadata": { - "collapsed": true - }, + "execution_count": 9, + "metadata": {}, "outputs": [], "source": [ - "## Adding more data does not help a whole lot" + "using Statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "## Adding more data helps pretty quickly, but many cycles are still important\n", + "\n", + "# sample sizes to test\n", + "ns = collect(1:5)\n", + "\n", + "# how many repeats to test at each sample size\n", + "nrepeats = 50\n", + "nrs = range(start = 1,stop = nrepeats,step = 1)\n", + "\n", + "# initialize matrices to capture results\n", + "mses = zeros(length(ns), nrepeats)\n", + "shortmses = zeros(length(ns), nrepeats)\n", + "\n", + "for (ix, n) in enumerate(ns)\n", + " for (jx, rn) in enumerate(nrs)\n", + " X = randn(n,3)\n", + " y = X*w\n", + " t = Dense(3,1)\n", + " loss(t,x,y) = Flux.mse(t(x),y)\n", + " opt_state = Flux.setup(ADAM(), t)\n", + " Flux.train!(loss, t, Iterators.repeated( (X',y'), 1000), opt_state) # 1000 steps of training\n", + " shortmses[ix, jx] = Flux.mse(t.weight[1,:], w)\n", + " Flux.train!(loss, t, Iterators.repeated( (X',y'), 20000), opt_state) # 20000 steps of training\n", + " mses[ix, jx] = Flux.mse(t.weight[1,:], w)\n", + " end\n", + "end\n" ] }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 11, "metadata": {}, "outputs": [ { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "[0.948837 0.17883 0.218774] : <== estimate after training\n" - ] + "data": { + "image/png": "", + "text/plain": [ + "Figure(PyObject
)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "PyObject " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "n = 3000\n", - "X = randn(n,3)\n", - "y = X*w\n", - "t = Dense(3,1)\n", - "loss(x,y) = Flux.mse(t(x),y)\n", - "opt = ADAM(Flux.params(t)[1:1])\n", - "Flux.train!(loss, Iterators.repeated( (X',y'), 2000), opt) # 2000 steps of training\n", - "println((t.W).data, \" : <== estimate after training\")" + "using PyPlot\n", + "fig, ax = subplots(1)\n", + "ax.errorbar(ns .- 0.01,\n", + " mean(shortmses, dims=2)[:, 1],\n", + " yerr=std(shortmses, dims=2)[:, 1],\n", + " label=\"1000\")\n", + "ax.errorbar(ns .+ 0.01, \n", + " mean(mses, dims=2)[:, 1],\n", + " yerr=std(mses, dims=2)[:, 1],\n", + " label=\"20000\")\n", + "\n", + "\n", + "ax.set_ylabel(\"MSE between network weights and w\")\n", + "ax.set_xlabel(\"Sample size\")\n", + "ax.set_xticks(ns)\n", + "ax.legend(title=\"Training steps\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, "anaconda-cloud": {}, "kernelspec": { - "display_name": "Julia 0.6.0", + "display_name": "Julia 1.8.3", "language": "julia", - "name": "julia-0.6" + "name": "julia-1.8" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", - "version": "0.6.0" + "version": "1.8.3" }, "widgets": { "state": {