From edad40920865497a114c5ec1a8dc6de2d0022d8d Mon Sep 17 00:00:00 2001 From: Tyler Burch Date: Fri, 27 Sep 2019 15:06:30 -0500 Subject: [PATCH] Add express solution --- .../notebooks/express.ipynb | 193 ++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 riddler538_2019_Sept27/notebooks/express.ipynb diff --git a/riddler538_2019_Sept27/notebooks/express.ipynb b/riddler538_2019_Sept27/notebooks/express.ipynb new file mode 100644 index 0000000..c2758f1 --- /dev/null +++ b/riddler538_2019_Sept27/notebooks/express.ipynb @@ -0,0 +1,193 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Riddler Express**\n", + "\n", + "_If a baseball team is truly .500, meaning it has a 50 percent chance of winning each game, what’s the probability that it has won two of its last four games and four of its last eight games?_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To do this, we'll simulate many instances of 8 games with 50/50 odds to win any of them. The aggregate probability that a team will accomplish 2-4 and 4-8 is equivalent to the fractional probability of (times where this does happen / total attempts) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, define some functions. \n", + "The first, simulate_games, simulates instances of 8 games and returns a boolean array of length 8 that indicates wins and losses.\n", + "The second, simulate_instances calls the first for the indicated number of instances and returns another boolean array where the passing condition (both 2-4 and 4-8) is true, and otherwise is false." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def simulate_games(nInstances=None):\n", + "    \"\"\"Simulate 8 games of a .500 team as an array of 0s (losses) and 1s (wins).\n", + "\n", + "    With no argument, return a single length-8 array. When nInstances is given,\n", + "    return an (nInstances, 8) array holding one independent 8-game run per row.\n", + "    \"\"\"\n", + "    shape = 8 if nInstances is None else (nInstances, 8)\n", + "    return np.random.randint(2, size=shape)\n", + "\n", + "def simulate_instances(nInstances, chunk_size=1000000):\n", + "    \"\"\"Return a length-nInstances array holding 1 for each successful run, else 0.\n", + "\n", + "    A run is successful when the team won exactly 2 of its last four games and\n", + "    exactly 4 of its last 8. Runs are drawn chunk_size at a time so the checks\n", + "    are vectorized without holding every simulated game in memory at once.\n", + "    \"\"\"\n", + "    trues = np.zeros(nInstances)\n", + "    for start in range(0, nInstances, chunk_size):\n", + "        stop = min(start + chunk_size, nInstances)\n", + "        game_results = simulate_games(stop - start)\n", + "        # Check for 2 wins in last four and 4 wins in last 8\n", + "        wins_last_four = game_results[:, :4].sum(axis=1)\n", + "        wins_last_eight = game_results.sum(axis=1)\n", + "        trues[start:stop] = (wins_last_four == 2) & (wins_last_eight == 4)\n", + "    return trues" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll choose the number of simulations we want to run, then call simulate_instances for that many iterations.\n", + "\n", + "The cumulative sum along this array is the number of successful attempts (denoted truth in code)\n", + "We'll also do an analogous cumulative sum of a ones array to indicate how many instances have occurred so far.\n", + "\n", + "Dividing those two gives us our answer at each point, the last element being the most accurate, as it represents the most simulated attempts" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(538) # fixed seed so Restart & Run All reproduces the same numbers\n", + "instances = 100000000 # number of trials\n", + "truth_representation_arr = simulate_instances(instances) # 1 where a trial met both conditions, else 0\n", + "truth_cumsum = truth_representation_arr.cumsum() \n", + "total_cumsum = np.ones(instances).cumsum()\n", + "fractional_results = truth_cumsum / total_cumsum" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Output final answer \n", + "\n", + "Being a binary counting variable, I'll use the Agresti–Coull interval (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval) to estimate the confidence interval" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Solution: 0.1406 ± 0.0001\n" + ] + } + ], + "source": [ + "p = fractional_results[-1]\n", + "n = instances\n", + "z = 1.96 # 95% CI\n", + "CI = z * np.sqrt( (p * (1 - p)) / n)\n", + "\n", + "solution = str(round(p,4)) + \" ± \" + str(round(CI,4))\n", + "print(\"Solution: \" + solution)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll make a plot of how accurate our solution gets over simulation iterations." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# generate fig\n", + "fig = plt.figure(figsize=(12,6))\n", + "ax = plt.gca()\n", + "ax.set_xscale('log')\n", + "\n", + "# plot probability evolution\n", + "plt.plot(total_cumsum, fractional_results,color=\"cornflowerblue\")\n", + "\n", + "# plot solution line\n", + "ax.axhline(fractional_results[-1],linestyle=\":\",color=\"firebrick\",alpha=0.7)\n", + "\n", + "# Make plot better looking\n", + "sns.despine()\n", + "ax.tick_params(axis='both', which='major', labelsize=12)\n", + "\n", + "# annotate\n", + "plt.xlabel(\"Simulations\",fontsize=15)\n", + "plt.ylabel(\"Fraction of both 2-4 and 4-8\",fontsize=15)\n", + "plt.annotate(solution,\n", + " xy=(instances,fractional_results[-1]+.01), xycoords=\"data\", \n", + " ha=\"right\", va=\"bottom\", color=\"firebrick\",alpha=0.7, fontsize=16)\n", + "plt.annotate(s=\"Tyler James Burch\", xy=(.05,.03), xycoords='figure fraction',\n", + " textcoords='figure fraction', color='grey',alpha=0.7, fontsize=14)\n", + "\n", + "\n", + "plt.savefig(\"plots/express_solution.png\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "puzzle3.7", + "language": "python", + "name": "puzzle3.7" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}