From 2bac71d88801602fbe4018fbb3f33fe9adbbf4e2 Mon Sep 17 00:00:00 2001
From: ColtAllen <colt.allen1@gmail.com>
Date: Mon, 11 Dec 2023 11:41:25 -0700
Subject: [PATCH] dev notebook added

---
 .../clv/dev/beta_geo_beta_binom.ipynb         | 383 ++++++++++++++++++
 1 file changed, 383 insertions(+)
 create mode 100644 docs/source/notebooks/clv/dev/beta_geo_beta_binom.ipynb

diff --git a/docs/source/notebooks/clv/dev/beta_geo_beta_binom.ipynb b/docs/source/notebooks/clv/dev/beta_geo_beta_binom.ipynb
new file mode 100644
index 000000000..bc48cfa9c
--- /dev/null
+++ b/docs/source/notebooks/clv/dev/beta_geo_beta_binom.ipynb
@@ -0,0 +1,383 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "5e06e043-4631-47ae-a658-a9a928ff15e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from lifetimes import BetaGeoBetaBinomFitter\n",
+    "from lifetimes.datasets import load_donations, load_cdnow_summary_data_with_monetary_value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "8ff42e71-fc43-4d45-8446-7205e3d37bce",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ -3.94031398, -10.25427751,  -6.82582822])"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "value = np.array([[1.5, 1], [5.3, 4], [6, 2]])\n",
+    "alpha = 0.55\n",
+    "beta = 10.58\n",
+    "gamma = 0.61\n",
+    "delta = 11.67\n",
+    "T = 12\n",
+    "\n",
+    "BetaGeoBetaBinomFitter._loglikelihood((alpha, beta, gamma, delta), value[..., 1], value[..., 0], T)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "371bb7a1-9f5c-4bdf-81b1-a9504261badb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<lifetimes.BetaGeoBetaBinomFitter: fitted with 22 subjects, alpha: 1.20, beta: 0.75, delta: 2.78, gamma: 0.66>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "discrete_noncontract_df = load_donations()\n",
+    "\n",
+    "periods = 6\n",
+    "bgbb = BetaGeoBetaBinomFitter().fit(discrete_noncontract_df['frequency'].values,\n",
+    "                             discrete_noncontract_df['recency'].values,\n",
+    "                             discrete_noncontract_df['periods'].values,\n",
+    "                             discrete_noncontract_df['weights'].values)\n",
+    "bgbb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "f8052f01-5aca-48fd-917e-1f2bbeb6326f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['conditional_expected_number_of_purchases_up_to_time', 'conditional_probability_alive', 'expected_number_of_transactions_in_first_n_periods', 'fit', 'load_model', 'save_model', 'summary']\n"
+     ]
+    }
+   ],
+   "source": [
+    "method_list = [method for method in dir(BetaGeoBetaBinomFitter) if not method.startswith('_')]\n",
+    "print(method_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f4f56b83-f830-4610-8f4f-e67ff242967e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0     0.072863\n",
+       "1     0.085696\n",
+       "2     0.314238\n",
+       "3     0.593853\n",
+       "4     0.839396\n",
+       "5     1.021689\n",
+       "6     1.147885\n",
+       "7     0.119121\n",
+       "8     0.536111\n",
+       "9     1.057604\n",
+       "10    1.443042\n",
+       "11    1.668817\n",
+       "12    0.223595\n",
+       "13    1.034572\n",
+       "14    1.804703\n",
+       "15    2.189749\n",
+       "16    0.583192\n",
+       "17    2.030024\n",
+       "18    2.710681\n",
+       "19    1.812942\n",
+       "20    3.231612\n",
+       "21    3.752544\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# equation 13 in paper\n",
+    "bgbb.conditional_expected_number_of_purchases_up_to_time(5,\n",
+    "    discrete_noncontract_df['frequency'],\n",
+    "    discrete_noncontract_df['recency'],\n",
+    "    discrete_noncontract_df['periods'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7ea5dc42-160f-4f96-9e16-a97f01dd4bdc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0     0.070072\n",
+       "1     0.045012\n",
+       "2     0.165056\n",
+       "3     0.311927\n",
+       "4     0.440900\n",
+       "5     0.536651\n",
+       "6     0.602936\n",
+       "7     0.043038\n",
+       "8     0.193695\n",
+       "9     0.382108\n",
+       "10    0.521365\n",
+       "11    0.602936\n",
+       "12    0.061566\n",
+       "13    0.284864\n",
+       "14    0.496916\n",
+       "15    0.602936\n",
+       "16    0.129719\n",
+       "17    0.451538\n",
+       "18    0.602936\n",
+       "19    0.338249\n",
+       "20    0.602936\n",
+       "21    0.602936\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# equation 11 in paper\n",
+    "bgbb.conditional_probability_alive(10,\n",
+    "    discrete_noncontract_df['frequency'],\n",
+    "    discrete_noncontract_df['recency'],\n",
+    "    discrete_noncontract_df['periods'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "96bcab46-7279-400a-82c1-e8b509ece774",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>model</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>frequency</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3195.925987</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1560.549020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>964.135361</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>668.795916</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>497.960966</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>389.113685</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>314.983874</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 model\n",
+       "frequency             \n",
+       "0          3195.925987\n",
+       "1          1560.549020\n",
+       "2           964.135361\n",
+       "3           668.795916\n",
+       "4           497.960966\n",
+       "5           389.113685\n",
+       "6           314.983874"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# TODO: write and test (8) as a replacement. Compare against just aggregating means across the exploded DF \n",
+    "# TODO: Can the arviz functions in the BetaGeoBetaBinom distribution block preclude the need for this?\n",
+    "# TODO: Replace this with (9) or (10) in a future PR, since that expression can predict interval ranges\n",
+    "\n",
+    "# equation 7 in paper, but that's for probabilities. should it be 8 for predicting mean n?\n",
+    "# yeah, this function should be renamed for clarity. \n",
+    "# it distributes customers in the dataset across n transaction opportunities\n",
+    "# it works better as an evaluation function, since it assumes a fixed customer population size\n",
+    "# if n > n_periods, it will keep right on predicting. This may be a bug\n",
+    "bgbb.expected_number_of_transactions_in_first_n_periods(n=50)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9d55e986-d1f2-4c0d-8c25-3e289e90d5fe",
+   "metadata": {},
+   "source": [
+    "### Expected transactions in N periods\n",
+    "The closed-form expression below blows up to inf for large values of n (n=167 in this example). Recalculating the gamma-function ratio on the log scale allows for larger values of n, but term1 itself cannot be log-transformed if gamma < 1, because term1 will then be negative."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "2e82f5b4-1b4a-4477-843b-58cbd411d348",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "average of 1.938137499995133 purchases expected in 5 opportunities\n"
+     ]
+    }
+   ],
+   "source": [
+    "from scipy import special\n",
+    "from numpy import log,exp\n",
+    "\n",
+    "n = 5  # number of transaction opportunities to predict over\n",
+    "alpha,beta,delta,gamma = bgbb._unload_params('alpha','beta','delta','gamma')\n",
+    "\n",
+    "# add a larger gamma value for testing\n",
+    "#gamma = .9\n",
+    "\n",
+    "log_scale = False  # True evaluates the gamma-function ratio via gammaln\n",
+    "\n",
+    "if not log_scale:\n",
+    "    term1 = alpha/(alpha+beta)*delta/(gamma-1)\n",
+    "    term2 = 1-(special.gamma(gamma+delta))/special.gamma(gamma+delta+n)*(special.gamma(1+delta+n))/special.gamma(1+delta)\n",
+    "    expected_purchases_n_periods = term1 * term2\n",
+    "else:\n",
+    "    term1 = alpha/(alpha+beta)*delta/(gamma-1)  # kept on the linear scale: negative when gamma < 1, so log(term1) would be nan\n",
+    "    term2 = special.gammaln(gamma+delta) - special.gammaln(gamma+delta+n) + special.gammaln(1+delta+n) - special.gammaln(1+delta)  # log of the gamma-function ratio\n",
+    "    expected_purchases_n_periods = term1 * (1 - exp(term2))  # same product as the linear branch: term1 * (1 - ratio)\n",
+    "\n",
+    "print(f'average of {expected_purchases_n_periods} purchases expected in {n} opportunities')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "5186cf4d-710d-4e85-bef9-b66ccced5586",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[1.2035223936080357,\n",
+       " 0.7497163581757648,\n",
+       " 2.7834419828877737,\n",
+       " 0.6567181695499797]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "bgbb._unload_params('alpha','beta','delta','gamma')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "80d11cc8-98fb-426e-89b2-693f0a8d22fa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}