Skip to content

Commit

Permalink
dev notebook added
Browse files Browse the repository at this point in the history
  • Loading branch information
ColtAllen committed Dec 11, 2023
1 parent 0aac157 commit 2bac71d
Showing 1 changed file with 383 additions and 0 deletions.
383 changes: 383 additions & 0 deletions docs/source/notebooks/clv/dev/beta_geo_beta_binom.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,383 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5e06e043-4631-47ae-a658-a9a928ff15e5",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from lifetimes import BetaGeoBetaBinomFitter\n",
"from lifetimes.datasets import load_donations, load_cdnow_summary_data_with_monetary_value"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8ff42e71-fc43-4d45-8446-7205e3d37bce",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -3.94031398, -10.25427751, -6.82582822])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spot check of the log-likelihood on three hand-written rows\n",
"value = np.array([[1.5, 1], [5.3, 4], [6, 2]])\n",
"alpha, beta, gamma, delta = 0.55, 10.58, 0.61, 11.67\n",
"T = 12\n",
"\n",
"# the second column of `value` is passed first, then the first column\n",
"params = (alpha, beta, gamma, delta)\n",
"BetaGeoBetaBinomFitter._loglikelihood(params, value[:, 1], value[:, 0], T)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "371bb7a1-9f5c-4bdf-81b1-a9504261badb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<lifetimes.BetaGeoBetaBinomFitter: fitted with 22 subjects, alpha: 1.20, beta: 0.75, delta: 2.78, gamma: 0.66>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the donations sample data and fit the model on four of its columns\n",
"discrete_noncontract_df = load_donations()\n",
"\n",
"# arguments are positional, in the order: frequency, recency, periods, weights columns\n",
"bgbb = BetaGeoBetaBinomFitter().fit(\n",
"    discrete_noncontract_df['frequency'].values,\n",
"    discrete_noncontract_df['recency'].values,\n",
"    discrete_noncontract_df['periods'].values,\n",
"    discrete_noncontract_df['weights'].values,\n",
")\n",
"bgbb"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f8052f01-5aca-48fd-917e-1f2bbeb6326f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['conditional_expected_number_of_purchases_up_to_time', 'conditional_probability_alive', 'expected_number_of_transactions_in_first_n_periods', 'fit', 'load_model', 'save_model', 'summary']\n"
]
}
],
"source": [
"# every attribute name on the fitter class that does not start with an underscore\n",
"public_names = (name for name in dir(BetaGeoBetaBinomFitter) if not name.startswith('_'))\n",
"method_list = list(public_names)\n",
"print(method_list)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f4f56b83-f830-4610-8f4f-e67ff242967e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.072863\n",
"1 0.085696\n",
"2 0.314238\n",
"3 0.593853\n",
"4 0.839396\n",
"5 1.021689\n",
"6 1.147885\n",
"7 0.119121\n",
"8 0.536111\n",
"9 1.057604\n",
"10 1.443042\n",
"11 1.668817\n",
"12 0.223595\n",
"13 1.034572\n",
"14 1.804703\n",
"15 2.189749\n",
"16 0.583192\n",
"17 2.030024\n",
"18 2.710681\n",
"19 1.812942\n",
"20 3.231612\n",
"21 3.752544\n",
"dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# equation 13 in paper\n",
"# the three columns are passed positionally after the leading 5, in the order listed here\n",
"history_columns = ['frequency', 'recency', 'periods']\n",
"bgbb.conditional_expected_number_of_purchases_up_to_time(\n",
"    5, *(discrete_noncontract_df[col] for col in history_columns)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7ea5dc42-160f-4f96-9e16-a97f01dd4bdc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.070072\n",
"1 0.045012\n",
"2 0.165056\n",
"3 0.311927\n",
"4 0.440900\n",
"5 0.536651\n",
"6 0.602936\n",
"7 0.043038\n",
"8 0.193695\n",
"9 0.382108\n",
"10 0.521365\n",
"11 0.602936\n",
"12 0.061566\n",
"13 0.284864\n",
"14 0.496916\n",
"15 0.602936\n",
"16 0.129719\n",
"17 0.451538\n",
"18 0.602936\n",
"19 0.338249\n",
"20 0.602936\n",
"21 0.602936\n",
"dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# equation 11 in paper\n",
"# the three columns are passed positionally after the leading 10, in the order listed here\n",
"history_columns = ['frequency', 'recency', 'periods']\n",
"bgbb.conditional_probability_alive(\n",
"    10, *(discrete_noncontract_df[col] for col in history_columns)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "96bcab46-7279-400a-82c1-e8b509ece774",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>model</th>\n",
" </tr>\n",
" <tr>\n",
" <th>frequency</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3195.925987</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1560.549020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>964.135361</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>668.795916</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>497.960966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>389.113685</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>314.983874</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" model\n",
"frequency \n",
"0 3195.925987\n",
"1 1560.549020\n",
"2 964.135361\n",
"3 668.795916\n",
"4 497.960966\n",
"5 389.113685\n",
"6 314.983874"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO: write and test (8) as a replacement. Compare against just aggregating means across the exploded DF\n",
"# TODO: can the arviz functions in the BetaGeoBetaBinom distribution block preclude the need for this?\n",
"# TODO: replace this with (9) or (10) in a future PR, since that expression can predict interval ranges\n",
"\n",
"# Notes on this method:\n",
"# - it is equation 7 in the paper, but that equation is for probabilities;\n",
"#   should it be equation 8 for predicting mean n?\n",
"# - the function should be renamed for clarity\n",
"# - it distributes customers in the dataset across n transaction opportunities\n",
"# - it works better as an evaluation function, since it assumes a fixed customer population size\n",
"# - if n > n_periods, it will keep right on predicting. This may be a bug\n",
"n_opportunities = 50\n",
"bgbb.expected_number_of_transactions_in_first_n_periods(n=n_opportunities)"
]
},
{
"cell_type": "markdown",
"id": "9d55e986-d1f2-4c0d-8c25-3e289e90d5fe",
"metadata": {},
"source": [
"### Expected transactions in N periods\n",
"This expression overflows to `inf` for large values of `n` (`n=167` in this example). Recalculating on the log scale allows larger values, but taking the log of `term1` directly is not possible when `gamma < 1`, because `term1` is then negative."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2e82f5b4-1b4a-4477-843b-58cbd411d348",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"average of 1.938137499995133 purchases expected in 5 opportunities\n"
]
}
],
"source": [
"from scipy import special\n",
"\n",
"n = 5\n",
"alpha, beta, delta, gamma = bgbb._unload_params('alpha', 'beta', 'delta', 'gamma')\n",
"\n",
"# add a larger gamma value for testing\n",
"#gamma = .9\n",
"\n",
"log_scale = False\n",
"\n",
"# expected purchases = term1 * term2, with\n",
"#   term1 = alpha/(alpha+beta) * delta/(gamma-1)   (negative when gamma < 1, undefined at gamma == 1)\n",
"#   term2 = 1 - G(gamma+delta)/G(gamma+delta+n) * G(1+delta+n)/G(1+delta),   G = gamma function\n",
"term1 = alpha / (alpha + beta) * delta / (gamma - 1)\n",
"\n",
"if not log_scale:\n",
"    # direct evaluation: special.gamma overflows to inf once its argument gets large\n",
"    gamma_ratio = special.gamma(gamma + delta) / special.gamma(gamma + delta + n) * special.gamma(1 + delta + n) / special.gamma(1 + delta)\n",
"    term2 = 1 - gamma_ratio\n",
"else:\n",
"    # Only the gamma-function ratio goes on the log scale; term1 stays on the natural scale,\n",
"    # so no log of a negative number is taken when gamma < 1.\n",
"    log_gamma_ratio = (\n",
"        special.gammaln(gamma + delta) - special.gammaln(gamma + delta + n)\n",
"        + special.gammaln(1 + delta + n) - special.gammaln(1 + delta)\n",
"    )\n",
"    # 1 - exp(x), written as -expm1(x)\n",
"    term2 = -special.expm1(log_gamma_ratio)\n",
"\n",
"# both branches multiply the two terms (the earlier log-scale branch subtracted them)\n",
"expected_purchases_n_periods = term1 * term2\n",
"\n",
"print(f'average of {expected_purchases_n_periods} purchases expected in {n} opportunities')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5186cf4d-710d-4e85-bef9-b66ccced5586",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1.2035223936080357,\n",
" 0.7497163581757648,\n",
" 2.7834419828877737,\n",
" 0.6567181695499797]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# display the fitted values for the four named parameters, in the order requested\n",
"fitted_param_names = ('alpha', 'beta', 'delta', 'gamma')\n",
"bgbb._unload_params(*fitted_param_names)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80d11cc8-98fb-426e-89b2-693f0a8d22fa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 2bac71d

Please sign in to comment.