diff --git a/docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb b/docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb index 89876275f..90bad1999 100644 --- a/docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb +++ b/docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb @@ -34,21 +34,6 @@ "Before delving into the specifics of budget allocation, the initial step is to install the PyMC-Marketing library and ascertain its version. This step will confirm support for the budget allocation function. The following pip command can be run on your Jupyter Notebook:" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-LnDMZnXIh-x", - "outputId": "8d14a8c8-d5c7-4894-8333-eb437b69cd17" - }, - "outputs": [], - "source": [ - "!pip install pymc-marketing" - ] - }, { "cell_type": "markdown", "metadata": { @@ -119,7 +104,7 @@ }, "outputs": [], "source": [ - "name = '/pymc-marketing/data/budget_optimizer_model.nc'\n", + "name = \"/pymc-marketing/data/budget_optimizer_model.nc\"\n", "mmm = DelayedSaturatedMMM.load(name)" ] }, @@ -308,7 +293,7 @@ ], "source": [ "sigmoid_response_curve_fig = mmm.plot_direct_contribution_curves(\n", - "show_fit = True, method='michaelis-menten'\n", + " show_fit=True, method=\"michaelis-menten\"\n", ")" ] }, @@ -337,9 +322,13 @@ }, "outputs": [], "source": [ - "sigmoid_params = mmm.compute_channel_curve_optimization_parameters_original_scale(method='sigmoid')\n", + "sigmoid_params = mmm.compute_channel_curve_optimization_parameters_original_scale(\n", + " method=\"sigmoid\"\n", + ")\n", "\n", - "menten_params = mmm.compute_channel_curve_optimization_parameters_original_scale(method='michaelis-menten')" + "menten_params = mmm.compute_channel_curve_optimization_parameters_original_scale(\n", + " method=\"michaelis-menten\"\n", + ")" ] }, { @@ -431,15 +420,15 @@ }, "outputs": [], "source": [ - "total_budget = 5 #Imagine is 5K or 
5M\n", - "#Define your channels\n", - "channels = ['x1','x2']\n", - "#The initial split per channel\n", + "total_budget = 5 # Imagine is 5K or 5M\n", + "# Define your channels\n", + "channels = [\"x1\", \"x2\"]\n", + "# The initial split per channel\n", "budget_per_channel = total_budget / len(channels)\n", - "#Initial budget per channel as dictionary.\n", + "# Initial budget per channel as dictionary.\n", "initial_budget_dict = {channel: budget_per_channel for channel in channels}\n", - "#bounds for each channel\n", - "min_budget, max_budget = 1,5\n", + "# bounds for each channel\n", + "min_budget, max_budget = 1, 5\n", "budget_bounds = {channel: [min_budget, max_budget] for channel in channels}" ] }, @@ -733,10 +722,10 @@ ], "source": [ "result_sigmoid = mmm.optimize_channel_budget_for_maximum_contribution(\n", - " method = 'sigmoid', #define saturation function\n", - " total_budget = total_budget,\n", - " parameters = sigmoid_params,\n", - " budget_bounds = budget_bounds\n", + " method=\"sigmoid\", # define saturation function\n", + " total_budget=total_budget,\n", + " parameters=sigmoid_params,\n", + " budget_bounds=budget_bounds,\n", ")\n", "\n", "result_sigmoid" @@ -1032,10 +1021,10 @@ ], "source": [ "result_menten = mmm.optimize_channel_budget_for_maximum_contribution(\n", - " method = 'michaelis-menten',\n", - " total_budget = total_budget,\n", - " parameters = menten_params,\n", - " budget_bounds = budget_bounds\n", + " method=\"michaelis-menten\",\n", + " total_budget=total_budget,\n", + " parameters=menten_params,\n", + " budget_bounds=budget_bounds,\n", ")\n", "result_menten" ] @@ -1059,18 +1048,16 @@ }, "outputs": [], "source": [ - "#Use the function `calculate_expected_contribution` to estimate\n", - "#the contribution of your initial budget based on the curve parameters.\n", + "# Use the function `calculate_expected_contribution` to estimate\n", + "# the contribution of your initial budget based on the curve parameters.\n", "initial_contribution = 
calculate_expected_contribution(\n", - " method='sigmoid',\n", - " parameters = sigmoid_params,\n", - " budget = initial_budget_dict\n", + " method=\"sigmoid\", parameters=sigmoid_params, budget=initial_budget_dict\n", ")\n", "\n", "# Initial budget & contribution dictionary\n", "initial_scenario = {\n", - " 'initial_contribution': initial_contribution,\n", - " 'initial_budget': initial_budget_dict\n", + " \"initial_contribution\": initial_contribution,\n", + " \"initial_budget\": initial_budget_dict,\n", "}" ] }, @@ -1114,9 +1101,11 @@ } ], "source": [ - "#Use the function `compare_budget_scenearios` to validate\n", - "#The estimated contribution from one scenario agains the other\n", - "figure_ = mmm.plot_budget_scenearios(base_data=initial_scenario, method='sigmoid', scenarios_data=[result_sigmoid])" + "# Use the function `plot_budget_scenearios` to validate\n", + "# The estimated contribution from one scenario against the other\n", + "figure_ = mmm.plot_budget_scenearios(\n", + " base_data=initial_scenario, method=\"sigmoid\", scenarios_data=[result_sigmoid]\n", + ")" ] }, { @@ -1158,30 +1147,37 @@ }, "outputs": [], "source": [ - "#Initialize two variables to save the results and base conditions for each scenario.\n", + "# Initialize two variables to save the results and base conditions for each scenario.\n", "scenarios_result = []\n", "scenarios_base = []\n", "\n", "for scenario in np.array([0.6, 0.8, 1.2, 1.8]):\n", - " scenarios_result.append(\n", - " mmm.optimize_channel_budget_for_maximum_contribution(\n", - " method = 'sigmoid', #define saturation function\n", - " total_budget = total_budget * scenario,\n", - " parameters = sigmoid_params,\n", - " budget_bounds = {channel: [1, total_budget * scenario] for channel in channels}\n", - " ).to_dict()\n", - " )\n", - "\n", - " scenarios_base.append(\n", - " {'initial_contribution': calculate_expected_contribution(\n", - " method='sigmoid', #define saturation function\n", - " parameters = sigmoid_params,\n", -
" budget = {channel: total_budget * scenario / len(channels) for channel in channels}\n", - " ),\n", - "\n", - " 'initial_budget': {channel: total_budget * scenario / len(channels) for channel in channels}\n", - " }\n", - " )" + " scenarios_result.append(\n", + " mmm.optimize_channel_budget_for_maximum_contribution(\n", + " method=\"sigmoid\", # define saturation function\n", + " total_budget=total_budget * scenario,\n", + " parameters=sigmoid_params,\n", + " budget_bounds={\n", + " channel: [1, total_budget * scenario] for channel in channels\n", + " },\n", + " ).to_dict()\n", + " )\n", + "\n", + " scenarios_base.append(\n", + " {\n", + " \"initial_contribution\": calculate_expected_contribution(\n", + " method=\"sigmoid\", # define saturation function\n", + " parameters=sigmoid_params,\n", + " budget={\n", + " channel: total_budget * scenario / len(channels)\n", + " for channel in channels\n", + " },\n", + " ),\n", + " \"initial_budget\": {\n", + " channel: total_budget * scenario / len(channels) for channel in channels\n", + " },\n", + " }\n", + " )" ] }, { @@ -1222,9 +1218,11 @@ } ], "source": [ - "#Use the function `compare_budget_scenearios` to validate\n", - "#The estimated contribution from one scenario agains the other\n", - "_figure = mmm.plot_budget_scenearios(base_data=initial_scenario, method='sigmoid', scenarios_data=scenarios_result)" + "# Use the function `compare_budget_scenearios` to validate\n", + "# The estimated contribution from one scenario agains the other\n", + "_figure = mmm.plot_budget_scenearios(\n", + " base_data=initial_scenario, method=\"sigmoid\", scenarios_data=scenarios_result\n", + ")" ] }, { @@ -1526,12 +1524,10 @@ ], "source": [ "platform_base_optimization = mmm.optimize_channel_budget_for_maximum_contribution(\n", - " method = 'sigmoid',\n", - " total_budget = total_budget,\n", - " parameters = sigmoid_params,\n", - " budget_bounds = {'x1':[0,1.5],\n", - " 'x2':[0,1.5]\n", - " }\n", + " method=\"sigmoid\",\n", + " 
total_budget=total_budget,\n", + " parameters=sigmoid_params,\n", + " budget_bounds={\"x1\": [0, 1.5], \"x2\": [0, 1.5]},\n", ")\n", "\n", "platform_base_optimization" @@ -1580,7 +1576,7 @@ ], "source": [ "sigmoid_response_curve_fig = mmm.plot_direct_contribution_curves(\n", - "show_fit = True, method='sigmoid', xlim_max=3\n", + " show_fit=True, method=\"sigmoid\", xlim_max=3\n", ")" ] }, @@ -1874,12 +1870,10 @@ ], "source": [ "platform_base_optimization = mmm.optimize_channel_budget_for_maximum_contribution(\n", - " method = 'sigmoid',\n", - " total_budget = total_budget,\n", - " parameters = sigmoid_params,\n", - " budget_bounds = {'x1':[0,1.2],\n", - " 'x2':[0,1.5]\n", - " }\n", + " method=\"sigmoid\",\n", + " total_budget=total_budget,\n", + " parameters=sigmoid_params,\n", + " budget_bounds={\"x1\": [0, 1.2], \"x2\": [0, 1.5]},\n", ")\n", "\n", "platform_base_optimization" @@ -1923,7 +1917,11 @@ } ], "source": [ - "_figure = mmm.plot_budget_scenearios(base_data=initial_scenario, method='sigmoid', scenarios_data=[platform_base_optimization])" + "_figure = mmm.plot_budget_scenearios(\n", + " base_data=initial_scenario,\n", + " method=\"sigmoid\",\n", + " scenarios_data=[platform_base_optimization],\n", + ")" ] }, { @@ -1952,15 +1950,6 @@ "\n", "Consequently, your engagements, feedback, and thoughts are not merely welcomed but actively solicited to make this tool as practical and universally applicable as possible." ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hy7ueyLb4F4Q" - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/pymc_marketing/clv/distributions.py b/pymc_marketing/clv/distributions.py index e7c03f49f..d1bdebbae 100644 --- a/pymc_marketing/clv/distributions.py +++ b/pymc_marketing/clv/distributions.py @@ -350,7 +350,9 @@ class ParetoNBD(PositiveContinuous): Population-level distribution class for a continuous, non-contractual, Pareto/NBD process, based on Schmittlein, et al. in [2]_. 
- The likelihood function is derived from equations (22) and (23) of [3]_, with terms rearranged for numerical stability. + The likelihood function is derived from equations (22) and (23) of [3]_, with terms + rearranged for numerical stability. + The modified expression is provided below: .. math:: @@ -389,7 +391,7 @@ class ParetoNBD(PositiveContinuous): .. [3] Fader, Peter & G. S. Hardie, Bruce (2005). "A Note on Deriving the Pareto/NBD Model and Related Expressions." http://brucehardie.com/notes/009/pareto_nbd_derivations_2005-11-05.pdf - """ + """ # noqa: E501 rv_op = pareto_nbd diff --git a/pymc_marketing/clv/models/beta_geo.py b/pymc_marketing/clv/models/beta_geo.py index 2e6104770..dc78a8f1b 100644 --- a/pymc_marketing/clv/models/beta_geo.py +++ b/pymc_marketing/clv/models/beta_geo.py @@ -37,10 +37,12 @@ class BetaGeoModel(CLVModel): DataFrame containing the following columns: * `frequency`: number of repeat purchases (with possible values 0, 1, 2, ...) * `recency`: time between the first and the last purchase (with possible values 0, 1, 2, ...) - * `T`: time between the first purchase and the end of the observation period (with possible values 0, 1, 2, ...) + * `T`: time between the first purchase and the end of the observation + period (with possible values 0, 1, 2, ...) * `customer_id`: unique customer identifier model_config: dict, optional - Dictionary of model prior parameters. If not provided, the model will use default priors specified in the `default_model_config` class attribute. + Dictionary of model prior parameters. If not provided, the model will use default priors specified in + the `default_model_config` class attribute. sampler_config: dict, optional Dictionary of sampler parameters. Defaults to None. 
diff --git a/pymc_marketing/clv/models/gamma_gamma.py b/pymc_marketing/clv/models/gamma_gamma.py index 3bd2c8fda..efbd5b159 100644 --- a/pymc_marketing/clv/models/gamma_gamma.py +++ b/pymc_marketing/clv/models/gamma_gamma.py @@ -52,8 +52,8 @@ def expected_customer_spend( Eq 5 from [1], p.3 - Adapted from: https://github.com/CamDavidsonPilon/lifetimes/blob/aae339c5437ec31717309ba0ec394427e19753c4/lifetimes/fitters/gamma_gamma_fitter.py#L117 # noqa: E501 - """ + Adapted from: https://github.com/CamDavidsonPilon/lifetimes/blob/aae339c5437ec31717309ba0ec394427e19753c4/lifetimes/fitters/gamma_gamma_fitter.py#L117 + """ # noqa: E501 mean_transaction_value, frequency = to_xarray( customer_id, mean_transaction_value, frequency @@ -162,7 +162,8 @@ class GammaGammaModel(BaseGammaGammaModel): - mean_transaction_value: Mean transaction value of each customer. - frequency: Number of transactions observed for each customer. model_config: dict, optional - Dictionary of model prior parameters. If not provided, the model will use default priors specified in the `default_model_config` class attribute. + Dictionary of model prior parameters. If not provided, the model will use default priors specified in the + `default_model_config` class attribute. sampler_config: dict, optional Dictionary of sampler parameters. Defaults to None. @@ -295,7 +296,8 @@ class GammaGammaModelIndividual(BaseGammaGammaModel): coming from the same customer. - individual_transaction_value: Value of individual transactions. model_config: dict, optional - Dictionary of model prior parameters. If not provided, the model will use default priors specified in the `default_model_config` class attribute. + Dictionary of model prior parameters. If not provided, the model will use default priors specified in the + `default_model_config` class attribute. sampler_config: dict, optional Dictionary of sampler parameters. Defaults to None. 
diff --git a/pymc_marketing/clv/models/pareto_nbd.py b/pymc_marketing/clv/models/pareto_nbd.py index 5fec84e39..7bdf58ae3 100644 --- a/pymc_marketing/clv/models/pareto_nbd.py +++ b/pymc_marketing/clv/models/pareto_nbd.py @@ -68,7 +68,8 @@ class ParetoNBDModel(CLVModel): DataFrame containing the following columns: * `frequency`: number of repeat purchases * `recency`: time between the first and the last purchase - * `T`: time between the first purchase and the end of the observation period; model assumptions require T >= recency + * `T`: time between the first purchase and the end of the observation period. + Model assumptions require T >= recency * `customer_id`: unique customer identifier Along with optional covariate columns. @@ -328,7 +329,10 @@ def fit(self, fit_method: str = "map", **kwargs): # type: ignore # Suppress annoying warning with warnings.catch_warnings(): warnings.filterwarnings( - message="Optimization Warning: The Op hyp2f1 does not provide a C implementation. As well as being potentially slow, this also disables loop fusion.", + message=""" + Optimization Warning: The Op hyp2f1 does not provide a C implementation. + As well as being potentially slow, this also disables loop fusion. + """, action="ignore", category=UserWarning, ) @@ -460,7 +464,8 @@ def expected_purchases( * `customer_id`: unique customer identifier * `frequency`: number of repeat purchases * `recency`: time between the first and the last purchase - * `T`: time between the first purchase and the end of the observation period, model assumptions require T >= recency + * `T`: time between the first purchase and the end of the observation period. + Model assumptions require T >= recency * `future_t`: Number of time periods to predict expected purchases. * covariates: Purchase and dropout covariate columns if original model had any. If not provided, the method will use the fit dataset. 
@@ -532,7 +537,8 @@ def expected_probability_alive( * `customer_id`: unique customer identifier * `frequency`: number of repeat purchases * `recency`: time between the first and the last purchase - * `T`: time between the first purchase and the end of the observation period, model assumptions require T >= recency + * `T`: time between the first purchase and the end of the observation period. + Model assumptions require T >= recency * `future_t`: Number of time periods in the future to estimate alive probability; defaults to 0. * covariates: Purchase and dropout covariate columns if original model had any. If not provided, the method will use the fit dataset. @@ -602,9 +608,11 @@ def expected_purchase_probability( * `customer_id`: unique customer identifier * `frequency`: number of repeat purchases * `recency`: time between the first and the last purchase - * `T`: time between the first purchase and the end of the observation period, model assumptions require T >= recency + * `T`: time between the first purchase and the end of the observation period. + Model assumptions require T >= recency * `future_t`: Number of time periods to predict expected purchases. - * `n_purchases`: Number of purchases to predict probability for. Currently restricted to the same number for all customers. + * `n_purchases`: Number of purchases to predict probability for. + Currently restricted to the same number for all customers. * covariates: Purchase and dropout covariate columns if original model had any. If not provided, the method will use the fit dataset. n_purchases: int, optional @@ -822,7 +830,8 @@ def distribution_new_customer( "recency_frequency", ), ) -> xarray.Dataset: - """Utility function for posterior predictive sampling of dropout, purchase rate and frequency/recency of new customers. + """Utility function for posterior predictive sampling of dropout, purchase rate + and frequency/recency of new customers. 
In a model with covariates, if `data` is not specified, the dataset used for fitting will be used. A prediction will be computed for a new customer with each set of covariates. diff --git a/pymc_marketing/clv/models/shifted_beta_geo.py b/pymc_marketing/clv/models/shifted_beta_geo.py index d814eb984..c0dc673e5 100644 --- a/pymc_marketing/clv/models/shifted_beta_geo.py +++ b/pymc_marketing/clv/models/shifted_beta_geo.py @@ -31,7 +31,8 @@ class ShiftedBetaGeoModelIndividual(CLVModel): observation period * `T`: Maximum observed time period (starting at 0) model_config: dict, optional - Dictionary of model prior parameters. If not provided, the model will use default priors specified in the `default_model_config` class attribute. + Dictionary of model prior parameters. If not provided, the model will use default priors specified in the + `default_model_config` class attribute. sampler_config: dict, optional Dictionary of sampler parameters. Defaults to None. diff --git a/pymc_marketing/clv/utils.py b/pymc_marketing/clv/utils.py index 111e8a697..a6b91d4f5 100644 --- a/pymc_marketing/clv/utils.py +++ b/pymc_marketing/clv/utils.py @@ -511,9 +511,9 @@ def rfm_train_test_split( ] if training_transactions.empty: - raise ValueError( - "No data available. Check `test_transactions` and `train_period_end` and confirm values in `transactions` occur prior to those time periods." - ) + error_msg = """No data available. Check `test_transactions` and `train_period_end` + and confirm values in `transactions` occur prior to those time periods.""" + raise ValueError(error_msg) training_rfm_data = rfm_summary( training_transactions, @@ -535,9 +535,9 @@ def rfm_train_test_split( ].copy() if test_transactions.empty: - raise ValueError( - "No data available. Check `test_transactions` and `train_period_end` and confirm values in `transactions` occur prior to those time periods." - ) + error_msg = """No data available. 
Check `test_transactions` and `train_period_end` + and confirm values in `transactions` occur prior to those time periods.""" + raise ValueError(error_msg) test_transactions[datetime_col] = test_transactions[datetime_col].dt.to_period( time_unit diff --git a/pymc_marketing/mmm/base.py b/pymc_marketing/mmm/base.py index 3043e6f01..cf39d31a4 100644 --- a/pymc_marketing/mmm/base.py +++ b/pymc_marketing/mmm/base.py @@ -89,13 +89,15 @@ def validation_methods( A property that provides validation methods for features ("X") and the target variable ("y"). This property scans the methods of the object and returns those marked for validation. - The methods are marked by having a _tags dictionary attribute, with either "validation_X" or "validation_y" set to True. - The "validation_X" tag indicates a method used for validating features, and "validation_y" indicates a method used for validating the target variable. + The methods are marked by having a _tags dictionary attribute, with either "validation_X" or "validation_y" + set to True. The "validation_X" tag indicates a method used for validating features, and "validation_y" + indicates a method used for validating the target variable. Returns ------- tuple of list of Callable[["BaseMMM", pd.DataFrame], None] - A tuple where the first element is a list of methods for "X" validation, and the second element is a list of methods for "y" validation. + A tuple where the first element is a list of methods for "X" validation, and the second element is + a list of methods for "y" validation. """ return ( @@ -164,13 +166,15 @@ def preprocessing_methods( A property that provides preprocessing methods for features ("X") and the target variable ("y"). This property scans the methods of the object and returns those marked for preprocessing. - The methods are marked by having a _tags dictionary attribute, with either "preprocessing_X" or "preprocessing_y" set to True.
- The "preprocessing_X" tag indicates a method used for preprocessing features, and "preprocessing_y" indicates a method used for preprocessing the target variable. + The methods are marked by having a _tags dictionary attribute, with either "preprocessing_X" + or "preprocessing_y" set to True. The "preprocessing_X" tag indicates a method used for preprocessing + features, and "preprocessing_y" indicates a method used for preprocessing the target variable. Returns ------- tuple of list of Callable[["BaseMMM", pd.DataFrame], pd.DataFrame] - A tuple where the first element is a list of methods for "X" preprocessing, and the second element is a list of methods for "y" preprocessing. + A tuple where the first element is a list of methods for "X" preprocessing, and the second element is a + list of methods for "y" preprocessing. """ return ( [ @@ -191,8 +195,9 @@ def preprocess( """ Preprocess the provided data according to the specified target. - This method applies preprocessing methods to the data ("X" or "y"), which are specified in the preprocessing_methods property of this object. - It iteratively applies each method in the appropriate list (either for "X" or "y") to the data. + This method applies preprocessing methods to the data ("X" or "y"), which are specified in the + preprocessing_methods property of this object. It iteratively applies each method in the appropriate + list (either for "X" or "y") to the data. Parameters ---------- @@ -506,12 +511,14 @@ def _estimate_budget_contribution_fit( and calculates the lower and upper bounds of the contribution fit. The function is used in the `plot_budget_scenearios` function to estimate the contribution fit for each channel - and budget scenario. The estimated fit is then used to plot the contribution optimization bounds for each scenario. + and budget scenario. The estimated fit is then used to plot the contribution optimization bounds + for each scenario. 
Parameters ---------- method : str - The method used to fit the contribution & spent non-linear relationship. It can be either 'sigmoid' or 'michaelis-menten'. + The method used to fit the contribution & spent non-linear relationship. + It can be either 'sigmoid' or 'michaelis-menten'. channel : str The name of the channel for which the contribution fit is being estimated. budget : float @@ -575,7 +582,8 @@ def _plot_scenario( ax : matplotlib.axes.Axes The axes on which to plot the scenario. data : dict - Dictionary containing the data for the scenario. Keys are the names of the channels and values are the corresponding values. + Dictionary containing the data for the scenario. + Keys are the names of the channels and values are the corresponding values. label : str Label for the scenario. color : str @@ -585,9 +593,11 @@ def _plot_scenario( bar_width: float Bar width. upper_bound : dict, optional - Dictionary containing the upper bounds for the data. Keys should match those in the `data` dictionary. Only used if `contribution` is True. + Dictionary containing the upper bounds for the data. Keys should match those in the `data` dictionary. + Only used if `contribution` is True. lower_bound : dict, optional - Dictionary containing the lower bounds for the data. Keys should match those in the `data` dictionary. Only used if `contribution` is True. + Dictionary containing the lower bounds for the data. Keys should match those in the `data` dictionary. + Only used if `contribution` is True. contribution : bool, optional If True, plot the upper and lower bounds for the data. Default is False. @@ -731,13 +741,15 @@ def _plot_response_curve_fit( """ Plot the curve fit for the given channel based on the estimation of the parameters. - The function computes the mean channel contributions, estimates the parameters based on the specified method (either 'sigmoid' or 'michaelis-menten'), and plots - the curve fit. An inflection point on the curve is also highlighted. 
+ The function computes the mean channel contributions, estimates the parameters based on the specified method + (either 'sigmoid' or 'michaelis-menten'), and plots the curve fit. An inflection point on the curve is + also highlighted. Parameters ---------- x : np.ndarray - The x-axis data, usually representing the amount of input (e.g., substrate concentration in enzymology terms). + The x-axis data, usually representing the amount of + input (e.g., substrate concentration in enzymology terms). ax : plt.Axes The matplotlib axes object where the plot should be drawn. channel : str @@ -747,7 +759,8 @@ def _plot_response_curve_fit( xlim_max: int The maximum value to be plot on the X-axis method: str - The method used to fit the contribution & spent non-linear relationship. It can be either 'sigmoid' or 'michaelis-menten'. + The method used to fit the contribution & spent non-linear relationship. + It can be either 'sigmoid' or 'michaelis-menten'. Returns ------- @@ -855,18 +868,21 @@ def optimize_channel_budget_for_maximum_contribution( parameters: Dict[str, Tuple[float, float]], ) -> pd.DataFrame: """ - Experimental: Optimize the allocation of a given total budget across multiple channels to maximize the expected contribution. + Experimental: Optimize the allocation of a given total budget across multiple + channels to maximize the expected contribution. The optimization is based on the method provided, where each channel's contribution follows a saturating function of its allocated budget. The function seeks the budget allocation - that maximizes the total expected contribution across all channels. The method can be either 'sigmoid' or 'michaelis-menten'. + that maximizes the total expected contribution across all channels. + The method can be either 'sigmoid' or 'michaelis-menten'. Parameters ---------- total_budget : int, required The total budget to be distributed across channels. 
method : str, required - The method used to fit the contribution & spent non-linear relationship. It can be either 'sigmoid' or 'michaelis-menten'. + The method used to fit the contribution & spent non-linear relationship. + It can be either 'sigmoid' or 'michaelis-menten'. parameters : Dict, required A dictionary where keys are channel names and values are tuples (L, k) representing the parameters for each channel based on the method used. @@ -913,13 +929,15 @@ def compute_channel_curve_optimization_parameters_original_scale( """ Experimental: Estimate the parameters for the saturating function of each channel's contribution. - The function estimates the parameters (alpha, constant) for each channel based on the specified method (either 'sigmoid' or 'michaelis-menten'). - These parameters represent the maximum possible contribution (alpha) and the constant parameter which vary their definition based on the function (constant) for each channel. + The function estimates the parameters (alpha, constant) for each channel based on the specified method + (either 'sigmoid' or 'michaelis-menten'). These parameters represent the maximum possible contribution (alpha) + and the constant parameter which vary their definition based on the function (constant) for each channel. Parameters ---------- method : str, required - The method used to fit the contribution & spent non-linear relationship. It can be either 'sigmoid' or 'michaelis-menten'. + The method used to fit the contribution & spent non-linear relationship. + It can be either 'sigmoid' or 'michaelis-menten'. Returns ------- @@ -967,7 +985,8 @@ def plot_direct_contribution_curves( xlim_max : int, optional The maximum value to be plot on the X-axis. If not provided, the maximum value in the data will be used. method : str, optional - The method used to fit the contribution & spent non-linear relationship. It can be either 'sigmoid' or 'michaelis-menten'. Defaults to 'sigmoid'. 
+ The method used to fit the contribution & spent non-linear relationship. + It can be either 'sigmoid' or 'michaelis-menten'. Defaults to 'sigmoid'. channels : List[str], optional A list of channels to plot. If not provided, all channels will be plotted. same_axes : bool, optional diff --git a/pymc_marketing/mmm/delayed_saturated_mmm.py b/pymc_marketing/mmm/delayed_saturated_mmm.py index 8645f63d9..85e8707e0 100644 --- a/pymc_marketing/mmm/delayed_saturated_mmm.py +++ b/pymc_marketing/mmm/delayed_saturated_mmm.py @@ -63,9 +63,11 @@ def __init__( channel_columns : List[str] Column names of the media channel variables. model_config : Dictionary, optional - dictionary of parameters that initialise model configuration. Class-default defined by the user default_model_config method. + dictionary of parameters that initialise model configuration. + Class-default defined by the user default_model_config method. sampler_config : Dictionary, optional - dictionary of parameters that initialise sampler configuration. Class-default defined by the user default_sampler_config method. + dictionary of parameters that initialise sampler configuration. + Class-default defined by the user default_sampler_config method. validate_data : bool, optional Whether to validate the data before fitting to model, by default True. control_columns : Optional[List[str]], optional @@ -210,7 +212,10 @@ def _create_likelihood_distribution( if dist["dist"] not in allowed_distributions: raise ValueError( - f"The distribution used for the likelihood is not allowed. Please, use one of the following distributions: {allowed_distributions}." + f""" + The distribution used for the likelihood is not allowed. + Please, use one of the following distributions: {allowed_distributions}. 
+ """ ) # Validate that 'kwargs' is present and is a dictionary @@ -242,7 +247,10 @@ def _create_likelihood_distribution( parameter_distributions[param] = param_config else: raise ValueError( - f"Invalid parameter configuration for '{param}'. It must be either a dictionary with a 'dist' key or a numeric value." + f""" + Invalid parameter configuration for '{param}'. + It must be either a dictionary with a 'dist' key or a numeric value. + """ ) # Extract the likelihood distribution name and instantiate it @@ -593,9 +601,9 @@ def load(cls, fname: str): model.build_model(X, y) # All previously used data is in idata. if model.id != idata.attrs["id"]: - raise ValueError( - f"The file '{fname}' does not contain an inference data of the same model or configuration as '{cls._model_type}'" - ) + error_msg = f"""The file '{fname}' does not contain an inference data of the same model + or configuration as '{cls._model_type}'""" + raise ValueError(error_msg) return model @@ -689,8 +697,9 @@ def identity(x): @classmethod def _model_config_formatting(cls, model_config: Dict) -> Dict: """ - Because of json serialization, model_config values that were originally tuples or numpy are being encoded as lists. - This function converts them back to tuples and numpy arrays to ensure correct id encoding. + Because of json serialization, model_config values that were originally tuples + or numpy are being encoded as lists. This function converts them back to tuples + and numpy arrays to ensure correct id encoding. """ def format_nested_dict(d: Dict) -> Dict: @@ -736,12 +745,14 @@ class DelayedSaturatedMMM( This enable us to have a more stable model and better convergence. If control variables are present, we do not scale them! If needed please do it before passing the data to the model. - 2. We allow to add yearly seasonality controls as Fourier modes. You can use the `yearly_seasonality` parameter to specify the number of Fourier modes to include. + 2. 
We allow adding yearly seasonality controls as Fourier modes. + You can use the `yearly_seasonality` parameter to specify the number of Fourier modes to include. 3. This class also allow us to calibrate the model using: - - Custom priors for the parameters via the `model_config` parameter. You can also set the likelihood distribution. - - Adding lift tests to the likelihood function via the :meth:`add_lift_test_measurements ` method. + * Custom priors for the parameters via the `model_config` parameter. You can also set the likelihood distribution. + + * Adding lift tests to the likelihood function via the :meth:`add_lift_test_measurements ` method. For details on a vanilla implementation in PyMC, see [2]_. @@ -830,7 +841,7 @@ class DelayedSaturatedMMM( ---------- .. [1] Jin, Yuxue, et al. “Bayesian methods for media mix modeling with carryover and shape effects.” (2017). .. [2] Orduz, J. `"Media Effect Estimation with PyMC: Adstock, Saturation & Diminishing Returns" `_. - """ + """ # noqa: E501 def channel_contributions_forward_pass( self, channel_data: npt.NDArray[np.float_] @@ -1237,8 +1248,8 @@ def sample_posterior_predictive( data in order to carry over costs with the adstock transformation. Assumes that X_pred are the next predictions following the training data. Defaults to False. - original_scale: Boolean determining whether to return the predictions in the original scale of the target variable. - Defaults to True. + original_scale: Boolean determining whether to return the predictions in the original scale + of the target variable. Defaults to True.
**sample_posterior_predictive_kwargs: Additional arguments to pass to pymc.sample_posterior_predictive Returns diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index a4e8b515d..066aeaf63 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -65,11 +65,14 @@ def batched_convolution( axis : int The axis of ``x`` along witch to apply the convolution mode : ConvMode, optional - The convolution mode determines how the convolution is applied at the boundaries of the input signal, denoted as "x." The default mode is ConvMode.Before. + The convolution mode determines how the convolution is applied at the boundaries + of the input signal, denoted as "x." The default mode is ConvMode.Before. - ConvMode.After: Applies the convolution with the "Adstock" effect, resulting in a trailing decay effect. - - ConvMode.Before: Applies the convolution with the "Excitement" effect, creating a leading effect similar to the wow factor. - - ConvMode.Overlap: Applies the convolution with both "Pull-Forward" and "Pull-Backward" effects, where the effect overlaps with both preceding and succeeding elements. + - ConvMode.Before: Applies the convolution with the "Excitement" effect, creating a leading effect + similar to the wow factor. + - ConvMode.Overlap: Applies the convolution with both "Pull-Forward" and "Pull-Backward" effects, + where the effect overlaps with both preceding and succeeding elements. Returns ------- @@ -285,7 +288,8 @@ def weibull_adstock( ): R"""Weibull Adstocking Transformation. - This transformation is similar to geometric adstock transformation but has more degrees of freedom, adding more flexibility. + This transformation is similar to geometric adstock transformation but has more + degrees of freedom, adding more flexibility. .. 
plot:: :context: close-figs @@ -509,7 +513,7 @@ def tanh_saturation( References ---------- See https://www.pymc-labs.io/blog-posts/reducing-customer-acquisition-costs-how-we-helped-optimizing-hellofreshs-marketing-budget/ # noqa: E501 - """ + """ # noqa: E501 return b * pt.tanh(x / (b * c)) @@ -673,7 +677,9 @@ def michaelis_menten( r""" Evaluate the Michaelis-Menten function for given values of x, alpha, and lambda. - The Michaelis-Menten function models enzyme kinetics and describes how the rate of a chemical reaction increases with substrate concentration until it reaches its maximum value. + The Michaelis-Menten function models enzyme kinetics and describes how the rate of + a chemical reaction increases with substrate concentration until it reaches its + maximum value. .. math:: \alpha \cdot \frac{x}{\lambda + x} diff --git a/pymc_marketing/mmm/utils.py b/pymc_marketing/mmm/utils.py index 3d9b89398..489db6395 100644 --- a/pymc_marketing/mmm/utils.py +++ b/pymc_marketing/mmm/utils.py @@ -216,8 +216,8 @@ def standardize_scenarios_dict_keys(d: Dict, keywords: List[str]): """ Standardize the keys in a dictionary based on a list of keywords. - This function iterates over the keys in the dictionary and the keywords. If a keyword is found in a key (case-insensitive), - the key is replaced with the keyword. + This function iterates over the keys in the dictionary and the keywords. + If a keyword is found in a key (case-insensitive), the key is replaced with the keyword. Parameters ---------- diff --git a/pymc_marketing/model_builder.py b/pymc_marketing/model_builder.py index 875786f55..325145584 100644 --- a/pymc_marketing/model_builder.py +++ b/pymc_marketing/model_builder.py @@ -65,9 +65,11 @@ def __init__( data : Dictionary, optional It is the data we need to train the model on. model_config : Dictionary, optional - dictionary of parameters that initialise model configuration. Class-default defined by the user default_model_config method. 
+ dictionary of parameters that initialise model configuration. + Class-default defined by the user default_model_config method. sampler_config : Dictionary, optional - dictionary of parameters that initialise sampler configuration. Class-default defined by the user default_sampler_config method. + dictionary of parameters that initialise sampler configuration. + Class-default defined by the user default_sampler_config method. Examples -------- >>> class MyModel(ModelBuilder): @@ -349,8 +351,9 @@ def save(self, fname: str) -> None: @classmethod def _model_config_formatting(cls, model_config: Dict) -> Dict: """ - Because of json serialization, model_config values that were originally tuples or numpy are being encoded as lists. - This function converts them back to tuples and numpy arrays to ensure correct id encoding. + Because of json serialization, model_config values that were originally tuples + or numpy are being encoded as lists. This function converts them back to tuples + and numpy arrays to ensure correct id encoding. """ for key in model_config: if isinstance(model_config[key], dict): @@ -412,9 +415,9 @@ def load(cls, fname: str): # All previously used data is in idata. if model.id != idata.attrs["id"]: - raise ValueError( - f"The file '{fname}' does not contain an inference data of the same model or configuration as '{cls._model_type}'" - ) + error_msg = f"""The file '{fname}' does not contain an inference data of the same model + or configuration as '{cls._model_type}'""" + raise ValueError(error_msg) return model @@ -442,7 +445,8 @@ def fit( Specifies whether the fit progressbar should be displayed predictor_names: Optional[List[str]] = None, Allows for custom naming of predictors given in a form of 2dArray - allows for naming of predictors when given in a form of np.ndarray, if not provided the predictors will be named like predictor1, predictor2... 
+ Allows for naming of predictors when given in a form of np.ndarray, if not provided + the predictors will be named like predictor1, predictor2... random_seed : Optional[RandomState] Provides sampler with initial random seed for obtaining reproducible samples **kwargs : Any @@ -714,8 +718,8 @@ def id(self) -> str: """ Generate a unique hash value for the model. - The hash value is created using the last 16 characters of the SHA256 hash encoding, based on the model configuration, - version, and model type. + The hash value is created using the last 16 characters of the SHA256 hash encoding, + based on the model configuration, version, and model type. Returns ------- diff --git a/pyproject.toml b/pyproject.toml index 8e5b4e708..b27f53130 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,9 +60,9 @@ repository = "https://github.com/pymc-labs/pymc-marketing" [tool.ruff.lint] select = ["E", "F", "I", "W"] -ignore = [ - "E501", # Line too long -] + +[tool.ruff.lint.pycodestyle] +max-line-length = 120 [tool.pytest.ini_options] addopts = [ diff --git a/tests/clv/test_utils.py b/tests/clv/test_utils.py index ea468e3d1..798f8f6c7 100644 --- a/tests/clv/test_utils.py +++ b/tests/clv/test_utils.py @@ -763,10 +763,10 @@ def test_rfm_train_test_split_throws_better_error_if_test_period_end_is_too_earl test_end = "2014-02-07" - with pytest.raises( - ValueError, - match="No data available. Check `test_transactions` and `train_period_end` and confirm values in `transactions` occur prior to those time periods.", - ): + error_msg = """No data available. 
Check `test_transactions` and `train_period_end` + and confirm values in `transactions` occur prior to those time periods.""" + + with pytest.raises(ValueError, match=error_msg): rfm_train_test_split( transaction_data, "id", "date", train_end, test_period_end=test_end ) diff --git a/tests/mmm/test_delayed_saturated_mmm.py b/tests/mmm/test_delayed_saturated_mmm.py index d7827c61b..eaca2768b 100644 --- a/tests/mmm/test_delayed_saturated_mmm.py +++ b/tests/mmm/test_delayed_saturated_mmm.py @@ -537,10 +537,11 @@ def mock_property(self): DSMMM.save("test_model") # Apply the monkeypatch for the property monkeypatch.setattr(DelayedSaturatedMMM, "id", property(mock_property)) - with pytest.raises( - ValueError, - match="The file 'test_model' does not contain an inference data of the same model or configuration as 'DelayedSaturatedMMM'", - ): + + error_msg = """The file 'test_model' does not contain an inference data of the same model + or configuration as 'DelayedSaturatedMMM'""" + + with pytest.raises(ValueError, match=error_msg): DelayedSaturatedMMM.load("test_model") os.remove("test_model")