diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 62851c13c..d8d63773b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@ ci:

 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.5
+    rev: v0.3.7
     hooks:
       - id: ruff
         args: ["--fix", "--output-format=full"]
diff --git a/docs/source/notebooks/clv/sBG.ipynb b/docs/source/notebooks/clv/sBG.ipynb
index b83cffa7e..093319180 100644
--- a/docs/source/notebooks/clv/sBG.ipynb
+++ b/docs/source/notebooks/clv/sBG.ipynb
@@ -1169,8 +1169,8 @@
    "source": [
     "weeks = xr.DataArray(np.arange(1, 14), dims=[\"weeks\"])\n",
     "\n",
-    "survival_perc_highend = 100-100*((churn_highend < weeks))\n",
-    "survival_perc_regular = 100-100*((churn_regular < weeks))"
+    "survival_perc_highend = 100-100*(churn_highend < weeks)\n",
+    "survival_perc_regular = 100-100*(churn_regular < weeks)"
    ]
   },
  {
diff --git a/pymc_marketing/clv/models/basic.py b/pymc_marketing/clv/models/basic.py
index 29f48815a..997e96ffd 100644
--- a/pymc_marketing/clv/models/basic.py
+++ b/pymc_marketing/clv/models/basic.py
@@ -1,7 +1,8 @@
 import json
 import warnings
+from collections.abc import Sequence
 from pathlib import Path
-from typing import Dict, Optional, Sequence, cast
+from typing import cast

 import arviz as az
 import pandas as pd
@@ -21,8 +22,8 @@ def __init__(
         self,
         data: pd.DataFrame,
         *,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         super().__init__(model_config, sampler_config)
         self.data = data
@@ -211,18 +212,18 @@ def thin_fit_result(self, keep_every: int):
         return type(self)._build_with_idata(new_idata)

     @staticmethod
-    def _create_distribution(dist: Dict, shape=()):
+    def _create_distribution(dist: dict, shape=()):
         try:
             return getattr(pm, dist["dist"]).dist(**dist.get("kwargs", {}), shape=shape)
         except AttributeError:
             raise ValueError(f"Distribution {dist['dist']} does not exist in PyMC")

     @property
-    def default_sampler_config(self) -> Dict:
+    def default_sampler_config(self) -> dict:
         return {}

     @property
-    def _serializable_model_config(self) -> Dict:
+    def _serializable_model_config(self) -> dict:
         return self.model_config

     @property
diff --git a/pymc_marketing/clv/models/beta_geo.py b/pymc_marketing/clv/models/beta_geo.py
index dc78a8f1b..83fd57304 100644
--- a/pymc_marketing/clv/models/beta_geo.py
+++ b/pymc_marketing/clv/models/beta_geo.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Sequence, Union
+from collections.abc import Sequence

 import numpy as np
 import pandas as pd
@@ -117,8 +117,8 @@ class BetaGeoModel(CLVModel):
     def __init__(
         self,
         data: pd.DataFrame,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         self._validate_cols(
             data,
@@ -132,7 +132,7 @@ def __init__(
         )

     @property
-    def default_model_config(self) -> Dict[str, Dict]:
+    def default_model_config(self) -> dict[str, dict]:
         return {
             "a_prior": {"dist": "HalfFlat", "kwargs": {}},
             "b_prior": {"dist": "HalfFlat", "kwargs": {}},
@@ -211,11 +211,11 @@ def _unload_params(self):
     # taken from https://lifetimes.readthedocs.io/en/latest/lifetimes.fitters.html
     def expected_num_purchases(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        t: Union[np.ndarray, pd.Series, TensorVariable],
-        frequency: Union[np.ndarray, pd.Series, TensorVariable],
-        recency: Union[np.ndarray, pd.Series, TensorVariable],
-        T: Union[np.ndarray, pd.Series, TensorVariable],
+        customer_id: np.ndarray | pd.Series,
+        t: np.ndarray | pd.Series | TensorVariable,
+        frequency: np.ndarray | pd.Series | TensorVariable,
+        recency: np.ndarray | pd.Series | TensorVariable,
+        T: np.ndarray | pd.Series | TensorVariable,
     ) -> xr.DataArray:
         r"""
         Given a purchase history/profile of :math:`x` and :math:`t_x` for an individual
@@ -273,10 +273,10 @@ def expected_num_purchases(

     def expected_probability_alive(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        frequency: Union[np.ndarray, pd.Series],
-        recency: Union[np.ndarray, pd.Series],
-        T: Union[np.ndarray, pd.Series],
+        customer_id: np.ndarray | pd.Series,
+        frequency: np.ndarray | pd.Series,
+        recency: np.ndarray | pd.Series,
+        T: np.ndarray | pd.Series,
     ) -> xr.DataArray:
         r"""
         Posterior expected value of the probability of being alive at time T. The
@@ -310,7 +310,7 @@ def expected_probability_alive(

     def expected_num_purchases_new_customer(
         self,
-        t: Union[np.ndarray, pd.Series],
+        t: np.ndarray | pd.Series,
     ):
         r"""
         Posterior expected number of purchases for any interval of length :math:`t`. See
@@ -345,7 +345,7 @@ def expected_num_purchases_new_customer(

     def _distribution_new_customers(
         self,
-        random_seed: Optional[RandomState] = None,
+        random_seed: RandomState | None = None,
         var_names: Sequence[str] = ("population_dropout", "population_purchase_rate"),
     ) -> xr.Dataset:
         with pm.Model():
@@ -372,7 +372,7 @@ def _distribution_new_customers(

     def distribution_new_customer_dropout(
         self,
-        random_seed: Optional[RandomState] = None,
+        random_seed: RandomState | None = None,
     ) -> xr.Dataset:
         """Sample the Beta distribution for the population-level dropout rate.
@@ -396,7 +396,7 @@ def distribution_new_customer_dropout(

     def distribution_new_customer_purchase_rate(
         self,
-        random_seed: Optional[RandomState] = None,
+        random_seed: RandomState | None = None,
     ) -> xr.Dataset:
         """Sample the Gamma distribution for the population-level purchase rate.
diff --git a/pymc_marketing/clv/models/gamma_gamma.py b/pymc_marketing/clv/models/gamma_gamma.py
index efbd5b159..a660bbdd3 100644
--- a/pymc_marketing/clv/models/gamma_gamma.py
+++ b/pymc_marketing/clv/models/gamma_gamma.py
@@ -1,5 +1,3 @@
-from typing import Dict, Optional, Union
-
 import numpy as np
 import pandas as pd
 import pymc as pm
@@ -15,10 +13,10 @@ class BaseGammaGammaModel(CLVModel):

     def distribution_customer_spend(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        mean_transaction_value: Union[np.ndarray, pd.Series, TensorVariable],
-        frequency: Union[np.ndarray, pd.Series, TensorVariable],
-        random_seed: Optional[RandomState] = None,
+        customer_id: np.ndarray | pd.Series,
+        mean_transaction_value: np.ndarray | pd.Series | TensorVariable,
+        frequency: np.ndarray | pd.Series | TensorVariable,
+        random_seed: RandomState | None = None,
     ) -> xarray.DataArray:
         """Posterior distribution of transaction value per customer"""
@@ -44,9 +42,9 @@ def distribution_customer_spend(

     def expected_customer_spend(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        mean_transaction_value: Union[np.ndarray, pd.Series],
-        frequency: Union[np.ndarray, pd.Series],
+        customer_id: np.ndarray | pd.Series,
+        mean_transaction_value: np.ndarray | pd.Series,
+        frequency: np.ndarray | pd.Series,
     ) -> xarray.DataArray:
         """Expected transaction value per customer
@@ -107,11 +105,11 @@ def expected_new_customer_spend(self) -> xarray.DataArray:
     def expected_customer_lifetime_value(
         self,
         transaction_model: CLVModel,
-        customer_id: Union[np.ndarray, pd.Series],
-        mean_transaction_value: Union[np.ndarray, pd.Series],
-        frequency: Union[np.ndarray, pd.Series],
-        recency: Union[np.ndarray, pd.Series],
-        T: Union[np.ndarray, pd.Series],
+        customer_id: np.ndarray | pd.Series,
+        mean_transaction_value: np.ndarray | pd.Series,
+        frequency: np.ndarray | pd.Series,
+        recency: np.ndarray | pd.Series,
+        T: np.ndarray | pd.Series,
         time: int = 12,
         discount_rate: float = 0.01,
         freq: str = "D",
@@ -226,8 +224,8 @@ class GammaGammaModel(BaseGammaGammaModel):
     def __init__(
         self,
         data: pd.DataFrame,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         self._validate_cols(
             data,
@@ -239,7 +237,7 @@ def __init__(
         )

     @property
-    def default_model_config(self) -> Dict:
+    def default_model_config(self) -> dict:
         return {
             "p_prior": {"dist": "HalfFlat", "kwargs": {}},
             "q_prior": {"dist": "HalfFlat", "kwargs": {}},
@@ -362,8 +360,8 @@ class GammaGammaModelIndividual(BaseGammaGammaModel):
     def __init__(
         self,
         data: pd.DataFrame,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         self._validate_cols(
             data, required_cols=["customer_id", "individual_transaction_value"]
@@ -373,7 +371,7 @@ def __init__(
         )

     @property
-    def default_model_config(self) -> Dict:
+    def default_model_config(self) -> dict:
         return {
             "p_prior": {"dist": "HalfFlat", "kwargs": {}},
             "q_prior": {"dist": "HalfFlat", "kwargs": {}},
@@ -419,9 +417,9 @@ def _summarize_mean_data(self, customer_id, individual_transaction_value):

     def distribution_customer_spend(  # type: ignore [override]
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        individual_transaction_value: Union[np.ndarray, pd.Series, TensorVariable],
-        random_seed: Optional[RandomState] = None,
+        customer_id: np.ndarray | pd.Series,
+        individual_transaction_value: np.ndarray | pd.Series | TensorVariable,
+        random_seed: RandomState | None = None,
     ) -> xarray.DataArray:
         """Return distribution of transaction value per customer"""
@@ -438,9 +436,9 @@ def distribution_customer_spend(  # type: ignore [override]

     def expected_customer_spend(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        individual_transaction_value: Union[np.ndarray, pd.Series, TensorVariable],
-        random_seed: Optional[RandomState] = None,
+        customer_id: np.ndarray | pd.Series,
+        individual_transaction_value: np.ndarray | pd.Series | TensorVariable,
+        random_seed: RandomState | None = None,
     ) -> xarray.DataArray:
         """Return expected transaction value per customer"""
@@ -458,10 +456,10 @@ def expected_customer_spend(
     def expected_customer_lifetime_value(  # type: ignore [override]
         self,
         transaction_model: CLVModel,
-        customer_id: Union[np.ndarray, pd.Series],
-        individual_transaction_value: Union[np.ndarray, pd.Series, TensorVariable],
-        recency: Union[np.ndarray, pd.Series],
-        T: Union[np.ndarray, pd.Series],
+        customer_id: np.ndarray | pd.Series,
+        individual_transaction_value: np.ndarray | pd.Series | TensorVariable,
+        recency: np.ndarray | pd.Series,
+        T: np.ndarray | pd.Series,
         time: int = 12,
         discount_rate: float = 0.01,
         freq: str = "D",
diff --git a/pymc_marketing/clv/models/pareto_nbd.py b/pymc_marketing/clv/models/pareto_nbd.py
index 7bdf58ae3..66f5ff7c1 100644
--- a/pymc_marketing/clv/models/pareto_nbd.py
+++ b/pymc_marketing/clv/models/pareto_nbd.py
@@ -1,5 +1,6 @@
 import warnings
-from typing import Any, Dict, Literal, Optional, Sequence, Union, cast
+from collections.abc import Sequence
+from typing import Any, Literal, cast

 import numpy as np
 import pandas as pd
@@ -174,8 +175,8 @@ def __init__(
         self,
         data: pd.DataFrame,
         *,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         super().__init__(
             data=data,
@@ -195,7 +196,7 @@ def __init__(
         )

     @property
-    def default_model_config(self) -> Dict[str, Any]:
+    def default_model_config(self) -> dict[str, Any]:
         return {
             "r_prior": {"dist": "Weibull", "kwargs": {"alpha": 2, "beta": 1}},
             "alpha_prior": {"dist": "Weibull", "kwargs": {"alpha": 2, "beta": 10}},
@@ -446,9 +447,9 @@ def _extract_predictive_variables(

     def expected_purchases(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        future_t: Optional[Union[int, np.ndarray, pd.Series]] = None,
+        future_t: int | np.ndarray | pd.Series | None = None,
     ) -> xarray.DataArray:
         """
         Given *recency*, *frequency*, and *T* for an individual customer, this method predicts the
@@ -520,9 +521,9 @@ def expected_purchases(

     def expected_probability_alive(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        future_t: Optional[Union[int, np.ndarray, pd.Series]] = None,
+        future_t: int | np.ndarray | pd.Series | None = None,
     ) -> xarray.DataArray:
         """
         Compute the probability that a customer with history *frequency*, *recency*, and *T*
@@ -588,10 +589,10 @@ def expected_probability_alive(

     def expected_purchase_probability(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        n_purchases: Optional[int] = None,
-        future_t: Optional[Union[int, np.ndarray, pd.Series]] = None,
+        n_purchases: int | None = None,
+        future_t: int | np.ndarray | pd.Series | None = None,
     ) -> xarray.DataArray:
         """
         Estimate probability of *n_purchases* over *future_t* time periods,
@@ -764,9 +765,9 @@ def _log_B_three(i):

     def expected_purchases_new_customer(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        t: Optional[Union[int, np.ndarray, pd.Series]] = None,
+        t: int | np.ndarray | pd.Series | None = None,
     ) -> xarray.DataArray:
         """
         Expected number of purchases for a new customer across *t* time periods.
@@ -818,10 +819,10 @@ def expected_purchases_new_customer(

     def distribution_new_customer(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        T: Optional[Union[int, np.ndarray, pd.Series]] = None,
-        random_seed: Optional[RandomState] = None,
+        T: int | np.ndarray | pd.Series | None = None,
+        random_seed: RandomState | None = None,
         var_names: Sequence[
             Literal["dropout", "purchase_rate", "recency_frequency"]
         ] = (
@@ -915,9 +916,9 @@ def distribution_new_customer(

     def distribution_new_customer_dropout(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        random_seed: Optional[RandomState] = None,
+        random_seed: RandomState | None = None,
     ) -> xarray.Dataset:
         """Sample from the Gamma distribution representing dropout times for new customers.
@@ -946,9 +947,9 @@ def distribution_new_customer_dropout(

     def distribution_new_customer_purchase_rate(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        random_seed: Optional[RandomState] = None,
+        random_seed: RandomState | None = None,
     ) -> xarray.Dataset:
         """Sample from the Gamma distribution representing purchase rates for new customers.
@@ -978,10 +979,10 @@ def distribution_new_customer_purchase_rate(

     def distribution_new_customer_recency_frequency(
         self,
-        data: Optional[pd.DataFrame] = None,
+        data: pd.DataFrame | None = None,
         *,
-        T: Optional[Union[int, np.ndarray, pd.Series]] = None,
-        random_seed: Optional[RandomState] = None,
+        T: int | np.ndarray | pd.Series | None = None,
+        random_seed: RandomState | None = None,
     ) -> xarray.Dataset:
         """Pareto/NBD process representing purchases across the customer population.
diff --git a/pymc_marketing/clv/models/shifted_beta_geo.py b/pymc_marketing/clv/models/shifted_beta_geo.py
index c0dc673e5..d8d61a9dc 100644
--- a/pymc_marketing/clv/models/shifted_beta_geo.py
+++ b/pymc_marketing/clv/models/shifted_beta_geo.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Sequence, Union
+from collections.abc import Sequence

 import numpy as np
 import pandas as pd
@@ -90,8 +90,8 @@ class ShiftedBetaGeoModelIndividual(CLVModel):
     def __init__(
         self,
         data: pd.DataFrame,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         self._validate_cols(
             data,
@@ -113,7 +113,7 @@ def __init__(
         )

     @property
-    def default_model_config(self) -> Dict:
+    def default_model_config(self) -> dict:
         return {
             "alpha_prior": {"dist": "HalfFlat", "kwargs": {}},
             "beta_prior": {"dist": "HalfFlat", "kwargs": {}},
@@ -141,7 +141,7 @@ def build_model(self):
         )

     def distribution_customer_churn_time(
-        self, customer_id: Union[np.ndarray, pd.Series], random_seed: RandomState = None
+        self, customer_id: np.ndarray | pd.Series, random_seed: RandomState = None
     ) -> DataArray:
         """Sample distribution of churn time for existing customers.
diff --git a/pymc_marketing/clv/plotting.py b/pymc_marketing/clv/plotting.py
index b9b1e6049..154ee1dab 100644
--- a/pymc_marketing/clv/plotting.py
+++ b/pymc_marketing/clv/plotting.py
@@ -1,4 +1,4 @@
-from typing import Optional, Sequence, Tuple, Union
+from collections.abc import Sequence

 import matplotlib.pyplot as plt
 import numpy as np
@@ -16,12 +16,12 @@

 def plot_customer_exposure(
     df: pd.DataFrame,
-    linewidth: Optional[float] = None,
-    size: Optional[float] = None,
-    labels: Optional[Sequence[str]] = None,
-    colors: Optional[Sequence[str]] = None,
+    linewidth: float | None = None,
+    size: float | None = None,
+    labels: Sequence[str] | None = None,
+    colors: Sequence[str] | None = None,
     padding: float = 0.25,
-    ax: Optional[plt.Axes] = None,
+    ax: plt.Axes | None = None,
 ) -> plt.Axes:
     """Plot the recency and T of DataFrame of customers.
@@ -149,7 +149,7 @@ def plot_customer_exposure(
 def _create_frequency_recency_meshes(
     max_frequency: int,
     max_recency: int,
-) -> Tuple[np.ndarray, np.ndarray]:
+) -> tuple[np.ndarray, np.ndarray]:
     frequency = np.arange(max_frequency + 1)
     recency = np.arange(max_recency + 1)
     mesh_frequency, mesh_recency = np.meshgrid(frequency, recency)
@@ -158,14 +158,14 @@

 def plot_frequency_recency_matrix(
-    model: Union[BetaGeoModel, ParetoNBDModel],
+    model: BetaGeoModel | ParetoNBDModel,
     t=1,
-    max_frequency: Optional[int] = None,
-    max_recency: Optional[int] = None,
-    title: Optional[str] = None,
+    max_frequency: int | None = None,
+    max_recency: int | None = None,
+    title: str | None = None,
     xlabel: str = "Customer's Historical Frequency",
     ylabel: str = "Customer's Recency",
-    ax: Optional[plt.Axes] = None,
+    ax: plt.Axes | None = None,
     **kwargs,
 ) -> plt.Axes:
     """
@@ -269,13 +269,13 @@

 def plot_probability_alive_matrix(
-    model: Union[BetaGeoModel, ParetoNBDModel],
-    max_frequency: Optional[int] = None,
-    max_recency: Optional[int] = None,
+    model: BetaGeoModel | ParetoNBDModel,
+    max_frequency: int | None = None,
+    max_recency: int | None = None,
     title: str = "Probability Customer is Alive,\nby Frequency and Recency of a Customer",
     xlabel: str = "Customer's Historical Frequency",
     ylabel: str = "Customer's Recency",
-    ax: Optional[plt.Axes] = None,
+    ax: plt.Axes | None = None,
     **kwargs,
 ) -> plt.Axes:
     """
diff --git a/pymc_marketing/clv/utils.py b/pymc_marketing/clv/utils.py
index a6b91d4f5..3e237675e 100644
--- a/pymc_marketing/clv/utils.py
+++ b/pymc_marketing/clv/utils.py
@@ -1,6 +1,5 @@
 import warnings
 from datetime import date, datetime
-from typing import Optional, Union

 import numpy as np
 import pandas as pd
@@ -29,11 +28,11 @@ def to_xarray(customer_id, *arrays, dim: str = "customer_id"):

 def customer_lifetime_value(
     transaction_model,
-    customer_id: Union[pd.Series, np.ndarray],
-    frequency: Union[pd.Series, np.ndarray],
-    recency: Union[pd.Series, np.ndarray],
-    T: Union[pd.Series, np.ndarray],
-    monetary_value: Union[pd.Series, np.ndarray, xarray.DataArray],
+    customer_id: pd.Series | np.ndarray,
+    frequency: pd.Series | np.ndarray,
+    recency: pd.Series | np.ndarray,
+    T: pd.Series | np.ndarray,
+    monetary_value: pd.Series | np.ndarray | xarray.DataArray,
     time: int = 12,
     discount_rate: float = 0.01,
     freq: str = "D",
@@ -167,11 +166,11 @@ def _find_first_transactions(
     transactions: pd.DataFrame,
     customer_id_col: str,
     datetime_col: str,
-    monetary_value_col: Optional[str] = None,
-    datetime_format: Optional[str] = None,
-    observation_period_end: Optional[Union[str, pd.Period, datetime]] = None,
+    monetary_value_col: str | None = None,
+    datetime_format: str | None = None,
+    observation_period_end: str | pd.Period | datetime | None = None,
     time_unit: str = "D",
-    sort_transactions: Optional[bool] = True,
+    sort_transactions: bool | None = True,
 ) -> pd.DataFrame:
     """
     Return dataframe with first transactions.
@@ -279,13 +278,13 @@ def rfm_summary(
     transactions: pd.DataFrame,
     customer_id_col: str,
     datetime_col: str,
-    monetary_value_col: Optional[str] = None,
-    datetime_format: Optional[str] = None,
-    observation_period_end: Optional[Union[str, pd.Period, datetime]] = None,
+    monetary_value_col: str | None = None,
+    datetime_format: str | None = None,
+    observation_period_end: str | pd.Period | datetime | None = None,
     time_unit: str = "D",
-    time_scaler: Optional[float] = 1,
-    include_first_transaction: Optional[bool] = False,
-    sort_transactions: Optional[bool] = True,
+    time_scaler: float | None = 1,
+    include_first_transaction: bool | None = False,
+    sort_transactions: bool | None = True,
 ) -> pd.DataFrame:
     """
     Summarize transaction data for use in CLV modeling and/or RFM segmentation.
@@ -422,16 +421,14 @@ def rfm_train_test_split(
     transactions: pd.DataFrame,
     customer_id_col: str,
     datetime_col: str,
-    train_period_end: Union[Union[float, str], datetime, datetime64, date],
-    test_period_end: Optional[
-        Union[Union[float, str], datetime, datetime64, date]
-    ] = None,
+    train_period_end: float | str | datetime | datetime64 | date,
+    test_period_end: float | str | datetime | datetime64 | date | None = None,
     time_unit: str = "D",
-    time_scaler: Optional[float] = 1,
-    datetime_format: Optional[str] = None,
-    monetary_value_col: Optional[str] = None,
-    include_first_transaction: Optional[bool] = False,
-    sort_transactions: Optional[bool] = True,
+    time_scaler: float | None = 1,
+    datetime_format: str | None = None,
+    monetary_value_col: str | None = None,
+    include_first_transaction: bool | None = False,
+    sort_transactions: bool | None = True,
 ) -> pd.DataFrame:
     """
     Summarize transaction data and split into training and tests datasets for CLV modeling.
diff --git a/pymc_marketing/mmm/base.py b/pymc_marketing/mmm/base.py
index cf39d31a4..ff36870c1 100644
--- a/pymc_marketing/mmm/base.py
+++ b/pymc_marketing/mmm/base.py
@@ -1,6 +1,7 @@
 """Base class for Marketing Mix Models (MMM)."""

 import warnings
+from collections.abc import Callable
 from inspect import (
     getattr_static,
     isdatadescriptor,
@@ -9,7 +10,7 @@
     ismethoddescriptor,
 )
 from itertools import repeat
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any

 import arviz as az
 import matplotlib.pyplot as plt
@@ -49,22 +50,22 @@ class BaseMMM(ModelBuilder):
     def __init__(
         self,
         date_column: str,
-        channel_columns: Union[List[str], Tuple[str]],
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        channel_columns: list[str] | tuple[str],
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
         **kwargs,
     ) -> None:
-        self.X: Optional[pd.DataFrame] = None
-        self.y: Optional[Union[pd.Series, np.ndarray]] = None
+        self.X: pd.DataFrame | None = None
+        self.y: pd.Series | np.ndarray | None = None
         self.date_column: str = date_column
-        self.channel_columns: Union[List[str], Tuple[str]] = channel_columns
+        self.channel_columns: list[str] | tuple[str] = channel_columns
         self.n_channel: int = len(channel_columns)
-        self._fit_result: Optional[az.InferenceData] = None
-        self._posterior_predictive: Optional[az.InferenceData] = None
+        self._fit_result: az.InferenceData | None = None
+        self._posterior_predictive: az.InferenceData | None = None
         super().__init__(model_config=model_config, sampler_config=sampler_config)

     @property
-    def methods(self) -> List[Any]:
+    def methods(self) -> list[Any]:
         maybe_methods = [getattr_static(self, attr) for attr in dir(self)]
         return [
             method
@@ -81,9 +82,9 @@ def methods(self) -> list[Any]:
     @property
     def validation_methods(
         self,
-    ) -> Tuple[
-        List[Callable[["BaseMMM", Union[pd.DataFrame, pd.Series, np.ndarray]], None]],
-        List[Callable[["BaseMMM", Union[pd.DataFrame, pd.Series, np.ndarray]], None]],
+    ) -> tuple[
+        list[Callable[["BaseMMM", pd.DataFrame | pd.Series | np.ndarray], None]],
+        list[Callable[["BaseMMM", pd.DataFrame | pd.Series | np.ndarray], None]],
     ]:
         """
         A property that provides validation methods for features ("X") and the target variable ("y").
@@ -114,7 +115,7 @@ def validation_methods(
         )

     def validate(
-        self, target: str, data: Union[pd.DataFrame, pd.Series, np.ndarray]
+        self, target: str, data: pd.DataFrame | pd.Series | np.ndarray
     ) -> None:
         """
         Validates the input data based on the specified target type.
@@ -148,17 +149,17 @@ def validate(
     @property
     def preprocessing_methods(
         self,
-    ) -> Tuple[
-        List[
+    ) -> tuple[
+        list[
             Callable[
-                ["BaseMMM", Union[pd.DataFrame, pd.Series, np.ndarray]],
-                Union[pd.DataFrame, pd.Series, np.ndarray],
+                ["BaseMMM", pd.DataFrame | pd.Series | np.ndarray],
+                pd.DataFrame | pd.Series | np.ndarray,
             ]
         ],
-        List[
+        list[
             Callable[
-                ["BaseMMM", Union[pd.DataFrame, pd.Series, np.ndarray]],
-                Union[pd.DataFrame, pd.Series, np.ndarray],
+                ["BaseMMM", pd.DataFrame | pd.Series | np.ndarray],
+                pd.DataFrame | pd.Series | np.ndarray,
             ]
         ],
     ]:
@@ -190,8 +191,8 @@ def preprocessing_methods(
         )

     def preprocess(
-        self, target: str, data: Union[pd.DataFrame, pd.Series, np.ndarray]
-    ) -> Union[pd.DataFrame, pd.Series, np.ndarray]:
+        self, target: str, data: pd.DataFrame | pd.Series | np.ndarray
+    ) -> pd.DataFrame | pd.Series | np.ndarray:
         """
         Preprocess the provided data according to the specified target.
@@ -503,7 +504,7 @@ def compute_channel_contribution_original_scale(self) -> DataArray:

     def _estimate_budget_contribution_fit(
         self, channel: str, budget: float, method: str = "sigmoid"
-    ) -> Tuple:
+    ) -> tuple:
        """
         Estimate the lower and upper bounds of the contribution fit for a given channel and budget.
         This function computes the quantiles (0.05 & 0.95) of the channel contributions, estimates
@@ -636,7 +637,7 @@ def _plot_scenario(
         )

     def plot_budget_scenearios(
-        self, *, base_data: Dict, method: str = "sigmoid", **kwargs
+        self, *, base_data: dict, method: str = "sigmoid", **kwargs
     ) -> plt.Figure:
         """
         Experimental: Plots the budget and contribution bars side by side for multiple scenarios.
@@ -863,9 +864,9 @@ def optimize_channel_budget_for_maximum_contribution(
         self,
         method: str,
         total_budget: int,
-        budget_bounds: Optional[Dict[str, Tuple[float, float]]] = None,
+        budget_bounds: dict[str, tuple[float, float]] | None = None,
         *,
-        parameters: Dict[str, Tuple[float, float]],
+        parameters: dict[str, tuple[float, float]],
     ) -> pd.DataFrame:
         """
         Experimental: Optimize the allocation of a given total budget across multiple
@@ -900,10 +901,10 @@ def optimize_channel_budget_for_maximum_contribution(
         ValueError
             If any of the required parameters are not provided or have an incorrect type.
         """
-        if not isinstance(budget_bounds, (dict, type(None))):
+        if not isinstance(budget_bounds, dict | type(None)):
             raise TypeError("`budget_ranges` should be a dictionary or None.")

-        if not isinstance(total_budget, (int, float)):
+        if not isinstance(total_budget, int | float):
             raise ValueError(
                 "The 'total_budget' parameter must be an integer or float."
             )
@@ -925,7 +926,7 @@ def optimize_channel_budget_for_maximum_contribution(

     def compute_channel_curve_optimization_parameters_original_scale(
         self, method: str = "sigmoid"
-    ) -> Dict:
+    ) -> dict:
        """
         Experimental: Estimate the parameters for the saturating function
         of each channel's contribution.
@@ -970,7 +971,7 @@ def plot_direct_contribution_curves(
         show_fit: bool = False,
         xlim_max=None,
         method: str = "sigmoid",
-        channels: Optional[List[str]] = None,
+        channels: list[str] | None = None,
         same_axes: bool = False,
     ) -> plt.Figure:
         """
@@ -1079,7 +1080,7 @@ def legend_title_func(channel):
         fig.suptitle("Direct response curves", fontsize=16)
         return fig

-    def _get_distribution(self, dist: Dict) -> Callable:
+    def _get_distribution(self, dist: dict) -> Callable:
         """
         Retrieve a PyMC distribution callable based on the provided dictionary.
@@ -1204,9 +1205,9 @@ def compute_mean_contributions_over_time(

     def plot_grouped_contribution_breakdown_over_time(
         self,
-        stack_groups: Optional[Dict[str, List[str]]] = None,
+        stack_groups: dict[str, list[str]] | None = None,
         original_scale: bool = False,
-        area_kwargs: Optional[Dict[str, Any]] = None,
+        area_kwargs: dict[str, Any] | None = None,
         **plt_kwargs: Any,
     ) -> plt.Figure:
         """Plot a time series area chart for all channel contributions.
diff --git a/pymc_marketing/mmm/budget_optimizer.py b/pymc_marketing/mmm/budget_optimizer.py
index fc18126ad..694182833 100644
--- a/pymc_marketing/mmm/budget_optimizer.py
+++ b/pymc_marketing/mmm/budget_optimizer.py
@@ -1,7 +1,5 @@
 """Budget optimization module."""

-from typing import Dict, List, Optional, Tuple
-
 import numpy as np
 from pandas import DataFrame
 from scipy.optimize import minimize
@@ -12,9 +10,9 @@

 def calculate_expected_contribution(
     method: str,
-    parameters: Dict[str, Tuple[float, float]],
-    budget: Dict[str, float],
-) -> Dict[str, float]:
+    parameters: dict[str, tuple[float, float]],
+    budget: dict[str, float],
+) -> dict[str, float]:
     """
     Calculate expected contributions using the specified model.
@@ -74,10 +72,10 @@ def calculate_expected_contribution(

 def objective_distribution(
-    x: List[float],
+    x: list[float],
     method: str,
-    channels: List[str],
-    parameters: Dict[str, Tuple[float, float]],
+    channels: list[str],
+    parameters: dict[str, tuple[float, float]],
 ) -> float:
     """
     Compute the total contribution for a given budget distribution.
@@ -121,10 +119,10 @@ def objective_distribution(
 def optimize_budget_distribution(
     method: str,
     total_budget: int,
-    budget_ranges: Optional[Dict[str, Tuple[float, float]]],
-    parameters: Dict[str, Tuple[float, float]],
-    channels: List[str],
-) -> Dict[str, float]:
+    budget_ranges: dict[str, tuple[float, float]] | None,
+    parameters: dict[str, tuple[float, float]],
+    channels: list[str],
+) -> dict[str, float]:
     """
     Optimize the budget allocation across channels to maximize total contribution.
@@ -161,7 +159,7 @@ def optimize_budget_distribution(
     """

     # Check if budget_ranges is the correct type
-    if not isinstance(budget_ranges, (dict, type(None))):
+    if not isinstance(budget_ranges, dict | type(None)):
         raise TypeError("`budget_ranges` should be a dictionary or None.")

     if budget_ranges is None:
@@ -190,9 +188,9 @@ def optimize_budget_distribution(
 def budget_allocator(
     method: str,
     total_budget: int,
-    channels: List[str],
-    parameters: Dict[str, Tuple[float, float]],
-    budget_ranges: Optional[Dict[str, Tuple[float, float]]],
+    channels: list[str],
+    parameters: dict[str, tuple[float, float]],
+    budget_ranges: dict[str, tuple[float, float]] | None,
 ) -> DataFrame:
     optimal_budget = optimize_budget_distribution(
         method=method,
diff --git a/pymc_marketing/mmm/delayed_saturated_mmm.py b/pymc_marketing/mmm/delayed_saturated_mmm.py
index 85e8707e0..3ef533eef 100644
--- a/pymc_marketing/mmm/delayed_saturated_mmm.py
+++ b/pymc_marketing/mmm/delayed_saturated_mmm.py
@@ -2,7 +2,7 @@

 import json
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any

 import arviz as az
 import matplotlib.pyplot as plt
@@ -45,13 +45,13 @@ class BaseDelayedSaturatedMMM(MMM):
     def __init__(
         self,
         date_column: str,
-        channel_columns: List[str],
+        channel_columns: list[str],
         adstock_max_lag: int,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
         validate_data: bool = True,
-        control_columns: Optional[List[str]] = None,
-        yearly_seasonality: Optional[int] = None,
+        control_columns: list[str] | None = None,
+        yearly_seasonality: int | None = None,
         **kwargs,
     ) -> None:
         """Constructor method.
@@ -92,7 +92,7 @@ def __init__(
         )

     @property
-    def default_sampler_config(self) -> Dict:
+    def default_sampler_config(self) -> dict:
         return {}

     @property
@@ -101,7 +101,7 @@ def output_var(self):
         return "y"

     def _generate_and_preprocess_model_data(  # type: ignore
-        self, X: Union[pd.DataFrame, pd.Series], y: Union[pd.Series, np.ndarray]
+        self, X: pd.DataFrame | pd.Series, y: pd.Series | np.ndarray
     ) -> None:
         """Applies preprocessing to the data before fitting the model.
@@ -116,10 +116,10 @@ def _generate_and_preprocess_model_data(  # type: ignore
         date_data = X[self.date_column]
         channel_data = X[self.channel_columns]

-        self.coords_mutable: Dict[str, Any] = {
+        self.coords_mutable: dict[str, Any] = {
             "date": date_data,
         }
-        coords: Dict[str, Any] = {
+        coords: dict[str, Any] = {
             "channel": self.channel_columns,
         }
@@ -128,13 +128,13 @@ def _generate_and_preprocess_model_data(  # type: ignore
         }
         X_data = pd.DataFrame.from_dict(new_X_dict)
         X_data = pd.concat([X_data, channel_data], axis=1)
-        control_data: Optional[Union[pd.DataFrame, pd.Series]] = None
+        control_data: pd.DataFrame | pd.Series | None = None
         if self.control_columns is not None:
             control_data = X[self.control_columns]
             coords["control"] = self.control_columns
             X_data = pd.concat([X_data, control_data], axis=1)

-        fourier_features: Optional[pd.DataFrame] = None
+        fourier_features: pd.DataFrame | None = None
         if self.yearly_seasonality is not None:
             fourier_features = self._get_fourier_models_data(X=X)
             self.fourier_columns = fourier_features.columns
@@ -145,12 +145,12 @@ def _generate_and_preprocess_model_data(  # type: ignore
         if self.validate_data:
             self.validate("X", X_data)
             self.validate("y", y)
-        self.preprocessed_data: Dict[str, Union[pd.DataFrame, pd.Series]] = {
+        self.preprocessed_data: dict[str, pd.DataFrame | pd.Series] = {
             "X": self.preprocess("X", X_data),  # type: ignore
             "y": self.preprocess("y", y),  # type: ignore
         }
         self.X: pd.DataFrame = X_data
-        self.y: Union[pd.Series, np.ndarray] = y
+        self.y: pd.Series | np.ndarray = y

     def _save_input_params(self, idata) -> None:
         """Saves input parameters to the attrs of idata."""
@@ -163,9 +163,9 @@ def _save_input_params(self, idata) -> None:

     def _create_likelihood_distribution(
         self,
-        dist: Dict,
+        dist: dict,
         mu: TensorVariable,
-        observed: Union[np.ndarray, pd.Series],
+        observed: np.ndarray | pd.Series,
         dims: str,
     ) -> TensorVariable:
         """
@@ -242,7 +242,7 @@ def _create_likelihood_distribution(
                 parameter_distributions[param] = self._get_distribution(
                     dist=param_config
                 )(**param_config["kwargs"], name=f"likelihood_{param}")
-            elif isinstance(param_config, (int, float)):
+            elif isinstance(param_config, int | float):
                 # Use the value directly
                 parameter_distributions[param] = param_config
             else:
@@ -268,7 +268,7 @@ def _create_likelihood_distribution(
     def build_model(
         self,
         X: pd.DataFrame,
-        y: Union[pd.Series, np.ndarray],
+        y: pd.Series | np.ndarray,
         **kwargs,
     ) -> None:
         """
@@ -466,7 +466,7 @@ def build_model(
         )

     @property
-    def default_model_config(self) -> Dict:
+    def default_model_config(self) -> dict:
         return {
             "intercept": {"dist": "Normal", "kwargs": {"mu": 0, "sigma": 2}},
             "beta_channel": {"dist": "HalfNormal", "kwargs": {"sigma": 2}},
@@ -544,8 +544,8 @@ def channel_contributions_forward_pass(
         return channel_contribution_forward_pass.eval()

     @property
-    def _serializable_model_config(self) -> Dict[str, Any]:
-        def ndarray_to_list(d: Dict) -> Dict:
+    def _serializable_model_config(self) -> dict[str, Any]:
+        def ndarray_to_list(d: dict) -> dict:
             new_d = d.copy()  # Copy the dictionary to avoid mutating the original one
             for key, value in new_d.items():
                 if isinstance(value, np.ndarray):
@@ -609,8 +609,8 @@ def load(cls, fname: str):

     def _data_setter(
         self,
-        X: Union[np.ndarray, pd.DataFrame],
-        y: Optional[Union[np.ndarray, pd.Series]] = None,
+        X: np.ndarray | pd.DataFrame,
+        y: np.ndarray | pd.Series | None = None,
     ) -> None:
         """
         Sets new data in the model.
@@ -646,7 +646,7 @@ def _data_setter(
             msg = "X must be a pandas DataFrame in order to access the columns"
             raise TypeError(msg)

-        new_channel_data: Optional[np.ndarray] = None
+        new_channel_data: np.ndarray | None = None
         coords = {"date": X[self.date_column].to_numpy()}

         try:
@@ -663,7 +663,7 @@ def identity(x):
             else self.channel_transformer.transform
         )

-        data: Dict[str, Union[np.ndarray, Any]] = {
+        data: dict[str, np.ndarray | Any] = {
             "channel_data": channel_transformation(new_channel_data)
         }
         if self.control_columns is not None:
@@ -695,14 +695,14 @@ def identity(x):
         pm.set_data(data, coords=coords)

     @classmethod
-    def _model_config_formatting(cls, model_config: Dict) -> Dict:
+    def _model_config_formatting(cls, model_config: dict) -> dict:
         """
         Because of json serialization, model_config values that were originally tuples
         or numpy are being encoded as lists. This function converts them back to tuples
         and numpy arrays to ensure correct id encoding.
         """

-        def format_nested_dict(d: Dict) -> Dict:
+        def format_nested_dict(d: dict) -> dict:
             for key, value in d.items():
                 if isinstance(value, dict):
                     d[key] = format_nested_dict(value)
@@ -995,9 +995,9 @@ def plot_channel_contributions_grid(

     def new_spend_contributions(
         self,
-        spend: Optional[np.ndarray] = None,
+        spend: np.ndarray | None = None,
         one_time: bool = True,
-        spend_leading_up: Optional[np.ndarray] = None,
+        spend_leading_up: np.ndarray | None = None,
         prior: bool = False,
         original_scale: bool = True,
         **sample_posterior_predictive_kwargs,
@@ -1127,11 +1127,11 @@ def plot_new_spend_contributions(
         lower: float = 0.025,
         upper: float = 0.975,
         ylabel: str = "Sales",
-        idx: Optional[slice] = None,
-        channels: Optional[List[str]] = None,
+        idx: slice | None = None,
+        channels: list[str] | None = None,
         prior: bool = False,
         original_scale: bool = True,
-        ax: Optional[plt.Axes] = None,
+        ax: plt.Axes | None = None,
         **sample_posterior_predictive_kwargs,
     ) -> plt.Axes:
         """Plot the upcoming sales for a given spend amount.
diff --git a/pymc_marketing/mmm/preprocessing.py b/pymc_marketing/mmm/preprocessing.py
index ddb768657..d7568cdeb 100644
--- a/pymc_marketing/mmm/preprocessing.py
+++ b/pymc_marketing/mmm/preprocessing.py
@@ -1,6 +1,7 @@
 """Preprocessing methods for the Marketing Mix Model."""

-from typing import Any, Callable, List, Tuple, Union
+from collections.abc import Callable
+from typing import Any

 import numpy as np
 import pandas as pd
@@ -35,8 +36,8 @@ class MaxAbsScaleTarget:

     @preprocessing_method_y
     def max_abs_scale_target_data(
-        self, data: Union[pd.Series, np.ndarray]
-    ) -> Union[np.ndarray, pd.Series]:
+        self, data: pd.Series | np.ndarray
+    ) -> np.ndarray | pd.Series:
         if isinstance(data, pd.Series):
             data = data.to_numpy()
@@ -49,15 +50,12 @@ def max_abs_scale_target_data(

 class MaxAbsScaleChannels:
-    channel_columns: Union[List[str], Tuple[str]]
+    channel_columns: list[str] | tuple[str]

     @preprocessing_method_X
     def max_abs_scale_channel_data(self, data: pd.DataFrame) -> pd.DataFrame:
         data_cp = data.copy()
-        channel_data: Union[
-            pd.DataFrame,
-            pd.Series[Any],
-        ] = data_cp[self.channel_columns]
+        channel_data: pd.DataFrame | pd.Series[Any] = data_cp[self.channel_columns]
         transformers = [("scaler", MaxAbsScaler())]
         pipeline: Pipeline = Pipeline(steps=transformers)
         self.channel_transformer: Pipeline = pipeline.fit(X=channel_data.to_numpy())
@@ -68,7 +66,7 @@ def max_abs_scale_channel_data(self, data: pd.DataFrame) -> pd.DataFrame:

 class StandardizeControls:
-    control_columns: List[str]  # TODO: Handle Optional[List[str]]
+    control_columns: list[str]  # TODO: Handle Optional[List[str]]

     @preprocessing_method_X
     def standardize_control_data(self, data: pd.DataFrame) -> pd.DataFrame:
diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py
index 066aeaf63..a7f54410c 100644
--- a/pymc_marketing/mmm/transformers.py
+++ b/pymc_marketing/mmm/transformers.py
@@ -1,7 +1,7 @@
 """Media transformation functions for Marketing Mix Models."""

 from enum import Enum
-from typing import Any, NamedTuple, Union
+from typing import Any, NamedTuple

 import numpy as np
 import numpy.typing as npt
@@ -27,7 +27,7 @@ def batched_convolution(
     x,
     w,
     axis: int = 0,
-    mode: Union[ConvMode, str] = ConvMode.After,
+    mode: ConvMode | str = ConvMode.After,
 ):
     R"""Apply a 1D convolution in a vectorized way across multiple batch dimensions.
@@ -284,7 +284,7 @@ def weibull_adstock(
     k=1,
     l_max: int = 12,
     axis: int = 0,
-    type: Union[WeibullType, str] = WeibullType.PDF,
+    type: WeibullType | str = WeibullType.PDF,
 ):
     R"""Weibull Adstocking Transformation.
@@ -377,7 +377,7 @@ def weibull_adstock(
     return batched_convolution(x, w, axis=axis)

-def logistic_saturation(x, lam: Union[npt.NDArray[np.float_], float] = 0.5):
+def logistic_saturation(x, lam: npt.NDArray[np.float_] | float = 0.5):
     """Logistic saturation transformation.

     .. math::
@@ -670,10 +670,10 @@ def tanh_saturation_baselined(

 def michaelis_menten(
-    x: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    alpha: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    lam: Union[float, np.ndarray, npt.NDArray[np.float64]],
-) -> Union[float, Any]:
+    x: float | np.ndarray | npt.NDArray[np.float64],
+    alpha: float | np.ndarray | npt.NDArray[np.float64],
+    lam: float | np.ndarray | npt.NDArray[np.float64],
+) -> float | Any:
     r"""
     Evaluate the Michaelis-Menten function for given values of x, alpha, and lambda.
diff --git a/pymc_marketing/mmm/utils.py b/pymc_marketing/mmm/utils.py
index 489db6395..e4f2f467d 100644
--- a/pymc_marketing/mmm/utils.py
+++ b/pymc_marketing/mmm/utils.py
@@ -1,7 +1,8 @@
 """Utility functions for the Marketing Mix Modeling module."""

 import re
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from collections.abc import Callable
+from typing import Any

 import numpy as np
 import numpy.typing as npt
@@ -45,11 +46,11 @@ def generate_fourier_modes(

 def estimate_menten_parameters(
-    channel: Union[str, Any],
-    original_dataframe: Union[pd.DataFrame, Any],
-    contributions: Union[xr.DataArray, Any],
+    channel: str | Any,
+    original_dataframe: pd.DataFrame | Any,
+    contributions: xr.DataArray | Any,
     **kwargs,
-) -> List[float]:
+) -> list[float]:
     """
     Estimate the parameters for the Michaelis-Menten function using curve fitting.
@@ -91,11 +92,11 @@ def estimate_menten_parameters(

 def estimate_sigmoid_parameters(
-    channel: Union[str, Any],
-    original_dataframe: Union[pd.DataFrame, Any],
-    contributions: Union[xr.DataArray, Any],
+    channel: str | Any,
+    original_dataframe: pd.DataFrame | Any,
+    contributions: xr.DataArray | Any,
     **kwargs,
-) -> List[float]:
+) -> list[float]:
     """
     Estimate the parameters for the sigmoid function using curve fitting.
@@ -141,10 +142,10 @@ def estimate_sigmoid_parameters(

 def compute_sigmoid_second_derivative(
-    x: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    alpha: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    lam: Union[float, np.ndarray, npt.NDArray[np.float64]],
-) -> Union[float, Any]:
+    x: float | np.ndarray | npt.NDArray[np.float64],
+    alpha: float | np.ndarray | npt.NDArray[np.float64],
+    lam: float | np.ndarray | npt.NDArray[np.float64],
+) -> float | Any:
     """
     Compute the second derivative of the extended sigmoid function.
@@ -177,9 +178,9 @@ def compute_sigmoid_second_derivative(

 def find_sigmoid_inflection_point(
-    alpha: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    lam: Union[float, np.ndarray, npt.NDArray[np.float64]],
-) -> Tuple[Any, float]:
+    alpha: float | np.ndarray | npt.NDArray[np.float64],
+    lam: float | np.ndarray | npt.NDArray[np.float64],
+) -> tuple[Any, float]:
     """
     Find the inflection point of the extended sigmoid function.
@@ -212,7 +213,7 @@ def find_sigmoid_inflection_point(
     return x_inflection, y_inflection

-def standardize_scenarios_dict_keys(d: Dict, keywords: List[str]):
+def standardize_scenarios_dict_keys(d: dict, keywords: list[str]):
     """
     Standardize the keys in a dictionary based on a list of keywords.
@@ -280,10 +281,10 @@ def apply_sklearn_transformer_across_dim(

 def sigmoid_saturation(
-    x: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    alpha: Union[float, np.ndarray, npt.NDArray[np.float64]],
-    lam: Union[float, np.ndarray, npt.NDArray[np.float64]],
-) -> Union[float, Any]:
+    x: float | np.ndarray | npt.NDArray[np.float64],
+    alpha: float | np.ndarray | npt.NDArray[np.float64],
+    lam: float | np.ndarray | npt.NDArray[np.float64],
+) -> float | Any:
     """
     Parameters
     ----------
@@ -304,7 +305,7 @@ def create_new_spend_data(
     spend: np.ndarray,
     adstock_max_lag: int,
     one_time: bool,
-    spend_leading_up: Optional[np.ndarray] = None,
+    spend_leading_up: np.ndarray | None = None,
 ) -> np.ndarray:
     """Create new spend data for the channel forward pass.
diff --git a/pymc_marketing/mmm/validating.py b/pymc_marketing/mmm/validating.py
index 3532c586c..8665e6db7 100644
--- a/pymc_marketing/mmm/validating.py
+++ b/pymc_marketing/mmm/validating.py
@@ -1,6 +1,6 @@
 """Validating methods for MMM classes."""

-from typing import Callable, List, Optional, Tuple, Union
+from collections.abc import Callable

 import pandas as pd
@@ -47,11 +47,11 @@ def validate_date_col(self, data: pd.DataFrame) -> None:

 class ValidateChannelColumns:
-    channel_columns: Union[List[str], Tuple[str]]
+    channel_columns: list[str] | tuple[str]

     @validation_method_X
     def validate_channel_columns(self, data: pd.DataFrame) -> None:
-        if not isinstance(self.channel_columns, (list, tuple)):
+        if not isinstance(self.channel_columns, list | tuple):
             raise ValueError("channel_columns must be a list or tuple")
         if len(self.channel_columns) == 0:
             raise ValueError("channel_columns must not be empty")
@@ -68,13 +68,13 @@ def validate_channel_columns(self, data: pd.DataFrame) -> None:

 class ValidateControlColumns:
-    control_columns: Optional[List[str]]
+    control_columns: list[str] | None

     @validation_method_X
     def validate_control_columns(self, data: pd.DataFrame) -> None:
         if self.control_columns is None:
             return None
-        if not isinstance(self.control_columns, (list, tuple)):
+        if not isinstance(self.control_columns, list | tuple):
             raise ValueError("control_columns must be None, a list or tuple")
         if len(self.control_columns) == 0:
             raise ValueError(
diff --git a/pymc_marketing/model_builder.py b/pymc_marketing/model_builder.py
index 325145584..a8240ded6 100644
--- a/pymc_marketing/model_builder.py
+++ b/pymc_marketing/model_builder.py
@@ -18,7 +18,7 @@
 import warnings
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any

 import arviz as az
 import numpy as np
@@ -49,13 +49,13 @@ class ModelBuilder(ABC):
     _model_type = "BaseClass"
     version = "None"

-    X: Optional[pd.DataFrame] = None
-    y: Optional[Union[pd.Series, np.ndarray]] = None
+    X: pd.DataFrame | None = None
+    y: pd.Series | np.ndarray | None = None

     def __init__(
         self,
-        model_config: Optional[Dict] = None,
-        sampler_config: Optional[Dict] = None,
+        model_config: dict | None = None,
+        sampler_config: dict | None = None,
     ):
         """
         Initializes model configuration and sampler configuration for the model
@@ -86,10 +86,8 @@ def __init__(
         self.model_config = (
             self.default_model_config | model_config
         )  # parameters for priors etc.
-        self.model: Optional[pm.Model] = None  # Set by build_model
-        self.idata: Optional[az.InferenceData] = (
-            None  # idata is generated during fitting
-        )
+        self.model: pm.Model | None = None  # Set by build_model
+        self.idata: az.InferenceData | None = None  # idata is generated during fitting
         self.is_fitted_ = False

     def _validate_data(self, X, y=None):
@@ -103,8 +101,8 @@ def _validate_data(self, X, y=None):
     @abstractmethod
     def _data_setter(
         self,
-        X: Union[np.ndarray, pd.DataFrame],
-        y: Optional[Union[np.ndarray, pd.Series]] = None,
+        X: np.ndarray | pd.DataFrame,
+        y: np.ndarray | pd.Series | None = None,
     ) -> None:
         """
         Sets new data in the model.
@@ -149,7 +147,7 @@ def output_var(self):

     @property
     @abstractmethod
-    def default_model_config(self) -> Dict:
+    def default_model_config(self) -> dict:
        """
         Returns a class default config dict for model builder if no model_config is provided on class initialization
         Useful for understanding structure of required model_config to allow its customization by users
@@ -178,7 +176,7 @@ def default_model_config(self) -> dict:

     @property
     @abstractmethod
-    def default_sampler_config(self) -> Dict:
+    def default_sampler_config(self) -> dict:
         """
         Returns a class default sampler dict for model builder if no sampler_config is provided on class initialization
         Useful for understanding structure of required sampler_config to allow its customization by users
@@ -202,7 +200,7 @@ def default_sampler_config(self) -> dict:

     @abstractmethod
     def _generate_and_preprocess_model_data(
-        self, X: Union[pd.DataFrame, pd.Series], y: np.ndarray
+        self, X: pd.DataFrame | pd.Series, y: np.ndarray
     ) -> None:
         """
         Applies preprocessing to the data before fitting the model.
@@ -237,7 +235,7 @@ def _generate_and_preprocess_model_data(
     def build_model(
         self,
         X: pd.DataFrame,
-        y: Union[pd.Series, np.ndarray],
+        y: pd.Series | np.ndarray,
         **kwargs,
     ) -> None:
         """
@@ -349,7 +347,7 @@ def save(self, fname: str) -> None:
             raise RuntimeError("The model hasn't been fit yet, call .fit() first")

     @classmethod
-    def _model_config_formatting(cls, model_config: Dict) -> Dict:
+    def _model_config_formatting(cls, model_config: dict) -> dict:
         """
         Because of json serialization, model_config values that were originally tuples
         or numpy are being encoded as lists. This function converts them back to tuples
@@ -424,10 +422,10 @@ def load(cls, fname: str):
     def fit(
         self,
         X: pd.DataFrame,
-        y: Optional[Union[pd.Series, np.ndarray]] = None,
+        y: pd.Series | np.ndarray | None = None,
         progressbar: bool = True,
-        predictor_names: Optional[List[str]] = None,
-        random_seed: Optional[RandomState] = None,
+        predictor_names: list[str] | None = None,
+        random_seed: RandomState | None = None,
         **kwargs: Any,
     ) -> az.InferenceData:
         """
@@ -501,7 +499,7 @@ def fit(

     def predict(
         self,
-        X_pred: Union[np.ndarray, pd.DataFrame, pd.Series],
+        X_pred: np.ndarray | pd.DataFrame | pd.Series,
         extend_idata: bool = True,
         **kwargs,
     ) -> np.ndarray:
@@ -549,7 +547,7 @@ def sample_prior_predictive(
         self,
         X_pred,
         y_pred=None,
-        samples: Optional[int] = None,
+        samples: int | None = None,
         extend_idata: bool = False,
         combined: bool = True,
         **kwargs,
@@ -654,7 +652,7 @@ def set_params(self, **params):

     @property
     @abstractmethod
-    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
+    def _serializable_model_config(self) -> dict[str, int | float | dict]:
         """
         Converts non-serializable values from model_config to their serializable reversable equivalent.
         Data types like pandas DataFrame, Series or datetime aren't JSON serializable,
@@ -667,7 +665,7 @@ def _serializable_model_config(self) -> dict[str, int | float | dict]:

     def predict_proba(
         self,
-        X_pred: Union[np.ndarray, pd.DataFrame, pd.Series],
+        X_pred: np.ndarray | pd.DataFrame | pd.Series,
         extend_idata: bool = True,
         combined: bool = False,
         **kwargs,
@@ -677,7 +675,7 @@ def predict_proba(

     def predict_posterior(
         self,
-        X_pred: Union[np.ndarray, pd.DataFrame, pd.Series],
+        X_pred: np.ndarray | pd.DataFrame | pd.Series,
         extend_idata: bool = True,
         combined: bool = True,
         **kwargs,
diff --git a/pyproject.toml b/pyproject.toml
index b27f53130..5dc940efc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,7 +59,7 @@ repository = "https://github.com/pymc-labs/pymc-marketing"
 #changelog = ""

 [tool.ruff.lint]
-select = ["E", "F", "I", "W"]
+select = ["E", "F", "I", "W", "UP"]

 [tool.ruff.lint.pycodestyle]
 max-line-length = 120
diff --git a/tests/clv/test_plotting.py b/tests/clv/test_plotting.py
index 42ded8f3b..9a136eafc 100644
--- a/tests/clv/test_plotting.py
+++ b/tests/clv/test_plotting.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -18,9 +16,7 @@ class MockModel:
     def __init__(self, data: pd.DataFrame):
         self.data = data

-    def _mock_posterior(
-        self, customer_id: Union[np.ndarray, pd.Series]
-    ) -> xr.DataArray:
+    def _mock_posterior(self, customer_id: np.ndarray | pd.Series) -> xr.DataArray:
         n_customers = len(customer_id)
         n_chains = 4
         n_draws = 10
@@ -34,30 +30,30 @@ def _mock_posterior(

     def expected_probability_alive(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        frequency: Union[np.ndarray, pd.Series],
-        recency: Union[np.ndarray, pd.Series],
-        T: Union[np.ndarray, pd.Series],
+        customer_id: np.ndarray | pd.Series,
+        frequency: np.ndarray | pd.Series,
+        recency: np.ndarray | pd.Series,
+        T: np.ndarray | pd.Series,
     ):
         return self._mock_posterior(customer_id)

     def expected_purchases(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
+        customer_id: np.ndarray | pd.Series,
         data: pd.DataFrame,
         *,
-        future_t: Union[np.ndarray, pd.Series, TensorVariable],
+        future_t: np.ndarray | pd.Series | TensorVariable,
     ):
         return self._mock_posterior(customer_id)

     # TODO: This is required until CLV API is standardized.
     def expected_num_purchases(
         self,
-        customer_id: Union[np.ndarray, pd.Series],
-        t: Union[np.ndarray, pd.Series, TensorVariable],
-        frequency: Union[np.ndarray, pd.Series, TensorVariable],
-        recency: Union[np.ndarray, pd.Series, TensorVariable],
-        T: Union[np.ndarray, pd.Series, TensorVariable],
+        customer_id: np.ndarray | pd.Series,
+        t: np.ndarray | pd.Series | TensorVariable,
+        frequency: np.ndarray | pd.Series | TensorVariable,
+        recency: np.ndarray | pd.Series | TensorVariable,
+        T: np.ndarray | pd.Series | TensorVariable,
     ):
         return self._mock_posterior(customer_id)
diff --git a/tests/conftest.py b/tests/conftest.py
index fdcab7958..0ed37fa7f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 import numpy as np
 import pandas as pd
 import pytest
@@ -61,7 +59,7 @@ def test_summary_data() -> pd.DataFrame:
     return df

-def set_model_fit(model: CLVModel, fit: Union[InferenceData, Dataset]):
+def set_model_fit(model: CLVModel, fit: InferenceData | Dataset):
     if isinstance(fit, InferenceData):
         assert "posterior" in fit.groups()
     else:
diff --git a/tests/mmm/test_delayed_saturated_mmm.py b/tests/mmm/test_delayed_saturated_mmm.py
index eaca2768b..643cdea66 100644
--- a/tests/mmm/test_delayed_saturated_mmm.py
+++ b/tests/mmm/test_delayed_saturated_mmm.py
@@ -1,5 +1,4 @@
 import os
-from typing import Dict, List, Optional

 import arviz as az
 import numpy as np
@@ -48,7 +47,7 @@ def toy_X(generate_data) -> pd.DataFrame:

 @pytest.fixture(scope="class")
-def model_config_requiring_serialization() -> Dict:
+def model_config_requiring_serialization() -> dict:
     model_config = {
         "intercept": {"dist": "Normal", "kwargs": {"mu": 0, "sigma": 2}},
         "beta_channel": {
@@ -198,9 +197,9 @@ def test_init(
         self,
         toy_X: pd.DataFrame,
         toy_y: pd.Series,
-        yearly_seasonality: Optional[int],
-        channel_columns: List[str],
-        control_columns: List[str],
+        yearly_seasonality: int | None,
+        channel_columns: list[str],
+        control_columns: list[str],
         adstock_max_lag: int,
     ) -> None:
         mmm = BaseDelayedSaturatedMMM(
@@ -343,7 +342,7 @@ def test_fit(self, toy_X: pd.DataFrame, toy_y: pd.Series) -> None:
         ids=["no_yearly_seasonality", "yearly_seasonality=1", "yearly_seasonality=2"],
     )
     def test_get_fourier_models_data(
-        self, toy_X: pd.DataFrame, toy_y: pd.Series, yearly_seasonality: Optional[int]
+        self, toy_X: pd.DataFrame, toy_y: pd.Series, yearly_seasonality: int | None
     ) -> None:
         mmm = BaseDelayedSaturatedMMM(
             date_column="date",
@@ -357,7 +356,7 @@ def test_get_fourier_models_data(
             mmm._get_fourier_models_data(toy_X)

         else:
-            fourier_modes_data: Optional[pd.DataFrame] = mmm._get_fourier_models_data(
+            fourier_modes_data: pd.DataFrame | None = mmm._get_fourier_models_data(
                 toy_X
             )
             assert fourier_modes_data.shape == (
@@ -580,7 +579,7 @@ def mock_property(self):
         ids=["default_config", "custom_config"],
     )
     def test_model_config(
-        self, model_config: Dict, toy_X: pd.DataFrame, toy_y: pd.Series
+        self, model_config: dict, toy_X: pd.DataFrame, toy_y: pd.Series
     ):
         # Create model instance with specified config
         model = DelayedSaturatedMMM(
diff --git a/tests/mmm/test_transformers.py b/tests/mmm/test_transformers.py
index dd0ed7f24..4743c843a 100644
--- a/tests/mmm/test_transformers.py
+++ b/tests/mmm/test_transformers.py
@@ -41,7 +41,7 @@ def dummy_design_matrix():
 )
 def convolution_inputs(request):
     x_val = np.ones((3, 4, 5))
-    w_val = np.ones((2))
+    w_val = np.ones(2)
     if request.param == "ndarray":
         return x_val, w_val, None, None
     elif request.param == "TensorConstant":
diff --git a/tests/model_builder/test_model_builder.py b/tests/model_builder/test_model_builder.py
index 2eeff355e..bda63b957 100644
--- a/tests/model_builder/test_model_builder.py
+++ b/tests/model_builder/test_model_builder.py
@@ -16,7 +16,6 @@
 import json
 import sys
 import tempfile
-from typing import Dict

 import numpy as np
 import pandas as pd
@@ -142,7 +141,7 @@ def _generate_and_preprocess_model_data(self, X: pd.DataFrame, y: pd.Series):
         self.y = y

     @property
-    def default_model_config(self) -> Dict:
+    def default_model_config(self) -> dict:
         return {
             "a": {"loc": 0, "scale": 10, "dims": ("numbers",)},
             "b": {"loc": 0, "scale": 10},
@@ -150,7 +149,7 @@ def default_model_config(self) -> dict:
         }

     @property
-    def default_sampler_config(self) -> Dict:
+    def default_sampler_config(self) -> dict:
         return {
             "draws": 1_000,
             "tune": 1_000,