From f6c2c1780261d467316ab5d3e49089fc33d1d1ec Mon Sep 17 00:00:00 2001 From: Will Dean Date: Mon, 8 Jul 2024 12:40:54 +0200 Subject: [PATCH] migrate to Data and non-mutable coords --- pymc_marketing/clv/models/pareto_nbd.py | 8 ++++---- pymc_marketing/mmm/delayed_saturated_mmm.py | 10 +--------- pyproject.toml | 2 +- tests/clv/test_distributions.py | 4 ++-- tests/mmm/test_lift_test.py | 6 ++---- tests/test_model_builder.py | 4 ++-- 6 files changed, 12 insertions(+), 22 deletions(-) diff --git a/pymc_marketing/clv/models/pareto_nbd.py b/pymc_marketing/clv/models/pareto_nbd.py index 6c35f9a5b..db5f10abe 100644 --- a/pymc_marketing/clv/models/pareto_nbd.py +++ b/pymc_marketing/clv/models/pareto_nbd.py @@ -240,11 +240,11 @@ def build_model(self) -> None: # type: ignore[override] "purchase_covariate": self.purchase_covariate_cols, "dropout_covariate": self.dropout_covariate_cols, "obs_var": ["recency", "frequency"], + "customer_id": self.data["customer_id"], } - mutable_coords = {"customer_id": self.data["customer_id"]} - with pm.Model(coords=coords, coords_mutable=mutable_coords) as self.model: + with pm.Model(coords=coords) as self.model: if self.purchase_covariate_cols: - purchase_data = pm.MutableData( + purchase_data = pm.Data( "purchase_data", self.data[self.purchase_covariate_cols], dims=["customer_id", "purchase_covariate"], @@ -273,7 +273,7 @@ def build_model(self) -> None: # type: ignore[override] # churn priors if self.dropout_covariate_cols: - dropout_data = pm.MutableData( + dropout_data = pm.Data( "dropout_data", self.data[self.dropout_covariate_cols], dims=["customer_id", "dropout_covariate"], diff --git a/pymc_marketing/mmm/delayed_saturated_mmm.py b/pymc_marketing/mmm/delayed_saturated_mmm.py index 148762d1c..2c17d64f0 100644 --- a/pymc_marketing/mmm/delayed_saturated_mmm.py +++ b/pymc_marketing/mmm/delayed_saturated_mmm.py @@ -203,11 +203,9 @@ def _generate_and_preprocess_model_data( # type: ignore date_data = X[self.date_column] channel_data = X[self.channel_columns] - self.coords_mutable: dict[str, Any] = { - "date": date_data, - } coords: dict[str, Any] = { "channel": self.channel_columns, + "date": date_data, } new_X_dict = { @@ -347,20 +345,17 @@ def build_model( self._generate_and_preprocess_model_data(X, y) with pm.Model( coords=self.model_coords, - coords_mutable=self.coords_mutable, ) as self.model: channel_data_ = pm.Data( name="channel_data", value=self.preprocessed_data["X"][self.channel_columns], dims=("date", "channel"), - mutable=True, ) target_ = pm.Data( name="target", value=self.preprocessed_data["y"], dims="date", - mutable=True, ) if self.time_varying_intercept | self.time_varying_media: time_index = pm.Data( @@ -441,7 +436,6 @@ def build_model( name="control_data", value=self.preprocessed_data["X"][self.control_columns], dims=("date", "control"), - mutable=True, ) control_contributions = pm.Deterministic( @@ -459,7 +453,6 @@ def build_model( self.date_column ].dt.dayofyear.to_numpy(), dims="date", - mutable=True, ) def create_deterministic(x: pt.TensorVariable) -> None: @@ -544,7 +537,6 @@ def channel_contributions_forward_pass( """ coords = { **self.model_coords, - **self.coords_mutable, } with pm.Model(coords=coords): pm.Deterministic( diff --git a/pyproject.toml b/pyproject.toml index eba88b28e..0c899c71c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "numpy>=1.17", "pandas", # NOTE: Used as minimum pymc version with ci.yml `OLDEST_PYMC_VERSION` - "pymc>=5.12.0,<5.16.0", + "pymc>=5.13.0,<5.16.0", "scikit-learn>=1.1.1", "seaborn>=0.12.2", "xarray", diff --git a/tests/clv/test_distributions.py b/tests/clv/test_distributions.py index c868d3c12..663564669 100644 --- a/tests/clv/test_distributions.py +++ b/tests/clv/test_distributions.py @@ -261,7 +261,7 @@ def test_pareto_nbd_sample_prior( s = pm.Gamma(name="s", alpha=5, beta=1, size=s_size) beta = pm.Gamma(name="beta", alpha=5, beta=1, size=beta_size) - T = pm.MutableData(name="T", value=np.array(10)) + T = pm.Data(name="T", value=np.array(10)) ParetoNBD( name="pareto_nbd", @@ -436,7 +436,7 @@ def test_beta_geo_beta_binom_sample_prior( gamma = pm.Normal(name="gamma", mu=gamma_true, sigma=1e-4, size=gamma_size) delta = pm.Normal(name="delta", mu=delta_true, sigma=1e-4, size=delta_size) - T = pm.MutableData(name="T", value=np.array(T_true)) + T = pm.Data(name="T", value=np.array(T_true)) BetaGeoBetaBinom( name="beta_geo_beta_binom", diff --git a/tests/mmm/test_lift_test.py b/tests/mmm/test_lift_test.py index 679bd4643..118258a20 100644 --- a/tests/mmm/test_lift_test.py +++ b/tests/mmm/test_lift_test.py @@ -325,15 +325,13 @@ def test_add_lift_measurements_before_new_data( channels = ["organic", "paid", "social"] coords = { "channel": channels, - } - coords_mutable = { "date": ["2020-01-01", "2020-01-02", "2020-01-03"], } - with pm.Model(coords=coords, coords_mutable=coords_mutable) as model: + with pm.Model(coords=coords) as model: alpha = pm.HalfNormal("alpha", dims="channel") lam = pm.HalfNormal("lam", dims="channel") - X = pm.Data("X", np.ones((3, 3)), dims=("date", "channel"), mutable=True) + X = pm.Data("X", np.ones((3, 3)), dims=("date", "channel")) pm.Deterministic( "random_operation", X + alpha + lam, diff --git a/tests/test_model_builder.py b/tests/test_model_builder.py index 73bac7af6..130ebf458 100644 --- a/tests/test_model_builder.py +++ b/tests/test_model_builder.py @@ -110,8 +110,8 @@ def build_model(self, X: pd.DataFrame, y: pd.Series, model_config=None): with pm.Model(coords=coords) as self.model: if model_config is None: model_config = self.default_model_config - x = pm.MutableData("x", self.X["input"].values) - y_data = pm.MutableData("y_data", self.y) + x = pm.Data("x", self.X["input"].values) + y_data = pm.Data("y_data", self.y) # prior parameters a_loc = model_config["a"]["loc"]