From c0bab50d87f42faf98f2d164552a75067f175e7b Mon Sep 17 00:00:00 2001 From: Christian Luhmann Date: Thu, 22 Aug 2024 10:55:21 -0400 Subject: [PATCH 1/4] add error when target is in X_df --- pymc_marketing/model_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pymc_marketing/model_builder.py b/pymc_marketing/model_builder.py index d243b0309..0d8d149e8 100644 --- a/pymc_marketing/model_builder.py +++ b/pymc_marketing/model_builder.py @@ -548,6 +548,8 @@ def fit( self._generate_and_preprocess_model_data(X, y_df.values.flatten()) if self.X is None or self.y is None: raise ValueError("X and y must be set before calling build_model!") + if self.output_var in X.columns: + raise ValueError(f"X includes a column named '{self.output_var}', which conflicts with the target variable.") if not hasattr(self, "model"): self.build_model(self.X, self.y) From eb5dd795dc2d89d2ec7128bd23eafbf36038e7db Mon Sep 17 00:00:00 2001 From: Christian Luhmann Date: Thu, 22 Aug 2024 11:07:46 -0400 Subject: [PATCH 2/4] add test --- tests/test_model_builder.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_model_builder.py b/tests/test_model_builder.py index 986438bed..978404b8e 100644 --- a/tests/test_model_builder.py +++ b/tests/test_model_builder.py @@ -256,6 +256,14 @@ def test_fit_no_t(toy_X): assert "posterior" in model_builder.idata.groups() +@pytest.mark.xfail +def test_fit_no_t(toy_X): + # create redundant target column in X + toy_X = pd.concat((toy_X, toy_y), axis=1) + model_builder = ModelBuilderTest() + model_builder.fit(X=toy_X, chains=1, draws=100, tune=100) + + @pytest.mark.skipif( sys.platform == "win32", reason="Permissions for temp files not granted on windows CI.", From cd649e38d104de8f30fe474965cfd422a62f82ae Mon Sep 17 00:00:00 2001 From: Christian Luhmann Date: Thu, 22 Aug 2024 11:14:47 -0400 Subject: [PATCH 3/4] rename test --- tests/test_model_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_model_builder.py b/tests/test_model_builder.py index 978404b8e..2aff3d79e 100644 --- a/tests/test_model_builder.py +++ b/tests/test_model_builder.py @@ -257,7 +257,7 @@ def test_fit_no_t(toy_X): @pytest.mark.xfail -def test_fit_no_t(toy_X): +def test_fit_dup_Y(toy_X): # create redundant target column in X toy_X = pd.concat((toy_X, toy_y), axis=1) model_builder = ModelBuilderTest() From a3334c39b4f9d7ffb5b58f53ab8073d68c189934 Mon Sep 17 00:00:00 2001 From: Christian Luhmann Date: Thu, 22 Aug 2024 11:28:16 -0400 Subject: [PATCH 4/4] format --- pymc_marketing/model_builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc_marketing/model_builder.py b/pymc_marketing/model_builder.py index 0d8d149e8..3a7cf7cc1 100644 --- a/pymc_marketing/model_builder.py +++ b/pymc_marketing/model_builder.py @@ -549,7 +549,9 @@ def fit( if self.X is None or self.y is None: raise ValueError("X and y must be set before calling build_model!") if self.output_var in X.columns: - raise ValueError(f"X includes a column named '{self.output_var}', which conflicts with the target variable.") + raise ValueError( + f"X includes a column named '{self.output_var}', which conflicts with the target variable." + ) if not hasattr(self, "model"): self.build_model(self.X, self.y)