New spends forward pass #456

Merged Mar 8, 2024 (41 commits)
Commits (the diff below shows changes from 31 of the 41 commits)
d88f77a  current status as method (wd60622, Dec 7, 2023)
32f9d02  format (wd60622, Dec 7, 2023)
65f4153  Update version.txt (juanitorduz, Dec 7, 2023)
d00cc75  Implement different convolution modes (#454) (abdalazizrashid, Dec 7, 2023)
ea1c707  Add PR template (ricardoV94, Dec 7, 2023)
2355584  Update pull_request_template.md (ricardoV94, Dec 11, 2023)
66d5816  Fix issues in index example (ricardoV94, Dec 28, 2023)
a63a12c  Update .pre-commit-config.yaml (juanitorduz, Jan 4, 2024)
cc11015  Update .pre-commit-config.yaml (juanitorduz, Jan 5, 2024)
2bcdd2e  move from other PR (wd60622, Dec 9, 2023)
c11ac0e  put legend on side (wd60622, Jan 6, 2024)
2f3eb53  Optimisation in customer_lifetime_value when discount_rate == 0 (#468) (vincent-grosbois, Jan 6, 2024)
5bd2eb6  Update README.md (juanitorduz, Jan 7, 2024)
f6ba1aa  add support for pre-commit-ci (juanitorduz, Jan 6, 2024)
4cf694e  add isort (juanitorduz, Jan 9, 2024)
0a8e241  modify autosummary templates (OriolAbril, Jan 12, 2024)
a75b968  Rename `clv_summary` to `rfm_summary` and extend functionality (#479) (ColtAllen, Jan 15, 2024)
a979839  Update version.txt (ricardoV94, Jan 15, 2024)
def644f  improve ruff (juanitorduz, Jan 15, 2024)
8f271de  [pre-commit.ci] pre-commit autoupdate (pre-commit-ci[bot], Jan 22, 2024)
e6bbed9  resolve conflict (wd60622, Jan 27, 2024)
1157072  Add baselined saturation (#498) (ferrine, Jan 26, 2024)
fa16200  Swap Before and After convolution modes as per #489 (#501) (abdalazizrashid, Jan 26, 2024)
f4c2de9  resolve conflict (wd60622, Jan 27, 2024)
eecbcaa  add dim_name arg (wd60622, Jan 27, 2024)
dffc183  Merge branch 'main' into new-spends (wd60622, Jan 27, 2024)
6a77fcf  add seed to tests and test methods (wd60622, Jan 28, 2024)
3ece500  add slice as type hint (wd60622, Feb 6, 2024)
ee3398a  use slice in docstring (wd60622, Feb 6, 2024)
9edbb3b  defaults to mean for each channel (wd60622, Feb 8, 2024)
ae183ce  add non-negative check (wd60622, Feb 8, 2024)
4bbbcf3  ax as last arg (wd60622, Feb 11, 2024)
22e6ce6  change weeks -> time (wd60622, Feb 11, 2024)
7bd8d47  parameterize quantiles (wd60622, Feb 11, 2024)
f0d8133  separate out and add to docs (wd60622, Feb 11, 2024)
6aa264e  rerun the baseline images (wd60622, Feb 11, 2024)
acc839f  mock the prior (wd60622, Mar 6, 2024)
02e967b  add new images from latest env (wd60622, Mar 8, 2024)
3deb8dc  migrate to toml instead of ci/cd (wd60622, Mar 8, 2024)
28a7b2c  test only is axes (wd60622, Mar 8, 2024)
a523f00  remove the images (wd60622, Mar 8, 2024)
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -42,7 +42,7 @@ jobs:
      - name: Run tests
        run: |
          pip install -e .[test]
-         pytest --cov-report=xml --no-cov-on-fail --durations=50
+         pytest --mpl --mpl-baseline-path=tests/baseline --cov-report=xml --no-cov-on-fail --durations=50
      - name: Check oldest version of PyMC
        if: ${{ matrix.config.oldest-pymc }}
        run: python -c "import pymc; assert pymc.__version__ == '${{ env.OLDEST_PYMC_VERSION }}'"
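The CI change above turns on matplotlib image-comparison testing via pytest-mpl: figures produced by plotting tests are compared against baseline images stored under tests/baseline. As a rough illustration (not code from this PR), such a test is written by marking a test function that returns a figure; the test name and plot contents below are hypothetical.

```python
import matplotlib.pyplot as plt
import pytest


# Hypothetical sketch: pytest-mpl's `mpl_image_compare` marker compares the
# returned figure against a stored baseline when run with `pytest --mpl`.
@pytest.mark.mpl_image_compare
def test_some_plot_matches_baseline():
    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [0.0, 0.5, 0.25])  # stand-in for a plotting method under test
    return fig
```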
8 changes: 8 additions & 0 deletions pymc_marketing/mmm/base.py
@@ -233,6 +233,14 @@ def get_target_transformer(self) -> Pipeline:
        identity_transformer = FunctionTransformer()
        return Pipeline(steps=[("scaler", identity_transformer)])

    @property
    def prior(self) -> Dataset:
        if self.idata is None or "prior" not in self.idata:
            raise RuntimeError(
                "The model hasn't been fit yet, call .sample_prior_predictive() with extend_idata=True first"
            )
        return self.idata["prior"]

    @property
    def prior_predictive(self) -> az.InferenceData:
        if self.idata is None or "prior_predictive" not in self.idata:
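The new `prior` property mirrors the existing `prior_predictive` accessor: it returns the `"prior"` group of the model's InferenceData and raises a RuntimeError if prior samples have not been attached yet. A minimal usage sketch follows; `mmm` and `X` are assumed to already exist, and the exact `sample_prior_predictive` arguments may differ by version.

```python
# Sketch only: `mmm` is an already-built MMM instance and `X` its predictor frame.
mmm.sample_prior_predictive(X, extend_idata=True)  # attaches idata["prior"]

prior = mmm.prior  # xarray Dataset of prior draws; RuntimeError if called before sampling
print(prior)
```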
210 changes: 206 additions & 4 deletions pymc_marketing/mmm/delayed_saturated_mmm.py
@@ -10,13 +10,13 @@
import pymc as pm
import seaborn as sns
from pytensor.tensor import TensorVariable
-from xarray import DataArray
+from xarray import DataArray, Dataset

from pymc_marketing.mmm.base import MMM
from pymc_marketing.mmm.preprocessing import MaxAbsScaleChannels, MaxAbsScaleTarget
from pymc_marketing.mmm.transformers import geometric_adstock, logistic_saturation
from pymc_marketing.mmm.utils import (
-    apply_sklearn_transformer_across_date,
+    apply_sklearn_transformer_across_dim,
    generate_fourier_modes,
)
from pymc_marketing.mmm.validating import ValidateControlColumns
@@ -644,7 +643,6 @@
        data: Dict[str, Union[np.ndarray, Any]] = {
            "channel_data": channel_transformation(new_channel_data)
        }
        if self.control_columns is not None:
            control_data = X[self.control_columns].to_numpy()
            control_transformation = (
@@ -850,6 +849,208 @@
        )
        return fig

    def new_spend_contributions(
        self,
        spend: Optional[np.ndarray] = None,
        one_time: bool = True,
        spend_leading_up: Optional[np.ndarray] = None,
        prior: bool = False,
        original_scale: bool = True,
        **sample_posterior_predictive_kwargs,
    ) -> DataArray:
        """Return the upcoming contributions for a given spend.

        Parameters
        ----------
        spend : np.ndarray, optional
            Array of spend for each channel. If None, the average spend for each channel is used, by default None.
        one_time : bool, optional
            Whether the spend is one time (at start of period) or constant (over period), by default True (one time)
        spend_leading_up : np.ndarray, optional
            Array of spend for each channel leading up to the spend, by default None (no spend leading up)
        prior : bool, optional
            Whether to use the prior or posterior, by default False (posterior)
        original_scale : bool, optional
            Whether to return the contributions in the original scale of the target variable, by default True
        **sample_posterior_predictive_kwargs
            Additional keyword arguments passed to pm.sample_posterior_predictive

        Returns
        -------
        DataArray
            Upcoming contributions for each channel

        """
        if spend is None:
            spend = self.X.loc[:, self.channel_columns].mean().to_numpy()  # type: ignore

        if spend_leading_up is None:
            spend_leading_up = np.zeros_like(spend)

        if len(spend) != len(self.channel_columns):
            raise ValueError("spend must be the same length as the number of channels")

        if len(spend_leading_up) != len(self.channel_columns):
            raise ValueError(
                "spend_leading_up must be the same length as the number of channels"
            )

        spend_leading_up = np.tile(spend_leading_up, self.adstock_max_lag).reshape(
            self.adstock_max_lag, -1
        )

        spend = (
            np.vstack(
                [spend, np.zeros((self.adstock_max_lag, len(self.channel_columns)))]
            )
            if one_time
            else np.ones((self.adstock_max_lag + 1, len(self.channel_columns))) * spend
        )

        new_data = np.vstack(
            [
                spend_leading_up,
                spend,
            ]
        )

        new_data = (
            self.channel_transformer.transform(new_data) if not prior else new_data
        )

        idata: Dataset = self.fit_result if not prior else self.prior

        coords = {
            "weeks_since_spend": np.arange(
                -self.adstock_max_lag, self.adstock_max_lag + 1
            ),
            "channel": self.channel_columns,
        }
        with pm.Model(coords=coords):
            alpha = pm.Uniform("alpha", lower=0, upper=1, dims=("channel",))
            lam = pm.HalfFlat("lam", dims=("channel",))
            beta_channel = pm.HalfFlat("beta_channel", dims=("channel",))

            channel_adstock = geometric_adstock(
                x=new_data,
                alpha=alpha,
                l_max=self.adstock_max_lag,
                normalize=True,
                axis=0,
            )
            channel_adstock_saturated = logistic_saturation(x=channel_adstock, lam=lam)
            pm.Deterministic(
                name="channel_contributions",
                var=channel_adstock_saturated * beta_channel,
                dims=("weeks_since_spend", "channel"),
            )

            samples = pm.sample_posterior_predictive(
                idata,
                var_names=["channel_contributions"],
                **sample_posterior_predictive_kwargs,
            )

        channel_contributions = samples.posterior_predictive["channel_contributions"]

        if original_scale:
            channel_contributions = apply_sklearn_transformer_across_dim(
                data=channel_contributions,
                func=self.get_target_transformer().inverse_transform,
                dim_name="weeks_since_spend",
                combined=False,
            )

        return channel_contributions

    def plot_new_spend_contributions(
        self,
        spend_amount: float,
        one_time: bool = True,
        ax: Optional[plt.Axes] = None,
        ylabel: str = "Sales",
        idx: Optional[slice] = None,
        channels: Optional[List[str]] = None,
        prior: bool = False,
        original_scale: bool = True,
        **sample_posterior_predictive_kwargs,
    ) -> plt.Axes:
        """Plot the upcoming sales for a given spend amount.

        Calls the new_spend_contributions method and plots the results. For more
        control over the plot, use new_spend_contributions directly.

        Parameters
        ----------
        spend_amount : float
            The amount of spend for each channel
        one_time : bool, optional
            Whether the spend is one time (at start of period) or constant (over period), by default True (one time)
        ax : plt.Axes, optional
            The axes to plot on, by default None or current axes
        ylabel : str, optional
            The label for the y-axis, by default "Sales"
        idx : slice, optional
            The index slice of days to plot, by default None or only the positive days.
            More specifically, slice(0, None, None)
        channels : List[str], optional
            The channels to plot, by default None or all channels
        prior : bool, optional
            Whether to use the prior or posterior, by default False (posterior)
        original_scale : bool, optional
            Whether to plot in the original scale of the target variable, by default True
        **sample_posterior_predictive_kwargs
            Additional keyword arguments passed to pm.sample_posterior_predictive

        Returns
        -------
        plt.Axes
            The plot of upcoming sales for the given spend amount

        """
        ax = ax or plt.gca()
        total_channels = len(self.channel_columns)
        contributions = self.new_spend_contributions(
            np.ones(total_channels) * spend_amount,
            one_time=one_time,
            spend_leading_up=np.ones(total_channels) * spend_amount,
            prior=prior,
            original_scale=original_scale,
            **sample_posterior_predictive_kwargs,
        )

        contributions_groupby = contributions.to_series().groupby(
            level=["weeks_since_spend", "channel"]
        )

        idx = idx or pd.IndexSlice[0:]

        lower, upper = quantiles = [0.025, 0.975]
        conf = (
            contributions_groupby.quantile(quantiles)
            .unstack("channel")
            .unstack()
            .loc[idx]
        )

        channels = channels or self.channel_columns  # type: ignore
        for channel in channels:  # type: ignore
            ax.fill_between(
                conf.index,
                conf[channel][lower],
                conf[channel][upper],
                label=f"{channel} {100 * (upper - lower):.0f}% CI",
                alpha=0.5,
            )
        mean = contributions_groupby.mean().unstack("channel").loc[idx, channels]
        color = [f"C{i}" for i in range(len(channels))]  # type: ignore
        mean.add_suffix(" mean").plot(ax=ax, color=color, alpha=0.75)
        ax.legend().set_title("Channel")
        ax.set(
            xlabel="Weeks since spend",
            ylabel=ylabel,
            title=f"Upcoming sales for {spend_amount:.02f} spend",
        )
        return ax

    def _validate_data(self, X, y=None):
        return X

@@ -910,9 +1111,10 @@
        )

        if original_scale:
-            posterior_predictive_samples = apply_sklearn_transformer_across_date(
+            posterior_predictive_samples = apply_sklearn_transformer_across_dim(
                data=posterior_predictive_samples,
                func=self.get_target_transformer().inverse_transform,
+                dim_name="date",
                combined=combined,
            )

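Together, the two new methods answer "what would a given spend contribute going forward?" for a fitted model: new_spend_contributions runs the adstock and saturation forward pass under posterior (or prior) parameter draws, and plot_new_spend_contributions wraps it with a confidence-interval plot. The sketch below shows the intended call pattern; `mmm` is assumed to be a fitted two-channel DelayedSaturatedMMM, and the spend values are illustrative.

```python
import matplotlib.pyplot as plt
import numpy as np

# Sketch only: `mmm` is assumed to be a fitted DelayedSaturatedMMM with two channels.
contributions = mmm.new_spend_contributions(
    spend=np.array([100.0, 50.0]),   # one-time spend per channel at time 0
    one_time=True,
    spend_leading_up=np.zeros(2),    # no spend during the preceding adstock_max_lag periods
    original_scale=True,
)
# `contributions` is a DataArray of draws over (weeks_since_spend, channel).

ax = mmm.plot_new_spend_contributions(spend_amount=100.0, one_time=True)
plt.show()
```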
8 changes: 5 additions & 3 deletions pymc_marketing/mmm/utils.py
@@ -289,9 +289,10 @@ def standardize_scenarios_dict_keys(d: Dict, keywords: List[str]):
            break


-def apply_sklearn_transformer_across_date(
+def apply_sklearn_transformer_across_dim(
    data: xr.DataArray,
    func: Callable[[np.ndarray], np.ndarray],
+    dim_name: str,
    combined: bool = False,
) -> xr.DataArray:
    """Helper function in order to use scikit-learn functions with the xarray target.
@@ -300,6 +301,7 @@ def apply_sklearn_transformer_across_date(
    ----------
    data :
    func : scikit-learn method to apply to the data
+    dim_name : Name of the dimension to apply the function to
    combined : Flag to indicate if the data coords have been combined or not

    Returns
@@ -318,8 +320,8 @@ def apply_sklearn_transformer_across_date(
        data = xr.apply_ufunc(
            func,
            data.expand_dims(dim={"_": 1}, axis=1),
-            input_core_dims=[["date", "_"]],
-            output_core_dims=[["date", "_"]],
+            input_core_dims=[[dim_name, "_"]],
+            output_core_dims=[[dim_name, "_"]],
            vectorize=True,
        ).squeeze(dim="_")

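Renaming apply_sklearn_transformer_across_date to apply_sklearn_transformer_across_dim lets callers name the dimension the scikit-learn transform is applied over: "date" for posterior predictive samples and "weeks_since_spend" for the new spend contributions. Below is a self-contained sketch of the call pattern, with toy data and a stand-in scaler (both assumptions, not from this PR).

```python
import numpy as np
import xarray as xr
from sklearn.preprocessing import MaxAbsScaler

from pymc_marketing.mmm.utils import apply_sklearn_transformer_across_dim

# Toy draws: 2 chains x 10 draws x 5 steps along an arbitrary dimension.
rng = np.random.default_rng(0)
data = xr.DataArray(
    rng.random((2, 10, 5)),
    dims=("chain", "draw", "weeks_since_spend"),
    name="channel_contributions",
)

# Stand-in for the model's target transformer.
scaler = MaxAbsScaler().fit(np.arange(1.0, 6.0).reshape(-1, 1))

# Apply the inverse transform along the named dimension, as the MMM methods do.
rescaled = apply_sklearn_transformer_across_dim(
    data=data,
    func=scaler.inverse_transform,
    dim_name="weeks_since_spend",
    combined=False,
)
print(rescaled.dims)  # same dims as the input
```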
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -38,7 +38,12 @@ docs = [
    "sphinx-design",
]
lint = ["mypy", "pandas-stubs", "pre-commit>=2.19.0", "ruff>=0.1.4"]
-test = ["lifetimes==0.11.3", "pytest==7.0.1", "pytest-cov==3.0.0"]
+test = [
+    "lifetimes==0.11.3",
+    "pytest==7.0.1",
+    "pytest-cov==3.0.0",
+    "pytest-mpl==0.16.1",
+]

[tool.setuptools]
packages = [
(3 additional files in this diff could not be displayed.)
14 changes: 14 additions & 0 deletions tests/mmm/test_base.py
@@ -269,3 +269,17 @@ def test_calling_fit_result_before_fit_raises_error(test_mmm, toy_X, toy_y):
        test_mmm.fit_result
    assert test_mmm.idata is not None
    assert "posterior" in test_mmm.idata


def test_calling_prior_before_sample_prior_predictive_raises_error(
    test_mmm, toy_X, toy_y
):
    # Arrange
    test_mmm.idata = None
    with pytest.raises(
        RuntimeError,
        match=re.escape(
            "The model hasn't been fit yet, call .sample_prior_predictive() with extend_idata=True first"
        ),
    ):
        test_mmm.prior