New spends forward pass #456

Merged Mar 8, 2024 (41 commits)
Commits (the diff below shows changes from 31 of the 41 commits)
d88f77a  current status as method (wd60622, Dec 7, 2023)
32f9d02  format (wd60622, Dec 7, 2023)
65f4153  Update version.txt (juanitorduz, Dec 7, 2023)
d00cc75  Implement different convolution modes (#454) (abdalazizrashid, Dec 7, 2023)
ea1c707  Add PR template (ricardoV94, Dec 7, 2023)
2355584  Update pull_request_template.md (ricardoV94, Dec 11, 2023)
66d5816  Fix issues in index example (ricardoV94, Dec 28, 2023)
a63a12c  Update .pre-commit-config.yaml (juanitorduz, Jan 4, 2024)
cc11015  Update .pre-commit-config.yaml (juanitorduz, Jan 5, 2024)
2bcdd2e  move from other PR (wd60622, Dec 9, 2023)
c11ac0e  put legend on side (wd60622, Jan 6, 2024)
2f3eb53  Optimisation in customer_lifetime_value when discount_rate == 0 (#468) (vincent-grosbois, Jan 6, 2024)
5bd2eb6  Update README.md (juanitorduz, Jan 7, 2024)
f6ba1aa  add support for pre-commit-ci (juanitorduz, Jan 6, 2024)
4cf694e  add isort (juanitorduz, Jan 9, 2024)
0a8e241  modify autosummary templates (OriolAbril, Jan 12, 2024)
a75b968  Rename `clv_summary` to `rfm_summary` and extend functionality (#479) (ColtAllen, Jan 15, 2024)
a979839  Update version.txt (ricardoV94, Jan 15, 2024)
def644f  improve ruff (juanitorduz, Jan 15, 2024)
8f271de  [pre-commit.ci] pre-commit autoupdate (pre-commit-ci[bot], Jan 22, 2024)
e6bbed9  resolve conflict (wd60622, Jan 27, 2024)
1157072  Add baselined saturation (#498) (ferrine, Jan 26, 2024)
fa16200  Swap Before and After convolution modes as per #489 (#501) (abdalazizrashid, Jan 26, 2024)
f4c2de9  resolve conflict (wd60622, Jan 27, 2024)
eecbcaa  add dim_name arg (wd60622, Jan 27, 2024)
dffc183  Merge branch 'main' into new-spends (wd60622, Jan 27, 2024)
6a77fcf  add seed to tests and test methods (wd60622, Jan 28, 2024)
3ece500  add slice as type hint (wd60622, Feb 6, 2024)
ee3398a  use slice in docstring (wd60622, Feb 6, 2024)
9edbb3b  defaults to mean for each channel (wd60622, Feb 8, 2024)
ae183ce  add non-negative check (wd60622, Feb 8, 2024)
4bbbcf3  ax as last arg (wd60622, Feb 11, 2024)
22e6ce6  change weeks -> time (wd60622, Feb 11, 2024)
7bd8d47  parameterize quantiles (wd60622, Feb 11, 2024)
f0d8133  separate out and add to docs (wd60622, Feb 11, 2024)
6aa264e  rerun the baseline images (wd60622, Feb 11, 2024)
acc839f  mock the prior (wd60622, Mar 6, 2024)
02e967b  add new images from latest env (wd60622, Mar 8, 2024)
3deb8dc  migrate to toml instead of ci/cd (wd60622, Mar 8, 2024)
28a7b2c  test only is axes (wd60622, Mar 8, 2024)
a523f00  remove the images (wd60622, Mar 8, 2024)
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -42,7 +42,7 @@ jobs:
      - name: Run tests
        run: |
          pip install -e .[test]
-         pytest --cov-report=xml --no-cov-on-fail --durations=50
+         pytest --mpl --mpl-baseline-path=tests/baseline --cov-report=xml --no-cov-on-fail --durations=50
      - name: Check oldest version of PyMC
        if: ${{ matrix.config.oldest-pymc }}
        run: python -c "import pymc; assert pymc.__version__ == '${{ env.OLDEST_PYMC_VERSION }}'"
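The CI change above turns on matplotlib image-comparison testing via pytest-mpl: figures produced by plotting tests are compared against baseline images stored under tests/baseline. As a rough illustration (not code from this PR), such a test is written by marking a test function that returns a figure; the test name and plot contents below are hypothetical.

```python
import matplotlib.pyplot as plt
import pytest


# Hypothetical sketch: pytest-mpl's `mpl_image_compare` marker compares the
# returned figure against a stored baseline when run with `pytest --mpl`.
@pytest.mark.mpl_image_compare
def test_some_plot_matches_baseline():
    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [0.0, 0.5, 0.25])  # stand-in for a plotting method under test
    return fig
```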
8 changes: 8 additions & 0 deletions pymc_marketing/mmm/base.py
@@ -233,6 +233,14 @@ def get_target_transformer(self) -> Pipeline:
        identity_transformer = FunctionTransformer()
        return Pipeline(steps=[("scaler", identity_transformer)])

    @property
    def prior(self) -> Dataset:
        if self.idata is None or "prior" not in self.idata:
            raise RuntimeError(
                "The model hasn't been fit yet, call .sample_prior_predictive() with extend_idata=True first"
            )
        return self.idata["prior"]

    @property
    def prior_predictive(self) -> az.InferenceData:
        if self.idata is None or "prior_predictive" not in self.idata:
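The new `prior` property mirrors the existing `prior_predictive` accessor: it returns the `"prior"` group of the model's InferenceData and raises a RuntimeError if prior samples have not been attached yet. A minimal usage sketch follows; `mmm` and `X` are assumed to already exist, and the exact `sample_prior_predictive` arguments may differ by version.

```python
# Sketch only: `mmm` is an already-built MMM instance and `X` its predictor frame.
mmm.sample_prior_predictive(X, extend_idata=True)  # attaches idata["prior"]

prior = mmm.prior  # xarray Dataset of prior draws; RuntimeError if called before sampling
print(prior)
```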
210 changes: 206 additions & 4 deletions pymc_marketing/mmm/delayed_saturated_mmm.py
@@ -10,13 +10,13 @@
import pymc as pm
import seaborn as sns
from pytensor.tensor import TensorVariable
-from xarray import DataArray
+from xarray import DataArray, Dataset

from pymc_marketing.mmm.base import MMM
from pymc_marketing.mmm.preprocessing import MaxAbsScaleChannels, MaxAbsScaleTarget
from pymc_marketing.mmm.transformers import geometric_adstock, logistic_saturation
from pymc_marketing.mmm.utils import (
-    apply_sklearn_transformer_across_date,
+    apply_sklearn_transformer_across_dim,
    generate_fourier_modes,
)
from pymc_marketing.mmm.validating import ValidateControlColumns
@@ -644,7 +643,6 @@
        data: Dict[str, Union[np.ndarray, Any]] = {
            "channel_data": channel_transformation(new_channel_data)
        }
        if self.control_columns is not None:
            control_data = X[self.control_columns].to_numpy()
            control_transformation = (
@@ -850,6 +849,208 @@
        )
        return fig

    def new_spend_contributions(
        self,
        spend: Optional[np.ndarray] = None,
        one_time: bool = True,
        spend_leading_up: Optional[np.ndarray] = None,
        prior: bool = False,
        original_scale: bool = True,
        **sample_posterior_predictive_kwargs,
    ) -> DataArray:
        """Return the upcoming contributions for a given spend.

        Parameters
        ----------
        spend : np.ndarray, optional
            Array of spend for each channel. If None, the average spend for each channel is used, by default None.
        one_time : bool, optional
            Whether the spend is one time (at start of period) or constant (over period), by default True (one time)
        spend_leading_up : np.ndarray, optional
            Array of spend for each channel leading up to the spend, by default None (no spend leading up)
        prior : bool, optional
            Whether to use the prior or posterior, by default False (posterior)
        original_scale : bool, optional
            Whether to return the contributions in the original scale of the target variable, by default True
        **sample_posterior_predictive_kwargs
            Additional keyword arguments passed to pm.sample_posterior_predictive

        Returns
        -------
        DataArray
            Upcoming contributions for each channel

        """
        if spend is None:
            spend = self.X.loc[:, self.channel_columns].mean().to_numpy()  # type: ignore

        if spend_leading_up is None:
            spend_leading_up = np.zeros_like(spend)

        if len(spend) != len(self.channel_columns):
            raise ValueError("spend must be the same length as the number of channels")

        if len(spend_leading_up) != len(self.channel_columns):
            raise ValueError(
                "spend_leading_up must be the same length as the number of channels"
            )

        spend_leading_up = np.tile(spend_leading_up, self.adstock_max_lag).reshape(
            self.adstock_max_lag, -1
        )

        spend = (
            np.vstack(
                [spend, np.zeros((self.adstock_max_lag, len(self.channel_columns)))]
            )
            if one_time
            else np.ones((self.adstock_max_lag + 1, len(self.channel_columns))) * spend
        )

        new_data = np.vstack(
            [
                spend_leading_up,
                spend,
            ]
        )

        new_data = (
            self.channel_transformer.transform(new_data) if not prior else new_data
        )

        idata: Dataset = self.fit_result if not prior else self.prior

        coords = {
            "weeks_since_spend": np.arange(
                -self.adstock_max_lag, self.adstock_max_lag + 1
            ),
            "channel": self.channel_columns,
        }
        with pm.Model(coords=coords):
            alpha = pm.Uniform("alpha", lower=0, upper=1, dims=("channel",))
            lam = pm.HalfFlat("lam", dims=("channel",))
            beta_channel = pm.HalfFlat("beta_channel", dims=("channel",))

            channel_adstock = geometric_adstock(
                x=new_data,
                alpha=alpha,
                l_max=self.adstock_max_lag,
                normalize=True,
                axis=0,
            )
            channel_adstock_saturated = logistic_saturation(x=channel_adstock, lam=lam)
            pm.Deterministic(
                name="channel_contributions",
                var=channel_adstock_saturated * beta_channel,
                dims=("weeks_since_spend", "channel"),
            )

            samples = pm.sample_posterior_predictive(
                idata,
                var_names=["channel_contributions"],
                **sample_posterior_predictive_kwargs,
            )

        channel_contributions = samples.posterior_predictive["channel_contributions"]

        if original_scale:
            channel_contributions = apply_sklearn_transformer_across_dim(
                data=channel_contributions,
                func=self.get_target_transformer().inverse_transform,
                dim_name="weeks_since_spend",
                combined=False,
            )

        return channel_contributions

    def plot_new_spend_contributions(
        self,
        spend_amount: float,
        one_time: bool = True,
        ax: Optional[plt.Axes] = None,
        ylabel: str = "Sales",
        idx: Optional[slice] = None,
        channels: Optional[List[str]] = None,
        prior: bool = False,
        original_scale: bool = True,
        **sample_posterior_predictive_kwargs,
    ) -> plt.Axes:
        """Plot the upcoming sales for a given spend amount.

        Calls the new_spend_contributions method and plots the results. For more
        control over the plot, use new_spend_contributions directly.

        Parameters
        ----------
        spend_amount : float
            The amount of spend for each channel
        one_time : bool, optional
            Whether the spend is one time (at start of period) or constant (over period), by default True (one time)
        ax : plt.Axes, optional
            The axes to plot on, by default None or current axes
        ylabel : str, optional
            The label for the y-axis, by default "Sales"
        idx : slice, optional
            The index slice of days to plot, by default None or only the positive days.
            More specifically, slice(0, None, None)
        channels : List[str], optional
            The channels to plot, by default None or all channels
        prior : bool, optional
            Whether to use the prior or posterior, by default False (posterior)
        original_scale : bool, optional
            Whether to plot in the original scale of the target variable, by default True
        **sample_posterior_predictive_kwargs
            Additional keyword arguments passed to pm.sample_posterior_predictive

        Returns
        -------
        plt.Axes
            The plot of upcoming sales for the given spend amount

        """
        ax = ax or plt.gca()
        total_channels = len(self.channel_columns)
        contributions = self.new_spend_contributions(
            np.ones(total_channels) * spend_amount,
            one_time=one_time,
            spend_leading_up=np.ones(total_channels) * spend_amount,
            prior=prior,
            original_scale=original_scale,
            **sample_posterior_predictive_kwargs,
        )

        contributions_groupby = contributions.to_series().groupby(
            level=["weeks_since_spend", "channel"]
        )

        idx = idx or pd.IndexSlice[0:]

        lower, upper = quantiles = [0.025, 0.975]
        conf = (
            contributions_groupby.quantile(quantiles)
            .unstack("channel")
            .unstack()
            .loc[idx]
        )

        channels = channels or self.channel_columns  # type: ignore
        for channel in channels:  # type: ignore
            ax.fill_between(
                conf.index,
                conf[channel][lower],
                conf[channel][upper],
                label=f"{channel} {100 * (upper - lower):.0f}% CI",
                alpha=0.5,
            )
        mean = contributions_groupby.mean().unstack("channel").loc[idx, channels]
        color = [f"C{i}" for i in range(len(channels))]  # type: ignore
        mean.add_suffix(" mean").plot(ax=ax, color=color, alpha=0.75)
        ax.legend().set_title("Channel")
        ax.set(
            xlabel="Weeks since spend",
            ylabel=ylabel,
            title=f"Upcoming sales for {spend_amount:.02f} spend",
        )
        return ax

    def _validate_data(self, X, y=None):
        return X

@@ -910,9 +1111,10 @@
        )

        if original_scale:
-            posterior_predictive_samples = apply_sklearn_transformer_across_date(
+            posterior_predictive_samples = apply_sklearn_transformer_across_dim(
                data=posterior_predictive_samples,
                func=self.get_target_transformer().inverse_transform,
+                dim_name="date",
                combined=combined,
            )

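Together, the two new methods answer "what would a given spend contribute going forward?" for a fitted model: new_spend_contributions runs the adstock and saturation forward pass under posterior (or prior) parameter draws, and plot_new_spend_contributions wraps it with a confidence-interval plot. The sketch below shows the intended call pattern; `mmm` is assumed to be a fitted two-channel DelayedSaturatedMMM, and the spend values are illustrative.

```python
import matplotlib.pyplot as plt
import numpy as np

# Sketch only: `mmm` is assumed to be a fitted DelayedSaturatedMMM with two channels.
contributions = mmm.new_spend_contributions(
    spend=np.array([100.0, 50.0]),   # one-time spend per channel at time 0
    one_time=True,
    spend_leading_up=np.zeros(2),    # no spend during the preceding adstock_max_lag periods
    original_scale=True,
)
# `contributions` is a DataArray of draws over (weeks_since_spend, channel).

ax = mmm.plot_new_spend_contributions(spend_amount=100.0, one_time=True)
plt.show()
```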
8 changes: 5 additions & 3 deletions pymc_marketing/mmm/utils.py
@@ -289,9 +289,10 @@ def standardize_scenarios_dict_keys(d: Dict, keywords: List[str]):
            break


-def apply_sklearn_transformer_across_date(
+def apply_sklearn_transformer_across_dim(
    data: xr.DataArray,
    func: Callable[[np.ndarray], np.ndarray],
+    dim_name: str,
    combined: bool = False,
) -> xr.DataArray:
    """Helper function in order to use scikit-learn functions with the xarray target.
@@ -300,6 +301,7 @@ def apply_sklearn_transformer_across_date(
    ----------
    data :
    func : scikit-learn method to apply to the data
+    dim_name : Name of the dimension to apply the function to
    combined : Flag to indicate if the data coords have been combined or not

    Returns
@@ -318,8 +320,8 @@ def apply_sklearn_transformer_across_date(
        data = xr.apply_ufunc(
            func,
            data.expand_dims(dim={"_": 1}, axis=1),
-            input_core_dims=[["date", "_"]],
-            output_core_dims=[["date", "_"]],
+            input_core_dims=[[dim_name, "_"]],
+            output_core_dims=[[dim_name, "_"]],
            vectorize=True,
        ).squeeze(dim="_")

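Renaming apply_sklearn_transformer_across_date to apply_sklearn_transformer_across_dim lets callers name the dimension the scikit-learn transform is applied over: "date" for posterior predictive samples and "weeks_since_spend" for the new spend contributions. Below is a self-contained sketch of the call pattern, with toy data and a stand-in scaler (both assumptions, not from this PR).

```python
import numpy as np
import xarray as xr
from sklearn.preprocessing import MaxAbsScaler

from pymc_marketing.mmm.utils import apply_sklearn_transformer_across_dim

# Toy draws: 2 chains x 10 draws x 5 steps along an arbitrary dimension.
rng = np.random.default_rng(0)
data = xr.DataArray(
    rng.random((2, 10, 5)),
    dims=("chain", "draw", "weeks_since_spend"),
    name="channel_contributions",
)

# Stand-in for the model's target transformer.
scaler = MaxAbsScaler().fit(np.arange(1.0, 6.0).reshape(-1, 1))

# Apply the inverse transform along the named dimension, as the MMM methods do.
rescaled = apply_sklearn_transformer_across_dim(
    data=data,
    func=scaler.inverse_transform,
    dim_name="weeks_since_spend",
    combined=False,
)
print(rescaled.dims)  # same dims as the input
```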
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -38,7 +38,12 @@ docs = [
    "sphinx-design",
]
lint = ["mypy", "pandas-stubs", "pre-commit>=2.19.0", "ruff>=0.1.4"]
-test = ["lifetimes==0.11.3", "pytest==7.0.1", "pytest-cov==3.0.0"]
+test = [
+    "lifetimes==0.11.3",
+    "pytest==7.0.1",
+    "pytest-cov==3.0.0",
+    "pytest-mpl==0.16.1",
+]

[tool.setuptools]
packages = [
(3 additional files in this diff could not be displayed.)
14 changes: 14 additions & 0 deletions tests/mmm/test_base.py
@@ -269,3 +269,17 @@ def test_calling_fit_result_before_fit_raises_error(test_mmm, toy_X, toy_y):
        test_mmm.fit_result
    assert test_mmm.idata is not None
    assert "posterior" in test_mmm.idata


def test_calling_prior_before_sample_prior_predictive_raises_error(
    test_mmm, toy_X, toy_y
):
    # Arrange
    test_mmm.idata = None
    with pytest.raises(
        RuntimeError,
        match=re.escape(
            "The model hasn't been fit yet, call .sample_prior_predictive() with extend_idata=True first"
        ),
    ):
        test_mmm.prior