From 426307748fd51531a1ea14f4fa42ddbad495662f Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Thu, 25 Jan 2024 15:00:17 +0000 Subject: [PATCH 01/11] Add Weibull transformations --- pymc_marketing/mmm/transformers.py | 89 ++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index afc388310..3e068e255 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -3,6 +3,7 @@ import numpy as np import numpy.typing as npt +import pymc as pm import pytensor.tensor as pt from pytensor.tensor.random.utils import params_broadcast_shapes @@ -13,6 +14,11 @@ class ConvMode(Enum): Overlap = "Overlap" +class WeibullType(Enum): + PDF = "PDF" + CDF = "CDF" + + def batched_convolution(x, w, axis: int = 0, mode: ConvMode = ConvMode.Before): R"""Apply a 1D convolution in a vectorized way across multiple batch dimensions. @@ -260,6 +266,89 @@ def delayed_adstock( return batched_convolution(x, w, axis=axis) +def weibull_adstock( + x, + lam=1, + k=1, + l_max: int = 12, + axis: int = 0, + type: WeibullType = WeibullType.PDF, +): + R"""Weibull Adstocking Transformation. + + This transformation is similar to geometric adstock transformation but has more degrees of freedom, adding more flexibility. + .. plot:: + :context: close-figs + spend = np.zeros(50) + spend[1] = 1 + + shapes = [0.1, 1, 5, 10] + scales = [0.1, 0.4, 1, 2] + modes = [WeibullType.PDF, WeibullType.CDF] + + fig, axes = plt.subplots( + len(shapes), len(modes), figsize=(12, 8), sharex=True, sharey=True + ) + fig.suptitle("Effect of Changing Weibull Adstock Parameters", fontsize=16) + + for m, mode in enumerate(modes): + axes[0, m].set_title(f"Mode: {mode.value}") + + for i, shape in enumerate(shapes): + for j, scale in enumerate(scales): + adstock = weibull_adstock( + spend, lam=scale, k=shape, type=mode, l_max=len(spend) + ).eval() + + axes[i, m].plot( + x, + adstock, + label=f"Scale={scale}", + linestyle="-", + ) + + fig.legend( + *axes[0, 0].get_legend_handles_labels(), + loc="center right", + bbox_to_anchor=(1.1, 0.85), + ) + + plt.tight_layout(rect=[0, 0, 0.9, 1]) + plt.show() + + Parameters + ---------- + x : tensor + Input tensor. + lam : float, by default 1. + Scale parameter of the Weibull distribution. Must be positive. + k : float, by default 1. + Shape parameter of the Weibull distribution. Must be positive. + l_max : int, by default 12 + Maximum duration of carryover effect. + type : WeibullType, by default WeibullType.PDF + Type of Weibull adstock transformation to be applied (PDF or CDF). + + Returns + ------- + tensor + Transformed tensor based on Weibull adstock transformation. + """ + + if type == WeibullType.PDF: + w = pt.exp(pm.Weibull.logp(pt.arange(l_max, dtype=x.dtype) + 1, lam, k)) + w = (w - pt.min(w)) / (pt.max(w) - pt.min(w)) + + elif type == WeibullType.CDF: + w = 1 - pt.exp(pm.Weibull.logcdf(pt.arange(l_max, dtype=x.dtype) + 1, lam, k)) + w = pt.cumprod( + pt.concatenate([pt.ones(1, dtype=x.dtype), w], axis=axis), axis=axis + ) + else: + raise ValueError(f"Wrong WeibullType: {type}, expected of WeibullType") + return batched_convolution(x, w, axis=axis) + + def logistic_saturation(x, lam: Union[npt.NDArray[np.float_], float] = 0.5): """Logistic saturation transformation. From 039e876010f77c49baae975c3e6c6597e209b275 Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Thu, 25 Jan 2024 23:37:30 +0000 Subject: [PATCH 02/11] Add Weibull tests --- tests/mmm/test_transformers.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/mmm/test_transformers.py b/tests/mmm/test_transformers.py index cf2336e25..e913d3b0a 100644 --- a/tests/mmm/test_transformers.py +++ b/tests/mmm/test_transformers.py @@ -2,15 +2,18 @@ import pytensor import pytensor.tensor as pt import pytest +import scipy as sp from pytensor.tensor.var import TensorVariable from pymc_marketing.mmm.transformers import ( ConvMode, + WeibullType, batched_convolution, delayed_adstock, geometric_adstock, logistic_saturation, tanh_saturation, + weibull_adstock, ) @@ -166,6 +169,86 @@ def test_delayed_adstock_vectorized(self, dummy_design_matrix): assert y.shape == x.shape np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12) + def test_weibull_adstock_output_type(self): + x = np.ones(shape=(100)) + y = weibull_adstock(x=x, lam=1, k=1, l_max=7, type=WeibullType.PDF) + assert isinstance(y, TensorVariable) + assert isinstance(y.eval(), np.ndarray) + + def test_weibull_adstock_x_zero(self): + x = np.zeros(shape=(100)) + y = weibull_adstock(x=x, lam=1, k=1, l_max=4, type=WeibullType.PDF) + np.testing.assert_array_equal(x=x, y=y.eval()) + + @pytest.mark.parametrize( + "x, lam, k, l_max", + [ + (np.ones(shape=(100)), 0.3, 0.5, 10), + (np.ones(shape=(100)), 0.7, 1, 100), + (np.zeros(shape=(100)), 0.2, 0.2, 5), + (np.ones(shape=(100)), 0.5, 0.8, 7), + (np.linspace(start=0.0, stop=1.0, num=50), 0.8, 1.5, 3), + (np.linspace(start=0.0, stop=1.0, num=50), 0.8, 1, 50), + ], + ) + def test_weibull_pdf_adstock(self, x, lam, k, l_max): + y = weibull_adstock(x=x, lam=lam, k=k, l_max=l_max, type=WeibullType.PDF).eval() + + assert np.all(np.isfinite(y)) + w = sp.stats.weibull_min.pdf(np.arange(l_max) + 1, c=k, scale=lam) + w = (w - np.min(w)) / (np.max(w) - np.min(w)) + sp_y = batched_convolution(x, w).eval() + + np.testing.assert_almost_equal(y, sp_y) + + @pytest.mark.parametrize( + "x, lam, k, l_max", + [ + (np.ones(shape=(100)), 0.3, 0.5, 10), + (np.ones(shape=(100)), 0.7, 1, 100), + (np.zeros(shape=(100)), 0.2, 0.2, 5), + (np.ones(shape=(100)), 0.5, 0.8, 7), + (np.linspace(start=0.0, stop=1.0, num=50), 0.8, 1.5, 3), + (np.linspace(start=0.0, stop=1.0, num=50), 0.8, 1, 50), + ], + ) + def test_weibull_cdf_adsotck(self, x, lam, k, l_max): + y = weibull_adstock(x=x, lam=lam, k=k, l_max=l_max, type=WeibullType.CDF).eval() + + assert np.all(np.isfinite(y)) + w = 1 - sp.stats.weibull_min.cdf(np.arange(l_max) + 1, c=k, scale=lam) + w = sp.cumprod(np.concatenate([[1], w])) + sp_y = batched_convolution(x, w).eval() + np.testing.assert_almost_equal(y, sp_y) + + @pytest.mark.parametrize( + "type", + [ + WeibullType.PDF, + WeibullType.CDF, + ], + ) + def test_weibull_adstock_vectorized(self, type, dummy_design_matrix): + x = dummy_design_matrix.copy() + x_tensor = pt.as_tensor_variable(x) + lam = [0.9, 0.33, 0.5, 0.1, 1.0] + lam_tensor = pt.as_tensor_variable(lam) + k = [0.8, 0.2, 0.6, 0.4, 1.0] + k_tensor = pt.as_tensor_variable(k) + y = weibull_adstock( + x=x_tensor, lam=lam_tensor, k=k_tensor, l_max=12, type=type + ).eval() + + y_tensors = [ + weibull_adstock( + x=x_tensor[:, i], lam=lam_tensor[i], k=k_tensor[i], l_max=12, type=type + ) + for i in range(x.shape[1]) + ] + ys = np.concatenate([y_t.eval()[..., None] for y_t in y_tensors], axis=1) + assert y.shape == x.shape + np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12) + class TestSaturationTransformers: def test_logistic_saturation_lam_zero(self): From fb35d50279d881e23248215e4533c99da3f35a1f Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 00:16:04 +0000 Subject: [PATCH 03/11] Update --- pymc_marketing/mmm/transformers.py | 55 +++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index 3e068e255..62c98f735 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -279,11 +279,17 @@ def weibull_adstock( This transformation is similar to geometric adstock transformation but has more degrees of freedom, adding more flexibility. .. plot:: :context: close-figs + import matplotlib.pyplot as plt + import numpy as np + import arviz as az + from pymc_marketing.mmm.transformers import WeibullType, weibull_adstock + plt.style.use('arviz-darkgrid') + spend = np.zeros(50) - spend[1] = 1 + spend[0] = 1 - shapes = [0.1, 1, 5, 10] - scales = [0.1, 0.4, 1, 2] + shapes = [0.5, 1., 1.5, 5.] + scales = [10, 20, 40] modes = [WeibullType.PDF, WeibullType.CDF] fig, axes = plt.subplots( @@ -301,21 +307,24 @@ def weibull_adstock( ).eval() axes[i, m].plot( - x, + np.arange(len(spend)), adstock, label=f"Scale={scale}", linestyle="-", + alpha=0.5 ) fig.legend( *axes[0, 0].get_legend_handles_labels(), loc="center right", - bbox_to_anchor=(1.1, 0.85), + bbox_to_anchor=(1.2, 0.85), ) plt.tight_layout(rect=[0, 0, 0.9, 1]) plt.show() + + Parameters ---------- x : tensor @@ -334,20 +343,42 @@ def weibull_adstock( tensor Transformed tensor based on Weibull adstock transformation. """ + lam = pt.as_tensor(lam)[..., None] + k = pt.as_tensor(k)[..., None] + t = pt.arange(l_max, dtype=x.dtype) + 1 if type == WeibullType.PDF: - w = pt.exp(pm.Weibull.logp(pt.arange(l_max, dtype=x.dtype) + 1, lam, k)) - w = (w - pt.min(w)) / (pt.max(w) - pt.min(w)) - - elif type == WeibullType.CDF: - w = 1 - pt.exp(pm.Weibull.logcdf(pt.arange(l_max, dtype=x.dtype) + 1, lam, k)) - w = pt.cumprod( - pt.concatenate([pt.ones(1, dtype=x.dtype), w], axis=axis), axis=axis + w = pt.exp(pm.Weibull.logp(t, k, lam)) + w = (w - pt.min(w, axis=-1)[..., None]) / ( + pt.max(w, axis=-1)[..., None] - pt.min(w, axis=-1)[..., None] ) + elif type == WeibullType.CDF: + w = 1 - pt.exp(pm.Weibull.logcdf(t, k, lam)) + shape = (*w.shape[:-1], w.shape[-1] + 1) + padded_w = pt.ones(shape, dtype=w.dtype) + padded_w = pt.set_subtensor(padded_w[..., 1:], w) + w = pt.cumprod(padded_w, axis=-1) else: raise ValueError(f"Wrong WeibullType: {type}, expected of WeibullType") return batched_convolution(x, w, axis=axis) + # lam = pt.as_tensor(lam) + # k = pt.as_tensor(k) + # t = pt.arange(l_max, dtype=x.dtype) + 1 + + # if type == WeibullType.PDF: + # w = pt.exp(pm.Weibull.logp(t, k, lam)) + # w = (w - pt.min(w, axis=-1)) / (pt.max(w, axis=-1) - pt.min(w, axis=-1)) + # elif type == WeibullType.CDF: + # w = 1 - pt.exp(pm.Weibull.logcdf(t, k, lam)) + # shape = (*w.shape[:-1], w.shape[-1] + 1) + # padded_w = pt.ones(shape, dtype=w.dtype) + # padded_w = pt.set_subtensor(padded_w[..., 1:], w) + # w = pt.cumprod(padded_w, axis=-1) + # else: + # raise ValueError(f"Wrong WeibullType: {type}, expected of WeibullType") + # return batched_convolution(x, w, axis=axis) + def logistic_saturation(x, lam: Union[npt.NDArray[np.float_], float] = 0.5): """Logistic saturation transformation. From 3efacdffe10b9c5bd1ccad3998b39f292d8b8afc Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 00:17:04 +0000 Subject: [PATCH 04/11] Update --- pymc_marketing/mmm/transformers.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index 62c98f735..f9f94e52e 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -362,23 +362,6 @@ def weibull_adstock( raise ValueError(f"Wrong WeibullType: {type}, expected of WeibullType") return batched_convolution(x, w, axis=axis) - # lam = pt.as_tensor(lam) - # k = pt.as_tensor(k) - # t = pt.arange(l_max, dtype=x.dtype) + 1 - - # if type == WeibullType.PDF: - # w = pt.exp(pm.Weibull.logp(t, k, lam)) - # w = (w - pt.min(w, axis=-1)) / (pt.max(w, axis=-1) - pt.min(w, axis=-1)) - # elif type == WeibullType.CDF: - # w = 1 - pt.exp(pm.Weibull.logcdf(t, k, lam)) - # shape = (*w.shape[:-1], w.shape[-1] + 1) - # padded_w = pt.ones(shape, dtype=w.dtype) - # padded_w = pt.set_subtensor(padded_w[..., 1:], w) - # w = pt.cumprod(padded_w, axis=-1) - # else: - # raise ValueError(f"Wrong WeibullType: {type}, expected of WeibullType") - # return batched_convolution(x, w, axis=axis) - def logistic_saturation(x, lam: Union[npt.NDArray[np.float_], float] = 0.5): """Logistic saturation transformation. From 6655cc811d25ac1550ef2960ef6f739fa67f6230 Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 11:53:01 +0000 Subject: [PATCH 05/11] Docs fix --- pymc_marketing/mmm/transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index f9f94e52e..64413dcbc 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -277,6 +277,7 @@ def weibull_adstock( R"""Weibull Adstocking Transformation. This transformation is similar to geometric adstock transformation but has more degrees of freedom, adding more flexibility. + .. plot:: :context: close-figs import matplotlib.pyplot as plt @@ -311,7 +312,6 @@ def weibull_adstock( adstock, label=f"Scale={scale}", linestyle="-", - alpha=0.5 ) fig.legend( From 78b6b9ea44689954db312ad35b58893cc9351312 Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 11:59:45 +0000 Subject: [PATCH 06/11] Add support for strings in type specs --- pymc_marketing/mmm/transformers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index 64413dcbc..b9635917e 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -14,7 +14,8 @@ class ConvMode(Enum): Overlap = "Overlap" -class WeibullType(Enum): +class WeibullType(str, Enum): + # TODO: use StrEnum when we upgrade to python 3.11 PDF = "PDF" CDF = "CDF" @@ -272,7 +273,7 @@ def weibull_adstock( k=1, l_max: int = 12, axis: int = 0, - type: WeibullType = WeibullType.PDF, + type: WeibullType | str = WeibullType.PDF, ): R"""Weibull Adstocking Transformation. From 04707223b08c0b881deb617cd23878798a81fd30 Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 12:25:05 +0000 Subject: [PATCH 07/11] Improve tests --- tests/mmm/test_transformers.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/mmm/test_transformers.py b/tests/mmm/test_transformers.py index e913d3b0a..938911045 100644 --- a/tests/mmm/test_transformers.py +++ b/tests/mmm/test_transformers.py @@ -1,3 +1,5 @@ +from contextlib import nullcontext as does_not_raise + import numpy as np import pytensor import pytensor.tensor as pt @@ -169,20 +171,10 @@ def test_delayed_adstock_vectorized(self, dummy_design_matrix): assert y.shape == x.shape np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12) - def test_weibull_adstock_output_type(self): - x = np.ones(shape=(100)) - y = weibull_adstock(x=x, lam=1, k=1, l_max=7, type=WeibullType.PDF) - assert isinstance(y, TensorVariable) - assert isinstance(y.eval(), np.ndarray) - - def test_weibull_adstock_x_zero(self): - x = np.zeros(shape=(100)) - y = weibull_adstock(x=x, lam=1, k=1, l_max=4, type=WeibullType.PDF) - np.testing.assert_array_equal(x=x, y=y.eval()) - @pytest.mark.parametrize( "x, lam, k, l_max", [ + (np.zeros(shape=(100)), 1, 1, 4), (np.ones(shape=(100)), 0.3, 0.5, 10), (np.ones(shape=(100)), 0.7, 1, 100), (np.zeros(shape=(100)), 0.2, 0.2, 5), @@ -204,6 +196,7 @@ def test_weibull_pdf_adstock(self, x, lam, k, l_max): @pytest.mark.parametrize( "x, lam, k, l_max", [ + (np.zeros(shape=(100)), 1, 1, 4), (np.ones(shape=(100)), 0.3, 0.5, 10), (np.ones(shape=(100)), 0.7, 1, 100), (np.zeros(shape=(100)), 0.2, 0.2, 5), @@ -249,6 +242,20 @@ def test_weibull_adstock_vectorized(self, type, dummy_design_matrix): assert y.shape == x.shape np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12) + @pytest.mark.parametrize( + "type, expectation", + [ + ("PDF", does_not_raise()), + ("CDF", does_not_raise()), + ("PMF", pytest.raises(ValueError)), + (WeibullType.PDF, does_not_raise()), + (WeibullType.CDF, does_not_raise()), + ], + ) + def test_weibull_adstock_type(self, type, expectation): + with expectation: + weibull_adstock(x=np.ones(shape=(100)), lam=0.5, k=0.5, l_max=10, type=type) + class TestSaturationTransformers: def test_logistic_saturation_lam_zero(self): From 3f13fcea26ab1fb2b4764465224a5c740196b67a Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 13:09:42 +0000 Subject: [PATCH 08/11] Use Union due Python 3.9 --- pymc_marketing/mmm/transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index b9635917e..08ba3cd85 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -273,7 +273,7 @@ def weibull_adstock( k=1, l_max: int = 12, axis: int = 0, - type: WeibullType | str = WeibullType.PDF, + type: Union[WeibullType, str] = WeibullType.PDF, ): R"""Weibull Adstocking Transformation. From 773bba451b1cef1c251b9a78efe61da97ab02a9e Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Fri, 26 Jan 2024 14:59:24 +0000 Subject: [PATCH 09/11] Spacing fix --- pymc_marketing/mmm/transformers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index 08ba3cd85..cefe04dd2 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -281,6 +281,7 @@ def weibull_adstock( .. plot:: :context: close-figs + import matplotlib.pyplot as plt import numpy as np import arviz as az From fb1dcf1fe83cd7dff9f4462fbbaa91ee2bf80ff4 Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Mon, 29 Jan 2024 09:40:15 +0000 Subject: [PATCH 10/11] Update docs --- pymc_marketing/mmm/transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc_marketing/mmm/transformers.py b/pymc_marketing/mmm/transformers.py index cefe04dd2..8a97e1dcb 100644 --- a/pymc_marketing/mmm/transformers.py +++ b/pymc_marketing/mmm/transformers.py @@ -337,7 +337,7 @@ def weibull_adstock( Shape parameter of the Weibull distribution. Must be positive. l_max : int, by default 12 Maximum duration of carryover effect. - type : WeibullType, by default WeibullType.PDF + type : WeibullType or str, by default WeibullType.PDF Type of Weibull adstock transformation to be applied (PDF or CDF). Returns From a208794bc28e21b06067b42560edb709891fea49 Mon Sep 17 00:00:00 2001 From: Abdalaziz Rashid Date: Mon, 29 Jan 2024 13:59:31 +0000 Subject: [PATCH 11/11] Use numpy for cumprod --- tests/mmm/test_transformers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mmm/test_transformers.py b/tests/mmm/test_transformers.py index fb98c2d51..61595aa5d 100644 --- a/tests/mmm/test_transformers.py +++ b/tests/mmm/test_transformers.py @@ -212,7 +212,7 @@ def test_weibull_cdf_adsotck(self, x, lam, k, l_max): assert np.all(np.isfinite(y)) w = 1 - sp.stats.weibull_min.cdf(np.arange(l_max) + 1, c=k, scale=lam) - w = sp.cumprod(np.concatenate([[1], w])) + w = np.cumprod(np.concatenate([[1], w])) sp_y = batched_convolution(x, w).eval() np.testing.assert_almost_equal(y, sp_y)