Commit 13949fe

lucianopaz authored and ricardoV94 committed
Implement vectorized adstock transformations
1 parent 3e54549 commit 13949fe

File tree

3 files changed (+146 -66)

pymc_marketing/mmm/delayed_saturated_mmm.py
Lines changed: 3 additions & 5 deletions

```diff
@@ -5,10 +5,7 @@
 
 from pymc_marketing.mmm.base import MMM
 from pymc_marketing.mmm.preprocessing import MaxAbsScaleChannels, MaxAbsScaleTarget
-from pymc_marketing.mmm.transformers import (
-    geometric_adstock_vectorized,
-    logistic_saturation,
-)
+from pymc_marketing.mmm.transformers import geometric_adstock, logistic_saturation
 from pymc_marketing.mmm.validating import ValidateControlColumns
 
 
@@ -80,11 +77,12 @@ def build_model(
 
         channel_adstock = pm.Deterministic(
             name="channel_adstock",
-            var=geometric_adstock_vectorized(
+            var=geometric_adstock(
                 x=channel_data_,
                 alpha=alpha,
                 l_max=adstock_max_lag,
                 normalize=True,
+                axis=0,
             ),
             dims=("date", "channel"),
         )
```

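The practical effect of the new `axis=0` argument is that a single `geometric_adstock` call now adstocks every channel column of a `(date, channel)` matrix at once, with one `alpha` per channel. A minimal sketch of the equivalence, mirroring the vectorized test further below; the matrix values and `l_max=12` are illustrative, not part of the commit:

```python
import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import geometric_adstock

# Illustrative (date, channel) design matrix: 100 dates, 5 channels.
x = np.random.default_rng(0).uniform(size=(100, 5))
alpha = [0.9, 0.33, 0.5, 0.1, 0.0]  # one decay rate per channel

# One vectorized call: convolve along the date axis (axis=0), broadcasting
# the per-channel alphas across the channel axis.
y = geometric_adstock(
    x=pt.as_tensor_variable(x), alpha=pt.as_tensor_variable(alpha), l_max=12, axis=0
).eval()

# Same result as applying the scalar transform one channel column at a time.
ys = np.stack(
    [geometric_adstock(x=x[:, i], alpha=alpha[i], l_max=12).eval() for i in range(5)],
    axis=1,
)
assert y.shape == x.shape
np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12)
```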
pymc_marketing/mmm/transformers.py
Lines changed: 79 additions & 45 deletions

```diff
@@ -1,7 +1,70 @@
 import pytensor.tensor as pt
+from pytensor.tensor.random.utils import params_broadcast_shapes
 
 
-def geometric_adstock(x, alpha: float = 0.0, l_max: int = 12, normalize: bool = False):
+def batched_convolution(x, w, axis: int = 0):
+    """Apply a 1D convolution in a vectorized way across multiple batch dimensions.
+
+    Parameters
+    ----------
+    x :
+        The array to convolve.
+    w :
+        The weight of the convolution. The last axis of ``w`` determines the number
+        of steps to use in the convolution.
+    axis : int
+        The axis of ``x`` along which to apply the convolution.
+
+    Returns
+    -------
+    y :
+        The result of convolving ``x`` with ``w`` along the desired axis. The shape
+        of the result will match the shape of ``x`` up to broadcasting with ``w``.
+        The convolved axis will show the results of left padding zeros to ``x``
+        while applying the convolutions.
+    """
+    # We move the axis to the last dimension of the array so that it's easier to
+    # reason about parameter broadcasting. We will move the axis back at the end.
+    orig_ndim = x.ndim
+    axis = axis if axis >= 0 else orig_ndim + axis
+    w = pt.as_tensor(w)
+    x = pt.moveaxis(x, axis, -1)
+    l_max = w.type.shape[-1]
+    if l_max is None:
+        try:
+            l_max = w.shape[-1].eval()
+        except Exception:
+            pass
+    # Get the broadcast shapes of x and w but ignoring their last dimension.
+    # The last dimension of x is the "time" axis, which doesn't get broadcast.
+    # The last dimension of w is the number of time steps that go into the convolution.
+    x_shape, w_shape = params_broadcast_shapes([x.shape, w.shape], [1, 1])
+    x = pt.broadcast_to(x, x_shape)
+    w = pt.broadcast_to(w, w_shape)
+    x_time = x.shape[-1]
+    shape = (*x.shape, w.shape[-1])
+    # Make a tensor with x at the different time lags needed for the convolution.
+    padded_x = pt.zeros(shape, dtype=x.dtype)
+    if l_max is not None:
+        for i in range(l_max):
+            padded_x = pt.set_subtensor(
+                padded_x[..., i:x_time, i], x[..., : x_time - i]
+            )
+    else:  # pragma: no cover
+        raise NotImplementedError(
+            "At the moment, convolving with weight arrays that don't have a concrete "
+            "shape at compile time is not supported."
+        )
+    # The convolution is treated as an element-wise product that then gets reduced
+    # along the dimension that represents the convolution time lags.
+    conv = pt.sum(padded_x * w[..., None, :], axis=-1)
+    # Move the "time" axis back to where it was in the original x array.
+    return pt.moveaxis(conv, -1, axis + conv.ndim - orig_ndim)
+
+
+def geometric_adstock(
+    x, alpha: float = 0.0, l_max: int = 12, normalize: bool = False, axis: int = 0
+):
     """Geometric adstock transformation.
 
     Adstock with geometric decay assumes advertising effect peaks at the same
@@ -31,29 +94,19 @@ def geometric_adstock(x, alpha: float = 0.0, l_max: int = 12, normalize: bool =
     .. [1] Jin, Yuxue, et al. "Bayesian methods for media mix modeling
        with carryover and shape effects." (2017).
     """
-    cycles = [pt.concatenate([pt.zeros(i), x[: x.shape[0] - i]]) for i in range(l_max)]
-    x_cycle = pt.stack(cycles)
-    w = pt.as_tensor_variable([pt.power(alpha, i) for i in range(l_max)])
-    w = w / pt.sum(w) if normalize else w
-    return pt.dot(w, x_cycle)
-
-
-def geometric_adstock_vectorized(x, alpha, l_max: int = 12, normalize: bool = False):
-    """Vectorized geometric adstock transformation."""
-    cycles = [
-        pt.concatenate(tensor_list=[pt.zeros(shape=x.shape)[:i], x[: x.shape[0] - i]])
-        for i in range(l_max)
-    ]
-    x_cycle = pt.stack(cycles)
-    x_cycle = pt.transpose(x=x_cycle, axes=[1, 2, 0])
-    w = pt.as_tensor_variable([pt.power(alpha, i) for i in range(l_max)])
-    w = pt.transpose(w)[None, ...]
-    w = w / pt.sum(w, axis=2, keepdims=True) if normalize else w
-    return pt.sum(pt.mul(x_cycle, w), axis=2)
+    w = pt.power(pt.as_tensor(alpha)[..., None], pt.arange(l_max, dtype=x.dtype))
+    w = w / pt.sum(w, axis=-1, keepdims=True) if normalize else w
+    return batched_convolution(x, w, axis=axis)
 
 
 def delayed_adstock(
-    x, alpha: float = 0.0, theta: int = 0, l_max: int = 12, normalize: bool = False
+    x,
+    alpha: float = 0.0,
+    theta: int = 0,
+    l_max: int = 12,
+    normalize: bool = False,
+    axis: int = 0,
 ):
     """Delayed adstock transformation.
 
@@ -83,31 +136,12 @@ def delayed_adstock(
     .. [1] Jin, Yuxue, et al. "Bayesian methods for media mix modeling
        with carryover and shape effects." (2017).
     """
-    cycles = [pt.concatenate([pt.zeros(i), x[: x.shape[0] - i]]) for i in range(l_max)]
-    x_cycle = pt.stack(cycles)
-    w = pt.as_tensor_variable(
-        [pt.power(alpha, ((i - theta) ** 2)) for i in range(l_max)]
-    )
-    w = w / pt.sum(w) if normalize else w
-    return pt.dot(w, x_cycle)
-
-
-def delayed_adstock_vectorized(
-    x, alpha, theta, l_max: int = 12, normalize: bool = False
-):
-    """Delayed adstock transformation."""
-    cycles = [
-        pt.concatenate(tensor_list=[pt.zeros(shape=x.shape)[:i], x[: x.shape[0] - i]])
-        for i in range(l_max)
-    ]
-    x_cycle = pt.stack(cycles)
-    x_cycle = pt.transpose(x=x_cycle, axes=[1, 2, 0])
-    w = pt.as_tensor_variable(
-        [pt.power(alpha, ((i - theta) ** 2)) for i in range(l_max)]
+    w = pt.power(
+        pt.as_tensor(alpha)[..., None],
+        (pt.arange(l_max, dtype=x.dtype) - pt.as_tensor(theta)[..., None]) ** 2,
     )
-    w = pt.transpose(w)[None, ...]
-    w = w / pt.sum(w, axis=2, keepdims=True) if normalize else w
-    return pt.sum(pt.mul(x_cycle, w), axis=2)
+    w = w / pt.sum(w, axis=-1, keepdims=True) if normalize else w
+    return batched_convolution(x, w, axis=axis)
 
 
 def logistic_saturation(x, lam: float = 0.5):
```

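For intuition, what `batched_convolution` computes along the chosen axis is a causal convolution of `x` with `w`, with `x` left-padded by zeros: `y[t] = sum_i w[i] * x[t - i]`. A plain-NumPy reference for the 1D case, written as a sketch with illustrative arrays and checked against the function above:

```python
import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import batched_convolution

rng = np.random.default_rng(42)
x = rng.normal(size=7)          # illustrative 1D series
w = np.array([1.0, 0.5, 0.25])  # three convolution steps (l_max = 3)

# NumPy reference: y[t] = sum_i w[i] * x[t - i], treating x[t] as 0 for t < 0,
# i.e. the series is left-padded with zeros.
y_ref = np.array(
    [sum(w[i] * x[t - i] for i in range(len(w)) if t - i >= 0) for t in range(len(x))]
)

y = batched_convolution(pt.as_tensor_variable(x), w, axis=0).eval()
np.testing.assert_allclose(y, y_ref)
```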
tests/mmm/test_transformers.py
Lines changed: 64 additions & 16 deletions

```diff
@@ -1,13 +1,13 @@
 import numpy as np
+import pytensor
 import pytensor.tensor as pt
 import pytest
 from pytensor.tensor.var import TensorVariable
 
 from pymc_marketing.mmm.transformers import (
+    batched_convolution,
     delayed_adstock,
-    delayed_adstock_vectorized,
     geometric_adstock,
-    geometric_adstock_vectorized,
     logistic_saturation,
     tanh_saturation,
 )
@@ -27,6 +27,58 @@ def dummy_design_matrix():
     )
 
 
+@pytest.fixture(
+    scope="module", params=["ndarray", "TensorConstant", "TensorVariable"], ids=str
+)
+def convolution_inputs(request):
+    x_val = np.ones((3, 4, 5))
+    w_val = np.ones((2))
+    if request.param == "ndarray":
+        return x_val, w_val, None, None
+    elif request.param == "TensorConstant":
+        return pt.as_tensor_variable(x_val), pt.as_tensor_variable(w_val), None, None
+    elif request.param == "TensorVariable":
+        return (
+            pt.dtensor3("x"),
+            pt.specify_shape(pt.dvector("w"), w_val.shape),
+            x_val,
+            w_val,
+        )
+
+
+@pytest.fixture(scope="module", params=[0, 1, -1])
+def convolution_axis(request):
+    return request.param
+
+
+def test_batched_convolution(convolution_inputs, convolution_axis):
+    x, w, x_val, w_val = convolution_inputs
+    y = batched_convolution(x, w, convolution_axis)
+    if x_val is None:
+        y_val = y.eval()
+        expected_shape = getattr(x, "value", x).shape
+    else:
+        y_val = pytensor.function([x, w], y)(x_val, w_val)
+        expected_shape = x_val.shape
+    assert y_val.shape == expected_shape
+    y_val = np.moveaxis(y_val, convolution_axis, 0)
+    x_val = np.moveaxis(
+        x_val if x_val is not None else getattr(x, "value", x), convolution_axis, 0
+    )
+    assert np.allclose(y_val[0], x_val[0])
+    assert np.allclose(y_val[1:], x_val[1:] + x_val[:-1])
+
+
+def test_batched_convolution_broadcasting():
+    x_val = np.random.default_rng(42).normal(size=(3, 1, 5))
+    x = pt.as_tensor_variable(x_val)
+    w = pt.as_tensor_variable(np.ones((1, 1, 4, 2)))
+    y = batched_convolution(x, w, axis=-1).eval()
+    assert y.shape == (1, 3, 4, 5)
+    assert np.allclose(y[..., 0], x_val[..., 0])
+    assert np.allclose(y[..., 1:], x_val[..., 1:] + x_val[..., :-1])
+
+
 class TestsAdstockTransformers:
     def test_geometric_adstock_x_zero(self):
         x = np.zeros(shape=(100))
@@ -62,14 +114,12 @@ def test_delayed_adstock_x_zero(self):
         y = delayed_adstock(x=x, alpha=0.2, theta=2, l_max=4)
         np.testing.assert_array_equal(x=x, y=y.eval())
 
-    def test_geometric_adstock_vactorized(self, dummy_design_matrix):
+    def test_geometric_adstock_vectorized(self, dummy_design_matrix):
         x = dummy_design_matrix.copy()
         x_tensor = pt.as_tensor_variable(x)
         alpha = [0.9, 0.33, 0.5, 0.1, 0.0]
         alpha_tensor = pt.as_tensor_variable(alpha)
-        y_tensor = geometric_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, l_max=12
-        )
+        y_tensor = geometric_adstock(x=x_tensor, alpha=alpha_tensor, l_max=12, axis=0)
         y = y_tensor.eval()
 
         y_tensors = [
@@ -80,15 +130,15 @@ def test_geometric_adstock_vactorized(self, dummy_design_matrix):
         assert y.shape == x.shape
         np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12)
 
-    def test_delayed_adstock_vactorized(self, dummy_design_matrix):
+    def test_delayed_adstock_vectorized(self, dummy_design_matrix):
         x = dummy_design_matrix
         x_tensor = pt.as_tensor_variable(x)
         alpha = [0.9, 0.33, 0.5, 0.1, 0.0]
         alpha_tensor = pt.as_tensor_variable(alpha)
         theta = [0, 1, 2, 3, 4]
         theta_tensor = pt.as_tensor_variable(theta)
-        y_tensor = delayed_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12
+        y_tensor = delayed_adstock(
+            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12, axis=0
         )
         y = y_tensor.eval()
 
@@ -220,7 +270,7 @@ def test_logistic_saturation_delayed_adstock_composition(
         assert z2_eval.max() <= 1
         assert z2_eval.min() >= 0
 
-    def test_geometric_adstock_vactorized_logistic_saturation(
+    def test_geometric_adstock_vectorized_logistic_saturation(
         self, dummy_design_matrix
     ):
         x = dummy_design_matrix.copy()
@@ -229,9 +279,7 @@ def test_geometric_adstock_vactorized_logistic_saturation(
         alpha_tensor = pt.as_tensor_variable(alpha)
         lam = [0.5, 1.0, 2.0, 3.0, 4.0]
         lam_tensor = pt.as_tensor_variable(lam)
-        y_tensor = geometric_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, l_max=12
-        )
+        y_tensor = geometric_adstock(x=x_tensor, alpha=alpha_tensor, l_max=12, axis=0)
         z_tensor = logistic_saturation(x=y_tensor, lam=lam_tensor)
         z = z_tensor.eval()
 
@@ -246,7 +294,7 @@ def test_geometric_adstock_vactorized_logistic_saturation(
         assert zs.shape == x.shape
         np.testing.assert_almost_equal(actual=z, desired=zs, decimal=12)
 
-    def test_delayed_adstock_vactorized_logistic_saturation(self, dummy_design_matrix):
+    def test_delayed_adstock_vectorized_logistic_saturation(self, dummy_design_matrix):
         x = dummy_design_matrix.copy()
         x_tensor = pt.as_tensor_variable(x)
         alpha = [0.9, 0.33, 0.5, 0.1, 0.0]
@@ -255,8 +303,8 @@ def test_delayed_adstock_vactorized_logistic_saturation(self, dummy_design_matrix):
         theta_tensor = pt.as_tensor_variable(theta)
         lam = [0.5, 1.0, 2.0, 3.0, 4.0]
         lam_tensor = pt.as_tensor_variable(lam)
-        y_tensor = delayed_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12
+        y_tensor = delayed_adstock(
+            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12, axis=0
         )
         z_tensor = logistic_saturation(x=y_tensor, lam=lam_tensor)
         z = z_tensor.eval()
```

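Finally, a note on the rewritten weight construction in `delayed_adstock`: the weights are `w[i] = alpha ** ((i - theta) ** 2)`, so the carryover effect peaks `theta` steps after the spend instead of immediately. A small sketch with illustrative parameter values:

```python
import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import delayed_adstock

alpha, theta, l_max = 0.5, 2, 6  # illustrative values
w = alpha ** ((np.arange(l_max) - theta) ** 2)
assert np.argmax(w) == theta  # the weight profile peaks at lag theta

# A unit spend at t=0 therefore spreads out with exactly that delayed profile.
x = np.zeros(10)
x[0] = 1.0
y = delayed_adstock(
    x=pt.as_tensor_variable(x), alpha=alpha, theta=theta, l_max=l_max
).eval()
np.testing.assert_allclose(y[:l_max], w)
np.testing.assert_allclose(y[l_max:], 0.0)
```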