Implement vectorized adstock transformations #221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 1 commit · Apr 6, 2023
8 changes: 3 additions & 5 deletions pymc_marketing/mmm/delayed_saturated_mmm.py
@@ -5,10 +5,7 @@

from pymc_marketing.mmm.base import MMM
from pymc_marketing.mmm.preprocessing import MaxAbsScaleChannels, MaxAbsScaleTarget
-from pymc_marketing.mmm.transformers import (
-    geometric_adstock_vectorized,
-    logistic_saturation,
-)
+from pymc_marketing.mmm.transformers import geometric_adstock, logistic_saturation
from pymc_marketing.mmm.validating import ValidateControlColumns


@@ -80,11 +77,12 @@ def build_model(

        channel_adstock = pm.Deterministic(
            name="channel_adstock",
-            var=geometric_adstock_vectorized(
+            var=geometric_adstock(
                x=channel_data_,
                alpha=alpha,
                l_max=adstock_max_lag,
                normalize=True,
+                axis=0,
            ),
            dims=("date", "channel"),
        )
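For context, a minimal sketch of the consolidated API that this call site adopts; the shapes and parameter values below are illustrative only, not taken from this PR:

import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import geometric_adstock

# Illustrative media design matrix: 52 dates (rows) by 3 channels (columns).
channel_data = pt.as_tensor_variable(np.random.default_rng(1).uniform(size=(52, 3)))
# One decay rate per channel; geometric_adstock broadcasts alpha against the
# lag axis internally, so no separate *_vectorized helper is needed.
alpha = pt.as_tensor_variable([0.5, 0.7, 0.2])

# axis=0 convolves along the date axis, as in build_model above.
adstocked = geometric_adstock(
    x=channel_data, alpha=alpha, l_max=8, normalize=True, axis=0
)
assert adstocked.eval().shape == (52, 3)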
124 changes: 79 additions & 45 deletions pymc_marketing/mmm/transformers.py
@@ -1,7 +1,70 @@
import pytensor.tensor as pt
+from pytensor.tensor.random.utils import params_broadcast_shapes


-def geometric_adstock(x, alpha: float = 0.0, l_max: int = 12, normalize: bool = False):
+def batched_convolution(x, w, axis: int = 0):
+    """Apply a 1D convolution in a vectorized way across multiple batch dimensions.
+
+    Parameters
+    ----------
+    x :
+        The array to convolve.
+    w :
+        The weight of the convolution. The last axis of ``w`` determines the number
+        of steps to use in the convolution.
+    axis : int
+        The axis of ``x`` along which to apply the convolution.
+
+    Returns
+    -------
+    y :
+        The result of convolving ``x`` with ``w`` along the desired axis. The shape
+        of the result will match the shape of ``x`` up to broadcasting with ``w``.
+        The convolved axis shows the result of left padding ``x`` with zeros before
+        applying the convolution.
+    """
+    # We move the axis to the last dimension of the array so that it's easier to
+    # reason about parameter broadcasting. We will move the axis back at the end.
+    orig_ndim = x.ndim
+    axis = axis if axis >= 0 else orig_ndim + axis
+    w = pt.as_tensor(w)
+    x = pt.moveaxis(x, axis, -1)
+    l_max = w.type.shape[-1]
+    if l_max is None:
+        try:
+            l_max = w.shape[-1].eval()
+        except Exception:
+            pass
+    # Get the broadcast shapes of x and w, ignoring their last dimensions.
+    # The last dimension of x is the "time" axis, which doesn't get broadcast.
+    # The last dimension of w is the number of time steps in the convolution.
+    x_shape, w_shape = params_broadcast_shapes([x.shape, w.shape], [1, 1])
+    x = pt.broadcast_to(x, x_shape)
+    w = pt.broadcast_to(w, w_shape)
+    x_time = x.shape[-1]
+    shape = (*x.shape, w.shape[-1])
+    # Make a tensor with x at the different time lags needed for the convolution.
+    padded_x = pt.zeros(shape, dtype=x.dtype)
+    if l_max is not None:
+        for i in range(l_max):
+            padded_x = pt.set_subtensor(
+                padded_x[..., i:x_time, i], x[..., : x_time - i]
+            )
+    else:  # pragma: no cover
+        raise NotImplementedError(
+            "At the moment, convolving with weight arrays that don't have a concrete "
+            "shape at compile time is not supported."
+        )
+    # The convolution is treated as an element-wise product that then gets reduced
+    # along the dimension representing the convolution time lags.
+    conv = pt.sum(padded_x * w[..., None, :], axis=-1)
+    # Move the "time" axis back to where it was in the original x array.
+    return pt.moveaxis(conv, -1, axis + conv.ndim - orig_ndim)
+
+
+def geometric_adstock(
+    x, alpha: float = 0.0, l_max: int = 12, normalize: bool = False, axis: int = 0
+):
    """Geometric adstock transformation.

    Adstock with geometric decay assumes advertising effect peaks at the same
@@ -31,29 +94,19 @@ def geometric_adstock(x, alpha: float = 0.0, l_max: int = 12, normalize: bool =
    .. [1] Jin, Yuxue, et al. "Bayesian methods for media mix modeling
       with carryover and shape effects." (2017).
    """
-    cycles = [pt.concatenate([pt.zeros(i), x[: x.shape[0] - i]]) for i in range(l_max)]
-    x_cycle = pt.stack(cycles)
-    w = pt.as_tensor_variable([pt.power(alpha, i) for i in range(l_max)])
-    w = w / pt.sum(w) if normalize else w
-    return pt.dot(w, x_cycle)
-
-
-def geometric_adstock_vectorized(x, alpha, l_max: int = 12, normalize: bool = False):
-    """Vectorized geometric adstock transformation."""
-    cycles = [
-        pt.concatenate(tensor_list=[pt.zeros(shape=x.shape)[:i], x[: x.shape[0] - i]])
-        for i in range(l_max)
-    ]
-    x_cycle = pt.stack(cycles)
-    x_cycle = pt.transpose(x=x_cycle, axes=[1, 2, 0])
-    w = pt.as_tensor_variable([pt.power(alpha, i) for i in range(l_max)])
-    w = pt.transpose(w)[None, ...]
-    w = w / pt.sum(w, axis=2, keepdims=True) if normalize else w
-    return pt.sum(pt.mul(x_cycle, w), axis=2)
+    w = pt.power(pt.as_tensor(alpha)[..., None], pt.arange(l_max, dtype=x.dtype))
+    w = w / pt.sum(w, axis=-1, keepdims=True) if normalize else w
+    return batched_convolution(x, w, axis=axis)


def delayed_adstock(
-    x, alpha: float = 0.0, theta: int = 0, l_max: int = 12, normalize: bool = False
+    x,
+    alpha: float = 0.0,
+    theta: int = 0,
+    l_max: int = 12,
+    normalize: bool = False,
+    axis: int = 0,
):
    """Delayed adstock transformation.

@@ -83,31 +136,12 @@ def delayed_adstock(
    .. [1] Jin, Yuxue, et al. "Bayesian methods for media mix modeling
       with carryover and shape effects." (2017).
    """
-    cycles = [pt.concatenate([pt.zeros(i), x[: x.shape[0] - i]]) for i in range(l_max)]
-    x_cycle = pt.stack(cycles)
-    w = pt.as_tensor_variable(
-        [pt.power(alpha, ((i - theta) ** 2)) for i in range(l_max)]
-    )
-    w = w / pt.sum(w) if normalize else w
-    return pt.dot(w, x_cycle)
-
-
-def delayed_adstock_vectorized(
-    x, alpha, theta, l_max: int = 12, normalize: bool = False
-):
-    """Delayed adstock transformation."""
-    cycles = [
-        pt.concatenate(tensor_list=[pt.zeros(shape=x.shape)[:i], x[: x.shape[0] - i]])
-        for i in range(l_max)
-    ]
-    x_cycle = pt.stack(cycles)
-    x_cycle = pt.transpose(x=x_cycle, axes=[1, 2, 0])
-    w = pt.as_tensor_variable(
-        [pt.power(alpha, ((i - theta) ** 2)) for i in range(l_max)]
-    )
-    w = pt.transpose(w)[None, ...]
-    w = w / pt.sum(w, axis=2, keepdims=True) if normalize else w
-    return pt.sum(pt.mul(x_cycle, w), axis=2)
+    w = pt.power(
+        pt.as_tensor(alpha)[..., None],
+        (pt.arange(l_max, dtype=x.dtype) - pt.as_tensor(theta)[..., None]) ** 2,
+    )
+    w = w / pt.sum(w, axis=-1, keepdims=True) if normalize else w
+    return batched_convolution(x, w, axis=axis)


def logistic_saturation(x, lam: float = 0.5):
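The padding convention matters when reading the new implementation: batched_convolution left-pads x with zeros, so with a two-step kernel of ones the output is y[0] = x[0] and y[t] = x[t] + x[t-1] afterwards, which is exactly what the new tests below assert. A small numeric sketch of that convention (illustrative values, not from the PR):

import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import batched_convolution

x = pt.as_tensor_variable(np.array([1.0, 2.0, 3.0, 4.0]))
w = pt.as_tensor_variable(np.array([1.0, 1.0]))

# Left zero-padding: y[t] = sum_i w[i] * x[t - i], with x[t] = 0 for t < 0.
y = batched_convolution(x, w, axis=0).eval()
np.testing.assert_allclose(y, [1.0, 3.0, 5.0, 7.0])

# The adstock transforms only differ in the kernel they build for this
# convolution; geometric decay uses w_i = alpha**i (optionally normalized).
alpha = 0.5
w_geo = pt.power(pt.as_tensor(alpha), pt.arange(4, dtype="float64"))
np.testing.assert_allclose(w_geo.eval(), [1.0, 0.5, 0.25, 0.125])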
80 changes: 64 additions & 16 deletions tests/mmm/test_transformers.py
@@ -1,13 +1,13 @@
import numpy as np
+import pytensor
import pytensor.tensor as pt
import pytest
from pytensor.tensor.var import TensorVariable

from pymc_marketing.mmm.transformers import (
+    batched_convolution,
    delayed_adstock,
-    delayed_adstock_vectorized,
    geometric_adstock,
-    geometric_adstock_vectorized,
    logistic_saturation,
    tanh_saturation,
)
@@ -27,6 +27,58 @@ def dummy_design_matrix():
    )


+@pytest.fixture(
+    scope="module", params=["ndarray", "TensorConstant", "TensorVariable"], ids=str
+)
+def convolution_inputs(request):
+    x_val = np.ones((3, 4, 5))
+    w_val = np.ones((2))
+    if request.param == "ndarray":
+        return x_val, w_val, None, None
+    elif request.param == "TensorConstant":
+        return pt.as_tensor_variable(x_val), pt.as_tensor_variable(w_val), None, None
+    elif request.param == "TensorVariable":
+        return (
+            pt.dtensor3("x"),
+            pt.specify_shape(pt.dvector("w"), w_val.shape),
+            x_val,
+            w_val,
+        )
+
+
+@pytest.fixture(scope="module", params=[0, 1, -1])
+def convolution_axis(request):
+    return request.param
+
+
+def test_batched_convolution(convolution_inputs, convolution_axis):
+    x, w, x_val, w_val = convolution_inputs
+    y = batched_convolution(x, w, convolution_axis)
+    if x_val is None:
+        y_val = y.eval()
+        expected_shape = getattr(x, "value", x).shape
+    else:
+        y_val = pytensor.function([x, w], y)(x_val, w_val)
+        expected_shape = x_val.shape
+    assert y_val.shape == expected_shape
+    y_val = np.moveaxis(y_val, convolution_axis, 0)
+    x_val = np.moveaxis(
+        x_val if x_val is not None else getattr(x, "value", x), convolution_axis, 0
+    )
+    assert np.allclose(y_val[0], x_val[0])
+    assert np.allclose(y_val[1:], x_val[1:] + x_val[:-1])
+
+
+def test_batched_convolution_broadcasting():
+    x_val = np.random.default_rng(42).normal(size=(3, 1, 5))
+    x = pt.as_tensor_variable(x_val)
+    w = pt.as_tensor_variable(np.ones((1, 1, 4, 2)))
+    y = batched_convolution(x, w, axis=-1).eval()
+    assert y.shape == (1, 3, 4, 5)
+    assert np.allclose(y[..., 0], x_val[..., 0])
+    assert np.allclose(y[..., 1:], x_val[..., 1:] + x_val[..., :-1])
+

class TestsAdstockTransformers:
    def test_geometric_adstock_x_zero(self):
        x = np.zeros(shape=(100))
@@ -62,14 +62,12 @@ def test_delayed_adstock_x_zero(self):
        y = delayed_adstock(x=x, alpha=0.2, theta=2, l_max=4)
        np.testing.assert_array_equal(x=x, y=y.eval())

-    def test_geometric_adstock_vactorized(self, dummy_design_matrix):
+    def test_geometric_adstock_vectorized(self, dummy_design_matrix):
        x = dummy_design_matrix.copy()
        x_tensor = pt.as_tensor_variable(x)
        alpha = [0.9, 0.33, 0.5, 0.1, 0.0]
        alpha_tensor = pt.as_tensor_variable(alpha)
-        y_tensor = geometric_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, l_max=12
-        )
+        y_tensor = geometric_adstock(x=x_tensor, alpha=alpha_tensor, l_max=12, axis=0)
        y = y_tensor.eval()

        y_tensors = [
@@ -80,15 +130,15 @@ def test_geometric_adstock_vactorized(self, dummy_design_matrix):
        assert y.shape == x.shape
        np.testing.assert_almost_equal(actual=y, desired=ys, decimal=12)

-    def test_delayed_adstock_vactorized(self, dummy_design_matrix):
+    def test_delayed_adstock_vectorized(self, dummy_design_matrix):
        x = dummy_design_matrix
        x_tensor = pt.as_tensor_variable(x)
        alpha = [0.9, 0.33, 0.5, 0.1, 0.0]
        alpha_tensor = pt.as_tensor_variable(alpha)
        theta = [0, 1, 2, 3, 4]
        theta_tensor = pt.as_tensor_variable(theta)
-        y_tensor = delayed_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12
+        y_tensor = delayed_adstock(
+            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12, axis=0
        )
        y = y_tensor.eval()

@@ -220,7 +270,7 @@ def test_logistic_saturation_delayed_adstock_composition(
        assert z2_eval.max() <= 1
        assert z2_eval.min() >= 0

-    def test_geometric_adstock_vactorized_logistic_saturation(
+    def test_geometric_adstock_vectorized_logistic_saturation(
        self, dummy_design_matrix
    ):
        x = dummy_design_matrix.copy()
@@ -229,9 +279,7 @@ def test_geometric_adstock_vactorized_logistic_saturation(
        alpha_tensor = pt.as_tensor_variable(alpha)
        lam = [0.5, 1.0, 2.0, 3.0, 4.0]
        lam_tensor = pt.as_tensor_variable(lam)
-        y_tensor = geometric_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, l_max=12
-        )
+        y_tensor = geometric_adstock(x=x_tensor, alpha=alpha_tensor, l_max=12, axis=0)
        z_tensor = logistic_saturation(x=y_tensor, lam=lam_tensor)
        z = z_tensor.eval()

@@ -246,7 +294,7 @@ def test_geometric_adstock_vactorized_logistic_saturation(
        assert zs.shape == x.shape
        np.testing.assert_almost_equal(actual=z, desired=zs, decimal=12)

-    def test_delayed_adstock_vactorized_logistic_saturation(self, dummy_design_matrix):
+    def test_delayed_adstock_vectorized_logistic_saturation(self, dummy_design_matrix):
        x = dummy_design_matrix.copy()
        x_tensor = pt.as_tensor_variable(x)
        alpha = [0.9, 0.33, 0.5, 0.1, 0.0]
@@ -255,8 +303,8 @@ def test_delayed_adstock_vactorized_logistic_saturation(self, dummy_design_matrix):
        theta_tensor = pt.as_tensor_variable(theta)
        lam = [0.5, 1.0, 2.0, 3.0, 4.0]
        lam_tensor = pt.as_tensor_variable(lam)
-        y_tensor = delayed_adstock_vectorized(
-            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12
+        y_tensor = delayed_adstock(
+            x=x_tensor, alpha=alpha_tensor, theta=theta_tensor, l_max=12, axis=0
        )
        z_tensor = logistic_saturation(x=y_tensor, lam=lam_tensor)
        z = z_tensor.eval()
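As a closing illustration (not part of the PR's test suite), the delayed-adstock kernel w_i = alpha**((i - theta)**2) peaks at lag theta, and both alpha and theta broadcast per channel through the same batched_convolution path; the shapes and values here are assumptions for the sketch:

import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import delayed_adstock

# Per-channel decay and delay; the kernel for each channel peaks at its theta.
x = pt.as_tensor_variable(np.random.default_rng(0).normal(size=(30, 2)))
alpha = pt.as_tensor_variable([0.6, 0.8])
theta = pt.as_tensor_variable([1, 3])
y = delayed_adstock(
    x=x, alpha=alpha, theta=theta, l_max=6, normalize=True, axis=0
)
assert y.eval().shape == (30, 2)

# The unnormalized kernel for alpha=0.6, theta=3 has its maximum at lag 3.
w = pt.power(
    pt.as_tensor(0.6), (pt.arange(6, dtype="float64") - 3.0) ** 2
)
assert int(np.argmax(w.eval())) == 3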