Skip to content

Commit f5755cd

Browse files
cetagostini authored and twiecki committed
Solving optimizer issues & typos (#933)
* Correcting typo num_days by horizon
* Correcting typo num_days by horizon and scaler
* Running notebooks
* Update UML Diagrams
* Rename horizon by periods
* Adding test requested to check budget outputs
* Running notebooks
* Update UML Diagrams
* Small notebook missing change.
* Correction in tests
* Change on name
* running notebook modifying function
* Update UML Diagrams
1 parent b528016 commit f5755cd

8 files changed

+1606
-1464
lines changed

docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb

+244-222
Large diffs are not rendered by default.

docs/source/notebooks/mmm/mmm_example.ipynb

+1,286-1,198
Large diffs are not rendered by default.

docs/source/notebooks/mmm/model.nc

153 KB
Binary file not shown.

docs/source/uml/classes_mmm.png

2.35 KB
Loading

pymc_marketing/mmm/budget_optimizer.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ class BudgetOptimizer(BaseModel):
5050
The adstock class.
5151
saturation : SaturationTransformation
5252
The saturation class.
53-
num_days : int
54-
The number of days.
53+
num_periods : int
54+
The number of time units.
5555
parameters : dict
5656
A dictionary of parameters for each channel.
5757
adstock_first : bool, optional
@@ -65,10 +65,17 @@ class BudgetOptimizer(BaseModel):
6565
saturation: SaturationTransformation = Field(
6666
..., description="The saturation transformation class."
6767
)
68-
num_days: int = Field(..., gt=0, description="The number of days.")
68+
num_periods: int = Field(
69+
...,
70+
gt=0,
71+
description="The number of time units at time granularity which the budget is to be allocated.",
72+
)
6973
parameters: dict[str, dict[str, dict[str, float]]] = Field(
7074
..., description="A dictionary of parameters for each channel."
7175
)
76+
scales: np.ndarray = Field(
77+
..., description="The scale parameter for each channel variable"
78+
)
7279
adstock_first: bool = Field(
7380
True,
7481
description="Whether to apply adstock transformation first or saturation transformation first.",
@@ -97,7 +104,7 @@ def objective(self, budgets: list[float]) -> float:
97104
else (self.saturation, self.adstock)
98105
)
99106
for idx, (_channel, params) in enumerate(self.parameters.items()):
100-
budget = budgets[idx]
107+
budget = budgets[idx] / self.scales[idx]
101108
first_params = (
102109
params["adstock_params"]
103110
if self.adstock_first
@@ -108,7 +115,7 @@ def objective(self, budgets: list[float]) -> float:
108115
if self.adstock_first
109116
else params["adstock_params"]
110117
)
111-
spend = np.full(self.num_days, budget)
118+
spend = np.full(self.num_periods, budget)
112119
spend_extended = np.concatenate([spend, np.zeros(self.adstock.l_max)])
113120
transformed_spend = second_transform.function(
114121
x=first_transform.function(x=spend_extended, **first_params),

pymc_marketing/mmm/delayed_saturated_mmm.py

+19-35
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,7 @@ def _create_synth_dataset(
19901990
time_granularity: str,
19911991
time_length: int,
19921992
lag: int,
1993+
noise_level: float = 0.01,
19931994
) -> pd.DataFrame:
19941995
"""
19951996
Create a synthetic dataset based on the given allocation strategy (Budget) and time granularity.
@@ -2014,6 +2015,8 @@ def _create_synth_dataset(
20142015
The length of the synthetic dataset in terms of the time granularity.
20152016
lag : int
20162017
The lag value (not used in this function).
2018+
noise_level : int
2019+
The level of noise added to the allocation strategy (by default 1%).
20172020
20182021
Returns
20192022
-------
@@ -2063,7 +2066,9 @@ def _create_synth_dataset(
20632066
self.date_column: pd.to_datetime(new_date),
20642067
**{
20652068
channel: allocation_strategy.get(channel, 0)
2066-
+ np.random.normal(0, 0.1 * allocation_strategy.get(channel, 0))
2069+
+ np.random.normal(
2070+
0, noise_level * allocation_strategy.get(channel, 0)
2071+
)
20672072
for channel in channels
20682073
},
20692074
**{control: 0 for control in _controls},
@@ -2078,10 +2083,11 @@ def allocate_budget_to_maximize_response(
20782083
self,
20792084
budget: float | int,
20802085
time_granularity: str,
2081-
num_days: int,
2082-
budget_bounds: dict[str, list[Any]] | None = None,
2086+
num_periods: int,
2087+
budget_bounds: dict[str, tuple[float, float]] | None = None,
20832088
custom_constraints: dict[str, float] | None = None,
20842089
quantile: float = 0.5,
2090+
noise_level: float = 0.01,
20852091
) -> az.InferenceData:
20862092
"""
20872093
Allocate the given budget to maximize the response over a specified time period.
@@ -2101,9 +2107,9 @@ def allocate_budget_to_maximize_response(
21012107
budget : float or int
21022108
The total budget to be allocated.
21032109
time_granularity : str
2104-
The granularity of the time periods (e.g., 'daily', 'weekly', 'monthly').
2105-
num_days : int
2106-
The number of days over which the budget is to be allocated.
2110+
The granularity of the time units (num_periods) (e.g., 'daily', 'weekly', 'monthly').
2111+
num_periods : float
2112+
The number of time units over which the budget is to be allocated.
21072113
budget_bounds : dict[str, list[Any]], optional
21082114
A dictionary specifying the lower and upper bounds for the budget allocation
21092115
for each channel. If None, no bounds are applied.
@@ -2126,54 +2132,32 @@ def allocate_budget_to_maximize_response(
21262132
quantile=quantile
21272133
)
21282134

2129-
scale_budget = budget / self.channel_transformer["scaler"].scale_.max()
2130-
2131-
if isinstance(budget_bounds, dict):
2132-
scale_budget_bounds: dict[str, tuple[float, float]] | None = {
2133-
k: (
2134-
v[0] / self.channel_transformer["scaler"].scale_.max(),
2135-
v[1] / self.channel_transformer["scaler"].scale_.max(),
2136-
)
2137-
for k, v in budget_bounds.items()
2138-
}
2139-
else:
2140-
scale_budget_bounds = None
2141-
21422135
allocator = BudgetOptimizer(
21432136
adstock=self.adstock,
21442137
saturation=self.saturation,
21452138
parameters=parameters_mid,
21462139
adstock_first=self.adstock_first,
2147-
num_days=num_days,
2140+
num_periods=num_periods,
2141+
scales=self.channel_transformer["scaler"].scale_,
21482142
)
21492143

21502144
self.optimal_allocation_dict, _ = allocator.allocate_budget(
2151-
total_budget=scale_budget,
2152-
budget_bounds=scale_budget_bounds,
2145+
total_budget=budget,
2146+
budget_bounds=budget_bounds,
21532147
custom_constraints=custom_constraints,
21542148
)
21552149

2156-
inverse_scaled_channel_spend = self.channel_transformer.inverse_transform(
2157-
np.array([list(self.optimal_allocation_dict.values())])
2158-
)
2159-
original_scale_allocation_dict = dict(
2160-
zip(
2161-
self.optimal_allocation_dict.keys(),
2162-
inverse_scaled_channel_spend[0],
2163-
strict=False,
2164-
)
2165-
)
2166-
21672150
synth_dataset = self._create_synth_dataset(
21682151
df=self.X,
21692152
date_column=self.date_column,
2170-
allocation_strategy=original_scale_allocation_dict,
2153+
allocation_strategy=self.optimal_allocation_dict,
21712154
channels=self.channel_columns,
21722155
controls=self.control_columns,
21732156
target_col=self.output_var,
21742157
time_granularity=time_granularity,
2175-
time_length=num_days,
2158+
time_length=num_periods,
21762159
lag=self.adstock.l_max,
2160+
noise_level=noise_level,
21772161
)
21782162

21792163
return self.sample_posterior_predictive(

tests/mmm/test_budget_optimizer.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ def test_allocate_budget(
8181
optimizer = BudgetOptimizer(
8282
adstock=adstock,
8383
saturation=saturation,
84-
num_days=30,
84+
num_periods=30,
8585
parameters=parameters,
8686
adstock_first=True,
87+
scales=np.array([1, 1]),
8788
)
8889

8990
# Allocate Budget
@@ -130,9 +131,10 @@ def test_allocate_budget_zero_total(
130131
optimizer = BudgetOptimizer(
131132
adstock=adstock,
132133
saturation=saturation,
133-
num_days=30,
134+
num_periods=30,
134135
parameters=parameters,
135136
adstock_first=True,
137+
scales=np.array([1, 1]),
136138
)
137139
match = "Using default equality constraint"
138140
with pytest.warns(UserWarning, match=match):
@@ -168,9 +170,10 @@ def test_allocate_budget_custom_minimize_args(minimize_mock) -> None:
168170
optimizer = optimizer = BudgetOptimizer(
169171
adstock=adstock,
170172
saturation=saturation,
171-
num_days=30,
173+
num_periods=30,
172174
parameters=parameters,
173175
adstock_first=True,
176+
scales=np.array([1, 1]),
174177
)
175178
match = "Using default equality constraint"
176179
with pytest.warns(UserWarning, match=match):
@@ -226,9 +229,10 @@ def test_allocate_budget_infeasible_constraints(
226229
optimizer = optimizer = BudgetOptimizer(
227230
adstock=adstock,
228231
saturation=saturation,
229-
num_days=30,
232+
num_periods=30,
230233
parameters=parameters,
231234
adstock_first=True,
235+
scales=np.array([1, 1]),
232236
)
233237

234238
with pytest.raises(MinimizeException, match="Optimization failed"):

tests/mmm/test_delayed_saturated_mmm.py

+37
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,43 @@ def test_channel_contributions_forward_pass_recovers_contribution(
466466
y=mmm_fitted.y.max(),
467467
)
468468

469+
def test_allocate_budget_to_maximize_response(self, mmm_fitted: MMM) -> None:
470+
budget = 2.0
471+
num_periods = 8
472+
time_granularity = "weekly"
473+
budget_bounds = {"channel_1": [0.5, 1.2], "channel_2": [0.5, 1.5]}
474+
noise_level = 0.1
475+
476+
# Call the method
477+
inference_data = mmm_fitted.allocate_budget_to_maximize_response(
478+
budget=budget,
479+
time_granularity=time_granularity,
480+
num_periods=num_periods,
481+
budget_bounds=budget_bounds,
482+
noise_level=noise_level,
483+
)
484+
485+
inference_periods = len(inference_data.coords["date"])
486+
487+
# a) Total budget consistency check
488+
allocated_budget = sum(mmm_fitted.optimal_allocation_dict.values())
489+
assert np.isclose(
490+
allocated_budget, budget, rtol=1e-5
491+
), f"Total allocated budget {allocated_budget} does not match expected budget {budget}"
492+
493+
# b) Budget boundaries check
494+
for channel, bounds in budget_bounds.items():
495+
allocation = mmm_fitted.optimal_allocation_dict[channel]
496+
lower_bound, upper_bound = bounds
497+
assert (
498+
lower_bound <= allocation <= upper_bound
499+
), f"Channel {channel} allocation {allocation} is out of bounds ({lower_bound}, {upper_bound})"
500+
501+
# c) num_periods consistency check
502+
assert (
503+
inference_periods == num_periods
504+
), f"Number of periods in the data {inference_periods} does not match the expected {num_periods}"
505+
469506
@pytest.mark.parametrize(
470507
argnames="original_scale",
471508
argvalues=[False, True],

0 commit comments

Comments
 (0)