From 762e0e4caf62ab001103e987f157c9d0f2c982cf Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Wed, 31 Mar 2021 12:50:50 -0400 Subject: [PATCH 01/27] Implement DynamicDML * DynamicDML with all the OrthoLearner functionality * API tests similar to the DML ones --- econml/_cate_estimator.py | 2 +- econml/dml/__init__.py | 7 +- econml/dml/dynamic_dml.py | 660 +++++++++++++++++++++++++++++++ econml/tests/test_dynamic_dml.py | 258 ++++++++++++ 4 files changed, 925 insertions(+), 2 deletions(-) create mode 100644 econml/dml/dynamic_dml.py create mode 100644 econml/tests/test_dynamic_dml.py diff --git a/econml/_cate_estimator.py b/econml/_cate_estimator.py index 787dc1045..f9333b1bd 100644 --- a/econml/_cate_estimator.py +++ b/econml/_cate_estimator.py @@ -563,7 +563,7 @@ def effect(self, X=None, *, T0, T1): """ Calculate the heterogeneous treatment effect :math:`\\tau(X, T0, T1)`. - The effect is calculatred between the two treatment points + The effect is calculated between the two treatment points conditional on a vector of features on a set of m test samples :math:`\\{T0_i, T1_i, X_i\\}`. Since this class assumes a linear effect, only the difference between T0ᵢ and T1ᵢ matters for this computation. diff --git a/econml/dml/__init__.py b/econml/dml/__init__.py index a88d3b693..2c3785d34 100644 --- a/econml/dml/__init__.py +++ b/econml/dml/__init__.py @@ -33,10 +33,14 @@ Orthogonal Statistical Learning. ACM Conference on Learning Theory. `<https://arxiv.org/abs/1901.09036>`_ +.. [dynamicdml] Greg Lewis and Vasilis Syrgkanis. + Double/Debiased Machine Learning for Dynamic Treatment Effects. + `<https://arxiv.org/abs/2002.07285>`_, 2021. 
""" from .dml import (DML, LinearDML, SparseLinearDML, KernelDML, NonParamDML, ForestDML) +from .dynamic_dml import DynamicDML from .causal_forest import CausalForestDML __all__ = ["DML", @@ -45,4 +49,5 @@ "KernelDML", "NonParamDML", "ForestDML", - "CausalForestDML", ] + "CausalForestDML", + "DynamicDML"] diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py new file mode 100644 index 000000000..b5cd6d917 --- /dev/null +++ b/econml/dml/dynamic_dml.py @@ -0,0 +1,660 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import abc +import numpy as np +from warnings import warn +from sklearn.base import clone +from sklearn.model_selection import GroupKFold +from scipy.stats import norm +from sklearn.linear_model import (ElasticNetCV, LassoCV, LogisticRegressionCV) +from ..sklearn_extensions.linear_model import (StatsModelsLinearRegression, WeightedLassoCVWrapper) +from ..sklearn_extensions.model_selection import WeightedStratifiedKFold +from .dml import _FirstStageWrapper, _FinalWrapper +from .._cate_estimator import TreatmentExpansionMixin, LinearModelFinalCateEstimatorMixin +from .._ortho_learner import _OrthoLearner +from ..utilities import (_deprecate_positional, add_intercept, + broadcast_unit_treatments, check_high_dimensional, + cross_product, deprecated, fit_with_groups, + hstack, inverse_onehot, ndim, reshape, + reshape_treatmentwise_effects, shape, transpose, + get_feature_names_or_default) + + +class _DynamicModelNuisance: + """ + Nuisance model fits the model_y and model_t at fit time and at predict time + calculates the residual Y and residual T based on the fitted models and returns + the residuals as two nuisance parameters. 
+ """ + + def __init__(self, model_y, model_t, n_periods): + self._model_y = model_y + self._model_t = model_t + self.n_periods = n_periods + + def fit(self, Y, T, X=None, W=None, sample_weight=None, groups=None): + """Fit a series of nuisance models for each period or period pairs""" + assert Y.shape[0] % self.n_periods == 0, \ + "Length of training data should be an integer multiple of time periods." + inds_train = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] + self._model_y_trained = {} + self._model_t_trained = {} + for kappa in np.arange(self.n_periods): + self._model_y_trained[kappa] = clone(self._model_y, safe=False).fit( + self._filter_or_None(X, inds_train + kappa), + self._filter_or_None( + W, inds_train + kappa), + Y[inds_train + self.n_periods - 1]) + self._model_t_trained[kappa] = {} + for tau in np.arange(kappa, self.n_periods): + self._model_t_trained[kappa][tau] = clone(self._model_t, safe=False).fit( + self._filter_or_None(X, inds_train + kappa), + self._filter_or_None(W, inds_train + kappa), + T[inds_train + tau]) + return self + + def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): + """Calculate nuisances for each period or period pairs. + + Returns + ------- + Y_res : (n, d_y) matrix or vector of length n + Y residuals for each period in panel format. + This shape is required for _OrthoLearner's crossfitting. + T_res : (n, d_t, n_periods) matrix + T residuals for pairs of periods (kappa, tau), where the data is in panel format for kappa + and in index form for tau. For example, the residuals for (kappa, tau) can be retrieved via + T_res[np.arange(n) % n_periods == kappa, ..., tau]. For tau < kappa, the entries of this + matrix are np.nan. + This shape is required for _OrthoLearner's crossfitting. + """ + assert Y.shape[0] % self.n_periods == 0, \ + "Length of training data should be an integer multiple of time periods." 
+ inds_predict = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] + Y_res = np.full(Y.shape, np.nan) + T_res = np.full(T.shape + (self.n_periods, ), np.nan) + shape_formatter = self._get_shape_formatter(X, W) + for kappa in np.arange(self.n_periods): + Y_slice = Y[inds_predict + self.n_periods - 1] + Y_pred = self._model_y_trained[kappa].predict( + self._filter_or_None(X, inds_predict + kappa), + self._filter_or_None(W, inds_predict + kappa)) + Y_res[np.arange(Y.shape[0]) % self.n_periods == kappa] = Y_slice\ + - shape_formatter(Y_slice, Y_pred).reshape(Y_slice.shape) + for tau in np.arange(kappa, self.n_periods): + T_slice = T[inds_predict + tau] + T_pred = self._model_t_trained[kappa][tau].predict( + self._filter_or_None(X, inds_predict + kappa), + self._filter_or_None(W, inds_predict + kappa)) + T_res[np.arange(Y.shape[0]) % self.n_periods == kappa, ..., tau] = T_slice\ + - shape_formatter(T_slice, T_pred).reshape(T_slice.shape) + return Y_res, T_res + + def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): + # TODO: implement scores + # TODO: fix correctness? + assert Y.shape[0] % self.n_periods == 0, \ + "Length of training data should be an integer multiple of time periods." 
+ inds_score = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] + if hasattr(self._model_y, 'score'): + Y_score = np.full((self.n_periods, ), np.nan) + for kappa in np.arange(self.n_periods): + Y_score[kappa] = self._model_y_trained[kappa].score( + self._filter_or_None(X, inds_score + kappa), + self._filter_or_None(W, inds_score + kappa), + Y[inds_score + self.n_periods - 1]) + else: + Y_score = None + if hasattr(self._model_t, 'score'): + T_score = np.full((self.n_periods, self.n_periods), np.nan) + for kappa in np.arange(self.n_periods): + for tau in np.arange(kappa, self.n_periods): + T_score[kappa][tau] = self._model_t_trained[kappa][tau].score( + self._filter_or_None(X, inds_score + kappa), + self._filter_or_None(W, inds_score + kappa), + T[inds_score + tau]) + else: + T_score = None + return Y_score, T_score + + def _get_shape_formatter(self, X, W): + if (X is None) and (W is None): + return lambda x, x_pred: np.tile(x_pred.reshape(1, -1), (x.shape[0], 1)) + return lambda x, x_pred: x_pred + + def _filter_or_None(self, X, filter_idx): + return None if X is None else X[filter_idx] + + +class _DynamicModelFinal: + """ + Final model at fit time, fits a residual on residual regression with a heterogeneous coefficient + that depends on X, i.e. + + .. math :: + Y - E[Y | X, W] = \\theta(X) \\cdot (T - E[T | X, W]) + \\epsilon + + and at predict time returns :math:`\\theta(X)`. The score method returns the MSE of this final + residual on residual regression. + Assumes model final is parametric with no intercept. 
+ """ + # TODO: update docs + + def __init__(self, model_final, n_periods): + self._model_final = model_final + self.n_periods = n_periods + self._model_final_trained = {k: clone(self._model_final, safe=False) for k in np.arange(n_periods)} + + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + # TODO: handle sample weight, sample var + Y_res, T_res = nuisances + self._d_y = Y.shape[1:] + for kappa in np.arange(self.n_periods): + period = self.n_periods - 1 - kappa + period_filter = self.period_filter_gen(period, Y.shape[0]) + Y_adj = Y_res[period_filter].copy() + if kappa > 0: + Y_adj -= np.sum( + [self._model_final_trained[tau].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - tau, Y.shape[0])] if X is not None else None, + T_res[period_filter, ..., self.n_periods - 1 - tau] + ) for tau in np.arange(kappa)], axis=0) + self._model_final_trained[kappa].fit( + X[period_filter] if X is not None else None, T[period_filter], + T_res[period_filter, ..., period], Y_adj) + + return self + + def predict(self, X=None): + """ + Return shape: m x dy x (p*dt) + """ + d_t_tuple = self._model_final_trained[0]._d_t + d_t = d_t_tuple[0] if d_t_tuple else 1 + x_dy_shape = (X.shape[0] if X is not None else 1, ) + \ + self._model_final_trained[0]._d_y + preds = np.zeros( + x_dy_shape + + (self.n_periods * d_t, ) + ) + for kappa in range(self.n_periods): + preds[..., kappa * d_t: (kappa + 1) * d_t] = \ + self._model_final_trained[kappa].predict(X).reshape( + x_dy_shape + (d_t, ) + ) + return preds + + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + # TODO: implement score + return None + + def period_filter_gen(self, p, n): + return (np.arange(n) % self.n_periods == p) + + +class _LinearDynamicModelFinal(_DynamicModelFinal): + """Wrapper for the DynamicModelFinal with StatsModelsLinearRegression final model. 
+ + The final model is a linear model with (d_t*n_periods) coefficients. + This model is defined after the coefficients and covariance are calculated. + """ + + def __init__(self, model_final, n_periods): + super().__init__(model_final, n_periods) + self.model_final_ = StatsModelsLinearRegression(fit_intercept=False) + + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + super().fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) + # Compose final model + cov = self._get_cov(nuisances, X) + coef = self._get_coef_() + self.model_final_._n_out = self._d_y[0] if self._d_y else 0 + self.model_final_._param_var = cov / (Y.shape[0] / self.n_periods) + self.model_final_._param = coef.T if self.model_final_._n_out else coef + + def _get_coef_(self): + period_coefs = np.array([self._model_final_trained[kappa]._model.coef_ for kappa in range(self.n_periods)]) + if self._d_y: + return np.array([ + np.array([period_coefs[k, i, :] for k in range(self.n_periods)]).flatten() + for i in range(self._d_y[0]) + ]) + return period_coefs.flatten() + + def _get_cov(self, nuisances, X): + if self._d_y: + return np.array( + [self._fit_single_output_cov((nuisances[0][:, i], nuisances[1]), X, i) for i in range(self._d_y[0])] + ) + return self._fit_single_output_cov(nuisances, X, -1) + + def _fit_single_output_cov(self, nuisances, X, y_index): + """ Calculates the covariance (n_periods*n_treatments) + x (n_periods*n_treatments) matrix for a single outcome. 
+ """ + Y_res, T_res = nuisances + XT_res = np.array([ + [ + self._model_final_trained[0]._combine( + X[self.period_filter_gen(tau, Y_res.shape[0])] if X is not None else None, + T_res[self.period_filter_gen(kappa, Y_res.shape[0]), ..., tau], + fitting=False + ) + for tau in range(self.n_periods) + ] + for kappa in range(self.n_periods) + ]) + d_xt = XT_res.shape[-1] + M = np.zeros((self.n_periods * d_xt, + self.n_periods * d_xt)) + Sigma = np.zeros((self.n_periods * d_xt, + self.n_periods * d_xt)) + for kappa in np.arange(self.n_periods): + # Calculating the (kappa, kappa) block entry (of size n_treatments x n_treatments) of matrix Sigma + period = self.n_periods - 1 - kappa + period_filter = self.period_filter_gen(period, Y_res.shape[0]) + Y_diff = np.sum([ + self._model_final_trained[tau].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - tau, + Y_res.shape[0])] if X is not None else None, + T_res[period_filter, ..., self.n_periods - 1 - tau]) + for tau in np.arange(kappa + 1) + ], axis=0) + res_epsilon = (Y_res[period_filter] - + (Y_diff[:, y_index] if y_index >= 0 else Y_diff) + ).reshape(-1, 1, 1) + cur_resT = XT_res[period][period] + cov_cur_resT = cur_resT.reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt) + sigma_kappa = np.mean((res_epsilon**2) * cov_cur_resT, axis=0) + Sigma[kappa * d_xt:(kappa + 1) * d_xt, + kappa * d_xt:(kappa + 1) * d_xt] = sigma_kappa + for tau in np.arange(kappa + 1): + # Calculating the (kappa, tau) block entry (of size n_treatments x n_treatments) of matrix M + m_kappa_tau = np.mean( + XT_res[period][self.n_periods - 1 - tau].reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt), + axis=0) + M[kappa * d_xt:(kappa + 1) * d_xt, + tau * d_xt:(tau + 1) * d_xt] = m_kappa_tau + return np.linalg.inv(M) @ Sigma @ np.linalg.inv(M).T + + +class _DynamicFinalWrapper(_FinalWrapper): + + def predict_with_res(self, X, T_res): + fts = self._combine(X, T_res, fitting=False) + prediction = self._model.predict(fts) + if 
self._intercept is not None: + prediction -= self._intercept + return reshape(prediction, (prediction.shape[0],) + self._d_y) + + +class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): + """CATE estimator for dynamic treatment effect estimation. + + This estimator is an extension of the Double ML approach for treatments assigned sequentially + over time periods. + + The estimator is a special case of an :class:`_OrthoLearner` estimator, so it follows the two + stage process, where a set of nuisance functions are estimated in the first stage in a crossfitting + manner and a final stage estimates the CATE model. See the documentation of + :class:`._OrthoLearner` for a description of this two stage process. + + Parameters + ---------- + model_y: estimator or 'auto', optional (default is 'auto') + The estimator for fitting the response to the features. Must implement + `fit` and `predict` methods. + If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen. + + model_t: estimator or 'auto', optional (default is 'auto') + The estimator for fitting the treatment to the features. + If estimator, it must implement `fit` and `predict` methods; + If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be applied for discrete treatment, + and :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` + will be applied for continuous treatment. + + featurizer : :term:`transformer`, optional, default None + Must support fit_transform and transform. Used to create composite features in the final CATE regression. + It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X). + If featurizer=None, then CATE is trained on X. + + fit_cate_intercept : bool, optional, default True + Whether the linear CATE model should have a constant term. 
+ + linear_first_stages: bool + Whether the first stage models are linear (in which case we will expand the features passed to + `model_y` accordingly) + + discrete_treatment: bool, optional (default is ``False``) + Whether the treatment values should be treated as categorical, rather than continuous, quantities + + categories: 'auto' or list, default 'auto' + The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). + The first category will be treated as the control treatment. + + cv: int, cross-validation generator or an iterable, optional (Default=2) + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 3-fold cross-validation, + - integer, to specify the number of folds. + - :term:`CV splitter` + - An iterable yielding (train, test) splits as arrays of indices. + Iterables should make sure a group belongs to a single split. + + For integer/None inputs, :class:`~sklearn.model_selection.GroupKFold` is used + + Unless an iterable is used, we call `split(X, T, groups)` to generate the splits. + + mc_iters: int, optional (default=None) + The number of times to rerun the first stage models to reduce the variance of the nuisances. + + mc_agg: {'mean', 'median'}, optional (default='mean') + How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of + cross-fitting. + + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; + If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used + by :mod:`np.random`. + + Examples + -------- + A simple example with default models: + + .. testcode:: + :hide: + + import numpy as np + np.set_printoptions(suppress=True) + + .. 
testcode:: + + from econml.dml import DynamicDML + + np.random.seed(123) + + n_panels = 100 # number of panels + n_periods = 3 # number of time periods per panel + n = n_panels * n_periods + groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0) + X = np.random.normal(size=(n, 1)) + T = np.random.normal(size=(n, 2)) + y = np.random.normal(size=(n, )) + est = DynamicDML() + est.fit(y, T, X=X, W=None, groups=groups, inference="auto") + + >>> est.const_marginal_effect(X[:2]) + array([[-0.012..., 0.031..., 0.069..., 0.111..., -0.349..., + -0.076...], + [-0.411..., -0.088..., 0.021..., -0.171..., -0.126... , + 0.397...]]) + >>> est.effect(X[:2], T0=0, T1=1) + array([-0.225..., -0.378...]) + >>> est.effect(X[:2], T0=np.zeros((2, n_periods*T.shape[1])), T1=np.ones((2, n_periods*T.shape[1]))) + array([-0.225..., -0.378...]) + >>> est.coef_ + array([[-0.191...], + [-0.057...], + [-0.023...], + [-0.136...], + [ 0.107...], + [ 0.227...]]) + >>> est.coef__interval() + (array([[-0.333...], + [-0.171...], + [-0.158...], + [-0.352...], + [-0.045...], + [ 0.049...]]), + array([[-0.050...], + [ 0.056...], + [ 0.112...], + [ 0.079...], + [ 0.260...], + [ 0.405...]])) + """ + + def __init__(self, *, + model_y='auto', model_t='auto', + featurizer=None, + fit_cate_intercept=True, + linear_first_stages=False, + discrete_treatment=False, + categories='auto', + cv=2, + mc_iters=None, + mc_agg='mean', + random_state=None): + self.fit_cate_intercept = fit_cate_intercept + self.linear_first_stages = linear_first_stages + self.featurizer = clone(featurizer, safe=False) + self.model_y = clone(model_y, safe=False) + self.model_t = clone(model_t, safe=False) + super().__init__(discrete_treatment=discrete_treatment, + discrete_instrument=False, + categories=categories, + cv=GroupKFold(cv) if isinstance(cv, int) else cv, + mc_iters=mc_iters, + mc_agg=mc_agg, + random_state=random_state) + + def _gen_featurizer(self): + return clone(self.featurizer, safe=False) + + def 
_gen_model_y(self): + if self.model_y == 'auto': + model_y = WeightedLassoCVWrapper(random_state=self.random_state) + else: + model_y = clone(self.model_y, safe=False) + return _FirstStageWrapper(model_y, True, self._gen_featurizer(), + self.linear_first_stages, self.discrete_treatment) + + def _gen_model_t(self): + if self.model_t == 'auto': + if self.discrete_treatment: + model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=self.random_state), + random_state=self.random_state) + else: + model_t = WeightedLassoCVWrapper(random_state=self.random_state) + else: + model_t = clone(self.model_t, safe=False) + return _FirstStageWrapper(model_t, False, self._gen_featurizer(), + self.linear_first_stages, self.discrete_treatment) + + def _gen_model_final(self): + return StatsModelsLinearRegression(fit_intercept=False) + + def _gen_ortho_learner_model_nuisance(self, n_periods): + return _DynamicModelNuisance( + model_t=self._gen_model_t(), + model_y=self._gen_model_y(), + n_periods=n_periods) + + def _gen_ortho_learner_model_final(self, n_periods): + wrapped_final_model = _DynamicFinalWrapper( + StatsModelsLinearRegression(fit_intercept=False), + fit_cate_intercept=self.fit_cate_intercept, + featurizer=self.featurizer, + use_weight_trick=False) + return _LinearDynamicModelFinal(wrapped_final_model, n_periods=n_periods) + + def _prefit(self, Y, T, *args, groups=None, only_final=False, **kwargs): + u_periods = np.unique(np.bincount(groups.astype(int))) + if len(u_periods) > 1: + raise AttributeError( + "Imbalanced panel. Method currently expects only panels with equal number of periods. 
Pad your data") + self._n_periods = u_periods[0] + # generate an instance of the final model + self._ortho_learner_model_final = self._gen_ortho_learner_model_final(self._n_periods) + if not only_final: + # generate an instance of the nuisance model + self._ortho_learner_model_nuisance = self._gen_ortho_learner_model_nuisance(self._n_periods) + TreatmentExpansionMixin._prefit(self, Y, T, *args, **kwargs) + + def _postfit(self, Y, T, *args, **kwargs): + super()._postfit(Y, T, *args, **kwargs) + # Set _d_t to effective number of treatments + self._d_t = (self._n_periods * self._d_t[0], ) if self._d_t else (self._n_periods, ) + + def _strata(self, Y, T, X=None, W=None, Z=None, + sample_weight=None, sample_var=None, groups=None, + cache_values=False, only_final=False, check_input=True): + # Required for bootstrap inference + return groups + + @_deprecate_positional("X, and should be passed by keyword only. In a future release " + "we will disallow passing X and W by position.", ['X', 'W']) + def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups, + cache_values=False, inference=None): + """ + Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. + + The input data has to be in panel format, i.e. a sequence of groups, each with the same size corresponding + to the number of time periods the treatments were assigned over. 
+ + Parameters + ---------- + Y: (n, d_y) matrix or vector of length n + Outcomes for each sample (required: n = n_groups * n_periods) + T: (n, d_t) matrix or vector of length n + Treatments for each sample (required: n = n_groups * n_periods) + X: optional(n, d_x) matrix or None (Default=None) + Features for each sample (Required: n = n_groups * n_periods) + W: optional(n, d_w) matrix or None (Default=None) + Controls for each sample (Required: n = n_groups * n_periods) + sample_weight: optional(n,) vector or None (Default=None) + Weights for each samples + sample_var: optional(n,) vector or None (Default=None) + Sample variance for each sample + groups: (n,) vector, required + All rows corresponding to the same group will be kept together during splitting. + If groups is not None, the `cv` argument passed to this class's initializer + must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model + inference: string,:class:`.Inference` instance, or None + Method for performing inference. This estimator supports 'bootstrap' + (or an instance of :class:`.BootstrapInference`) and 'auto' + (or an instance of :class:`.LinearModelFinalInference`). + + Returns + ------- + self: DynamicDML instance + """ + if sample_weight is not None or sample_var is not None: + warn("This CATE estimator does not yet support sample weights and sample variance. " + "These inputs will be ignored during fitting.", + UserWarning) + # TODO: support sample_weight, sample_var? + return super().fit(Y, T, X=X, W=W, + sample_weight=None, sample_var=None, groups=groups, + cache_values=cache_values, + inference=inference) + + def cate_treatment_names(self, treatment_names=None): + """ + Get treatment names for each time period. + + If the treatment is discrete, it will return expanded treatment names. 
+ + Parameters + ---------- + treatment_names: list of strings of length T.shape[1] or None + The names of the treatments. If None and the T passed to fit was a dataframe, + it defaults to the column names from the dataframe. + + Returns + ------- + out_treatment_names: list of strings + Returns (possibly expanded) treatment names. + """ + slice_treatment_names = super().cate_treatment_names(treatment_names) + treatment_names_out = [] + for k in range(self._n_periods): + treatment_names_out += [f"$({t})_{k}$" for t in slice_treatment_names] + return treatment_names_out + + def cate_feature_names(self, feature_names=None): + """ + Get the output feature names. + + Parameters + ---------- + feature_names: list of strings of length X.shape[1] or None + The names of the input features. If None and X is a dataframe, it defaults to the column names + from the dataframe. + + Returns + ------- + out_feature_names: list of strings or None + The names of the output features :math:`\\phi(X)`, i.e. the features with respect to which the + final constant marginal CATE model is linear. It is the names of the features that are associated + with each entry of the :meth:`coef_` parameter. Not available when the featurizer is not None and + does not have a method: `get_feature_names(feature_names)`. Otherwise None is returned. 
+ """ + if self._d_x is None: + # Handles the corner case when X=None but featurizer might be not None + return None + if feature_names is None: + feature_names = self._input_names["feature_names"] + if self.original_featurizer is None: + return feature_names + return get_feature_names_or_default(self.original_featurizer, feature_names) + + def _expand_treatments(self, X, *Ts): + # Expand treatments for each time period + outTs = [] + base_expand_treatments = super()._expand_treatments + for T in Ts: + if ndim(T) == 0: + one_T = base_expand_treatments(X, T)[1] + one_T = one_T.reshape(-1, 1) if ndim(one_T) == 1 else one_T + T = np.tile(one_T, (1, self._n_periods, )) + else: + assert (T.shape[1] == self._n_periods if self.transformer else T.shape[1] == self._d_t[0]), \ + f"Expected a list of time period * d_t, instead got a treatment array of shape {T.shape}." + if self.transformer: + T = np.hstack([ + base_expand_treatments( + X, T[:, [kappa]])[1] for kappa in range(self._n_periods) + ]) + outTs.append(T) + return (X,) + tuple(outTs) + + @property + def bias_part_of_coef(self): + return self.ortho_learner_model_final_._model_final._fit_cate_intercept + + @property + def fit_cate_intercept_(self): + return self.ortho_learner_model_final_._model_final._fit_cate_intercept + + @property + def original_featurizer(self): + # NOTE: important to use the rlearner_model_final_ attribute instead of the + # attribute so that the trained featurizer will be passed through + return self.ortho_learner_model_final_._model_final_trained[0]._original_featurizer + + @property + def featurizer_(self): + # NOTE This is used by the inference methods and has to be the overall featurizer. 
intended + # for internal use by the library + return self.ortho_learner_model_final_._model_final_trained[0]._featurizer + + @property + def model_final_(self): + # NOTE This is used by the inference methods and is more for internal use to the library + # We need to use the rlearner's copy to retain the information from fitting + return self.ortho_learner_model_final_.model_final_ + + @property + def model_final(self): + return self._gen_model_final() + + @model_final.setter + def model_final(self, model): + if model is not None: + raise ValueError("Parameter `model_final` cannot be altered for this estimator!") diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py new file mode 100644 index 000000000..bb5aa8eac --- /dev/null +++ b/econml/tests/test_dynamic_dml.py @@ -0,0 +1,258 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import unittest +import pytest +import pickle +import numpy as np +from contextlib import ExitStack +from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures +from sklearn.linear_model import (LinearRegression, LassoCV, Lasso, MultiTaskLasso, + MultiTaskLassoCV, LogisticRegression) +from econml.dml import DynamicDML +from econml.inference import BootstrapInference, EmpiricalInferenceResults, NormalInferenceResults +from econml.utilities import shape, hstack, vstack, reshape, cross_product +import econml.tests.utilities # bugfix for assertWarns + + +class TestDynamicDML(unittest.TestCase): + + def test_cate_api(self): + """Test that we correctly implement the CATE API.""" + n_panels = 100 # number of panels + n_periods = 3 # number of time periods per panel + n = n_panels * n_periods + groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0) + + def make_random(n, is_discrete, d): + if d is None: + return None + sz = (n, d) if d >= 0 else (n,) + if is_discrete: + return np.random.choice(['a', 'b', 'c'], size=sz) + else: + 
return np.random.normal(size=sz) + + for d_t in [2, 1, -1]: + for is_discrete in [True, False] if d_t <= 1 else [False]: + # for is_discrete in [False]: + for d_y in [3, 1, -1]: + for d_x in [2, None]: + for d_w in [2, None]: + W, X, Y, T = [make_random(n, is_discrete, d) + for is_discrete, d in [(False, d_w), + (False, d_x), + (False, d_y), + (is_discrete, d_t)]] + T_test = np.hstack([(T.reshape(-1, 1) if d_t == -1 else T) for i in range(n_periods)]) + for featurizer, fit_cate_intercept in\ + [(None, True), + (PolynomialFeatures(degree=2, include_bias=False), True), + (PolynomialFeatures(degree=2, include_bias=True), False)]: + + d_t_final = (2 if is_discrete else max(d_t, 1)) * n_periods + + effect_shape = (n,) + ((d_y,) if d_y > 0 else ()) + effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1), 6) + marginal_effect_shape = ((n,) + + ((d_y,) if d_y > 0 else ()) + + ((d_t_final,) if d_t_final > 0 else ())) + marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1) * + (d_t_final if d_t_final > 0 else 1), 6) + + # since T isn't passed to const_marginal_effect, defaults to one row if X is None + const_marginal_effect_shape = ((n if d_x else 1,) + + ((d_y,) if d_y > 0 else ()) + + ((d_t_final,) if d_t_final > 0 else())) + const_marginal_effect_summaryframe_shape = ( + (n if d_x else 1) * (d_y if d_y > 0 else 1) * + (d_t_final if d_t_final > 0 else 1), 6) + + fd_x = featurizer.fit_transform(X).shape[1:] if featurizer and d_x\ + else ((d_x,) if d_x else (0,)) + coef_shape = Y.shape[1:] + (d_t_final, ) + fd_x + + coef_summaryframe_shape = ( + (d_y if d_y > 0 else 1) * (fd_x[0] if fd_x[0] > + 0 else 1) * (d_t_final), 6) + intercept_shape = Y.shape[1:] + (d_t_final, ) + intercept_summaryframe_shape = ( + (d_y if d_y > 0 else 1) * (d_t_final if d_t_final > 0 else 1), 6) + + all_infs = [None, 'auto', BootstrapInference(2)] + #all_infs = [None, 'auto'] + est = DynamicDML(model_y=Lasso() if d_y < 1 else MultiTaskLasso(), + model_t=LogisticRegression() if 
is_discrete else + (Lasso() if d_t < 1 else MultiTaskLasso()), + featurizer=featurizer, + fit_cate_intercept=fit_cate_intercept, + discrete_treatment=is_discrete) + + # ensure we can serialize the unfit estimator + pickle.dumps(est) + + for inf in all_infs: + with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t, + is_discrete=is_discrete, est=est, inf=inf): + + if X is None and (not fit_cate_intercept): + with pytest.raises(AttributeError): + est.fit(Y, T, X=X, W=W, groups=groups, inference=inf) + continue + + est.fit(Y, T, X=X, W=W, groups=groups, inference=inf) + + # ensure we can pickle the fit estimator + pickle.dumps(est) + + # make sure we can call the marginal_effect and effect methods + const_marg_eff = est.const_marginal_effect(X) + marg_eff = est.marginal_effect(T_test, X) + self.assertEqual(shape(marg_eff), marginal_effect_shape) + self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape) + + np.testing.assert_allclose( + marg_eff if d_x else marg_eff[0:1], const_marg_eff) + + # TODO: add score and nuisance scores + """ + assert isinstance(est.score_, float) + for score in est.nuisance_scores_y: + assert isinstance(score, float) + for score in est.nuisance_scores_t: + assert isinstance(score, float) + """ + + T0 = np.full_like(T_test, 'a') if is_discrete else np.zeros_like(T_test) + eff = est.effect(X, T0=T0, T1=T_test) + self.assertEqual(shape(eff), effect_shape) + + self.assertEqual(shape(est.coef_), coef_shape) + if fit_cate_intercept: + self.assertEqual(shape(est.intercept_), intercept_shape) + else: + with pytest.raises(AttributeError): + self.assertEqual(shape(est.intercept_), intercept_shape) + + if inf is not None: + const_marg_eff_int = est.const_marginal_effect_interval(X) + marg_eff_int = est.marginal_effect_interval(T_test, X) + self.assertEqual(shape(marg_eff_int), + (2,) + marginal_effect_shape) + self.assertEqual(shape(const_marg_eff_int), + (2,) + const_marginal_effect_shape) + self.assertEqual(shape(est.effect_interval(X, 
T0=T0, T1=T_test)), + (2,) + effect_shape) + self.assertEqual(shape(est.coef__interval()), + (2,) + coef_shape) + if fit_cate_intercept: + self.assertEqual(shape(est.intercept__interval()), + (2,) + intercept_shape) + else: + with pytest.raises(AttributeError): + self.assertEqual(shape(est.intercept__interval()), + (2,) + intercept_shape) + + const_marg_effect_inf = est.const_marginal_effect_inference(X) + T1 = np.full_like(T_test, 'b') if is_discrete else T_test + effect_inf = est.effect_inference(X, T0=T0, T1=T1) + marg_effect_inf = est.marginal_effect_inference(T_test, X) + # test const marginal inference + self.assertEqual(shape(const_marg_effect_inf.summary_frame()), + const_marginal_effect_summaryframe_shape) + self.assertEqual(shape(const_marg_effect_inf.point_estimate), + const_marginal_effect_shape) + self.assertEqual(shape(const_marg_effect_inf.stderr), + const_marginal_effect_shape) + self.assertEqual(shape(const_marg_effect_inf.var), + const_marginal_effect_shape) + self.assertEqual(shape(const_marg_effect_inf.pvalue()), + const_marginal_effect_shape) + self.assertEqual(shape(const_marg_effect_inf.zstat()), + const_marginal_effect_shape) + self.assertEqual(shape(const_marg_effect_inf.conf_int()), + (2,) + const_marginal_effect_shape) + np.testing.assert_array_almost_equal( + const_marg_effect_inf.conf_int()[0], + const_marg_eff_int[0], decimal=5) + const_marg_effect_inf.population_summary()._repr_html_() + + # test effect inference + self.assertEqual(shape(effect_inf.summary_frame()), + effect_summaryframe_shape) + self.assertEqual(shape(effect_inf.point_estimate), + effect_shape) + self.assertEqual(shape(effect_inf.stderr), + effect_shape) + self.assertEqual(shape(effect_inf.var), + effect_shape) + self.assertEqual(shape(effect_inf.pvalue()), + effect_shape) + self.assertEqual(shape(effect_inf.zstat()), + effect_shape) + self.assertEqual(shape(effect_inf.conf_int()), + (2,) + effect_shape) + np.testing.assert_array_almost_equal( + 
effect_inf.conf_int()[0], + est.effect_interval(X, T0=T0, T1=T1)[0], decimal=5) + effect_inf.population_summary()._repr_html_() + + # test marginal effect inference + self.assertEqual(shape(marg_effect_inf.summary_frame()), + marginal_effect_summaryframe_shape) + self.assertEqual(shape(marg_effect_inf.point_estimate), + marginal_effect_shape) + self.assertEqual(shape(marg_effect_inf.stderr), + marginal_effect_shape) + self.assertEqual(shape(marg_effect_inf.var), + marginal_effect_shape) + self.assertEqual(shape(marg_effect_inf.pvalue()), + marginal_effect_shape) + self.assertEqual(shape(marg_effect_inf.zstat()), + marginal_effect_shape) + self.assertEqual(shape(marg_effect_inf.conf_int()), + (2,) + marginal_effect_shape) + np.testing.assert_array_almost_equal( + marg_effect_inf.conf_int()[0], marg_eff_int[0], decimal=5) + marg_effect_inf.population_summary()._repr_html_() + + # test coef__inference and intercept__inference + if X is not None: + self.assertEqual( + shape(est.coef__inference().summary_frame()), + coef_summaryframe_shape) + np.testing.assert_array_almost_equal( + est.coef__inference().conf_int() + [0], est.coef__interval()[0], decimal=5) + + if fit_cate_intercept: + cm = ExitStack() + # ExitStack can be used as a "do nothing" ContextManager + else: + cm = pytest.raises(AttributeError) + with cm: + self.assertEqual(shape(est.intercept__inference(). 
+ summary_frame()), + intercept_summaryframe_shape) + np.testing.assert_array_almost_equal( + est.intercept__inference().conf_int() + [0], est.intercept__interval()[0], decimal=5) + + est.summary() + + # TODO: add score to estimator + """ + est.score(Y, T, X, W) + """ + # make sure we can call effect with implied scalar treatments, + # no matter the dimensions of T, and also that we warn when there + # are multiple treatments + if d_t > 1: + cm = self.assertWarns(Warning) + else: + # ExitStack can be used as a "do nothing" ContextManager + cm = ExitStack() + with cm: + effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else()) + eff = est.effect(X) if not is_discrete else est.effect( + X, T0='a', T1='b') + self.assertEqual(shape(eff), effect_shape2) From 1070aea9648d671c3eed82b4b592af4f57e7550a Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Wed, 7 Apr 2021 09:39:40 -0400 Subject: [PATCH 02/27] Add performance tests and an example notebook --- econml/dml/dynamic_dml.py | 2 +- econml/tests/dgp.py | 156 ++++ econml/tests/test_dynamic_dml.py | 39 +- ...mic Double Machine Learning Examples.ipynb | 677 ++++++++++++++++++ 4 files changed, 872 insertions(+), 2 deletions(-) create mode 100644 econml/tests/dgp.py create mode 100644 notebooks/Dynamic Double Machine Learning Examples.ipynb diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index b5cd6d917..9d10e33da 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -482,7 +482,7 @@ def _gen_ortho_learner_model_final(self, n_periods): return _LinearDynamicModelFinal(wrapped_final_model, n_periods=n_periods) def _prefit(self, Y, T, *args, groups=None, only_final=False, **kwargs): - u_periods = np.unique(np.bincount(groups.astype(int))) + u_periods = np.unique(np.unique(groups, return_counts=True)[1]) if len(u_periods) > 1: raise AttributeError( "Imbalanced panel. Method currently expects only panels with equal number of periods. 
Pad your data") diff --git a/econml/tests/dgp.py b/econml/tests/dgp.py new file mode 100644 index 000000000..16e1f1aae --- /dev/null +++ b/econml/tests/dgp.py @@ -0,0 +1,156 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import numpy as np +from econml.utilities import cross_product +from statsmodels.tools.tools import add_constant + + +class _BaseDynamicPanelDGP: + + def __init__(self, n_periods, n_treatments, n_x): + self.n_periods = n_periods + self.n_treatments = n_treatments + self.n_x = n_x + return + + def create_instance(self, *args, **kwargs): + pass + + def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): + pass + + def static_policy_data(self, n_units, tau, random_seed=123): + def policy_gen(Tpre, X, period): + return tau[period] + return self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) + + def adaptive_policy_data(self, n_units, policy_gen, random_seed=123): + return self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) + + def static_policy_effect(self, tau, mc_samples=1000): + Y_tau, _, _, _ = self.static_policy_data(mc_samples, tau) + Y_zero, _, _, _ = self.static_policy_data( + mc_samples, np.zeros((self.n_periods, self.n_treatments))) + return np.mean(Y_tau[np.arange(Y_tau.shape[0]) % self.n_periods == self.n_periods - 1]) - \ + np.mean(Y_zero[np.arange(Y_zero.shape[0]) % + self.n_periods == self.n_periods - 1]) + + def adaptive_policy_effect(self, policy_gen, mc_samples=1000): + Y_tau, _, _, _ = self.adaptive_policy_data(mc_samples, policy_gen) + Y_zero, _, _, _ = self.static_policy_data( + mc_samples, np.zeros((self.n_periods, self.n_treatments))) + return np.mean(Y_tau[np.arange(Y_tau.shape[0]) % self.n_periods == self.n_periods - 1]) - \ + np.mean(Y_zero[np.arange(Y_zero.shape[0]) % + self.n_periods == self.n_periods - 1]) + + +class DynamicPanelDGP(_BaseDynamicPanelDGP): + + def __init__(self, n_periods, n_treatments, n_x): + 
super().__init__(n_periods, n_treatments, n_x) + + def create_instance(self, s_x, sigma_x=.5, sigma_y=.5, conf_str=5, hetero_strength=0, hetero_inds=None, + autoreg=.5, state_effect=.5, random_seed=123): + np.random.seed(random_seed) + self.s_x = s_x + self.conf_str = conf_str + self.sigma_x = sigma_x + self.sigma_y = sigma_y + self.hetero_inds = hetero_inds.astype( + int) if hetero_inds is not None else hetero_inds + self.endo_inds = np.setdiff1d( + np.arange(self.n_x), hetero_inds).astype(int) + # The first s_x state variables are confounders. The final s_x variables are exogenous and can create + # heterogeneity + self.Alpha = np.random.uniform(-1, 1, + size=(self.n_x, self.n_treatments)) + self.Alpha /= np.linalg.norm(self.Alpha, axis=1, ord=1, keepdims=True) + self.Alpha *= state_effect + if self.hetero_inds is not None: + self.Alpha[self.hetero_inds] = 0 + + self.Beta = np.zeros((self.n_x, self.n_x)) + for t in range(self.n_x): + self.Beta[t, :] = autoreg * np.roll(np.random.uniform(low=4.0**(-np.arange( + 0, self.n_x)), high=4.0**(-np.arange(1, self.n_x + 1))), t) + if self.hetero_inds is not None: + self.Beta[np.ix_(self.endo_inds, self.hetero_inds)] = 0 + self.Beta[np.ix_(self.hetero_inds, self.endo_inds)] = 0 + + self.epsilon = np.random.uniform(-1, 1, size=self.n_treatments) + self.zeta = np.zeros(self.n_x) + self.zeta[:self.s_x] = self.conf_str / self.s_x + + self.y_hetero_effect = np.zeros(self.n_x) + self.x_hetero_effect = np.zeros(self.n_x) + if self.hetero_inds is not None: + self.y_hetero_effect[self.hetero_inds] = np.random.uniform(.5 * hetero_strength, 1.5 * hetero_strength) / \ + len(self.hetero_inds) + self.x_hetero_effect[self.hetero_inds] = np.random.uniform(.5 * hetero_strength, 1.5 * hetero_strength) / \ + len(self.hetero_inds) + + self.true_effect = np.zeros((self.n_periods, self.n_treatments)) + self.true_effect[0] = self.epsilon + for t in np.arange(1, self.n_periods): + self.true_effect[t, :] = (self.zeta.reshape( + 1, -1) @ 
np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha) + + self.true_hetero_effect = np.zeros( + (self.n_periods, (self.n_x + 1) * self.n_treatments)) + self.true_hetero_effect[0, :] = cross_product( + add_constant(self.y_hetero_effect.reshape(1, -1), has_constant='add'), + self.epsilon.reshape(1, -1)) + for t in np.arange(1, self.n_periods): + self.true_hetero_effect[t, :] = cross_product( + add_constant(self.x_hetero_effect.reshape(1, -1), has_constant='add'), + self.zeta.reshape(1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha) + return self + + def hetero_effect_fn(self, t, x): + if t == 0: + return (np.dot(self.y_hetero_effect, x.flatten()) + 1) * self.epsilon + else: + return (np.dot(self.x_hetero_effect, x.flatten()) + 1) *\ + (self.zeta.reshape(1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) + @ self.Alpha).flatten() + + def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): + np.random.seed(random_seed) + Y = np.zeros(n_units * self.n_periods) + T = np.zeros((n_units * self.n_periods, self.n_treatments)) + X = np.zeros((n_units * self.n_periods, self.n_x)) + groups = np.zeros(n_units * self.n_periods) + for t in range(n_units * self.n_periods): + period = t % self.n_periods + if period == 0: + X[t] = np.random.normal(0, self.sigma_x, size=self.n_x) + T[t] = policy_gen(np.zeros(self.n_treatments), X[t], period) + else: + X[t] = (np.dot(self.x_hetero_effect, X[t - 1]) + 1) * np.dot(self.Alpha, T[t - 1]) + \ + np.dot(self.Beta, X[t - 1]) + \ + np.random.normal(0, self.sigma_x, size=self.n_x) + T[t] = policy_gen(T[t - 1], X[t], period) + Y[t] = (np.dot(self.y_hetero_effect, X[t]) + 1) * np.dot(self.epsilon, T[t]) + \ + np.dot(X[t], self.zeta) + \ + np.random.normal(0, self.sigma_y) + groups[t] = t // self.n_periods + + return Y, T, X[:, self.hetero_inds] if self.hetero_inds else None, X[:, self.endo_inds], groups + + def observational_data(self, n_units, gamma=0, s_t=1, sigma_t=0.5, random_seed=123): + """ Generated 
observational data with some observational treatment policy parameters + + Parameters + ---------- + n_units : how many units to observe + gamma : what is the degree of auto-correlation of the treatments across periods + s_t : sparsity of treatment policy; how many states does it depend on + sigma_t : what is the std of the exploration/randomness in the treatment + """ + Delta = np.zeros((self.n_treatments, self.n_x)) + Delta[:, :s_t] = self.conf_str / s_t + + def policy_gen(Tpre, X, period): + return gamma * Tpre + (1 - gamma) * np.dot(Delta, X) + \ + np.random.normal(0, sigma_t, size=self.n_treatments) + return self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index bb5aa8eac..409cd231c 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -12,6 +12,7 @@ from econml.inference import BootstrapInference, EmpiricalInferenceResults, NormalInferenceResults from econml.utilities import shape, hstack, vstack, reshape, cross_product import econml.tests.utilities # bugfix for assertWarns +from econml.tests.dgp import DynamicPanelDGP class TestDynamicDML(unittest.TestCase): @@ -79,7 +80,6 @@ def make_random(n, is_discrete, d): (d_y if d_y > 0 else 1) * (d_t_final if d_t_final > 0 else 1), 6) all_infs = [None, 'auto', BootstrapInference(2)] - #all_infs = [None, 'auto'] est = DynamicDML(model_y=Lasso() if d_y < 1 else MultiTaskLasso(), model_t=LogisticRegression() if is_discrete else (Lasso() if d_t < 1 else MultiTaskLasso()), @@ -256,3 +256,40 @@ def make_random(n, is_discrete, d): eff = est.effect(X) if not is_discrete else est.effect( X, T0='a', T1='b') self.assertEqual(shape(eff), effect_shape2) + + def test_perf(self): + np.random.seed(123) + n_units = 400 + n_periods = 3 + n_treatments = 1 + n_x = 100 + s_x = 10 + s_t = 10 + hetero_strength = .5 + hetero_inds = np.arange(n_x - n_treatments, n_x) + alpha_regs = [1e-4, 1e-3, 1e-2, 
5e-2, .1, 1] + + def lasso_model(): + return LassoCV(cv=3, alphas=alpha_regs, max_iter=500) + # No heterogeneity + dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance( + s_x, random_seed=1) + Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12) + est = DynamicDML(model_y=lasso_model(), model_t=lasso_model(), cv=3) + est.fit(Y, T, X=X, W=W, groups=groups, inference="auto") + np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=1e-01) + np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) + np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) + # Heterogeneous effects + hetero_strength = .5 + hetero_inds = np.arange(n_x - n_treatments, n_x) + dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance( + s_x, hetero_strength=hetero_strength, hetero_inds=hetero_inds, random_seed=1) + Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12) + est.fit(Y, T, X=X, W=W, groups=groups, inference="auto") + np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=0.2) + np.testing.assert_allclose(est.coef_, dgp.true_hetero_effect[:, hetero_inds + 1], atol=0.2) + np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) + np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) + np.testing.assert_array_less(est.coef__interval()[0], dgp.true_hetero_effect[:, hetero_inds + 1]) + np.testing.assert_array_less(dgp.true_hetero_effect[:, hetero_inds + 1], est.coef__interval()[1]) diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb new file mode 100644 index 000000000..268402dfb --- /dev/null +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " 
\n", + " \n", + "
\n", + " \n", + " \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dynamic Double Machine Learning: Use Cases and Examples\n", + "\n", + "Dynamic DoubleML is an extension of the Double ML approach for treatments assigned sequentially over time periods. This estimator will account for treatments that can have causal effects on future outcomes. For more details, see [this paper](https://arxiv.org/abs/2002.07285) or the [EconML documentation](https://econml.azurewebsites.net/).\n", + "\n", + "For example, the Dynamic DoubleML could be useful in estimating the following causal effects:\n", + "* the effect of investments on revenue at companies that receive investments at regular intervals ([see more](https://arxiv.org/abs/2103.08390))\n", + "* the effect of prices on demand in stores where prices of goods change over time\n", + "* the effect of income on health outcomes in people who receive yearly income\n", + "\n", + "The expected data format is balanced panel data. Each panel corresponds to one entity (e.g. company, store or person) and the different rows in a panel correspond to different time points. Example:\n", + "\n", + "||Company|Year|Features|Investment|Revenue|\n", + "|---|---|---|---|---|---|\n", + "|1|A|2018|...|\\$1,000|\\$10,000|\n", + "|2|A|2019|...|\\$2,000|\\$12,000|\n", + "|3|A|2020|...|\\$3,000|\\$15,000|\n", + "|4|B|2018|...|\\$0|\\$5,000|\n", + "|5|B|2019|...|\\$100|\\$10,000|\n", + "|6|B|2020|...|\\$1,200|\\$7,000|\n", + "|7|C|2018|...|\\$1,000|\\$20,000|\n", + "|8|C|2019|...|\\$1,500|\\$25,000|\n", + "|9|C|2020|...|\\$500|\\$15,000|\n", + "\n", + "(Note: when passing the data to the DynamicDML estimator, the \"Company\" column above corresponds to the `groups` argument at fit time. The \"Year\" column above should not be passed in as it will be inferred from the \"Company\" column)\n", + "\n", + "\n", + "In this notebook, we show the performance of the DynamicDML on synthetic and observational data. 
\n", + "\n", + "## Notebook Contents\n", + "\n", + "1. [Example Usage with Average Treatment Effects](#1.-Example-Usage-with-Average-Treatment-Effects)\n", + "2. [Example Usage with Heterogeneous Treatment Effects](#2.-Example-Usage-with-Heterogeneous-Treatment-Effects)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import econml" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Main imports\n", + "from econml.dml import DynamicDML\n", + "from econml.tests.dgp import DynamicPanelDGP\n", + "\n", + "# Helper imports\n", + "import numpy as np\n", + "from sklearn.linear_model import Lasso, LassoCV, LogisticRegression, LogisticRegressionCV, MultiTaskLassoCV\n", + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Example Usage with Average Treatment Effects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.1 DGP\n", + "\n", + "We consider a data generating process from a Markovian treatment model. \n", + "\n", + "In the example below, $T_t\\rightarrow$ treatment(s) at time $t$, $Y_t\\rightarrow$ outcome at time $t$, $X_t\\rightarrow$ features and controls at time $t$ (the coefficients $e, f'$ will pick the features and the controls).\n", + "\\begin{align}\n", + " X_t =& (\\pi'X_{t-1} + 1) \\cdot A\\, T_{t-1} + B X_{t-1} + \\epsilon_t\\\\\n", + " T_t =& \\gamma\\, T_{t-1} + (1-\\gamma) \\cdot D X_t + \\zeta_t\\\\\n", + " Y_t =& (\\sigma' X_{t} + 1) \\cdot e\\, T_{t} + f X_t + \\eta_t\n", + "\\end{align}\n", + "\n", + "with $X_0, T_0 = 0$ and $\\epsilon_t, \\zeta_t, \\eta_t \\sim N(0, \\sigma^2)$. Moreover, $X_t \\in R^{n_x}$, $B[:, 0:s_x] \\neq 0$ and $B[:, s_x:-1] = 0$, $\\gamma\\in [0, 1]$, $D[:, 0:s_x] \\neq 0$, $D[:, s_x:-1]=0$, $f[0:s_x]\\neq 0$, $f[s_x:-1]=0$. 
We draw a single time series of samples of length $n\\_panels \\cdot n\\_periods$." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Define DGP parameters\n", + "np.random.seed(123)\n", + "n_panels = 400 # number of panels\n", + "n_periods = 3 # number of time periods in each panel\n", + "n_treatments = 1 # number of treatments in each period\n", + "n_x = 100 # number of features + controls\n", + "s_x = 10 # number of controls (endogenous variables)\n", + "s_t = 10 # treatment support size" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate data\n", + "dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance(\n", + " s_x, random_seed=12345)\n", + "Y, T, X, W, groups = dgp.observational_data(n_panels, s_t=s_t, random_seed=12345)\n", + "true_effect = dgp.true_effect" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.2 Train Estimator" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "alpha_regs = [1e-4, 1e-3, 1e-2, 5e-2, .1, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "est = DynamicDML(\n", + " model_y=LassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", + " model_t=MultiTaskLassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", + " cv=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fit(Y, T, X=None, W=W, groups=groups, inference=\"auto\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average effect of default policy: 2.26\n" + ] + } + ], 
+ "source": [ + "# Average treatment effect of all periods on last period for unit treatments\n", + "print(f\"Average effect of default policy: {est.ate():0.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Effect of target policy over baseline policy: 4.33\n" + ] + } + ], + "source": [ + "# Effect of target policy over baseline policy\n", + "# Must specify a treatment for each period\n", + "baseline_policy = np.zeros((1, n_periods))\n", + "target_policy = np.array([[1, 2, 3]])\n", + "eff = est.effect(T0=baseline_policy, T1=target_policy)\n", + "print(f\"Effect of target policy over baseline policy: {eff[0]:0.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Effect of a treatment in period 1 on period 3 outcome: 0.71\n", + "Effect of a treatment in period 2 on period 3 outcome: 1.03\n", + "Effect of a treatment in period 3 on period 3 outcome: 0.52\n" + ] + } + ], + "source": [ + "# Period treatment effects + interpretation\n", + "for i, theta in enumerate(est.intercept_):\n", + " print(f\"Effect of a treatment in period {i+1} on period {n_periods} outcome: {theta:0.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Coefficient Results: X is None, please call intercept_inference to learn the constant!\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|$(T0)_0$ 0.711 0.041 17.224 0.0 0.643 0.779
cate_intercept|$(T0)_1$ 1.031 0.1 10.306 0.0 0.866 1.195
cate_intercept|$(T0)_2$ 0.518 0.139 3.739 0.0 0.29 0.746


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " CATE Intercept Results \n", + "=============================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------------------------\n", + "cate_intercept|$(T0)_0$ 0.711 0.041 17.224 0.0 0.643 0.779\n", + "cate_intercept|$(T0)_1$ 1.031 0.1 10.306 0.0 0.866 1.195\n", + "cate_intercept|$(T0)_2$ 0.518 0.139 3.739 0.0 0.29 0.746\n", + "-----------------------------------------------------------------------------\n", + "\n", + "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", + "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", + "where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:\n", + "$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$\n", + "where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.\n", + "\"\"\"" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Period treatment effects with confidence intervals\n", + "est.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "conf_ints = est.intercept__interval(alpha=0.05)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.3 Performance Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Some plotting boilerplate code\n", + "plt.figure(figsize=(15, 5))\n", + "plt.errorbar(np.arange(n_periods*n_treatments)-.04, est.intercept_, yerr=(conf_ints[1] - est.intercept_,\n", + " est.intercept_ - conf_ints[0]), fmt='o', label='DynamicDML')\n", + "plt.errorbar(np.arange(n_periods*n_treatments), true_effect.flatten(), fmt='o', alpha=.6, label='Ground truth')\n", + "for t in np.arange(1, n_periods):\n", + " plt.axvline(x=t * n_treatments - .5, linestyle='--', alpha=.4)\n", + "plt.xticks([t * n_treatments - .5 + n_treatments/2 for t in range(n_periods)],\n", + " [\"$\\\\theta_{}$\".format(t) for t in range(n_periods)])\n", + "plt.gca().set_xlim([-.5, n_periods*n_treatments - .5])\n", + "plt.ylabel(\"Effect\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Example Usage with Heterogeneous Treatment Effects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.1 DGP" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Define additional DGP parameters\n", + "het_strength = .5\n", + "het_inds = np.arange(n_x - n_treatments, n_x)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate data\n", + "dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance(\n", + " s_x, hetero_strength=het_strength, hetero_inds=het_inds, random_seed=12)\n", + "Y, T, X, W, groups = dgp.observational_data(n_panels, s_t=s_t, random_seed=1)\n", + "ate_effect = dgp.true_effect\n", + "het_effect = dgp.true_hetero_effect[:, het_inds + 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.2 Train Estimator" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + 
"source": [ + "est = DynamicDML(\n", + " model_y=LassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", + " model_t=MultiTaskLassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", + " cv=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Objective did not converge. You might want to increase the number of iterations. Duality gap: 2.6550941575656566, tolerance: 1.6272718134380235\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fit(Y, T, X=X, W=W, groups=groups, inference=\"auto\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Coefficient Results
point_estimate stderr zstat pvalue ci_lower ci_upper
X0|$(T0)_0$ 0.394 0.103 3.838 0.0 0.225 0.563
X0|$(T0)_1$ -0.066 0.192 -0.343 0.732 -0.382 0.25
X0|$(T0)_2$ 0.04 0.201 0.198 0.843 -0.291 0.37
\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|$(T0)_0$ 0.579 0.052 11.242 0.0 0.495 0.664
cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.704 -0.108 0.173
cate_intercept|$(T0)_2$ -0.098 0.093 -1.049 0.294 -0.251 0.055


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Coefficient Results \n", + "=================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------------\n", + "X0|$(T0)_0$ 0.394 0.103 3.838 0.0 0.225 0.563\n", + "X0|$(T0)_1$ -0.066 0.192 -0.343 0.732 -0.382 0.25\n", + "X0|$(T0)_2$ 0.04 0.201 0.198 0.843 -0.291 0.37\n", + " CATE Intercept Results \n", + "=============================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------------------------\n", + "cate_intercept|$(T0)_0$ 0.579 0.052 11.242 0.0 0.495 0.664\n", + "cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.704 -0.108 0.173\n", + "cate_intercept|$(T0)_2$ -0.098 0.093 -1.049 0.294 -0.251 0.055\n", + "-----------------------------------------------------------------------------\n", + "\n", + "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", + "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", + "where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:\n", + "$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$\n", + "where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. 
Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.\n", + "\"\"\"" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average effect of default policy:0.48\n" + ] + } + ], + "source": [ + "# Average treatment effect for test points\n", + "X_test = X[:10]\n", + "print(f\"Average effect of default policy:{est.ate(X=X_test):0.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Effect of target policy over baseline policy for test set:\n", + " [ 0.48463458 0.52974258 0.21137445 0.36785476 0.37107783 0.6353013\n", + " -0.08878165 0.17427563 0.45723001 0.06398282]\n" + ] + } + ], + "source": [ + "# Effect of target policy over baseline policy\n", + "# Must specify a treatment for each period\n", + "baseline_policy = np.zeros((1, n_periods))\n", + "target_policy = np.array([[1, 2, 3]])\n", + "eff = est.effect(X=X_test, T0=baseline_policy, T1=target_policy)\n", + "print(\"Effect of target policy over baseline policy for test set:\\n\", eff)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 0.57932353, 0.03248502, -0.09764978]),\n", + " array([[ 0.39429592],\n", + " [-0.06583129],\n", + " [ 0.03975663]]))" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Coefficients\n", + "est.intercept_, est.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Confidence intervals\n", + "conf_ints_intercept = est.intercept__interval(alpha=0.05)\n", + "conf_ints_coef = est.coef__interval(alpha=0.05)" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.3 Performance Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Some plotting boilerplate code\n", + "plt.figure(figsize=(15, 5))\n", + "# Intercepts\n", + "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments), \n", + " est.intercept_, \n", + " yerr=(conf_ints_intercept[1] - est.intercept_, est.intercept_ - conf_ints_intercept[0]), \n", + " fmt='o', label='DynamicDML')\n", + "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments), ate_effect.flatten(), \n", + " fmt='o', label='Ground truth')\n", + "# Heterogeneous effects\n", + "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments)+1, est.coef_, \n", + " yerr=((conf_ints_coef[1] - est.coef_).flatten(), \n", + " (est.coef_ - conf_ints_coef[0]).flatten()), \n", + " fmt='o', color='C0')\n", + "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments)+1, het_effect, \n", + " fmt='o', color='C1')\n", + "for t in np.arange(2, (het_inds.shape[0]+1)*n_periods, 2):\n", + " plt.axvline(x=t * n_treatments - .5, linestyle='--', alpha=.4)\n", + "# Labels\n", + "x_range = np.arange(n_periods*n_treatments*(het_inds.shape[0]+1))\n", + "x_ticks = list(x_range)\n", + "x_labels = [1 if i%(n_treatments+1)==0 else f\"$X_\\u007b{het_inds[i%(n_treatments+1)-1]}\\u007d$\" for i in x_range]\n", + "x_ticks += [het_inds.shape[0]/2 + i*(n_treatments*(het_inds.shape[0]+1)) for i in range(n_periods)]\n", + "x_labels += [f\"\\n\\n\\n$\\\\theta_{i}$\" for i in range(n_periods)]\n", + "plt.xticks(x_ticks, x_labels)\n", + "plt.ylabel(\"Effect\")\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 5f6da4010b646d8c7b65356915b9216013ff6859 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Fri, 9 Apr 2021 18:20:52 -0400 Subject: [PATCH 03/27] Add scores. --- econml/dml/dynamic_dml.py | 89 +++++++++++++++++++++++++++++--- econml/tests/test_dynamic_dml.py | 16 +++--- 2 files changed, 89 insertions(+), 16 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index 9d10e33da..cb68ad562 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -92,8 +92,6 @@ def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): return Y_res, T_res def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): - # TODO: implement scores - # TODO: fix correctness? assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." inds_score = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] @@ -147,7 +145,7 @@ def __init__(self, model_final, n_periods): self._model_final_trained = {k: clone(self._model_final, safe=False) for k in np.arange(n_periods)} def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): - # TODO: handle sample weight, sample var + # NOTE: sample weight, sample var are not passed in Y_res, T_res = nuisances self._d_y = Y.shape[1:] for kappa in np.arange(self.n_periods): @@ -186,8 +184,29 @@ def predict(self, X=None): return preds def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): - # TODO: implement score - return None + assert Y.shape[0] % self.n_periods == 0, \ + "Length of training data should be an integer multiple of time periods." 
+ Y_res, T_res = nuisances + + scores = np.full((self.n_periods, ), np.nan) + for kappa in np.arange(self.n_periods): + period = self.n_periods - 1 - kappa + period_filter = self.period_filter_gen(period, Y.shape[0]) + Y_adj = Y_res[period_filter].copy() + if kappa > 0: + Y_adj -= np.sum( + [self._model_final_trained[tau].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - tau, Y.shape[0])] if X is not None else None, + T_res[period_filter, ..., self.n_periods - 1 - tau] + ) for tau in np.arange(kappa)], axis=0) + Y_adj_pred = self._model_final_trained[kappa].predict_with_res( + X[period_filter] if X is not None else None, + T_res[period_filter, ..., period]) + if sample_weight is not None: + scores[kappa] = np.mean(np.average((Y_adj - Y_adj_pred)**2, weights=sample_weight, axis=0)) + else: + scores[kappa] = np.mean((Y_adj - Y_adj_pred) ** 2) + return scores def period_filter_gen(self, p, n): return (np.arange(n) % self.n_periods == p) @@ -548,12 +567,39 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou warn("This CATE estimator does not yet support sample weights and sample variance. " "These inputs will be ignored during fitting.", UserWarning) - # TODO: support sample_weight, sample_var? return super().fit(Y, T, X=X, W=W, sample_weight=None, sample_var=None, groups=groups, cache_values=cache_values, inference=inference) + def score(self, Y, T, X=None, W=None): + """ + Score the fitted CATE model on a new data set. Generates nuisance parameters + for the new data set based on the fitted residual nuisance models created at fit time. + It uses the mean prediction of the models fitted by the different crossfit folds. + Then calculates the MSE of the final residual Y on residual T regression. 
+ + If model_final does not have a score method, then it raises an :exc:`.AttributeError` + + Parameters + ---------- + Y: (n, d_y) matrix or vector of length n + Outcomes for each sample (required: n = n_groups * n_periods) + T: (n, d_t) matrix or vector of length n + Treatments for each sample (required: n = n_groups * n_periods) + X: optional(n, d_x) matrix or None (Default=None) + Features for each sample (Required: n = n_groups * n_periods) + W: optional(n, d_w) matrix or None (Default=None) + Controls for each sample (Required: n = n_groups * n_periods) + + Returns + ------- + score: float + The MSE of the final CATE model on the new data. + """ + # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring + return super().score(Y, T, X=X, W=W) + def cate_treatment_names(self, treatment_names=None): """ Get treatment names for each time period. @@ -658,3 +704,34 @@ def model_final(self): def model_final(self, model): if model is not None: raise ValueError("Parameter `model_final` cannot be altered for this estimator!") + + @property + def models_y(self): + return [[mdl._model_y for mdl in mdls] for mdls in super().models_nuisance_] + + @property + def models_t(self): + return [[mdl._model_t for mdl in mdls] for mdls in super().models_nuisance_] + + @property + def nuisance_scores_y(self): + return self.nuisance_scores_[0] + + @property + def nuisance_scores_t(self): + return self.nuisance_scores_[1] + + @property + def residuals_(self): + """ + A tuple (y_res, T_res, X, W), of the residuals from the first stage estimation + along with the associated X and W. Samples are not guaranteed to be in the same + order as the input order. + """ + if not hasattr(self, '_cached_values'): + raise AttributeError("Estimator is not fitted yet!") + if self._cached_values is None: + raise AttributeError("`fit` was called with `cache_values=False`. 
" + "Set to `True` to enable residual storage.") + Y_res, T_res = self._cached_values.nuisances + return Y_res, T_res, self._cached_values.X, self._cached_values.W diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index 409cd231c..21a5aa7a9 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -113,14 +113,11 @@ def make_random(n, is_discrete, d): np.testing.assert_allclose( marg_eff if d_x else marg_eff[0:1], const_marg_eff) - # TODO: add score and nuisance scores - """ - assert isinstance(est.score_, float) - for score in est.nuisance_scores_y: - assert isinstance(score, float) - for score in est.nuisance_scores_t: - assert isinstance(score, float) - """ + assert len(est.score_) == n_periods + for score in est.nuisance_scores_y[0]: + assert score.shape == (n_periods, ) + for score in est.nuisance_scores_t[0]: + assert score.shape == (n_periods, n_periods) T0 = np.full_like(T_test, 'a') if is_discrete else np.zeros_like(T_test) eff = est.effect(X, T0=T0, T1=T_test) @@ -238,8 +235,7 @@ def make_random(n, is_discrete, d): [0], est.intercept__interval()[0], decimal=5) est.summary() - - # TODO: add score to estimator + # TODO: fix score """ est.score(Y, T, X, W) """ From d8bc1f30fc5e3d3e5f5d688d36fc78034dd7c2e7 Mon Sep 17 00:00:00 2001 From: Maggie Hei Date: Fri, 4 Jun 2021 18:34:53 -0400 Subject: [PATCH 04/27] store some internal variables to allow calling from diased inference function --- econml/dml/dynamic_dml.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index cb68ad562..c8bbc3ec5 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -269,6 +269,7 @@ def _fit_single_output_cov(self, nuisances, X, y_index): self.n_periods * d_xt)) Sigma = np.zeros((self.n_periods * d_xt, self.n_periods * d_xt)) + self._res_epsilon = {} for kappa in np.arange(self.n_periods): # Calculating the (kappa, kappa) block entry (of size 
n_treatments x n_treatments) of matrix Sigma period = self.n_periods - 1 - kappa @@ -283,6 +284,7 @@ def _fit_single_output_cov(self, nuisances, X, y_index): res_epsilon = (Y_res[period_filter] - (Y_diff[:, y_index] if y_index >= 0 else Y_diff) ).reshape(-1, 1, 1) + self._res_epsilon[period] = res_epsilon.flatten() cur_resT = XT_res[period][period] cov_cur_resT = cur_resT.reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt) sigma_kappa = np.mean((res_epsilon**2) * cov_cur_resT, axis=0) @@ -295,6 +297,7 @@ def _fit_single_output_cov(self, nuisances, X, y_index): axis=0) M[kappa * d_xt:(kappa + 1) * d_xt, tau * d_xt:(tau + 1) * d_xt] = m_kappa_tau + self._M = M return np.linalg.inv(M) @ Sigma @ np.linalg.inv(M).T From 6daf315d220095d039fe74d564d20ccae9046e74 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 29 Jul 2021 13:13:02 -0400 Subject: [PATCH 05/27] Swap t and j indexes to match the paper --- econml/dml/dynamic_dml.py | 167 +++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 83 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index c8bbc3ec5..1c014c02b 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +# TODO: add paper reference + import abc import numpy as np from warnings import warn @@ -39,19 +41,18 @@ def fit(self, Y, T, X=None, W=None, sample_weight=None, groups=None): "Length of training data should be an integer multiple of time periods." 
inds_train = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] self._model_y_trained = {} - self._model_t_trained = {} - for kappa in np.arange(self.n_periods): - self._model_y_trained[kappa] = clone(self._model_y, safe=False).fit( - self._filter_or_None(X, inds_train + kappa), + self._model_t_trained = {j: {} for j in np.arange(self.n_periods)} + for t in np.arange(self.n_periods): + self._model_y_trained[t] = clone(self._model_y, safe=False).fit( + self._filter_or_None(X, inds_train + t), self._filter_or_None( - W, inds_train + kappa), + W, inds_train + t), Y[inds_train + self.n_periods - 1]) - self._model_t_trained[kappa] = {} - for tau in np.arange(kappa, self.n_periods): - self._model_t_trained[kappa][tau] = clone(self._model_t, safe=False).fit( - self._filter_or_None(X, inds_train + kappa), - self._filter_or_None(W, inds_train + kappa), - T[inds_train + tau]) + for j in np.arange(t, self.n_periods): + self._model_t_trained[j][t] = clone(self._model_t, safe=False).fit( + self._filter_or_None(X, inds_train + t), + self._filter_or_None(W, inds_train + t), + T[inds_train + j]) return self def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): @@ -63,31 +64,32 @@ def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): Y residuals for each period in panel format. This shape is required for _OrthoLearner's crossfitting. T_res : (n, d_t, n_periods) matrix - T residuals for pairs of periods (kappa, tau), where the data is in panel format for kappa - and in index form for tau. For example, the residuals for (kappa, tau) can be retrieved via - T_res[np.arange(n) % n_periods == kappa, ..., tau]. For tau < kappa, the entries of this + T residuals for pairs of periods (t, j), where the data is in panel format for t + and in index form for j. For example, the residuals for (t, j) can be retrieved via + T_res[np.arange(n) % n_periods == t, ..., j]. For t < j, the entries of this matrix are np.nan. 
This shape is required for _OrthoLearner's crossfitting. """ + # TODO: update T_res docstring assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." inds_predict = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] Y_res = np.full(Y.shape, np.nan) T_res = np.full(T.shape + (self.n_periods, ), np.nan) shape_formatter = self._get_shape_formatter(X, W) - for kappa in np.arange(self.n_periods): + for t in np.arange(self.n_periods): Y_slice = Y[inds_predict + self.n_periods - 1] - Y_pred = self._model_y_trained[kappa].predict( - self._filter_or_None(X, inds_predict + kappa), - self._filter_or_None(W, inds_predict + kappa)) - Y_res[np.arange(Y.shape[0]) % self.n_periods == kappa] = Y_slice\ + Y_pred = self._model_y_trained[t].predict( + self._filter_or_None(X, inds_predict + t), + self._filter_or_None(W, inds_predict + t)) + Y_res[np.arange(Y.shape[0]) % self.n_periods == t] = Y_slice\ - shape_formatter(Y_slice, Y_pred).reshape(Y_slice.shape) - for tau in np.arange(kappa, self.n_periods): - T_slice = T[inds_predict + tau] - T_pred = self._model_t_trained[kappa][tau].predict( - self._filter_or_None(X, inds_predict + kappa), - self._filter_or_None(W, inds_predict + kappa)) - T_res[np.arange(Y.shape[0]) % self.n_periods == kappa, ..., tau] = T_slice\ + for j in np.arange(t, self.n_periods): + T_slice = T[inds_predict + j] + T_pred = self._model_t_trained[j][t].predict( + self._filter_or_None(X, inds_predict + t), + self._filter_or_None(W, inds_predict + t)) + T_res[np.arange(Y.shape[0]) % self.n_periods == j, ..., t] = T_slice\ - shape_formatter(T_slice, T_pred).reshape(T_slice.shape) return Y_res, T_res @@ -97,21 +99,21 @@ def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): inds_score = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] if hasattr(self._model_y, 'score'): Y_score = np.full((self.n_periods, ), np.nan) - for kappa in np.arange(self.n_periods): - 
Y_score[kappa] = self._model_y_trained[kappa].score( - self._filter_or_None(X, inds_score + kappa), - self._filter_or_None(W, inds_score + kappa), + for t in np.arange(self.n_periods): + Y_score[t] = self._model_y_trained[t].score( + self._filter_or_None(X, inds_score + t), + self._filter_or_None(W, inds_score + t), Y[inds_score + self.n_periods - 1]) else: Y_score = None if hasattr(self._model_t, 'score'): T_score = np.full((self.n_periods, self.n_periods), np.nan) - for kappa in np.arange(self.n_periods): - for tau in np.arange(kappa, self.n_periods): - T_score[kappa][tau] = self._model_t_trained[kappa][tau].score( - self._filter_or_None(X, inds_score + kappa), - self._filter_or_None(W, inds_score + kappa), - T[inds_score + tau]) + for t in np.arange(self.n_periods): + for j in np.arange(t, self.n_periods): + T_score[j][t] = self._model_t_trained[j][t].score( + self._filter_or_None(X, inds_score + t), + self._filter_or_None(W, inds_score + t), + T[inds_score + j]) else: T_score = None return Y_score, T_score @@ -148,17 +150,17 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, # NOTE: sample weight, sample var are not passed in Y_res, T_res = nuisances self._d_y = Y.shape[1:] - for kappa in np.arange(self.n_periods): - period = self.n_periods - 1 - kappa + for t in np.arange(self.n_periods): + period = self.n_periods - 1 - t period_filter = self.period_filter_gen(period, Y.shape[0]) Y_adj = Y_res[period_filter].copy() - if kappa > 0: + if t > 0: Y_adj -= np.sum( - [self._model_final_trained[tau].predict_with_res( - X[self.period_filter_gen(self.n_periods - 1 - tau, Y.shape[0])] if X is not None else None, - T_res[period_filter, ..., self.n_periods - 1 - tau] - ) for tau in np.arange(kappa)], axis=0) - self._model_final_trained[kappa].fit( + [self._model_final_trained[j].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0])] if X is not None else None, + T_res[self.period_filter_gen(self.n_periods - 1 - j, 
Y.shape[0]), ..., period] + ) for j in np.arange(t)], axis=0) + self._model_final_trained[t].fit( X[period_filter] if X is not None else None, T[period_filter], T_res[period_filter, ..., period], Y_adj) @@ -176,9 +178,9 @@ def predict(self, X=None): x_dy_shape + (self.n_periods * d_t, ) ) - for kappa in range(self.n_periods): - preds[..., kappa * d_t: (kappa + 1) * d_t] = \ - self._model_final_trained[kappa].predict(X).reshape( + for t in range(self.n_periods): + preds[..., t * d_t: (t + 1) * d_t] = \ + self._model_final_trained[t].predict(X).reshape( x_dy_shape + (d_t, ) ) return preds @@ -189,23 +191,23 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None Y_res, T_res = nuisances scores = np.full((self.n_periods, ), np.nan) - for kappa in np.arange(self.n_periods): - period = self.n_periods - 1 - kappa + for t in np.arange(self.n_periods): + period = self.n_periods - 1 - t period_filter = self.period_filter_gen(period, Y.shape[0]) Y_adj = Y_res[period_filter].copy() - if kappa > 0: + if t > 0: Y_adj -= np.sum( - [self._model_final_trained[tau].predict_with_res( - X[self.period_filter_gen(self.n_periods - 1 - tau, Y.shape[0])] if X is not None else None, - T_res[period_filter, ..., self.n_periods - 1 - tau] - ) for tau in np.arange(kappa)], axis=0) - Y_adj_pred = self._model_final_trained[kappa].predict_with_res( + [self._model_final_trained[j].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0])] if X is not None else None, + T_res[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0]), ..., period] + ) for j in np.arange(t)], axis=0) + Y_adj_pred = self._model_final_trained[t].predict_with_res( X[period_filter] if X is not None else None, T_res[period_filter, ..., period]) if sample_weight is not None: - scores[kappa] = np.mean(np.average((Y_adj - Y_adj_pred)**2, weights=sample_weight, axis=0)) + scores[t] = np.mean(np.average((Y_adj - Y_adj_pred)**2, weights=sample_weight, axis=0)) else: - 
scores[kappa] = np.mean((Y_adj - Y_adj_pred) ** 2) + scores[t] = np.mean((Y_adj - Y_adj_pred) ** 2) return scores def period_filter_gen(self, p, n): @@ -233,7 +235,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, self.model_final_._param = coef.T if self.model_final_._n_out else coef def _get_coef_(self): - period_coefs = np.array([self._model_final_trained[kappa]._model.coef_ for kappa in range(self.n_periods)]) + period_coefs = np.array([self._model_final_trained[t]._model.coef_ for t in range(self.n_periods)]) if self._d_y: return np.array([ np.array([period_coefs[k, i, :] for k in range(self.n_periods)]).flatten() @@ -256,49 +258,48 @@ def _fit_single_output_cov(self, nuisances, X, y_index): XT_res = np.array([ [ self._model_final_trained[0]._combine( - X[self.period_filter_gen(tau, Y_res.shape[0])] if X is not None else None, - T_res[self.period_filter_gen(kappa, Y_res.shape[0]), ..., tau], + X[self.period_filter_gen(j, Y_res.shape[0])] if X is not None else None, + T_res[self.period_filter_gen(t, Y_res.shape[0]), ..., j], fitting=False ) - for tau in range(self.n_periods) + for j in range(self.n_periods) ] - for kappa in range(self.n_periods) + for t in range(self.n_periods) ]) d_xt = XT_res.shape[-1] - M = np.zeros((self.n_periods * d_xt, + J = np.zeros((self.n_periods * d_xt, self.n_periods * d_xt)) Sigma = np.zeros((self.n_periods * d_xt, self.n_periods * d_xt)) - self._res_epsilon = {} - for kappa in np.arange(self.n_periods): - # Calculating the (kappa, kappa) block entry (of size n_treatments x n_treatments) of matrix Sigma - period = self.n_periods - 1 - kappa + for t in np.arange(self.n_periods): + # Calculating the (t, t) block entry (of size n_treatments x n_treatments) of matrix Sigma + period = self.n_periods - 1 - t period_filter = self.period_filter_gen(period, Y_res.shape[0]) Y_diff = np.sum([ - self._model_final_trained[tau].predict_with_res( - X[self.period_filter_gen(self.n_periods - 1 - tau, + 
self._model_final_trained[j].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - j, Y_res.shape[0])] if X is not None else None, - T_res[period_filter, ..., self.n_periods - 1 - tau]) - for tau in np.arange(kappa + 1) + T_res[self.period_filter_gen(self.n_periods - 1 - j, + Y_res.shape[0]), ..., period] + ) + for j in np.arange(t + 1) ], axis=0) res_epsilon = (Y_res[period_filter] - (Y_diff[:, y_index] if y_index >= 0 else Y_diff) ).reshape(-1, 1, 1) - self._res_epsilon[period] = res_epsilon.flatten() cur_resT = XT_res[period][period] cov_cur_resT = cur_resT.reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt) - sigma_kappa = np.mean((res_epsilon**2) * cov_cur_resT, axis=0) - Sigma[kappa * d_xt:(kappa + 1) * d_xt, - kappa * d_xt:(kappa + 1) * d_xt] = sigma_kappa - for tau in np.arange(kappa + 1): - # Calculating the (kappa, tau) block entry (of size n_treatments x n_treatments) of matrix M - m_kappa_tau = np.mean( - XT_res[period][self.n_periods - 1 - tau].reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt), + sigma_t = np.mean((res_epsilon**2) * cov_cur_resT, axis=0) + Sigma[t * d_xt:(t + 1) * d_xt, + t * d_xt:(t + 1) * d_xt] = sigma_t + for j in np.arange(t + 1): + # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix J + m_t_j = np.mean( + XT_res[self.n_periods - 1 - j][period].reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt), axis=0) - M[kappa * d_xt:(kappa + 1) * d_xt, - tau * d_xt:(tau + 1) * d_xt] = m_kappa_tau - self._M = M - return np.linalg.inv(M) @ Sigma @ np.linalg.inv(M).T + J[t * d_xt:(t + 1) * d_xt, + j * d_xt:(j + 1) * d_xt] = m_t_j + return np.linalg.inv(J) @ Sigma @ np.linalg.inv(J).T class _DynamicFinalWrapper(_FinalWrapper): @@ -668,7 +669,7 @@ def _expand_treatments(self, X, *Ts): if self.transformer: T = np.hstack([ base_expand_treatments( - X, T[:, [kappa]])[1] for kappa in range(self._n_periods) + X, T[:, [t]])[1] for t in range(self._n_periods) ]) outTs.append(T) return (X,) + tuple(outTs) 
From 0615d70c7e7e2bbb6a08eafd218a8bf321e54b67 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 29 Jul 2021 15:49:11 -0400 Subject: [PATCH 06/27] Update covariance matrix to include off-diagonal elements --- econml/dml/dynamic_dml.py | 67 +++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index 1c014c02b..61fa68834 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -255,6 +255,8 @@ def _fit_single_output_cov(self, nuisances, X, y_index): x (n_periods*n_treatments) matrix for a single outcome. """ Y_res, T_res = nuisances + # Calculate auxiliary quantities + # X ⨂ T_res XT_res = np.array([ [ self._model_final_trained[0]._combine( @@ -267,38 +269,49 @@ def _fit_single_output_cov(self, nuisances, X, y_index): for t in range(self.n_periods) ]) d_xt = XT_res.shape[-1] + # sum(model_final.predict(X, T_res)) + Y_diff = np.array([ + np.sum([ + self._model_final_trained[j].predict_with_res( + X[self.period_filter_gen(self.n_periods - 1 - j, + Y_res.shape[0])] if X is not None else None, + T_res[ + self.period_filter_gen(self.n_periods - 1 - j, Y_res.shape[0]), ..., self.n_periods - 1 - t] + ) for j in np.arange(t + 1)], + axis=0 + ) + for t in np.arange(self.n_periods) + ]) J = np.zeros((self.n_periods * d_xt, self.n_periods * d_xt)) Sigma = np.zeros((self.n_periods * d_xt, self.n_periods * d_xt)) for t in np.arange(self.n_periods): - # Calculating the (t, t) block entry (of size n_treatments x n_treatments) of matrix Sigma - period = self.n_periods - 1 - t - period_filter = self.period_filter_gen(period, Y_res.shape[0]) - Y_diff = np.sum([ - self._model_final_trained[j].predict_with_res( - X[self.period_filter_gen(self.n_periods - 1 - j, - Y_res.shape[0])] if X is not None else None, - T_res[self.period_filter_gen(self.n_periods - 1 - j, - Y_res.shape[0]), ..., period] - ) - for j in np.arange(t + 1) - ], axis=0) - res_epsilon = 
(Y_res[period_filter] - - (Y_diff[:, y_index] if y_index >= 0 else Y_diff) - ).reshape(-1, 1, 1) - cur_resT = XT_res[period][period] - cov_cur_resT = cur_resT.reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt) - sigma_t = np.mean((res_epsilon**2) * cov_cur_resT, axis=0) - Sigma[t * d_xt:(t + 1) * d_xt, - t * d_xt:(t + 1) * d_xt] = sigma_t - for j in np.arange(t + 1): - # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix J - m_t_j = np.mean( - XT_res[self.n_periods - 1 - j][period].reshape(-1, d_xt, 1) @ cur_resT.reshape(-1, 1, d_xt), - axis=0) - J[t * d_xt:(t + 1) * d_xt, - j * d_xt:(j + 1) * d_xt] = m_t_j + period_t = self.n_periods - 1 - t + period_filter_t = self.period_filter_gen(period_t, Y_res.shape[0]) + res_epsilon_t = (Y_res[period_filter_t] - + (Y_diff[t][:, y_index] if y_index >= 0 else Y_diff[t]) + ).reshape(-1, 1, 1) + resT_t = XT_res[period_t][period_t] + for j in np.arange(self.n_periods): + # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix Sigma + period_j = self.n_periods - 1 - j + period_filter_j = self.period_filter_gen(period_j, Y_res.shape[0]) + res_epsilon_j = (Y_res[period_filter_j] - + (Y_diff[j][:, y_index] if y_index >= 0 else Y_diff[j]) + ).reshape(-1, 1, 1) + resT_j = XT_res[period_j][period_j] + cov_resT_tj = resT_t.reshape(-1, d_xt, 1) @ resT_j.reshape(-1, 1, d_xt) + sigma_tj = np.mean((res_epsilon_t * res_epsilon_j) * cov_resT_tj, axis=0) + Sigma[t * d_xt:(t + 1) * d_xt, + j * d_xt:(j + 1) * d_xt] = sigma_tj + if j <= t: + # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix J + m_tj = np.mean( + XT_res[period_j][period_t].reshape(-1, d_xt, 1) @ resT_t.reshape(-1, 1, d_xt), + axis=0) + J[t * d_xt:(t + 1) * d_xt, + j * d_xt:(j + 1) * d_xt] = m_tj return np.linalg.inv(J) @ Sigma @ np.linalg.inv(J).T From 7dc65b8b1e4a488f5cf4055be67d0b7474f1f7f4 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Fri, 30 Jul 2021 23:29:33 -0400 Subject: 
[PATCH 07/27] Add support for out of order groups --- doc/reference.rst | 1 + doc/spec/estimation/dml.rst | 42 ++++- doc/spec/references.rst | 7 +- econml/_ortho_learner.py | 19 ++- econml/dml/__init__.py | 2 +- econml/dml/_rlearner.py | 4 +- econml/dml/causal_forest.py | 2 +- econml/dml/dml.py | 2 +- econml/dml/dynamic_dml.py | 143 ++++++++++-------- econml/dr/_drlearner.py | 4 +- econml/iv/dml/_dml.py | 8 +- econml/iv/dr/_dr.py | 4 +- econml/policy/_drlearner.py | 4 +- econml/tests/test_dynamic_dml.py | 36 +++-- ...mic Double Machine Learning Examples.ipynb | 49 +++--- 15 files changed, 211 insertions(+), 116 deletions(-) diff --git a/doc/reference.rst b/doc/reference.rst index c7f9f3ca7..865cc52a4 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -17,6 +17,7 @@ Double Machine Learning (DML) econml.dml.SparseLinearDML econml.dml.CausalForestDML econml.dml.NonParamDML + econml.dml.DynamicDML .. _dr_api: diff --git a/doc/spec/estimation/dml.rst b/doc/spec/estimation/dml.rst index 3032f72c2..af65c4c92 100644 --- a/doc/spec/estimation/dml.rst +++ b/doc/spec/estimation/dml.rst @@ -34,7 +34,8 @@ What are the relevant estimator classes? This section describes the methodology implemented in the classes, :class:`._RLearner`, :class:`.DML`, :class:`.LinearDML`, :class:`.SparseLinearDML`, :class:`.KernelDML`, :class:`.NonParamDML`, -:class:`.CausalForestDML`. +:class:`.CausalForestDML`, +:class:`.DynamicDML`. Click on each of these links for a detailed module documentation and input parameters of each class. @@ -71,8 +72,10 @@ Most of the methods provided make a parametric form assumption on the heterogene linear on some pre-defined; potentially high-dimensional; featurization). These methods include: :class:`.DML`, :class:`.LinearDML`, :class:`.SparseLinearDML`, :class:`.KernelDML`. -For fullly non-parametric heterogeneous treatment effect models, checkout the :class:`.NonParamDML` -and the :class:`.CausalForestDML`. 
For more options of non-parametric CATE estimators, +For fully non-parametric heterogeneous treatment effect models, check out the :class:`.NonParamDML` +and the :class:`.CausalForestDML`. +For treatments assigned sequentially over several time periods, see the class :class:`.DynamicDML`. +For more options of non-parametric CATE estimators, check out the :ref:`Forest Estimators User Guide ` and the :ref:`Meta Learners User Guide `. @@ -155,7 +158,7 @@ Class Hierarchy Structure In this library we implement variants of several of the approaches mentioned in the last section. The hierarchy structure of the implemented CATE estimators is as follows. - .. inheritance-diagram:: econml.dml.LinearDML econml.dml.SparseLinearDML econml.dml.KernelDML econml.dml.NonParamDML econml.dml.CausalForestDML + .. inheritance-diagram:: econml.dml.LinearDML econml.dml.SparseLinearDML econml.dml.KernelDML econml.dml.NonParamDML econml.dml.CausalForestDML econml.dml.DynamicDML :parts: 1 :private-bases: :top-classes: econml._rlearner._RLearner, econml._cate_estimator.StatsModelsCateEstimatorMixin, econml._cate_estimator.DebiasedLassoCateEstimatorMixin @@ -286,6 +289,37 @@ Below we give a brief description of each of these classes: Check out :ref:`Forest Estimators User Guide ` for more information on forest based CATE models and other alternatives to the :class:`.CausalForestDML`. + * **DynamicDML.** The class :class:`.DynamicDML` is an extension of the Double ML approach for treatments assigned sequentially over time periods. + This estimator will adjust for treatments that can have causal effects on future outcomes. The data corresponds to a Markov decision process :math:`\{X_t, W_t, T_t, Y_t\}_{t=1}^m`, + where :math:`X_t, W_t` correspond to the state at time :math:`t`, :math:`T_t` is the treatment at time :math:`t` and :math:`Y_t` is the observed outcome at time :math:`t`. + + The model makes the following structural equation assumptions on the data generating process: + + .. 
math:: + + X_t =~& A \cdot T_{t-1} + B \cdot X_{t-1} + \eta_t\\ + T_t =~& p(T_{t-1}, X_t, \zeta_t) \\ + Y_t =~& \theta_0'T_t + \mu'X_t + \epsilon_t + + For more details about this model and underlying assumptions, see [Lewis2021]_. + + To learn the treatment effects of treatments in the different periods on the last period outcome, one can simply call: + + .. testcode:: + + import numpy as np + from econml.dml import DynamicDML + + n_panels = 100 # number of panels + n_periods = 3 # number of time periods per panel + n = n_panels * n_periods + groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0) + X = np.random.normal(size=(n, 1)) + T = np.random.normal(size=(n, 2)) + y = np.random.normal(size=(n, )) + est = DynamicDML() + est.fit(y, T, X=X, W=None, groups=groups, inference="auto") + * **_RLearner.** The internal private class :class:`._RLearner` is a parent of the :class:`.DML` and allows the user to specify any way of fitting a final model that takes as input the residual :math:`\tilde{T}`, the features :math:`X` and predicts the residual :math:`\tilde{Y}`. Moreover, the nuisance models take as input diff --git a/doc/spec/references.rst b/doc/spec/references.rst index 0692af351..dc9abe5bb 100644 --- a/doc/spec/references.rst +++ b/doc/spec/references.rst @@ -113,4 +113,9 @@ References .. [Lundberg2017] Lundberg, S., Lee, S. (2017). A Unified Approach to Interpreting Model Predictions. - URL https://arxiv.org/abs/1705.07874 \ No newline at end of file + URL https://arxiv.org/abs/1705.07874 + +.. [Lewis2021] + Lewis, G., Syrgkanis, V. (2021). + Double/Debiased Machine Learning for Dynamic Treatment Effects. 
+ URL https://arxiv.org/abs/2002.07285 \ No newline at end of file diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 2cee3da92..8134ffd88 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -664,7 +664,8 @@ def fit(self, Y, T, X=None, W=None, Z=None, *, sample_weight=None, sample_var=No X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, - sample_var=sample_var) + sample_var=sample_var, + groups=groups) return self @@ -748,17 +749,20 @@ def _fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None, group sample_weight=sample_weight, groups=groups) return nuisances, fitted_models, fitted_inds, scores - def _fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def _fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, + sample_weight=None, sample_var=None, groups=None): self._ortho_learner_model_final.fit(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, - sample_var=sample_var)) + sample_var=sample_var, + groups=groups)) self.score_ = None if hasattr(self._ortho_learner_model_final, 'score'): self.score_ = self._ortho_learner_model_final.score(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, - sample_var=sample_var)) + sample_var=sample_var, + groups=groups)) def const_marginal_effect(self, X=None): X, = check_input_arrays(X) @@ -793,7 +797,7 @@ def effect_inference(self, X=None, *, T0=0, T1=1): return super().effect_inference(X, T0=T0, T1=T1) effect_inference.__doc__ = LinearCateEstimator.effect_inference.__doc__ - def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None): + def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): """ Score the fitted CATE model on a new data set. Generates nuisance parameters for the new data set based on the fitted nuisance models created at fit time. 
@@ -817,6 +821,8 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None): Instruments for each sample sample_weight: optional(n,) vector or None (Default=None) Weights for each samples + groups: (n,) vector, optional + All rows corresponding to the same group will be kept together during splitting. Returns ------- @@ -853,7 +859,8 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None): nuisances[it] = np.mean(nuisances[it], axis=0) return self._ortho_learner_model_final.score(Y, T, nuisances=nuisances, - **filter_none_kwargs(X=X, W=W, Z=Z, sample_weight=sample_weight)) + **filter_none_kwargs(X=X, W=W, Z=Z, + sample_weight=sample_weight, groups=groups)) @property def ortho_learner_model_final_(self): diff --git a/econml/dml/__init__.py b/econml/dml/__init__.py index 2c3785d34..00035f2c1 100644 --- a/econml/dml/__init__.py +++ b/econml/dml/__init__.py @@ -8,7 +8,7 @@ Then estimates a CATE model by regressing the residual outcome on the residual treatment in a manner that accounts for heterogeneity in the regression coefficient, with respect to X. For the theoretical foundations of these methods see [dml]_, [rlearner]_, [paneldml]_, -[lassodml]_, [ortholearner]_. +[lassodml]_, [ortholearner]_, [dynamicdml]_. 
References ---------- diff --git a/econml/dml/_rlearner.py b/econml/dml/_rlearner.py index 5b954716a..d5989541a 100644 --- a/econml/dml/_rlearner.py +++ b/econml/dml/_rlearner.py @@ -91,7 +91,7 @@ class _ModelFinal: def __init__(self, model_final): self._model_final = model_final - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): Y_res, T_res = nuisances self._model_final.fit(X, T, T_res, Y_res, sample_weight=sample_weight, sample_var=sample_var) return self @@ -99,7 +99,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, def predict(self, X=None): return self._model_final.predict(X) - def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): Y_res, T_res = nuisances if Y_res.ndim == 1: Y_res = Y_res.reshape((-1, 1)) diff --git a/econml/dml/causal_forest.py b/econml/dml/causal_forest.py index 4d4ac5eb8..84bb034d9 100644 --- a/econml/dml/causal_forest.py +++ b/econml/dml/causal_forest.py @@ -52,7 +52,7 @@ def _ate_and_stderr(self, drpreds, mask=None): stderr = (np.nanstd(drpreds, axis=0) / np.sqrt(nonnan)).reshape(self._d_y + self._d_t) return point, stderr - def fit(self, X, T, T_res, Y_res, sample_weight=None, sample_var=None): + def fit(self, X, T, T_res, Y_res, sample_weight=None, sample_var=None, groups=None): # Track training dimensions to see if Y or T is a vector instead of a 2-dimensional array self._d_t = shape(T_res)[1:] self._d_y = shape(Y_res)[1:] diff --git a/econml/dml/dml.py b/econml/dml/dml.py index 36d54f69c..defacd0cf 100644 --- a/econml/dml/dml.py +++ b/econml/dml/dml.py @@ -134,7 +134,7 @@ def _combine(self, X, T, fitting=True): F = np.ones((T.shape[0], 1)) return cross_product(F, T) - def fit(self, 
X, T, T_res, Y_res, sample_weight=None, sample_var=None): + def fit(self, X, T, T_res, Y_res, sample_weight=None, sample_var=None, groups=None): # Track training dimensions to see if Y or T is a vector instead of a 2-dimensional array self._d_t = shape(T_res)[1:] self._d_y = shape(Y_res)[1:] diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index 61fa68834..f39a25826 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -23,6 +23,17 @@ get_feature_names_or_default) +def _get_groups_period_filter(groups, n_periods): + group_counts = {} + group_period_filter = {i: [] for i in range(n_periods)} + for i, g in enumerate(groups): + if g not in group_counts: + group_counts[g] = 0 + group_period_filter[group_counts[g]].append(i) + group_counts[g] += 1 + return group_period_filter + + class _DynamicModelNuisance: """ Nuisance model fits the model_y and model_t at fit time and at predict time @@ -36,23 +47,23 @@ def __init__(self, model_y, model_t, n_periods): self.n_periods = n_periods def fit(self, Y, T, X=None, W=None, sample_weight=None, groups=None): - """Fit a series of nuisance models for each period or period pairs""" + """Fit a series of nuisance models for each period or period pairs.""" assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." 
- inds_train = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] + period_filters = _get_groups_period_filter(groups, self.n_periods) self._model_y_trained = {} self._model_t_trained = {j: {} for j in np.arange(self.n_periods)} for t in np.arange(self.n_periods): self._model_y_trained[t] = clone(self._model_y, safe=False).fit( - self._filter_or_None(X, inds_train + t), + self._filter_or_None(X, period_filters[t]), self._filter_or_None( - W, inds_train + t), - Y[inds_train + self.n_periods - 1]) + W, period_filters[t]), + Y[period_filters[self.n_periods - 1]]) for j in np.arange(t, self.n_periods): self._model_t_trained[j][t] = clone(self._model_t, safe=False).fit( - self._filter_or_None(X, inds_train + t), - self._filter_or_None(W, inds_train + t), - T[inds_train + j]) + self._filter_or_None(X, period_filters[t]), + self._filter_or_None(W, period_filters[t]), + T[period_filters[j]]) return self def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): @@ -70,40 +81,39 @@ def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): matrix are np.nan. This shape is required for _OrthoLearner's crossfitting. """ - # TODO: update T_res docstring assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." 
- inds_predict = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] + period_filters = _get_groups_period_filter(groups, self.n_periods) Y_res = np.full(Y.shape, np.nan) T_res = np.full(T.shape + (self.n_periods, ), np.nan) shape_formatter = self._get_shape_formatter(X, W) for t in np.arange(self.n_periods): - Y_slice = Y[inds_predict + self.n_periods - 1] + Y_slice = Y[period_filters[self.n_periods - 1]] Y_pred = self._model_y_trained[t].predict( - self._filter_or_None(X, inds_predict + t), - self._filter_or_None(W, inds_predict + t)) - Y_res[np.arange(Y.shape[0]) % self.n_periods == t] = Y_slice\ + self._filter_or_None(X, period_filters[t]), + self._filter_or_None(W, period_filters[t])) + Y_res[period_filters[t]] = Y_slice\ - shape_formatter(Y_slice, Y_pred).reshape(Y_slice.shape) for j in np.arange(t, self.n_periods): - T_slice = T[inds_predict + j] + T_slice = T[period_filters[j]] T_pred = self._model_t_trained[j][t].predict( - self._filter_or_None(X, inds_predict + t), - self._filter_or_None(W, inds_predict + t)) - T_res[np.arange(Y.shape[0]) % self.n_periods == j, ..., t] = T_slice\ + self._filter_or_None(X, period_filters[t]), + self._filter_or_None(W, period_filters[t])) + T_res[period_filters[j], ..., t] = T_slice\ - shape_formatter(T_slice, T_pred).reshape(T_slice.shape) return Y_res, T_res def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." 
- inds_score = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0] + period_filters = _get_groups_period_filter(groups, self.n_periods) if hasattr(self._model_y, 'score'): Y_score = np.full((self.n_periods, ), np.nan) for t in np.arange(self.n_periods): Y_score[t] = self._model_y_trained[t].score( - self._filter_or_None(X, inds_score + t), - self._filter_or_None(W, inds_score + t), - Y[inds_score + self.n_periods - 1]) + self._filter_or_None(X, period_filters[t]), + self._filter_or_None(W, period_filters[t]), + Y[period_filters[self.n_periods - 1]]) else: Y_score = None if hasattr(self._model_t, 'score'): @@ -111,9 +121,9 @@ def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): for t in np.arange(self.n_periods): for j in np.arange(t, self.n_periods): T_score[j][t] = self._model_t_trained[j][t].score( - self._filter_or_None(X, inds_score + t), - self._filter_or_None(W, inds_score + t), - T[inds_score + j]) + self._filter_or_None(X, period_filters[t]), + self._filter_or_None(W, period_filters[t]), + T[period_filters[j]]) else: T_score = None return Y_score, T_score @@ -146,23 +156,23 @@ def __init__(self, model_final, n_periods): self.n_periods = n_periods self._model_final_trained = {k: clone(self._model_final, safe=False) for k in np.arange(n_periods)} - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): # NOTE: sample weight, sample var are not passed in + period_filters = _get_groups_period_filter(groups, self.n_periods) Y_res, T_res = nuisances self._d_y = Y.shape[1:] for t in np.arange(self.n_periods): period = self.n_periods - 1 - t - period_filter = self.period_filter_gen(period, Y.shape[0]) - Y_adj = Y_res[period_filter].copy() + Y_adj = Y_res[period_filters[period]].copy() if t > 0: Y_adj -= np.sum( [self._model_final_trained[j].predict_with_res( - 
X[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0])] if X is not None else None, - T_res[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0]), ..., period] + X[period_filters[self.n_periods - 1 - j]] if X is not None else None, + T_res[period_filters[self.n_periods - 1 - j], ..., period] ) for j in np.arange(t)], axis=0) self._model_final_trained[t].fit( - X[period_filter] if X is not None else None, T[period_filter], - T_res[period_filter, ..., period], Y_adj) + X[period_filters[period]] if X is not None else None, T[period_filters[period]], + T_res[period_filters[period], ..., period], Y_adj) return self @@ -185,25 +195,25 @@ def predict(self, X=None): ) return preds - def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." 
Y_res, T_res = nuisances - scores = np.full((self.n_periods, ), np.nan) + period_filters = _get_groups_period_filter(groups, self.n_periods) for t in np.arange(self.n_periods): period = self.n_periods - 1 - t - period_filter = self.period_filter_gen(period, Y.shape[0]) - Y_adj = Y_res[period_filter].copy() + # period_filter = self.period_filter_gen(period, Y.shape[0]) + Y_adj = Y_res[period_filters[period]].copy() if t > 0: Y_adj -= np.sum( [self._model_final_trained[j].predict_with_res( - X[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0])] if X is not None else None, - T_res[self.period_filter_gen(self.n_periods - 1 - j, Y.shape[0]), ..., period] + X[period_filters[self.n_periods - 1 - j]] if X is not None else None, + T_res[period_filters[self.n_periods - 1 - j], ..., period] ) for j in np.arange(t)], axis=0) Y_adj_pred = self._model_final_trained[t].predict_with_res( - X[period_filter] if X is not None else None, - T_res[period_filter, ..., period]) + X[period_filters[period]] if X is not None else None, + T_res[period_filters[period], ..., period]) if sample_weight is not None: scores[t] = np.mean(np.average((Y_adj - Y_adj_pred)**2, weights=sample_weight, axis=0)) else: @@ -225,10 +235,11 @@ def __init__(self, model_final, n_periods): super().__init__(model_final, n_periods) self.model_final_ = StatsModelsLinearRegression(fit_intercept=False) - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): - super().fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): + super().fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, + sample_weight=sample_weight, sample_var=sample_var, groups=groups) # Compose final model - cov = self._get_cov(nuisances, X) + cov = self._get_cov(nuisances, X, groups) coef = self._get_coef_() self.model_final_._n_out = self._d_y[0] if 
self._d_y else 0 self.model_final_._param_var = cov / (Y.shape[0] / self.n_periods) @@ -243,25 +254,28 @@ def _get_coef_(self): ]) return period_coefs.flatten() - def _get_cov(self, nuisances, X): + def _get_cov(self, nuisances, X, groups): if self._d_y: return np.array( - [self._fit_single_output_cov((nuisances[0][:, i], nuisances[1]), X, i) for i in range(self._d_y[0])] + [self._fit_single_output_cov((nuisances[0][:, i], nuisances[1]), X, i, groups) + for i in range(self._d_y[0])] ) - return self._fit_single_output_cov(nuisances, X, -1) + return self._fit_single_output_cov(nuisances, X, -1, groups) - def _fit_single_output_cov(self, nuisances, X, y_index): + def _fit_single_output_cov(self, nuisances, X, y_index, groups): """ Calculates the covariance (n_periods*n_treatments) x (n_periods*n_treatments) matrix for a single outcome. """ + # TODO: add group filters here Y_res, T_res = nuisances # Calculate auxiliary quantities + period_filters = _get_groups_period_filter(groups, self.n_periods) # X ⨂ T_res XT_res = np.array([ [ self._model_final_trained[0]._combine( - X[self.period_filter_gen(j, Y_res.shape[0])] if X is not None else None, - T_res[self.period_filter_gen(t, Y_res.shape[0]), ..., j], + X[period_filters[j]] if X is not None else None, + T_res[period_filters[t], ..., j], fitting=False ) for j in range(self.n_periods) @@ -273,10 +287,9 @@ def _fit_single_output_cov(self, nuisances, X, y_index): Y_diff = np.array([ np.sum([ self._model_final_trained[j].predict_with_res( - X[self.period_filter_gen(self.n_periods - 1 - j, - Y_res.shape[0])] if X is not None else None, + X[period_filters[self.n_periods - 1 - j]] if X is not None else None, T_res[ - self.period_filter_gen(self.n_periods - 1 - j, Y_res.shape[0]), ..., self.n_periods - 1 - t] + period_filters[self.n_periods - 1 - j], ..., self.n_periods - 1 - t] ) for j in np.arange(t + 1)], axis=0 ) @@ -288,7 +301,7 @@ def _fit_single_output_cov(self, nuisances, X, y_index): self.n_periods * d_xt)) for t in 
np.arange(self.n_periods): period_t = self.n_periods - 1 - t - period_filter_t = self.period_filter_gen(period_t, Y_res.shape[0]) + period_filter_t = period_filters[period_t] res_epsilon_t = (Y_res[period_filter_t] - (Y_diff[t][:, y_index] if y_index >= 0 else Y_diff[t]) ).reshape(-1, 1, 1) @@ -296,7 +309,7 @@ def _fit_single_output_cov(self, nuisances, X, y_index): for j in np.arange(self.n_periods): # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix Sigma period_j = self.n_periods - 1 - j - period_filter_j = self.period_filter_gen(period_j, Y_res.shape[0]) + period_filter_j = period_filters[period_j] res_epsilon_j = (Y_res[period_filter_j] - (Y_diff[j][:, y_index] if y_index >= 0 else Y_diff[j]) ).reshape(-1, 1, 1) @@ -545,11 +558,19 @@ def _strata(self, Y, T, X=None, W=None, Z=None, "we will disallow passing X and W by position.", ['X', 'W']) def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups, cache_values=False, inference=None): - """ - Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. + """Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. + + The input data must contain groups with the same size corresponding to the number + of time periods the treatments were assigned over. - The input data has to be in panel format, i.e. a sequence of groups, each with the same size corresponding - to the number of time periods the treatments were assigned over. + The data should be preferably in panel format, with groups clustered together. + If group members do not appear together, the following is assumed: + + * the first instance of a group in the dataset is assumed to correspond to the first period of that group + * the second instance of a group in the dataset is assumed to correspond to the + second period of that group + + ...etc. 
Parameters ---------- @@ -589,7 +610,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou cache_values=cache_values, inference=inference) - def score(self, Y, T, X=None, W=None): + def score(self, Y, T, X=None, W=None, *, groups): """ Score the fitted CATE model on a new data set. Generates nuisance parameters for the new data set based on the fitted residual nuisance models created at fit time. @@ -608,6 +629,8 @@ def score(self, Y, T, X=None, W=None): Features for each sample (Required: n = n_groups * n_periods) W: optional(n, d_w) matrix or None (Default=None) Controls for each sample (Required: n = n_groups * n_periods) + groups: (n,) vector, required + All rows corresponding to the same group will be kept together during splitting. Returns ------- @@ -615,7 +638,7 @@ def score(self, Y, T, X=None, W=None): The MSE of the final CATE model on the new data. """ # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring - return super().score(Y, T, X=X, W=W) + return super().score(Y, T, X=X, W=W, groups=groups) def cate_treatment_names(self, treatment_names=None): """ diff --git a/econml/dr/_drlearner.py b/econml/dr/_drlearner.py index 03edf8e62..5ec747e38 100644 --- a/econml/dr/_drlearner.py +++ b/econml/dr/_drlearner.py @@ -124,7 +124,7 @@ def __init__(self, model_final, featurizer, multitask_model_final): self._multitask_model_final = multitask_model_final return - def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None, groups=None): Y_pred, = nuisances self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton) self.d_t = Y_pred.shape[-1] - 1 # track # of treatment (exclude baseline treatment) @@ -154,7 +154,7 @@ def predict(self, X=None): preds = np.array([mdl.predict(X).reshape((-1,) + self.d_y) for mdl in self.models_cate]) return np.moveaxis(preds, 0, 
-1) # move treatment dim to end - def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None, groups=None): if (X is not None) and (self._featurizer is not None): X = self._featurizer.transform(X) Y_pred, = nuisances diff --git a/econml/iv/dml/_dml.py b/econml/iv/dml/_dml.py index 23f0add53..98a85f274 100644 --- a/econml/iv/dml/_dml.py +++ b/econml/iv/dml/_dml.py @@ -33,7 +33,7 @@ def __init__(self): self._model_final = _FinalWrapper(LinearRegression(fit_intercept=False), fit_cate_intercept=True, featurizer=None, use_weight_trick=False) - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): Y_res, T_res, Z_res = nuisances if Z_res.ndim == 1: Z_res = Z_res.reshape(-1, 1) @@ -49,7 +49,7 @@ def predict(self, X=None): # TODO: allow the final model to actually use X? 
return self._model_final.predict(X=None) - def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): Y_res, T_res, Z_res = nuisances if Y_res.ndim == 1: Y_res = Y_res.reshape((-1, 1)) @@ -379,7 +379,7 @@ class _BaseDMLIVModelFinal: def __init__(self, model_final): self._model_final = clone(model_final, safe=False) - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): Y_res, T_res = nuisances self._model_final.fit(X, T, T_res, Y_res, sample_weight=sample_weight, sample_var=sample_var) return self @@ -387,7 +387,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, def predict(self, X=None): return self._model_final.predict(X) - def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): Y_res, T_res = nuisances if Y_res.ndim == 1: Y_res = Y_res.reshape((-1, 1)) diff --git a/econml/iv/dr/_dr.py b/econml/iv/dr/_dr.py index 003b1577e..9ef8c547a 100644 --- a/econml/iv/dr/_dr.py +++ b/econml/iv/dr/_dr.py @@ -75,7 +75,7 @@ def _effect_estimate(self, nuisances): self._cov_clip, np.inf) return prel_theta + (res_y - prel_theta * res_t) * res_z / clipped_cov, clipped_cov - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): self.d_y = Y.shape[1:] self.d_t = nuisances[1].shape[1:] self.d_z = nuisances[3].shape[1:] @@ -114,7 +114,7 @@ def predict(self, X=None): X = self._featurizer.transform(X) return 
self._model_final.predict(X).reshape((-1,) + self.d_y + self.d_t) - def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): theta_dr, clipped_cov = self._effect_estimate(nuisances) if (X is not None) and (self._featurizer is not None): diff --git a/econml/policy/_drlearner.py b/econml/policy/_drlearner.py index 145a36fd9..d9bc7966d 100644 --- a/econml/policy/_drlearner.py +++ b/econml/policy/_drlearner.py @@ -14,7 +14,7 @@ class _PolicyModelFinal(_ModelFinal): - def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None): + def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None, groups=None): if sample_var is not None: warn('Parameter `sample_var` is ignored by the final estimator') sample_var = None @@ -38,7 +38,7 @@ def predict(self, X=None): return pred[:, np.newaxis, :] return pred - def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None): + def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None, groups=None): return 0 diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index 21a5aa7a9..b573be805 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -9,6 +9,7 @@ from sklearn.linear_model import (LinearRegression, LassoCV, Lasso, MultiTaskLasso, MultiTaskLassoCV, LogisticRegression) from econml.dml import DynamicDML +from econml.dml.dynamic_dml import _get_groups_period_filter from econml.inference import BootstrapInference, EmpiricalInferenceResults, NormalInferenceResults from econml.utilities import shape, hstack, vstack, reshape, cross_product import econml.tests.utilities # bugfix for assertWarns @@ -267,25 +268,34 @@ def test_perf(self): def lasso_model(): return LassoCV(cv=3, alphas=alpha_regs, max_iter=500) + # No 
heterogeneity dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance( s_x, random_seed=1) Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12) est = DynamicDML(model_y=lasso_model(), model_t=lasso_model(), cv=3) - est.fit(Y, T, X=X, W=W, groups=groups, inference="auto") - np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=1e-01) - np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) - np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) + # Define indices to test + groups_filter = _get_groups_period_filter(groups, 3) + shuffled_idx = np.array([groups_filter[i] for i in range(n_periods)]).flatten() + test_indices = [np.arange(n_units * n_periods), shuffled_idx] + for test_idx in test_indices: + est.fit(Y[test_idx], T[test_idx], X=X[test_idx] if X is not None else None, W=W[test_idx], + groups=groups[test_idx], inference="auto") + np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=1e-01) + np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) + np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) + # Heterogeneous effects - hetero_strength = .5 - hetero_inds = np.arange(n_x - n_treatments, n_x) dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance( s_x, hetero_strength=hetero_strength, hetero_inds=hetero_inds, random_seed=1) Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12) - est.fit(Y, T, X=X, W=W, groups=groups, inference="auto") - np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=0.2) - np.testing.assert_allclose(est.coef_, dgp.true_hetero_effect[:, hetero_inds + 1], atol=0.2) - np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) - np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) - 
np.testing.assert_array_less(est.coef__interval()[0], dgp.true_hetero_effect[:, hetero_inds + 1]) - np.testing.assert_array_less(dgp.true_hetero_effect[:, hetero_inds + 1], est.coef__interval()[1]) + for test_idx in test_indices: + hetero_strength = .5 + hetero_inds = np.arange(n_x - n_treatments, n_x) + est.fit(Y[test_idx], T[test_idx], X=X[test_idx], W=W[test_idx], groups=groups[test_idx], inference="auto") + np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=0.2) + np.testing.assert_allclose(est.coef_, dgp.true_hetero_effect[:, hetero_inds + 1], atol=0.2) + np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) + np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) + np.testing.assert_array_less(est.coef__interval()[0], dgp.true_hetero_effect[:, hetero_inds + 1]) + np.testing.assert_array_less(dgp.true_hetero_effect[:, hetero_inds + 1], est.coef__interval()[1]) diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb index 268402dfb..6eb2e2b91 100644 --- a/notebooks/Dynamic Double Machine Learning Examples.ipynb +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -28,7 +28,7 @@ "* the effect of prices on demand in stores where prices of goods change over time\n", "* the effect of income on health outcomes in people who receive yearly income\n", "\n", - "The expected data format is balanced panel data. Each panel corresponds to one entity (e.g. company, store or person) and the different rows in a panel correspond to different time points. Example:\n", + "The preferred data format is balanced panel data. Each panel corresponds to one entity (e.g. company, store or person) and the different rows in a panel correspond to different time points. 
Example:\n", "\n", "||Company|Year|Features|Investment|Revenue|\n", "|---|---|---|---|---|---|\n", @@ -44,6 +44,21 @@ "\n", "(Note: when passing the data to the DynamicDML estimator, the \"Company\" column above corresponds to the `groups` argument at fit time. The \"Year\" column above should not be passed in as it will be inferred from the \"Company\" column)\n", "\n", + "If group members do not appear together, it is assumed that the first instance of a group in the dataset corresponds to the first period of that group, the second instance of the group corresponds to the second period, etc. Example:\n", + "\n", + "||Company|Features|Investment|Revenue|\n", + "|---|---|---|---|---|\n", + "|1|A|...|\\$1,000|\\$10,000|\n", + "|2|B|...|\\$0|\\$5,000|\n", + "|3|C|...|\\$1,000|\\$20,000|\n", + "|4|A|...|\\$2,000|\\$12,000|\n", + "|5|B|...|\\$100|\\$10,000|\n", + "|6|C|...|\\$1,500|\\$25,000|\n", + "|7|A|...|\\$3,000|\\$15,000|\n", + "|8|B|...|\\$1,200|\\$7,000|\n", + "|9|C|...|\\$500|\\$15,000|\n", + "\n", + "In this dataset, the 1st row corresponds to the first period of group `A`, the 4th row corresponds to the second period of group `A`, etc.\n", "\n", "In this notebook, we show the performance of the DynamicDML on synthetic and observational data. \n", "\n", @@ -172,7 +187,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -269,10 +284,10 @@ " cate_intercept|$(T0)_0$ 0.711 0.041 17.224 0.0 0.643 0.779 \n", "\n", "\n", - " cate_intercept|$(T0)_1$ 1.031 0.1 10.306 0.0 0.866 1.195 \n", + " cate_intercept|$(T0)_1$ 1.031 0.096 10.715 0.0 0.872 1.189 \n", "\n", "\n", - " cate_intercept|$(T0)_2$ 0.518 0.142 3.658 0.0 0.285 0.751 \n", "\n", "

A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -284,8 +299,8 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", "cate_intercept|$(T0)_0$ 0.711 0.041 17.224 0.0 0.643 0.779\n", - "cate_intercept|$(T0)_1$ 1.031 0.1 10.306 0.0 0.866 1.195\n", - "cate_intercept|$(T0)_2$ 0.518 0.139 3.739 0.0 0.29 0.746\n", + "cate_intercept|$(T0)_1$ 1.031 0.096 10.715 0.0 0.872 1.189\n", + "cate_intercept|$(T0)_2$ 0.518 0.142 3.658 0.0 0.285 0.751\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -329,7 +344,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -431,7 +446,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 17, @@ -460,10 +475,10 @@ " X0|$(T0)_0$ 0.394 0.103 3.838 0.0 0.225 0.563 \n", "\n", "\n", - " X0|$(T0)_1$ -0.066 0.192 -0.343 0.732 -0.382 0.25 \n", + " X0|$(T0)_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248 \n", "\n", "\n", - " X0|$(T0)_2$ 0.04 0.201 0.198 0.843 -0.291 0.37 \n", + " X0|$(T0)_2$ 0.04 0.2 0.199 0.843 -0.29 0.369 \n", "\n", "\n", "\n", @@ -475,10 +490,10 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
cate_intercept|$(T0)_0$ 0.579 0.052 11.242 0.0 0.495 0.664
cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.704 -0.108 0.173cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.705 -0.108 0.173
cate_intercept|$(T0)_2$ -0.098 0.093 -1.049 0.294 -0.251 0.055cate_intercept|$(T0)_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -490,15 +505,15 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------\n", "X0|$(T0)_0$ 0.394 0.103 3.838 0.0 0.225 0.563\n", - "X0|$(T0)_1$ -0.066 0.192 -0.343 0.732 -0.382 0.25\n", - "X0|$(T0)_2$ 0.04 0.201 0.198 0.843 -0.291 0.37\n", + "X0|$(T0)_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248\n", + "X0|$(T0)_2$ 0.04 0.2 0.199 0.843 -0.29 0.369\n", " CATE Intercept Results \n", "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", "cate_intercept|$(T0)_0$ 0.579 0.052 11.242 0.0 0.495 0.664\n", - "cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.704 -0.108 0.173\n", - "cate_intercept|$(T0)_2$ -0.098 0.093 -1.049 0.294 -0.251 0.055\n", + "cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.705 -0.108 0.173\n", + "cate_intercept|$(T0)_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -610,7 +625,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] From efd634da819a56afcf08a4d326f989c17b98ace2 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Fri, 30 Jul 2021 23:54:32 -0400 Subject: [PATCH 08/27] Implement score --- econml/_ortho_learner.py | 2 +- econml/dml/dynamic_dml.py | 40 +++++++++++++++++++++++++------- econml/tests/test_dynamic_dml.py | 3 --- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 8134ffd88..3c7afcbab 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -845,7 +845,7 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): for i, models_nuisances in enumerate(self._models_nuisance): # for each model under cross fit setting for j, mdl in enumerate(models_nuisances): - nuisance_temp = mdl.predict(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z)) + nuisance_temp = mdl.predict(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, groups=groups)) if not isinstance(nuisance_temp, tuple): nuisance_temp = (nuisance_temp,) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index f39a25826..0e67aa76f 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -20,7 +20,8 @@ cross_product, deprecated, fit_with_groups, hstack, inverse_onehot, ndim, reshape, reshape_treatmentwise_effects, shape, transpose, - get_feature_names_or_default) + get_feature_names_or_default, check_input_arrays, + filter_none_kwargs) def _get_groups_period_filter(groups, n_periods): @@ -203,7 +204,6 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None period_filters = _get_groups_period_filter(groups, self.n_periods) for t in np.arange(self.n_periods): period = self.n_periods - 1 - t - # period_filter = self.period_filter_gen(period, Y.shape[0]) Y_adj = Y_res[period_filters[period]].copy() if t > 0: Y_adj -= np.sum( @@ -220,9 +220,6 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None scores[t] = np.mean((Y_adj - Y_adj_pred) ** 2) 
return scores - def period_filter_gen(self, p, n): - return (np.arange(n) % self.n_periods == p) - class _LinearDynamicModelFinal(_DynamicModelFinal): """Wrapper for the DynamicModelFinal with StatsModelsLinearRegression final model. @@ -567,7 +564,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou If group members do not appear together, the following is assumed: * the first instance of a group in the dataset is assumed to correspond to the first period of that group - * the second instance of a group in the dataset is assumed to correspond to the + * the second instance of a group in the dataset is assumed to correspond to the second period of that group ...etc. @@ -610,7 +607,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou cache_values=cache_values, inference=inference) - def score(self, Y, T, X=None, W=None, *, groups): + def score(self, Y, T, X=None, W=None, sample_weight=None, *, groups): """ Score the fitted CATE model on a new data set. Generates nuisance parameters for the new data set based on the fitted residual nuisance models created at fit time. @@ -637,8 +634,33 @@ def score(self, Y, T, X=None, W=None, *, groups): score: float The MSE of the final CATE model on the new data. 
""" - # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring - return super().score(Y, T, X=X, W=W, groups=groups) + if not hasattr(self._ortho_learner_model_final, 'score'): + raise AttributeError("Final model does not have a score method!") + Y, T, X, W, groups = check_input_arrays(Y, T, X, W, groups) + self._check_fitted_dims(X) + X, T = super()._expand_treatments(X, T) + n_iters = len(self._models_nuisance) + n_splits = len(self._models_nuisance[0]) + + # for each mc iteration + for i, models_nuisances in enumerate(self._models_nuisance): + # for each model under cross fit setting + for j, mdl in enumerate(models_nuisances): + nuisance_temp = mdl.predict(Y, T, **filter_none_kwargs(X=X, W=W, groups=groups)) + if not isinstance(nuisance_temp, tuple): + nuisance_temp = (nuisance_temp,) + + if i == 0 and j == 0: + nuisances = [np.zeros((n_iters * n_splits,) + nuis.shape) for nuis in nuisance_temp] + + for it, nuis in enumerate(nuisance_temp): + nuisances[it][i * n_iters + j] = nuis + + for it in range(len(nuisances)): + nuisances[it] = np.mean(nuisances[it], axis=0) + return self._ortho_learner_model_final.score(Y, T, nuisances=nuisances, + **filter_none_kwargs(X=X, W=W, + sample_weight=sample_weight, groups=groups)) def cate_treatment_names(self, treatment_names=None): """ diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index b573be805..4fdca33d1 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -236,10 +236,7 @@ def make_random(n, is_discrete, d): [0], est.intercept__interval()[0], decimal=5) est.summary() - # TODO: fix score - """ est.score(Y, T, X, W) - """ # make sure we can call effect with implied scalar treatments, # no matter the dimensions of T, and also that we warn when there # are multiple treatments From ac4dd7081860594b04063759cc56842c6bf73bb1 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Mon, 2 Aug 2021 15:59:22 -0400 Subject: [PATCH 09/27] Update 
docstring test outputs --- econml/dml/dynamic_dml.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index 0e67aa76f..647986b24 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -451,16 +451,16 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): >>> est.coef__interval() (array([[-0.333...], [-0.171...], - [-0.158...], - [-0.352...], - [-0.045...], - [ 0.049...]]), + [-0.154...], + [-0.336...], + [-0.051...], + [ 0.040...]]), array([[-0.050...], [ 0.056...], - [ 0.112...], - [ 0.079...], - [ 0.260...], - [ 0.405...]])) + [ 0.108...], + [ 0.064...], + [ 0.265...], + [ 0.415...]])) """ def __init__(self, *, From a44a9605f9a02a4847e9fe88f0e76f2fc1541cda Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Mon, 2 Aug 2021 16:13:12 -0400 Subject: [PATCH 10/27] Fix merge issues --- econml/dml/_rlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/dml/_rlearner.py b/econml/dml/_rlearner.py index 621238faa..ebdbe1fda 100644 --- a/econml/dml/_rlearner.py +++ b/econml/dml/_rlearner.py @@ -92,7 +92,7 @@ def __init__(self, model_final): self._model_final = model_final def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, - sample_weight=None, freq_weight=None, sample_var=None): + sample_weight=None, freq_weight=None, sample_var=None, groups=None): Y_res, T_res = nuisances self._model_final.fit(X, T, T_res, Y_res, sample_weight=sample_weight, freq_weight=freq_weight, sample_var=sample_var) From 1950fd16242605997a193f8a2d4d6c335427d5fe Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Mon, 2 Aug 2021 19:08:53 -0400 Subject: [PATCH 11/27] Address PR suggestions --- doc/spec/estimation/dml.rst | 18 ++++++++---------- econml/dml/dynamic_dml.py | 30 +++++++++++++++--------------- econml/tests/dgp.py | 5 ++++- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/spec/estimation/dml.rst 
b/doc/spec/estimation/dml.rst index af65c4c92..9fe1deb57 100644 --- a/doc/spec/estimation/dml.rst +++ b/doc/spec/estimation/dml.rst @@ -51,6 +51,7 @@ characteristics :math:`X` of the treated samples, then one can use this method. .. testsetup:: + # DML import numpy as np X = np.random.choice(np.arange(5), size=(100,3)) Y = np.random.normal(size=(100,2)) @@ -59,6 +60,12 @@ characteristics :math:`X` of the treated samples, then one can use this method. t = t0 = t1 = T[:,0] W = np.random.normal(size=(100,2)) + # DynamicDML + groups = np.repeat(a=np.arange(100), repeats=3, axis=0) + X_dyn = np.random.normal(size=(300, 1)) + T_dyn = np.random.normal(size=(300, 2)) + y_dyn = np.random.normal(size=(300, )) + .. testcode:: from econml.dml import LinearDML @@ -307,18 +314,9 @@ Below we give a brief description of each of these classes: .. testcode:: - import numpy as np from econml.dml import DynamicDML - - n_panels = 100 # number of panels - n_periods = 3 # number of time periods per panel - n = n_panels * n_periods - groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0) - X = np.random.normal(size=(n, 1)) - T = np.random.normal(size=(n, 2)) - y = np.random.normal(size=(n, )) est = DynamicDML() - est.fit(y, T, X=X, W=None, groups=groups, inference="auto") + est.fit(y_dyn, T_dyn, X=X_dyn, W=None, groups=groups, inference="auto") * **_RLearner.** The internal private class :class:`._RLearner` is a parent of the :class:`.DML` and allows the user to specify any way of fitting a final model that takes as input the residual :math:`\tilde{T}`, diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index 647986b24..e2c822f0c 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -56,14 +56,14 @@ def fit(self, Y, T, X=None, W=None, sample_weight=None, groups=None): self._model_t_trained = {j: {} for j in np.arange(self.n_periods)} for t in np.arange(self.n_periods): self._model_y_trained[t] = clone(self._model_y, safe=False).fit( - 
self._filter_or_None(X, period_filters[t]), - self._filter_or_None( + self._index_or_None(X, period_filters[t]), + self._index_or_None( W, period_filters[t]), Y[period_filters[self.n_periods - 1]]) for j in np.arange(t, self.n_periods): self._model_t_trained[j][t] = clone(self._model_t, safe=False).fit( - self._filter_or_None(X, period_filters[t]), - self._filter_or_None(W, period_filters[t]), + self._index_or_None(X, period_filters[t]), + self._index_or_None(W, period_filters[t]), T[period_filters[j]]) return self @@ -91,15 +91,15 @@ def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): for t in np.arange(self.n_periods): Y_slice = Y[period_filters[self.n_periods - 1]] Y_pred = self._model_y_trained[t].predict( - self._filter_or_None(X, period_filters[t]), - self._filter_or_None(W, period_filters[t])) + self._index_or_None(X, period_filters[t]), + self._index_or_None(W, period_filters[t])) Y_res[period_filters[t]] = Y_slice\ - shape_formatter(Y_slice, Y_pred).reshape(Y_slice.shape) for j in np.arange(t, self.n_periods): T_slice = T[period_filters[j]] T_pred = self._model_t_trained[j][t].predict( - self._filter_or_None(X, period_filters[t]), - self._filter_or_None(W, period_filters[t])) + self._index_or_None(X, period_filters[t]), + self._index_or_None(W, period_filters[t])) T_res[period_filters[j], ..., t] = T_slice\ - shape_formatter(T_slice, T_pred).reshape(T_slice.shape) return Y_res, T_res @@ -112,8 +112,8 @@ def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): Y_score = np.full((self.n_periods, ), np.nan) for t in np.arange(self.n_periods): Y_score[t] = self._model_y_trained[t].score( - self._filter_or_None(X, period_filters[t]), - self._filter_or_None(W, period_filters[t]), + self._index_or_None(X, period_filters[t]), + self._index_or_None(W, period_filters[t]), Y[period_filters[self.n_periods - 1]]) else: Y_score = None @@ -122,8 +122,8 @@ def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): for t in 
np.arange(self.n_periods): for j in np.arange(t, self.n_periods): T_score[j][t] = self._model_t_trained[j][t].score( - self._filter_or_None(X, period_filters[t]), - self._filter_or_None(W, period_filters[t]), + self._index_or_None(X, period_filters[t]), + self._index_or_None(W, period_filters[t]), T[period_filters[j]]) else: T_score = None @@ -134,7 +134,7 @@ def _get_shape_formatter(self, X, W): return lambda x, x_pred: np.tile(x_pred.reshape(1, -1), (x.shape[0], 1)) return lambda x, x_pred: x_pred - def _filter_or_None(self, X, filter_idx): + def _index_or_None(self, X, filter_idx): return None if X is None else X[filter_idx] @@ -742,7 +742,7 @@ def fit_cate_intercept_(self): @property def original_featurizer(self): - # NOTE: important to use the rlearner_model_final_ attribute instead of the + # NOTE: important to use the _ortho_learner_model_final_ attribute instead of the # attribute so that the trained featurizer will be passed through return self.ortho_learner_model_final_._model_final_trained[0]._original_featurizer @@ -755,7 +755,7 @@ def featurizer_(self): @property def model_final_(self): # NOTE This is used by the inference methods and is more for internal use to the library - # We need to use the rlearner's copy to retain the information from fitting + # We need to use the _ortho_learner's copy to retain the information from fitting return self.ortho_learner_model_final_.model_final_ @property diff --git a/econml/tests/dgp.py b/econml/tests/dgp.py index 16e1f1aae..98d4e6a81 100644 --- a/econml/tests/dgp.py +++ b/econml/tests/dgp.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
+import abc import numpy as np from econml.utilities import cross_product from statsmodels.tools.tools import add_constant @@ -13,9 +14,11 @@ def __init__(self, n_periods, n_treatments, n_x): self.n_x = n_x return + @abc.abstractmethod def create_instance(self, *args, **kwargs): pass + @abc.abstractmethod def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): pass @@ -138,7 +141,7 @@ def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): return Y, T, X[:, self.hetero_inds] if self.hetero_inds else None, X[:, self.endo_inds], groups def observational_data(self, n_units, gamma=0, s_t=1, sigma_t=0.5, random_seed=123): - """ Generated observational data with some observational treatment policy parameters + """Generate observational data with some observational treatment policy parameters. Parameters ---------- From 4636257aa3a21828e1c0a4b7fe3ce780928b30fa Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Tue, 3 Aug 2021 13:08:09 -0400 Subject: [PATCH 12/27] Fix subscript printing in summary --- econml/dml/dynamic_dml.py | 2 +- ...mic Double Machine Learning Examples.ipynb | 40 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index e2c822f0c..eca010c1a 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -682,7 +682,7 @@ def cate_treatment_names(self, treatment_names=None): slice_treatment_names = super().cate_treatment_names(treatment_names) treatment_names_out = [] for k in range(self._n_periods): - treatment_names_out += [f"$({t})_{k}$" for t in slice_treatment_names] + treatment_names_out += [f"({t})$_{k}$" for t in slice_treatment_names] return treatment_names_out def cate_feature_names(self, feature_names=None): diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb index 6eb2e2b91..38082d3eb 100644 --- a/notebooks/Dynamic Double Machine Learning 
Examples.ipynb +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -187,7 +187,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -281,13 +281,13 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "\n", "\n", - " cate_intercept|$(T0)_0$ 0.711 0.041 17.224 0.0 0.643 0.779 \n", + " cate_intercept|(T0)$_0$ 0.711 0.041 17.224 0.0 0.643 0.779 \n", "\n", "\n", - " cate_intercept|$(T0)_1$ 1.031 0.096 10.715 0.0 0.872 1.189 \n", + " cate_intercept|(T0)$_1$ 1.031 0.096 10.715 0.0 0.872 1.189 \n", "\n", "\n", - " cate_intercept|$(T0)_2$ 0.518 0.142 3.658 0.0 0.285 0.751 \n", + " cate_intercept|(T0)$_2$ 0.518 0.142 3.658 0.0 0.285 0.751 \n", "\n", "

A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -298,9 +298,9 @@ "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|$(T0)_0$ 0.711 0.041 17.224 0.0 0.643 0.779\n", - "cate_intercept|$(T0)_1$ 1.031 0.096 10.715 0.0 0.872 1.189\n", - "cate_intercept|$(T0)_2$ 0.518 0.142 3.658 0.0 0.285 0.751\n", + "cate_intercept|(T0)$_0$ 0.711 0.041 17.224 0.0 0.643 0.779\n", + "cate_intercept|(T0)$_1$ 1.031 0.096 10.715 0.0 0.872 1.189\n", + "cate_intercept|(T0)$_2$ 0.518 0.142 3.658 0.0 0.285 0.751\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -446,7 +446,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 17, @@ -472,13 +472,13 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "\n", "\n", - " X0|$(T0)_0$ 0.394 0.103 3.838 0.0 0.225 0.563 \n", + " X0|(T0)$_0$ 0.394 0.103 3.838 0.0 0.225 0.563 \n", "\n", "\n", - " X0|$(T0)_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248 \n", + " X0|(T0)$_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248 \n", "\n", "\n", - " X0|$(T0)_2$ 0.04 0.2 0.199 0.843 -0.29 0.369 \n", + " X0|(T0)$_2$ 0.04 0.2 0.199 0.843 -0.29 0.369 \n", "\n", "\n", "\n", @@ -487,13 +487,13 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|$(T0)_0$ 0.579 0.052 11.242 0.0 0.495 0.664cate_intercept|(T0)$_0$ 0.579 0.052 11.242 0.0 0.495 0.664
cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.705 -0.108 0.173cate_intercept|(T0)$_1$ 0.032 0.086 0.379 0.705 -0.108 0.173
cate_intercept|$(T0)_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056cate_intercept|(T0)$_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -504,16 +504,16 @@ "=================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------\n", - "X0|$(T0)_0$ 0.394 0.103 3.838 0.0 0.225 0.563\n", - "X0|$(T0)_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248\n", - "X0|$(T0)_2$ 0.04 0.2 0.199 0.843 -0.29 0.369\n", + "X0|(T0)$_0$ 0.394 0.103 3.838 0.0 0.225 0.563\n", + "X0|(T0)$_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248\n", + "X0|(T0)$_2$ 0.04 0.2 0.199 0.843 -0.29 0.369\n", " CATE Intercept Results \n", "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|$(T0)_0$ 0.579 0.052 11.242 0.0 0.495 0.664\n", - "cate_intercept|$(T0)_1$ 0.032 0.086 0.379 0.705 -0.108 0.173\n", - "cate_intercept|$(T0)_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056\n", + "cate_intercept|(T0)$_0$ 0.579 0.052 11.242 0.0 0.495 0.664\n", + "cate_intercept|(T0)$_1$ 0.032 0.086 0.379 0.705 -0.108 0.173\n", + "cate_intercept|(T0)$_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", From 9328a2282732a6829950875c6bdc457f82c2f192 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 5 Aug 2021 13:59:53 -0400 Subject: [PATCH 13/27] Address PR suggestions --- econml/dml/dynamic_dml.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index eca010c1a..bb162e75d 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -1,8 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
-# TODO: add paper reference - import abc import numpy as np from warnings import warn @@ -94,14 +92,14 @@ def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None): self._index_or_None(X, period_filters[t]), self._index_or_None(W, period_filters[t])) Y_res[period_filters[t]] = Y_slice\ - - shape_formatter(Y_slice, Y_pred).reshape(Y_slice.shape) + - shape_formatter(Y_slice, Y_pred) for j in np.arange(t, self.n_periods): T_slice = T[period_filters[j]] T_pred = self._model_t_trained[j][t].predict( self._index_or_None(X, period_filters[t]), self._index_or_None(W, period_filters[t])) T_res[period_filters[j], ..., t] = T_slice\ - - shape_formatter(T_slice, T_pred).reshape(T_slice.shape) + - shape_formatter(T_slice, T_pred) return Y_res, T_res def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): @@ -131,8 +129,8 @@ def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None): def _get_shape_formatter(self, X, W): if (X is None) and (W is None): - return lambda x, x_pred: np.tile(x_pred.reshape(1, -1), (x.shape[0], 1)) - return lambda x, x_pred: x_pred + return lambda x, x_pred: np.tile(x_pred.reshape(1, -1), (x.shape[0], 1)).reshape(x.shape) + return lambda x, x_pred: x_pred.reshape(x.shape) def _index_or_None(self, X, filter_idx): return None if X is None else X[filter_idx] From 24ca086efc96c9e885ca27d637430f541fee735e Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 5 Aug 2021 15:02:39 -0400 Subject: [PATCH 14/27] Update nuisance models in notebook --- ...mic Double Machine Learning Examples.ipynb | 145 ++++++++---------- 1 file changed, 68 insertions(+), 77 deletions(-) diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb index 38082d3eb..0eb1f1d6e 100644 --- a/notebooks/Dynamic Double Machine Learning Examples.ipynb +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -128,7 +128,7 @@ "source": [ "# Define DGP parameters\n", 
"np.random.seed(123)\n", - "n_panels = 400 # number of panels\n", + "n_panels = 1000 # number of panels\n", "n_periods = 3 # number of time periods in each panel\n", "n_treatments = 1 # number of treatments in each period\n", "n_x = 100 # number of features + controls\n", @@ -161,36 +161,34 @@ "execution_count": 5, "metadata": {}, "outputs": [], - "source": [ - "alpha_regs = [1e-4, 1e-3, 1e-2, 5e-2, .1, 1]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], "source": [ "est = DynamicDML(\n", - " model_y=LassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", - " model_t=MultiTaskLassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", + " model_y=LassoCV(cv=3), \n", + " model_t=MultiTaskLassoCV(cv=3), \n", " cv=3)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Objective did not converge. You might want to increase the number of iterations. 
Duality gap: 2.0609523330807065, tolerance: 0.3973271492025328\n" + ] + }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -201,14 +199,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Average effect of default policy: 2.26\n" + "Average effect of default policy: 2.35\n" ] } ], @@ -219,14 +217,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Effect of target policy over baseline policy: 4.33\n" + "Effect of target policy over baseline policy: 4.62\n" ] } ], @@ -241,16 +239,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Effect of a treatment in period 1 on period 3 outcome: 0.71\n", - "Effect of a treatment in period 2 on period 3 outcome: 1.03\n", - "Effect of a treatment in period 3 on period 3 outcome: 0.52\n" + "Effect of a treatment in period 1 on period 3 outcome: 0.69\n", + "Effect of a treatment in period 2 on period 3 outcome: 1.05\n", + "Effect of a treatment in period 3 on period 3 outcome: 0.61\n" ] } ], @@ -262,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -281,13 +279,13 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "\n", "\n", - " cate_intercept|(T0)$_0$ 0.711 0.041 17.224 0.0 0.643 0.779 \n", + " cate_intercept|(T0)$_0$ 0.692 0.031 22.308 0.0 0.641 0.743 \n", "\n", "\n", - " cate_intercept|(T0)$_1$ 1.031 0.096 10.715 0.0 0.872 1.189 \n", + " cate_intercept|(T0)$_1$ 1.05 0.067 15.565 0.0 0.939 1.161 \n", "\n", "\n", - " cate_intercept|(T0)$_2$ 0.518 0.142 3.658 0.0 0.285 0.751 \n", + " cate_intercept|(T0)$_2$ 0.61 
0.09 6.74 0.0 0.461 0.758 \n", "\n", "

A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -298,9 +296,9 @@ "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ 0.711 0.041 17.224 0.0 0.643 0.779\n", - "cate_intercept|(T0)$_1$ 1.031 0.096 10.715 0.0 0.872 1.189\n", - "cate_intercept|(T0)$_2$ 0.518 0.142 3.658 0.0 0.285 0.751\n", + "cate_intercept|(T0)$_0$ 0.692 0.031 22.308 0.0 0.641 0.743\n", + "cate_intercept|(T0)$_1$ 1.05 0.067 15.565 0.0 0.939 1.161\n", + "cate_intercept|(T0)$_2$ 0.61 0.09 6.74 0.0 0.461 0.758\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -311,7 +309,7 @@ "\"\"\"" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -323,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -339,12 +337,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -387,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -398,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -419,37 +417,30 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "est = DynamicDML(\n", - " model_y=LassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", - " model_t=MultiTaskLassoCV(cv=3, alphas=alpha_regs, tol=1e-2), \n", + " model_y=LassoCV(cv=3), \n", + " model_t=MultiTaskLassoCV(cv=3), \n", " cv=3)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": { "scrolled": true }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Objective did not converge. You might want to increase the number of iterations. Duality gap: 2.6550941575656566, tolerance: 1.6272718134380235\n" - ] - }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -460,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -472,13 +463,13 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "\n", "\n", - " X0|(T0)$_0$ 0.394 0.103 3.838 0.0 0.225 0.563 \n", + " X0|(T0)$_0$ 0.392 0.073 5.366 0.0 0.272 0.512 \n", "\n", "\n", - " X0|(T0)$_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248 \n", + " X0|(T0)$_1$ -0.041 0.12 -0.339 0.735 -0.239 0.157 \n", "\n", "\n", - " X0|(T0)$_2$ 0.04 0.2 0.199 0.843 -0.29 0.369 \n", + " X0|(T0)$_2$ 0.115 0.119 0.968 0.333 -0.081 0.311 \n", "\n", "\n", "\n", @@ -487,13 +478,13 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|(T0)$_0$ 0.579 0.052 11.242 0.0 0.495 0.664cate_intercept|(T0)$_0$ 0.602 0.034 17.813 0.0 0.547 0.658
cate_intercept|(T0)$_1$ 0.032 0.086 0.379 0.705 -0.108 0.173cate_intercept|(T0)$_1$ -0.027 0.059 -0.462 0.644 -0.125 0.07
cate_intercept|(T0)$_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056cate_intercept|(T0)$_2$ -0.085 0.06 -1.411 0.158 -0.184 0.014


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -504,16 +495,16 @@ "=================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------\n", - "X0|(T0)$_0$ 0.394 0.103 3.838 0.0 0.225 0.563\n", - "X0|(T0)$_1$ -0.066 0.191 -0.345 0.73 -0.38 0.248\n", - "X0|(T0)$_2$ 0.04 0.2 0.199 0.843 -0.29 0.369\n", + "X0|(T0)$_0$ 0.392 0.073 5.366 0.0 0.272 0.512\n", + "X0|(T0)$_1$ -0.041 0.12 -0.339 0.735 -0.239 0.157\n", + "X0|(T0)$_2$ 0.115 0.119 0.968 0.333 -0.081 0.311\n", " CATE Intercept Results \n", "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ 0.579 0.052 11.242 0.0 0.495 0.664\n", - "cate_intercept|(T0)$_1$ 0.032 0.086 0.379 0.705 -0.108 0.173\n", - "cate_intercept|(T0)$_2$ -0.098 0.093 -1.048 0.294 -0.251 0.056\n", + "cate_intercept|(T0)$_0$ 0.602 0.034 17.813 0.0 0.547 0.658\n", + "cate_intercept|(T0)$_1$ -0.027 0.059 -0.462 0.644 -0.125 0.07\n", + "cate_intercept|(T0)$_2$ -0.085 0.06 -1.411 0.158 -0.184 0.014\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -524,7 +515,7 @@ "\"\"\"" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -535,14 +526,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Average effect of default policy:0.48\n" + "Average effect of default policy:0.45\n" ] } ], @@ -554,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -562,8 +553,8 @@ "output_type": "stream", "text": [ "Effect of 
target policy over baseline policy for test set:\n", - " [ 0.48463458 0.52974258 0.21137445 0.36785476 0.37107783 0.6353013\n", - " -0.08878165 0.17427563 0.45723001 0.06398282]\n" + " [ 0.52203836 0.5995183 0.05267201 0.32145103 0.32698715 0.78083164\n", + " -0.46289232 -0.01105092 0.47496678 -0.20049582]\n" ] } ], @@ -578,19 +569,19 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(array([ 0.57932353, 0.03248502, -0.09764978]),\n", - " array([[ 0.39429592],\n", - " [-0.06583129],\n", - " [ 0.03975663]]))" + "(array([ 0.60246595, -0.02731669, -0.08491363]),\n", + " array([[ 0.39168147],\n", + " [-0.04079943],\n", + " [ 0.11529833]]))" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -602,7 +593,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -620,12 +611,12 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA4AAAAFWCAYAAAA4zPNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAApz0lEQVR4nO3de3hV9Z3v8c+XECBcg4C3gFw6yHCHEj3SqkUFwVOnMu0pWHtGvI16KmWm0+LBI4+i7bRaa+1YPS20dbSdjkCpIl56HEHolJYWE7koIMUiSFAhBhJuCYTwPX8kZEJISPZe2fuXnfV+PQ8Pe/32SvJxG7/xk7X2WubuAgAAAAC0fe1CBwAAAAAApAcFEAAAAABiggIIAAAAADFBAQQAAACAmKAAAgAAAEBMUAABAAAAICbahw7Q0nr37u0DBgwIHQNAK3agolKS1L1TduAkAJAY5heA5igsLPzY3fs09FybK4ADBgxQQUFB6BgAWrH1u0olSWP65QbNAQCJYn4BaA4z29nYc22uAAJAU/gfJwCZivkFICreAwgAAAAAMUEBBBA7v9tWrN9tKw4dAwASxvwCEBWngAKInaOVJ0JHAICkML8QUmVlpYqKilRRURE6Cmp06tRJffv2VXZ28y8MRQEEAAAA0KSioiJ169ZNAwYMkJmFjhN77q6SkhIVFRVp4MCBzf44TgEFAAAA0KSKigr16tWL8tdKmJl69eqV8BFZCiAAAACAZqH8tS7J/PugAAKInXN7dNK5PTqFjgEACWN+IdNMn79G0+evabHPl5WVpTFjxmj48OEaPXq0Hn30UZ04Eea9sQUFBZo1a9YZ9xkwYIBGjhypkSNHatiwYZo7d27tEbsdO3bIzDR37tza/T/++GNlZ2dr5syZkqR58+bpe9/7XovmpgACiJ0ReT00Iq9H6BgAkDDmF+IuJydH69ev16ZNm/Taa6/pN7/5jR544IEgWfLz8/X44483ud/KlSv11ltvae3atdq+fbvuuOOO2ucGDhyol19+uXb7V7/6lYYPH56SvCdRAAEAAAC0uKXrdmvd+6X603v79OmHXtfSdbtb9POfffbZWrBggZ544gm5uy6//HKtX7++9vlLL71UGzZs0Lx583TLLbdowoQJGjRo0CmlberUqRo3bpyGDx+uBQsW1K537dpVs2fP1vDhwzVx4kStXbu29uOXLVsmSVq1apWuvfZaSdKhQ4d08803a+TIkRo1apR+/etfn5a3a9eu+vGPf6ylS5dq3759kqTOnTtr6NChKigokCQtWrRI06ZNa9HXqT4KYBq09KFvANGs3LpXK7fuDR0DABLG/EKmWLput+557i0dq6o+PXN3abnuee6tFi+BgwYNUlVVlfbu3atbb71VTz/9tCTpz3/+syoqKjR69GhJ0jvvvKNXX31Va9eu1QMPPKDKykpJ0lNPPaXCwkIVFBTo8ccfV0lJiSTp8OHDuvLKK7Vp0yZ169ZNc+fO1Wuvvabnn39e991332k5vvnNb6pHjx566623tHHjRl155ZUN5u3evbsGDhyobdu21a5df/31WrhwoXbt2qWsrCydf/75LfkSnYYCCCB2qqpcVVUeOgYAJIz5hUzxyKtbVV5ZdcpaeWWVHnl1a8q+5he/+EW99NJLqqys1FNPPaWbbrqp9rnPfvaz6tixo3r37q2zzz5be/bskSQ9/vjjGj16tC655BLt2rWrtph16NBBU6ZMkSSNHDlSn/nMZ5Sdna2RI0dqx44dp33t5cuX66677qrd7tmzZ6M53U/9b3jKlCl67bXXtHDhQk2fPj3Zf/xm4z6AAAAAAFrUB6XlCa0na/v27crKytLZZ58tM9OkSZP0wgsvaPHixSosLKzdr2PHjrWPs7KydPz4ca1atUrLly/XmjVr1LlzZ02YMKH2Ai3Z2dm1V9hs165d7ce3a9dOx48fTzrvwYMHtWPHDl144YUqKyuTVF02x40bp0cffVSbN2+uPcU0VTg
CCAAAAKBFnZ+bk9B6MoqLi3XnnXdq5syZtWXttttu06xZs3TRRRed8SicJJWVlalnz57q3Lmz3nnnHf3xj39MOsukSZP05JNP1m7v37//tH0OHTqkr3zlK5o6depp2b7+9a/r4Ycf1llnnZV0huaiAAIAAABoUbMnD1FOdtYpaznZWZo9eUikz1teXl57G4iJEyfq6quv1v3331/7/Lhx49S9e3fdfPPNTX6uKVOm6Pjx4xo6dKjmzJmjSy65JOlcc+fO1f79+zVixAiNHj1aK1eurH3uiiuu0IgRI3TxxRfrggsu0Pz580/7+OHDh2vGjBkNfu5vfetb6tu3b+2fqKz+OaiZLj8/309eRae1OHkBmEV3jA+cBIAkbfnwgCRp6HndAycBgMQwvxDSli1bNHTo0Gbvv3Tdbt29ZKOOVZ1QXm6OZk8eoqlj81KYUPrggw80YcIEvfPOO2rXLh7Huhr692Jmhe6e39D+vAcQQOzwP04AMhXzC5lk6tg8Pbv2fUnpORDy85//XPfee6++//3vx6b8JYMCCAAAACAl0nkG3I033qgbb7wxbV8vU1GNAcTO8s17tHzzntAxACBhzC8AUVEAAQAAACAmKIAAAAAAEBMUQAAAAACICQogAAAAgIywZ88e3XDDDRo0aJDGjRun8ePH6/nnn09rhh07dmjEiBENrv/7v/97Up/zBz/4gY4cOVK73bVr16TzNYUCCCB2+vfqrP69OoeOAQAJY34hztxdU6dO1eWXX67t27ersLBQCxcuVFFR0Wn7Hj9+PO35zlQAm8pTvwCmUtDbQJjZFEn/IilL0k/d/aEG9pkmaZ4kl7TB3W9Ia0gAbc7gc7qFjgAASWF+IaNsXCyteFAqK5J69JWuuk8aNS3pT/f666+rQ4cOuvPOO2vX+vfvr69+9auSpKefflrPPfecDh06pKqqKj3//PO65ZZbtH37dnXu3FkLFizQqFGjNG/ePHXt2lXf+MY3JEkjRozQSy+9JEm65pprdOmll+oPf/iD8vLy9MILLygnJ0eFhYW65ZZbJElXX311g/nmzJmjLVu2aMyYMZoxY4Z69ux5Sp4HHnhA3/ve92q/1syZM5Wfn68DBw7ogw8+0BVXXKHevXtr5cqVkqR7771XL730knJycvTCCy/onHPOSfq1qyvYEUAzy5L0pKRrJA2T9CUzG1Zvn8GS7pH0aXcfLukf050TQNtzvOqEjledCB0DABLG/ELG2LhYenGWVLZLklf//eKs6vUkbdq0SZ/85CfPuM+bb76pJUuW6Le//a3uv/9+jR07Vhs3btS3v/3tZt0jcNu2bbrrrru0adMm5ebm6te//rUk6eabb9YPf/hDbdiwodGPfeihh3TZZZdp/fr1+trXvnZansbMmjVL559/vlauXFlb/g4fPqxLLrlEGzZs0OWXX66f/OQnTWZvrpCngF4s6V133+7uxyQtlHRdvX3+XtKT7r5fktx9b5ozRrdxsZ7Yc6Oe/fAa6bERkb7pAbSMVVuLtWprcegYAJAw5hcyxooHpcryU9cqy6vXW8hdd92l0aNH66KLLqpdmzRpks466yxJ0urVq/V3f/d3kqQrr7xSJSUlOnDgwBk/58CBAzVmzBhJ0rhx47Rjxw6VlpaqtLRUl19+uSTVfs7mqJsnER06dNC11157So6WErIA5knaVWe7qGatrgslXWhmvzezP9acMpo5an7z0efEXrVrod98AAAAAK1e2envyzvjejMMHz5cb775Zu32k08+qRUrVqi4+L9+KdKlS5cmP0/79u114sR/HUmvqKiofdyxY8fax1lZWZHfS1g3z5m+bn3Z2dkysxbLUVdrvwhMe0mDJU2Q9CVJPzGz3Po7mdntZlZgZgV1vwGCS8NvPgAAAIBWp0ffxNab4corr1RFRYV+9KMf1a6d6cIpl112mX75y19KklatWqXevXure/fuGjBgQG2RfPPNN/Xee++d8evm5uYqNzdXq1evlqT
az1lft27ddPDgwUY/T//+/bV582YdPXpUpaWlWrFiRbM/tiWFLIC7JfWrs923Zq2uIknL3L3S3d+T9GdVF8JTuPsCd8939/w+ffqkLHDCUvCbD6Ah0+ev0fT5a0LHAAAAqHbVfVJ2zqlr2TnV60kyMy1dulS//e1vNXDgQF188cWaMWOGHn744Qb3nzdvngoLCzVq1CjNmTNHzzzzjCTpC1/4gvbt26fhw4friSee0IUXXtjk1/7Xf/1X3XXXXRozZozcvcF9Ro0apaysLI0ePVqPPfbYac/369dP06ZN04gRIzRt2jSNHTu29rnbb79dU6ZM0RVXXNGclyISa+wfIOVf2Ky9qgvdVaoufm9IusHdN9XZZ4qkL7n7DDPrLWmdpDHuXtLY583Pz/eCgoLUhm+ux0bUvPG1nh79pK+9nf48aLNOlr9Fd4wPnCQzLN+8R5I0cVjLXE0LANKF+YWQtmzZoqFDhzb/A1r4KqBoWEP/Xsys0N3zG9o/2G0g3P24mc2U9KqqbwPxlLtvMrMHJRW4+7Ka5642s82SqiTNPlP5a23e+MRXNaJwrnLsWO1auXfQ25/4qi46w8cBSK1BfZp+fwAAtEbML2SUUdMofK1Q0PsAuvsrkl6pt3Zfnccu6Z9q/mScf9w8WOMqb9Pd7RfrfCvRB95L3z0+TYWbB+v3nwudDoivQX26ho4AAElhfgGIKmgBbOs+KC3Xbl2qZccuPWXdSssb+QgA6VBRWSVJ6pSdFTgJACSG+QUgqtZ+FdCMdn5uTkLrANJj9baPtXrbx6FjAEDCmF8ILdT1Q9CwZP59UABTaPbkIcqp9xu6nOwszZ48JFAiAAAAIDmdOnVSSUkJJbCVcHeVlJSoU6dOCX0cp4Cm0NSx1fe1v3vJRh2rOqG83BzNnjykdh0AAADIFH379lVRUZFa1X23Y65Tp07q2zexeytSAFNs6tg8Pbv2fUlcoh8AAACZKzs7WwMHDgwdAxFxCigAAAAAxARHAAHEzuBzuIw6gMzE/AIQFQUQQOz078WNlAFkJuYXgKg4BRRA7Bw+elyHjx4PHQMAEsb8AhAVBRBA7Kz5S4nW/KUkdAwASBjzC0BUFEAAAAAAiAkKIAAAAADEBAUQAAAAAGKCAggAAAAAMcFtIADEzl+f1y10BABICvMLQFQUQACx07dn59ARACApzC8AUXEKKIDYKSuvVFl5ZegYAJAw5heAqCiAAGLnjff26Y339oWOAQAJY34BiIoCCAAAAAAxQQEEAAAAgJigAAIAAABATFAAAQAAACAmuA1EGiy6Y3zoCADqGJHXI3QEAEgK8wtAVBRAALFzbo9OoSMAQFKYXwCi4hRQALGz//Ax7T98LHQMAEgY8wtAVBRAALFTuHO/CnfuDx0DABLG/AIQFQUQAAAAAGKCAggAAAAAMUEBBAAAAICYoAACAAAAQExwGwgAsTO6X27oCACQFOYXgKgogABip0+3jqEjAEBSmF8AouIUUACxU3zwqIoPHg0dAwASxvwCWo/p89do+vw1oWMkjAIIIHY27CrVhl2loWMAQMKYXwCiogACmW7jYj2x50Y9++E10mMjpI2LQycCAABAKxW0AJrZFDPbambvmtmcM+z3BTNzM8tPZz6g1du4WHpxlvqc2Kt2cqlsl/TiLEogAAAAGhSsAJpZlqQnJV0jaZikL5nZsAb26ybpHyT9Kb0JgQyw4kGpsvzUtcry6nUAAACgnpBHAC+W9K67b3f3Y5IWSrqugf2+KelhSRXpDAdkhLKixNYBAAAQayELYJ6kXXW2i2rWapnZJyX1c/eX0xkMyBRHcs5NaB3VxvXvqXH9e4aOAQAJY34BiKrVXgTGzNpJ+r6krzdj39vNrMDMCoqLi1MfDmglvls5XUe8wylrR7yDvls5PVCizNCzSwf17NKh6R0BoJVhfgGIKmQB3C2pX53tvjVrJ3WTNELSKjPbIekSScsauhCMuy9w93x3z+/Tp08KIwOtyzO
HLtacyttUdKK3Trip6ERvzam8Tc8cujh0tFbto7IKfVTGWeUAMg/zC0BU7QN+7TckDTazgaouftdLuuHkk+5eJqn3yW0zWyXpG+5ekOacQKt1fm6OlpVeqmXHLj1lPS83J1CizPD27jJJ0rk9OgVOAgCJYX4BiCrYEUB3Py5ppqRXJW2RtNjdN5nZg2b2uVC5gEwye/IQ5WRnnbKWk52l2ZOHBEoEAACA1izkEUC5+yuSXqm3dl8j+05IRyYgk0wdW33dpLuXbNSxqhPKy83R7MlDatcBAACAuoIWQADRTR2bp2fXvi9JWnTH+MBpAAAA0Jq12quAAgAAAABaFkcAAcTORQPPCh0BAJLC/AIQFQUQQOz0yMkOHQEAksL8AhAVp4ACiJ2i/UdUtP9I6BgAkDDmF4CoOAIIIHbe+fCgJKlvz86BkwBAYphfAKLiCCAAAAAAxAQFEAAAAABiggIIAAAAADFBAQQAAACAmOAiMABiZ/wneoWOAABJYX4BiIoCCCB2unRk9AHITMwvAFFxCiiA2NlZclg7Sw6HjgEACWN+AYiKXyMBiJ1tew5Jkvr36hI4CQAkhvkFICqOAAIAAABATFAAAQBAMNPnr9H0+WtCxwCA2KAAAgAAAEBMUAABAAAAICa4CAyA2Ll0cO/QEQAgKcwvAFFRAAHETqfsrNARACApzC8AUXEKKIDY2V58SNuLD4WOAQAJY34BiIoCCCB2thcf1vZibqQMIPMwvwBERQEEAAAAgJigAAIAAABATFAAAQAAACAmKIAAAAAAEBPcBgJA7EwY0id0BABICvMLQFQUQACx0z6Lkx8AZCbmF4ComCIAYmfbnoPatudg6BgAkDDmF4CoKIAAYmdnyRHtLDkSOgYAJIz5BSAqCiAAAAAAxAQFEAAAAABiggIIAAAAADFBAQQAAACAmOA2EABiZ+Kwc0JHAICkML8ARBX0CKCZTTGzrWb2rpnNaeD5fzKzzWa20cxWmFn/EDkBAAAAoC0IVgDNLEvSk5KukTRM0pfMbFi93dZJynf3UZKWSPpuelMCaIu2fHhAWz48EDoGACSM+QUgqpBHAC+W9K67b3f3Y5IWSrqu7g7uvtLdT97s5o+S+qY5I4A2aPf+cu3eXx46BgAkjPkFIKqQBTBP0q4620U1a425VdJvUpoIAAAAANqwjLgIjJn9T0n5kj7TyPO3S7pdki644II0JgOAtm/6/DWSpEV3jA+cBAAARBXyCOBuSf3qbPetWTuFmU2UdK+kz7n70YY+kbsvcPd8d8/v06dPSsICAAAAQKYLeQTwDUmDzWygqovf9ZJuqLuDmY2VNF/SFHffm/6IQGbgyExisrIsdAQASArzC0BUwQqgux83s5mSXpWUJekpd99kZg9KKnD3ZZIekdRV0q/MTJLed/fPhcoMoG24YsjZoSMAQFKYXwCiCvoeQHd/RdIr9dbuq/N4YtpDAQAAAEAbFfRG8AAQwtu7y/T27rLQMQAgYcwvAFE1qwCa2S+aswYAmeCjsgp9VFYROgYAJIz5BSCq5h4BHF53w8yyJI1r+TgAAAAAgFQ5YwE0s3vM7KCkUWZ2oObPQUl7Jb2QloQAAAAAgBZxxgLo7t9x926SHnH37jV/url7L3e/J00ZAQAAAAAtoLmngK41sx4nN8ws18ympiYSAKRWx+x26pjNNbAAZB7mF4ComnsbiPvd/fmTG+5eamb3S1qaklQAkEKXDe4TOgIAJIX5BSCq5v4KqaH9gt5DEAAAAACQmOYWwAIz+76ZfaLmz/clFaYyGACkyvpdpVq/qzR0DABIGPMLQFTNLYBflXRM0iJJCyVVSLorVaEAIJU+PnhUHx88GjoGACSM+QUgqmadxunuhyXNMbMuNY8BAAAAABmmWUcAzexTZrZZ0paa7dFm9n9TmgwAAAAA0KKaewroY5ImSyqRJHffIOnyVIUCAAAAALS8Zl/J0913mVndpaqWjwMAqde5Q1boCACQFOYXgKi
aWwB3mdmnJLmZZUv6B9WcDgoAmeZTf9U7dAQASArzC0BUzT0F9E5VX/UzT9JuSWPEVUABAAAAIKOc8QigmT3s7v9b0hXu/uU0ZQKAlCrcuU+SNK7/WYGTAEBimF8AomrqCOB/t+o3/t2TjjAAkA77D1dq/+HK0DEAIGHMLwBRNfUewP8nab+krmZ2QJJJ8pN/u3v3FOcDAAAAALSQpo4AznX3XEkvu3t3d+9W9+805AMAAAAAtJCmCuCamr8PpDoIAAAAACC1mjoFtIOZ3SDpU2b2+fpPuvtzqYkFAKnTrVOzb4EKAK0K8ysx0+dXH8tYdMf4wEmA1qOpKXKnpC9LypX0N/Wec0kUQAAZ578N6hU6AgAkhfkFIKozFkB3Xy1ptZkVuPvP0pQJAAAAAJACZ3wPoJndLUnu/jMz+2K9576dymAAkCp/2l6iP20vCR0DABLG/AIQVVMXgbm+zuP69wKc0sJZACAtDlYc18GK46FjAEDCmF8AomqqAFojjxvaBgAAAAC0Yk0VQG/kcUPbAAAAAIBWrKkCONrMDpjZQUmjah6f3B6ZhnwAAAAA0KosXbdb694v1Z/e26dPP/S6lq7bHTpSszV1FdCsdAUBgHTp2SU7dISMcfIH3LGqE/r0Q69r9uQhmjo2L3QsILaYX0B4S9ft1j3PvaVjVSckSbtLy3XPc29JUkb8jORuogBiZ1z/s0JHyAiZ/gMOaIuYX0B4j7y6VeWVVaeslVdW6ZFXt2bEz8emTgEFAMTUmX7AAQAQVx+Ulie03tpQAAHEzh/e/Vh/ePfj0DFavUz/AQe0RcwvILzzc3MSWm9tKIAAYufIsSodOVbV9I4xl+k/4IC2iPkFhDd78hDlZJ96qZSc7CzNnjwkUKLEUAABAA3K9B9wAACkwtSxefrO50eqQ1Z1lcrLzdF3Pj8yI97/J3ERGABAI07+ILt7yUYdqzqhvNwcrgIKAICqf0Y+u/Z9SdKiO8YHTpOYoEcAzWyKmW01s3fNbE4Dz3c0s0U1z//JzAYEiAkAsTU16/f6fcdZ2t7py/p9x1mamvX70JHQlmxcrCf23KhnP7xGemyEtHFx6EQA0OYFOwJoZlmSnpQ0SVKRpDfMbJm7b66z262S9rv7X5nZ9ZIeljQ9/WkBtCW9u3UMHSEzbFwsvThLfU7UXPSlbJf04qzqx6OmhcuFtoHvr6QwvwBEFfII4MWS3nX37e5+TNJCSdfV2+c6Sc/UPF4i6SozszRmBNAGjemXqzH9ckPHaP1WPChV1rviZ2V59ToQFd9fSWF+AYgqZAHMk7SrznZRzVqD+7j7cUllknrV/0RmdruZFZhZQXFxcYriAkDMlBUltg4kgu8vAAiiTVwF1N0XuHu+u+f36dMndBwArdzvthXrd9v4ZVGTevRNbB1IBN9fSWF+AYgqZAHcLalfne2+NWsN7mNm7SX1kFSSlnQA2qyjlSd0tPJE6Bit31X3Sdn17vmXnVO9DkTF91dSmF8AogpZAN+QNNjMBppZB0nXS1pWb59lkmbUPP4fkl53d09jRgCIr1HTpL95XMXtztYJmdSjn/Q3j3OBDrQMvr8AIIhgVwF19+NmNlPSq5KyJD3l7pvM7EFJBe6+TNLPJP3CzN6VtE/VJREAkC6jpmnmmuqTNTLtPkfIAHx/AUDaBb0RvLu/IumVemv31XlcIemL6c4FAAAAAI3auFhP7LlXvU4US4/1rT59PUPOYAhaAAEghHN7dAodAQCSwvwCWoEMv48pBRBA7IzI6xE6AgAkhfkFtAJnuo9pBhTANnEbCAAAAOAUGxfriT036tkPr5EeG1F91AZoCRl+H1MKIIDYWbl1r1Zu3Rs6BgAkjPnVTLWn6O1VO/l/naJHCURLyPD7mFIAAcROVZWrqoo7ygDIPMyvZjrTKXpAVBl+H1MKIAAAANqWDD9FD61cht/HlIvAAACAIJau261175fqWNUJffqh1zV78hBNHZsXOhbagh59q0/7bGg
daAkZfB9TjgACAIC0W7put+557i0dqzohSdpdWq57nntLS9ftDpwMbUKGn6IHpBIFEEDs5PXMUV7PnKZ3BJAyj7y6VeWVVaeslVdW6ZFXtwZKlBmYX82U4afoAanEKaAAYmfoed1DRwBi74PS8oTWUY35lYAMPkUPSCWOAAIAgLQ7P7fho1iNrQMAWgYFEEDsLN+8R8s37wkdA4i12ZOHKCc765S1nOwszZ48JFCizMD8AhAVp4ACAIC0O3m1z7uXbNSxqhPKy83hKqAAkAYUQAAAEMTUsXl6du37kniPFloetxkBGsYpoAAAAGhTuM0I0DgKIAAAANoUbjMCNI5TQAHETv9enUNHAICkML+ah9uMAI2jAAKIncHndAsdAQCSwvxqnvNzc7S7gbLHbUYATgEFEEPHq07oeM37QgAgkzC/mofbjACN4wgggNhZtbVYkjRx2DmBkwBAYphfzcNtRoDGUQABAADQ5nCbEaBhnAIKAAAAADFBAQQAAACAmKAAAgAAAEBM8B5AALEzqE+X0BEyCu+dAVoP5heAqCiAAGJnUJ+uoSMAQFKYXwCi4hRQALFTUVmlisqq0DEAIGHMLwBRUQABxM7qbR9r9baPQ8cAgIQxvwBERQEEAAAAgJigAAIAAABATFAAAQAAACAmKIAAAAAAEBPcBgJA7Aw+h8uoA8hMzC8AUVEAAcRO/17cSBlAZmJ+AYiKU0ABxM7ho8d1+Ojx0DEAIGHMLwBRBSmAZnaWmb1mZttq/u7ZwD5jzGyNmW0ys41mNj1EVgBtz5q/lGjNX0pCxwCAhDG/AEQV6gjgHEkr3H2wpBU12/UdkXSjuw+XNEXSD8wsN30RAQAAAKBtCVUAr5P0TM3jZyRNrb+Du//Z3bfVPP5A0l5JfdIVEAAAAADamlAF8Bx3/7Dm8UeSzjnTzmZ2saQOkv7SyPO3m1mBmRUUFxe3bFIAAAAAaCNSdhVQM1su6dwGnrq37oa7u5n5GT7PeZJ+IWmGu59oaB93XyBpgSTl5+c3+rkAAAAAIM5SVgDdfWJjz5nZHjM7z90/rCl4exvZr7uklyXd6+5/TFFUADHz1+d1Cx0BAJLC/AIQVahTQJdJmlHzeIakF+rvYGYdJD0v6efuviSN2QC0cX17dlbfnp1DxwCAhDG/AEQVqgA+JGmSmW2TNLFmW2aWb2Y/rdlnmqTLJd1kZutr/owJkhZAm1JWXqmy8srQMQAgYcwvAFGl7BTQM3H3EklXNbBeIOm2msf/Junf0hwNQAy88d4+SdLEYWe8/hQAtDrMLwBRhToCCAAAAABIMwogAAAAAMQEBRAAAAAAYoICCAAAAAAxEeQiMAAQ0oi8HqEjAEBSmF8AoqIAAoidc3t0Ch0BAJLC/AIQFaeAAoid/YePaf/hY6FjAEDCmF8AoqIAAoidwp37Vbhzf+gYAJAw5heAqCiAAAAAABATFEAAAAAAiAkKIAAAAADEBAUQAAAAAGKC20AAiJ3R/XJDRwCApDC/ErPojvGhIwCtDgUQQOz06dYxdAQASArzC0BUnAIKIHaKDx5V8cGjoWMAQMKYXwCiogACiJ0Nu0q1YVdp6BgAkDDmF4CoKIAAAAAAEBMUQAAAAACICQogAAAAAMQEBRAAAAAAYoLbQACInXH9e4aOAABJYX4BiIoCCCB2enbpEDoCACSF+QUgKk4BBRA7H5VV6KOyitAxACBhzC8AUXEEEEDsvL27TJJ0bo9OgZMAQGKYXwCi4gggAAAAAMQEBRAAAAAAYoICCAAAAAAxQQEEAAAAgJjgIjAAYueigWeFjgAASWF+Aa3HojvGh46QFAoggNjpkZMdOgIAJIX5BSAqTgEFEDtF+4+oaP+R0DEAIGHMLwBRcQQQQOy88+FBSVLfnp0DJwGAxDC/AETFEUAAAAAAiAkKIAAAAADEBAUQAAAAAGIiSAE0s7PM7DUz21bzd88z7NvdzIrM7Il0ZgQAAACAtibUEcA5kla4+2BJK2q
2G/NNSf+ZllQAYmH8J3pp/Cd6hY4BAAljfgGIKlQBvE7SMzWPn5E0taGdzGycpHMk/Ud6YgGIgy4d26tLRy6CDCDzML8ARBWqAJ7j7h/WPP5I1SXvFGbWTtKjkr6RzmAA2r6dJYe1s+Rw6BgAkDDmF4CoUvYrJDNbLuncBp66t+6Gu7uZeQP7fUXSK+5eZGZNfa3bJd0uSRdccEFygQHExrY9hyRJ/Xt1CZwEABLD/AIQVcoKoLtPbOw5M9tjZue5+4dmdp6kvQ3sNl7SZWb2FUldJXUws0Puftr7Bd19gaQFkpSfn99QmQQAAACA2At1EvkySTMkPVTz9wv1d3D3L598bGY3ScpvqPwBAAAAAJon1HsAH5I0ycy2SZpYsy0zyzeznwbKBAAAAABtWpAjgO5eIumqBtYLJN3WwPrTkp5OeTAAAAAAaMO4jjCA2Ll0cO/QEQAgKcwvAFFRAAHETqfsrNARACApzC8AUYV6DyAABLO9+JC2Fx8KHQMAEsb8AhAVRwABxM724uqbKA/q0zVwEgCL7hgfOkJGYX4BiIojgAAAAAAQExRAAAAAAIgJCiAAAAAAxAQFEAAAAABigovAAIidCUP6hI4AAElhfgGIigIIIHbaZ3HyA4DMxPwCEBVTBEDsbNtzUNv2HAwdAwASxvwCEBUFEEDs7Cw5op0lR0LHAICEMb8AREUBBAAAAICYoAACAAAAQExQAAEAAAAgJiiAAAAAABAT5u6hM7QoMyuWtDN0jgb0lvRx6BAZhtcsMbxeSCW+vxLD65UYXi+kEt9fieH1Skxrfb36u3uDNw5tcwWwtTKzAnfPD50jk/CaJYbXC6nE91dieL0Sw+uFVOL7KzG8XonJxNeLU0ABAAAAICYogAAAAAAQExTA9FkQOkAG4jVLDK8XUonvr8TweiWG1wupxPdXYni9EpNxrxfvAQQAAACAmOAIIAAAAADEBAUwxczsKTPba2Zvh84CAIlihgHIVMwvoGEUwNR7WtKU0CEAIElPixkGIDM9LeYXcBoKYIq5+39K2hc6B9oGM7vTzH5UZ/tbZvaLkJnQtjHD0FKYX0g35hdaUluaYRRAILP8XNK1ZpZrZtdK+qyk2wNnAoDmYH4ByGRtZoa1Dx0AQPO5+xEze1bSP0u6RtIkdy8PHAsAmsT8ApDJ2tIMowACmecpSVskXefufwkdBgASwPwCkMnaxAyjAAKZ5z5Jxarz36+ZDZM0T1KJpBWSNtfddvclaU8JAKdjfgHIZG1ihvEewBSrOVS8RtIQMysys1tDZ0LmMrOvS+okaZqkf6jz1DWSfuju/0vSjQ1sA0lhhqGlML+QbswvtKS2NMPM3UNnANAMZnalpMcljXf3g2a2TtLN7r7ezM6WdL+kI5I+Jelv6267+6dD5QYA5heATNbWZhgFEMgAZnaBpFWSPuvuW2rWbpI0wd1vqrNflqTn3P26hrYBIN2YXwAyWVucYRRAoA0wswGS/o+kLpJ+JKmo7ra7rw6XDgAax/wCkMkycYZRAAEAAAAgJrgIDAAAAADEBAUQAAAAAGKCAggAAAAAMUEBBAAAAICYoAACAAAAQExQAAEAAAAgJiiAAAAAABATFEAAAAAAiAkKIAAAAADEBAUQAAAAAGKCAggAAAAAMUEBBAAAAICYoAACAAAAQExQAAEAAAAgJiiAAAAAABATFEAAAAAAiAkKIAAAAADEBAUQAAAAAGKCAggAAAAAMUEBBAAAAICYoAACAAAAQExQAAEAAAAgJiiAAAAAABATFEAAAAAAiAkKIAAAAADEBAUQrYaZZZnZv5jZJjN7y8wGhc4EAM3B/AKQqZhf8UMBRGtyj6Tt7j5c0uOSvhI4DwA0F/MLQKZifsVM+9ABAEkysy6S/tbdx9UsvSfpswEjAUCzML8AZCrmVzxRANFaTJTUz8zW12yfJWl5uDgA0GzMLwCZivkVQ5wCitZijKT73H2Mu4+R9B+S1ptZFzN7xsx+YmZfDpo
QABo2Rg3Pr0Fm9jMzWxI0HQA0bowanl9Ta/7fa5GZXR00IVocBRCtRU9JRyTJzNpLulrSi5I+L2mJu/+9pM+FiwcAjWpwfrn7dne/NWgyADizxubX0pr/97pT0vSA+ZACFEC0Fn+WdEnN469Jetnd35PUV9KumvWqEMEAoAmNzS8AaO2aml9zJT2Z9lRIKQogWotnJX3SzN6VNErSP9WsF6m6BEp8vwJonRqbXwDQ2jU4v6zaw5J+4+5vhgyIlmfuHjoD0Kiaq1M9IalC0mp3/2XgSADQLGbWS9I/S5ok6afu/p3AkQCgWcxslqQZkt6QtN7dfxw4EloQBRAAAAAAYoJT6gAAAAAgJiiAAAAAABATFEAAAAAAiAkKIAAAAADEBAUQAAAAAGKCAggAAAAAMUEBBAAAAICYoAACAAAAQExQAAEAAAAgJv4/cD4IRmr/kb8AAAAASUVORK5CYII=\n", + "image/png": "\n", "text/plain": [ "
" ] From a39f1b5de1fb1442a32d310934271171698eea47 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 5 Aug 2021 20:16:06 -0400 Subject: [PATCH 15/27] Reverse effect indices to match paper --- econml/dml/dynamic_dml.py | 90 +++++++++---------- econml/tests/dgp.py | 19 ++-- ...mic Double Machine Learning Examples.ipynb | 48 +++++----- 3 files changed, 76 insertions(+), 81 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index bb162e75d..ce43834d0 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -160,18 +160,17 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, period_filters = _get_groups_period_filter(groups, self.n_periods) Y_res, T_res = nuisances self._d_y = Y.shape[1:] - for t in np.arange(self.n_periods): - period = self.n_periods - 1 - t - Y_adj = Y_res[period_filters[period]].copy() - if t > 0: + for t in np.arange(self.n_periods - 1, -1, -1): + Y_adj = Y_res[period_filters[t]].copy() + if t < self.n_periods - 1: Y_adj -= np.sum( [self._model_final_trained[j].predict_with_res( - X[period_filters[self.n_periods - 1 - j]] if X is not None else None, - T_res[period_filters[self.n_periods - 1 - j], ..., period] - ) for j in np.arange(t)], axis=0) + X[period_filters[j]] if X is not None else None, + T_res[period_filters[j], ..., t] + ) for j in np.arange(t + 1, self.n_periods)], axis=0) self._model_final_trained[t].fit( - X[period_filters[period]] if X is not None else None, T[period_filters[period]], - T_res[period_filters[period], ..., period], Y_adj) + X[period_filters[t]] if X is not None else None, T[period_filters[t]], + T_res[period_filters[t], ..., t], Y_adj) return self @@ -200,18 +199,17 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None Y_res, T_res = nuisances scores = np.full((self.n_periods, ), np.nan) period_filters = _get_groups_period_filter(groups, self.n_periods) - for t in np.arange(self.n_periods): - period = 
self.n_periods - 1 - t - Y_adj = Y_res[period_filters[period]].copy() - if t > 0: + for t in np.arange(self.n_periods - 1, -1, -1): + Y_adj = Y_res[period_filters[t]].copy() + if t < self.n_periods - 1: Y_adj -= np.sum( [self._model_final_trained[j].predict_with_res( - X[period_filters[self.n_periods - 1 - j]] if X is not None else None, - T_res[period_filters[self.n_periods - 1 - j], ..., period] - ) for j in np.arange(t)], axis=0) + X[period_filters[j]] if X is not None else None, + T_res[period_filters[j], ..., t] + ) for j in np.arange(t + 1, self.n_periods)], axis=0) Y_adj_pred = self._model_final_trained[t].predict_with_res( - X[period_filters[period]] if X is not None else None, - T_res[period_filters[period], ..., period]) + X[period_filters[t]] if X is not None else None, + T_res[period_filters[t], ..., t]) if sample_weight is not None: scores[t] = np.mean(np.average((Y_adj - Y_adj_pred)**2, weights=sample_weight, axis=0)) else: @@ -261,7 +259,6 @@ def _fit_single_output_cov(self, nuisances, X, y_index, groups): """ Calculates the covariance (n_periods*n_treatments) x (n_periods*n_treatments) matrix for a single outcome. 
""" - # TODO: add group filters here Y_res, T_res = nuisances # Calculate auxiliary quantities period_filters = _get_groups_period_filter(groups, self.n_periods) @@ -282,10 +279,9 @@ def _fit_single_output_cov(self, nuisances, X, y_index, groups): Y_diff = np.array([ np.sum([ self._model_final_trained[j].predict_with_res( - X[period_filters[self.n_periods - 1 - j]] if X is not None else None, - T_res[ - period_filters[self.n_periods - 1 - j], ..., self.n_periods - 1 - t] - ) for j in np.arange(t + 1)], + X[period_filters[j]] if X is not None else None, + T_res[period_filters[j], ..., t] + ) for j in np.arange(t, self.n_periods)], axis=0 ) for t in np.arange(self.n_periods) @@ -295,28 +291,24 @@ def _fit_single_output_cov(self, nuisances, X, y_index, groups): Sigma = np.zeros((self.n_periods * d_xt, self.n_periods * d_xt)) for t in np.arange(self.n_periods): - period_t = self.n_periods - 1 - t - period_filter_t = period_filters[period_t] - res_epsilon_t = (Y_res[period_filter_t] - + res_epsilon_t = (Y_res[period_filters[t]] - (Y_diff[t][:, y_index] if y_index >= 0 else Y_diff[t]) ).reshape(-1, 1, 1) - resT_t = XT_res[period_t][period_t] + resT_t = XT_res[t][t] for j in np.arange(self.n_periods): # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix Sigma - period_j = self.n_periods - 1 - j - period_filter_j = period_filters[period_j] - res_epsilon_j = (Y_res[period_filter_j] - + res_epsilon_j = (Y_res[period_filters[j]] - (Y_diff[j][:, y_index] if y_index >= 0 else Y_diff[j]) ).reshape(-1, 1, 1) - resT_j = XT_res[period_j][period_j] + resT_j = XT_res[j][j] cov_resT_tj = resT_t.reshape(-1, d_xt, 1) @ resT_j.reshape(-1, 1, d_xt) sigma_tj = np.mean((res_epsilon_t * res_epsilon_j) * cov_resT_tj, axis=0) Sigma[t * d_xt:(t + 1) * d_xt, j * d_xt:(j + 1) * d_xt] = sigma_tj - if j <= t: + if j >= t: # Calculating the (t, j) block entry (of size n_treatments x n_treatments) of matrix J m_tj = np.mean( - XT_res[period_j][period_t].reshape(-1, 
d_xt, 1) @ resT_t.reshape(-1, 1, d_xt), + XT_res[j][t].reshape(-1, d_xt, 1) @ resT_t.reshape(-1, 1, d_xt), axis=0) J[t * d_xt:(t + 1) * d_xt, j * d_xt:(j + 1) * d_xt] = m_tj @@ -431,34 +423,34 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): est.fit(y, T, X=X, W=None, groups=groups, inference="auto") >>> est.const_marginal_effect(X[:2]) - array([[-0.012..., 0.031..., 0.069..., 0.111..., -0.349..., - -0.076...], - [-0.411..., -0.088..., 0.021..., -0.171..., -0.126... , - 0.397...]]) + array([[-0.349..., -0.076..., 0.069..., 0.111..., -0.012..., + 0.031...], + [-0.126... , 0.397..., 0.021..., -0.171..., -0.411..., + -0.088...]]) >>> est.effect(X[:2], T0=0, T1=1) array([-0.225..., -0.378...]) >>> est.effect(X[:2], T0=np.zeros((2, n_periods*T.shape[1])), T1=np.ones((2, n_periods*T.shape[1]))) array([-0.225..., -0.378...]) >>> est.coef_ - array([[-0.191...], - [-0.057...], + array([[ 0.107...], + [ 0.227...], [-0.023...], [-0.136...], - [ 0.107...], - [ 0.227...]]) + [-0.191...], + [-0.057...]]) >>> est.coef__interval() - (array([[-0.333...], - [-0.171...], + (array([[-0.051...], + [ 0.040...], [-0.154...], [-0.336...], - [-0.051...], - [ 0.040...]]), - array([[-0.050...], - [ 0.056...], + [-0.333...], + [-0.171...]]), + array([[ 0.265...], + [ 0.415...], [ 0.108...], [ 0.064...], - [ 0.265...], - [ 0.415...]])) + [-0.050...], + [ 0.056...]])) """ def __init__(self, *, diff --git a/econml/tests/dgp.py b/econml/tests/dgp.py index 98d4e6a81..333a326f2 100644 --- a/econml/tests/dgp.py +++ b/econml/tests/dgp.py @@ -93,28 +93,31 @@ def create_instance(self, s_x, sigma_x=.5, sigma_y=.5, conf_str=5, hetero_streng len(self.hetero_inds) self.true_effect = np.zeros((self.n_periods, self.n_treatments)) - self.true_effect[0] = self.epsilon - for t in np.arange(1, self.n_periods): + # Invert indices to match latest API + self.true_effect[self.n_periods - 1] = self.epsilon + for t in np.arange(self.n_periods - 2, -1, -1): self.true_effect[t, :] = 
(self.zeta.reshape( - 1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha) + 1, -1) @ np.linalg.matrix_power(self.Beta, (self.n_periods - 1 - t) - 1) @ self.Alpha) self.true_hetero_effect = np.zeros( (self.n_periods, (self.n_x + 1) * self.n_treatments)) - self.true_hetero_effect[0, :] = cross_product( + self.true_hetero_effect[self.n_periods - 1, :] = cross_product( add_constant(self.y_hetero_effect.reshape(1, -1), has_constant='add'), self.epsilon.reshape(1, -1)) - for t in np.arange(1, self.n_periods): + for t in np.arange(self.n_periods - 2, -1, -1): + # Invert indices to match latest API self.true_hetero_effect[t, :] = cross_product( add_constant(self.x_hetero_effect.reshape(1, -1), has_constant='add'), - self.zeta.reshape(1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha) + self.zeta.reshape(1, -1) @ np.linalg.matrix_power( + self.Beta, (self.n_periods - 1 - t) - 1) @ self.Alpha) return self def hetero_effect_fn(self, t, x): - if t == 0: + if t == self.n_periods - 1: return (np.dot(self.y_hetero_effect, x.flatten()) + 1) * self.epsilon else: return (np.dot(self.x_hetero_effect, x.flatten()) + 1) *\ - (self.zeta.reshape(1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) + (self.zeta.reshape(1, -1) @ np.linalg.matrix_power(self.Beta, (self.n_periods - 1 - t) - 1) @ self.Alpha).flatten() def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb index 0eb1f1d6e..e07d966f5 100644 --- a/notebooks/Dynamic Double Machine Learning Examples.ipynb +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -185,7 +185,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -224,7 +224,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Effect of target policy over baseline policy: 4.62\n" + "Effect of target policy over baseline policy: 4.79\n" ] } ], @@ -246,9 +246,9 @@ 
"name": "stdout", "output_type": "stream", "text": [ - "Effect of a treatment in period 1 on period 3 outcome: 0.69\n", + "Effect of a treatment in period 1 on period 3 outcome: 0.61\n", "Effect of a treatment in period 2 on period 3 outcome: 1.05\n", - "Effect of a treatment in period 3 on period 3 outcome: 0.61\n" + "Effect of a treatment in period 3 on period 3 outcome: 0.69\n" ] } ], @@ -279,13 +279,13 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "\n", "\n", - " cate_intercept|(T0)$_0$ 0.692 0.031 22.308 0.0 0.641 0.743 \n", + " cate_intercept|(T0)$_0$ 0.61 0.09 6.74 0.0 0.461 0.758 \n", "\n", "\n", " cate_intercept|(T0)$_1$ 1.05 0.067 15.565 0.0 0.939 1.161 \n", "\n", "\n", - " cate_intercept|(T0)$_2$ 0.61 0.09 6.74 0.0 0.461 0.758 \n", + " cate_intercept|(T0)$_2$ 0.692 0.031 22.308 0.0 0.641 0.743 \n", "\n", "

A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -296,9 +296,9 @@ "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ 0.692 0.031 22.308 0.0 0.641 0.743\n", + "cate_intercept|(T0)$_0$ 0.61 0.09 6.74 0.0 0.461 0.758\n", "cate_intercept|(T0)$_1$ 1.05 0.067 15.565 0.0 0.939 1.161\n", - "cate_intercept|(T0)$_2$ 0.61 0.09 6.74 0.0 0.461 0.758\n", + "cate_intercept|(T0)$_2$ 0.692 0.031 22.308 0.0 0.641 0.743\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -342,7 +342,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -437,7 +437,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 16, @@ -463,13 +463,13 @@ " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "\n", "\n", - " X0|(T0)$_0$ 0.392 0.073 5.366 0.0 0.272 0.512 \n", + " X0|(T0)$_0$ 0.115 0.119 0.968 0.333 -0.081 0.311 \n", "\n", "\n", " X0|(T0)$_1$ -0.041 0.12 -0.339 0.735 -0.239 0.157 \n", "\n", "\n", - " X0|(T0)$_2$ 0.115 0.119 0.968 0.333 -0.081 0.311 \n", + " X0|(T0)$_2$ 0.392 0.073 5.366 0.0 0.272 0.512 \n", "\n", "\n", "\n", @@ -478,13 +478,13 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", " \n", "\n", "\n", - " \n", + " \n", "\n", "
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|(T0)$_0$ 0.602 0.034 17.813 0.0 0.547 0.658cate_intercept|(T0)$_0$ -0.085 0.06 -1.411 0.158 -0.184 0.014
cate_intercept|(T0)$_1$ -0.027 0.059 -0.462 0.644 -0.125 0.07
cate_intercept|(T0)$_2$ -0.085 0.06 -1.411 0.158 -0.184 0.014cate_intercept|(T0)$_2$ 0.602 0.034 17.813 0.0 0.547 0.658


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], @@ -495,16 +495,16 @@ "=================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------\n", - "X0|(T0)$_0$ 0.392 0.073 5.366 0.0 0.272 0.512\n", + "X0|(T0)$_0$ 0.115 0.119 0.968 0.333 -0.081 0.311\n", "X0|(T0)$_1$ -0.041 0.12 -0.339 0.735 -0.239 0.157\n", - "X0|(T0)$_2$ 0.115 0.119 0.968 0.333 -0.081 0.311\n", + "X0|(T0)$_2$ 0.392 0.073 5.366 0.0 0.272 0.512\n", " CATE Intercept Results \n", "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ 0.602 0.034 17.813 0.0 0.547 0.658\n", + "cate_intercept|(T0)$_0$ -0.085 0.06 -1.411 0.158 -0.184 0.014\n", "cate_intercept|(T0)$_1$ -0.027 0.059 -0.462 0.644 -0.125 0.07\n", - "cate_intercept|(T0)$_2$ -0.085 0.06 -1.411 0.158 -0.184 0.014\n", + "cate_intercept|(T0)$_2$ 0.602 0.034 17.813 0.0 0.547 0.658\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -553,8 +553,8 @@ "output_type": "stream", "text": [ "Effect of target policy over baseline policy for test set:\n", - " [ 0.52203836 0.5995183 0.05267201 0.32145103 0.32698715 0.78083164\n", - " -0.46289232 -0.01105092 0.47496678 -0.20049582]\n" + " [2.08972181 2.23249102 1.22483907 1.72010749 1.73030869 2.56658993\n", + " 0.27482914 1.10741935 2.00298489 0.75833676]\n" ] } ], @@ -575,10 +575,10 @@ { "data": { "text/plain": [ - "(array([ 0.60246595, -0.02731669, -0.08491363]),\n", - " array([[ 0.39168147],\n", + "(array([-0.08491363, -0.02731669, 0.60246595]),\n", + " array([[ 0.11529833],\n", " [-0.04079943],\n", - " [ 0.11529833]]))" + " [ 0.39168147]]))" ] }, "execution_count": 20, @@ -616,7 +616,7 @@ "outputs": [ 
{ "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] From 4210d1db2d36df3dea4bfb97522622c299585577 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 5 Aug 2021 20:33:07 -0400 Subject: [PATCH 16/27] Add sample code to README --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index f841b040a..cbc77cfdd 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ For information on use cases and background material on causal inference and het - [Interpretability](#interpretability) - [Causal Model Selection and Cross-Validation](#causal-model-selection-and-cross-validation) - [Inference](#inference) + - [Policy Learning](#policy-learning) - [For Developers](#for-developers) - [Running the tests](#running-the-tests) - [Generating the documentation](#generating-the-documentation) @@ -158,6 +159,25 @@ To install from source, see [For Developers](#for-developers) section below. +
+ Dynamic Double Machine Learning (click to expand) + + ```Python + from econml.dml import DynamicDML + # Use defaults + est = DynamicDML() + # Or specify hyperparameters + est = DynamicDML(model_y=LassoCV(cv=3), + model_t=LassoCV(cv=3), + cv=3) + est.fit(Y, T, X=X, W=None, groups=groups, inference="auto") + # Effects + treatment_effects = est.effect(X_test) + # Confidence intervals + lb, ub = est.effect_interval(X_test, alpha=0.05) + ``` +
+
Causal Forests (click to expand) From 4cc11560448993ba70c4f493cc24ab7b7066f483 Mon Sep 17 00:00:00 2001 From: Miruna Oprescu Date: Thu, 5 Aug 2021 23:13:30 -0400 Subject: [PATCH 17/27] Adjust heterogeneity to depend only on features from the first period --- econml/dml/dynamic_dml.py | 60 +++++++++---------- econml/tests/dgp.py | 6 +- econml/tests/test_dynamic_dml.py | 20 +++---- ...mic Double Machine Learning Examples.ipynb | 58 +++++++++--------- 4 files changed, 74 insertions(+), 70 deletions(-) diff --git a/econml/dml/dynamic_dml.py b/econml/dml/dynamic_dml.py index ce43834d0..5ecd69ef2 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dml/dynamic_dml.py @@ -165,11 +165,11 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, if t < self.n_periods - 1: Y_adj -= np.sum( [self._model_final_trained[j].predict_with_res( - X[period_filters[j]] if X is not None else None, + X[period_filters[0]] if X is not None else None, T_res[period_filters[j], ..., t] ) for j in np.arange(t + 1, self.n_periods)], axis=0) self._model_final_trained[t].fit( - X[period_filters[t]] if X is not None else None, T[period_filters[t]], + X[period_filters[0]] if X is not None else None, T[period_filters[t]], T_res[period_filters[t], ..., t], Y_adj) return self @@ -204,11 +204,11 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None if t < self.n_periods - 1: Y_adj -= np.sum( [self._model_final_trained[j].predict_with_res( - X[period_filters[j]] if X is not None else None, + X[period_filters[0]] if X is not None else None, T_res[period_filters[j], ..., t] ) for j in np.arange(t + 1, self.n_periods)], axis=0) Y_adj_pred = self._model_final_trained[t].predict_with_res( - X[period_filters[t]] if X is not None else None, + X[period_filters[0]] if X is not None else None, T_res[period_filters[t], ..., t]) if sample_weight is not None: scores[t] = np.mean(np.average((Y_adj - Y_adj_pred)**2, weights=sample_weight, axis=0)) @@ -266,7 +266,7 
@@ def _fit_single_output_cov(self, nuisances, X, y_index, groups): XT_res = np.array([ [ self._model_final_trained[0]._combine( - X[period_filters[j]] if X is not None else None, + X[period_filters[0]] if X is not None else None, T_res[period_filters[t], ..., j], fitting=False ) @@ -279,7 +279,7 @@ def _fit_single_output_cov(self, nuisances, X, y_index, groups): Y_diff = np.array([ np.sum([ self._model_final_trained[j].predict_with_res( - X[period_filters[j]] if X is not None else None, + X[period_filters[0]] if X is not None else None, T_res[period_filters[j], ..., t] ) for j in np.arange(t, self.n_periods)], axis=0 @@ -423,34 +423,34 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): est.fit(y, T, X=X, W=None, groups=groups, inference="auto") >>> est.const_marginal_effect(X[:2]) - array([[-0.349..., -0.076..., 0.069..., 0.111..., -0.012..., - 0.031...], - [-0.126... , 0.397..., 0.021..., -0.171..., -0.411..., - -0.088...]]) + array([[-0.336..., -0.048..., -0.061..., 0.042..., -0.204..., + 0.00667271], + [-0.101..., 0.433..., 0.054..., -0.217..., -0.101..., + -0.159...]]) >>> est.effect(X[:2], T0=0, T1=1) - array([-0.225..., -0.378...]) + array([-0.601..., -0.091...]) >>> est.effect(X[:2], T0=np.zeros((2, n_periods*T.shape[1])), T1=np.ones((2, n_periods*T.shape[1]))) - array([-0.225..., -0.378...]) + array([-0.601..., -0.091...]) >>> est.coef_ - array([[ 0.107...], - [ 0.227...], - [-0.023...], - [-0.136...], - [-0.191...], - [-0.057...]]) + array([[ 0.112...], + [ 0.231...], + [ 0.055...], + [-0.125...], + [ 0.049...], + [-0.079...]]) >>> est.coef__interval() - (array([[-0.051...], - [ 0.040...], - [-0.154...], - [-0.336...], - [-0.333...], - [-0.171...]]), - array([[ 0.265...], - [ 0.415...], - [ 0.108...], - [ 0.064...], - [-0.050...], - [ 0.056...]])) + (array([[-0.035...], + [ 0.029...], + [-0.087... ], + [-0.366... ], + [-0.090...], + [-0.233...]]), + array([[0.260...], + [0.433... 
], + [0.198...], + [0.116...], + [0.189...], + [0.074...]])) """ def __init__(self, *, diff --git a/econml/tests/dgp.py b/econml/tests/dgp.py index 333a326f2..e4057b44a 100644 --- a/econml/tests/dgp.py +++ b/econml/tests/dgp.py @@ -130,13 +130,17 @@ def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): period = t % self.n_periods if period == 0: X[t] = np.random.normal(0, self.sigma_x, size=self.n_x) + const_x0 = X[t][self.hetero_inds] T[t] = policy_gen(np.zeros(self.n_treatments), X[t], period) else: X[t] = (np.dot(self.x_hetero_effect, X[t - 1]) + 1) * np.dot(self.Alpha, T[t - 1]) + \ np.dot(self.Beta, X[t - 1]) + \ np.random.normal(0, self.sigma_x, size=self.n_x) + # The feature for heterogeneity stays constant + X_t = X[t].copy() + X_t[self.hetero_inds] = const_x0 T[t] = policy_gen(T[t - 1], X[t], period) - Y[t] = (np.dot(self.y_hetero_effect, X[t]) + 1) * np.dot(self.epsilon, T[t]) + \ + Y[t] = (np.dot(self.y_hetero_effect, X_t if period != 0 else X[t]) + 1) * np.dot(self.epsilon, T[t]) + \ np.dot(X[t], self.zeta) + \ np.random.normal(0, self.sigma_y) groups[t] = t // self.n_periods diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index 7f26743ce..380c4a777 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -16,6 +16,7 @@ from econml.tests.dgp import DynamicPanelDGP +@pytest.mark.dml class TestDynamicDML(unittest.TestCase): def test_cate_api(self): @@ -253,7 +254,7 @@ def make_random(n, is_discrete, d): def test_perf(self): np.random.seed(123) - n_units = 400 + n_units = 1000 n_periods = 3 n_treatments = 1 n_x = 100 @@ -261,15 +262,14 @@ def test_perf(self): s_t = 10 hetero_strength = .5 hetero_inds = np.arange(n_x - n_treatments, n_x) - alpha_regs = [1e-4, 1e-3, 1e-2, 5e-2, .1, 1] def lasso_model(): - return LassoCV(cv=3, alphas=alpha_regs, max_iter=500) + return LassoCV(cv=3) # No heterogeneity dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance( - s_x, 
random_seed=1) - Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12) + s_x, random_seed=12345) + Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12345) est = DynamicDML(model_y=lasso_model(), model_t=lasso_model(), cv=3) # Define indices to test groups_filter = _get_groups_period_filter(groups, 3) @@ -278,17 +278,17 @@ def lasso_model(): for test_idx in test_indices: est.fit(Y[test_idx], T[test_idx], X=X[test_idx] if X is not None else None, W=W[test_idx], groups=groups[test_idx], inference="auto") - np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=1e-01) + np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=0.2) np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) # Heterogeneous effects dgp = DynamicPanelDGP(n_periods, n_treatments, n_x).create_instance( - s_x, hetero_strength=hetero_strength, hetero_inds=hetero_inds, random_seed=1) - Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=12) + s_x, hetero_strength=hetero_strength, hetero_inds=hetero_inds, random_seed=12) + Y, T, X, W, groups = dgp.observational_data(n_units, s_t=s_t, random_seed=1) + hetero_strength = .5 + hetero_inds = np.arange(n_x - n_treatments, n_x) for test_idx in test_indices: - hetero_strength = .5 - hetero_inds = np.arange(n_x - n_treatments, n_x) est.fit(Y[test_idx], T[test_idx], X=X[test_idx], W=W[test_idx], groups=groups[test_idx], inference="auto") np.testing.assert_allclose(est.intercept_, dgp.true_effect.flatten(), atol=0.2) np.testing.assert_allclose(est.coef_, dgp.true_hetero_effect[:, hetero_inds + 1], atol=0.2) diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb index e07d966f5..ecfd78ee1 100644 --- a/notebooks/Dynamic Double Machine Learning Examples.ipynb +++ 
b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -110,7 +110,7 @@ "\n", "We consider a data generating process from a markovian treatment model. \n", "\n", - "In the example bellow, $T_t\\rightarrow$ treatment(s) at time $t$, $Y_t\\rightarrow$outcome at time $t$, $X_t\\rightarrow$ features and controls at time $t$ (the coefficients $e, f'$ will pick the features and the controls).\n", + "In the example below, $T_t\\rightarrow$ treatment(s) at time $t$, $Y_t\\rightarrow$ outcome at time $t$, $X_t\\rightarrow$ features and controls at time $t$ (the coefficients $e, f$ will pick the features and the controls).\n", "\\begin{align}\n", " X_t =& (\\pi'X_{t-1} + 1) \\cdot A\\, T_{t-1} + B X_{t-1} + \\epsilon_t\\\\\n", " T_t =& \\gamma\\, T_{t-1} + (1-\\gamma) \\cdot D X_t + \\zeta_t\\\\\n", @@ -185,7 +185,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -437,7 +437,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 16, @@ -460,16 +460,16 @@ "\n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
Coefficient Results
point_estimate stderr zstat pvalue ci_lower ci_upper point_estimate stderr zstat pvalue ci_lower ci_upper
X0|(T0)$_0$ 0.115 0.119 0.968 0.333 -0.081 0.311X0|(T0)$_0$ 0.112 0.119 0.947 0.344 -0.083 0.308
X0|(T0)$_1$ -0.041 0.12 -0.339 0.735 -0.239 0.157X0|(T0)$_1$ 0.092 0.128 0.719 0.472 -0.119 0.303
X0|(T0)$_2$ 0.392 0.073 5.366 0.0 0.272 0.512X0|(T0)$_2$ 0.443 0.076 5.85 0.0 0.318 0.567
\n", "\n", @@ -478,33 +478,33 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|(T0)$_0$ -0.085 0.06 -1.411 0.158 -0.184 0.014cate_intercept|(T0)$_0$ -0.082 0.061 -1.347 0.178 -0.182 0.018
cate_intercept|(T0)$_1$ -0.027 0.059 -0.462 0.644 -0.125 0.07cate_intercept|(T0)$_1$ -0.035 0.059 -0.591 0.554 -0.133 0.063
cate_intercept|(T0)$_2$ 0.602 0.034 17.813 0.0 0.547 0.658cate_intercept|(T0)$_2$ 0.592 0.035 17.006 0.0 0.535 0.65


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], "text/plain": [ "\n", "\"\"\"\n", - " Coefficient Results \n", - "=================================================================\n", - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "-----------------------------------------------------------------\n", - "X0|(T0)$_0$ 0.115 0.119 0.968 0.333 -0.081 0.311\n", - "X0|(T0)$_1$ -0.041 0.12 -0.339 0.735 -0.239 0.157\n", - "X0|(T0)$_2$ 0.392 0.073 5.366 0.0 0.272 0.512\n", + " Coefficient Results \n", + "================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "----------------------------------------------------------------\n", + "X0|(T0)$_0$ 0.112 0.119 0.947 0.344 -0.083 0.308\n", + "X0|(T0)$_1$ 0.092 0.128 0.719 0.472 -0.119 0.303\n", + "X0|(T0)$_2$ 0.443 0.076 5.85 0.0 0.318 0.567\n", " CATE Intercept Results \n", "=============================================================================\n", " point_estimate stderr zstat pvalue ci_lower ci_upper\n", "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ -0.085 0.06 -1.411 0.158 -0.184 0.014\n", - "cate_intercept|(T0)$_1$ -0.027 0.059 -0.462 0.644 -0.125 0.07\n", - "cate_intercept|(T0)$_2$ 0.602 0.034 17.813 0.0 0.547 0.658\n", + "cate_intercept|(T0)$_0$ -0.082 0.061 -1.347 0.178 -0.182 0.018\n", + "cate_intercept|(T0)$_1$ -0.035 0.059 -0.591 0.554 -0.133 0.063\n", + "cate_intercept|(T0)$_2$ 0.592 0.035 17.006 0.0 0.535 0.65\n", "-----------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", @@ -533,13 +533,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Average effect of default policy:0.45\n" + "Average effect of default policy:0.42\n" ] } ], "source": [ "# Average treatment effect for test points\n", - "X_test = X[:10]\n", + "X_test = X[np.arange(0, 25, 3)]\n", "print(f\"Average effect of 
default policy:{est.ate(X=X_test):0.2f}\")" ] }, @@ -553,8 +553,8 @@ "output_type": "stream", "text": [ "Effect of target policy over baseline policy for test set:\n", - " [2.08972181 2.23249102 1.22483907 1.72010749 1.73030869 2.56658993\n", - " 0.27482914 1.10741935 2.00298489 0.75833676]\n" + " [ 2.1924814 1.69559976 -0.24732358 0.40266764 2.33464274 0.81404248\n", + " 2.25540586 1.52509443 2.44106892]\n" ] } ], @@ -575,10 +575,10 @@ { "data": { "text/plain": [ - "(array([-0.08491363, -0.02731669, 0.60246595]),\n", - " array([[ 0.11529833],\n", - " [-0.04079943],\n", - " [ 0.39168147]]))" + "(array([-0.08178444, -0.03511038, 0.59245168]),\n", + " array([[0.11235101],\n", + " [0.09227667],\n", + " [0.44267996]]))" ] }, "execution_count": 20, @@ -616,7 +616,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4AAAAFWCAYAAAA4zPNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAApVklEQVR4nO3de3iV5Z3u8ftHCCScEuSgkiCHjjKcoURHqqWIKLjVyrS7aJ0ZqdoRd6XM9EAHtlyIttPqtq0dqtNKW8d2piNQinjebEXorrtUDHIqIEU5SKJCDCRySCAJz/4jMRNCIOtdb1aerDzfz3VxudazXrLuLpe/cvOezDknAAAAAED718F3AAAAAABA66AAAgAAAEAgKIAAAAAAEAgKIAAAAAAEggIIAAAAAIGgAAIAAABAIDr6DtDSevfu7QYOHOg7BoA27KPKKklSj6xMz0kAIBrmF4BEbNiw4UPnXJ+mXmt3BXDgwIEqLCz0HQNAG7Zpf5kkaUz/XK85ACAq5heARJjZvrO91u4KIAA0hz84AUhXzC8AcXEOIAAAAAAEggIIIDi/31Wi3+8q8R0DACJjfgGIi0NAAQTnRNUp3xEAICnML/hUVVWloqIiVVZW+o6COllZWcrPz1dmZuIXhqIAAgAAAGhWUVGRunfvroEDB8rMfMcJnnNOpaWlKioq0qBBgxL+fRwCCgAAAKBZlZWV6tWrF+WvjTAz9erVK/IeWQogAAAAgIRQ/tqWZP59UAABBOeCnCxdkJPlOwYARMb8Qrq5+fF1uvnxdS328zIyMjRmzBgNHz5co0eP1g9+8AOdOuXn3NjCwkLNnj37nNsMHDhQI0eO1MiRIzVs2DDNnz+/fo/d3r17ZWaaP39+/fYffvihMjMzNWvWLEnSwoUL9f3vf79Fc1MAAQRnRF6ORuTl+I4BAJExvxC67Oxsbdq0Sdu2bdPLL7+sl156Sffff7+XLAUFBVq0aFGz261Zs0Zbt27V+vXrtXv3bs2cObP+tUGDBumFF16of/6b3/xGw4cPT0nej1EAAQAAALS4lRuLtfHdMr2+55CuePBVrdxY3KI/v2/fvlq8eLEeffRROec0YcIEbdq0qf71K6+8Ups3b9bChQt1xx13aOLEiRo8ePBppW3atGkaN26chg8frsWLF9ev
d+vWTXPmzNHw4cM1efJkrV+/vv73P/vss5KktWvX6oYbbpAkHT16VLfffrtGjhypUaNG6be//e0Zebt166af/vSnWrlypQ4dOiRJ6tKli4YOHarCwkJJ0tKlSzV9+vQW/ZwaowACCM6anQe1ZudB3zEAIDLmVzQtffghErdyY7HmrdiqkzW1h2cWl1Vo3oqtLV4CBw8erJqaGh08eFB33nmnnnzySUnSn//8Z1VWVmr06NGSpLfeekurVq3S+vXrdf/996uqqkqS9MQTT2jDhg0qLCzUokWLVFpaKkk6duyYJk2apG3btql79+6aP3++Xn75ZT399NNasGDBGTm+/e1vKycnR1u3btWWLVs0adKkJvP26NFDgwYN0q5du+rXbrnlFi1ZskT79+9XRkaG+vXr15If0RkogACCU1PjVFPjfMcAgMiYX0gXD6/aqYqqmtPWKqpq9PCqnSl7zy984Qt6/vnnVVVVpSeeeEJf+tKX6l+7/vrr1blzZ/Xu3Vt9+/bVgQMHJEmLFi3S6NGjdfnll2v//v31xaxTp06aOnWqJGnkyJH6zGc+o8zMTI0cOVJ79+49471feeUV3XPPPfXPe/bsedaczp3+3/DUqVP18ssva8mSJbr55puT/Z+fMO4DCAAAAKBFvVdWEWk9Wbt371ZGRob69u0rM9M111yjZ555RsuWLdOGDRvqt+vcuXP944yMDFVXV2vt2rV65ZVXtG7dOnXp0kUTJ06sv0BLZmZm/RU2O3ToUP/7O3TooOrq6qTzHjlyRHv37tUll1yi8vJySbVlc9y4cfrBD36g7du31x9imirsAQQAAADQovrlZkdaT0ZJSYnuvvtuzZo1q76sffnLX9bs2bN16aWXnnMvnCSVl5erZ8+e6tKli9566y398Y9/TDrLNddco8cee6z++eHDh8/Y5ujRo/rKV76iadOmnZHtG9/4hh566CGdd955SWdIFAUQAAAAQIuaM2WIsjMzTlvLzszQnClDYv3cioqK+ttATJ48Wddee63uu++++tfHjRunHj166Pbbb2/2Z02dOlXV1dUaOnSo5s6dq8svvzzpXPPnz9fhw4c1YsQIjR49WmvWrKl/7aqrrtKIESN02WWX6aKLLtLjjz9+xu8fPny4ZsyY0eTP/s53vqP8/Pz6X3FZ42NQ011BQYH7+Co6ANCUHe9/JEkaemEPz0kAIBrmVzQfXwBm6czxnpO0Dzt27NDQoUMT3n7lxmJ9a/kWnaw5pbzcbM2ZMkTTxualMKH03nvvaeLEiXrrrbfUoUMY+7qa+vdiZhuccwVNbc85gACCwx+cAKQr5hfSybSxeXpq/buSWqeE/+pXv9K9996rH/7wh8GUv2RQAAEAAACkRGvufb3tttt02223tdr7pSuqMYDgvLL9gF7ZfsB3DACIjPkFIC4KIAAAAAAEggIIAAAAAIGgAAIAAABAICiAAAAAANLCgQMHdOutt2rw4MEaN26cxo8fr6effrpVM+zdu1cjRoxocv0///M/k/qZP/rRj3T8+PH65926dUs6X3MogACCM6BXFw3o1cV3DACIjPmFkDnnNG3aNE2YMEG7d+/Whg0btGTJEhUVFZ2xbXV1davnO1cBbC5P4wKYSl5vA2FmUyX9i6QMST93zj3YxDbTJS2U5CRtds7d2qohAbQ7F5/f3XcEAEgK8wtpZcsyafUDUnmRlJMvXb1AGjU96R/36quvqlOnTrr77rvr1wYMGKCvfvWrkqQnn3xSK1as0NGjR1VTU6Onn35ad9xxh3bv3q0uXbpo8eLFGjVqlBYuXKhu3brpm9/8piRpxIgRev755yVJ1113na688kr94Q9/UF5enp555hllZ2drw4YNuuOOOyRJ1157bZP55s6dqx07dmjMmDGaMWOGevbseVqe+++/X9///vfr32vWrFkqKCjQRx99pPfee09XXXWVevfurTVr1kiS7r33Xj3//PPKzs7WM888o/PPPz/pz64hb3sA
zSxD0mOSrpM0TNIXzWxYo20uljRP0hXOueGS/rG1cwJof6prTqm65pTvGAAQGfMLaWPLMum52VL5fkmu9p/Pza5dT9K2bdv0yU9+8pzbvPnmm1q+fLl+97vf6b777tPYsWO1ZcsWffe7303oHoG7du3SPffco23btik3N1e//e1vJUm33367fvzjH2vz5s1n/b0PPvigPv3pT2vTpk362te+dkaes5k9e7b69eunNWvW1Je/Y8eO6fLLL9fmzZs1YcIE/exnP2s2e6J8HgJ6maS3nXO7nXMnJS2RdFOjbf5e0mPOucOS5Jw72MoZAbRDa3eWaO3OEt8xACAy5lcEW5bp0QO36an3r5MeGRGreCAJqx+QqipOX6uqqF1vIffcc49Gjx6tSy+9tH7tmmuu0XnnnSdJeu211/R3f/d3kqRJkyaptLRUH3300Tl/5qBBgzRmzBhJ0rhx47R3716VlZWprKxMEyZMkKT6n5mIhnmi6NSpk2644YbTcrQUnwUwT9L+Bs+L6tYaukTSJWb2/8zsj3WHjAIAAABnt2WZqp/5qvqcOqgOdXufqp/5KiWwNZWfeV7eOdcTMHz4cL355pv1zx977DGtXr1aJSX/9ZciXbt2bfbndOzYUadO/dee9MrKyvrHnTt3rn+ckZER+1zChnnO9b6NZWZmysxaLEdDbf0iMB0lXSxpoqQvSvqZmeU23sjM7jKzQjMrbPgFAAAAQHiOv7RAHWtO/8N1x5pKHX9pgadEAcrJj7aegEmTJqmyslI/+clP6tfOdeGUT3/60/r1r38tSVq7dq169+6tHj16aODAgfVF8s0339SePXvO+b65ubnKzc3Va6+9Jkn1P7Ox7t2768iRI2f9OQMGDND27dt14sQJlZWVafXq1Qn/3pbkswAWS+rf4Hl+3VpDRZKedc5VOef2SPqzagvhaZxzi51zBc65gj59+qQsMAAAANq+rIoPIq0jBa5eIGVmn76WmV27niQz08qVK/W73/1OgwYN0mWXXaYZM2booYceanL7hQsXasOGDRo1apTmzp2rX/7yl5Kkz3/+8zp06JCGDx+uRx99VJdcckmz7/1v//ZvuueeezRmzBg555rcZtSoUcrIyNDo0aP1yCOPnPF6//79NX36dI0YMULTp0/X2LFj61+76667NHXqVF111VWJfBSx2Nn+B6T8jc06qrbQXa3a4veGpFudc9sabDNV0hedczPMrLekjZLGOOdKz/ZzCwoKXGFhYWrDA0hrr2w/IEmaPKxlrqYFAK2F+ZWYogWfUH6HD89cP9Vb+Q+84yFR+7Bjxw4NHTo08d/QwlcBbWveKTkqSfpEn9Tdsy8RTf17MbMNzrmCprb3dhsI51y1mc2StEq1t4F4wjm3zcwekFTonHu27rVrzWy7pBpJc85V/gAgEYP7NH9+AAC0RcyvxPy809/qW1X/qi52sn7tuOukn3f6Wy30Fys8o6a3q8LXXni9D6Bz7kVJLzZaW9DgsZP09bpfANAiBnv+mzoASBbzKzFjrr9LC56u1j+6JepnpXrP9dKPdIuuvP4u39EA77wWQADwobKqRpKUlZnhOQkARMP8Ssy0sXmSvqJJy6/UyZpTysvN1pwpQ+rWgbBRAAEE57VdteeFcA4NgHTD/ErctLF5emr9u5KkpTPHe07Tfjjn6m9PAP+SuZ5LW78NBAAAAIA2ICsrS6WlpUmVDrQ855xKS0uVlZUV6fexBxAAAABAs/Lz81VUVCTuu12r5MgJSdLJDzs3s2XqZGVlKT8/2r0VKYAAAAAAmpWZmalBgwb5jtFmLHx8nSRp6cwxfoNExCGgAAAAABAI9gACCM7F53MZdQDpifkFIC4KIIDgDOjFjZQBpCfmF4C4OAQUQHCOnajWsRPVvmMAQGTMLwBxUQABBGfdO6Va906p7xgAEBnzC0BcFEAAAAAACAQFEAAAAAACQQEEAAAAgEBQAAEAAAAgENwGAkBw/vLC7r4jAEBSmF8A
4qIAAghOfs8uviMAQFKYXwDi4hBQAMEpr6hSeUWV7xgAEBnzC0Bc7AEEEJw39hySJE0edr7nJAAQDfMrmqUzx/uOALQ57AEEAAAAgEBQAAEAAAAgEBRAAAAAAAgEBRAAAAAAAsFFYAAEZ0Reju8IAJAU5heAuCiAAIJzQU6W7wgAkBTmF4C4OAQUQHAOHzupw8dO+o4BAJExvwDERQEEEJwN+w5rw77DvmMAQGTMLwBxUQABAAAAIBAUQAAAAAAIBAUQAAAAAAJBAQQAAACAQHAbCADBGd0/13cEAEgK8wtAXBRAAMHp072z7wgAkBTmF4C4OAQUQHBKjpxQyZETvmMAQGTMLwBxUQABBGfz/jJt3l/mOwYARMb8AhAXBRAAAAAAAkEBBAAAAIBAUAABAAAAIBAUQAAAAACIYOXGYm18t0yv7zmkKx58VSs3FvuOlDBuAwEgOOMG9PQdAQCSwvwC/Fu5sVjzVmzVyZpTkqTisgrNW7FVkjRtbJ7PaAlhDyCA4PTs2kk9u3byHQMAImN+Af49vGqnKqpqTlurqKrRw6t2ekoUDQUQQHA+KK/UB+WVvmMAQGTML8C/98oqIq23NRwCCiA4fyoulyRdkJPlOQkARMP8Avzrl5ut4ibKXr/cbA9pomMPIAAAAAAkaM6UIcrOzDhtLTszQ3OmDPGUKBr2AAIAAABAgj6+0Mu3lm/RyZpTysvN1pwpQ9LiAjASBRAAAAAAIpk2Nk9PrX9XkrR05njPaaLhEFAAAAAACAR7AAEE59JB5/mOAABJYX4BiIs9gACCk5OdqZzsTN8xACAy5hfQRmxZpkcP3Kan3r9OemSEtGWZ70QJ81oAzWyqme00s7fNbO45tvu8mTkzK2jNfADap6LDx1V0+LjvGAAQGfMLaAO2LJOem60+pw6qg5xUvl96bnbalEBvBdDMMiQ9Juk6ScMkfdHMhjWxXXdJ/yDp9dZNCKC9euv9I3rr/SO+YwBAZMwvoA1Y/YBU1eg+gFUVtetpwOcewMskve2c2+2cOylpiaSbmtju25IeklTZmuEAAAAA4AzlRdHW2xifBTBP0v4Gz4vq1uqZ2Scl9XfOvdCawQAAAACgSTn50dbbmDZ7ERgz6yDph5K+kcC2d5lZoZkVlpSUpD4cAAAAgDBdvUDKzD59LTO7dj0N+CyAxZL6N3ieX7f2se6SRkhaa2Z7JV0u6dmmLgTjnFvsnCtwzhX06dMnhZEBAAAABG3UdOnGRSrp0FenZFJOf+nGRbXracDnfQDfkHSxmQ1SbfG7RdKtH7/onCuX1Pvj52a2VtI3nXOFrZwTQDsz/hO9fEcAgKQwv4A2YtR0zVpXuy9r6czxnsNE460AOueqzWyWpFWSMiQ94ZzbZmYPSCp0zj3rKxuA9q1rZ59/9wUAyWN+AYjL6xRxzr0o6cVGa00ePOucm9gamQC0f/tKj0mSBvTq6jkJAETD/AIQF3+NBCA4uw4clcQfoACkH+YXgLja7FVAAQAAAAAtiwIIAAAAAIGgAAIAAABAICiAAAAAABAILgIDIDhXXty7+Y0AoA1ifgGIiwIIIDhZmRm+IwBAUphfAOLiEFAAwdldclS7S476jgEAkTG/AMRFAQQQnN0lx7S75JjvGAAQGfMLQFwUQAAAAAAIBAUQAAAAAAJBAQTagZsfX6ebH1/nOwYARMb8AoDWRQEEAAAAgEBwGwgAwZk4pI/vCACQFOYXgLgogACC0zGDgx8ApCfmF4C4mCIAgrPrwBHtOnDEdwwAiIz5BSAuCiCA4OwrPa59pcd9xwCAyJhfAOKiAAIAAABAICiAAAAAABAICiAAAAAABIICCAAAAACB4DYQAIIzedj5viMAQFKYXwDiYg8gAAAAAASCAgggODve/0g73v/IdwwAiIz5BSAuCiCA4BQfrlDx4QrfMQAgMuYXgLg4BxAAAAAAIlo6c7zvCElhDyAAAAAABIICCAAAAACB4BBQAMHJyDDfEQAgKcwv
AHFRAAEE56ohfX1HAICkML8AxMUhoAAAAAAQCAoggOD8qbhcfyou9x0DACJjfgGIK6ECaGb/nsgaAKSDD8or9UF5pe8YABAZ8wtAXInuARze8ImZZUga1/JxAAAAAACpcs4CaGbzzOyIpFFm9lHdryOSDkp6plUSAgAAAABaxDkLoHPue8657pIeds71qPvV3TnXyzk3r5UyAgAAAABaQKKHgK43s5yPn5hZrplNS00kAEitzpkd1DmTa2ABSD/MLwBxJXofwPucc09//MQ5V2Zm90lamZJUAJBCn764j+8IAJAU5heAuBL9K6SmtuMm8gAAAACQRhItgIVm9kMz+0Tdrx9K2pDKYACQKpv2l2nT/jLfMQAgMuYXgLgSLYBflXRS0lJJSyRVSronVaEAIJU+PHJCHx454TsGAETG/AIQV0KHcTrnjkmaa2Zd6x4DAAAAANJMQnsAzexTZrZd0o6656PN7F9TmgwAAAAA0KISPQT0EUlTJJVKknNus6QJqQoFAAAAAGh5CV/J0zm338waLtW0fBwASL0unTJ8RwCApDC/AMSVaAHcb2afkuTMLFPSP6jucFAASDef+oveviMAQFKYXwDiSvQQ0LtVe9XPPEnFksaIq4ACAAAAQFo55x5AM3vIOfdPkq5yzv1NK2UCgJTasO+QJGncgPM8JwGAaJhfAOJqbg/gf7PaE//mtUYYAGgNh49V6fCxKt8xACAy5heAuJorgP9b0mFJo8zsIzM70vCfcd/czKaa2U4ze9vM5jbx+tfNbLuZbTGz1WY2IO57AgAAAEComiuA851zuZJecM71cM51b/jPOG9sZhmSHpN0naRhkr5oZsMabbZRUoFzbpSk5ZL+V5z3BAAAAICQNVcA19X9M/beviZcJult59xu59xJSUsk3dRwA+fcGufc8bqnf5SUn4IcAAAAABCE5m4D0cnMbpX0KTP7XOMXnXMrYrx3nqT9DZ4XSfqrc2x/p6SXmnrBzO6SdJckXXTRRTEiAQhB96yEb4EKAG0K8wtAXM1Nkbsl/Y2kXEk3NnrNSYpTABNmZn8rqUDSZ5p63Tm3WNJiSSooKHCtkQlA+vqrwb18RwCApDC/AMR1zgLonHtN0mtmVuic+0ULv3expP4NnufXrZ3GzCZLulfSZ5xzJ1o4AwAAAAAE45znAJrZtyTJOfcLM/tCo9e+G/O935B0sZkNMrNOkm6R9Gyj9xgr6XFJn3XOHYz5fgAgSXp9d6le313qOwYARMb8AhBXcxeBuaXB48b3Apwa542dc9WSZklaJWmHpGXOuW1m9oCZfbZus4cldZP0GzPbZGbPnuXHAUDCjlRW60hlte8YABAZ8wtAXM2dA2hnedzU88iccy9KerHR2oIGjyfHfQ8AAAAAQK3m9gC6szxu6jkAAAAAoA1rbg/gaDP7SLV7+7LrHqvueVZKkwEAAAAAWlRzVwHNaK0gANBaenbN9B0BAJLC/AIQF3cTBRCccQPO8x0BAJLC/AIQV3PnAAIAAAAA2gkKIIDg/OHtD/WHtz/0HQMAImN+AYiLQ0ABBOf4yRrfEQAgKcwvAHGxBxAAAAAAAkEBBAAAAIBAUAABAAAAIBCcAwggOL27d/YdAQCSwvwCEBcFEEBwxvTP9R0BAJLC/AIQF4eAAgAAAEAgKIAAgvP7XSX6/a4S3zEAIDLmF4C4OAQUQHBOVJ3yHQEAksL8AhAXewABAAAAIBAUQAAAAAAIBAUQSHMrNxZr47tlen3PIV3x4KtaubHYdyQAAAC0UZwDCKSxlRuLNW/FVp2sqT0npLisQvNWbJUkTRub5zNam3ZBTpbvCACQFOYXgLgogEAae3jVTlVU1Zy2VlFVo4dX7aQAnsOIvBzfEQAgKcwvAHFxCCiQxt4rq4i0DgAAgLBRAIE01i83O9I6aq3ZeVBrdh70HQMAImN+AYiLAgiksTlThig7M+O0tezMDM2ZMsRTovRQU+NUU+N8xwCCx0WsomN+AYiL
cwCBNPbxeX7fWr5FJ2tOKS83W3OmDOH8PwBtHhexAgA/KIBAmps2Nk9PrX9XkrR05njPaQAgMVzECgD84BBQAADQ6riIFQD4wR5AAMHJ68lFcgDf+uVmq7iJssdFrM6N+QUgLvYAAgjO0At7aOiFPXzHAILGRaySw/wCEBd7AAEAQKvjIlYA4AcFEEBwXtl+QJI0edj5npOkh5sfXyeJiwyh5XERq+iYXwDi4hBQAAAAAAgEBRAAAAAAAkEBBAAAAIBAUAABAAAAIBBcBAZAcAb06uI7AgAkhfkFIC4KIIDgXHx+d98RACApzC8AcXEIKIDgVNecUnXNKd8xACAy5heAuCiAAIKzdmeJ1u4s8R0DACJjfgGIiwIIAAAAAIGgAAIAAABAICiAAAAAABAICmAruPnxdbr58XW+YwAAAAAIHLeBABCcwX26+o4AAElhfgGIiwIIIDiD+3TzHQEAksL8AhAXh4ACCE5lVY0qq2p8xwCAyJhfAOKiAAIIzmu7PtRruz70HQMAImN+AYiLAggAAAAAgaAAAgAAAEAgvBZAM5tqZjvN7G0zm9vE653NbGnd66+b2UAPMQEAAACgXfBWAM0sQ9Jjkq6TNEzSF81sWKPN7pR02Dn3F5IekfRQ66YEgMBtWaZHD9ymp96/TnpkhLRlme9EaE/4fgFAq/N5G4jLJL3tnNstSWa2RNJNkrY32OYmSQvrHi+X9KiZmXPOtWZQAO3LxedzGfWEbFkmPTdbfU5V1D4v3y89N7v28ajp/nKhfeD7lRTmF4C4fB4Cmidpf4PnRXVrTW7jnKuWVC6pV6ukA9BuDejVVQN6cTPlZq1+QKqqOH2tqqJ2HYiL71dSmF8A4moXF4Exs7vMrNDMCktKSnzHAdDGHTtRrWMnqn3HaPvKi6KtA1Hw/UoK8wtAXD4LYLGk/g2e59etNbmNmXWUlCOptPEPcs4tds4VOOcK+vTpk6K4ANqLde+Uat07Z4wSNJaTH20diILvV1KYXwDi8lkA35B0sZkNMrNOkm6R9GyjbZ6VNKPu8X+X9Crn/wFAK7l6gZSZffpaZnbtOhAX3y8A8MJbAaw7p2+WpFWSdkha5pzbZmYPmNln6zb7haReZva2pK9LOuNWEQCAFBk1XbpxkUo69NUpmZTTX7pxERfoQMvg+wUAXvi8Cqiccy9KerHR2oIGjyslfaG1cwEA6oyarlnrao/WXzpzvOcwaHf4fgFAq2sXF4EBAAAAADTP6x5AAPDhLy/s7jsCACSF+QUgLvYAAuluyzI9euA2PfX+ddIjI2pvroxzyu/ZRfk9u/iOkRZWbizWxnfL9PqeQ7riwVe1cmPjizUDaE3MLwBxsQcQSGdblknPzVafU3U3Uy7fLz03u/YxF1I4q/KKKklSTnam5yRt28qNxZq3YqtO1pySJBWXVWjeiq2SpGlj83xGA4LF/AIQF3sAgXS2+gGpquL0taqK2nWc1Rt7DumNPYd8x2jzHl61UxVVNaetVVTV6OFVOz0lAsD8AhAXBRBIZ+VF0daBCN4rq4i0DgAA2j4KYKpxfhZSKSc/2joQQb/c7EjrAACg7aMAplL9+VkH1UHuv87PogSipVy9QMps9IfxzOzadSCmOVOGKDsz47S17MwMzZkyxFMiAAAQFwUwlTg/C6k2arp04yKVdOirUzIpp7904yIuAIMWMW1snr73uZHqlFH7fxV5udn63udGcgEYAADSGFcBTSXOz0JrGDVds9b1lyQtnTnec5j0MCIvx3eEtDFtbJ6eWv+uJL5fQFvA/AIQFwUwlXLyaw/7bGodgDcX5GT5jgAASWF+AYiLQ0BTifOzgDbp8LGTOnzspO8YABAZ8wtAXBTAVOL8LKBN2rDvsDbsO+w7BgBExvwCEBeHgKYa52cBAAAAaCPYAwgAAAAAgaAAptjKjcXa+G6ZXt9zSFc8+KpWbiz2HQkAAABAoCiAKbRyY7HmrdiqkzWnJEnFZRWat2IrJRAAAACA
F5wDmEIPr9qpiqqa09Yqqmr08Kqd3EgZ8Gh0/1zfEQAgKcwvAHFRAFPovbKKSOsAWkef7p19RwCApDC/AMTFIaAp1C83O9I6gNZRcuSESo6c8B0DACJjfgGIiwKYQnOmDFF2ZsZpa9mZGZozZYinRAAkafP+Mm3eX+Y7BgBExvwCEBeHgKbQx+f5fWv5Fp2sOaW83GzNmTKE8/8AAAAAeEEBTLFpY/P01Pp3JXEjeAAAAAB+cQgoAAAAAASCAggAAAAAgeAQUADBGTegp+8IAJAU5heAuCiAAILTs2sn3xEAICnMLwBxcQgogOB8UF6pD8orfccAgMiYXwDiYg8ggOD8qbhcknRBTpbnJAAQDfMLQFwUQKAd4BYjAAAASASHgAIAAABAICiAAAAAABAICiAAAAAABIJzAAEE59JB5/mOAABJYX4BiIsCCCA4OdmZviMAQFKYXwDi4hBQAMEpOnxcRYeP+44BAJExvwDExR5AAMF56/0jkqT8nl08JwGAaJhfAOJiDyAAAAAABII9gACAc1o6c7zvCAAAoIWwBxAAAAAAAkEBBAAAAIBAcAgogOCM/0Qv3xEA1OEQ42iYXwDiogACCE7Xzow+AOmJ+QUgLg4BBRCcfaXHtK/0mO8YABAZ8wtAXPw1EoDg7DpwVJI0oFdXz0kAIBrmF4C42AMIAAAAAIGgAAIAAABAICiAAAAAABAILwXQzM4zs5fNbFfdP3s2sc0YM1tnZtvMbIuZ3ewjKwAAAAC0F74uAjNX0mrn3INmNrfu+T812ua4pNucc7vMrJ+kDWa2yjlX1spZAbQzV17c23cEAEgK8wtAXL4OAb1J0i/rHv9S0rTGGzjn/uyc21X3+D1JByX1aa2AANqvrMwMZWVm+I4BAJExvwDE5asAnu+ce7/u8QeSzj/XxmZ2maROkt5JdTAA7d/ukqPaXXLUdwwAiIz5BSCulB0CamavSLqgiZfubfjEOefMzJ3j51wo6d8lzXDOnTrLNndJukuSLrrooqQzp8rSmeN9RwDQwO6S2psoD+7TzXMSAIiG+QUgrpQVQOfc5LO9ZmYHzOxC59z7dQXv4Fm26yHpBUn3Ouf+eI73WixpsSQVFBSctUwCAAAAQMh8HQL6rKQZdY9nSHqm8QZm1knS05J+5Zxb3orZAAAAAKBd8lUAH5R0jZntkjS57rnMrMDMfl63zXRJEyR9ycw21f0a4yUtAAAAALQDXm4D4ZwrlXR1E+uFkr5c9/g/JP1HK0cDAAAAgHbL130AAcCbiUO4owyA9MT8AhAXBRBAcDpm+Dr6HQDiYX4BiIspAiA4uw4c0a4DR3zHAIDImF8A4qIAAgjOvtLj2ld63HcMAIiM+QUgLgogAAAAAASCAggAAAAAgaAAAgAAAEAgKIAAAAAAEAhzzvnO0KLMrETSPt85mtBb0oe+Q6QZPrNo+LyQSny/ouHziobPC6nE9ysaPq9o2urnNcA51+SNQ9tdAWyrzKzQOVfgO0c64TOLhs8LqcT3Kxo+r2j4vJBKfL+i4fOKJh0/Lw4BBQAAAIBAUAABAAAAIBAUwNaz2HeANMRnFg2fF1KJ71c0fF7R8Hkhlfh+RcPnFU3afV6cAwgAAAAAgWAPIAAAAAAEggKYYmb2hJkdNLM/+c4CAFExwwCkK+YX0DQKYOo9KWmq7xAAkKQnxQwDkJ6eFPMLOAMFMMWcc/9X0iHfOdA+mNndZvaTBs+/Y2b/7jMT2jdmGFoK8wutjfmFltSeZhgFEEgvv5J0g5nlmtkNkq6XdJfnTACQCOYXgHTWbmZYR98BACTOOXfczJ6S9M+SrpN0jXOuwnMsAGgW8wtAOmtPM4wCCKSfJyTtkHSTc+4d32EAIALmF4B01i5mGAUQSD8LJJWowX+/ZjZM0kJJpZJWS9re8LlzbnmrpwSAMzG/AKSzdjHDOAcwxep2Fa+TNMTMiszsTt+ZkL7M7BuSsiRNl/QPDV66TtKPnXP/Q9JtTTwHksIM
Q0thfqG1Mb/QktrTDDPnnO8MABJgZpMkLZI03jl3xMw2SrrdObfJzPpKuk/ScUmfkvTXDZ87567wlRsAmF8A0ll7m2EUQCANmNlFktZKut45t6Nu7UuSJjrnvtRguwxJK5xzNzX1HABaG/MLQDprjzOMAgi0A2Y2UNL/lNRV0k8kFTV87px7zV86ADg75heAdJaOM4wCCAAAAACB4CIwAAAAABAICiAAAAAABIICCAAAAACBoAACAAAAQCAogAAAAAAQCAogAAAAAASCAggAAAAAgaAAAgAAAEAgKIAAAAAAEAgKIAAAAAAEggIIAAAAAIGgAAIAAABAICiAAAAAABAICiAAAAAABIICCAAAAACBoAACAAAAQCAogAAAAAAQCAogAAAAAASCAggAAAAAgaAAAgAAAEAgKIAAAAAAEAgKIAAAAAAEggIIAAAAAIGgAAIAAABAICiAAAAAABAICiDaDDPLMLN/MbNtZrbVzAb7zgQAiWB+AUhXzK/wUADRlsyTtNs5N1zSIklf8ZwHABLF/AKQrphfgenoOwAgSWbWVdJfO+fG1S3tkXS9x0gAkBDmF4B0xfwKEwUQbcVkSf3NbFPd8/MkveIvDgAkjPkFIF0xvwLEIaBoK8ZIWuCcG+OcGyPp/0jaZGZdzeyXZvYzM/sbrwkBoGlj1PT8GmxmvzCz5V7TAcDZjVHT82ta3Z+9lprZtV4TosVRANFW9JR0XJLMrKOkayU9J+lzkpY75/5e0mf9xQOAs2pyfjnndjvn7vSaDADO7Wzza2Xdn73ulnSzx3xIAQog2oo/S7q87vHXJL3gnNsjKV/S/rr1Gh/BAKAZZ5tfANDWNTe/5kt6rNVTIaUogGgrnpL0STN7W9IoSV+vWy9SbQmU+L4CaJvONr8AoK1rcn5ZrYckveSce9NnQLQ8c875zgCcVd3VqR6VVCnpNefcrz1HAoCEmFkvSf8s6RpJP3fOfc9zJABIiJnNljRD0huSNjnnfuo5EloQBRAAAAAAAsEhdQAAAAAQCAogAAAAAASCAggAAAAAgaAAAgAAAEAgKIAAAAAAEAgKIAAAAAAEggIIAAAAAIGgAAIAAABAICiAAAAAABCI/w+4qa9eA5S14AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] From e74067e27d49467ca6dbe54eac237ca3d224e989 Mon Sep 17 00:00:00 2001 From: Vasilis Syrgkanis Date: Fri, 6 Aug 2021 12:34:53 -0400 Subject: [PATCH 18/27] moved dynamic_dml to separate module. fixed remaining bugs in dgp. fixed docstring for X. fixed notebook. changed documentation. --- README.md | 2 +- doc/reference.rst | 16 +- doc/spec/estimation/dml.rst | 34 +- doc/spec/estimation/dynamic_dml.rst | 94 +++++ doc/spec/estimation_dynamic.rst | 11 + doc/spec/references.rst | 7 +- doc/spec/spec.rst | 1 + econml/dml/__init__.py | 10 +- econml/dynamic/__init__.py | 4 + econml/dynamic/dml/__init__.py | 20 + .../dynamic_dml.py => dynamic/dml/_dml.py} | 36 +- econml/tests/dgp.py | 59 ++- econml/tests/test_dynamic_dml.py | 4 +- ...mic Double Machine Learning Examples.ipynb | 345 +++++++++++------- 14 files changed, 455 insertions(+), 188 deletions(-) create mode 100644 doc/spec/estimation/dynamic_dml.rst create mode 100644 doc/spec/estimation_dynamic.rst create mode 100755 econml/dynamic/__init__.py create mode 100755 econml/dynamic/dml/__init__.py rename econml/{dml/dynamic_dml.py => dynamic/dml/_dml.py} (96%) mode change 100644 => 100755 notebooks/Dynamic Double Machine Learning Examples.ipynb diff --git a/README.md b/README.md index b6d58ec16..efd5dccf3 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ To install from source, see [For Developers](#for-developers) section below. Dynamic Double Machine Learning (click to expand) ```Python - from econml.dml import DynamicDML + from econml.dynamic.dml import DynamicDML # Use defaults est = DynamicDML() # Or specify hyperparameters diff --git a/doc/reference.rst b/doc/reference.rst index 865cc52a4..76b528925 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -17,7 +17,6 @@ Double Machine Learning (DML) econml.dml.SparseLinearDML econml.dml.CausalForestDML econml.dml.NonParamDML - econml.dml.DynamicDML .. 
_dr_api: @@ -105,6 +104,21 @@ Sieve Methods econml.iv.sieve.HermiteFeatures econml.iv.sieve.DPolynomialFeatures +.. _dynamic_api: + +Estimators for Dynamic Treatment Regimes +---------------------------------------- + +.. _dynamicdml_api: + +Dynamic Double Machine Learning +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: _autosummary + + econml.dynamic.dml.DynamicDML + .. _policy_api: Policy Learning diff --git a/doc/spec/estimation/dml.rst b/doc/spec/estimation/dml.rst index 9fe1deb57..0ffe0718a 100644 --- a/doc/spec/estimation/dml.rst +++ b/doc/spec/estimation/dml.rst @@ -34,8 +34,7 @@ What are the relevant estimator classes? This section describes the methodology implemented in the classes, :class:`._RLearner`, :class:`.DML`, :class:`.LinearDML`, :class:`.SparseLinearDML`, :class:`.KernelDML`, :class:`.NonParamDML`, -:class:`.CausalForestDML`, -:class:`.DynamicDML`. +:class:`.CausalForestDML`. Click on each of these links for a detailed module documentation and input parameters of each class. @@ -60,12 +59,6 @@ characteristics :math:`X` of the treated samples, then one can use this method. t = t0 = t1 = T[:,0] W = np.random.normal(size=(100,2)) - # DynamicDML - groups = np.repeat(a=np.arange(100), repeats=3, axis=0) - X_dyn = np.random.normal(size=(300, 1)) - T_dyn = np.random.normal(size=(300, 2)) - y_dyn = np.random.normal(size=(300, )) - .. testcode:: from econml.dml import LinearDML @@ -81,7 +74,6 @@ linear on some pre-defined; potentially high-dimensional; featurization). These :class:`.SparseLinearDML`, :class:`.KernelDML`. For fullly non-parametric heterogeneous treatment effect models, check out the :class:`.NonParamDML` and the :class:`.CausalForestDML`. -For treatments assigned sequentially over several time periods, see the class :class:`.DynamicDML`. For more options of non-parametric CATE estimators, check out the :ref:`Forest Estimators User Guide ` and the :ref:`Meta Learners User Guide `. 
@@ -165,7 +157,7 @@ Class Hierarchy Structure In this library we implement variants of several of the approaches mentioned in the last section. The hierarchy structure of the implemented CATE estimators is as follows. - .. inheritance-diagram:: econml.dml.LinearDML econml.dml.SparseLinearDML econml.dml.KernelDML econml.dml.NonParamDML econml.dml.CausalForestDML econml.dml.DynamicDML + .. inheritance-diagram:: econml.dml.LinearDML econml.dml.SparseLinearDML econml.dml.KernelDML econml.dml.NonParamDML econml.dml.CausalForestDML :parts: 1 :private-bases: :top-classes: econml._rlearner._RLearner, econml._cate_estimator.StatsModelsCateEstimatorMixin, econml._cate_estimator.DebiasedLassoCateEstimatorMixin @@ -296,28 +288,6 @@ Below we give a brief description of each of these classes: Check out :ref:`Forest Estimators User Guide ` for more information on forest based CATE models and other alternatives to the :class:`.CausalForestDML`. - * **DynamicDML.** The class :class:`.DynamicDML` is an extension of the Double ML approach for treatments assigned sequentially over time periods. - This estimator will adjust for treatments that can have causal effects on future outcomes. The data corresponds to a Markov decision process :math:`\{X_t, W_t, T_t, Y_t\}_{t=1}^m`, - where :math:`X_t, W_t` corresponds to the state at time :math:`t`, :math:`T_t` is the treatment at time :math:`t` and :math:`Y_t` is the observed outcome at time :math:`t`. - - The model makes the following structural equation assumptions on the data generating process: - - .. math:: - - X_t =~& A \cdot T_{t-1} + B \cdot X_{t-1} + \eta_t\\ - T_t =~& p(T_{t-1}, X_t, \zeta_t) \\ - Y_t =~& \theta_0'T_t + \mu'X_t \epsilon_t - - For more details about this model and underlying assumptions, see [Lewis2021]_. - - To learn the treatment effects of treatments in the different periods on the last period outcome, one can simply call: - - .. 
testcode:: - - from econml.dml import DynamicDML - est = DynamicDML() - est.fit(y_dyn, T_dyn, X=X_dyn, W=None, groups=groups, inference="auto") - * **_RLearner.** The internal private class :class:`._RLearner` is a parent of the :class:`.DML` and allows the user to specify any way of fitting a final model that takes as input the residual :math:`\tilde{T}`, the features :math:`X` and predicts the residual :math:`\tilde{Y}`. Moreover, the nuisance models take as input diff --git a/doc/spec/estimation/dynamic_dml.rst b/doc/spec/estimation/dynamic_dml.rst new file mode 100644 index 000000000..b11dad886 --- /dev/null +++ b/doc/spec/estimation/dynamic_dml.rst @@ -0,0 +1,94 @@ +.. _dynamicdmluserguide: + +=============================== +Dynamic Double Machine Learning +=============================== + +What is it? +================================== + +Dynamic Double Machine Learning is a method for estimating (heterogeneous) treatment effects when +treatments are offered over time via an adaptive dynamic policy. It applies to the case when +all potential dynamic confounders/controls (factors that simultaneously had a direct effect on the adaptive treatment +decision in the collected data and the observed outcome) are observed, but are either too many (high-dimensional) for +classical statistical approaches to be applicable or their effect on +the treatment and outcome cannot be satisfactorily modeled by parametric functions (non-parametric). +Both of these latter problems can be addressed via machine learning techniques (see e.g. [Lewis2021]_). + + +What are the relevant estimator classes? +======================================== + +This section describes the methodology implemented in the class +:class:`.DynamicDML`. +Click on each of these links for a detailed module documentation and input parameters of each class. + + +When should you use it? 
+================================== + +Suppose you have observational (or experimental from an A/B test) historical data, where multiple treatment(s)/intervention(s)/action(s) +:math:`T` were offered over time to each of the units and some final outcome(s) :math:`Y` was observed and all the variables :math:`W` that could have +potentially gone into the choice of :math:`T`, and simultaneously could have had a direct effect on the outcome :math:`Y` (aka controls or confounders) are also recorded in the dataset. + +If your goal is to understand what was the effect of the treatment on the outcome as a function of a set of observable +characteristics :math:`X` of the treated samples, then one can use this method. For instance call: + +.. testsetup:: + + # DynamicDML + import numpy as np + groups = np.repeat(a=np.arange(100), repeats=3, axis=0) + W_dyn = np.random.normal(size=(300, 1)) + X_dyn = np.random.normal(size=(300, 1)) + T_dyn = np.random.normal(size=(300, 2)) + y_dyn = np.random.normal(size=(300, )) + +.. testcode:: + + from econml.dynamic.dml import DynamicDML + est = DynamicDML() + est.fit(y_dyn, T_dyn, X=X_dyn, W=W_dyn, groups=groups) + + +Class Hierarchy Structure +================================== + +In this library we implement variants of several of the approaches mentioned in the last section. The hierarchy +structure of the implemented CATE estimators is as follows. + + .. inheritance-diagram:: econml.dynamic.dml.DynamicDML + :parts: 1 + :private-bases: + :top-classes: econml._ortho_learner._OrthoLearner, econml._cate_estimator.LinearModelFinalCateEstimatorMixin + +Below we give a brief description of each of these classes: + + * **DynamicDML.** The class :class:`.DynamicDML` is an extension of the Double ML approach for treatments assigned sequentially over time periods. + This estimator will adjust for treatments that can have causal effects on future outcomes. 
The data corresponds to a Markov decision process :math:`\{X_t, W_t, T_t, Y_t\}_{t=1}^m`, + where :math:`X_t, W_t` correspond to the state at time :math:`t`, :math:`T_t` is the treatment at time :math:`t` and :math:`Y_t` is the observed outcome at time :math:`t`. + + The model makes the following structural equation assumptions on the data generating process: + + .. math:: + + X_t =~& A \cdot T_{t-1} + B \cdot X_{t-1} + \eta_t\\ + T_t =~& p(T_{t-1}, X_t, \zeta_t) \\ + Y_t =~& \theta_0'T_t + \mu'X_t + \epsilon_t + + For more details about this model and underlying assumptions, see [Lewis2021]_. + + To learn the treatment effects of treatments in the different periods on the last period outcome, one can simply call: + + .. testcode:: + + from econml.dynamic.dml import DynamicDML + est = DynamicDML() + est.fit(y_dyn, T_dyn, X=X_dyn, W=W_dyn, groups=groups) + + + +Usage FAQs +========== + +See our FAQ section in :ref:`DML User Guide <dmluserguide>` diff --git a/doc/spec/estimation_dynamic.rst b/doc/spec/estimation_dynamic.rst new file mode 100644 index 000000000..6e7b47cc0 --- /dev/null +++ b/doc/spec/estimation_dynamic.rst @@ -0,0 +1,11 @@ +Estimation Methods for Dynamic Treatment Regimes +================================================ + +This section contains methods for estimating (heterogeneous) treatment effects, +even when treatments are offered over time and the treatments were chosen based on a dynamic +adaptive policy. This is referred to as the dynamic treatment regime (see e.g. [Hernan2010]_). + +.. toctree:: + :maxdepth: 2 + + estimation/dynamic_dml diff --git a/doc/spec/references.rst b/doc/spec/references.rst index dc9abe5bb..5f0213ac9 100644 --- a/doc/spec/references.rst +++ b/doc/spec/references.rst @@ -118,4 +118,9 @@ References .. [Lewis2021] Lewis, G., Syrgkanis, V. (2021). Double/Debiased Machine Learning for Dynamic Treatment Effects. - URL https://arxiv.org/abs/2002.07285 \ No newline at end of file + URL https://arxiv.org/abs/2002.07285 + +.. 
[Hernan2010] + Hernán, Miguel A., and James M. Robins (2010). + Causal inference. + URL https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/ diff --git a/doc/spec/spec.rst b/doc/spec/spec.rst index 693854193..649dba98c 100644 --- a/doc/spec/spec.rst +++ b/doc/spec/spec.rst @@ -19,6 +19,7 @@ The EconML Python SDK, developed by the ALICE team at MSR New England, incorpora comparison estimation estimation_iv + estimation_dynamic inference interpretability references diff --git a/econml/dml/__init__.py b/econml/dml/__init__.py index 00035f2c1..a83428cf7 100644 --- a/econml/dml/__init__.py +++ b/econml/dml/__init__.py @@ -8,7 +8,7 @@ Then estimates a CATE model by regressing the residual outcome on the residual treatment in a manner that accounts for heterogeneity in the regression coefficient, with respect to X. For the theoretical foundations of these methods see [dml]_, [rlearner]_, [paneldml]_, -[lassodml]_, [ortholearner]_, [dynamicdml]_. +[lassodml]_, [ortholearner]_. References ---------- @@ -32,15 +32,10 @@ .. [ortholearner] Dylan Foster, Vasilis Syrgkanis (2019). Orthogonal Statistical Learning. ACM Conference on Learning Theory. ``_ - -.. [dynamicdml] Greg Lewis and Vasilis Syrgkanis. - Double/Debiased Machine Learning for Dynamic Treatment Effects. - ``_, 2021. """ from .dml import (DML, LinearDML, SparseLinearDML, KernelDML, NonParamDML, ForestDML) -from .dynamic_dml import DynamicDML from .causal_forest import CausalForestDML __all__ = ["DML", @@ -49,5 +44,4 @@ "KernelDML", "NonParamDML", "ForestDML", - "CausalForestDML", - "DynamicDML"] + "CausalForestDML"] diff --git a/econml/dynamic/__init__.py b/econml/dynamic/__init__.py new file mode 100755 index 000000000..8e4ecd538 --- /dev/null +++ b/econml/dynamic/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +__all__ = ["dml"] diff --git a/econml/dynamic/dml/__init__.py b/econml/dynamic/dml/__init__.py new file mode 100755 index 000000000..a95579da7 --- /dev/null +++ b/econml/dynamic/dml/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Double Machine Learning for Dynamic Treatment Effects. + +A Double/Orthogonal machine learning approach to estimation of heterogeneous +treatment effect in the dynamic treatment regime. For the theoretical +foundations of these methods see: [dynamicdml]_. + +References +---------- + +.. [dynamicdml] Greg Lewis and Vasilis Syrgkanis. + Double/Debiased Machine Learning for Dynamic Treatment Effects. + ``_, 2021. +""" + +from ._dml import DynamicDML + +__all__ = ["DynamicDML"] diff --git a/econml/dml/dynamic_dml.py b/econml/dynamic/dml/_dml.py similarity index 96% rename from econml/dml/dynamic_dml.py rename to econml/dynamic/dml/_dml.py index 5ecd69ef2..9453b16e5 100644 --- a/econml/dml/dynamic_dml.py +++ b/econml/dynamic/dml/_dml.py @@ -8,18 +8,18 @@ from sklearn.model_selection import GroupKFold from scipy.stats import norm from sklearn.linear_model import (ElasticNetCV, LassoCV, LogisticRegressionCV) -from ..sklearn_extensions.linear_model import (StatsModelsLinearRegression, WeightedLassoCVWrapper) -from ..sklearn_extensions.model_selection import WeightedStratifiedKFold -from .dml import _FirstStageWrapper, _FinalWrapper -from .._cate_estimator import TreatmentExpansionMixin, LinearModelFinalCateEstimatorMixin -from .._ortho_learner import _OrthoLearner -from ..utilities import (_deprecate_positional, add_intercept, - broadcast_unit_treatments, check_high_dimensional, - cross_product, deprecated, fit_with_groups, - hstack, inverse_onehot, ndim, reshape, - reshape_treatmentwise_effects, shape, transpose, - get_feature_names_or_default, check_input_arrays, - filter_none_kwargs) +from ...sklearn_extensions.linear_model import (StatsModelsLinearRegression, 
WeightedLassoCVWrapper) +from ...sklearn_extensions.model_selection import WeightedStratifiedKFold +from ...dml.dml import _FirstStageWrapper, _FinalWrapper +from ..._cate_estimator import TreatmentExpansionMixin, LinearModelFinalCateEstimatorMixin +from ..._ortho_learner import _OrthoLearner +from ...utilities import (_deprecate_positional, add_intercept, + broadcast_unit_treatments, check_high_dimensional, + cross_product, deprecated, fit_with_groups, + hstack, inverse_onehot, ndim, reshape, + reshape_treatmentwise_effects, shape, transpose, + get_feature_names_or_default, check_input_arrays, + filter_none_kwargs) def _get_groups_period_filter(groups, n_periods): @@ -408,7 +408,7 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): .. testcode:: - from econml.dml import DynamicDML + from econml.dynamic.dml import DynamicDML np.random.seed(123) @@ -544,7 +544,7 @@ def _strata(self, Y, T, X=None, W=None, Z=None, @_deprecate_positional("X, and should be passed by keyword only. In a future release " "we will disallow passing X and W by position.", ['X', 'W']) def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups, - cache_values=False, inference=None): + cache_values=False, inference='auto'): """Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. The input data must contain groups with the same size corresponding to the number @@ -559,6 +559,10 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou ...etc. + Only the value of the features X at the first period of each unit is used for + heterogeneity. The value of X in subsequent periods is used as a time-varying control + but not for heterogeneity. 
+ Parameters ---------- Y: (n, d_y) matrix or vector of length n @@ -566,7 +570,9 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou T: (n, d_t) matrix or vector of length n Treatments for each sample (required: n = n_groups * n_periods) X: optional(n, d_x) matrix or None (Default=None) - Features for each sample (Required: n = n_groups * n_periods) + Features for each sample (Required: n = n_groups * n_periods). Only first + period features from each unit are used for heterogeneity, the rest are + used as time-varying controls together with W W: optional(n, d_w) matrix or None (Default=None) Controls for each sample (Required: n = n_groups * n_periods) sample_weight: optional(n,) vector or None (Default=None) diff --git a/econml/tests/dgp.py b/econml/tests/dgp.py index e4057b44a..403783447 100644 --- a/econml/tests/dgp.py +++ b/econml/tests/dgp.py @@ -4,6 +4,16 @@ import numpy as np from econml.utilities import cross_product from statsmodels.tools.tools import add_constant +try: + import matplotlib + import matplotlib.pyplot as plt +except ImportError as exn: + from .utilities import MissingModule + + # make any access to matplotlib or plt throw an exception + matplotlib = plt = MissingModule("matplotlib is no longer a dependency of the main econml package; " + "install econml[plt] or econml[all] to require it, or install matplotlib " + "separately, to use the tree interpreters", exn) class _BaseDynamicPanelDGP: @@ -52,8 +62,8 @@ class DynamicPanelDGP(_BaseDynamicPanelDGP): def __init__(self, n_periods, n_treatments, n_x): super().__init__(n_periods, n_treatments, n_x) - def create_instance(self, s_x, sigma_x=.5, sigma_y=.5, conf_str=5, hetero_strength=0, hetero_inds=None, - autoreg=.5, state_effect=.5, random_seed=123): + def create_instance(self, s_x, sigma_x=.8, sigma_y=.1, conf_str=5, hetero_strength=.5, hetero_inds=None, + autoreg=.25, state_effect=.25, random_seed=123): np.random.seed(random_seed) self.s_x = s_x self.conf_str 
= conf_str @@ -145,7 +155,7 @@ def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): np.random.normal(0, self.sigma_y) groups[t] = t // self.n_periods - return Y, T, X[:, self.hetero_inds] if self.hetero_inds else None, X[:, self.endo_inds], groups + return Y, T, X[:, self.hetero_inds] if (self.hetero_inds is not None) else None, X[:, self.endo_inds], groups def observational_data(self, n_units, gamma=0, s_t=1, sigma_t=0.5, random_seed=123): """Generate observational data with some observational treatment policy parameters. @@ -164,3 +174,46 @@ def policy_gen(Tpre, X, period): return gamma * Tpre + (1 - gamma) * np.dot(Delta, X) + \ np.random.normal(0, sigma_t, size=self.n_treatments) return self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) + + +# Auxiliary function for adding xticks and vertical lines when plotting results +# for dynamic dml vs ground truth parameters. +def add_vlines(n_periods, n_treatments, hetero_inds): + locs, labels = plt.xticks([], []) + locs += [- .501 + (len(hetero_inds) + 1) / 2] + labels += ["\n\n$\\tau_{{{}}}$".format(0)] + locs += [qx for qx in np.arange(len(hetero_inds) + 1)] + labels += ["$1$"] + ["$x_{{{}}}$".format(qx) for qx in hetero_inds] + for q in np.arange(1, n_treatments): + plt.axvline(x=q * (len(hetero_inds) + 1) - .5, + linestyle='--', color='red', alpha=.2) + locs += [q * (len(hetero_inds) + 1) - .501 + (len(hetero_inds) + 1) / 2] + labels += ["\n\n$\\tau_{{{}}}$".format(q)] + locs += [(q * (len(hetero_inds) + 1) + qx) + for qx in np.arange(len(hetero_inds) + 1)] + labels += ["$1$"] + ["$x_{{{}}}$".format(qx) for qx in hetero_inds] + locs += [- .501 + (len(hetero_inds) + 1) * n_treatments / 2] + labels += ["\n\n\n\n$\\theta_{{{}}}$".format(0)] + for t in np.arange(1, n_periods): + plt.axvline(x=t * (len(hetero_inds) + 1) * + n_treatments - .5, linestyle='-', alpha=.6) + locs += [t * (len(hetero_inds) + 1) * n_treatments - .501 + + (len(hetero_inds) + 1) * n_treatments / 2] + labels 
+= ["\n\n\n\n$\\theta_{{{}}}$".format(t)] + locs += [t * (len(hetero_inds) + 1) * + n_treatments - .501 + (len(hetero_inds) + 1) / 2] + labels += ["\n\n$\\tau_{{{}}}$".format(0)] + locs += [t * (len(hetero_inds) + 1) * n_treatments + + qx for qx in np.arange(len(hetero_inds) + 1)] + labels += ["$1$"] + ["$x_{{{}}}$".format(qx) for qx in hetero_inds] + for q in np.arange(1, n_treatments): + plt.axvline(x=t * (len(hetero_inds) + 1) * n_treatments + q * (len(hetero_inds) + 1) - .5, + linestyle='--', color='red', alpha=.2) + locs += [t * (len(hetero_inds) + 1) * n_treatments + q * + (len(hetero_inds) + 1) - .501 + (len(hetero_inds) + 1) / 2] + labels += ["\n\n$\\tau_{{{}}}$".format(q)] + locs += [t * (len(hetero_inds) + 1) * n_treatments + (q * (len(hetero_inds) + 1) + qx) + for qx in np.arange(len(hetero_inds) + 1)] + labels += ["$1$"] + ["$x_{{{}}}$".format(qx) for qx in hetero_inds] + plt.xticks(locs, labels) + plt.tight_layout() diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index 380c4a777..be2437a7c 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -8,8 +8,8 @@ from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures from sklearn.linear_model import (LinearRegression, LassoCV, Lasso, MultiTaskLasso, MultiTaskLassoCV, LogisticRegression) -from econml.dml import DynamicDML -from econml.dml.dynamic_dml import _get_groups_period_filter +from econml.dynamic.dml import DynamicDML +from econml.dynamic.dml._dml import _get_groups_period_filter from econml.inference import BootstrapInference, EmpiricalInferenceResults, NormalInferenceResults from econml.utilities import shape, hstack, vstack, reshape, cross_product import econml.tests.utilities # bugfix for assertWarns diff --git a/notebooks/Dynamic Double Machine Learning Examples.ipynb b/notebooks/Dynamic Double Machine Learning Examples.ipynb old mode 100644 new mode 100755 index ecfd78ee1..b5d48e3a2 --- 
a/notebooks/Dynamic Double Machine Learning Examples.ipynb +++ b/notebooks/Dynamic Double Machine Learning Examples.ipynb @@ -74,7 +74,8 @@ "metadata": {}, "outputs": [], "source": [ - "import econml" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { @@ -82,10 +83,19 @@ "execution_count": 2, "metadata": {}, "outputs": [], + "source": [ + "import econml" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], "source": [ "# Main imports\n", - "from econml.dml import DynamicDML\n", - "from econml.tests.dgp import DynamicPanelDGP\n", + "from econml.dynamic.dml import DynamicDML\n", + "from econml.tests.dgp import DynamicPanelDGP, add_vlines\n", "\n", "# Helper imports\n", "import numpy as np\n", @@ -122,15 +132,15 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Define DGP parameters\n", "np.random.seed(123)\n", - "n_panels = 1000 # number of panels\n", + "n_panels = 5000 # number of panels\n", "n_periods = 3 # number of time periods in each panel\n", - "n_treatments = 1 # number of treatments in each period\n", + "n_treatments = 2 # number of treatments in each period\n", "n_x = 100 # number of features + controls\n", "s_x = 10 # number of controls (endogeneous variables)\n", "s_t = 10 # treatment support size" @@ -138,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -158,55 +168,55 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "est = DynamicDML(\n", - " model_y=LassoCV(cv=3), \n", - " model_t=MultiTaskLassoCV(cv=3), \n", + " model_y=LassoCV(cv=3, max_iter=1000), \n", + " model_t=MultiTaskLassoCV(cv=3, max_iter=1000), \n", " cv=3)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - "Objective did not converge. You might want to increase the number of iterations. Duality gap: 2.0609523330807065, tolerance: 0.3973271492025328\n" - ] - }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "est.fit(Y, T, X=None, W=W, groups=groups, inference=\"auto\")" + "est.fit(Y, T, X=None, W=W, groups=groups)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Average effect of default policy: 2.35\n" + "Average effect of default policy: 1.40\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A scalar was specified but there are multiple treatments; the same value will be used for each treatment. Consider specifyingall treatments, or using the const_marginal_effect method.\n" ] } ], @@ -217,50 +227,50 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Effect of target policy over baseline policy: 4.79\n" + "Effect of target policy over baseline policy: 1.40\n" ] } ], "source": [ "# Effect of target policy over baseline policy\n", "# Must specify a treatment for each period\n", - "baseline_policy = np.zeros((1, n_periods))\n", - "target_policy = np.array([[1, 2, 3]])\n", + "baseline_policy = np.zeros((1, n_periods * n_treatments))\n", + "target_policy = np.ones((1, n_periods * n_treatments))\n", "eff = est.effect(T0=baseline_policy, T1=target_policy)\n", "print(f\"Effect of target policy over baseline policy: {eff[0]:0.2f}\")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Effect of a treatment in period 1 on period 3 outcome: 0.61\n", - "Effect of a treatment in period 
2 on period 3 outcome: 1.05\n", - "Effect of a treatment in period 3 on period 3 outcome: 0.69\n" + "Marginal effect of a treatments in period 1 on period 3 outcome: [0.04000235 0.0701606 ]\n", + "Marginal effect of a treatments in period 2 on period 3 outcome: [0.31611764 0.23714736]\n", + "Marginal effect of a treatments in period 3 on period 3 outcome: [0.13108411 0.60656886]\n" ] } ], "source": [ "# Period treatment effects + interpretation\n", - "for i, theta in enumerate(est.intercept_):\n", - " print(f\"Effect of a treatment in period {i+1} on period {n_periods} outcome: {theta:0.2f}\")" + "for i, theta in enumerate(est.intercept_.reshape(-1, n_treatments)):\n", + " print(f\"Marginal effect of a treatments in period {i+1} on period {n_periods} outcome: {theta}\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -276,30 +286,42 @@ "\n", "\n", "\n", - " \n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|(T0)$_0$ 0.04 0.041 0.977 0.328 -0.027 0.107
cate_intercept|(T1)$_0$ 0.07 0.04 1.74 0.082 0.004 0.136
cate_intercept|(T0)$_1$ 0.316 0.036 8.848 0.0 0.257 0.375
cate_intercept|(T0)$_0$ 0.61 0.09 6.74 0.0 0.461 0.758cate_intercept|(T1)$_1$ 0.237 0.036 6.608 0.0 0.178 0.296
cate_intercept|(T0)$_1$ 1.05 0.067 15.565 0.0 0.939 1.161cate_intercept|(T0)$_2$ 0.131 0.003 45.665 0.0 0.126 0.136
cate_intercept|(T0)$_2$ 0.692 0.031 22.308 0.0 0.641 0.743cate_intercept|(T1)$_2$ 0.607 0.003 210.244 0.0 0.602 0.611


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], "text/plain": [ "\n", "\"\"\"\n", - " CATE Intercept Results \n", - "=============================================================================\n", - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ 0.61 0.09 6.74 0.0 0.461 0.758\n", - "cate_intercept|(T0)$_1$ 1.05 0.067 15.565 0.0 0.939 1.161\n", - "cate_intercept|(T0)$_2$ 0.692 0.031 22.308 0.0 0.641 0.743\n", - "-----------------------------------------------------------------------------\n", + " CATE Intercept Results \n", + "==============================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "------------------------------------------------------------------------------\n", + "cate_intercept|(T0)$_0$ 0.04 0.041 0.977 0.328 -0.027 0.107\n", + "cate_intercept|(T1)$_0$ 0.07 0.04 1.74 0.082 0.004 0.136\n", + "cate_intercept|(T0)$_1$ 0.316 0.036 8.848 0.0 0.257 0.375\n", + "cate_intercept|(T1)$_1$ 0.237 0.036 6.608 0.0 0.178 0.296\n", + "cate_intercept|(T0)$_2$ 0.131 0.003 45.665 0.0 0.126 0.136\n", + "cate_intercept|(T1)$_2$ 0.607 0.003 210.244 0.0 0.602 0.611\n", + "------------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", @@ -309,7 +331,7 @@ "\"\"\"" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +343,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -337,12 +359,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -373,7 +395,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 2. Example Usage with Heterogeneous Treatment Effects" + "# 2. Example Usage with Heterogeneous Treatment Effects on Time-Invariant Unit Characteristics\n", + "\n", + "We can also estimate treatment effect heterogeneity with respect to the value of some subset of features $X$ in the initial period. Heterogeneity is currently only supported with respect to such initial state features. This for instance can support heterogeneity with respect to time-invariant unit characteristics. In that case you can simply pass as $X$ a repetition of some unit features that stay constant in all periods. You can also pass time-varying features, and their time varying component will be used as a time-varying control. However, heterogeneity will only be estimated with respect to the initial state." ] }, { @@ -385,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -396,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -417,7 +441,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -429,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "scrolled": true }, @@ -437,10 +461,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -451,7 +475,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -460,52 +484,100 @@ "\n", "\n", "\n", - " \n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " 
\n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
Coefficient Results
point_estimate stderr zstat pvalue ci_lower ci_upper point_estimate stderr zstat pvalue ci_lower ci_upper
X0|(T0)$_0$ 0.009 0.045 0.203 0.839 -0.065 0.083
X0|(T1)$_0$ 0.017 0.042 0.416 0.677 -0.051 0.086
X0|(T0)$_1$ -0.001 0.041 -0.035 0.972 -0.069 0.067
X0|(T1)$_1$ -0.031 0.041 -0.76 0.447 -0.099 0.036
X0|(T0)$_2$ -0.306 0.008 -36.667 0.0 -0.32 -0.292
X0|(T1)$_2$ 0.158 0.008 19.656 0.0 0.145 0.171
X1|(T0)$_0$ 0.017 0.044 0.378 0.706 -0.056 0.09
X1|(T1)$_0$ -0.007 0.045 -0.164 0.87 -0.082 0.067
X1|(T0)$_1$ -0.034 0.042 -0.821 0.412 -0.103 0.034
X0|(T0)$_0$ 0.112 0.119 0.947 0.344 -0.083 0.308X1|(T1)$_1$ -0.025 0.042 -0.6 0.549 -0.095 0.044
X0|(T0)$_1$ 0.092 0.128 0.719 0.472 -0.119 0.303X1|(T0)$_2$ -0.302 0.008 -35.72 0.0 -0.316 -0.288
X0|(T0)$_2$ 0.443 0.076 5.85 0.0 0.318 0.567X1|(T1)$_2$ 0.156 0.008 18.801 0.0 0.142 0.169
\n", "\n", "\n", "\n", - " \n", + " \n", + "\n", + "\n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", "\n", "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|(T0)$_0$ 0.024 0.036 0.653 0.513 -0.036 0.084
cate_intercept|(T0)$_0$ -0.082 0.061 -1.347 0.178 -0.182 0.018cate_intercept|(T1)$_0$ -0.033 0.036 -0.929 0.353 -0.092 0.025
cate_intercept|(T0)$_1$ -0.035 0.059 -0.591 0.554 -0.133 0.063cate_intercept|(T0)$_1$ -0.105 0.034 -3.067 0.002 -0.162 -0.049
cate_intercept|(T0)$_2$ 0.592 0.035 17.006 0.0 0.535 0.65cate_intercept|(T1)$_1$ 0.037 0.034 1.079 0.281 -0.019 0.093
cate_intercept|(T0)$_2$ -0.743 0.005 -140.503 0.0 -0.752 -0.734
cate_intercept|(T1)$_2$ 0.48 0.005 91.061 0.0 0.472 0.489


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], "text/plain": [ "\n", "\"\"\"\n", - " Coefficient Results \n", - "================================================================\n", - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "----------------------------------------------------------------\n", - "X0|(T0)$_0$ 0.112 0.119 0.947 0.344 -0.083 0.308\n", - "X0|(T0)$_1$ 0.092 0.128 0.719 0.472 -0.119 0.303\n", - "X0|(T0)$_2$ 0.443 0.076 5.85 0.0 0.318 0.567\n", - " CATE Intercept Results \n", - "=============================================================================\n", - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "-----------------------------------------------------------------------------\n", - "cate_intercept|(T0)$_0$ -0.082 0.061 -1.347 0.178 -0.182 0.018\n", - "cate_intercept|(T0)$_1$ -0.035 0.059 -0.591 0.554 -0.133 0.063\n", - "cate_intercept|(T0)$_2$ 0.592 0.035 17.006 0.0 0.535 0.65\n", - "-----------------------------------------------------------------------------\n", + " Coefficient Results \n", + "==================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "------------------------------------------------------------------\n", + "X0|(T0)$_0$ 0.009 0.045 0.203 0.839 -0.065 0.083\n", + "X0|(T1)$_0$ 0.017 0.042 0.416 0.677 -0.051 0.086\n", + "X0|(T0)$_1$ -0.001 0.041 -0.035 0.972 -0.069 0.067\n", + "X0|(T1)$_1$ -0.031 0.041 -0.76 0.447 -0.099 0.036\n", + "X0|(T0)$_2$ -0.306 0.008 -36.667 0.0 -0.32 -0.292\n", + "X0|(T1)$_2$ 0.158 0.008 19.656 0.0 0.145 0.171\n", + "X1|(T0)$_0$ 0.017 0.044 0.378 0.706 -0.056 0.09\n", + "X1|(T1)$_0$ -0.007 0.045 -0.164 0.87 -0.082 0.067\n", + "X1|(T0)$_1$ -0.034 0.042 -0.821 0.412 -0.103 0.034\n", + "X1|(T1)$_1$ -0.025 0.042 -0.6 0.549 -0.095 0.044\n", + "X1|(T0)$_2$ -0.302 0.008 -35.72 0.0 -0.316 -0.288\n", + "X1|(T1)$_2$ 0.156 0.008 18.801 0.0 0.142 0.169\n", + " CATE Intercept Results \n", + 
"===============================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-------------------------------------------------------------------------------\n", + "cate_intercept|(T0)$_0$ 0.024 0.036 0.653 0.513 -0.036 0.084\n", + "cate_intercept|(T1)$_0$ -0.033 0.036 -0.929 0.353 -0.092 0.025\n", + "cate_intercept|(T0)$_1$ -0.105 0.034 -3.067 0.002 -0.162 -0.049\n", + "cate_intercept|(T1)$_1$ 0.037 0.034 1.079 0.281 -0.019 0.093\n", + "cate_intercept|(T0)$_2$ -0.743 0.005 -140.503 0.0 -0.752 -0.734\n", + "cate_intercept|(T1)$_2$ 0.48 0.005 91.061 0.0 0.472 0.489\n", + "-------------------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", @@ -515,7 +587,7 @@ "\"\"\"" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -526,14 +598,22 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Average effect of default policy:0.42\n" + "Average effect of default policy:-0.42\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A scalar was specified but there are multiple treatments; the same value will be used for each treatment. Consider specifyingall treatments, or using the const_marginal_effect method.\n", + "A scalar was specified but there are multiple treatments; the same value will be used for each treatment. 
Consider specifyingall treatments, or using the const_marginal_effect method.\n" ] } ], @@ -545,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -553,47 +633,54 @@ "output_type": "stream", "text": [ "Effect of target policy over baseline policy for test set:\n", - " [ 2.1924814 1.69559976 -0.24732358 0.40266764 2.33464274 0.81404248\n", - " 2.25540586 1.52509443 2.44106892]\n" + " [-0.37368525 -0.30896804 -0.43030363 -0.52252401 -0.42849622 -0.48790877\n", + " -0.34417987 -0.51804937 -0.36806744]\n" ] } ], "source": [ "# Effect of target policy over baseline policy\n", "# Must specify a treatment for each period\n", - "baseline_policy = np.zeros((1, n_periods))\n", - "target_policy = np.array([[1, 2, 3]])\n", + "baseline_policy = np.zeros((1, n_periods * n_treatments))\n", + "target_policy = np.ones((1, n_periods * n_treatments))\n", "eff = est.effect(X=X_test, T0=baseline_policy, T1=target_policy)\n", "print(\"Effect of target policy over baseline policy for test set:\\n\", eff)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(array([-0.08178444, -0.03511038, 0.59245168]),\n", - " array([[0.11235101],\n", - " [0.09227667],\n", - " [0.44267996]]))" + "(array([ 0.02374269, -0.03302781, -0.10526464, 0.03675719, -0.74294675,\n", + " 0.48025068]),\n", + " array([[ 0.00914226, 0.01675409],\n", + " [ 0.01732804, -0.00741467],\n", + " [-0.00143705, -0.03431712],\n", + " [-0.03136295, -0.02536834],\n", + " [-0.30581311, -0.30189654],\n", + " [ 0.15773252, 0.15564665]]))" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Coefficients\n", + "# Coefficients: intercept is of shape n_treatments*n_periods\n", + "# coef_ is of shape (n_treatments*n_periods, n_hetero_inds).\n", + "# first n_treatment rows are from first period, next 
n_treatment\n", + "# from second period, etc.\n", "est.intercept_, est.coef_" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -611,12 +698,39 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# parse true parameters in array of shape (n_treatments*n_periods, 1 + n_hetero_inds)\n", + "# first column is the intercept\n", + "true_effect_inds = []\n", + "for t in range(n_treatments):\n", + " true_effect_inds += [t * (1 + n_x)] + (list(t * (1 + n_x) + 1 + het_inds) if len(het_inds)>0 else [])\n", + "true_effect_params = dgp.true_hetero_effect[:, true_effect_inds]\n", + "true_effect_params = true_effect_params.reshape((n_treatments*n_periods, 1 + het_inds.shape[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# concatenating intercept and coef_\n", + "param_hat = np.hstack([est.intercept_.reshape(-1, 1), est.coef_])\n", + "lower = np.hstack([conf_ints_intercept[0].reshape(-1, 1), conf_ints_coef[0]])\n", + "upper = np.hstack([conf_ints_intercept[1].reshape(-1, 1), conf_ints_coef[1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -628,32 +742,13 @@ } ], "source": [ - "# Some plotting boilerplate code\n", "plt.figure(figsize=(15, 5))\n", - "# Intercepts\n", - "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments), \n", - " est.intercept_, \n", - " yerr=(conf_ints_intercept[1] - est.intercept_, est.intercept_ - conf_ints_intercept[0]), \n", - " fmt='o', label='DynamicDML')\n", - "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments), ate_effect.flatten(), \n", - " fmt='o', label='Ground truth')\n", - "# Heterogeneous effects\n", - "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments)+1, est.coef_, \n", - " yerr=((conf_ints_coef[1] - est.coef_).flatten(), \n", - " (est.coef_ - conf_ints_coef[0]).flatten()), \n", - " fmt='o', color='C0')\n", - "plt.errorbar((het_inds.shape[0]+1)*np.arange(n_periods*n_treatments)+1, het_effect, \n", - " fmt='o', color='C1')\n", - "for t in np.arange(2, (het_inds.shape[0]+1)*n_periods, 2):\n", - " plt.axvline(x=t * n_treatments - .5, linestyle='--', alpha=.4)\n", - "# Labels\n", - "x_range = np.arange(n_periods*n_treatments*(het_inds.shape[0]+1))\n", - "x_ticks = list(x_range)\n", - "x_labels = [1 if i%(n_treatments+1)==0 else f\"$X_\\u007b{het_inds[i%(n_treatments+1)-1]}\\u007d$\" for i in x_range]\n", - "x_ticks += [het_inds.shape[0]/2 + i*(n_treatments*(het_inds.shape[0]+1)) for i in range(n_periods)]\n", - "x_labels += [f\"\\n\\n\\n$\\\\theta_{i}$\" for i in range(n_periods)]\n", - "plt.xticks(x_ticks, x_labels)\n", - "plt.ylabel(\"Effect\")\n", + "plt.errorbar(np.arange(n_periods * (len(het_inds) + 1) * n_treatments),\n", + " true_effect_params.flatten(), fmt='*', label='Ground Truth')\n", + "plt.errorbar(np.arange(n_periods * (len(het_inds) + 1) * n_treatments),\n", + " param_hat.flatten(), yerr=((upper - param_hat).flatten(),\n", + " (param_hat - lower).flatten()), fmt='o', label='DynamicDML')\n", + "add_vlines(n_periods, n_treatments, het_inds)\n", "plt.legend()\n", "plt.show()" ] @@ -675,7 +770,7 @@ 
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.5" } }, "nbformat": 4, From 99e62e7b1e367279c24b32387f696bbdb6c8f2ff Mon Sep 17 00:00:00 2001 From: Vasilis Syrgkanis Date: Fri, 6 Aug 2021 12:51:49 -0400 Subject: [PATCH 19/27] fixed ref in doc --- doc/spec/estimation/dynamic_dml.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/spec/estimation/dynamic_dml.rst b/doc/spec/estimation/dynamic_dml.rst index b11dad886..db6487c4f 100644 --- a/doc/spec/estimation/dynamic_dml.rst +++ b/doc/spec/estimation/dynamic_dml.rst @@ -57,7 +57,7 @@ Class Hierarchy Structure In this library we implement variants of several of the approaches mentioned in the last section. The hierarchy structure of the implemented CATE estimators is as follows. - .. inheritance-diagram:: econml.dml.DynamicDML + .. inheritance-diagram:: econml.dynamic.dml.DynamicDML :parts: 1 :private-bases: :top-classes: econml._OrthoLearner, econml._cate_estimator.LinearModelFinalCateEstimatorMixin From 1cf663ac222a5ddb7895233e0064eee9c4f0b9fe Mon Sep 17 00:00:00 2001 From: Vasilis Syrgkanis Date: Fri, 6 Aug 2021 14:00:31 -0400 Subject: [PATCH 20/27] doc bug --- doc/spec/estimation/dynamic_dml.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/spec/estimation/dynamic_dml.rst b/doc/spec/estimation/dynamic_dml.rst index db6487c4f..0003c76f6 100644 --- a/doc/spec/estimation/dynamic_dml.rst +++ b/doc/spec/estimation/dynamic_dml.rst @@ -72,10 +72,11 @@ Below we give a brief description of each of these classes: .. math:: - X_t =~& A \cdot T_{t-1} + B \cdot X_{t-1} + \eta_t\\ - T_t =~& p(T_{t-1}, X_t, \zeta_t) \\ - Y_t =~& \theta_0'T_t + \mu'X_t \epsilon_t + XW_t =~& A \cdot T_{t-1} + B \cdot XW_{t-1} + \eta_t\\ + T_t =~& p(T_{t-1}, XW_t, \zeta_t) \\ + Y_t =~& \theta_0(X_0)'T_t + \mu'XW_t + \epsilon_t + where :math:`XW` is the concatenation of the :math:`X` and :math:`W` variables. 
For more details about this model and underlying assumptions, see [Lewis2021]_. To learn the treatment effects of treatments in the different periods on the last period outcome, one can simply call: @@ -91,4 +92,4 @@ Below we give a brief description of each of these classes: Usage FAQs ========== -See our FAQ section in `_dmluserguide`_ +See our FAQ section in :ref:`DML User Guide <dmluserguide>` From 669c284ac2c60cf1ec2bfc1ed7c372e38faca114 Mon Sep 17 00:00:00 2001 From: Vasilis Syrgkanis Date: Fri, 6 Aug 2021 14:05:12 -0400 Subject: [PATCH 21/27] relaxed dynamci dml tests --- econml/tests/test_dynamic_dml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py index be2437a7c..7539c18f9 100644 --- a/econml/tests/test_dynamic_dml.py +++ b/econml/tests/test_dynamic_dml.py @@ -294,5 +294,5 @@ def lasso_model(): np.testing.assert_allclose(est.coef_, dgp.true_hetero_effect[:, hetero_inds + 1], atol=0.2) np.testing.assert_array_less(est.intercept__interval()[0], dgp.true_effect.flatten()) np.testing.assert_array_less(dgp.true_effect.flatten(), est.intercept__interval()[1]) - np.testing.assert_array_less(est.coef__interval()[0], dgp.true_hetero_effect[:, hetero_inds + 1]) - np.testing.assert_array_less(dgp.true_hetero_effect[:, hetero_inds + 1], est.coef__interval()[1]) + np.testing.assert_array_less(est.coef__interval()[0] - .05, dgp.true_hetero_effect[:, hetero_inds + 1]) + np.testing.assert_array_less(dgp.true_hetero_effect[:, hetero_inds + 1] - .05, est.coef__interval()[1]) From 0420656ba52c451054177f928c8668704d3a8e62 Mon Sep 17 00:00:00 2001 From: Vasilis Syrgkanis Date: Fri, 6 Aug 2021 15:55:09 -0400 Subject: [PATCH 22/27] fixed doctest --- doc/spec/estimation/dynamic_dml.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/spec/estimation/dynamic_dml.rst b/doc/spec/estimation/dynamic_dml.rst index 0003c76f6..b92f319d8 100644 --- a/doc/spec/estimation/dynamic_dml.rst 
+++ b/doc/spec/estimation/dynamic_dml.rst @@ -46,7 +46,7 @@ characteristics :math:`X` of the treated samples, then one can use this method. .. testcode:: - from econml.dml import DynamicDML + from econml.dynamic.dml import DynamicDML est = DynamicDML() est.fit(y_dyn, T_dyn, X=X_dyn, W=W_dyn, groups=groups) @@ -83,7 +83,7 @@ Below we give a brief description of each of these classes: .. testcode:: - from econml.dml import DynamicDML + from econml.dynamic.dml import DynamicDML est = DynamicDML() est.fit(y_dyn, T_dyn, X=X_dyn, W=W_dyn, groups=groups) From 42c65dd89af0234c9afb774a47a5417e335aa509 Mon Sep 17 00:00:00 2001 From: Maggie Hei Date: Sun, 8 Aug 2021 01:02:33 -0400 Subject: [PATCH 23/27] add ROI notebook --- econml/data/dynamic_panel_dgp.py | 457 +++++++++++++ econml/data/input_dynamicdgp/cov_new.jbl | Bin 0 -> 63587 bytes econml/data/input_dynamicdgp/gm_0.jbl | Bin 0 -> 1067 bytes econml/data/input_dynamicdgp/gm_1.jbl | Bin 0 -> 1067 bytes econml/data/input_dynamicdgp/gm_2.jbl | Bin 0 -> 1067 bytes econml/data/input_dynamicdgp/gm_3.jbl | Bin 0 -> 1067 bytes econml/data/input_dynamicdgp/gm_4.jbl | Bin 0 -> 1067 bytes econml/data/input_dynamicdgp/gm_5.jbl | Bin 0 -> 1067 bytes econml/data/input_dynamicdgp/gm_6.jbl | Bin 0 -> 1067 bytes .../data/input_dynamicdgp/lognorm_neg_0.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_neg_1.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_neg_2.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_neg_3.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_neg_4.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_neg_5.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_pos_0.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_pos_1.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_pos_2.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_pos_3.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_pos_4.jbl | Bin 0 -> 164 bytes 
.../data/input_dynamicdgp/lognorm_pos_5.jbl | Bin 0 -> 164 bytes .../data/input_dynamicdgp/lognorm_pos_6.jbl | Bin 0 -> 164 bytes econml/data/input_dynamicdgp/n_0.jbl | Bin 0 -> 139 bytes econml/data/input_dynamicdgp/n_1.jbl | Bin 0 -> 139 bytes econml/data/input_dynamicdgp/n_2.jbl | Bin 0 -> 139 bytes econml/data/input_dynamicdgp/n_3.jbl | Bin 0 -> 139 bytes econml/data/input_dynamicdgp/n_4.jbl | Bin 0 -> 139 bytes econml/data/input_dynamicdgp/n_5.jbl | Bin 0 -> 139 bytes econml/data/input_dynamicdgp/n_6.jbl | Bin 0 -> 139 bytes ... at Microsoft via Short-Term Proxies.ipynb | 606 ++++++++++++++++++ 30 files changed, 1063 insertions(+) create mode 100644 econml/data/dynamic_panel_dgp.py create mode 100644 econml/data/input_dynamicdgp/cov_new.jbl create mode 100644 econml/data/input_dynamicdgp/gm_0.jbl create mode 100644 econml/data/input_dynamicdgp/gm_1.jbl create mode 100644 econml/data/input_dynamicdgp/gm_2.jbl create mode 100644 econml/data/input_dynamicdgp/gm_3.jbl create mode 100644 econml/data/input_dynamicdgp/gm_4.jbl create mode 100644 econml/data/input_dynamicdgp/gm_5.jbl create mode 100644 econml/data/input_dynamicdgp/gm_6.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_neg_0.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_neg_1.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_neg_2.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_neg_3.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_neg_4.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_neg_5.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_pos_0.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_pos_1.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_pos_2.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_pos_3.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_pos_4.jbl create mode 100644 econml/data/input_dynamicdgp/lognorm_pos_5.jbl create mode 100644 
econml/data/input_dynamicdgp/lognorm_pos_6.jbl create mode 100644 econml/data/input_dynamicdgp/n_0.jbl create mode 100644 econml/data/input_dynamicdgp/n_1.jbl create mode 100644 econml/data/input_dynamicdgp/n_2.jbl create mode 100644 econml/data/input_dynamicdgp/n_3.jbl create mode 100644 econml/data/input_dynamicdgp/n_4.jbl create mode 100644 econml/data/input_dynamicdgp/n_5.jbl create mode 100644 econml/data/input_dynamicdgp/n_6.jbl create mode 100644 notebooks/CustomerScenarios/Case Study - Long-Term Return-on-Investment at Microsoft via Short-Term Proxies.ipynb diff --git a/econml/data/dynamic_panel_dgp.py b/econml/data/dynamic_panel_dgp.py new file mode 100644 index 000000000..07c280083 --- /dev/null +++ b/econml/data/dynamic_panel_dgp.py @@ -0,0 +1,457 @@ +import numpy as np +from econml.utilities import cross_product +from statsmodels.tools.tools import add_constant +import pandas as pd +import scipy as sp +from scipy.stats import expon +from sklearn.linear_model import LinearRegression +import matplotlib.pyplot as plt +import joblib +import os + + +dir = os.path.dirname(__file__) + +# covariance matrix + + +def new_cov_matrix(cov): + p = cov.shape[0] + # get eigen value and eigen vectors + e_val, e_vec = sp.linalg.eigh(cov) + start = [0, 35, 77, 86] + end = [35, 77, 86, p] + e_val_new = np.array([]) + for i, j in zip(start, end): + e_val_new = np.append(e_val_new, linear_approximation(i, j, e_val)) + # simulate eigen vectors + e_vec_new = np.zeros_like(e_vec) + for i in range(p): + w = np.zeros(p) # , np.random.normal(0.01, 0.01, size=p) + w[np.random.choice(p, 6)] += np.random.normal(0.01, 0.06, size=(6)) + e_vec_new[:, i] = w / np.linalg.norm(w) + # keep the top 4 eigen value and corresponding eigen vector + e_vec_new[:, -4:] = e_vec[:, -4:] + e_val_new[-4:] = e_val[-4:] + # replace the negative eigen values + e_val_new[np.where(e_val_new < 0)] = e_val[np.where(e_val_new < 0)] + # generate a new covariance matrix + cov_new = 
e_vec_new.dot(np.diag(e_val_new)).dot(e_vec_new.T) + return cov_new + +# get linear approximation of eigen values + + +def linear_approximation(start, end, e_val): + est = LinearRegression() + X = np.arange(start, end).reshape(-1, 1) + est.fit(X, e_val[start:end]) + pred = est.predict(X) + return pred + + +# coefs +def generate_coefs(index, columns): + simulated_coefs_df = pd.DataFrame(0, index=index, columns=columns) + # get the indices of each group of features + ind_demo = [columns.index(col) for col in columns if "demo" in col] + ind_proxy = [columns.index(col) for col in columns if "proxy" in col] + ind_investment = [columns.index(col) + for col in columns if "investment" in col] + + for i in range(7): + outcome_name = simulated_coefs_df.index[i] + if "proxy" in outcome_name: + ind_same_proxy = [ + ind for ind in ind_proxy if outcome_name in columns[ind]] + # print(ind_same_proxy) + random_proxy_name = np.random.choice( + [proxy for proxy in index[:4] if proxy != outcome_name] + ) + ind_random_other_proxy = [ + ind for ind in ind_proxy if random_proxy_name in columns[ind] + ] + # demo + simulated_coefs_df.iloc[ + i, np.random.choice(ind_demo, 2) + ] = np.random.uniform(0.004, 0.05) + # same proxy + simulated_coefs_df.iloc[i, ind_same_proxy] = sorted( + np.random.choice(expon.pdf(np.arange(10)) * + 5e-1, 6, replace=False) + ) + simulated_coefs_df.iloc[i, ind_random_other_proxy] = sorted( + np.random.choice(expon.pdf(np.arange(10)) * + 5e-2, 6, replace=False) + ) + elif "investment" in outcome_name: + ind_same_invest = [ + ind for ind in ind_investment if outcome_name in columns[ind] + ] + random_proxy_name = np.random.choice(index[:4]) + ind_random_other_proxy = [ + ind for ind in ind_proxy if random_proxy_name in columns[ind] + ] + simulated_coefs_df.iloc[ + i, np.random.choice(ind_demo, 2) + ] = np.random.uniform(0.001, 0.05) + simulated_coefs_df.iloc[i, ind_same_invest] = sorted( + np.random.choice(expon.pdf(np.arange(10)) * + 5e-1, 6, replace=False) + ) + 
simulated_coefs_df.iloc[i, ind_random_other_proxy] = sorted( + np.random.choice(expon.pdf(np.arange(10)) * + 1e-1, 6, replace=False) + ) + return simulated_coefs_df + + +# residuals + + +def simulate_residuals(ind): + n, n_pos, n_neg = joblib.load(os.path.join(dir, f"input_dynamicdgp/n_{ind}.jbl")) + # gmm + est = joblib.load(os.path.join(dir, f"input_dynamicdgp/gm_{ind}.jbl")) + x_new = est.sample(n - n_pos - n_neg)[0].flatten() + + # log normal on outliers + if n_pos > 0: + # positive outliers + s, loc, scale = joblib.load(os.path.join(dir, f"input_dynamicdgp/lognorm_pos_{ind}.jbl")) + fitted_pos_outliers = sp.stats.lognorm( + s, loc=loc, scale=scale).rvs(size=n_pos) + else: + fitted_pos_outliers = np.array([]) + # negative outliers + if n_neg > 0: + s, loc, scale = joblib.load(os.path.join(dir, f"input_dynamicdgp/lognorm_neg_{ind}.jbl")) + fitted_neg_outliers = - \ + sp.stats.lognorm(s, loc=loc, scale=scale).rvs(size=n_neg) + else: + fitted_neg_outliers = np.array([]) + x_new = np.concatenate((x_new, fitted_pos_outliers, fitted_neg_outliers)) + return x_new + + +def simulate_residuals_all(res_df): + res_df_new = res_df.copy(deep=True) + for i in range(res_df.shape[1]): + res_df_new.iloc[:, i] = simulate_residuals(i) + # demean the new residual again + res_df_new = res_df_new - res_df_new.mean(axis=0) + return res_df_new + +# generate data + + +def get_prediction(df, coef_matrix, residuals, thetas, n, intervention, columns, index, counterfactual): + data_matrix = df[columns].values + # sample residuals + sample_residuals = residuals + preds = np.matmul(data_matrix, coef_matrix.T) + + # get prediction for current investment + if counterfactual: + pred_inv = np.zeros(preds[:, 4:].shape) + else: + pred_inv = preds[:, 4:] + sample_residuals[:, 4:] + intervention + df[index[4:]] = pd.DataFrame(pred_inv, index=df.index) + + # get prediction for current proxy + pred_proxy = preds[:, :4] + sample_residuals[:, :4] + \ + np.matmul(pred_inv, thetas.T) + df[index[:4]] = 
pd.DataFrame(pred_proxy, index=df.index) + return df + + +def generate_dgp( + cov_matrix, + n_tpid, + t_period, + coef_matrix, + residual_matrix, + thetas, + intervention, + columns, + index, + counterfactual +): + df_all = pd.DataFrame() + # get first period prediction + m = cov_matrix.shape[0] + x = np.random.multivariate_normal(np.repeat(0, m), cov_matrix, size=n_tpid) + df = pd.DataFrame( + np.hstack( + (np.arange(n_tpid).reshape(-1, 1), + np.repeat(1, n_tpid).reshape(-1, 1), x) + ), + columns=["id", "datetime"] + columns, + ) + df = get_prediction(df, coef_matrix, residual_matrix[0], + thetas, n_tpid, intervention, columns, index, False) + df_all = pd.concat([df_all, df], axis=0) + + # iterate the step ahead contruction + for t in range(2, t_period + 1): + # prepare new x + new_df = df.copy(deep=True) + new_df["datetime"] = np.repeat(t, n_tpid) + for name in index: + for i in range(-6, -1): + new_df[f"{name}_{i}"] = df[f"{name}_{i+1}"] + new_df[f"{name}_-1"] = df[name] + df = get_prediction(new_df, coef_matrix, residual_matrix[t - 1], + thetas, n_tpid, [0, 0, 0], columns, index, counterfactual) + df_all = pd.concat([df_all, df]) + df_all = df_all.sort_values(["id", "datetime"]) + return df_all + + +class AbstracDynamicPanelDGP: + + def __init__(self, n_periods, n_treatments, n_x): + self.n_periods = n_periods + self.n_treatments = n_treatments + self.n_x = n_x + return + + def create_instance(self, *args, **kwargs): + pass + + def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): + pass + + def static_policy_data(self, n_units, tau, random_seed=123): + def policy_gen(Tpre, X, period): + return tau[period] + return self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) + + def adaptive_policy_data(self, n_units, policy_gen, random_seed=123): + return self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) + + def static_policy_effect(self, tau, mc_samples=1000): + Y_tau, _, _, _ = 
self.static_policy_data(mc_samples, tau) + Y_zero, _, _, _ = self.static_policy_data( + mc_samples, np.zeros((self.n_periods, self.n_treatments))) + return np.mean(Y_tau[np.arange(Y_tau.shape[0]) % self.n_periods == self.n_periods - 1]) - \ + np.mean(Y_zero[np.arange(Y_zero.shape[0]) % + self.n_periods == self.n_periods - 1]) + + def adaptive_policy_effect(self, policy_gen, mc_samples=1000): + Y_tau, _, _, _ = self.adaptive_policy_data(mc_samples, policy_gen) + Y_zero, _, _, _ = self.static_policy_data( + mc_samples, np.zeros((self.n_periods, self.n_treatments))) + return np.mean(Y_tau[np.arange(Y_tau.shape[0]) % self.n_periods == self.n_periods - 1]) - \ + np.mean(Y_zero[np.arange(Y_zero.shape[0]) % + self.n_periods == self.n_periods - 1]) + + +class DynamicPanelDGP(AbstracDynamicPanelDGP): + + def __init__(self, n_periods, n_treatments, n_x): + super().__init__(n_periods, n_treatments, n_x) + + def create_instance(self, s_x, sigma_x, sigma_y, conf_str, epsilon, Alpha_unnormalized, + hetero_strength=0, hetero_inds=None, + autoreg=.5, state_effect=.5, random_seed=123): + random_state = np.random.RandomState(random_seed) + self.s_x = s_x + self.conf_str = conf_str + self.sigma_x = sigma_x + self.sigma_y = sigma_y + self.hetero_inds = hetero_inds.astype( + int) if hetero_inds is not None else hetero_inds + self.hetero_strength = hetero_strength + self.autoreg = autoreg + self.state_effect = state_effect + self.random_seed = random_seed + self.endo_inds = np.setdiff1d( + np.arange(self.n_x), hetero_inds).astype(int) + # The first s_x state variables are confounders. 
The final s_x variables are exogenous and can create + # heterogeneity + self.Alpha = Alpha_unnormalized + self.Alpha /= np.linalg.norm(self.Alpha, axis=1, ord=1, keepdims=True) + self.Alpha *= state_effect + if self.hetero_inds is not None: + self.Alpha[self.hetero_inds] = 0 + + self.Beta = np.zeros((self.n_x, self.n_x)) + for t in range(self.n_x): + self.Beta[t, :] = autoreg * np.roll(random_state.uniform(low=4.0**(-np.arange( + 0, self.n_x)), high=4.0**(-np.arange(1, self.n_x + 1))), t) + if self.hetero_inds is not None: + self.Beta[np.ix_(self.endo_inds, self.hetero_inds)] = 0 + self.Beta[np.ix_(self.hetero_inds, self.endo_inds)] = 0 + + self.epsilon = epsilon + self.zeta = np.zeros(self.n_x) + self.zeta[:self.s_x] = self.conf_str / self.s_x + + self.y_hetero_effect = np.zeros(self.n_x) + self.x_hetero_effect = np.zeros(self.n_x) + if self.hetero_inds is not None: + self.y_hetero_effect[self.hetero_inds] = random_state.uniform(.5 * hetero_strength, + 1.5 * hetero_strength) /\ + len(self.hetero_inds) + self.x_hetero_effect[self.hetero_inds] = random_state.uniform(.5 * hetero_strength, + 1.5 * hetero_strength) / \ + len(self.hetero_inds) + + self.true_effect = np.zeros((self.n_periods, self.n_treatments)) + self.true_effect[0] = self.epsilon + for t in np.arange(1, self.n_periods): + self.true_effect[t, :] = (self.zeta.reshape( + 1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha) + + self.true_hetero_effect = np.zeros( + (self.n_periods, (self.n_x + 1) * self.n_treatments)) + self.true_hetero_effect[0, :] = cross_product(add_constant(self.y_hetero_effect.reshape(1, -1), + has_constant='add'), + self.epsilon.reshape(1, -1)) + for t in np.arange(1, self.n_periods): + self.true_hetero_effect[t, :] = cross_product(add_constant(self.x_hetero_effect.reshape(1, -1), + has_constant='add'), + self.zeta.reshape(1, -1) @ + np.linalg.matrix_power(self.Beta, t - 1) @ self.Alpha) + + return self + + def hetero_effect_fn(self, t, x): + if t == 0: + return 
(np.dot(self.y_hetero_effect, x.flatten()) + 1) * self.epsilon + else: + return (np.dot(self.x_hetero_effect, x.flatten()) + 1) *\ + (self.zeta.reshape(1, -1) @ np.linalg.matrix_power(self.Beta, t - 1) + @ self.Alpha).flatten() + + def _gen_data_with_policy(self, n_units, policy_gen, random_seed=123): + random_state = np.random.RandomState(random_seed) + Y = np.zeros(n_units * self.n_periods) + T = np.zeros((n_units * self.n_periods, self.n_treatments)) + X = np.zeros((n_units * self.n_periods, self.n_x)) + groups = np.zeros(n_units * self.n_periods) + for t in range(n_units * self.n_periods): + period = t % self.n_periods + if period == 0: + X[t] = random_state.normal(0, self.sigma_x, size=self.n_x) + T[t] = policy_gen(np.zeros(self.n_treatments), X[t], period, random_state) + else: + X[t] = (np.dot(self.x_hetero_effect, X[t - 1]) + 1) * np.dot(self.Alpha, T[t - 1]) + \ + np.dot(self.Beta, X[t - 1]) + \ + random_state.normal(0, self.sigma_x, size=self.n_x) + T[t] = policy_gen(T[t - 1], X[t], period, random_state) + Y[t] = (np.dot(self.y_hetero_effect, X[t]) + 1) * np.dot(self.epsilon, T[t]) + \ + np.dot(X[t], self.zeta) + \ + random_state.normal(0, self.sigma_y) + groups[t] = t // self.n_periods + + return Y, T, X, groups + + def observational_data(self, n_units, gamma, s_t, sigma_t, random_seed=123): + """ Generated observational data with some observational treatment policy parameters + + Parameters + ---------- + n_units : how many units to observe + gamma : what is the degree of auto-correlation of the treatments across periods + s_t : sparsity of treatment policy; how many states does it depend on + sigma_t : what is the std of the exploration/randomness in the treatment + """ + Delta = np.zeros((self.n_treatments, self.n_x)) + Delta[:, :s_t] = self.conf_str / s_t + + def policy_gen(Tpre, X, period, random_state): + return gamma * Tpre + (1 - gamma) * np.dot(Delta, X) + \ + random_state.normal(0, sigma_t, size=self.n_treatments) + return 
self._gen_data_with_policy(n_units, policy_gen, random_seed=random_seed) + + +class SemiSynthetic: + + def create_instance(self): + # get new covariance matrix + self.cov_new = joblib.load(os.path.join(dir, f"input_dynamicdgp/cov_new.jbl")) + + # get coefs + self.index = ["proxy1", "proxy2", "proxy3", "proxy4", + "investment1", "investment2", "investment3", ] + self.columns = [f"{ind}_{i}" for ind in self.index for i in range(-6, 0)] +\ + [f"demo_{i}" for i in range(47)] + + self.coef_df = generate_coefs(self.index, self.columns) + self.n_proxies = 4 + self.n_treatments = 3 + + # get residuals + res_df = pd.DataFrame(columns=self.index) + self.new_res_df = simulate_residuals_all(res_df) + + def gen_data(self, n, n_periods, thetas, random_seed): + random_state = np.random.RandomState(random_seed) + n_proxies = self.n_proxies + n_treatments = self.n_treatments + coef_matrix = self.coef_df.values + residual_matrix = self.new_res_df.values + n_x = len(self.columns) + # proxy 1 is the outcome + outcome = "proxy1" + + # make fixed residuals + all_residuals = [] + for t in range(n_periods): + sample_residuals = [] + for i in range(7): + sample_residuals.append( + random_state.choice(residual_matrix[:, i], n)) + sample_residuals = np.array(sample_residuals).T + all_residuals.append(sample_residuals) + all_residuals = np.array(all_residuals) + + fn_df_control = generate_dgp(self.cov_new, n, n_periods, + coef_matrix, all_residuals, thetas, + [0, 0, 0], self.columns, self.index, False) + + fn_df_cf_control = generate_dgp(self.cov_new, n, n_periods, + coef_matrix, all_residuals, thetas, + [0, 0, 0], self.columns, self.index, True) + true_effect = np.zeros((n_periods, n_treatments)) + for i in range(n_treatments): + intervention = [0, 0, 0] + intervention[i] = 1 + fn_df_treated = generate_dgp(self.cov_new, n, n_periods, + coef_matrix, all_residuals, thetas, + intervention, self.columns, self.index, True) + for t in range(n_periods): + ate_control = fn_df_cf_control.loc[ + 
fn_df_control["datetime"] == t + 1, outcome + ].mean() + ate_treated = fn_df_treated.loc[ + fn_df_treated["datetime"] == t + 1, outcome + ].mean() + true_effect[t, i] = ate_treated - ate_control + + panelX = fn_df_control[self.columns].values.reshape(-1, n_periods, n_x) + panelT = fn_df_control[self.index[n_proxies:] + ].values.reshape(-1, n_periods, n_treatments) + panelY = fn_df_control[outcome].values.reshape(-1, n_periods) + panelGroups = fn_df_control["id"].values.reshape(-1, n_periods) + return panelX, panelT, panelY, panelGroups, true_effect + + def plot_coefs(self): + coef_df = self.coef_df + plt.figure(figsize=(20, 20)) + for i in range(7): + outcome = coef_df.index[i] + plt.subplot(2, 4, i + 1) + coef_list = coef_df.iloc[i] + coef_list = coef_list[coef_list != 0] + plt.plot(coef_list) + plt.xticks(rotation=90) + plt.title(f"outcome:{outcome}") + plt.show() + + def plot_cov(self): + plt.imshow(self.cov_new) + plt.colorbar() + plt.show() diff --git a/econml/data/input_dynamicdgp/cov_new.jbl b/econml/data/input_dynamicdgp/cov_new.jbl new file mode 100644 index 0000000000000000000000000000000000000000..8d0bb38d911ff888185cb3a988dc370b93442e65 GIT binary patch literal 63587 zcmX_|cQ}_}`^G7IghZ(*Bcy1d6kW(FrN~V7&K}u&uR^xS-g~dOt%&TInH^Fhqf+{P zfA9N!-u}qJeLo#Xhs))>&hv9Uo`?0GS?ZdZ=<-=SH@9-qvNF*#H8Y@)`~OeGY;1I# zRBUvttPE@@tO?G0SrfWhAL3ReBOoBKeXgr#rekYseOT{54^mj@>-_(93Tq-&;{QK4 z*0D0MCYDu`Rr9wd`OlXuZS)OntVvY~amRP8$^P>-eLE-IHFEsF@t@BqtSM9v;jS3o zu%?tHkR=STrc$t`=9WGDp9h7lkI2c%-TR08^Pj(D3GA$CbX6&E59pYgSvqK$o9kFv z(*{1*ecb%HY})M)d=~N}qc&dw84+`zpsy=%os+7>aBc})&W{)I-(3Zf?}vl`x~+f$ zTV%?-{2+Kq6)u$r^#h?6@qt;&0N9?82|t_D4VSr}Bwnt22Lhoa>1Brfz%-Ridn;rS zB!gTfir9uh`}&P(+1e36H0M6pi1ve*U)yb)o)Iv(ZNPtFcNkpch8+ZiHsImq*dU#> zHDG8@Ro~29gSeouKA`^$D%lquIE}u*r}V+xqt$Dm^>ooxOehBkDq}jU`znE_SGYZ_ zuob9JT|AMelmU;bVu*@+Q=sOrT@20HCRm6!xx-VKisDzr%iK=200XIy7)`7x`fNGs zZa|a>M&GC|T>j^bs5@G>m}iQC(f*h~wrT(jw?0h>yetnRM2x#t>aY$mXV;4E3|mW~e3#sxS;7Ex93Ry*cK4&_Yo6<4yYA?&{LbiyjuiAV)mdt? 
z+7lUu{f$#6$pmsWn0K2=MWu7mJL*0<;CbOpJOz0yLcEiRgEb9K{yWumF02(5`({q= zjM&0nJ(H0Ov^g)q@QsQO+dcbytZqR?mOig*+h>?L`QjxJ!4fb`K98qR`vyI~pT<$Ueg+92z0cDf zV_=>9wzoE67&7Nf7oM$;f!{C{hfZZDq)Akh9LX7musdmA-A)d}$2u9i$(boQYCElY zMfbyM*3h;S-oG1VH9$Qmf{3TM0?&oFzM|vl_e#Sv7A18rQSny^-5}bfy$&PM2j|>uv$c zu8^UzX*omm1Ca@KV^?n!q2^BV9SKw^R zhA<+fi9@Nr@MX}`ZeB7Eq!b5>PRsj2f#dA`EGH=#S5v%`vJwFbzgSv~81msoGLr>) zx(6JdrYu})&4Ddnu{oRVH8|69;_rjIv%uXiRiN2A3d+Zg{nVMqfKc%6&qQ8-IFYMz zmpHi{yb2dJS~%*VjG!yQv}Fnr4%Nw$iaVi2inHs=ljUF+W63{jn1&4B{n?u#H9-N) z0<{m5qQISp{s+bLB9MO-dZ}&C34HH*@$x9Tpe53R_wxhZ$mHKzi}YX$s{YBn7I@qT z)ex+z5FeyM;SQ6al42_2uTuYD?O_hOpYI9XFpEO2#|Gf_{%;JB6qa5|J9*MXV zYz?{Drur7r5$M(CrEU_FLS&xYoO>sy2)fg>N}q-2!BbNw`?0}Pl*+`ncxf&XC7E&= zm%hn?wh@Ym$JBY~m%{aQL)&!py;kUv@=_3TpAYz@&z%5E?zux-noVG!7!@U`REqcx zHQ6`Im!g)!0@kqSdH=g#`kNqN=vyfa$B5dUCFq1n_Hx)u>_B)e4Cq#Rv;dxswg%;!@V|xp;Xd77}lvjcZYHm zdIC<8+D-Pr_MqbSY5G=p^6Pqv!KVS>N-ksj$M*rWkDZ&iCOQP7e!uo**+;>=gk99+ z+%POuUvoB;>IL2vO94`)#xV7EtxsLnjvS}Px z*}Mu)aik+El56m;&_JQ@@)`_9Xy4PC&xRJSA0##mrH~?)=pmxe0yl?PX7iad;kM)n z9%b!B=$=%$U=Z5~9;v&vJ06ChcP`!_b14UC(8HU956n?BH-pi9?Mt`?7N-C58bJNh zP>+pPG0cBhrO*|QMDMtT?gvjPLCa`bY52}%AhWytU|+8ThFC+4EKj5%6G9ulb+tSg z_Pd%)*s=_9gY0R+C9~kK;^aLjF$OMCGP$j1KLU9&k&5WDCvaZPdGH`Q7jmEGiyJ&g z;Jrj3$m8sePVc>%8K?0<+BVw?w|4_jwXFA+q)HlcR^PrKVDE#NZClk1XPl$&^vf&rs->0D?GIe!j-y+bV zD~ZjTTLs8Cwb`N__j{`AtVGON={&HiN{mkCDMNx)nP%t5YJgNaq0+Fs0Q`x}CvR%x zBG&Zn<)7Cx(ddnC3%bEV6!BAJdg(_Xx{*}jICi}Wd@Xw`Xl_;@f6=40(JRv6aHgHe zvY-tWay({#e<==;jTVcSkr#q!v7m!|cPAVSRGcMEng^q%U&pI==0I=S+Ur_mIl}Y< z)_<}3ouGT8LO`+(i9Y8xZa-aue=W2rT*OPzvcEld<#j&{E*_!VY43#}el3^xj`jm( z(GjzG!7jKhu~qVFvJbx0ok^^9?Sas8wO9uBVL(sh4hsbi!_94;XCvc-pb#|?y~^7S zlB+1DC3_IAO_u$N=EZ&7oOVkdT!y~ko*=)mWhfBo{C9VJ84kDuOVJJs)H zXdk#Ru(+}eBdf;_ael~v8Y*?Ur?y3~+OSM6LC_2p+@#7blo>EjKl4_(I}zTjJsFGk zu7gTzDc=b@JxJd7tR6Fb3*Mha{@D>5q6W6uJto6MV0`)L<3}zvu;DSTrg@qV0nzkc zQei%@@^r+P8eK!$dBFq}we+ZCf&2oCMHXnB)@beY7(nAreh-p9WC6GPcuoF~1>i{- zu6`jm4cDi&%I}R1!>VrEm9V*Cc+0{y6wU1k7e>9=^uA^TDf1Vh8I6zBH`Hd9|4L#1;D~eOulMk 
z0;EgHCC3s>(4`dL87WqGB;fVYwDVXdDy`@1jPor;8_@M|xm`pTC~bZpYL=*)B~ z8X$gHY$VtSpKrxpbNPY$fAKfvm0f%b+SSvujwkE>_q^oJp~EL%HlrVvc_cbtv#{fImmBc>T+W*AH0##p?InF;0p-I3zkw zCEIRscLn5gKGSio_5+cy*;*pO0E9#YW;#?2LY4Hf* z23&3PKg2cHpqWCo^`F)nB-liiJ>psezJON`pZKqVzI$ZjYnwF)d~ZS+x|Oyt3V7UHx_rE$5#F_ld9vsk0-Xdi$D&g+(EH!M zoRpS?(i}a>s6M5@CWkanaIyYM$XgF zG@<>O=!-X~Z){j79ee?0G0n+tx&|28I{A+@(gS^$R$0L*G+sz=m_A}aH3R+zO~+-frJ^pmsF;-vU0~v~Pi13>MV{x>XDW;{K_>I# zk11tqC<-b`5R$Ti@5ZF2+fETklh(G0QL7M*mYM!yOw56f2d__rdgOudFeCLybqcz^ zN8Eavy9QksZ6bS_l?QUg>^7aExu`mD^PX001~Q;hUYL0rgcdm165bOxfp%M4Qn!2) za7z1|wx3HwyL8*Mq-}#pr0DbqMbS1i^@~z)=uR`*FX~YwJ6Q?G{uz9a3g3`ezdK$Cu~MuMR@-!mccYEJ3=f0o~V}RiGnZrO(S>h4~F$ zQ@yvV&>=%)fO>KIVldrAzJ3*c{i;!YW10;{S2lMhsY{{#<%HaR$`FjFxZS+_!w&+k zI^A~VOM&PMQT#p94e*$ckZMBA5GeCfJ~bvaz|mW48+7Ky$YFLc%-b{xEJZjiS4Gv) z;?oMIxK~A>vC3Vhoa_%80e-*J734vMFJQk`qX3bXvHE*>r68}b*F?yAJz*odeeRc8 zF39i(acpQWL&!!@(l@PHnD}&jjTFrQ|K;yNsACj3EE;)*Z+Zihtn!->u{=02_wTtp zR|Cwm1x8kSx+9I#M?#-uCL^Uktslf%{E%kpElz@xCirQ-cPf(B5i#~L-L;NRhnw@t z(H|L0fi&S_Lxchq^F!>QP zYs*xl^*_x0AO993N4_(cQ)T>7ur3pC>f->kp>cy*b?yrsc0ILa=U0Y4T8$M%S-8Nb zI5nzEB!h5R_ohwo_Y|Zi&$4u$s~p0%-qLW2cf!fVe6gJmQ((eS_sf5F4jO4_ig(`E zAk2Tj<~L0LHskffBYgjjz}N4$eS?M<#9MI0R)TAua|2vi^gJI9tOK8b?Zu;|D{%5h z)5f6MI%o`8P%QR-fxewS&rcKO5ZRUB!DrqG6#19-C-s})J^KS^i*r>#{%85Ae_j&+ zXZJ@G+yZY}OmlPXhoI@j>y})aA&~h(Z`j~J2pO}dO?+nuL9<@$i)H!%Ox&XSVc{?c zvGAeO>}LqXWSKg&|4s*yHOSU%~V0{&>cDKz8eJ}B)9(WQ2-amhtN2NbO;^p;o zWPhHa#mHoaSp6aNthZ5mYo!zt*mob9R_LKcMv47*H;m!Nr!$Yexd!0+$#JIhI_+@o z_Q2%Dx@JgVe9!Yhw;2LRqJs<$T0P%uhYY)g=GAy} z!DEf*;9w^mrLBtn@?m_1&IzVpU=zrJC`UPxmWB|t5vyZse4+#`$s0~x6v+ZVR#hju zpjc%5w^YnI;vFIu-P%;ndX0Wa_?!?Id4*;u^_&E`Q;?wHmFy*vC{$JSMC6WiCA2n1 z@Eq~?1N$7R+j)Y~Kq=^{ar93G5<6=hd+LTSGU0eN@{_?5>BX@scorn1xJ_c~)Em!$ zFz1_q%hxnWppf#t6`YH1pT6;zRJ{bfRCF9g2AIQbSHNV*n^3w3~f_u7p8-2u3kX)NzyKLGbUvWb#E_d`e8 zcJWh*0T^^_vHiE#2?9q#Z~eYG0G*Oqd5jkVp>4dYKae0Do~-1!UJ-OhEj`Or8x@6M z{cDQ5(!~@$tWQZ_JClf7Ke3!rF;a#l>GaLk7(-B5?z4Th>k6Ki4$CK<>x1&8m>ZKf zKERDdlk(!2@u)vt?)Ff~D=ZFOSs$r$Jev9al_D25R= 
zM{#$%VniLPA|aw~heGc=ONIOk1cQ=WVh4F%=-KwYqhASLAukPNz}uVy*}XQ#(p+K4 z{Cn3Qdg4;#DmN3bNSh7mbk1B_+=0mWtrt2kmx#=rZ-$ukw4&|{Jp`f~uTVktMDu({ zDq>M^{e7b%3RR@X7nOH(!A`B7N)l5Rco-E;+w{DHkaskNX)9{z_t_#2O=eBh!BKYE zj==)OZa%Ocn1_Vd#O2Sslp(eE!Q%Uo3CQ;G zz21TDbVQ=1wRD^6Ib_qm9dp-6z|D&!51qZOA+_2@DOSG?0wgK-?DcZtg{LFC!`)^$ z&KAq=#+iz|ujuai9jgV$h>{0Hk6xj*k)nqcLU~~8tNP@;R~zt?Z%DRp4}d1^^{2yT z%?R^N^aPG{V=nw=bLUR6s$K)D`piQc%B>((Zq#73@iP z$x0p!!fYo|%LS)FxRk$A-}QX}cBdepbZ`zj%Q(Nz2n^zWFAqDO6CH%hRJ?Q5Vj)1y zvcD3Wmkvy7P9uZc{;0>n`{9CcA#k}6%1O1_K*qx0j9IcLswF1hzvQnBTgR9!spZOm zyRK7|Xut{f=sY8Ze)Qno12P z+WaP%`FonA`#}`=ZQOVicE1Qd>8b}%R}>T+wH03s^RNtbJN6(W3os4jJRc9_Y^xF?!bZ+ZL z8ArjxPcsuHGj9<+X`JvaTXkgNlR_}lV2To7kZ9KOry&iG{2fR2NGQ4IB>5vI4a!^e z!{THj>%$&H9=(26_R;=&; zSg{JlnH`cY*H^(~BV{4*GtPh4;j2`vsDeblP&Oy&TKE*?&|+Fp0vgnc8QbL_VfW$M z9(!^HobfpEcU_?$PMu*HnHlPVAc7F14F7lV;u;&FrehDRZkV48eAEMX4PCFL@$^7l z8ey}vkS`cWv#_0gp9xEnUKRJsU!W@?R3Wv8%TThGyUmJV9{gsKI1`tEn=_nBU(?bm zf^3p)vC^y@{8(|MZLM_#5eEHd4#6KFqJ75H#orh$cXQQD=y|}@*4WuMKhohcdls2! 
zf)ToAuVZ%Sgf1LceEvZQ*G~tY7EY*f=%H&D63J&j37{7qvKr)xP4IUtX_c+B00t5= zGM<%|z{F8C%N~n5un>CStNJGrVij_!9uDWjlh|{MQHJ^G^o^X%d(s zJrTOKVm>%OqjzoebU2y`v-a2{EdkH2ZO}lOsE^!UtbjfMt_zZtJXg$s<_l4Sq^^Ew z?$zGEE81~rT(vRnD^U*IbGpNIdO94b%5bUM9b|*<-+0;f{0PK1zI?eHr{AA(RhPay z6bNy3K4Jbi|MrvMjN7(wI%0ajwQ}@|D%#Whc>yHNQ1#v)0=*Y0sIZZ0T;4VmW=aX% za_i$EPn=A8QZ^estG&?>Azgx48^+(B>GDM>qv-8lhBPE$@!{nqp&FE~(-$Ham4^(k z)Tv(~vx0lo3`I3FIp}m(yGWT$IsD$2JYN)3k1|QGw!P2oLu=&G3HL%{kk}Y+x*usQ zN_IDWvB8uM4~rgsUzcfyANB@^ACL9I-T$8dFMq@G3(Wr&!TV=(c>h5YKfhu6R~)Y& zu>OnH@4Cui#Wco$1DIB%37g(JsD+InI(ilPeX8?a!bQw9) zz$0(wFYWaxcwZ_^JT5nlB=?wS{J*(C(v$YC0=YhTx=mziao!Al`t&QH5PX1vlKYTz zLm_PTn#ljnokD*uFh1&Gw?oU;j3v}Z%b|V!U$)H!f!skXIP*hq%9` zbFb*yK!9etng1y+Z+UgH#@lufJOnn4Pp-tkPS#qI^VeeF+?p9C`QU^8`9!)^`Pv~p z^RI>uOaUOZ#Ww2K?*}xxw~p!0zCtq9jl$1Ea=^rqU;1fz5mIfXJ{vAqf*O<-%@2-d zLt$pPc>_%wd{AWL?z~uyL^B+AygCxm*(@?THBxDW%Q#lr8tFZ@>LnLVM^VFp zys@mX{uRk}T)>XsZ}{Gg{Gz@;Y#9wUF6-Rgb1N z{v{RpS3xB4Dnnvg0k9wQeYdjL4}Us{g2~Pez_e0AJ#Sn;Gz~FZF!J@lU&_U;`pJHX ztg7OFhttgu#|d{^4+7x#jakXzx-^LTe&A{A7=?4%r> zBe9bat?=!BoUbm&TPS0hqq+E}0p3u+%Iw={aQIGgt*5aFavSc;9H}fqRUwzs3$|_1 zM&03R&P{)~AR))#k?se|Y0RS%B@syH$P2dT(K&GN&-?;MTN66BW)&OcRe}Ooy>6Ri zW`V$&v%+m(1Cdj6qsn&vYgCdFRLoZY8m-CSa7|5og*p_r#i(ji(H{Bsz=wb+qJctjXXtzbN#j!xM7HWmG| z137+|($K(o^q8%JMv>nJ1~p1*ew8&qgrSh^X`x(ri+`TJSh9}!L@L7WJ2BDD5zR%R z+JXiVwUun(O3YIi8)}8g(0gL1QU}2AzvutsAF%uw%ipm4A`kEX_Tl}r0lfc!&2M~o z{TqPS4~qEyTaT~b4W?#Y{Q=>8i%PW^Zm2A$hSLnGS2JPuAyS+zi1@3Vo;J9Tg}c<9dx z;}tk=eo}1R2j?%7=twr2T}3yp{o#Bvn*u9w6!}4uoG5Ckc;>>dSeR0J)K0fz@}4 zwhHl3xKi}+99PRA{0kcR$ZW5YiS~sA`#QDb<@>n5$GbJ8KQBR zK#sR5X+-Km>Gs zNP_RXb?}n#?*lcZPPikvw{>-92*m$;jxc_Q9?w5u`7xHiVfh8-|3>5evm1E-!396R zVft4buOHO#{THiW31>eCwUSL}G~78}ld=vzMCof)-iuItg|eKd_zTE#FnNz8u0u1| zG1lOtgAh$##iD@AS56$Z8Hmap1hL-Y#h-_VpnCCuYl~$FL?&_+Iv9pPvZ`%KDBA@d zm@CH!HiSc=dK~!|uUYu~mTix!;WY?MjSzxI}fhzm%3j?S{P4el; zj1{Q;*l)}ntpt+a0*?ZQv;nDU-aizd0OI5P-B!jUAoD)p4$(j#_`B+OS^11W!j-7a zY_?GtCOtT#cPk1ujAQz(?aCmx=P&Q&sw^mB^{e66u|iL%N2dye;$W=N(Y9zj5uM-{ 
zS6tytL|UO&em43hpl-oa1LxE=EMtY2I z)Y1pdoc#F!WZxjkP_l8G^WLc9xlg5>V>HrKw1^j7p5HOU^AA{ld>fy?Vfh6m-v5ok`)5;l{~;Mazir|5FQy-`{)^S` z>UTG$Q@^%A`>y#R_CE`t#po8v_InM;s&;d@KYsyTww+v$mVOA+CJIZ{v(VnT) zemGu0O7!(#KO_$4={cVmfXO$jf1c6de7GE9Z;5(4xKpM&@1gPn3_2?s44C z5qm3loI;J;*=g)<(IEY%TFYJ%_jAM|xRDt07G0Mk+|0twDIGpaUDlqJpc5c3co3on z+!^(+#mehZT+?y&lL~_{c@PvW&ej9ppTw3Pb{>ZE52I?Oj5wcenRMsPP9!AkU#Plt zCLMnCUU1)TOM|;}qSK5mj);%O`Dj2<94<$@CLiAX8aa@LPTlrMKvA8MiD^&ckk|zo zif&y6M9q4~|7DT`EO46l68s2+ZTIM)SL_vNa&P5ZG_gMVm%iN=lk9{%i^v`l5(J=c z3#yA+_d`M9d-{pPA2m>}S!h~}O#sjiKe9;;OGHBq;t8k!Re-tnP|fWPAEa=%^Txh~ z8K^#aC&pJCfJDA~cruwJAgy~U)5OWPa5R8r?{=;%YUV3Sv_#&BhBw&s=leIP`IGkk zI$I!$7VZphN{U3S#muxlLY7EV`Q?u3a1By)W!{xrN`cQeLZ(!B5l|^`NW4wTNA&pT zd#JRm8G|a(;_IoO`ZIy3XyA=_QJFVb#Cg|Tz;OZ$J7st5n7hG9Gka1ms2zxVWs_cQ z=AtG%KRJx?XLx=G;~%j67|Y+V`~vfTzv2C}dA$G7h@aoQ@cI|i4_N=z!PoC&eU)Ce zJ3oQx5q0{iz#?on%GPJ;EWw8GHI|k5dOWTZY zwq0B1D0+vY|8z(%m^GlwCLYh&?zE!EB<0S}wo_qJWNz-p30d$E3tu+>{tE6MAF0rI z(F3dr>U`|i+JR|vO|RRa7oHEgUj2Bx4{WrBIS9P-(IxW%%YEWb2#f4yo%$UEQb*|M z9{-jB*Z1XzFS6#s>384K4iTiIxgdh9-nd#Q_90@aO)m$2{_jCVS18as(<}wyff*pn z;@@Sv?FAC${#+JnFF@{(!b7!dX6O(*-Q=nVxWA2?fazkmpI+!35d`%cyjkmxEy{C?q~E^~jWk}IYQ7ociGH7ApDI-GLAL_f z&00(R(MgqVi<`cFNPyT#`)ijs`svleX!pqr6~e>L&`*$=$`FpHnxF#U_^2dw{M^?PPHApe-oFG!l_ye9wV3*=AG zMk+S;fLWtV;q1H(~x5(mCr$c z;@)O_RwyLDd*AoRdltmIYB)b{y+fws2a)CP9D!NXT=-W=9VDuB-h0?_1t_^%wL|V^ z0{g2wmM&v;K=6LeN_R0G$~T(pC%Ch38R$y8VgKi*3vdcL!crf-lEao4f;GOlWli~;r=9B z#ATOZ;5cUey=Z9|e8sPRl6EQuF8#-69@1yRhXuzwh{Xa?eH&tpU5^2SDZPKgdkM(? 
z{%+I$OdV>z+j;1gS3GKmCYo5Q5;Xc|Jji%b9c>3*Dc?Km4>;8JTfymQ(6ZfBm^h2` z#TQ&S$emr#q0xVZ6c7B-strpBnN=7(q&Nt0Ff)R?{QK_?5l+DgKW<);H?I-L-ZRS5 z3||m+*zTGhD?@Xd)HO7k&Zv}afjmMj5LpqOj0&ksKquPSWj~yEgfGlDZ>|I-pitr) zOMS)ONIc6h-Q##PIv8SpL4VT+xy*Kiw9G`IhixsgrJFU#-aUp_*7hB04~_{rS{966 z#@jRn20E2^}qQERv~wZF>M|)f8?qs+{%s9aodxDA2_>5 zpj3KIU{1OWWO#Uf$FjBq_g$l>B@@O7E6<`O(xEj+cwNC-XR`xew0NNljC~ z=>rN{`Ei#E{qQQluaHopA8tEd(>*29kK^-?|MR$E4XoBVJm(36;iW$jL;A;A;Jm3n zykwM&Xcl$OD_pXN_YRqU`qQ|2Rg}%tPvn9JYs*hHPGx{r@Ml%7-1CTjK3;h2L@I1v zdX&tOUjx~OI}uY}Q7~cH_r{e|5?%i1Q*`g29$Go!U;puS6zZ1r-}F-Lgn`jFbFvOK z2=_CBtU_XehbfMcdvOrNQz=ReiF9ENdWZ`HFnUE%&FE}puy z6m+632Z30>Hz3(JoR8fXb5tS_uOJLDg4a^s^gl4y2_Xuqq;91l++7k|mh;?iCb5mahwD9O>3is6@WfBjUNoh~y z(0VEI;dB5xB|w>TvoIb-wjHPE!tp~%g}gfo#+s;HV&ZOMnODnjkx2F8RhqHDF!a}(lc(mVKWasc>ds^#=sS&}pGHzQAo;JMMWqxb*Z}onU+wG!_^ox^sL0wF()v2mjjO!GF&Ywts=~>*w+O zFUC*0;Q2F*-@*6?EI-EbH!Q!v{9nvJ!~BOP{QQRLUraw>{nrp*zk+_Z1DO|oL)^~q z36;}JAf23$C30;Y*0PpXYemb~!X%thy+#lnrP66X)h`mcwId!vV3@MtDLgPVN)$0iTxm=zQi|V2kL@4(c{X zPeQncW=}OiwUtAW_<{wRK>-PtpG)9oy|5H%NDx#O9b>!JpbR(N4W-(43y?yz#t%!S zEXeU}j0o}YhWI{8$|lg;#{GL|J3wt=T49hd12rhh%`Y`7!uPi38vd+ycu7ec&oY$+=hb-mpJ*8& zsxQjf_`NiyE-Qt}^zPDU!Ii-G$jhTKwH`j7`Bk^c z9)n!we~ZxK^vPAd>&X&!c}OY#_BhXpEJUSbA$jC}0Ae++I`7xlfCksHBX~*j;Zb|# zgFc+kB4OcYn|r(tTp16vZk&1dzxzdHJBO1Es^P_QwA1J7-B2kS)frSW1u7TjjBSM% zVKI_%uP(kHVf#ba{sqRbWBf11Ph$KT#_wSK1C}3S`5TsB?Bo64UwHp43hzIJ%Z~%`gJNgDDc0vgX5>Kohlbvhd4$0!?M!ra4gY*9@KGMG|EUd!ufQgZ^K1o zaQr%@F!Q`G?)!SDtlMm?tq%U!WwRo=6fSs&l6E!rZHg(}6dA5y`l1 z@YBDZ=oxjXK*(Y$dheU~_K&nX>KWYpxWb2amb#*$Y+1lTen1| z(HDT2?&)wp$MGgxZkK*Nd{O?t=Z%ZfG_&MzoD=?j+`pN^)~YIy4T`gG{__EhS}yL+ z-TVY+Q>P3=vKIhW9T{ebmftbY3n`-oX-cOb!%pKj^?I*_l6_tNOBgKn4U6B^ld(1fUb z8IwF%q$fACUoQjqliiCuEp?#s`SY*=S2ldk`Pw^kEe&?*!Yf{WY=pw-1m(LG#^9#A z^F(o{1%7BoNxhb`K&5u$H4BGhfjQ)Lmr#`jS`J_6^ItCkWZ_qn)Dr~v9y;n;lvTj9 z2)5*%1Kb{052sk|ekSPN;p4bl=MBBf8gc_``S9)Sh`z1n3haK$oci@<364*|CfAoS zSd3w->e!xw=yHYL^-)(Klyv&DaWxlGt>TBN*_zYP9EgO{k6mFr z;)`a;h^1&nCQ%{*jIpWDVARRv$y@%*E%l=VOp3 
zsr&~=?IdVld40R}tO0V2?PKgxb%NZhFMJkozF8sf^(X$1`;l%EbDwWzDaOg7cT$j!c;&_62T>f5maI^+8s3slx&pOObyV{#pVKr)wn_+k)U>@W|(k zFctVw>l_}%C5+qyQp-0zvLLo5yEM?H1*T}t(&k)h1xrpC5aU^+%guCg=-Q*MgF zmic2aLZ-&|!Lc9a*t9vi1>M2V+-M@_O)lj7U84Vfwh2;L3b;=5WuiL*6TiE=1JLOW z`4h>sFVT&{lHXP1I5?1F6sCr35n-C$@0QgHIPNR?yjrFlhRLhBj&epL`*`+865oB0 zQG)c~w^mOSV3IKN0N0NjKMvg+5_U&?Khy6T@bsehZOI3V=ik7!yr5dEab;k9BY6h6 zW00%p%hq>=?dZAB4BM~yacKBrcXYSb&}ZPf z!2PxYYBg_NTM14@|8~2_Jv7ZxkBUrNO=%i18az`N6zoRH+vKgs#j{WlhsSAYreP=( zvPxz=5{P0yx*yuP)CA%(1HRuH%8|dBH~&T97{oo{dj0gv8gx;r#&qFHG<*xEXs_t3 z0;!*IUX(=LFyg9mjZx_nbWnwdER`()w_W{@lS{P-+dt;P?{8xJZzcHsA#DEwW9T{ebmf9(?`U{uy|+mp~}0 zF+=6OX0{G#BW2YletD299r*Vuc{y->pt{r1Tn`~pQpE?#F%TQ;S{$6427$*rsLm;O zz>0I@jk}e`KxA>LK8>UmI*d-^7+pK`r>pN*L4I<4kCX1T^Yy;$7~OBM38+@C@r&J78pCn*X?9`!*vq2FM|vPK(_qJ@Jr8U5V>V( z<-R)yB$tFJ7q~wHS%YXo#=kLWs{iN4;8KDltaYJRr5}!Ra*^I5>Vk%)46#HjPef!& z5R)2{4CTej4$j`bi2Ia&@BO!F=veNb=7K!T zb0t@(ANNHo7bZtH?s%c0%PIQtU8yLbiTL!@ayN7+J$&*=81CoJuDH zZA(oR0+4w8O<@(9Kt$1K+MYz!1hWKevc?(ZNNrz-`|E}GAVVQNejC@1vHOCe^sPx@ z)xh*oNonm_H$=+E+>EjP1W`)*ZaucMu)&%#s~0wiu>E;f{QfbvzlrU?Vf#al`27ov zU+2g3zZgG>@n;lxe&-dQf57r%VSN5JfzK~6|MxoHKg0Y7Y<|P^uMS>6VEy+gzJ5b! 
z#UtOLZ%|Nj_4zxCJm@{EP8+yi29Jqn9<)gw5Wnd4mx4PyY4A(1dqQu@7fnQmn4OfgeMCXf!H0MXuW3Wi6S*R z3Xb-KqD;?9%2H!rbm{m4MO0ZDI_IZKUK?qHrrW#2?>tL@^rE59r>M)}#iOXzFp56nL<%%KwdETq-&^+A!>A>CUYgxGc z|L=>(9|a)UH?N(#_T!PqM>aic++3jip6-R<^K$f(gS-APTP)BPHC-9K_3nT7i^wa_ zYnoTX(JQm1H4nRCI-c5$_u&Na5%@uw!yMqx5%&HEY=0ixKOV*JZ({pz*#1x)e*XgF z*D?MV<0p^f`7=2@zw;5#KZxV=<9>YpCW+54hVlNd6y86>{0D4)!}KqvAF%$5)h~U~ zpgv2d6)2_1Us0oUggXlp+^M_r;J0?{_p+7=*ooTk|6nzS75N?8OTBI&^!eld@4Z7X zRKWJyNy{51<4egGDQ}^T32MQYzb?a(Hn*s+={J#w%IvO)qCE_3Z1d%opT^ZQ$4+UR zAe!4f9XdcKf*e0n-i*GX1nX=O48YBaxW<#^X|=3DqwB}PAkHUI9^pD8@G1?S%qT7@ zOD2GYQH0x#|K}XJ59S;AjgfBlK!L?`MKqD!va~Csk8CX|?B#A>NB;WT$My{Fz`|_) zy+{0VD5K7$`r{`hxc2miqRo2?-1`q+H_&Qo!tHG`I$;hwMEBDn;k_9PI98ue-cQ#+ zy$6xv=^oEe6Q6O=Un^lCdVA;<&+&(#Db+yun%^866`gAf`fX6qM7hjKdu3GkQ;6jD zgb5l&fAVFoSwkd&Po~a8b@Y1ZszY_wITZcs_}sLu0*G9HXm}8M17bDIr`pw=;DH={ z)lCTnU_8l6Ps8yDmJ$OD{1z^wj&HOY!ZCVK_@|s`s5qp)>*KUK=~=?yP*gEs8p^IS{V!9Q=>RrD$j_udTbzPHi7=tf8igf4BXrfsDG z(cMFwDh4UAJX3{QfbvzlrU?Vf#a^`2C9`cz)dw&;P38 z`ALjFJB#OcF#bUjpCA9j=Wn|B{DKYd|Hk6|Gt7Tj!q0D*{>{Vd2dw{M_1iEhc34&` z9zGhKvRJpYhVk7K0GqOb3> zO237QdIMta|8#XkDP>$v-!LL^;Ux%5g;*zCNaQ^7v$k4CYOox9l7crV1pjMBA^>EcZX1J zeJQITL-a^_U7X256jAxcTOOj+hHmpGOU*TsU}bET%wgt@+Y3w6;M`RODHb+R-g*Fh zZDG3xliG;s?bXV!9Gb9g`OW!qfeZX*Z)lx=riA9-DrC(l*&}n_Lq`({E(7x$(}Y=} z5L7+t!5{p{3ohZGhg+dk)42Ts?0%bEEr*M7E+pw4HL|>32(4j9?v$$LfGZ_o_u=Uh zz@H=R{X5wEOR)DpVEgmW@%zWv{w61W|Lq}ue~1CUe=&*Y*Uj+!Z!n&p#P~Ce-!a7V z4_JPT+W`Nc=P|0{v_&oKYtB7T0u^sgdbKVbcL3}3%uU0%nFJ4T@L+p{0c{0dMK zyk$6e5x2kef+|GdY%%nRUE}g!K96qqCD;<9NZ72-z0T(+f%;$2vd=8*f|h}J@{l|a zxMLL7{xDQv^u@*T zV=&IAABg3cZiapXVk@STHE?V0`TM%-?cn~bKSy@A9i%)Gi5(-{K-M|zYj1lB{8&u$ zUR8eyUVmD%?2=Vcn>XQQ|DP}6@O$>J!U++mB~-rA<&Yn$ydPV)m-!OuKMfP8CgeZ@ zC7a{ycNI`WRcQK>fhA0<()!JA6++MC|M7I*Q9ZYB9JeE@GRnw^NP`NMQkRC3l$JE5 zQcBU@(V`{ow4{xqB`KjxLs5zbA`u#9WM%t(&-vZg^Y^FkdCqgr=iKhkb-mxOSJ#B1 zH~LsyQ&e>Nx(TFDogcZ5(YKp=iAj>_k(=BT7qkyF7$t&cB+vl5ryTp z@w!SDLU**)Z?{bW*w!j}sr-OD#<%`%EBxn*y98fZJ?%b%zR}iW;@Z17bNDxx){o?K 
za`xISqg+-nX;WTcsdpIWGAiqmQ9>Gg>~$+Mb1Q>bo>gNS zQ?Eim^fsMDr*fG3|K~{jDB|xBzl8V?HLO4XkoAwLze)W!>JLe>{sraN|I7cf{3PYi zyjXr`7Rx`-{+RY}w7>Yp=D$X4en#_$NcR0k_g}g{(D{3wy}#m#vzm@{eF3Qpm!p0q z?uDRY@8MUkJmL3{?YGH!(;%zA<8pt#8+wlabT(y5Jq|ib&mT5F0XSvl+3T%}SQ$7c z=JH}!u(I$TtDm_6r6j7(=(}Hqaq?k#f-TC}YqX|ozPv9KI7cOHR_C0zM?wuo)j0FK zx105PCk?}u4>yM%x|PGz&>$V(wISf?yQaN1>J+qJ(HnceUI+h7RpZWvt7!br&CIIn zFrJ+8r*z83Em)#zuws_(Ug$jCIU4m$4JS+%?Cw;o#?Ze3KfSeM;X)Yy+8rM&Va_rC zM^U38D8AlUb4A^LIC4F!B+TQ(PKq}t`EGCbqaktYKk|lxsM#oR7R)S^FF@& zwjB&3-xk+4>tXQc8-mToN8mVj$L2R@%wa~$jkTMCO>keoMbD;GHTB29v&F)dWV&nU?SMV;hgJr3kyCBM(T*=7f@Y_7;G;V-^$cVE}Roj>ki zdw`TogFV^1_W&Jnm53OhYi*A-*Kg9CCGAuuNjpffgSbpaa%RkIv`(xU_ z-DLX)zX1bhUVqsutqFFvI(zXBjVy7Z}-DI{x zgIDLWg)esC>TpM)a33SMGkfXE#A|l&N>*ZXL81Xv-5lLk;p+i@Cl)596bPUQf9--I zZ$o?(#b@DTvJdl9#GSHauYgp;ExkNRZIF+VIIE-1$*p>~{!1-z#K$7TQ}+)VVA8Sq zyTfkU;=RYkB4w%vVajizdkdO_@NtUyf$kaNsG+(y*ld;_jMEhHE%4j{Z`*BL#Dm0{w+!RwhC(88#DUlQ((A2%(6ew2zg%}KmX-HFizm!=i2sI5#!TO{Ekuz zhf49yN>`2VgJ`Vpir|IT@Nl!ow~4JXz*oj&Q}8zwh6G&wD(-B99V$Odw+=*F+^|VqgWRZ5`(wAZd=*md+{&7xac@spWxb(#PQs zZ=U(_dpmKn*i~UISyOoWTWjqb&rJB-;t<%XV}}F(S|oYyT4LY#cWXwgU7;vusTT0K zgVgNXxi5_UI9x-=OZ}e}K|(K7RVnci7P!4gcp>f&@^dO=mKu9@!Q=oF&T!XN(I)o>tgbP z6PsK0no%}WRnK7dF-Y}#S1|jqEoA(OJa%%47Pyag?3gtI*jb)8tQX@CB{rkO!3Xp~ z^02mq{RCfJ#}og|@2V4OXN&S>S=*q-b}j35t3qIPaHr?+w4+$`#IW9--wnTsuTE*V zjzZ{=1aT&*a$rA(DN+wX^##-5(aR^OxS=(#XjtBQ^RV;8Jb$YcnPIxU6aS znq422!qdD)3P2dN>f|D4^1oB!63&s{Gvin75dqJ;Cu=((s(7-~e1UmTFyiC4vEUl& z3$In)DJlf|Rb$PT?LyG7+-drcJ$3kGW5lz?)2(5Ruu|n$hfbJtOo~+I*8CB248>%VCnFo)R)T-Sa-QoSlVMfF&$em&As`oFR(kHn2Ar^a%Tujre$XHK z%=6G(4SkM@w>;IQ@!6c)UAB>vsYdo zvc)eYuISo=L!l)SCild!+;4^aS4ksGaNjuLhnon*cbb|A{uBd+mIgTk4{3;z>tE&P zxCQt3H(WG5CJ8oXS7sc#whXGot8Z#o%i*@}#lCm0YjWn!Yb{*-X5tcYGoMdEs?hMQ z(OHH|702&Me`fM&CH}OGAF~btu-z+@J9ii7z5k#rTDW5)9(+DyhREn8+^!CnZEr8d z?^V6tOB8S8uI(>I3i*vW^XdQpp4XE%@2I66wmcDN@OmH%D&gA6I+4!kdFR~Tsyz;< zTjO6j{<$`^MRx=m59!czAbu+GC#@O3?=<6|@i2ZAchUd(JH3ow(#7}>)Ss7U{bTBH 
zE@S;S>JK%r{sraNAF%u{H_K1zu>9FmmfxZLLjc=Iech%Cke|8xqG^jDoc?6?_j1T3NN(EVn9gkuQKAcqayhxjB|ghP{8PCB zqhE6bE^exZ8Uc4T=hGL#zv#%adED2qrG?KaJ6HmLj`{IKuJnSUE8e=wd(vRi%lR{X zY@P7@l#tZxQ*2T3m5a)HT^m$8+kM3T;W<=3_B`v^?jX#mHr4%Gf0EM@iOqes<2BcoOH{P8oj%TKxIjuUJ53&zRKNl(b z;h~va8KYaYuz%qCw7YlQv4=M#Q1ntZ_&fL&UGG*!i%W|$E;Z_db7iW?%vd`dvN`B1 zT^x)%ZWx9gyA}av4LbJg76-$*BMV~J%)Jg10(b}X;#T0V907-B-AG88r>nYSmOn}_ z@$dWS$vN-kwZ4}g&_kE5GVQag65ts>kI?Sb3Gh(uvq|~wSX5!Z*Zf-P#N(Rr-`_vW zlI?1(tArcr-4BDF9y6d-MG1Z#ny{wLW-U;!4-55WW_>;u%<750Y zUdE3i{tod=i2v}3_2*St|2UTQH~CrrEtd6%dRqS9zxcrN>y-bEVEIYPpHY723(G&q zvi&jb-)Mh9^Iw{u(fq-KeZLK{`!C%eM%npG@9*b_Svy^Ye?YD87w>n6t)S(^zUI)n zA4+f1tv9IVs9{Eu@ZZf>n$dY_+K(Z3OTBCT^Mg9pDr&IFNB>6?%;OGFX*! z7PStT#i|Y}LTB>S0@=T^@M0&gf0y2JD0F|UEHM>UzL>}IV>0I(< zC5JRIq0f6NkGmqcSInGmpCbqVPV4DQWT--YkM0SN2lwH)n)ll+ep2As^W=TS%E`c= zcWF4`nZzDdX@1fvl+k7lkv}pA4U9~?~GqU{0HjKQ~#Lyn_pS~O@{S{>{3FOCqJ_MSt-l!EMxfx+8^t&{o5n9zqr8WzfaiwjOGt3+4mdWf9d|v#m--Pf0JK( z^>6rc2NwRWcq|dFiB&4bsr+%87_=@u#@x&l#kt|^1UW+tJR$Vq+;Jn^|HG-ctt%1S zZ^cLT4H=`9_2D(#*iZUtHL;Wcs*a4RG8$fA{!e9}s%EHg4Tngd1Mx z3XW+c;n4DfClqSype^ycZjY)Te!0C4JGStH--eb6Z}N|z$n_YhB?^1sgnFq!vPuXH zJYLgaHGCT^w_Cm}E~|i9|4NRgJhcO#61|6f5(&6A_w*en4rggdIyRZMug5WdHSvFmpGy46LyX@?{4?T5H8K899OIV||6vL1 z&%a~+V`0|coX+}h*I9pv`WKX6r~I!m%TH4NjPg5_f1v#_?cZpBv6sz%X?`}5%^&Fd zZ3nym(*1$XUwVHdbxIcM4n6>Nscuu{bt-7|OgrjZh5&q?;CsuWM}Wg+x;$sT#sWAO zb8&fB)opO+`ux;Tcqw?)Y&)H2y$SX9$?X4$zwo|ZTSI-tS(Ih8!c6*#zzvw1wd z4?jOD4|txW4A*x#C|it|gSc^JMSY&;Fi+;!s?J^6aJOgDGkFz59Q-FCb6@csIv#(W z`*el@JaK(CrVBc#mcp&5w?h>?x|?bY#_xgO6X)v4Ph5jNa?%Bkf2Cp9{m<6Lm$t$N z!T4oS6K~^@ri~Yp-UwsiE$i4?3o%UIf7i_Nsv2;`hfh$}5Che5RgSwBD**S_oxX;h z8aV5u{F~qH8^AxZ=Kbos3&28MXPa!846dK2A?(nlfD#oiPu(<8$1|^Y%_uw;j>+>X z^Lr|!QL|F+X6_bYn6T4iV960-I2wN1EKgA%J)Zt7n%A`$7sd4nKl!W*Zbw5Ve2iX% zPsgjemV*s!+rQ}agOgkS`~CjKos;5It?*uzNtH#x2~e#2ar@dHFYF0T_MPy@3pGDX zc)L4!Ki=6{A11Zf4Syaq70u;*r_N^nfOm|)ZqE4C?u`HYoAFbLKRL?yeVZ8nY&YXa zyH>mMWQZ&Lrwg!PBcvHry~mS3m*?=Z_xIuaFvDu+L~OKM7XvQ8m< 
zelg@OH8UR|@P_8c`aGDV99n-Q-5vcC-uj(dl7rSQBHg@Wz9>>y-yykZGv;pkc;J%1 z1?=9D=si4VFFq1{yJo$f9oUxqw77QG3y$5ZxNG{?24K5<_PeK{XwtiWt+oGZsQz~H z_w;rvblN-V+Pq>9l$@p-J+HwG7XA#ka>_U5^aiW&{25Q!zU|ZZKv6$bxXPcGd&wG~ zo@f>Q%;SZFAh++lO%Mpa+~8&;c@13sH*QaiRmI=NiSb$LRvgaylw!44d*HL`h!F?;C$_g*L8jn<#F?4FP#;iPtzWSl$16&k zn#y@#*My+qT+30V9SgF({4-;4`u3Q%q7wmN+}3D*v-&9f=A9c{wJ8kf@2#7rtrm1C z0(`bxQ{iASTzrvVb*#AR(WPy_V&FDL+a1v-K>$bC=~G zqS^kK_HVSmp!qM&&uIQY-*0sPrTYV&zwg=myXS(d&#iy2IouVU^p-zmC^PB#YB#a< zka_lg>dBJ3m>LuQt@DBg#92QZ)a<*73tl#_@$C$TgctLhisrb%RTW~hbrtN(4@IZ^{>bn>ij3+qAPtX0iH-CFR}Gqc4()DTwWhU#2P zvPI`ym4ojOX5qK{7nbVoItCvfoo|U0wZM*-ZuKJfZlG+{3vT^JSq!)vHzdNDJMLV% z>&57(HNL-^5W8XPEPP|;lJFwl7953~U*Z(x@?S;x6_roGN92v>$dG(U>VzDQ^ zt>apQ1K3|%oXLOFnx6mXZ#mEW7vv8ZX8d*HR}=r2_^HI7Bz_<9&ptAKlrQ7&5Wj@@ z4_jD&p8Chs-=zNACDtFJ{sraNDgVpO@{^Q5+sN`e2U-3hj_r@7+5XL(?JsEldz{VB zB-#9dzTfEnOZNvlf9d_ryI3QoAut9l6P1RS+`0kvtLxR}^5k&*^E;cQaundmk@y)$ zhZL~Ov^~QuK>|a+tu3kkYK%Mft!n4HF1vLKJ+VFR_5r@}# z@JiWYAK19W(ky~^G6wocY|>n(hhlNb$*mJNg2sl~{6&9FaqwQ_buQnlxUh1J$31Bw zaJ_FY&*1fj)nn^*M1wZtIKg1)<57*s>)B8no@xTUQ&-s(MjPUg;I@N_Yg>_T%?U^C zi7Ro3ebj`kae6SrWLZkvFBQfh9N z!RxxXH(Bow;xmH@dPkHsF=Js*RP{bT7|R%Ij(dm&V_oRjwy z2{3#Hvd203ng9ETdYHfECi7odGk*Z_*NImdp52D;R%=_$9=D zp#D7dkB3=*a}(>oQGbZ~7o#k{&c*V-D_MS$@@HBszeD*4+8+n7{aX*)U(oz_6PusW z{2_>aztR2oA-g}&`P<6g-z~QsV;tW81c{afwl?=>LiB@wxl3g>g4(|~2b4c=#LSIz zXDB3c_Rf(4a*Ks((bI5=5SOV1#upr&_3y(CI5=Z|fBZs2n6IgLDT2eP9@`akGj5wZ zc&B#V@w6JNy=J@m&h~~Yn6r9Ru!XA)9M$#IBUTt;@4?euJ4Ewwp32`1FXYdIc1Ccw zp_Chxs8^l1^FtOB7Rn6&l|Bs{yB)PZPUwMMA!R>L3GKjK=~zk2y_?Z5?8~N2`aGzz zV%Dn9pLOBXO>Uv4m&L$$)McXV6*Xwz*Og*9Umq(Mw14=0-w>iBGw1A>zYdaT{iq&4 zUK`VT{lq^??8b!tiDQ%7Poog$LfBQ`hmIzb!%z3@z@p zzf7KS5yGYPm-g6K09379mu8d(DV?Ltng^?UhZ;8 z+!e`Z5bc9%&rdbEtgC=^8x^y8$Aw{9)%7pAl1H(xwd|1ZDRUG#Yf_v$;DS|#_CF_R zm;C3?(qsN1^0zcG|HXOc4lc{*Cq*nQZ<$ht1FI*!a2@B;`3m>sO)ve5G%N*TXtfZ&{*S#r<{#)~gnD;k8A=H9x+p!6%_s{ecu+ilL()1rbBWmdxF+Zo}cS{dLt#;&8#gUabAFL>EIt-rQ{~ zv4F{XS~WAJcjE)SNxG2-f;gUrj^2v>+c*Ha~rpPIl_c@9Ih1xPj4@8HFw)6K&MBuCK24`2# 
z+lMkUkLYRaam0mBHZ9g13xV3^cMm0fxd(8M}#E&BW&Qr!OA^rpP=c#{8{Y~n>rLz7I^)JF$ ze*G=W|5ARE@@JIav1a)P+8-OR{TuBsX#Pv{Gnzlp_gf0P|I+<|&fhfl{<=udjX7=b z1YQm)S#T?^;e3v4_cb`|15OblA=Z&);A1AC`h8Og?5q;I>?>`ALOUgA2QIJyi4{Ah z1pl*z=H3%y&XITU(e1#!R0p4%$rRE)lTBqQ|qIZ-03oO1GMS z;}78Y1lMfxKXBOs%SW}i4mUYsW6*-JsCk?@bX$(V=Iu6cW>(1lsg2fHgpVIxwXZ-$ z-sSt7rrW@P|JqAuZwI1@&RR|7PsXV1ZN-yndjpM6|M(>QN)0dMFN)gZ9tmC><|Ms! z55&9nPfEnO7onu`blqPTQTX#Y+^Ah<1+zA#`Z)6>;Zhr^1bO**6mqdY+?i|y`(Hji z^IT9K*>fO&A^G3PpGE#5^0$mI|HVt@53pkV^)be;mS_Cmjf|fC3I6J(i>sf^X>>B@Ryf|85 zTf>poxFvl6MN>#P4wW6jd zk3#KqKd`!ackraW6@F|t|9l{E6Uu(H))@I|08=_goO6=uFf{#C@ZDfHaCX@$vUH_5 zM99aMbFxI>dgrFl=X2}8Z>iL?d~q9CT)cf!S%epe9X?|7Q@{+eptq%g~$QGSQ= z541m~{TuBsblLn@n$6E>{?N_7-{}5J_lKwK{H6D|q3B0t@WWrwdQ@ZhTHiCMmMvVP zQ}F)&)0 zIZlPczarG9wIgqWzWi`QDyE8>$R3Tkh7|%I*!6ujUU3l3bG<-jdD#a>1O%B7&bE|Tnl%K(gg_3x7N*NSf ztSIc=at_bf4f1V#o`SLGT9O>pldWZrFW#^4jZnP)0xd(ED&#Oxr7Cn6{z{o`3CG z+J3kWWJXF~3AJ+g)Vwhf1IwR)$Buy1Z}HDT{HKXf{8%TFKXwW8Z<4=o4fDT|KP!s) zhsfWO%={PR517>RfBt$3<5x#B{_kSOPt{=jN#ge{WBfBC#*b2F{2k(#^fUg$SJs~w zVEtq2Z&Lq_`a`~~e{q-P*D3!?`N=AlKcoB(HQTOS-A4Vw$E_NWQLxCP$%3l{U)O@p%eOK-~8Od;Zep-%ujNRzX~xQ)V5j7 z%YmV3qpdcE?#T0XJC{L$4K%I(7(Z(u3^SYbqwea3V2g@dUVv&86iYu)%wHA{*>aC} zS%}{QkF!6^evP*U>lsUHT)sEsXSZf6p{NA(bm`@u_U=9wb12*+Q&c&=yDb&ZcHc)Q z+y0zYqB;0y!!?_yYM$6P!sGW-FddU66y~o_&WFEBo_DNVk7C2L0*5^x#6V5X+T~n? 
zC2p84VJ(-|ic`1CnTj4M0_%XIFJs@P!0excBUDt2 z?hkbS()+8Do4us|>pQTi@SR)}b`NSNev-GVK92p-VpC5T9*3+cfg3Il#iPcFsyDqH zuAtz%MLgfaqoJ(HtGN2-34FhL@q4vJ_8_|NhjqlJP~de9J^3az6w>zznMBv;V`qAa zh~y(*P^@s9u<(={UZ4ANnp#5&g1yZ#i~A1H5MT0^OTi9K>e)V)R!M{V9SW;MS9jnE z$(PrqQUb6%R&0uYZzhf_Nau5OzW`!+rYCN=--P}V`-}=HSDZDLbZxS(CkR-HXl}WG z6QfIRaSOl5gXz~xe+Z=?0GEA>+5{GRf@+=i+)F1T(e0~QcVvVQJV*)9oHvj2-xHks zH~-Qxd{;DQrEYy1>J+?PrXr9Fy+wz%T(c|zx1j?Ks;P0H$$r0Fc3a;vju(>t{@vik zAIq9sq1fejd!1qjEPHf1`xZ`JL0dA@aSB^TanTNd9i}ACo_p{F~%2B>!6m z^JkHNi2NGVvHTh3cPRfb!1l*VZ2wly_7~i2{!8;SK{kJ&?>D;t(*1$X zUwVJP+bPUdHyniDe*RMy2WH`oql@~(6f^$?|J^9B##}`UhMJM$hoi9tvBB95_1wP#nJpO;rpE?6x+~o{aj6X(!s^drP5fWtrwTLvM2`9jt|rv4`N z-&$CIi24_lUyo<`-+L@SN%^xaEWbnfhly-|O#3(5U(o#bGMk?*XY+>}?EB4`-GAx+ zK<6*Lzs^!o&HLRr-0~xZd_vsEVc`0y7iS%^VUzXkcKH>F_-xy|@DxFJ6po5Kd@I`w z{H~1uD!Mfs6J~nd$bNhgqc7O>#2wg)U&nnouC*cyN{^LP)#$ucs+v}~;8(mOOuWu- zpjFSwQ#S5an;RL7N>}d`oxN}y8*X*;RLoK5+!2*vgRN2#5ct-g#+7#dKmWWi^LL9f|1tSv1(<)6{Dn7} z|Bd`vCz*eU{4HM0f1$?w0mNS?el_ubLm5ByFyl{7X8bFoP0 zmfe4!vik$F^OxS=H5Ud(T6>2fJ!5d*>%m-DwWHzqkBd9;cDh(YrR#A}jLE(?_*fO* zofs?a{d58^CdKJ*pBjY^TXeG16?Wjoge^ZZI`6<*GYe}!drJ^MxWI79G#8LfNf(^s z6$I}(BI{P22*bvuiX)QoK6uX~*?&>leq0xB`fkEaZ`kfw@@%h-FD}(H+V1>HZA}&44Vu(>Nctik z{b-nH@jD(S@Au%IRTu$(ye?c%tA2`dqwTLNoD(6Wc)+55<<hhq)<53O`kT~$Th97J)W5jE^6Qsb{+IHTls}{V&Oerap#3rJ-;S{T1{wvAu4|M*XV(;(g7aCVpCiTJjo{E7XZ&C3=^ zW|!h@lp2w9>3-IjwJv8dkfRPw^3i3P$IvR=bg}L2r}TE@P0=JzIZvV zR;0WQ{eBxS;d4%c)djVN#bP1Io`Zg#+N9AlfmlASCEc<935E=glM(ehg5;m?{`7x; zH~EjrA4~pC@)wf-jr>{UA5v%jmQv=wAb-F%#$Rt?{A%L=nlOGU@h6GjNBlG5N2xIW z4)IHT82^F#^VC27$oiYqe_PG^Llas5g7WK>|E2uoDwaQ^{EiIEKhXY|_HW5-e=(WO ze|6dXjOGvY{YLj+x<9OD=P$j#K3n60TQ=^HyPqmwo(6%5^-9hFnYE5*h<$ zPHCn(cGB?L-r}d+Y$23g^x956DF%4EbUhw-F2h%$W;HuPp4T>RK7Lg{YY|#z@3pgx ztwMMHqWFog*K$0ua~cXb+=H|2>hFzjs)5_|*d^*UBBTZ4Scc7CdsQ5(5+ST_*he zu2}bD#w4Ywo~S-NfBc&5ih$40z6?|g!tq@-T3jl=xOgn#>7&`9=sNzd!wqBK|NQgh z?6{>5>YU#I*pHYmuns@DrX&yXMD&E_qYJwZ%q~uq;Q7r$yQ|_F15>$SlYjJ+w9%!yJUzt7VjnVll3wEZKqs+$zRXZQ1VB8yH%`Jnb 
zFtm0>@QM>F!My1rYe{Hu8n>gdk&O}|uzCpWA-Q(_+sT-^nKo~}HA(*yaE zs?|-=IAVE~&4WW=zw)2Zw7>q)d?-gF&tNZbwJkPSQtgRGzAd7A>fOOzTg`B>?hf>C zyYJ;-;fiGPlD?GF8@aXNR`b7IP%E0sz4(v`H1I1OEbh?8jahti13w={?mbE} z>l>4B=E>uNH@8-UitdSnF`ZVJ(>niG2!}(ra^e#~hYC(!-fG?YA7@5q(K;APcv43Ux|Vm!L(t!m4p)Hz1Gs=N*~9oBYR9nLn0? z`8Ua5NdC74%%A1M{6pk#@n-%D@&^!qy_@l?pELe1@l%OEN&G(IpAkQb_&ehnzl8V? z)SowC{bNeptn(WwHImCpQ0G#O7x- ze|XQn-{}7PiQOOQ{H6Cd>&3`7(eh7_B!Bg%qC+%@G{k?o7Z(fjPX188!QoAwT%8ei z&}$pi^VkP|TfkhXx6e&r9vhYQuZGD_H~eRZu;Lp|KP1P!lu8&{Ek6jEw*O+{Kv7d z4UgWCo$d-FR+XydCdW`^SFLo5WjN#}R`YJXr2q?;fBSPFHV}n=FBm?yt{S^e_*{xI z^Z`qa-CyUv3&PNvFGJ5H>|pf-A4z4^Vlh0yl>5q&pO62jul`?r^LL1Kk`RB>sP5xsI=8q-+Cix4= z|3>~S@(+=}h5Q%f44WS2OK{{o zQ=RqSp0NIq9qV6Eex35al%KrL@@H-=zeD+lVzxh~{hJuuU(o!Q=4Tab{y^Vv=Is7U z_lFPc{H6EzMbX-gU;Z|N#os?iN4Ilww_+jJR=zlkC7HisF50-j4CUf2y_|pNZ!a)< zDHeSkZ_N1HJ^yzIyckr0-def$%kOcQ(dSj)qep(h z;QV=Qnx1eJtWvV}S~lp1mn{rxUWnM?@%Ah2y!j!pJ!N!=>lEic&RhRh+cXHKs&~(G zTbBx@Y5VI80mq&h){oGPFP1+r0ZiKHOx#7n>&Q{CT+azrP<{Z|l1L{#8(~e62Ol{R*^> zy}nc2cmm#8{_Wf=mkaDUa^2{i&3!Ee!UZ>8*_oLHOWhmH>}F@+RErVEw2?%(^+XZ*ACj2}h(ohOW6qQv+Q*{nZL z{bTBHQvdBJ>km=?g7WK>|E2sS< zzjS|~^OxRV<;qQV>zf~dqFuPw^j`bxz7Fe`2`VkPXo%w0Gu<{#z^JbKf(>RSxqbZx=uN zI#zqw;{9+P=im1G6MbB&<&c1c&z^q1baxaeq5 zzGklelsg`7{Qo%~<~{b=uAv+bi_ZDX?O6hoPlyOP`=-F$zTX?Ue_w!U7dEK3#d3Tv zFN~6VI2_3jDOT?G0%dsUW6SS3lgm-rP3heG$$shZa`8VeazlHo4E9vHTh3cXC+%f%eC=f1~{c&3|crM)QXX_Wh>G?!R-{ z{b4mbf0fw#o1ve*cDBwRh)9lkY%Cdqg~x9_j*>eH?ORX3iO|f33YFYC75@wrEmJtR z?MoQ`NWOhaU7i=jy}w4e98ZD*dCi1b%Q^nYf%?sjHiOu;&h&)j(Ogci_5Rg4cXM%{ z%-pxOyg^{l{e6!5UQWo^rFugqdp5QK!o*@UZjLpw&GIIDE0fYyH9dV6-=OSTHyUpMTh%=4;XjR(Q3> zBIOER_#koR+GSk`Sl)kV{l`XpaQW}_W9Kr#*@5qX?t~5;8e5oiO}-s9WD++U1_$C; z#xOvd_dBxFVJb z-o9f8gL-yP#5vsA0G+4d13YJ;>D}4Y$;-}wtjyFhPs4mp5BUH7dGdFgGym~E^2aj& zCKvM;er5i*2IkLt$NWQf%-=G~{1;u!9}v#?>+y_VJ&o~yiJw}}_>;u%3t;@S^^6~- z%lJFQFCqQ|_2;R7Y|8qZ)PJM?P%-OYP=1~Azu#GY(t_pByjXsxo8=#VvHfv3+rO=2 z`wN=?^0E0D%^w!B@3(Yz|E2pwAUl8Q{hi^SY_p{D1>{)0Ie#fY6>nbGKl46yAC_3& 
z)!Oa37>-Vp+3SCn!wFir?w)mN5)`$*z7y7LfZKoK2O5bE_uLnUSUhF5CINZu9VCugZ_L z*)2Y3aN=~*p|OKF-a#;LbGak(Jx-1I(vyxqb(`c2o*4oE3d8Vnv6J{>^P;a;OO+w3 zsIX^}~qkMEmbm9RBY5d>#jX8$6$N+35U>Ab6zKWn_LU z9JphiCA&^>fziqnc7Ra{t)#qD8HV|^1qayr2HA> zcO+Q;f%eC=f1~}yFq{82vH2OzAL#py?!PMR{y^t%E_;8qoD?qy!5awDpLP7H@JX<< z)R4Wt%K~*q?wDNsZh#jCBz3N23BzJ3Ym2oNfp98gtYjL`F4&`|5?-Kh3=RG~^S-p1 zV-t$rE%Dq96U&5W{@E)BKh(Pql;v9Cf^8=~Y}y>~r&4kHZ4GN|e0=T9wF|oNEp%p1 z_`1F5+Ic2+vE5R<5ZPf~FlQT7Hl5$HhT9))Ww{fkl9cWCB#ywZ~p3I%SADn*I&PX3Sg&yl3#Y{L^fnSC? zjnb9}VbDDvPe1o-$ex48o1k|Oza7CLt_!zbbcA4Fr^4E;Z@qDC_R**6%a5RwWoJsp zt^LrMvq|c7$q{t;Q?Be6e-rJmak8#Dr%-!*`n4X#Q`ny{_D^bWDz+L5iQE>iLf^XD z-CxdU;3MXrFJb=fN6de`llf!GznR1Qh2(#m%=}s8A0mIt5c6M@Fn<8?*NI$@iH2d_ zXYn}T*Txe|<|!HDl;6{aBqI*PxA9whJH(wp^VN`G634&2rQrJoqnI%KE}sRx?H3_% z)5nS-kG;@f;rUNvk1y73-I=}jGN&IWeB)W8i62g%gm=xjLfDvVTl@G<0`AFtU41hq8~<54 z7q}{g~GDYJyUO6dO2sW zz2(!W(U}Z9W`QBfHn@?eBc4Vp0)eQVT@=Q?e zt1phL{l9;HeE$Fa-BXzVnEbKi-+adWh2(!De-`b#?`AKP( zKcoE4Tb6(5X8Ys4Z2u<9_7^n&rTH1M`2&5w(fv1z-5>U`^LI6Se>L1buRXM67`C0v z(OxzA5PZ2YR1)>!KJ5GxIPC7_j2@3N-c9N|gu^S13%g!=VA6^v?L{R)Ffcqm@7w}c zblH~b*T2&eFJ#S<_A2**Nqix7)te)suWOO1)gl{6II-ohsdWfWd)W8-)8SCKUb>*- z_M`)_Z(Wg$LYObUC=IC-DoTbYUZD=n2i$=B&fiNyZk+z8^VG?GofhzQyk+Jv!GC|hVrx&0+O%@m-EN|vf@5*P2R`jc3;)ii~Hhm*_o-lhqoo-8$G#z91eHDC+Y52m*N}@ z>3Ec8y@umWWBz&acaLZOWAev_GXLgn<}W1w8!~@Z1oIF1Fn~S_2;R7O#RJT)_+rG{h>70zsO|y^?H{7z0UHJ ziY$Ld`JF2)|KP^<$FzT|VEYT2|I+-dnav;Q`;G3u+U))?$j)DSf2aKHDVSOG5hSA@ zxAQxi!gJBhVTW$m;HGPZ+|a%k)pr;|SUZ~yGpvj+RLPtQX4 znV@}_;s;fqqp;YiWU2Ll3BVQE@oR^&Fv95mh|I<9&^HuWq^HI4s-ImJQ090PjM(qh zJ&Xdz-A&~54~KXwSKHys4@0{jIOjla%!Y59-(*6OcT7XY^{p^LW_UQi^M{@FRkk0Sm~9pje}{~>_&=MS*{@h#Thr2ZTAhp2x+`Sp61|7~RX z$-69n7S8fJlz-@B`(xU_(f*>7&40JE`Pp|if3RcUZ>8-1OZSJb?EIBx@9)275|3v4 zeT48&rP;~lMi6&d=ELQ&z4-FHiT467S4@0aHSt!OD@Yp@JvlMk6vHPKZaOccXxl#_t@4}~oYWAV+7Y$|)#4}ou6I8GZPjP@rHyld`SLFo?~p74 znNLOA-#guh^BKkx!--b-@Df+9`MEf3)zK}sS;+Cdbu1X0Ss94;jx;16I;D-x?Dw%g z7aDAX4#4(d*S5+EQ?$~UcfvF=3x2yh_HES90nNY`wc?CZApF`S#f=x69Hjq6Il 
zX(sUE@Y5VD{<=VBmthVzPTHxgubYFZ^JYulpO?kqF!ZGsyve~8dHp^n_BkkPCv|pE zF$tIG2mF+~l!O`z`!~-RNk*GLXu#VNo_GP0ja{Q zGM-pVF#4D$FtNu1L+a5$!2Ye0CiBnFXZ~*TACo_p{F~%2B>x-vv&cV0{uc6IkUxO< z>%^}n{x9)Ui9b2W_km=?g7WK>|6R-S zlVL1>c8=wDDE~nFW7@wRV*3l4|4Oj=8OFJVuu!uE#akKdm) zdf)2f=d1D9&g&0GNj6aSR38CB_5a#Hg-S{gH6^&u_ODb#Ewiiq1Xh77ju4pNfn; zasbT@ziv+8^ohwof0g;WJDLAjoB3lUnSXOJ^A}EL{deO|Cjiw#GfR7AMwwKA2prvcZgp?{D+UMKhMwl#}TZ*`J46Ms6Ryg3(Buk{Y->+jdbK72 zCN2-kTecuzfiS8KczX5>l1fFdzik3N&6`nEqZ$FhebW^S$`@p zEZPD7_8dFYT6`Wp?k(Y}&`pOS6_xwyQMth9-g$Q7hr6)+_rDoAs!9L-y;E57^J<4Y zoVj?~@>HW79P^3Zo%+Zfix!qhe98#IdCo4a3UeDUNNi~u_p}tW9^Sr5&M65Snk`f0 zZPIaQez}Q6MhSt$T zv#w{W;*A0S|8@4JVLdil+_ouWXb_^%fI`X`i4{sjkvT(!QYvW@36;{IMA4{dAc{s! z3agpYKxI}*qT3Xyl*s#k4{Q0bpX2%ReA>s})v@mn*S_|;*82U<^Q^pZ1dOcxu}fXT z56$WC6<>ucEIb#EHG7k`#H$Cx;LX?W6{IDj=mb%{$I8jrcd2@JxPB;#&kfvRl6Vp6 z=YUEtQC=nnj*fZ+7gX{wIw<#8ep4(|{g4>@{&P6EPTpRhZhi$a5{*BpxkSOCYr;zs z<_E!Xv!pt^ieflj+P?d|`FRMU} z8_!(_GoVIdS-8}nbWrd8?{esZWSIWLVpytx<1X9Q`97~I9sGxhgiUxA3GAO|e>eM& z*&jQd{G04AWd9rcv)DgWN&Xi0U$8&mx#(a1dLi+vng7fDROV0062H%Y_-D+IV*U>E zOI8y9L6iFP+&||2CimaCKNLp&3(l`Uqx>)DCpmw%jq*Fzlz*5;_hY`l@%$sSJU&Gd)6>@bJcsW7{6vrxA{@DicDJl;e$B}VM9`6=@IxZafp}Y z98aijnDF$yyDL-(1)Lu(@c(u^7`V5l>^zLp?rgbTxf8qNCsY*l9>?4EM*VKS2!%C% z!((zq!(rCs8M#(X?{V_(eWspM^g-|JjLXrRuVDJ`X0NrGF`!6)A02*k{|UP|criVt zbjI5h$oG-3zAcmsH;nnzr<*&zHpGRF2?s}kfpUTe%lsyDo z)0*rht?sxx-e`FG@nY0C)LvHiGz|-;nOsq?%0>kPrMcUT%<%Vgo8Z-rCRjlJc}Mbh zYm@(2iTttb-#ktJLiWG0Ka2fCI^=I*|HTpV2QYvAKJlxW|I7ST=1(%ekNIcJk7E7~ z^Glfj!2NmdA9H_``)_*GAKFa)i!+p8A58gQ&QFF?{*3cGBPjo1LHFYkbbq@?_Y2lfJUvZir`2A~1&#$YPeaU#ER#;tf)8D&j1Ke(P&_90I3Of>e#_0+; z-aB@_8)g315+~|pn{JV`f!w}UY3=9i(d6U&-R1#mm@3(67b9%}O9Px57kvxB&W-`O zRsD9M(e($a55K13=a{Kd(G9xT&u4IQ+`dqlb|E(Ja(_QuNq;|ELsxgl>`+{^wDQ%A zN%kP_UE8S?brszQ<_~F8iNgu5l~qGj9YCwHP)lLY88md0D=%=%hGdT`FFOPt)JmBV z0m`RrA#!l->96-=P`bfet!uswB>AL-FI*w$4Hm4CKGkLqODEZej9=h^KPM&08%i4r zyxF1Zi|n_fr0&!fu~}PjuH5p)PLE^pilR_nfB6UaMpx7`NpJ_;EVRW=wZa+oR+TR3 
zv9^MT`8h8?!&z)J9G*SW@Ca@m=;HiR+ZBg@+`k{7L5b?IHdd^P`x* z!~BvW;y*M}f1dltGSuJX{+kB%hq!+sO!;-B{4eJxIe*6a9aG9b@cp?JTKmQ z1xD#sKKQdQAN=m!nK>lE80S6gy4P(YcprTnw>#tTF0gEhD9B!P5a((+Yi4fV2Aeg8 z2(=p+L8$2A`(^#r(MD`vWu2A>jBxC-S>|>W!=`QTOprg&fEMzBkGmueV3B;9ii3nTTDwQ}slQ{1 zU4~uLLCXrA-b@v2PHpka^xnlIzT`vx*0W+T*&BPt?HkY-;DcMY6)&Bc?1kbVL}oSW z2=>&nO+x|^0~EjUO2#M{Lvrv0eG%9b*_cyu!#{D7gUvPe%^S_**{=cGfxUBfT+81N+ZN2x`jFv>M~6;U zzGAr@VuBT7e1^y1;cs~&MHkNl(BB&lF-V#9Ci>s+H&%}uJ7Pg1EE*&&xpi$k^e(7u zw0t4(p6oaOakKa!(9f}7R-aeL$`Uc~n{xTUeTmp_Rz$vG+YMBEGAz2=Iv#h5*yoKl zO2lu{j)^*zH_#+hX1(i;OBmKZaL%2u7>qnU(D2sW7|ie81rR?`Ybe@&19o z-+2CQr}^Ovy?^=n&6MgoD|WRBtls8+m^W15yDZLC_AT;78xe*3;H1C#@U&D|J@Kyl z;Dl7z9VuPqk&z0UuDqNpA^6Y73~Ke1evk@F6-O+}%T4{~pJ#tJ`;XZl%l^&zc4S+XchG@IKS>o`CoU+PbN_QjPpAUlz(WT`|%*UznReeg7?3?KjZxa zf4}kk%ku-jf34~H{i*2td*ZXZa7<1pYhRfHPFS-}NLtwvmbULR3VkGoU;iul&n4an zj%H}dW(C}blP2(>e77#vynCRxDE%QG-^xt)5qI!iA<2=r7>RMMt^VO zKcIP>dH__@?;n_?bWDD53g`yLbWBlCg{e=cgsWV*3KvlF$U(t#N^~5vPG}prm^3Sus`xN<)*&iE1{!R85vj2_! zS?nLWLjIN!{UPpOaDJWhzm}Ar%(9r~AbK+W+$YjQ0-{==+W5-+Y=M z`2Bl=p5J+2HYUdRX@_I$9@w|7IEh-TDkjSbI6)(eqn1uQ;tJ0m>Hpjo5{y!=yWcAH zE5-bH$;|whj&NM3cVls-A)eaNcJN5nWxV+Pp5%a*09-~N( zWJUyCnwEy!3}dGL{u+VmwWEhW&Wb`u>xqUz4pDev=n9WTr{Zzc6ZcN5(Fu67z~t=a zS@9UZY_RrA!GFJps%qoyI|*nrd&@xEwsUlfJb`ZE zL-LK5+oPY*g668^6PTv?VpT+uE6!Z!=5jYW7gjm-%JtWb5%}8fI|yx!##@Gq=CquO z#tCL_qr0uDz%}XYqR!pXIBJ}X_(=J6uxx@XY;MzqPV&zWCx18lj|0gcTSWd%_7}4M zjs02Y$UnsX7WQATKfsFk>+Zy_X8x}y@l&r7e{vM@`}Pw5ESC6DcEsOFA$|$-AGkly z{bTNLa{n!f`a|5m;QTu0e>p$N`7_S%aQ;Dy?#Ex~{>JwUb=v>({w#;~55DyMwu|Op zo*%~3`(RSCLB8_-ZMQ2 zm&Walbr}A!F4?kUUUU%t=`4TlVz?i>_84wA zFG)aYHTAguO2OC^;bnTVNd;#Xo#`}vX^aCF#=0aQ^8DwYPayxc4f&7RA3L1voji^`!jcUCNIcQ2vJV53lHc%=b6GUr5mYmiK4#Y5$;2 z-(NicE~fwg=l3r^zajZQ=IcFt0>^&Ly79T)2>h=7=ilUa1Z*D<6nyrpz_swv3vyc5 z!6d`~20EK4!ZgP+2k$k0_|wP!dXwr(=*^V*vf|_sFqB+bB3t1BgPM1wB|WNyjHQ2M z@AUdX#m(jCqa%)hNpb0rqFNvP+%L=Ka%==-{rx$vSzPe?pG^T7IatFw03!*E!((vD3B!|<78%VY^zcU{cmzv*T-1AA+8+V)@pRX3| 
zihpXPPm8Mwcp1Iz0bkp^(D}{J#Ug_HNd7r%@mZ^q!ESui(rrFhpkm9)qU_WdsJh(O z`JQbusE7)9y?V(|@Vf}*Ut?XW=5}Tove@SpJ#t}5BZPT zAN!2_o9r(fK>jyR@@KJsNSypFQRKgfCw~C**PV!8&HP{Hr`iyIaw_rr8i{|#{3zz{ zFu&v|@gF8re||jmkDI8!*-QO5E9wt%|AO=D)s+9;OZiC)%AZM4en*?~4@c>KEJpXY z9=czo(f*hBXS{#l@3&T(e?@41;P>xQdVb@t8A*nJcnOYv2i|(M2Y|e7>iLn=oY2j^ z-o#ef9y^;PPnRDzMu*j(Ha?Y#f04-MEbJOMoi z?0>1YGakEt_-4)ecm?&6Kp2&ELdgo96J0!;8;h#zFTCJ%I-}SiSUYUgSCK887!L zaf>@TzrN>NP=6k8ecxV@WN;2=?@_&^>5zc=;djkNUnk%Q|6E!J1hFULli z`Q2-R6EOB*LSVaJ0w(@SuN)@ea97%q&r@a9raByP&4=xBe1bE3@fQ z>XCXV|0Mc-%lB(=GqfhgB{Kz1())XjsL`q;aYbNUHZ&?~%sK4NUiZD$u@r8ckW!nf zSptnyc6b>%2sj4Ei$7gl5d_HivTp-XdN(6um>(rmKLd4cmUPBi$@6Jr#o@G^6!{^7f1RKC zXjRsnIJ|bbRqTqyX8a*{r|^x%CY}Bb9f8L{SINAe z(?9<_`@2=if83|+uRqq2{F`~?FVrCaTRr)+Jjp+_m;5d4zhHj=^Veq*zq*$AzZS$# zH75Qf^ZS^8CQAG$H{$O+Bz{RX@gKseKhOQ+lA-e~1(N;W zYU)n0#KW2&6Fz@4fae*->g$e%;qWJ}{YzqwgBAV#jdeXW{&)dIj+_i}5sHAGnt&kV zPmypgIK6%LpddK0eTLS$)Id}nGk&1EULaaA zpxj!tIuRcUtxYzYe-})JH#gPq@WDefC#poR^u#5HU;3R{7mim&Za+P~GaLt;ZF{V- zEF5EZRr%_jPr&#$UsaC=3wX_Qom2h1!*F_ED;#;~1%lVpN1q(M(QvbWhh=Rxj^9#l zt>$qO9ghbqt+p@5-zTO_%kJ_-;Z4?BBi0CbM!r8xAHVm+eJ`Tzg|FQQA9)ukZOK{? 
zT4tYWx!wy?PkntmW}O%Ij4)MRJZ&GoxxGHFev>1HJP3@m>~+R4_eDoGpSMA2`*{~+ zeD|QTNI=7pTr0G06c$$amX2ZMpMOdIZuTFuKbC!(>@Q^h8~d{)$v>n){ucIMus?wL z>&&BeCI0Ui;-@lylKFkiKVyCr^LLm>;z0ZdL+a0S|Csxm+<)W#5ce+@Q-1v!<$v2K zKWRkyv$2%lQK9?;-;ep8WWYrz4%xM=WE^<49`zj~*B18Jx`nDz3O8vwl`lvv!y2MhWwqrQmYQY%?i&!ktw)1@X z{reK#sA*!CcJD!*6_9CKIuUJZQgp9Q3H;}upGy928}c6~kUy6Fo9r)?C;uD!v)Dh> zO#T-3UrZ-|z*^$3n-Ra7`M=ChW&UIv@%w%g|BU%j%->;t3G*MgKhOPR?r(DcZ8`Oa zxPQU<^-Yxj<@{th<g-ZDD47SY)qDH~_RoWmXUF?>DCfb@yOt%9!g=8G zOm>g0;D28~qgPn&Sm1`z7cIkIFZ9L6WkHdjJ{G{uF^3JvcHi1Z|u)fB>&J> z^0!ox|KbJt1DuGzUP=7wZN&d&ek$`PncwG1{4)jOM=^iLllUdff8hQ+_m8>1*-8C3 z?hkSQg7fR&DF4g(NzR{feuwi9d_NAL``c2wU-14{nf7PAf8g)82Q>fk{2)s2-(B?l z%0F#UlilBZC%hhWjRaUY%&F&^FxGt-?39}(!@50qm z3**7DP{T}QLL$y-+}rn{M<@&&la{_h*&r!s$X2l4xuf5!YM=I`t#ehKp*xIfSR;|l6;a{sNA`a|5m zcu)CtQOf^vevHBRM&A+p0e&F{nKffa` zUXo0d{RqOp)8nHjt%I4NmbPM}3qVmmc;D%$MD!mc`#fhyIc&CY`5+yBA8e+leBKun zfyFrR+_C3?p@kN68x}rg$II~}#W4<6i?4==gE2u^L-~PB$v|VISQ8m7>d3xi# zwKM!)q#r+_+6Vky56c}|l@I@Q=#K6xO~s!*T8c@*x#*~8_H3qK8s^geSdaF{&9pz3 zp#5Mf0Pl~(&wse-KlmDWTAi3XLp>M`dUh#%o_-(v z)#sd2>c7l9Hc!NQkJnGW^*haYg&iVtI zXryD*l$df8cz?|P`DEH3yO95QJo#fQ$iG=h{=%8GKb}YatQ^`Ov%lp%`7iRwAHe+e z0^(OQ|CjlxCd8j)eqRagk5>~vDuMVrhlyWuoc6~7v_IzlG50sQ{}x64A?{yreqDj~ z$DE(!{Fw&rk2(KvjP}Qi>Hfy|3*P_o{*3nz`|10Q=U-WxANc*t&+qd=BU_!meuJ&F zKTf^cd{ehf3*u>ioJ#xSWjdnQ_XH)O;v`4A6J1Fda0Ewa4ar0O?Hw1!R2N}k;tiQI z2NUqn#F7CAZWN(rN5B5#{PR&gN$SByjUs#S7Es3wwFW`!Nn?U!{ttj><$s?qGFPKSH4+sd| zg}+5dxn(W%1oqEolfOHV{KxE%RVM!?`wQog|Bd}w6UaYwjQlO^zhHmB4&tw0AbvIT zf0c-z8cqC3MdJ4{|BU%jpNPNng!m=Qf8hQ+_m2apzZp;cH|`H{|AO=Dvnc<|`N^G> zKjZw)9?C!P{h04>e7}&O{V(s&c>loPZ#@6@qxnIJ-oO0(9_p`Gk|Emq?|W~pTC)Bj z-)Hb{6CBd3sfD>Vs_zxoM_~B9f=!oP?SOuc*M5xCRD^o|#S z2JI`}UU=1IQtyocgM7L~hMdG>noo_V4fg!M?kiMpSXjxnZ~WLq|E3;KWFS1gqZ z6S0hRz_M3k3mu=R0`Dsl%%3#{{=Nx^lk}g8DFx!gEQ73_S55=(D}ti${Jy7e4CBc^ zuSNcD_8+r9_6GSk!^vOB{x{xNuz!gCExfOoPW}J~;;-|*g89GyyRXoreFgK+{D>dL z{GI>ZS8#ux`^W#gui*X=_b)iV{=fT*#gspLMERW(%0KYFg70s9zu^6^812t^|M0*2 
z3Z5U*=>5yjyG^mFddl0z|6^aV^=#EhPr38R`wEY+mD#VG)1Y(V`~gZD>5%%fk5|o? zR2cXqd0E(OZSje;V*Tk_84@gVeh?B0aHFsK-1`FhaxQ1GrEL-*W6fhv9|4D>rR3q@g)@AS{qyt4-_8DG_Q$e+ll_IC$^XXwtTOTsWstvx z{TB-45138-^+e)VGyj+Qsmz~beqRFd&zK+eoA^8O#4lm~1NY~-f4raio7{gZp#Bi| zFXSk{&iP+A%1?6sjPpBBDF49sW4^!rr2EA$+W)H4{;Zew4<+>dc82C(o*(%A%fFvq zA^ZJqTMNWCc}pF9`wkxZ9rS+Eo&nwsVFP;VAHnNtv4JyPS}?aWZacV?L2a;x!=yf? zppiWPkbZe7yzjqZxs=j981>sU+bA{@`~?__Da%SB*xr2W9YKz+<#E#5^o0qa*QM8I zSV=NmJZ^cqRWcS-8nvU7w!}f!jXR4CTCPIrgGl(=nFA?Ji|Ssr-2&^-6WP;uT!&S= z8pdr>&V=tTy&p^|e+)Bse94&~mI&f_Vbb9<)GCS>NadZ zIgEX_Y10RbauA*MIyHN3IrufVZ;fe-frvd~W(QOgq4lL*%67XHPzm3&ttqnx-AaDl z%x@HMc$I3jZ^tCU!SZXyS%&*CFTD1tYr|y_(^|7oKd1y+W}opdk#~Wo?mG@uLigzjK-RCCq=|{yg`O#i+lT zLH#%G4{`s3^XrO~|K30~SSfVLeRtc-?>QAqae!|bUz`JgjFnnM=yp)Ueu(|&VdLN}v8Wi~k z6yo$WKh1ps3A>v2%lp;AxVP7rc070m#;WSZUaLyL!DH(-MYkfDm<5ZqgNk8o(z>so zR_4QSnc@wgUj$Pl61Fd`DFpw%zjX~Zl|r99KS>|!N|5MCTkarT4hdi16{qGFK$7-{ zd5zj>aQB8;#Buotkg0LL)F10GY*cFio4A2Ps-XcMAKfx%^ri+lryr~qY~KKN zVZ*JI*1mxLtNKmZVj2a7XGSbf-Fgkqh)nsRJvbMhtE_#Zpn3`P?rwYW#X1BIEL#0x zL%~hZ&vDM^+o%hv7l+-=K2wXGp)0P9-)M>oZ;J}TBhSIg)_xzi1esy#o6A`z?J>H3R4^ph1B6c9&`}9=B>JqSjp8ehIKW2X{`#0HN$o{vJ>$DJR+Sn7b;+_pwA%j+JK)zJv5 zVpBg2)_4u6M^!yeD8B)7iJWfF*Nxz!_hW9_+FF=1q*be={SmaUc@eheTs3@mH0{6P zX(gQ7X8c-4@Snf*(^)!R(1+2~`q4JKz6Pe`f4C#JrUc9kufE*lQvx6Ej5yeECm&W$ zoAxtArUVSqR~qR4DS|E28_n(2-@>HmS9^U0{R8*idUy5;{`H>lS>NWgCOE&ztZ69 z-j<3SQ@k4*-pJ<22=;vnR}B(l-rC@q8%j4+-$jFSQ+-^cv>LL1p8ehIKW2YyDET+p zUpR>TZ!+Z1V*ijG`CBHD|Kc+F1DLJ{4?f9F@J~oCCq=| z{(OJxA9H_``)}MI%BKDW=hr#^JAm?&A1Qyv`JIK7f8hHu&$A|Uzu^6E3GL5#{}4#u zZ~Xf_Kk)mPpI<}gWx?;gKJk2kG#{hRM!6Zmy(PH2&emD6rtId=Uqo55!D^FR{41R?|cla{?5k`zRcWe?Dg~L`FB1B?Z5Le-u*it z`!OZ-WikN@${m>~ z_2+j||M)KTH@W{t^D%P&g7fQ~|K;UcLYT literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/gm_0.jbl b/econml/data/input_dynamicdgp/gm_0.jbl new file mode 100644 index 0000000000000000000000000000000000000000..e4e262d9d950fe9c4bbf5c7060b615062a558f93 GIT binary patch literal 1067 
zcmZ8hJ4_To7{22`V0kM(KmidShvEr{g7P>BCYQKyD2Z{qadvOGUD?+^v&*BnL=r?O zv``xYF~&|S8*H(%(!^L8(GVL8W3(_9SeWSS+!-EQZ1R8e|KI=k{+U}5jg6ZGQ#)oE zQ=U&r45hKpn1OASL!?9w7|wb~5Ne)*a0c?CN@*w*vXSOku45D1V-S(^ln4hs$AtVu z{E9mK?&s4#N->8hNpsH$Orapr_x#ny{dye+CXjMLhFW&BTmBjV1ymJB0&x32K!Bo{{O8 zH+N9ZyA6i~;LsG8piT+%!$GJQSV~NwA;%yr2@WfH+-0l_8s(52z5_=_p(!dyL|_jb zO(v6<|ME}#-7e`Ne>9tzkPY#5nftd{xtjYC;6q8jSmG^&a8~eiWZ>Kmp7YyyF3l}7=(ykCo4hg7-W}~1du)rVf}m3_ z6CLxR5{gvExB2dLWtEC)4!_8jZ+b>nbEaVv^A)+EHw_m3UVHLvdGS{qF3v&sDD;Ty afO5|O;%gXeE%XXOT4U`Uu{P-Q)!08I#a%xD literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/gm_1.jbl b/econml/data/input_dynamicdgp/gm_1.jbl new file mode 100644 index 0000000000000000000000000000000000000000..02c721f9040763c3d12402efcb444c054f63a89c GIT binary patch literal 1067 zcmZ8hNlX+$6rCAVpivgZ1r;|02bU2L1veOjaS~f5%Ei)4rMjn>Zt1S>U)2qAm_!dE z95|S0;>qn`;z=(?Jz+d}&={Aq@xaA{8n+mOnrL;+7{NYt@?ZV`{_=j+nKDxhIh1gg za_!SW$Y~1aNhpL(7%m}NrUdjCJv4~5o`rY;5=pc$7K<@#damy=$^rppdIFV6Fz_6h zm+3uf^uPLi>ldn&5T|+RIZQar&-C1R{QcV950j`u9=PN*wgbvxfu2JP6r;`T0Lt|^ zs*+}8V4rZ}3aC&HLoOvuK&8sh3Cnpd76Az;Xj!DmkP*Ta!UL$9K;;q{rrhuZg@t+y z)&8@w&1B|b;=m$ZLpAb1l_=XcDGo+`3e{+yLgxYy45HeEMSX<1e>M0_1 zSw-V>YTCl~Sjlm57FA0gh6gE$<{ZHomdF;PdQFeX0`oM7p?+KquGJYp@c{ml=gc zWOJ+Z#5q{40FURS1dS*z5Bp$^!g6W?YfB8u(qJ7*NSDEGSg*(Q_<7hc08L4~Oa<

k=PY#zTX=J8?PF^|lwa9(nFxc-gCTEsha3Sy3;^ z0<vz;CMzPi-S=R!|Ko+JLo?Ix4?A!6qI%ihEcr~!bEp^^#jsrwwiTfLKf;bVgin7A z(nlw)^!=HZH@`ZUq}3A7B7_}^r?UV%|Kr&;hiB)L_C)iS6b~cQ&$B; zmtLnjmP4f+8(zrd?sONGN|~O#$ZqHac2RR-5{JkY*=;rTRj|hEUtc^+q+!n(>>Yr8 c>N?=kGl1n9Mq3N}l^|`Yj?Pp&^n^z059)|uGXMYp literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/gm_2.jbl b/econml/data/input_dynamicdgp/gm_2.jbl new file mode 100644 index 0000000000000000000000000000000000000000..91737d308e5459eb8a96f1e818e080f4dc631403 GIT binary patch literal 1067 zcmZ8hJ4_To7{24+fCc3tqM(2Z;vu^7Qc;n^5G8TLX|Qk$lgaGfaJ#ave`c41;u1B= z!)T$EB`s>9A!4Bkory6f#LCLb#EL{4X-IH(?hKDDHu=B#|L=c%|IEFZ&L%8^sUzFw zlpjzcqYMrhGqHmTh%{0FM)MvLL^|$6GzUcqwK5WkI7oMG&vl67Gl(fgYK()vYeDfy z@}@TW?a$(dTFxO#GTd_#Qz#i3eDi62;oaI(wTvIwcnX<5p%7QnYFsdyj_E_G5>+d> z85+pL6x$5Sgu{eQu*0BSc&$JbjyD&m&ZVdalZW92`uDcot~7g&BheT?9%_8i(!@G^!;$BQr2> z?vPUS2o4LtrD-lflN#lR!_X|Sw3t9kfk9Xj98rt7%XmK=RU%4s7LJWUYeI>MzyWAW zrBc^7`6qr##E0XWXc1eMn?<&bJ!qc`w7P-L_J+-0$@SUArLWP|q|h}#MmS-Fy(Dtb zkuVyJMuS(WL}%cn(PT6mEk>&m5Usk%WsgaA^z%#C1T0 YX8`dv47V083qiW%o<6x71_Mq02k^aI>;M1& literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/gm_3.jbl b/econml/data/input_dynamicdgp/gm_3.jbl new file mode 100644 index 0000000000000000000000000000000000000000..4c3ce9289069d56c2cb71d1601323b7fe39df462 GIT binary patch literal 1067 zcmZ8hNoy2A6rP!ElU9>x5;bm7<1%q+lNdE>%(MswY%)kLDG)5Gdn)NR-PQf7x-;f5 z5RteX1b=`K5d;MhFWv+ZLA`j;9K0#$O;pIilu7+PhENxgQ!WPua3h@tJ88p^*c&~9G{EYG!c z7W?5e4qaOcrWF$7BnO0XHF<>4vVqOhWxKxdUfSsH9b8R;@U0Nc!znZ63!N1-cc)~LWi z*ik4H&is*|`kAQ+c3P@M;(0*{`#uR_*R|OCWW1|=HS{_E=I5O+A8KFbm9CXB!fq$& zC0B;-oYUcKaAcKc`U>oEHalCKPN&OB2tLjmx6buhPoK|Uy?4mtd8N&N@dSpwie_IK z_AjDID%O_}9hn)u{4ldHG&UOrZ0=?rwaNAt$!9x(N5$kch69SQrwqNz2>X^0{wX<6 zzGnlY^y}G!R@!N(C0;}b2Nh3$84fMuIlP3Y`Qh-~`HpYK9sa{=e_b%t9y4`SFdQ+P zRL63tlw&K1ncSU$ic-2A$cya9UgTCZ7dG*TT#=(r*Qspk?n&eH`>X-SuE6n87*y8* ZSDyha*D%>yIH3gT(fj)KUKolk{V%=8T>Jn4 literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/gm_4.jbl 
b/econml/data/input_dynamicdgp/gm_4.jbl new file mode 100644 index 0000000000000000000000000000000000000000..77af014faee8d2036d9892ad9d4e21f1dad3e8e5 GIT binary patch literal 1067 zcmZ8hNoy2A6rRaC>BJhr(X~Ls%0FaB*Q(YF@@sc6HneQ-+un%h8p7sHl9VMPbicqX|+T!nvUs1sS;Jo zxfvSB!xY;LV!~m{CfH$6Ch}A4q+J^^ANvHPlq&I%8B7I2eJH=CmU3j8P|aloDwK#? zy=o)V;hCqg1)CH}t>OowMBctmkU!@Us8owMIuTeFREc~u#LO5zRdiiPY&6f z5;Ptqy2(t}$vbXIsg<0^K@$Xra}HqyHN3^JUfp$gfftO zZ>~WpxB`s=aA}%L(41UuCN?lRE_yOf9$y$ZX>pf#=(iogNb zlS-vVR`@4=O2mh~nrIPQmOF!N8+)*CAcU0!@If7TN#AHPpF^uKr|v{@C8Avh>#4rSr+ z8k(?Tc^%R6yPBn?JFN-aa{lJ^ulEvaEpKm)e7fOUgiXyMI3ft!v(T}DuyY;Z-O6Xp zu6;H+)APl?HC~exOT2~v-ejPu!and+dm-f}mHa z5FPWO5{fi8aQN=@<&=_jmtSN%uzWM8In%L)`HJ)#twXW8u2adyKPw41HV?gU&C-~VNeLtE_Zgz9dIJhFrl_CmCTt z55_3utY{!!)k`jdw;Y6k;>m+ZE*^rQ2M-ZNTtKU9MzZvw;lKL-{pJ0tn{k7U*#uKp zajeN8q(nh^95QBM7nKots0<93JYfxh>|?_9LE$k4W;H6e>{HvWk##v2M(S@RzN6JsX47mFj}q^K(!jv z>bMyh$j1~r3~GeKm_xA3pjPCk*v)wkVgU{a$XQM1ArqJiga%M|SF7g8IH9`72-K^R zmRPls@Y?>-L`ba z4IRILe9Pd1$=S#lpcEIbQSAGK%CM8Gva1NYlOmplx?y9+pve${vYW$^y9|4@O*|tr zF>h|KT5%hi1>n&fm!L(9@xwuA6jopF$SD+)Q#zo)- zIFQX|FaG79_^DC=2X)aRwry_$IS%&W&~&Ib=U)E&`1?~jWqsXkCf=lluB9=;VKeF_ zS%l7{*<|i9d6jBx3cAb|v(;=fJIsjSM}Je>!>M3lesuBkyV<7;LYr0b7=ms=bEF7I z*U&^2E9;0ZpFVp1`;A|zQ_J@syqFBrS|e|7jeLgb*@TT15F8VPJw@om(^pbTF+6^eozM=flIG07Hs&jG*6bLpS?a1=`rclhfpb%E{t8?W b*8!EE0mRoZ+FIxrg7hfIPb$5T3U%clBwk*a literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/gm_6.jbl b/econml/data/input_dynamicdgp/gm_6.jbl new file mode 100644 index 0000000000000000000000000000000000000000..fbdfd1f70169bb966dbef2f3c10dfbb88606f956 GIT binary patch literal 1067 zcmZ8hJ4_To7`{8+3ko7WKprXx2g>mf6%@HM6ht>%l*G7+#W1@!uq(T>`_JqGS6qS^ z9wypY8Dl|VjCO`#E0tQ=5KCjJu^`6E#zbf5&hXe`lmDCl|Nh7K&ny%fe8wi2(S&2o z`vD^fG>rq!Elg1XQN{|uc-}*SNauZs<{%c=sv?mHMTYBmE+y3GP^8ARA_4lY4aH;0 zoBH_Huh)OH3IQ=PEj(v2gOah4Hy?hlz1@DSm5T!h&m+qx3`*6ERw@}SYWYy6Mzu;| zh6eI5!w!dX=`iCEOgU7@{0!5K>mcr9pMa9q6cI9qnM9Zml@GNtfy@%7yPQCk8qsQY 
zY-CZ9c^2DnKvlGAaUe?+?HdI7S&zU$tyrK_fo(&z%r{4@Y18K-Qq)+J&nN52BZtzG z#$&{=xaHD9L2qQQwS`6zoTq+8jCK&{Et8ys+v%nGKo~zrIo}hu_ zWszqYLNFyA8rK-Z**gq-9$^X`6{;M|!ST3^=Yek6m~*H%WS~MbICNK_K`Rj%xrs${ zC)C&yG)ll_86iQF78Qr1&@8cxoIp!~L0Sr&)MCPAY7kDT5jDC3rzfB_t`^C_VQ5RI z(>MN#PyW=14`+1QBDQUJ4ml3?pnWyayI0@iy2hVL{viF4=bk5}uK6)ShZ*(~&p~J0 ztT!7>QKcGPhAy+oY&KiWRx>21rjIt7GE19V9p5K!#ouj8ZFa=t2+m5Hb2;eVMH5z> z*hBQ9wWI56Pb@h(`fY7vdoiiiiS~BMXPB-{_)Hc-k0k8PLEk>Y{yl_awfcI`5J}x$ zesz8QQ(HYZJ+yAQB?{qFT<4y7?#%o6`lbk W)-c>!xGDwdP4o{W`d}o`6aN5t@mQ4r literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/lognorm_neg_0.jbl b/econml/data/input_dynamicdgp/lognorm_neg_0.jbl new file mode 100644 index 0000000000000000000000000000000000000000..21e84be89efa9f7d19a00c07f17255034af16e3a GIT binary patch literal 164 zcmZo*PR=XMEvVE>&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge0(t;q CfI2P! literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/lognorm_neg_2.jbl b/econml/data/input_dynamicdgp/lognorm_neg_2.jbl new file mode 100644 index 0000000000000000000000000000000000000000..f4f1cc696565109f642e42809d06643f6497d3c9 GIT binary patch literal 164 zcmZo*PR=XMEvVE>&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge>!oDNTt3(ra&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge~#d{Vmn+P;vtt&lIMkiWf9Ko0N(y3 literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/lognorm_pos_1.jbl b/econml/data/input_dynamicdgp/lognorm_pos_1.jbl new file mode 100644 index 0000000000000000000000000000000000000000..3e1fa82367247d794c5f2f3ec899315494630f5e GIT binary patch literal 164 
zcmZo*PR=XMEvVE>&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge(FXU}2&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Gej;_Yqi>)zWJa&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&7r$lqQlpa%d* C4mnT& literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/lognorm_pos_4.jbl b/econml/data/input_dynamicdgp/lognorm_pos_4.jbl new file mode 100644 index 0000000000000000000000000000000000000000..4728440281e2251d55ded4904a0e5f588ee671dc GIT binary patch literal 164 zcmZo*PR=XMEvVE>&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&7r$lqQlpa%e4 CBROFJ literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/lognorm_pos_6.jbl b/econml/data/input_dynamicdgp/lognorm_pos_6.jbl new file mode 100644 index 0000000000000000000000000000000000000000..ebed90e543979a438e97a2b118b66456876d09c5 GIT binary patch literal 164 zcmZo*PR=XMEvVE>&M!*U%Pq|*$xJLNO0486PEO28EaEC;fXH*DlvEa^auqU0FflMN zq*)X)c{6x3wihx76|!h}Ge&Px$kSfPs|NsaeJdpZ literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/n_2.jbl b/econml/data/input_dynamicdgp/n_2.jbl new file mode 100644 index 0000000000000000000000000000000000000000..d9a912dda9ac6a45877843fb379db7d2c9c4bf93 GIT binary patch literal 139 zcmZo*_L??}i6J?!G`FBqFFC&`RWG+RrzA76s3@_Lt2j9^C$WgD5Gv1=Qc_uv%2mi1 z!NkD8kZDrL(nWYsZ!qryD9aPBEUdXEl0E*Kr2LJ#7 literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/n_4.jbl b/econml/data/input_dynamicdgp/n_4.jbl new file mode 100644 index 0000000000000000000000000000000000000000..de5e2691a4d017b6fe3bf8f00aec23d3921fb8bb GIT binary patch literal 139 zcmZo*_L??}i6J?!G`FBqFFC&`RWG+RrzA76s3@_Lt2j9^C$WgD5Gv1=Qc_uv%2mi1 z!NkD8kZDrL(ndKu;c_CL@A$L$APkSM+9sq-`EDHbt literal 0 HcmV?d00001 diff --git a/econml/data/input_dynamicdgp/n_5.jbl b/econml/data/input_dynamicdgp/n_5.jbl new file mode 100644 index 
0000000000000000000000000000000000000000..e60b88978b95b303f53c8b690784c55a3073a743 GIT binary patch literal 139 zcmZo*_L??}i6J?!G`FBqFFC&`RWG+RrzA76s3@_Lt2j9^C$WgD5Gv1=Qc_uv%2mi1 z!NkD8kZDrL\n", + "\n", + "# Long-Term Return-on-Investment at Microsoft via Short-Term Proxies\n", + "\n", + "\n", + "Policy makers typically face the problem of wanting to estimate the treatment effect of some new incentives on long-run downstream interests. However, we only have historical data of older treatment options, and we haven't seen the long-run play out yet. We assume access to a long-term dataset where only past treatments were administered and a short-term dataset where novel treatments have been administered. We propose a surrogate based approach where we assume that the long-term effect is channeled through a multitude of available short-term proxies. Our work combines three major recent techniques in the causal machine learning literature: **surrogate indices**, **dynamic treatment effect estimation** and **double machine learning**, in a unified\n", + "pipeline. For more details, see this paper [here](https://arxiv.org/pdf/2103.08390.pdf).\n", + "\n", + "In this case study, we will show you how to apply this unified pipeline to a ROI estimation problem at Microsoft. These methodologies have already been implemented into our [EconML](https://aka.ms/econml) library and you could do it with only a few lines of code." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary\n", + "\n", + "1. [Background](#Background)\n", + "2. [Data](#Data)\n", + "3. [Do Dynamic Adjustment with EconML](#Do-Dynamic-Adjustment-with-EconML)\n", + "4. [Train Surrogate Index](#Train-Surrogate-Index)\n", + "5. [Run DML to learn ROI with EconML](#Run-DML-to-learn-ROI-with-EconML)\n", + "6. [Model Evaluation](#Model-Evaluation)\n", + "7. 
[Conclusions](#Conclusions)" + ] + }, + { + "attachments": { + "causal_graph.PNG": { + "image/png": "" + }, + "pipeline.PNG": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Background\n", + "\n", + "Microsoft provides multiple monetary and resource investments to enterprise customers in support of product adoption, and the sales manager would like to know which of these programs (\"investments\") are more successful than others. Specifically, we are interested in identifying the average treatment effect of each investment at some period $t$, on the cumulative outcome in the subsequent $m$ months. \n", + "\n", + "There are a few challenges in answering this question. First of all, we haven't fully observed the long-term revenue yet and we don't want to wait that long to evaluate a program. In addition, careful causal modeling is required to correctly attribute the long-term ROI of multiple programs in a holistic manner, avoiding the biased estimates coming from confounding effects or double counting issues. \n", + "\n", + "The causal graph below shows how to frame this problem:\n", + "\n", + "![causal_graph.PNG](attachment:causal_graph.PNG)\n", + "\n", + "**Methodology:** Our proposed adjusted surrogate index approach could address all the challenges above by assuming the long-term effect is channeled through some short-term observed surrogates and employing a dynamic adjustment step (`DynamicDML`) to the surrogate model in order to get rid of the effect from future investment, finally applying double machine learning (`DML`) techniques to estimate the ROI. 
\n", + "\n", + "The pipeline below tells you how to solve this problem step by step:\n", + "![pipeline.PNG](attachment:pipeline.PNG)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "from econml.data.dynamic_panel_dgp import SemiSynthetic\n", + "from sklearn.linear_model import LassoCV, MultiTaskLassoCV\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data\n", + "\n", + "The **semi-synthetic data*** is comprised of 4 components:\n", + " * **Surrogates:** short-term metrics that could represent long-term revenue\n", + " * **Treatments:** different types of monetary investments to the end customers\n", + " * **Outcomes:** cumulative long-term revenue\n", + " * **Controls:** lagged surrogates and treatments, other time-invariant controls (e.g. demographics)\n", + "\n", + "To build the semi-synthetic data we estimate a series of moments from a real-world dataset: a full covariance matrix of\n", + "all surrogates, treatments, and controls in one period and a series of linear prediction models (lassoCV) of each surrogate and\n", + "treatment on a set of 6 lags of each treatment, 6 lags of each surrogate, and time-invariant controls. Using these values, we draw new parameters from distributions matching the key characteristics of each family of parameters. Finally, we use these new\n", + "parameters to simulate surrogates, treatments, and controls by drawing a set of initial values from the covariance matrix and\n", + "forward simulating to match intertemporal relationships from the transformed prediction models. We use one surrogate to be the outcome of interests. Then we consider the effect of each treatment in period $t$ on the cumulative sum of outcome from following 4 periods. 
We can calculate the true treatment effects in the semi-synthetic data as a function of parameters from the linear prediction models.\n", + "\n", + "The input data is in a **panel format**. Each panel corresponds to one company and the different rows in a panel correspond to different time period. \n", + "\n", + "Example:\n", + "\n", + "||Company|Year|Features|Controls/Surrogates|T1|T2|T3|AdjRev|\n", + "|---|---|---|---|---|---|---|---|---|\n", + "|1|A|2018|...|...|\\$1,000|...|...|\\$10,000|\n", + "|2|A|2019|...|...|\\$2,000|...|...|\\$12,000|\n", + "|3|A|2020|...|...|\\$3,000|...|...|\\$15,000|\n", + "|4|A|2021|...|...|\\$3,000|...|...|\\$18,000|\n", + "|5|B|2018|...|...|\\$0|...|...|\\$5,000|\n", + "|6|B|2019|...|...|\\$1,000|...|...|\\$10,000|\n", + "|7|B|2020|...|...|\\$0|...|...|\\$7,000|\n", + "|8|B|2021|...|...|\\$1,200|...|...|\\$12,000|\n", + "|9|C|2018|...|...|\\$1,000|...|...|\\$20,000|\n", + "|10|C|2019|...|...|\\$1,500|...|...|\\$25,000|\n", + "|11|C|2020|...|...|\\$500|...|...|\\$18,000|\n", + "|12|C|2021|...|...|\\$500|...|...|\\$20,000|\n", + " \n", + " **For confidentiality reason, the data used in this case study is synthetically generated and the feature distributions don't exactly correspond to real distributions.*" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# generate historical dataset (training purpose)\n", + "np.random.seed(43)\n", + "dgp = SemiSynthetic()\n", + "dgp.create_instance()\n", + "n_periods = 4\n", + "n_units = 5000\n", + "n_treatments = dgp.n_treatments\n", + "random_seed = 43\n", + "thetas = np.random.uniform(0, 2, size=(dgp.n_proxies, n_treatments))\n", + "\n", + "panelX, panelT, panelY, panelGroups, true_effect = dgp.gen_data(\n", + " n_units, n_periods, thetas, random_seed\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outcome shape: 
(5000, 4)\n", + "Treatment shape: (5000, 4, 3)\n", + "Controls shape: (5000, 4, 89)\n" + ] + } + ], + "source": [ + "# print panel data shape\n", + "print(\"Outcome shape: \", panelY.shape)\n", + "print(\"Treatment shape: \", panelT.shape)\n", + "print(\"Controls shape: \", panelX.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# generate new dataset (testing purpose)\n", + "panelXnew, panelTnew, panelYnew, panelGroupsnew, true_effect_new = dgp.gen_data(\n", + " n_units, n_periods, thetas, random_seed\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True Long-term Effect for each investment: [2.78577429 3.93764671 0.74981862]\n" + ] + } + ], + "source": [ + "# print true long term effect\n", + "true_longterm_effect = np.sum(true_effect_new, axis=0)\n", + "print(\"True Long-term Effect for each investment: \", true_longterm_effect)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Do Dynamic Adjustment with EconML\n", + "From the causal graph above, we could see we want to first remove the effects of future incentives from the historical outcomes to create an **adjusted long-term revenue** as if those future incentives never happened.\n", + "\n", + "EconML's `DynamicDML` estimator is an extension of the Double Machine Learning approach to **dynamically estimate the period effect of treatments assigned sequentially over time periods**. In this scenario, it could help us to adjust the cumulative revenue by subtracting the period effect of all of the investments after the target investment.\n", + "\n", + "For more details about `DynamicDML`, please read this [paper](https://arxiv.org/pdf/2002.07285.pdf). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Helper function to reshape the panel data\n", + "def long(x): # reshape the panel data to (n_units * n_periods, -1)\n", + " n_units = x.shape[0]\n", + " n_periods = x.shape[1]\n", + " return (\n", + " x.reshape(n_units * n_periods)\n", + " if np.ndim(x) == 2\n", + " else x.reshape(n_units * n_periods, -1)\n", + " )\n", + "\n", + "\n", + "def wide(x): # reshape the panel data to (n_units, n_periods * d_x)\n", + " n_units = x.shape[0]\n", + " return x.reshape(n_units, -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# on historical data construct adjusted outcomes\n", + "from econml.dynamic.dml import DynamicDML\n", + "\n", + "panelYadj = panelY.copy()\n", + "\n", + "est = DynamicDML(\n", + " model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=2\n", + ")\n", + "for t in range(1, n_periods): # for each target period 1...m\n", + " # learn period effect for each period treatment on target period t\n", + " est.fit(\n", + " long(panelY[:, 1 : t + 1]),\n", + " long(panelT[:, 1 : t + 1, :]), # reshape data to long format\n", + " X=None,\n", + " W=long(panelX[:, 1 : t + 1, :]),\n", + " groups=long(panelGroups[:, 1 : t + 1]),\n", + " )\n", + " # remove effect of observed treatments\n", + " T1 = wide(panelT[:, 1 : t + 1, :])\n", + " panelYadj[:, t] = panelY[:, t] - est.effect(\n", + " T0=np.zeros_like(T1), T1=T1\n", + " ) # reshape data to wide format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train Surrogate Index\n", + "Once we have the adjusted outcome, we'd like to train any ML model to learn the relationship between short-term surrogates and long-term revenue from the historical dataset, assuming the treatment effect of investments on long-term revenue could **only** go through short-term surrogates, and the **relationship keeps the 
same** between the historical dataset and the new dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# train surrogate index on historical dataset\n", + "S=panelX[:,1,[6*(i+1)-1 for i in range(4)]] # surrogates columns\n", + "XS = np.hstack(\n", + " [panelX[:, 0], S]\n", + ") # concatenate controls and surrogates from historical dataset\n", + "TotalYadj = np.sum(panelYadj, axis=1) # total revenue from historical dataset\n", + "adjusted_proxy_model = LassoCV().fit(\n", + " XS, TotalYadj\n", + ") # train proxy model from historical dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# predict new long term revenue\n", + "Snew=panelXnew[:,1,[6*(i+1)-1 for i in range(4)]] # surrogates columns\n", + "XSnew = np.hstack(\n", + " [panelXnew[:, 0], Snew]\n", + ") # concatenate controls and surrogates from new dataset\n", + "sindex_adj = adjusted_proxy_model.predict(XSnew)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run DML to learn ROI with EconML\n", + "Finally we will call the `LinearDML` estimator from EconML to learn the treatment effect of multiple investments on the adjusted surrogate index in the new dataset. `LinearDML` is a two-stage machine learning model for estimating **(heterogeneous) treatment effects** when all potential confounders are observed; it leverages the power of machine learning to deal with **high-dimensional datasets** while still being able to construct **confidence intervals**. \n", + "\n", + "For more details, please read this [paper](https://arxiv.org/pdf/1608.00060.pdf). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True Long-term Effect for each investment: [2.78577429 3.93764671 0.74981862]\n", + "Coefficient Results: X is None, please call intercept_inference to learn the constant!\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|T0 2.826 0.007 405.552 0.0 2.812 2.84
cate_intercept|T1 4.03 0.023 174.383 0.0 3.985 4.075
cate_intercept|T2 0.676 0.035 19.407 0.0 0.608 0.745


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " CATE Intercept Results \n", + "========================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "------------------------------------------------------------------------\n", + "cate_intercept|T0 2.826 0.007 405.552 0.0 2.812 2.84\n", + "cate_intercept|T1 4.03 0.023 174.383 0.0 3.985 4.075\n", + "cate_intercept|T2 0.676 0.035 19.407 0.0 0.608 0.745\n", + "------------------------------------------------------------------------\n", + "\n", + "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", + "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", + "where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:\n", + "$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$\n", + "where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. 
Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.\n", + "\"\"\"" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# learn treatment effect on surrogate index on new dataset\n", + "from econml.dml import LinearDML\n", + "\n", + "adjsurr_est = LinearDML(\n", + " model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=3\n", + ")\n", + "# fit treatment_0 on total revenue from new dataset\n", + "adjsurr_est.fit(sindex_adj, panelTnew[:, 0], X=None, W=panelXnew[:, 0])\n", + "# print treatment effect summary\n", + "print(\"True Long-term Effect for each investment: \", true_longterm_effect)\n", + "adjsurr_est.summary(alpha=0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# save the treatment effect and confidence interval\n", + "adjsurr_point_est = adjsurr_est.intercept_\n", + "adjsurr_conf_int_lb, adjsurr_conf_int_ub = adjsurr_est.intercept__interval(alpha=0.05)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model Evaluation\n", + "Now we want to compare the proposed **adjusted surrogate index** approach with estimation from realized long-term outcome. Below we train another `LinearDML` model on the realized cumulative revenue directly, without any adjustment. And then we visualize the two models output, comparing with the ground truth." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True Long-term Effect for each investment: [2.78577429 3.93764671 0.74981862]\n", + "Coefficient Results: X is None, please call intercept_inference to learn the constant!\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|T0 6.73 0.052 129.741 0.0 6.628 6.832
cate_intercept|T1 9.754 0.132 73.643 0.0 9.494 10.014
cate_intercept|T2 3.781 0.12 31.484 0.0 3.545 4.016


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " CATE Intercept Results \n", + "========================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "------------------------------------------------------------------------\n", + "cate_intercept|T0 6.73 0.052 129.741 0.0 6.628 6.832\n", + "cate_intercept|T1 9.754 0.132 73.643 0.0 9.494 10.014\n", + "cate_intercept|T2 3.781 0.12 31.484 0.0 3.545 4.016\n", + "------------------------------------------------------------------------\n", + "\n", + "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", + "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", + "where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:\n", + "$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$\n", + "where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. 
Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.\n", + "\"\"\"" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# learn treatment effect on direct outcome\n", + "from econml.dml import LinearDML\n", + "\n", + "direct_est = LinearDML(\n", + " model_y=LassoCV(max_iter=1000), model_t=MultiTaskLassoCV(max_iter=1000), cv=3\n", + ")\n", + "# fit treatment_0 on total revenue from new dataset\n", + "direct_est.fit(np.sum(panelYnew, axis=1), panelTnew[:, 0], X=None, W=panelXnew[:, 0])\n", + "# print treatment effect summary\n", + "print(\"True Long-term Effect for each investment: \", true_longterm_effect)\n", + "direct_est.summary(alpha=0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# save the treatment effect and confidence interval\n", + "direct_point_est = direct_est.intercept_\n", + "direct_conf_int_lb, direct_conf_int_ub = direct_est.intercept__interval(alpha=0.05)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0.98, 'Error bar plot of treatment effect from different models')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "

" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot the error bar plot of different models\n", + "plt.figure(figsize=(18, 6))\n", + "plt.subplot(1, 2, 1)\n", + "\n", + "plt.errorbar(\n", + " np.arange(n_treatments) - 0.04,\n", + " true_longterm_effect,\n", + " fmt=\"o\",\n", + " alpha=0.6,\n", + " label=\"Ground truth\",\n", + ")\n", + "plt.errorbar(\n", + " np.arange(n_treatments),\n", + " adjsurr_point_est,\n", + " yerr=(\n", + " adjsurr_conf_int_ub - adjsurr_point_est,\n", + " adjsurr_point_est - adjsurr_conf_int_lb,\n", + " ),\n", + " fmt=\"o\",\n", + " label=\"Adjusted Surrogate Index\",\n", + ")\n", + "plt.xticks(np.arange(n_treatments), [\"T0\", \"T1\", \"T2\"])\n", + "plt.ylabel(\"Effect\")\n", + "plt.legend()\n", + "\n", + "plt.subplot(1, 2, 2)\n", + "plt.errorbar(\n", + " np.arange(n_treatments) - 0.04,\n", + " true_longterm_effect,\n", + " fmt=\"o\",\n", + " alpha=0.6,\n", + " label=\"Ground truth\",\n", + ")\n", + "plt.errorbar(\n", + " np.arange(n_treatments),\n", + " adjsurr_point_est,\n", + " yerr=(\n", + " adjsurr_conf_int_ub - adjsurr_point_est,\n", + " adjsurr_point_est - adjsurr_conf_int_lb,\n", + " ),\n", + " fmt=\"o\",\n", + " label=\"Adjusted Surrogate Index\",\n", + ")\n", + "plt.errorbar(\n", + " np.arange(n_treatments) + 0.04,\n", + " direct_point_est,\n", + " yerr=(direct_conf_int_ub - direct_point_est, direct_point_est - direct_conf_int_lb),\n", + " fmt=\"o\",\n", + " label=\"Direct Model\",\n", + ")\n", + "plt.xticks(np.arange(n_treatments), [\"T0\", \"T1\", \"T2\"])\n", + "plt.ylabel(\"Effect\")\n", + "plt.legend()\n", + "plt.suptitle(\"Error bar plot of treatment effect from different models\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We could see the **adjusted surrogate index** approach does a good job overcomes a common data limitation when considering long-term effects of novel treatments and expands the surrogate approach 
to consider a common, and previously\n", + "problematic, pattern of serially correlated treatments." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conclusions\n", + "\n", + "In this notebook, we have demonstrated the power of using EconML to:\n", + "\n", + "* estimate treatment effects in settings where multiple treatments are assigned over time and treatments can have a causal effect on future outcomes\n", + "* correct the bias coming from auto-correlation of the historical treatment policy\n", + "* use Machine Learning to enable estimation with high-dimensional surrogates and controls\n", + "* solve a complex problem using a unified pipeline with only a few lines of code\n", + "\n", + "To learn more about what EconML can do for you, visit our [website](https://aka.ms/econml), our [GitHub page](https://github.com/microsoft/EconML) or our [documentation](https://econml.azurewebsites.net/). " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From a5bd8e9c5c5283ebaf7388f0de48583cb3553f97 Mon Sep 17 00:00:00 2001 From: Maggie Hei Date: Mon, 9 Aug 2021 14:07:39 -0400 Subject: [PATCH 24/27] update setup to install jbl file --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 266aa9d16..185fdd26e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -82,6 +82,7 @@ exclude = [options.package_data] ; include all CSV files as data * = *.csv +* = *.jbl ; coverage configuration [coverage:run] From da8085d585c8eaa3e4ad90460b8e16bb4ae3b964 Mon Sep 17 00:00:00 2001 From: Maggie Hei Date: Mon, 9 Aug 2021 14:21:32 -0400 Subject: [PATCH 25/27] update 
setup to install jbl file --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 185fdd26e..0d9f2db65 100644 --- a/setup.cfg +++ b/setup.cfg @@ -82,7 +82,7 @@ exclude = [options.package_data] ; include all CSV files as data * = *.csv -* = *.jbl + *.jbl ; coverage configuration [coverage:run] From ebef40b7d8898f0bc66b732b97c06809c512b612 Mon Sep 17 00:00:00 2001 From: Maggie Hei Date: Mon, 9 Aug 2021 19:54:28 -0400 Subject: [PATCH 26/27] update roi notebook --- econml/data/dynamic_panel_dgp.py | 5 +- ... at Microsoft via Short-Term Proxies.ipynb | 612 ++++++++++++++++-- 2 files changed, 552 insertions(+), 65 deletions(-) diff --git a/econml/data/dynamic_panel_dgp.py b/econml/data/dynamic_panel_dgp.py index 07c280083..82b842912 100644 --- a/econml/data/dynamic_panel_dgp.py +++ b/econml/data/dynamic_panel_dgp.py @@ -431,7 +431,10 @@ def gen_data(self, n, n_periods, thetas, random_seed): ].mean() true_effect[t, i] = ate_treated - ate_control - panelX = fn_df_control[self.columns].values.reshape(-1, n_periods, n_x) + new_index = ["proxy1", "proxy2", "proxy3", "proxy4"] + new_columns = [f"{ind}_{i}" for ind in new_index for i in range(-6, 0)] +\ + [f"demo_{i}" for i in range(47)] + panelX = fn_df_control[new_columns].values.reshape(-1, n_periods, len(new_columns)) panelT = fn_df_control[self.index[n_proxies:] ].values.reshape(-1, n_periods, n_treatments) panelY = fn_df_control[outcome].values.reshape(-1, n_periods) diff --git a/notebooks/CustomerScenarios/Case Study - Long-Term Return-on-Investment at Microsoft via Short-Term Proxies.ipynb b/notebooks/CustomerScenarios/Case Study - Long-Term Return-on-Investment at Microsoft via Short-Term Proxies.ipynb index 26b74c7e2..4c6ca7658 100644 --- a/notebooks/CustomerScenarios/Case Study - Long-Term Return-on-Investment at Microsoft via Short-Term Proxies.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Long-Term Return-on-Investment at Microsoft via Short-Term 
Proxies.ipynb @@ -25,9 +25,10 @@ "2. [Data](#Data)\n", "3. [Do Dynamic Adjustment with EconML](#Do-Dynamic-Adjustment-with-EconML)\n", "4. [Train Surrogate Index](#Train-Surrogate-Index)\n", - "5. [Run DML to learn ROI with EconML](#Run-DML-to-learn-ROI-with-EconML)\n", + "5. [Run DML to Learn ROI with EconML](#Run-DML-to-Learn-ROI-with-EconML)\n", "6. [Model Evaluation](#Model-Evaluation)\n", - "7. [Conclusions](#Conclusions)" + "7. [Extensions -- Including Heterogeneity in Effect](#Extensions----Including-Heterogeneity-in-Effect)\n", + "8. [Conclusions](#Conclusions)" ] }, { @@ -60,9 +61,34 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 2;\n", + " var nbb_formatted_code = \"# imports\\nfrom econml.data.dynamic_panel_dgp import SemiSynthetic\\nfrom sklearn.linear_model import LassoCV, MultiTaskLassoCV\\nimport numpy as np\\nimport matplotlib.pyplot as plt\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# imports\n", "from econml.data.dynamic_panel_dgp import SemiSynthetic\n", @@ -113,9 +139,34 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 3;\n", + " var nbb_formatted_code = \"# generate historical dataset (training purpose)\\nnp.random.seed(43)\\ndgp = SemiSynthetic()\\ndgp.create_instance()\\nn_periods = 4\\nn_units = 5000\\nn_treatments = 
dgp.n_treatments\\nrandom_seed = 43\\nthetas = np.random.uniform(0, 2, size=(dgp.n_proxies, n_treatments))\\n\\npanelX, panelT, panelY, panelGroups, true_effect = dgp.gen_data(\\n n_units, n_periods, thetas, random_seed\\n)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# generate historical dataset (training purpose)\n", "np.random.seed(43)\n", @@ -134,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -143,8 +194,32 @@ "text": [ "Outcome shape: (5000, 4)\n", "Treatment shape: (5000, 4, 3)\n", - "Controls shape: (5000, 4, 89)\n" + "Controls shape: (5000, 4, 71)\n" ] + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 4;\n", + " var nbb_formatted_code = \"# print panel data shape\\nprint(\\\"Outcome shape: \\\", panelY.shape)\\nprint(\\\"Treatment shape: \\\", panelT.shape)\\nprint(\\\"Controls shape: \\\", panelX.shape)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -156,27 +231,77 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 5;\n", + " var nbb_formatted_code = \"# 
generate new dataset (testing purpose)\\nthetas_new = np.random.uniform(0, 2, size=(dgp.n_proxies, n_treatments))\\npanelXnew, panelTnew, panelYnew, panelGroupsnew, true_effect_new = dgp.gen_data(\\n n_units, n_periods, thetas_new, random_seed\\n)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# generate new dataset (testing purpose)\n", + "thetas_new = np.random.uniform(0, 2, size=(dgp.n_proxies, n_treatments))\n", "panelXnew, panelTnew, panelYnew, panelGroupsnew, true_effect_new = dgp.gen_data(\n", - " n_units, n_periods, thetas, random_seed\n", + " n_units, n_periods, thetas_new, random_seed\n", ")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "True Long-term Effect for each investment: [2.78577429 3.93764671 0.74981862]\n" + "True Long-term Effect for each investment: [0.90994672 0.709811 2.45310877]\n" ] + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 6;\n", + " var nbb_formatted_code = \"# print true long term effect\\ntrue_longterm_effect = np.sum(true_effect_new, axis=0)\\nprint(\\\"True Long-term Effect for each investment: \\\", true_longterm_effect)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ 
-199,9 +324,34 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 7;\n", + " var nbb_formatted_code = \"# Helper function to reshape the panel data\\ndef long(x): # reshape the panel data to (n_units * n_periods, -1)\\n n_units = x.shape[0]\\n n_periods = x.shape[1]\\n return (\\n x.reshape(n_units * n_periods)\\n if np.ndim(x) == 2\\n else x.reshape(n_units * n_periods, -1)\\n )\\n\\n\\ndef wide(x): # reshape the panel data to (n_units, n_periods * d_x)\\n n_units = x.shape[0]\\n return x.reshape(n_units, -1)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Helper function to reshape the panel data\n", "def long(x): # reshape the panel data to (n_units * n_periods, -1)\n", @@ -221,9 +371,34 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 8;\n", + " var nbb_formatted_code = \"# on historical data construct adjusted outcomes\\nfrom econml.dynamic.dml import DynamicDML\\n\\npanelYadj = panelY.copy()\\n\\nest = DynamicDML(\\n model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=2\\n)\\nfor t in range(1, n_periods): # for each target period 1...m\\n # learn period effect for each period treatment on target period t\\n est.fit(\\n long(panelY[:, 1 : t + 1]),\\n long(panelT[:, 1 : t + 1, :]), # reshape data to long format\\n X=None,\\n 
W=long(panelX[:, 1 : t + 1, :]),\\n groups=long(panelGroups[:, 1 : t + 1]),\\n )\\n # remove effect of observed treatments\\n T1 = wide(panelT[:, 1 : t + 1, :])\\n panelYadj[:, t] = panelY[:, t] - est.effect(\\n T0=np.zeros_like(T1), T1=T1\\n ) # reshape data to wide format\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# on historical data construct adjusted outcomes\n", "from econml.dynamic.dml import DynamicDML\n", @@ -259,14 +434,38 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 9;\n", + " var nbb_formatted_code = \"# train surrogate index on historical dataset\\nXS = np.hstack(\\n [panelX[:, 1], panelYadj[:, :1]]\\n) # concatenate controls and surrogates from historical dataset\\nTotalYadj = np.sum(panelYadj, axis=1) # total revenue from historical dataset\\nadjusted_proxy_model = LassoCV().fit(\\n XS, TotalYadj\\n) # train proxy model from historical dataset\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# train surrogate index on historical dataset\n", - "S=panelX[:,1,[6*(i+1)-1 for i in range(4)]] # surrogates columns\n", "XS = np.hstack(\n", - " [panelX[:, 0], S]\n", + " [panelX[:, 1], 
panelYadj[:, :1]]\n", ") # concatenate controls and surrogates from historical dataset\n", "TotalYadj = np.sum(panelYadj, axis=1) # total revenue from historical dataset\n", "adjusted_proxy_model = LassoCV().fit(\n", @@ -276,14 +475,38 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 10;\n", + " var nbb_formatted_code = \"# predict new long term revenue\\nXSnew = np.hstack(\\n [panelXnew[:, 1], panelYnew[:, :1]]\\n) # concatenate controls and surrogates from new dataset\\nsindex_adj = adjusted_proxy_model.predict(XSnew)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# predict new long term revenue\n", - "Snew=panelXnew[:,1,[6*(i+1)-1 for i in range(4)]] # surrogates columns\n", "XSnew = np.hstack(\n", - " [panelXnew[:, 0], Snew]\n", + " [panelXnew[:, 1], panelYnew[:, :1]]\n", ") # concatenate controls and surrogates from new dataset\n", "sindex_adj = adjusted_proxy_model.predict(XSnew)" ] @@ -292,7 +515,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Run DML to learn ROI with EconML\n", + "# Run DML to Learn ROI with EconML\n", "Finally we will call `LinearDML` estimator from EconML to learn the treatment effect of multiple investments on the adjusted surrogate index in new dataset. 
`LinearDML` is a two stage machine learning models for estimating **(heterogeneous) treatment effects** when all potential confounders are observed, it leverages the machine learning power to deal with **high dimensional dataset** and still be able to construct **confidence intervals**. \n", "\n", "For more details, please read this [paper](https://arxiv.org/pdf/1608.00060.pdf). " @@ -300,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -309,7 +532,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "True Long-term Effect for each investment: [2.78577429 3.93764671 0.74981862]\n", + "True Long-term Effect for each investment: [0.90994672 0.709811 2.45310877]\n", "Coefficient Results: X is None, please call intercept_inference to learn the constant!\n" ] }, @@ -319,30 +542,30 @@ "\n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|T0 2.826 0.007 405.552 0.0 2.812 2.84cate_intercept|T0 0.83 0.015 57.214 0.0 0.802 0.858
cate_intercept|T1 4.03 0.023 174.383 0.0 3.985 4.075cate_intercept|T1 0.677 0.028 23.767 0.0 0.621 0.733
cate_intercept|T2 0.676 0.035 19.407 0.0 0.608 0.745cate_intercept|T2 2.438 0.035 69.711 0.0 2.369 2.507


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], "text/plain": [ "\n", "\"\"\"\n", - " CATE Intercept Results \n", - "========================================================================\n", - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "------------------------------------------------------------------------\n", - "cate_intercept|T0 2.826 0.007 405.552 0.0 2.812 2.84\n", - "cate_intercept|T1 4.03 0.023 174.383 0.0 3.985 4.075\n", - "cate_intercept|T2 0.676 0.035 19.407 0.0 0.608 0.745\n", - "------------------------------------------------------------------------\n", + " CATE Intercept Results \n", + "=======================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------------------\n", + "cate_intercept|T0 0.83 0.015 57.214 0.0 0.802 0.858\n", + "cate_intercept|T1 0.677 0.028 23.767 0.0 0.621 0.733\n", + "cate_intercept|T2 2.438 0.035 69.711 0.0 2.369 2.507\n", + "-----------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", @@ -352,9 +575,33 @@ "\"\"\"" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 11;\n", + " var nbb_formatted_code = \"# learn treatment effect on surrogate index on new dataset\\nfrom econml.dml import LinearDML\\n\\nadjsurr_est = LinearDML(\\n model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=3\\n)\\n# fit treatment_0 on total revenue from new dataset\\nadjsurr_est.fit(sindex_adj, panelTnew[:, 0], X=None, W=panelXnew[:, 0])\\n# print treatment effect summary\\nprint(\\\"True Long-term Effect for each investment: \\\", true_longterm_effect)\\nadjsurr_est.summary(alpha=0.05)\";\n", + " var 
nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -373,9 +620,34 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 12;\n", + " var nbb_formatted_code = \"# save the treatment effect and confidence interval\\nadjsurr_point_est = adjsurr_est.intercept_\\nadjsurr_conf_int_lb, adjsurr_conf_int_ub = adjsurr_est.intercept__interval(alpha=0.05)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# save the treatment effect and confidence interval\n", "adjsurr_point_est = adjsurr_est.intercept_\n", @@ -392,14 +664,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "True Long-term Effect for each investment: [2.78577429 3.93764671 0.74981862]\n", + "True Long-term Effect for each investment: [0.90994672 0.709811 2.45310877]\n", "Coefficient Results: X is None, please call intercept_inference to learn the constant!\n" ] }, @@ -409,30 +681,30 @@ "\n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|T0 6.73 0.052 129.741 0.0 6.628 6.832cate_intercept|T0 2.227 0.039 56.865 0.0 2.15 2.304
cate_intercept|T1 9.754 0.132 73.643 0.0 9.494 10.014cate_intercept|T1 1.561 0.226 6.911 0.0 1.118 2.004
cate_intercept|T2 3.781 0.12 31.484 0.0 3.545 4.016cate_intercept|T2 4.335 0.209 20.748 0.0 3.926 4.745


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" ], "text/plain": [ "\n", "\"\"\"\n", - " CATE Intercept Results \n", - "========================================================================\n", - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "------------------------------------------------------------------------\n", - "cate_intercept|T0 6.73 0.052 129.741 0.0 6.628 6.832\n", - "cate_intercept|T1 9.754 0.132 73.643 0.0 9.494 10.014\n", - "cate_intercept|T2 3.781 0.12 31.484 0.0 3.545 4.016\n", - "------------------------------------------------------------------------\n", + " CATE Intercept Results \n", + "=======================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------------------\n", + "cate_intercept|T0 2.227 0.039 56.865 0.0 2.15 2.304\n", + "cate_intercept|T1 1.561 0.226 6.911 0.0 1.118 2.004\n", + "cate_intercept|T2 4.335 0.209 20.748 0.0 3.926 4.745\n", + "-----------------------------------------------------------------------\n", "\n", "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", @@ -442,9 +714,33 @@ "\"\"\"" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 13;\n", + " var nbb_formatted_code = \"# learn treatment effect on direct outcome\\nfrom econml.dml import LinearDML\\n\\ndirect_est = LinearDML(\\n model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=3\\n)\\n# fit treatment_0 on total revenue from new dataset\\ndirect_est.fit(np.sum(panelYnew, axis=1), panelTnew[:, 0], X=None, W=panelXnew[:, 0])\\n# print treatment effect summary\\nprint(\\\"True Long-term Effect for each investment: \\\", true_longterm_effect)\\ndirect_est.summary(alpha=0.05)\";\n", + " var nbb_cells 
= Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -452,7 +748,7 @@ "from econml.dml import LinearDML\n", "\n", "direct_est = LinearDML(\n", - " model_y=LassoCV(max_iter=1000), model_t=MultiTaskLassoCV(max_iter=1000), cv=3\n", + " model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=3\n", ")\n", "# fit treatment_0 on total revenue from new dataset\n", "direct_est.fit(np.sum(panelYnew, axis=1), panelTnew[:, 0], X=None, W=panelXnew[:, 0])\n", @@ -463,9 +759,34 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 14;\n", + " var nbb_formatted_code = \"# save the treatment effect and confidence interval\\ndirect_point_est = direct_est.intercept_\\ndirect_conf_int_lb, direct_conf_int_ub = direct_est.intercept__interval(alpha=0.05)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# save the treatment effect and confidence interval\n", "direct_point_est = direct_est.intercept_\n", @@ -474,7 +795,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -483,13 +804,13 @@ "Text(0.5, 0.98, 'Error bar plot of treatment effect from different models')" ] }, - 
"execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -498,6 +819,30 @@ "needs_background": "light" }, "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 15;\n", + " var nbb_formatted_code = \"# plot the error bar plot of different models\\nplt.figure(figsize=(18, 6))\\nplt.subplot(1, 2, 1)\\n\\nplt.errorbar(\\n np.arange(n_treatments) - 0.04,\\n true_longterm_effect,\\n fmt=\\\"o\\\",\\n alpha=0.6,\\n label=\\\"Ground truth\\\",\\n)\\nplt.errorbar(\\n np.arange(n_treatments),\\n adjsurr_point_est,\\n yerr=(\\n adjsurr_conf_int_ub - adjsurr_point_est,\\n adjsurr_point_est - adjsurr_conf_int_lb,\\n ),\\n fmt=\\\"o\\\",\\n label=\\\"Adjusted Surrogate Index\\\",\\n)\\nplt.xticks(np.arange(n_treatments), [\\\"T0\\\", \\\"T1\\\", \\\"T2\\\"])\\nplt.ylabel(\\\"Effect\\\")\\nplt.legend()\\n\\nplt.subplot(1, 2, 2)\\nplt.errorbar(\\n np.arange(n_treatments) - 0.04,\\n true_longterm_effect,\\n fmt=\\\"o\\\",\\n alpha=0.6,\\n label=\\\"Ground truth\\\",\\n)\\nplt.errorbar(\\n np.arange(n_treatments),\\n adjsurr_point_est,\\n yerr=(\\n adjsurr_conf_int_ub - adjsurr_point_est,\\n adjsurr_point_est - adjsurr_conf_int_lb,\\n ),\\n fmt=\\\"o\\\",\\n label=\\\"Adjusted Surrogate Index\\\",\\n)\\nplt.errorbar(\\n np.arange(n_treatments) + 0.04,\\n direct_point_est,\\n yerr=(direct_conf_int_ub - direct_point_est, direct_point_est - direct_conf_int_lb),\\n fmt=\\\"o\\\",\\n label=\\\"Direct Model\\\",\\n)\\nplt.xticks(np.arange(n_treatments), [\\\"T0\\\", \\\"T1\\\", \\\"T2\\\"])\\nplt.ylabel(\\\"Effect\\\")\\nplt.legend()\\nplt.suptitle(\\\"Error bar plot of treatment effect from different models\\\")\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + 
"output_type": "display_data" } ], "source": [ @@ -565,6 +910,145 @@ "problematic, pattern of serially correlated treatments." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extensions -- Including Heterogeneity in Effect\n", + "\n", + "Finally, I will show that our EconML's `DynamicDML` and `LinearDML` estimators could not only learn Average Treatment Effect (ATE), but also **Heterogeneous Treatment Effect (CATE)**, which will return the treatment effect as a function of interested characteristics. In the example below, I will use first control variable as feature to learn effect heterogeneity, and retrain the final `LinearDML` model. Similarly, you could train `DynamicDML` with feature $X$ as well." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True Long-term Effect for each investment: [0.90994672 0.709811 2.45310877]\n", + "Average treatment effect for each investment: [0.82738185 0.71610965 2.56087599]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Coefficient Results
point_estimate stderr zstat pvalue ci_lower ci_upper
X0|T0 0.009 0.011 0.76 0.447 -0.014 0.031
X0|T1 0.037 0.031 1.218 0.223 -0.023 0.098
X0|T2 -0.072 0.151 -0.478 0.633 -0.369 0.224
\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
CATE Intercept Results
point_estimate stderr zstat pvalue ci_lower ci_upper
cate_intercept|T0 0.827 0.015 56.625 0.0 0.799 0.856
cate_intercept|T1 0.716 0.032 22.466 0.0 0.654 0.779
cate_intercept|T2 2.56 0.237 10.82 0.0 2.096 3.024


A linear parametric conditional average treatment effect (CATE) model was fitted:
$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$
where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:
$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$
where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Coefficient Results \n", + "===========================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------\n", + "X0|T0 0.009 0.011 0.76 0.447 -0.014 0.031\n", + "X0|T1 0.037 0.031 1.218 0.223 -0.023 0.098\n", + "X0|T2 -0.072 0.151 -0.478 0.633 -0.369 0.224\n", + " CATE Intercept Results \n", + "=======================================================================\n", + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "-----------------------------------------------------------------------\n", + "cate_intercept|T0 0.827 0.015 56.625 0.0 0.799 0.856\n", + "cate_intercept|T1 0.716 0.032 22.466 0.0 0.654 0.779\n", + "cate_intercept|T2 2.56 0.237 10.82 0.0 2.096 3.024\n", + "-----------------------------------------------------------------------\n", + "\n", + "A linear parametric conditional average treatment effect (CATE) model was fitted:\n", + "$Y = \\Theta(X)\\cdot T + g(X, W) + \\epsilon$\n", + "where for every outcome $i$ and treatment $j$ the CATE $\\Theta_{ij}(X)$ has the form:\n", + "$\\Theta_{ij}(X) = \\phi(X)' coef_{ij} + cate\\_intercept_{ij}$\n", + "where $\\phi(X)$ is the output of the `featurizer` or $X$ if `featurizer`=None. Coefficient Results table portrays the $coef_{ij}$ parameter vector for each outcome $i$ and treatment $j$. 
Intercept Results table portrays the $cate\\_intercept_{ij}$ parameter.\n", + "\"\"\"" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 16;\n", + " var nbb_formatted_code = \"# learn treatment effect on surrogate index on new dataset\\nfrom econml.dml import LinearDML\\n\\nadjsurr_est = LinearDML(\\n model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=3\\n)\\n# fit treatment_0 on total revenue from new dataset\\nadjsurr_est.fit(\\n sindex_adj, panelTnew[:, 0], X=panelXnew[:, 0, :1], W=panelXnew[:, 0, 1:]\\n)\\n# print treatment effect summary\\nprint(\\\"True Long-term Effect for each investment: \\\", true_longterm_effect)\\nprint(\\n \\\"Average treatment effect for each investment: \\\",\\n adjsurr_est.const_marginal_ate(panelXnew[:, 0, :1]),\\n)\\nadjsurr_est.summary(alpha=0.05)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# learn treatment effect on surrogate index on new dataset\n", + "from econml.dml import LinearDML\n", + "\n", + "adjsurr_est = LinearDML(\n", + " model_y=LassoCV(max_iter=2000), model_t=MultiTaskLassoCV(max_iter=2000), cv=3\n", + ")\n", + "# fit treatment_0 on total revenue from new dataset\n", + "adjsurr_est.fit(\n", + " sindex_adj, panelTnew[:, 0], X=panelXnew[:, 0, :1], W=panelXnew[:, 0, 1:]\n", + ")\n", + "# print treatment effect summary\n", + "print(\"True Long-term Effect for each investment: \", true_longterm_effect)\n", + "print(\n", + " \"Average treatment effect for each investment: \",\n", + " 
adjsurr_est.const_marginal_ate(panelXnew[:, 0, :1]),\n", + ")\n", + "adjsurr_est.summary(alpha=0.05)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From the summary table above, none of the coefficients for feature $X0$ are significant; that is, no effect heterogeneity is identified, which is consistent with the data generation process." + ] + }, { "cell_type": "markdown", "metadata": {}, From e77c5685802db1eb3e62bca4f64484f72a625b6a Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Mon, 9 Aug 2021 07:53:47 -0400 Subject: [PATCH 27/27] Limit test paralellization --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4bc7d52b5..bedbbd8de 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -169,7 +169,7 @@ jobs: - script: 'pip install pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)"' + PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)" -n 2' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml' @@ -223,7 +223,7 @@ jobs: - script: 'pip install pytest-runner && python setup.py pytest' displayName: 'Unit tests' env: - PYTEST_ADDOPTS: '-m "causal"' + PYTEST_ADDOPTS: '-m "causal" -n 1' COVERAGE_PROCESS_START: 'setup.cfg' - task: PublishTestResults@2 displayName: 'Publish Test Results **/test-results.xml'