Skip to content

Commit

Permalink
Fix policy computation with array outcome
Browse files Browse the repository at this point in the history
  • Loading branch information
kbattocchi committed Jun 26, 2021
1 parent d493d28 commit b1a7f44
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 19 deletions.
11 changes: 11 additions & 0 deletions econml/solutions/causal_analysis/_causal_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1422,12 +1422,23 @@ def individualized_policy(self, Xtest, feature_index, *, n_rows=None, treatment_
else:
effect = result.estimator.const_marginal_effect_inference(Xtest)

multi_y = (not self._vec_y) or self.classification

if multi_y and result.feature_baseline is not None and np.ndim(treatment_costs) == 2:
# treatment costs have shape (n, d_t-1), so insert a y axis at position 1 to broadcast safely
treatment_costs = np.expand_dims(treatment_costs, 1)

effect.translate(-treatment_costs)

est = effect.point_estimate
est_lb = effect.conf_int(alpha)[0]
est_ub = effect.conf_int(alpha)[1]

if multi_y: # y was an array, not a vector
est = np.squeeze(est, 1)
est_lb = np.squeeze(est_lb, 1)
est_ub = np.squeeze(est_ub, 1)

if result.feature_baseline is None:
rec = np.empty(est.shape[0], dtype=object)
rec[est > 0] = "increase"
Expand Down
40 changes: 21 additions & 19 deletions econml/tests/test_causal_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def test_over_cat_limit(self):
self.assertEqual([res.feature_name for res in ca._results], ['a', 'b', 'c', 'd', 'f', 'g', 'h'])

def test_individualized_policy(self):
y = pd.Series(np.random.choice([0, 1], size=(500,)))
y_arr = np.random.choice([0, 1], size=(500,))
X = pd.DataFrame({'a': np.random.normal(size=500),
'b': np.random.normal(size=500),
'c': np.random.choice([0, 1], size=500),
Expand All @@ -524,24 +524,26 @@ def test_individualized_policy(self):
cats = ['c', 'd']
hinds = ['a', 'd']

ca = CausalAnalysis(inds, cats, hinds, heterogeneity_model='linear')
ca.fit(X, y)
df = ca.individualized_policy(X, 'a')
self.assertEqual(df.shape[0], 500) # all rows included by default
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds
df = ca.individualized_policy(X, 'b', n_rows=5)
self.assertEqual(df.shape[0], 5)
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds
# verify that we can use a scalar treatment cost
df = ca.individualized_policy(X, 'c', treatment_costs=100)
self.assertEqual(df.shape[0], 500)
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds
# verify that we can specify per-treatment costs for each sample
df = ca.individualized_policy(X, 'd', alpha=0.05, treatment_costs=np.random.normal(size=(500, 2)))
self.assertEqual(df.shape[0], 500)
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds

dictionary = ca._individualized_policy_dict(X, 'a')
for y in [pd.Series(y_arr), y_arr.reshape(-1, 1)]:
for classification in [True, False]:
ca = CausalAnalysis(inds, cats, hinds, heterogeneity_model='linear', classification=classification)
ca.fit(X, y)
df = ca.individualized_policy(X, 'a')
self.assertEqual(df.shape[0], 500) # all rows included by default
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds
df = ca.individualized_policy(X, 'b', n_rows=5)
self.assertEqual(df.shape[0], 5)
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds
# verify that we can use a scalar treatment cost
df = ca.individualized_policy(X, 'c', treatment_costs=100)
self.assertEqual(df.shape[0], 500)
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds
# verify that we can specify per-treatment costs for each sample
df = ca.individualized_policy(X, 'd', alpha=0.05, treatment_costs=np.random.normal(size=(500, 2)))
self.assertEqual(df.shape[0], 500)
self.assertEqual(df.shape[1], 4 + X.shape[1]) # new cols for policy, effect, upper and lower bounds

dictionary = ca._individualized_policy_dict(X, 'a')

def test_random_state(self):
# verify that using the same state returns the same results each time
Expand Down

0 comments on commit b1a7f44

Please sign in to comment.