Fix policy computation with array outcome

py-why · Jun 26, 2021 · b1a7f44 · b1a7f44
1 parent d493d28
commit b1a7f44
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 19 deletions.
diff --git a/econml/solutions/causal_analysis/_causal_analysis.py b/econml/solutions/causal_analysis/_causal_analysis.py
@@ -1422,12 +1422,23 @@ def individualized_policy(self, Xtest, feature_index, *, n_rows=None, treatment_
         else:
             effect = result.estimator.const_marginal_effect_inference(Xtest)
 
+        multi_y = (not self._vec_y) or self.classification
+
+        if multi_y and result.feature_baseline is not None and np.ndim(treatment_costs) == 2:
+            # we've got treatment costs of shape (n, d_t-1) so we need to add a y dimension to broadcast safely
+            treatment_costs = np.expand_dims(treatment_costs, 1)
+
         effect.translate(-treatment_costs)
 
         est = effect.point_estimate
         est_lb = effect.conf_int(alpha)[0]
         est_ub = effect.conf_int(alpha)[1]
 
+        if multi_y:  # y was a 2D array or classification is enabled; drop the singleton y axis
+            est = np.squeeze(est, 1)
+            est_lb = np.squeeze(est_lb, 1)
+            est_ub = np.squeeze(est_ub, 1)
+
         if result.feature_baseline is None:
             rec = np.empty(est.shape[0], dtype=object)
             rec[est > 0] = "increase"

diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
@@ -515,7 +515,7 @@ def test_over_cat_limit(self):
         self.assertEqual([res.feature_name for res in ca._results], ['a', 'b', 'c', 'd', 'f', 'g', 'h'])
 
     def test_individualized_policy(self):
-        y = pd.Series(np.random.choice([0, 1], size=(500,)))
+        y_arr = np.random.choice([0, 1], size=(500,))
         X = pd.DataFrame({'a': np.random.normal(size=500),
                           'b': np.random.normal(size=500),
                           'c': np.random.choice([0, 1], size=500),
@@ -524,24 +524,26 @@ def test_individualized_policy(self):
         cats = ['c', 'd']
         hinds = ['a', 'd']
 
-        ca = CausalAnalysis(inds, cats, hinds, heterogeneity_model='linear')
-        ca.fit(X, y)
-        df = ca.individualized_policy(X, 'a')
-        self.assertEqual(df.shape[0], 500)  # all rows included by default
-        self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
-        df = ca.individualized_policy(X, 'b', n_rows=5)
-        self.assertEqual(df.shape[0], 5)
-        self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
-        # verify that we can use a scalar treatment cost
-        df = ca.individualized_policy(X, 'c', treatment_costs=100)
-        self.assertEqual(df.shape[0], 500)
-        self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
-        # verify that we can specify per-treatment costs for each sample
-        df = ca.individualized_policy(X, 'd', alpha=0.05, treatment_costs=np.random.normal(size=(500, 2)))
-        self.assertEqual(df.shape[0], 500)
-        self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
-
-        dictionary = ca._individualized_policy_dict(X, 'a')
+        for y in [pd.Series(y_arr), y_arr.reshape(-1, 1)]:
+            for classification in [True, False]:
+                ca = CausalAnalysis(inds, cats, hinds, heterogeneity_model='linear', classification=classification)
+                ca.fit(X, y)
+                df = ca.individualized_policy(X, 'a')
+                self.assertEqual(df.shape[0], 500)  # all rows included by default
+                self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
+                df = ca.individualized_policy(X, 'b', n_rows=5)
+                self.assertEqual(df.shape[0], 5)
+                self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
+                # verify that we can use a scalar treatment cost
+                df = ca.individualized_policy(X, 'c', treatment_costs=100)
+                self.assertEqual(df.shape[0], 500)
+                self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
+                # verify that we can specify per-treatment costs for each sample
+                df = ca.individualized_policy(X, 'd', alpha=0.05, treatment_costs=np.random.normal(size=(500, 2)))
+                self.assertEqual(df.shape[0], 500)
+                self.assertEqual(df.shape[1], 4 + X.shape[1])  # new cols for policy, effect, upper and lower bounds
+
+                dictionary = ca._individualized_policy_dict(X, 'a')
 
     def test_random_state(self):
         # verify that using the same state returns the same results each time