From 3a3f6c8441905ba378ac58f25a9134f8c3140de4 Mon Sep 17 00:00:00 2001
From: Janis Klaise
Date: Fri, 10 May 2019 14:45:59 +0100
Subject: [PATCH] Fix linting and remove old statsmodels tests

---
 alibi/explainers/__init__.py                  |  4 +-
 alibi/explainers/counterfactual.py            | 37 ++++---------
 alibi/explainers/tests/test_counterfactual.py | 33 +----------------
 3 files changed, 12 insertions(+), 62 deletions(-)

diff --git a/alibi/explainers/__init__.py b/alibi/explainers/__init__.py
index 6bb5df0d6..2057a02c4 100644
--- a/alibi/explainers/__init__.py
+++ b/alibi/explainers/__init__.py
@@ -6,8 +6,10 @@
 from .anchor_text import AnchorText
 from .anchor_image import AnchorImage
 from .cem import CEM
+from .counterfactual import CounterFactual
 
 __all__ = ["AnchorTabular",
            "AnchorText",
            "AnchorImage",
-           "CEM"]
+           "CEM",
+           "CounterFactual"]
diff --git a/alibi/explainers/counterfactual.py b/alibi/explainers/counterfactual.py
index 12e03ff65..aed373ffc 100644
--- a/alibi/explainers/counterfactual.py
+++ b/alibi/explainers/counterfactual.py
@@ -1,6 +1,5 @@
 import numpy as np
 from typing import Callable, Dict, List, Optional, Tuple, Union
-from statsmodels.tools.numdiff import approx_fprime
 import tensorflow as tf
 import logging
 
@@ -87,28 +86,6 @@ def func(X):  # type: ignore
     return func, target_class
 
 
-def num_grad(func: Callable, X: np.ndarray, args: Tuple = (), epsilon: float = 1e-08) -> np.ndarray:
-    """
-    Compute the numerical gradient using the symmetric difference. Currently wraps statsmodels implementation.
-
-    Parameters
-    ----------
-    func
-        Function to differentiate
-    X
-        Point at which to compute the gradient
-    args
-        Additional arguments to the function
-    epsilon
-        Step size for computing the gradient
-    Returns
-    -------
-    Numerical gradient
-    """
-    gradient = approx_fprime(X, func, epsilon=epsilon, args=args, centered=True)
-    return gradient
-
-
 def _perturb(X: np.ndarray,
              eps: Union[float, np.ndarray] = 1e-08,
              proba: bool = False) -> Tuple[np.ndarray, np.ndarray]:
@@ -472,20 +449,20 @@ def _minimize_wachter_loss(self,
                        'success': False}
 
         lam = self.lam_init
-        lam_lb = 0
+        lam_lb = 0.0
         lam_ub = 1e10
         lam_steps = 0
 
         X_current = X_init
         # expanding = True  # flag to check if we are expanding the search or zooming in on a solution
-        for l in range(self.max_lam_steps):
+        for l_step in range(self.max_lam_steps):
             self.sess.run(self.tf_init)  # where to put this
             # cf_found = False  # flag to use for increasing/decreasing lambda
 
             # TODO need some early stopping when lambda grows too big to satisfy prob_cond
             # while np.abs(self.predict_class_fn(X_current) - self.target_proba) > self.tol:
             lr = self.sess.run(self.learning_rate)
-            logger.info('Starting outer loop: %s/%s with lambda=%s, lr=%s', lam_steps+1, self.max_lam_steps, lam, lr)
+            logger.info('Starting outer loop: %s/%s with lambda=%s, lr=%s', lam_steps + 1, self.max_lam_steps, lam, lr)
 
             # if lam_steps == self.max_lam_steps:
             #     logger.warning(
@@ -519,7 +496,7 @@ def _minimize_wachter_loss(self,
 
                 cond = self._prob_condition(X_current).squeeze()
                 if cond:
-                    cf_found[l] = True
+                    cf_found[l_step] = True
                     return_dict['X_cf'] = X_current
                     logger.debug('CF found')
                 prob_cond.append(cond)
@@ -543,7 +520,7 @@ def _minimize_wachter_loss(self,
 
             # adjust the lambda constant
             if self.bisect:
-                if cf_found[l]:
+                if cf_found[l_step]:
                     # want to improve the solution by putting more weight on the distance term
                     # by increasing lambda
                     lam_lb = max(lam, lam_lb)
@@ -554,8 +531,8 @@ def _minimize_wachter_loss(self,
                     lam *= self.lam_step
                     logger.debug('Changed lambda to %s', lam)
 
-            elif not cf_found[l]:
-                if l == 0:
+            elif not cf_found[l_step]:
+                if l_step == 0:
                     logger.warning('No solution found in the first iteration, try to reduce target_proba'
                                    'or increase tolerance.')
                     return return_dict
diff --git a/alibi/explainers/tests/test_counterfactual.py b/alibi/explainers/tests/test_counterfactual.py
index c8f9463f4..1a0d5ead4 100644
--- a/alibi/explainers/tests/test_counterfactual.py
+++ b/alibi/explainers/tests/test_counterfactual.py
@@ -6,7 +6,7 @@
 from scipy.spatial.distance import cityblock
 import tensorflow as tf
 
-from alibi.explainers.counterfactual.counterfactual import _define_func, num_grad, \
+from alibi.explainers.counterfactual import _define_func, \
     num_grad_batch, cityblock_batch, get_wachter_grads
 from alibi.explainers import CounterFactual
 
@@ -57,18 +57,6 @@ def test_define_func(logistic_iris, target_class):
     assert func(x) == probas[:, ix2]
 
 
-@pytest.mark.parametrize('dim', [1, 2, 3, 10])
-def test_get_num_gradients_cityblock(dim):
-    u = np.random.rand(dim)
-    v = np.random.rand(dim)
-
-    grad_true = np.sign(u - v)
-    grad_approx = num_grad(cityblock, u, args=tuple([v])).flatten()  # promote 0-D to 1-D
-
-    assert grad_approx.shape == grad_true.shape
-    assert np.allclose(grad_true, grad_approx)
-
-
 @pytest.mark.parametrize('shape', [(1,), (2, 3), (1, 3, 5)])
 @pytest.mark.parametrize('batch_size', [1, 3, 10])
 def test_get_batch_num_gradients_cityblock(shape, batch_size):
@@ -82,23 +70,6 @@ def test_get_batch_num_gradients_cityblock(shape, batch_size):
     assert np.allclose(grad_true, grad_approx)
 
 
-def test_get_num_gradients_logistic_iris(logistic_iris):
-    X, y, lr = logistic_iris
-    predict_fn = lambda x: lr.predict_proba(x.reshape(1, -1))  # need squeezed x for numerical gradient
-    x = X[0]
-    probas = predict_fn(x)
-    pred_class = probas.argmax()
-
-    # true gradient of the logistic regression wrt x
-    grad_true = (probas.T * (np.eye(3, 3) - probas) @ lr.coef_)
-    assert grad_true.shape == (3, 4)
-
-    grad_approx = num_grad(predict_fn, x)
-
-    assert grad_approx.shape == grad_true.shape
-    assert np.allclose(grad_true, grad_approx)
-
-
 @pytest.mark.parametrize('batch_size', [1, 2, 5])
 def test_get_batch_num_gradients_logistic_iris(logistic_iris, batch_size):
     X, y, lr = logistic_iris
@@ -128,7 +99,7 @@ def test_get_wachter_grads(logistic_iris):
     pred_class = probas.argmax()
 
     func, target = _define_func(predict_fn, pred_class, 'same')
-    loss, grad_loss = get_wachter_grads(X_current=x, predict_class_fn=func, distance_fn=cityblock_batch,
+    loss, grad_loss, debug_info = get_wachter_grads(X_current=x, predict_class_fn=func, distance_fn=cityblock_batch,
                                         X_test=x, target_proba=0.1, lam=1)
 
     assert loss.shape == (1, 1)
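Context on the removed helper: `num_grad` computed a symmetric (central) finite-difference gradient by delegating to statsmodels' `approx_fprime(..., centered=True)`; the batched `num_grad_batch` helper that remains in `counterfactual.py` covers the same need without the statsmodels dependency. For illustration only, a simplified standalone central-difference gradient for scalar-valued functions could look like the sketch below (the name `central_diff_grad` is hypothetical and this is not the library's actual implementation):

import numpy as np
from typing import Callable, Tuple


def central_diff_grad(func: Callable[..., float],
                      X: np.ndarray,
                      args: Tuple = (),
                      epsilon: float = 1e-08) -> np.ndarray:
    """Approximate the gradient of a scalar-valued `func` at `X` via symmetric differences."""
    X = np.asarray(X, dtype=float)
    grad = np.zeros(X.shape)
    for i in range(X.size):
        step = np.zeros(X.shape)
        step.flat[i] = epsilon
        # central difference along coordinate i: (f(x + e) - f(x - e)) / (2 * e)
        grad.flat[i] = (func(X + step, *args) - func(X - step, *args)) / (2 * epsilon)
    return grad


# Usage mirroring the removed cityblock test: the gradient of the L1 distance
# from a fixed point v is sign(u - v) wherever u != v.
# from scipy.spatial.distance import cityblock
# u, v = np.random.rand(5), np.random.rand(5)
# assert np.allclose(central_diff_grad(cityblock, u, args=(v,)), np.sign(u - v))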