From 696333a4ec3d32f1ddf67118a2e0ae4d01221840 Mon Sep 17 00:00:00 2001
From: noe <frank.noe@fu-berlin.de>
Date: Tue, 4 Aug 2015 19:54:18 +0200
Subject: [PATCH] [estimation: reversible tmatrix MLE and sampler]: Added
 option to suppress the not-converged warning. Using that option and running
 only 100 steps in the initialization of the reversible transition matrix
 sampler. That's sufficient for the sampler because the MLE is just a starting
 point anyway. In cases where the reversible MLE doesn't converge well - and
 that seems to happen often for the fractional counts employed in Bayesian
 methods - we would otherwise optimize for 1M steps, just to throw that result
 away in the first sampling step. This change makes a huge performance
 difference for the Bayesian HMM, where a transition matrix sampling is run
 for every iteration of the algorithm (usually 100 times or more).

---
 msmtools/estimation/api.py                      |  6 ++++--
 msmtools/estimation/dense/sampler_rev.pyx       |  5 ++---
 msmtools/estimation/dense/transition_matrix.py  | 17 +++++++++++------
 msmtools/estimation/sparse/mle_trev.pyx         |  4 ++--
 .../estimation/sparse/mle_trev_given_pi.pyx     |  5 +++--
 5 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/msmtools/estimation/api.py b/msmtools/estimation/api.py
index 0ea50149..ad338deb 100644
--- a/msmtools/estimation/api.py
+++ b/msmtools/estimation/api.py
@@ -832,12 +832,14 @@ def transition_matrix(C, reversible=False, mu=None, method='auto', **kwargs):
         stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The relative stationary probability changes
         :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are used in order to track changes in small
         probabilities. The Euclidean norm of the change vector, :math:`|e_i|_2`, is compared to maxerr.
-    return_statdist : False : Boolean
+    return_statdist : bool, default=False
         Optional parameter with reversible = True.
         If set to true, the stationary distribution is also returned
-    return_conv : False : Boolean
+    return_conv : bool, default=False
         Optional parameter with reversible = True.
         If set to true, the likelihood history and the pi_change history is returned.
+    warn_not_converged : bool, default=True
+        Prints a warning if not converged.
 
     Returns
     -------
diff --git a/msmtools/estimation/dense/sampler_rev.pyx b/msmtools/estimation/dense/sampler_rev.pyx
index 6ab27dd1..bf3f21ec 100644
--- a/msmtools/estimation/dense/sampler_rev.pyx
+++ b/msmtools/estimation/dense/sampler_rev.pyx
@@ -96,9 +96,8 @@ class SamplerRev(object):
 
         """Set up initial state of the chain"""
         if P0 is None:
-            P0 = tmatrix(C, reversible=True)
-            # A = C + C.T
-            # V0 = A/A.sum()            
+            # only do a few iterations to get close to the MLE and suppress not converged warning
+            P0 = tmatrix(C, reversible=True, maxiter=100, warn_not_converged=False)
         pi0 = statdist(P0)
         V0 = pi0[:,np.newaxis] * P0            
         
diff --git a/msmtools/estimation/dense/transition_matrix.py b/msmtools/estimation/dense/transition_matrix.py
index a39a34cb..9472daac 100644
--- a/msmtools/estimation/dense/transition_matrix.py
+++ b/msmtools/estimation/dense/transition_matrix.py
@@ -97,7 +97,7 @@ def __relative_error(x, y, norm=None):
 
 
 def estimate_transition_matrix_reversible(C, Xinit=None, maxiter=1000000, maxerr=1e-8,
-                                          return_statdist=False, return_conv=False):
+                                          return_statdist=False, return_conv=False, warn_not_converged=True):
     """
     iterative method for estimating a maximum likelihood reversible transition matrix
 
@@ -121,10 +121,12 @@ def estimate_transition_matrix_reversible(C, Xinit=None, maxiter=1000000, maxerr
         stationary probabilities (x_i = sum_k x_ik). The relative stationary probability changes
         e_i = (x_i^(1) - x_i^(2))/(x_i^(1) + x_i^(2)) are used in order to track changes in small
         probabilities. The Euclidean norm of the change vector, |e_i|_2, is compared to convtol.
-    return_statdist = False : Boolean
+    return_statdist : bool, default=False
         If set to true, the stationary distribution is also returned
-    return_conv = False : Boolean
+    return_conv : bool, default=False
         If set to true, the likelihood history and the pi_change history is returned.
+    warn_not_converged : bool, default=True
+        Prints a warning if not converged.
 
     Returns
     -------
@@ -193,7 +195,7 @@ def estimate_transition_matrix_reversible(C, Xinit=None, maxiter=1000000, maxerr
         i += 1
     # finalize and return
     T = X / xsum[:, np.newaxis]
-    if not converged:
+    if warn_not_converged and not converged:
         warnings.warn("Reversible transition matrix estimation didn't converge.", msmtools.util.exceptions.NotConvergedWarning)
     if (return_statdist and return_conv):
         return (T, xsum, lhist[0:i], diffs[0:i])
@@ -204,7 +206,8 @@ def estimate_transition_matrix_reversible(C, Xinit=None, maxiter=1000000, maxerr
     return T  # else just return T
 
 
-def transition_matrix_reversible_fixpi(Z, mu, maxerr=1e-10, maxiter=10000, return_iterations=False):
+def transition_matrix_reversible_fixpi(Z, mu, maxerr=1e-10, maxiter=10000, return_iterations=False,
+                                       warn_not_converged=True):
     r"""
     maximum likelihood transition matrix with fixed stationary distribution
 
@@ -223,6 +226,8 @@ def transition_matrix_reversible_fixpi(Z, mu, maxerr=1e-10, maxiter=10000, retur
         Will exit when reaching maxiter iterations without reaching convergence.
     return_iterations: bool (False)
         set true in order to return (T, it), where T is the transition matrix and it is the number of iterations needed
+    warn_not_converged : bool, default=True
+        Prints a warning if not converged.
 
     Returns
     -------
@@ -267,7 +272,7 @@ def transition_matrix_reversible_fixpi(Z, mu, maxerr=1e-10, maxiter=10000, retur
         # copy new to old l-vector
         l[:] = lnew[:]
         it += 1
-    if (not converged) and (it >= maxiter):
+    if warn_not_converged and (not converged) and (it >= maxiter):
         warnings.warn('NOT CONVERGED: 2-norm of Langrange multiplier vector is still ' +
                          str(err) + ' > ' + str(maxerr) + ' after ' + str(it) +
                          ' iterations. Increase maxiter or decrease maxerr',
diff --git a/msmtools/estimation/sparse/mle_trev.pyx b/msmtools/estimation/sparse/mle_trev.pyx
index b07a19a5..495c84c1 100644
--- a/msmtools/estimation/sparse/mle_trev.pyx
+++ b/msmtools/estimation/sparse/mle_trev.pyx
@@ -15,7 +15,7 @@ import msmtools.util.exceptions
 cdef extern from "_mle_trev.h":
   int _mle_trev_sparse(double * const T_data, const double * const CCt_data, const int * const i_indices, const int * const j_indices, const int len_CCt, const double * const sum_C, const int dim, const double maxerr, const int maxiter)
 
-def mle_trev(C, double maxerr = 1.0E-12, int maxiter = 1000000):
+def mle_trev(C, double maxerr = 1.0E-12, int maxiter = 1000000, warn_not_converged = True):
 
   assert maxerr > 0, 'maxerr must be positive'
   assert maxiter > 0, 'maxiter must be positive'
@@ -53,7 +53,7 @@ def mle_trev(C, double maxerr = 1.0E-12, int maxiter = 1000000):
     raise Exception('The update of the stationary distribution produced zero or NaN.')
   elif err == -3:
     raise Exception('Some row and corresponding column of C have zero counts.')
-  elif err == -5:
+  elif err == -5 and warn_not_converged:
     warnings.warn('Reversible transition matrix estimation didn\'t converge.', msmtools.util.exceptions.NotConvergedWarning)
 
   # T matrix has the same shape and positions of nonzero elements as CCt
diff --git a/msmtools/estimation/sparse/mle_trev_given_pi.pyx b/msmtools/estimation/sparse/mle_trev_given_pi.pyx
index 9430d1bf..353c62b5 100644
--- a/msmtools/estimation/sparse/mle_trev_given_pi.pyx
+++ b/msmtools/estimation/sparse/mle_trev_given_pi.pyx
@@ -27,7 +27,8 @@ def mle_trev_given_pi(
   mu,
   double maxerr = 1.0E-12,
   int maxiter = 1000000,
-  double eps = 0.0
+  double eps = 0.0,
+  warn_not_converged = True
   ):
 
   assert maxerr > 0, 'maxerr must be positive'
@@ -70,7 +71,7 @@ def mle_trev_given_pi(
     raise Exception('Some row and corresponding column of C have zero counts.')
   elif err == -4:
     raise Exception('Some element of pi is zero.')
-  elif err == -5:
+  elif err == -5 and warn_not_converged:
     warnings.warn('Reversible transition matrix estimation with fixed stationary distribution didn\'t converge.', msmtools.util.exceptions.NotConvergedWarning)
 
   # unnormalized T matrix has the same shape and positions of nonzero elements as the C matrix