From 5e0120343544a2b1b36561d86f06353b43a3c7a0 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Wed, 11 Dec 2019 19:10:01 +0800
Subject: [PATCH] Improving solvers convergence checks and docstrings

Adds additional check of condition of change in position between
iterations being less than a tolerance to projection solvers and clearer
names plus docstrings given to these solvers.
---
 mici/integrators.py |  43 ++++----
 mici/solvers.py     | 254 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 219 insertions(+), 78 deletions(-)

diff --git a/mici/integrators.py b/mici/integrators.py
index f0776b2..7d07c46 100644
--- a/mici/integrators.py
+++ b/mici/integrators.py
@@ -3,7 +3,7 @@
 from abc import ABC, abstractmethod
 from mici.errors import NonReversibleStepError
 from mici.solvers import (maximum_norm, solve_fixed_point_direct,
-                          retract_onto_manifold_quasi_newton)
+                          solve_projection_onto_manifold_quasi_newton)
 
 __pdoc__ = {}
 
@@ -232,9 +232,9 @@ class ConstrainedLeapfrogIntegrator(Integrator):
     """
 
     def __init__(self, system, step_size, n_inner_step=1,
-                 reverse_check_tol=1e-8, reverse_check_norm=maximum_norm,
-                 retraction_solver=retract_onto_manifold_quasi_newton,
-                 retraction_solver_kwargs=None):
+                 reverse_check_tol=2e-8, reverse_check_norm=maximum_norm,
+                 projection_solver=solve_projection_onto_manifold_quasi_newton,
+                 projection_solver_kwargs=None):
         """
         Args:
             system (mici.systems.System): Hamiltonian system to integrate the
@@ -270,39 +270,40 @@ def __init__(self, system, step_size, n_inner_step=1,
                 non-negative floating point value defining the distance to use
                 in the reversibility check. Defaults to
                 `mici.solvers.maximum_norm`.
-            retraction_solver (Callable[
+            projection_solver (Callable[
                     [ChainState, ChainState, float, System], ChainState]):
                 Function which given two states `state` and `state_prev`,
                 floating point time step `dt` and a Hamiltonian system object
-                `system` solves the non-linear system of equations in `lambda`
+                `system` solves the non-linear system of equations in `λ`
 
                     system.constr(
                         state.pos + dh2_flow_pos_dmom @
-                            system.jacob_constr(state_prev).T @ lambda) == 0
+                            system.jacob_constr(state_prev).T @ λ) == 0
 
                 where `dh2_flow_pos_dmom = system.dh2_flow_dmom(dt)[0]` is the
                 derivative of the action of the (linear) `system.h2_flow` map
                 on the state momentum component with respect to the position
-                component. This is used to retract the state position
+                component. This is used to project the state position
                 component back on to the manifold after an unconstrained
                 `system.h2_flow` update. Defaults to
-                `mici.solvers.retract_onto_manifold_quasi_newton`.
-            retraction_solver_kwargs (None or Dict[str, object]): Dictionary of
-                any keyword arguments to `retraction_solver`.
+                `mici.solvers.solve_projection_onto_manifold_quasi_newton`.
+            projection_solver_kwargs (None or Dict[str, object]): Dictionary of
+                any keyword arguments to `projection_solver`.
         """
         self.system = system
         self.step_size = step_size
         self.n_inner_step = n_inner_step
         self.reverse_check_tol = reverse_check_tol
         self.reverse_check_norm = reverse_check_norm
-        self.retraction_solver = retraction_solver
-        if retraction_solver_kwargs is None:
-            retraction_solver_kwargs = {}
-        self.retraction_solver_kwargs = retraction_solver_kwargs
+        self.projection_solver = projection_solver
+        if projection_solver_kwargs is None:
+            projection_solver_kwargs = {}
+        self.projection_solver_kwargs = projection_solver_kwargs
 
-    def _retract_onto_manifold(self, state, state_prev, dt):
-        self.retraction_solver(state, state_prev, dt, self.system,
-                               **self.retraction_solver_kwargs)
+    def _h2_flow_retraction_onto_manifold(self, state, state_prev, dt):
+        self.system.h2_flow(state, dt)
+        self.projection_solver(state, state_prev, dt, self.system,
+                               **self.projection_solver_kwargs)
 
     def _project_onto_cotangent_space(self, state):
         state.mom = self.system.project_onto_cotangent_space(state.mom, state)
@@ -315,8 +316,7 @@ def _step_b(self, state, dt):
         dt_i = dt / self.n_inner_step
         for i in range(self.n_inner_step):
             state_prev = state.copy()
-            self.system.h2_flow(state, dt_i)
-            self._retract_onto_manifold(state, state_prev, dt_i)
+            self._h2_flow_retraction_onto_manifold(state, state_prev, dt_i)
             if i == self.n_inner_step - 1:
                 # If at last inner step pre-evaluate dh1_dpos before projecting
                 # state on to cotangent space, with computed value being
@@ -333,8 +333,7 @@ def _step_b(self, state, dt):
                 self.system.dh1_dpos(state)
             self._project_onto_cotangent_space(state)
             state_back = state.copy()
-            self.system.h2_flow(state_back, -dt_i)
-            self._retract_onto_manifold(state_back, state, -dt_i)
+            self._h2_flow_retraction_onto_manifold(state_back, state, -dt_i)
             rev_diff = self.reverse_check_norm(state_back.pos - state_prev.pos)
             if rev_diff > self.reverse_check_tol:
                 raise NonReversibleStepError(
diff --git a/mici/solvers.py b/mici/solvers.py
index 72fd089..5ca17ef 100644
--- a/mici/solvers.py
+++ b/mici/solvers.py
@@ -14,38 +14,52 @@ def maximum_norm(vct):
     return np.max(abs(vct))
 
 
-def solve_fixed_point_direct(func, x0, tol=1e-9, max_iters=100,
-                             norm=maximum_norm):
+def solve_fixed_point_direct(
+        func, x0, convergence_tol=1e-9, divergence_tol=1e10, max_iters=100,
+        norm=maximum_norm):
     """Solve fixed point equation `func(x) = x` using direct iteration.
 
     Args:
-        func: Single argument function to find fixed point of.
-        x0: Initial state (function argument).
-        tol: Convergence tolerance - terminates when `norm(func(x) - x) < tol`.
-        max_iters: Maximum number of iterations before raising exception.
-        norm: Vector norm to use to assess convergence.
+        func (Callable([array], array)): Function to find fixed point of.
+        x0 (array): Initial state (function argument).
+        convergence_tol (float): Convergence tolerance - solver successfully
+            terminates when `norm(func(x) - x) < convergence_tol`.
+        divergence_tol (float): Divergence tolerance - solver aborts if
+            `norm(func(x) - x) > divergence_tol` on any iteration.
+        max_iters (int): Maximum number of iterations before raising exception.
+        norm (Callable([array], float)): Norm to use to assess convergence.
 
     Returns:
-        Solution to fixed point equation with `norm(func(x) - x) < tol`.
+        Solution to fixed point equation with
+        `norm(func(x) - x) < convergence_tol`.
 
     Raises:
-        ConvergenceError if not converged within `max_iters` iterations.
+        `mici.errors.ConvergenceError` if solver does not converge within
+        `max_iters` iterations, diverges or encounters a `ValueError` during
+        the iteration.
     """
     for i in range(max_iters):
-        x = func(x0)
-        if any(np.isnan(x)) or any(np.isinf(x)):
+        try:
+            x = func(x0)
+            if any(np.isnan(x)) or any(np.isinf(x)):
+                raise ConvergenceError(
+                    f'Fixed point iteration diverged on iteration {i}.'
+                    f'Last error={error:.1e}.')
+            error = norm(x - x0)
+            if error < tol:
+                return x
+            x0 = x
+        except ValueError as e:
+            # Make robust to inf/nan values in intermediate linear algebra ops
             raise ConvergenceError(
-                f'Fixed point iteration diverged on iteration {i}.')
-        error = norm(x - x0)
-        if error < tol:
-            return x
-        x0 = x
+                f'ValueError at iteration {i} of fixed point solver ({e}).')
     raise ConvergenceError(
-        f'Fixed point iteration did not converge. Last error {error:.1e}.')
+        f'Fixed point iteration did not converge. Last error={error:.1e}.')
 
 
-def solve_fixed_point_steffensen(func, x0, tol=1e-9, max_iters=100,
-                                 norm=maximum_norm):
+def solve_fixed_point_steffensen(
+        func, x0, convergence_tol=1e-9, divergence_tol=1e10, max_iters=100,
+        norm=maximum_norm):
     """Solve fixed point equation `func(x) = x` using Steffensen's method.
 
     Steffennsen's method [1] achieves quadratic convergence but at the cost of
@@ -56,36 +70,102 @@ def solve_fixed_point_steffensen(func, x0, tol=1e-9, max_iters=100,
     [1] : https://en.wikipedia.org/wiki/Steffensen%27s_method
 
     Args:
-        func: Single argument function to find fixed point of.
-        x0: Initial state (function argument).
-        tol: Convergence tolerance - terminates when `norm(func(x) - x) < tol`.
-        max_iters: Maximum number of iterations before raising exception.
-        norm: Vector norm to use to assess convergence.
+        func (Callable([array], array)): Function to find fixed point of.
+        x0 (array): Initial state (function argument).
+        convergence_tol (float): Convergence tolerance - solver successfully
+            terminates when `norm(func(x) - x) < convergence_tol`.
+        divergence_tol (float): Divergence tolerance - solver aborts if
+            `norm(func(x) - x) > divergence_tol` on any iteration.
+        max_iters (int): Maximum number of iterations before raising exception.
+        norm (Callable([array], float)): Norm to use to assess convergence.
 
     Returns:
-        Solution to fixed point equation with `norm(func(x) - x) < tol`.
+        Solution to fixed point equation with
+        `norm(func(x) - x) < convergence_tol`.
 
     Raises:
-        ConvergenceError if not converged within `max_iters` iterations.
+        `mici.errors.ConvergenceError` if solver does not converge within
+        `max_iters` iterations, diverges or encounters a `ValueError` during
+        the iteration.
     """
     for i in range(max_iters):
-        x1 = func(x0)
-        x2 = func(x1)
-        x = x0 - (x1 - x0)**2 / (x2 - 2 * x1 + x0)
-        if any(np.isnan(x)) or any(np.isinf(x)):
+        try:
+            x1 = func(x0)
+            x2 = func(x1)
+            x = x0 - (x1 - x0)**2 / (x2 - 2 * x1 + x0)
+            error = norm(x - x0)
+            if error > divergence_tol or np.isnan(error):
+                raise ConvergenceError(
+                    f'Fixed point iteration diverged on iteration {i}.'
+                    f'Last error={error:.1e}.')
+            if error < convergence_tol:
+                return x
+            x0 = x
+        except ValueError as e:
+            # Make robust to inf/nan values in intermediate linear algebra ops
             raise ConvergenceError(
-                f'Fixed point iteration diverged on iteration {i}.')
-        error = norm(x - x0)
-        if error < tol:
-            return x
-        x0 = x
+                f'ValueError at iteration {i} of fixed point solver ({e}).')
     raise ConvergenceError(
-        f'Fixed point iteration did not converge. Last error {error:.1e}.')
+        f'Fixed point iteration did not converge. Last error={error:.1e}.')
 
 
-def retract_onto_manifold_quasi_newton(
-        state, state_prev, dt, system, convergence_tol=1e-9,
+def solve_projection_onto_manifold_quasi_newton(
+        state, state_prev, dt, system, constraint_tol=1e-9, position_tol=1e-8,
         divergence_tol=1e10, max_iters=50, norm=maximum_norm):
+    """Solve constraint equation using quasi-Newton method.
+
+    Uses a quasi-Newton iteration to solve the non-linear system of equations
+    in `λ`
+
+        system.constr(
+            state.pos + dh2_flow_pos_dmom @
+                system.jacob_constr(state_prev).T @ λ) == 0
+
+    where `dh2_flow_pos_dmom = system.dh2_flow_dmom(dt)[0]` is the derivative
+    of the action of the (linear) `system.h2_flow` map on the state momentum
+    component with respect to the position component, `state` is a post
+    (unconstrained) `system.h2_flow` update state with position component
+    outside of the manifold and `state_prev` is the corresponding pre-update
+    state in the co-tangent bundle.
+
+    Only requires re-evaluating the constraint function `system.constr` within
+    the solver loop and no recomputation of matrix decompositions on each
+    iteration.
+
+    Args:
+        state (mici.states.ChainState): Post `h2_flow `update state to project.
+        state_prev (mici.states.ChainState): Previous state in co-tangent
+            bundle manifold before `h2_flow` update which defines the
+            co-tangent space to perform projection in.
+        dt (float): Integrator time step used in `h2_flow` update.
+        system (mici.systems.ConstrainedEuclideanMetricSystem): Hamiltonian
+           system defining `h2_flow` and `constr` functions used to define
+           constraint equation to solve.
+        constraint_tol (float): Convergence tolerance in constraint space.
+           Iteration will continue until `norm(constr(pos)) < constraint_tol`
+           where `pos` is the position at the current iteration.
+        position_tol (float): Convergence tolerance in position space.
+           Iteration will continue until `norm(delt_pos) < position_tol`
+           where `delta_pos` is the change in the position in the current
+           iteration.
+        divergence_tol (float): Divergence tolerance - solver aborts if
+            `norm(constr(pos)) > divergence_tol` on any iteration where `pos`
+            is the position at the current iteration and raises
+            `mici.errors.ConvergenceError`.
+        max_iters (int): Maximum number of iterations to perform before
+            aborting and raising `mici.errors.ConvergenceError`.
+        norm (Callable[[array], float]): Norm to use to test for convergence.
+
+    Returns:
+        Updated `state` object with position component satisfying constraint
+        equation to within `constraint_tol`, i.e.
+        `norm(system.constr(state.pos)) < constraint_tol`.
+
+    Raises:
+        `mici.errors.ConvergenceError` if solver does not converge within
+        `max_iters` iterations, diverges or encounters a `ValueError` during
+        the iteration.
+    """
     mu = np.zeros_like(state.pos)
     jacob_constr_prev = system.jacob_constr(state_prev)
     dh2_flow_pos_dmom, dh2_flow_mom_dmom = system.dh2_flow_dmom(dt)
@@ -95,27 +175,85 @@ def retract_onto_manifold_quasi_newton(
         try:
             constr = system.constr(state)
             error = norm(constr)
+            delta_mu = jacob_constr_prev.T @ (
+                inv_jacob_constr_inner_product @ constr)
+            delta_pos = dh2_flow_pos_dmom @ delta_mu
             if error > divergence_tol or np.isnan(error):
                 raise ConvergenceError(
-                    f'Quasi-Newton iteration diverged. Last error {error:.1e}')
-            elif error < convergence_tol:
+                    f'Quasi-Newton solver diverged on iteration {i}. '
+                    f'Last |constr|={error:.1e}, '
+                    f'|delta_pos|={norm(delta_pos):.1e}.')
+            elif error < constraint_tol and norm(delta_pos) < position_tol:
                 state.mom -= dh2_flow_mom_dmom @ mu
                 return state
-            delta_mu = jacob_constr_prev.T @ (
-                inv_jacob_constr_inner_product @ constr)
             mu += delta_mu
-            state.pos -= dh2_flow_pos_dmom @ delta_mu
+            state.pos -= delta_pos
         except ValueError as e:
             # Make robust to inf/nan values in intermediate linear algebra ops
             raise ConvergenceError(
-                f'ValueError during Quasi-Newton iteration ({e}).')
+                f'ValueError at iteration {i} of Quasi-Newton solver ({e}).')
     raise ConvergenceError(
-        f'Quasi-Newton iteration did not converge. Last error {error:.1e}.')
+        f'Quasi-Newton solver did not converge with {max_iters} iterations. '
+        f'Last |constr|={error:.1e}, |delta_pos|={norm(delta_pos)}.')
 
 
-def retract_onto_manifold_newton(
-        state, state_prev, dt, system, convergence_tol=1e-9,
+def solve_projection_onto_manifold_newton(
+        state, state_prev, dt, system, constraint_tol=1e-9, position_tol=1e-8,
         divergence_tol=1e10, max_iters=50, norm=maximum_norm):
+    """Solve constraint equation using Newton method.
+
+    Uses a Newton iteration to solve the non-linear system of equations in `λ`
+
+        system.constr(
+            state.pos + dh2_flow_pos_dmom @
+                system.jacob_constr(state_prev).T @ λ) == 0
+
+    where `dh2_flow_pos_dmom = system.dh2_flow_dmom(dt)[0]` is the derivative
+    of the action of the (linear) `system.h2_flow` map on the state momentum
+    component with respect to the position component, `state` is a post
+    (unconstrained) `system.h2_flow` update state with position component
+    outside of the manifold and `state_prev` is the corresponding pre-update
+    state in the co-tangent bundle.
+
+    Requires re-evaluating both the constraint function `system.constr` and
+    constraint Jacobian `system.jacob_constr` within the solver loop and
+    computation of matrix decompositions of a preconditioned matrix on each
+    iteration.
+
+    Args:
+        state (mici.states.ChainState): Post `h2_flow `update state to project.
+        state_prev (mici.states.ChainState): Previous state in co-tangent
+            bundle manifold before `h2_flow` update which defines the
+            co-tangent space to perform projection in.
+        dt (float): Integrator time step used in `h2_flow` update.
+        system (mici.systems.ConstrainedEuclideanMetricSystem): Hamiltonian
+           system defining `h2_flow` and `constr` functions used to define
+           constraint equation to solve.
+        constraint_tol (float): Convergence tolerance in constraint space.
+           Iteration will continue until `norm(constr(pos)) < constraint_tol`
+           where `pos` is the position at the current iteration.
+        position_tol (float): Convergence tolerance in position space.
+           Iteration will continue until `norm(delt_pos) < position_tol`
+           where `delta_pos` is the change in the position in the current
+           iteration.
+        divergence_tol (float): Divergence tolerance - solver aborts if
+            `norm(constr(pos)) > divergence_tol` on any iteration where `pos`
+            is the position at the current iteration and raises
+            `mici.errors.ConvergenceError`.
+        max_iters (int): Maximum number of iterations to perform before
+            aborting and raising `mici.errors.ConvergenceError`.
+        norm (Callable[[array], float]): Norm to use to test for convergence.
+
+    Returns:
+        Updated `state` object with position component satisfying constraint
+        equation to within `constraint_tol`, i.e.
+        `norm(system.constr(state.pos)) < constraint_tol`.
+
+    Raises:
+        `mici.errors.ConvergenceError` if solver does not converge within
+        `max_iters` iterations, diverges or encounters a `ValueError` during
+        the iteration.
+    """
     mu = np.zeros_like(state.pos)
     jacob_constr_prev = system.jacob_constr(state_prev)
     dh2_flow_pos_dmom, dh2_flow_mom_dmom = system.dh2_flow_dmom(dt)
@@ -124,21 +262,25 @@ def retract_onto_manifold_newton(
             jacob_constr = system.jacob_constr(state)
             constr = system.constr(state)
             error = norm(constr)
-            if error > divergence_tol or np.isnan(error):
-                raise ConvergenceError(
-                    'Newton iteration diverged. Last error {err:.1e}.')
-            if error < convergence_tol:
-                state.mom -= dh2_flow_mom_dmom @ mu
-                return state
             delta_mu = jacob_constr_prev.T @ (
                 system.jacob_constr_inner_product(
                     jacob_constr, dh2_flow_pos_dmom, jacob_constr_prev).inv @
                 constr)
+            delta_pos = dh2_flow_pos_dmom @ delta_mu
+            if error > divergence_tol or np.isnan(error):
+                raise ConvergenceError(
+                    f'Newton solver diverged at iteration {i}. '
+                    f'Last |constr|={error:.1e}, '
+                    f'|delta_pos|={norm(delta_pos):.1e}.')
+            if error < constraint_tol and norm(delta_pos) < position_tol:
+                state.mom -= dh2_flow_mom_dmom @ mu
+                return state
             mu += delta_mu
-            state.pos -= dh2_flow_pos_dmom @ delta_mu
+            state.pos -= delta_pos
         except ValueError as e:
             # Make robust to inf/nan values in intermediate linear algebra ops
             raise ConvergenceError(
-                f'ValueError during Newton iteration ({e}).')
+                f'ValueError at iteration {i} of Newton solver ({e}).')
     raise ConvergenceError(
-        f'Newton iteration did not converge. Last error {error:.1e}.')
+        f'Newton solver did not converge in {max_iters} iterations. '
+        f'Last |constr|={error:.1e}, |delta_pos|={norm(delta_pos)}.')