Merge pull request #25 from djdt/cpoisson_accuracy_fix

Compound-Poisson accuracy improvements
djdt · Dec 19, 2024 · 4950f53 · 4950f53
2 parents 6f46bed + 79679a5
commit 4950f53
Show file tree

Hide file tree

Showing 18 changed files with 412 additions and 106 deletions.
diff --git a/README.md b/README.md
@@ -42,3 +42,4 @@ Documentation on usage, examples and a programming reference can be found at htt
 ## Publications
 
 * [Lockwood, T. E.; Gonzalez de Vega, R.; Clases, D. An Interactive Python-Based Data Processing Platform for Single Particle and Single Cell ICP-MS. Journal of Analytical Atomic Spectrometry 2021, 36 (11), 2536–2544.](https://doi.org/10.1039/D1JA00297J)
+* [Lockwood, T. E.; Schlatt, L.; Clases, D. SPCal – an Open Source, Easy-to-Use Processing Platform for ICP-TOFMS-Based Single Event Data. J. Anal. At. Spectrom. 2024](https://doi.org/10.1039/D4JA00241E)
diff --git a/docs/source/background/thresholds.rst b/docs/source/background/thresholds.rst
@@ -53,6 +53,27 @@ The log-normal approximation works by closely approximating the SIA with a log-n
 Since the cumulative density and quantile functions of a log-normal are known, we can then predict the resulting :term:`detection threshold` for the sum of log-normal distributions.
 In the case of the log-normal approximation only the shape parameter (:math:`\sigma`) of the log-normal fit to the SIA is required.
 
+Lookup Table
+^^^^^^^^^^^^
+Similar to the :ref:`Log-normal approximation` the lookup table assumes the SIA can be approximated using a log-normal distribution. The lookup table is a 3-dimensional array of quantiles calculated for a zero-truncated compound-Poisson-log-normal distribution. Each quantile comes from a simulation of :math:`10^{10}` values using the parameters in the table below, using the computational facilities of the UTS eResearch High Performance Computer Cluster. Values between simulated points are interpolated at a sub 0.2 % error.
+
+
+.. list-table:: Parameters in the lookup table.
+   :header-rows: 1
+
+   * - Parameter
+     - Range
+     - No. Values
+   * - :math:`\lambda`
+     - 0.001 - 100 (geometric)
+     - 71
+   * - :math:`\sigma`
+     - 0.25 -- 0.65 (linear)
+     - 41
+   * - :math:`\alpha`
+     - 0.999 -- :math:`10^{-7}` (logistic)
+     - 101
+
 Threshold selection
 -------------------
 

diff --git a/docs/source/usage/options.rst b/docs/source/usage/options.rst
@@ -64,13 +64,13 @@ Compound-Poisson options
     * - :math:`\alpha`
       - The (Type I) :term:`error rate`.
     * - Method
-      - The method used: log-normal approximation or simulation.
+      - The method used: lookup table, log-normal approximation or simulation.
     * - SIA :math:`\sigma`
-      - The shape parameter used in the log-normal approximation.
+      - The shape parameter used in the lookup table and log-normal approximation.
     * - SIA Dist
       - The distribution used in the simulation, must be loaded from a file.
 
-Details on the method :term:`detection threshold` is calculated using Compound-Poisson statistics can be found in :ref:`Thresholds for spICP-MS`.
+Details on the method used to calculated the :term:`detection threshold` using Compound-Poisson statistics can be found in :ref:`Thresholds for spICP-MS`.
 To load a SIA distribution press the left-most button. This will start a dialog to import data.
 The loaded distribution can be viewed using the center button, or cleared using the right-most button.
 
@@ -122,7 +122,7 @@ Windowed thresholding is performed by calculating the local signal mean and :ter
 Larger windows are less affected by local changes, but take longer to compute.
 
 .. note::
-    Windowed thresholding is not availble for Compound-Poisson thresholds.
+    Windowed thresholding is only availble for Compound-Poisson thresholds when using the 'Lookup Table' method.
 
 Iterative thresholding
 ----------------------

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spcal"
-version = "1.2.10"
+version = "1.3.0"
 dependencies = [
     "numpy>=2.0.0",
     "PySide6",

diff --git a/spcal.spec b/spcal.spec
@@ -12,6 +12,7 @@ a = Analysis(
     datas=[
         ("spcal/resources/app.ico", "spcal/resources"),
         ("spcal/resources/npdb.npz", "spcal/resources"),
+        ("spcal/resources/cpln_quantiles.npz", "spcal/resources"),
     ],
     hiddenimports=["bottleneck"],
 )

diff --git a/spcal/calc.py b/spcal/calc.py
@@ -178,3 +178,60 @@ def weighted_linreg(
         error = 0.0
 
     return coef[1], coef[0], r2, error
+
+
+def interpolate_3d(
+    x: np.ndarray | float,
+    y: np.ndarray | float,
+    z: np.ndarray | float,
+    xs: np.ndarray,
+    ys: np.ndarray,
+    zs: np.ndarray,
+    data: np.ndarray,
+) -> np.ndarray | float:
+    """Cubic interpolation of (x, y, z) for data.
+
+    Args:
+        x: x position of values to interpolate
+        y: y position of values to interpolate
+        z: z position of values to interpolate
+        xs: x values of ``data``
+        ys: y values of ``data``
+        zs: z values of ``data``
+        data: known values, shape (xs, ys, zs)
+
+    Returns:
+        interpolated values, shape (x)
+    """
+    x, y, z = np.asarray(x), np.asarray(y), np.asarray(z)
+    assert x.size == y.size == z.size
+    assert data.shape == (xs.size, ys.size, zs.size)
+
+    idx0 = np.searchsorted(xs, x, side="right") - 1
+    idy0 = np.searchsorted(ys, y, side="right") - 1
+    idz0 = np.searchsorted(zs, z, side="right") - 1
+
+    idx1 = np.minimum(idx0 + 1, xs.size - 1)
+    idy1 = np.minimum(idy0 + 1, ys.size - 1)
+    idz1 = np.minimum(idz0 + 1, zs.size - 1)
+
+    # Fix any edge cases
+    idx0 = np.where(idx1 == xs.size - 1, idx1 - 1, idx0)
+    idy0 = np.where(idy1 == ys.size - 1, idy1 - 1, idy0)
+    idz0 = np.where(idz1 == zs.size - 1, idz1 - 1, idz0)
+
+    xd = (x - xs[idx0]) / (xs[idx1] - xs[idx0])
+    yd = (y - ys[idy0]) / (ys[idy1] - ys[idy0])
+    zd = (z - zs[idz0]) / (zs[idz1] - zs[idz0])
+
+    c00 = data[idx0, idy0, idz0] * (1.0 - xd) + data[idx1, idy0, idz0] * xd
+    c01 = data[idx0, idy0, idz1] * (1.0 - xd) + data[idx1, idy0, idz1] * xd
+    c10 = data[idx0, idy1, idz0] * (1.0 - xd) + data[idx1, idy1, idz0] * xd
+    c11 = data[idx0, idy1, idz1] * (1.0 - xd) + data[idx1, idy1, idz1] * xd
+
+    c0 = c00 * (1.0 - yd) + c10 * yd
+    c1 = c01 * (1.0 - yd) + c11 * yd
+
+    c = c0 * (1.0 - zd) + c1 * zd
+
+    return c
diff --git a/spcal/dists/poisson.py b/spcal/dists/poisson.py
@@ -1,5 +1,7 @@
 import numpy as np
 
+from spcal.lib.spcalext import poisson_quantile
+
 
 def pdf(k: np.ndarray, lam: float) -> np.ndarray:
     """Poisson probability mass function.
@@ -15,13 +17,14 @@ def pdf(k: np.ndarray, lam: float) -> np.ndarray:
     """
     assert np.issubdtype(k.dtype, np.integer)
     assert np.all(k >= 0)
+    j = np.arange(0, np.amax(k) + 1)
 
     # There will be overflows for high lam during factorial and power
-    with np.errstate(over="ignore"):
-        fk = np.cumprod(np.where(k == 0, 1, k), dtype=np.float64)
-        pdf = np.zeros(k.size, dtype=np.float64)
-        np.divide(lam**k * np.exp(-lam), fk, where=np.isfinite(fk), out=pdf)
-    return pdf
+    with np.errstate(invalid="ignore", over="ignore"):
+        top = lam**j
+        bottom = np.cumprod(np.where(j == 0, 1, j), dtype=np.float64)
+        pdf = top / bottom * np.exp(-lam)
+    return pdf[k]
 
 
 def cdf(k: np.ndarray, lam: float) -> np.ndarray:
@@ -41,3 +44,8 @@ def cdf(k: np.ndarray, lam: float) -> np.ndarray:
 
     j = np.arange(0, np.amax(k) + 1)
     return np.cumsum(pdf(j, lam))[k]
+
+
+def quantile(q: float, lam: float) -> int:
+    """Poisson quantile function"""
+    return poisson_quantile(q, lam)
diff --git a/spcal/dists/util.py b/spcal/dists/util.py
@@ -1,21 +1,80 @@
-from statistics import NormalDist
+from importlib.resources import files
 
 import numpy as np
 
+from spcal.calc import interpolate_3d
 from spcal.dists import lognormal, poisson
 
+qtable = np.load(
+    files("spcal.resources").joinpath("cpln_quantiles.npz").open("rb"),
+    allow_pickle=False,
+)
 
-def compound_poisson_lognormal_quantile(
-    q: float, lam: float, mu: float, sigma: float, method: str = "Fenton-Wilkinson"
+
+def zero_trunc_quantile(
+    lam: np.ndarray | float, y: np.ndarray | float
+) -> np.ndarray | float:
+    """Returns the zero-truncated Poisson quantile.
+
+    Args:
+        lam: Poisson rate parameter(s)
+        y: quantile(s) of non-truncated dist
+
+    Returns:
+        quantile(s) of the zero-truncated dist
+    """
+    k0 = np.exp(-lam)
+    return np.maximum((y - k0) / (1.0 - k0), 0.0)
+
+
+def compound_poisson_lognormal_quantile_lookup(
+    q: float, lam: np.ndarray | float, sigma: float
+) -> np.ndarray | float:
+    """The quantile of a compound Poisson-Lognormal distribution.
+
+    Interpolates values from a simulation of 1e10 zero-truncated values.
+    The lookup table spans lambda values from 0.01 to 100.0, sigmas of
+    0.25 to 0.65 and zt-quantiles of 1e-3 to 1.0 - 1e-7.
+    Maximum error is ~ 0.2 %.
+
+    Args:
+        q: quantile
+        lam: mean of the Poisson distribution
+        sigma: log stddev of the log-normal distribution
+
+    Returns:
+        the ``q`` th value of the compound Poisson-Lognormal
+    """
+    lam = np.atleast_1d(lam)
+    q0 = np.atleast_1d(zero_trunc_quantile(lam, q))
+    nonzero = q0 > 0.0
+
+    qs = np.zeros_like(lam)
+    qs[nonzero] = interpolate_3d(
+        lam[nonzero],
+        np.full_like(lam[nonzero], sigma),
+        q0[nonzero],
+        qtable["lambdas"],
+        qtable["sigmas"],
+        qtable["ys"],
+        qtable["quantiles"],
+    )
+    if len(qs) == 1:
+        return qs[0]
+    return qs
+
+
+def compound_poisson_lognormal_quantile_approximation(
+    q: float, lam: float, mu: float, sigma: float
 ) -> float:
-    """Appoximation of a compound Poisson-Log-normal quantile.
+    """Appoximation of a compound Poisson-Lognormal quantile.
 
-    Calcultes the zero-truncated quantile of the distribution by appoximating the
+    Calculates the zero-truncated quantile of the distribution by appoximating the
     log-normal sum for each value ``k`` given by the Poisson distribution. The
     CDF is calculated for each log-normal, weighted by the Poisson PDF for ``k``.
     The quantile is taken from the sum of the CDFs.
 
-    <1% error for lam < 50.0
+    <5% error for lam < 100.0; sigma < 0.5
 
     Args:
         q: quantile
@@ -24,35 +83,32 @@ def compound_poisson_lognormal_quantile(
         sigma: log stddev of the log-normal distribution
 
     Returns:
-        the ``q`` th value of the compound Poisson-Log-normal
+        the ``q`` th value of the compound Poisson-Lognormal
     """
+
     # A reasonable overestimate of the upper value
-    uk = (
-        int((lam + 1.0) * NormalDist().inv_cdf(1.0 - (1.0 - q) / 1e3) * np.sqrt(lam))
-        + 1
-    )
-    k = np.arange(0, uk + 1)
+    uk = poisson.quantile(1.0 - 1e-12, lam)
+    k = np.arange(0, uk + 1, dtype=int)
     pdf = poisson.pdf(k, lam)
+
+    valid = np.isfinite(pdf)
+    k = k[valid]
+    pdf = pdf[valid]
     cdf = np.cumsum(pdf)
 
     # Calculate the zero-truncated quantile
-    q0 = (q - pdf[0]) / (1.0 - pdf[0])
+    q0 = zero_trunc_quantile(lam, q)
     if q0 <= 0.0:  # The quantile is in the zero portion
         return 0.0
-    # Trim values with a low probability
-    valid = pdf > 1e-6
-    weights = pdf[valid][1:]
-    k = k[valid][1:]
-    cdf = cdf[valid]
+    weights = pdf[1:]
+    k = k[1:]
     # Re-normalize weights
     weights /= weights.sum()
 
     # Get the sum LN for each value of the Poisson
-    mus, sigmas = sum_iid_lognormals(
-        k, np.log(1.0) - 0.5 * sigma**2, sigma, method=method
-    )
+    mus, sigmas = sum_iid_lognormals(k, np.log(1.0) - 0.5 * sigma**2, sigma)
     # The quantile of the last log-normal, must be lower than this
-    upper_q = lognormal.quantile(q, mus[-1], sigmas[-1])
+    upper_q = lognormal.quantile(q0, mus[-1], sigmas[-1])
 
     xs = np.linspace(lam, upper_q, 10000)
     cdf = np.sum(
@@ -110,26 +166,33 @@ def sum_iid_lognormals(
     """Sum of ``n`` identical independant log-normal distributions.
 
     The sum is approximated by another log-normal distribution, defined by
-    the returned parameters. Uses the Fenton-Wilkinson approximation for good
-    right-tail accuracy [3]_.
+    the returned parameters. By feaults, the Fenton-Wilkinson approximation is used
+    for good right-tail accuracy [3]_.
 
     Args:
         n: int or array of ints
         mu: log mean of the underlying distributions
         sigma: log stddev of the underlying distributions
+        method: approximation to use, 'Fenton-Wilkinson' or 'Lo'
 
     Returns:
         mu, sigma of the log-normal approximation
 
     References:
         .. [3] L. F. Fenton, "The sum of lognormal probability distributions in scatter
-            transmission systems," IRE Trans. Commun. Syst., vol. CS-8, pp. 57-67, 1960.
+            transmission systems", IRE Trans. Commun. Syst., vol. CS-8, pp. 57-67, 1960.
             https://doi.org/10.1109/TCOM.1960.1097606
+        .. C. F. Lo, "The sum and Difference of Two Lognormal Random Variables",
+            Journal of Applied Mathematics, 2012, 838397.
+            https://doi.org/10.1155/2012/838397
     """
     if method == "Fenton-Wilkinson":
-        # Fenton-Wilkinson
         sigma2_x = np.log((np.exp(sigma**2) - 1.0) / n + 1.0)
         mu_x = np.log(n * np.exp(mu)) + 0.5 * (sigma**2 - sigma2_x)
         return mu_x, np.sqrt(sigma2_x)
-    else:  # pragma: no cover
+    elif method == "Lo":
+        Sp = n * np.exp(mu + 0.5 * sigma**2)
+        sigma2_s = n / Sp**2 * sigma**2 * np.exp(mu + 0.5 * sigma**2) ** 2
+        return np.log(Sp) - 0.5 * sigma2_s, np.sqrt(sigma2_s)
+    else:
         raise NotImplementedError
diff --git a/spcal/gui/inputs.py b/spcal/gui/inputs.py
@@ -401,8 +401,6 @@ def updateLimits(self) -> None:
         self.limits.clear()
 
         compound_kws = self.options.compound_poisson.state()
-        if not compound_kws["simulate"]:
-            compound_kws["single ion"] = None
         gaussian_kws = self.options.gaussian.state()
         poisson_kws = self.options.poisson.state()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -42,3 +42,4 @@ Documentation on usage, examples and a programming reference can be found at htt
		## Publications

		* [Lockwood, T. E.; Gonzalez de Vega, R.; Clases, D. An Interactive Python-Based Data Processing Platform for Single Particle and Single Cell ICP-MS. Journal of Analytical Atomic Spectrometry 2021, 36 (11), 2536–2544.](https://doi.org/10.1039/D1JA00297J)
		* [Lockwood, T. E.; Schlatt, L.; Clases, D. SPCal – an Open Source, Easy-to-Use Processing Platform for ICP-TOFMS-Based Single Event Data. J. Anal. At. Spectrom. 2024](https://doi.org/10.1039/D4JA00241E)