From 93c7a7d8b66843911ea3a644892e8d94891217ae Mon Sep 17 00:00:00 2001
From: Huite Bootsma <huitebootsma@gmail.com>
Date: Mon, 5 Aug 2024 22:15:29 +0200
Subject: [PATCH 1/4] Refactor reduce module; improve performance for mode,
 median

* Simplify methods by just giving a contiguous values array instead of source and indices
* Provide a pre-allocated workspace to avoid dynamic allocation in reduce functions
* Add a generalized percentile function
---
 examples/overlap_regridder.py       |  62 +++++++---
 tests/test_regrid/test_reduce.py    | 157 +++++++++++++++++++-----
 tests/test_regrid/test_regridder.py |  14 +++
 xugrid/regrid/nanpercentile.py      |  97 +++++++++++++++
 xugrid/regrid/reduce.py             | 180 ++++++++++++++++++++--------
 xugrid/regrid/regridder.py          |  28 ++++-
 6 files changed, 432 insertions(+), 106 deletions(-)
 create mode 100644 xugrid/regrid/nanpercentile.py

diff --git a/examples/overlap_regridder.py b/examples/overlap_regridder.py
index 8c1abbd54..ec78e7a37 100644
--- a/examples/overlap_regridder.py
+++ b/examples/overlap_regridder.py
@@ -111,30 +111,43 @@ def create_grid(bounds, nx, ny):
 # A valid reduction method must be compileable by Numba, and takes exactly three
 # arguments: ``values``, ``indices``, ``weights``.
 #
-# * ``values``: is the ravelled array containing the (float) source values.
-# * ``indices``: contains the flat, or "linear", (integer) indices into the
-#   source values for a single target face.
+# * ``values``: is the array containing the (float) source values.
 # * ``weights``: contains the (float) overlap between the target face and the
 #   source faces. The size of ``weights`` is equal to the size of ``indices``.
+# * ``work``: used as a temporary workspace. Contains one float value for each
+#   index in indices.
 #
-# Xugrid regridder reduction functions are implemented in such a way. For a example, the area
-# could be implemented as follows:
+# Xugrid regridder reduction functions are implemented in such a way. For a
+# example, an area weighted sum could be implemented as follows:
 
 
-def mean(values, indices, weights):
-    subset = values[indices]
-    return np.nansum(subset * weights) / np.nansum(weights)
+def mean(values, weights, workspace):
+    total = 0.0
+    weight_sum = 0.0
+    for value, weight in zip(values, weights):
+        if ~np.isnan(value):
+            total = +value * weight
+            weight_sum += weight
+    if weight_sum == 0.0:
+        return np.nan
+    return total / weight_sum
 
 
 # %%
 # .. note::
+#    * Each reduction must return a single float.
 #    * Custom reductions methods must be able to deal with NaN values as these
 #      are commonly encountered in datasets as a "no data value".
 #    * If Python features are used that are unsupported by Numba, you will get
 #      somewhat obscure errors. In such a case, test your function with
-#      synthetic values for ``values, indices, weights``.
-#    * The built-in mean is more efficient, avoiding temporary memory
-#      allocations.
+#      synthetic values for ``values, weights, workspace``.
+#    * The ``workspace`` array is provided to avoid dynamic memory allocations.
+#      It is a an array of floats with the same size as ``values`` or
+#      ``weights``. You may freely allocate new arrays within the reduction
+#      function but it will impact performance.
+#    * While we could have implemented a weighted mean as:
+#      ``np.nansum(values * weights) / np.nansum(weights)``, the function above
+#      is efficiently compiled by Numba and does not allocate.
 #
 # To use our custom method, we provide at initialization of the OverlapRegridder:
 
@@ -143,18 +156,29 @@ def mean(values, indices, weights):
 result.ugrid.plot(vmin=-20, vmax=90, cmap="terrain")
 
 # %%
-# Not every reduction uses the weights argument. For example, computing an
-# arbitrary quantile value requires just the values. Again, make sure the
-# function can deal with NaN values! -- hence ``nanpercentile`` rather than
-# ``percentile`` here.
+# Not every reduction uses the ``weights`` and ``workspace`` arguments. For
+# example, a regular sum:
 
 
-def p17(values, indices, weights):
-    subset = values[indices]
-    return np.nanpercentile(subset, 17)
+def nansum(values, weights, workspace):
+    return np.nansum(values)
 
 
-regridder = xu.OverlapRegridder(uda, grid, method=p17)
+# %%
+# Always ensure that the function can deal with NaN values!
+#
+# Custom percentiles
+# ------------------
+#
+# Xugrid provides a number of predefined percentiles (5, 10, 25, 50, 75, 90,
+# 95). In case you need a different percentile value, you can use this utility:
+
+p333 = xu.OverlapRegridder.create_percentile_method(33.3)
+
+# %%
+# Then, provide it as the regridder method as above:
+
+regridder = xu.OverlapRegridder(uda, grid, method=p333)
 result = regridder.regrid(uda)
 result.ugrid.plot(vmin=-20, vmax=90, cmap="terrain")
 
diff --git a/tests/test_regrid/test_reduce.py b/tests/test_regrid/test_reduce.py
index 4a696a099..cd3c104d4 100644
--- a/tests/test_regrid/test_reduce.py
+++ b/tests/test_regrid/test_reduce.py
@@ -4,90 +4,181 @@
 from xugrid.regrid import reduce
 
 
-@pytest.fixture(scope="function")
-def args():
-    values = np.array([0.0, 1.0, 2.0, np.nan, 3.0, 4.0])
-    indices = np.array([0, 1, 2, 3])
+def forward_args():
+    values = np.array([0.0, 1.0, 2.0, np.nan])
     weights = np.array([0.5, 0.5, 0.5, 0.5])
-    return values, indices, weights
+    work = np.empty_like(weights)
+    return values, weights, work
+
 
+def reverse_args():
+    values = np.flip(np.array([0.0, 1.0, 2.0, np.nan]))
+    weights = np.array([0.5, 0.5, 0.5, 0.5])
+    work = np.empty_like(weights)
+    return values, weights, work
 
+
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_mean(args):
     actual = reduce.mean(*args)
     assert np.allclose(actual, 1.0)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_harmonic_mean(args):
     actual = reduce.harmonic_mean(*args)
     assert np.allclose(actual, 1.0 / (0.5 / 1.0 + 0.5 / 2.0))
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_geometric_mean(args):
     actual = reduce.geometric_mean(*args)
     assert np.allclose(actual, np.sqrt(1.0 * 2.0))
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_sum(args):
     actual = reduce.sum(*args)
     assert np.allclose(actual, 3.0)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_minimum(args):
     actual = reduce.minimum(*args)
     assert np.allclose(actual, 0.0)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_maximum(args):
     actual = reduce.maximum(*args)
     assert np.allclose(actual, 2.0)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_mode(args):
+    # In case of ties, returns the last not-nan value.
     actual = reduce.mode(*args)
-    assert np.allclose(actual, 0.0)
-
-    values = np.array([0.0, 1.0, 1.0, 2.0, np.nan])
-    indices = np.array([0, 1, 2, 3, 4])
-    weights = np.array([0.5, 0.5, 0.5, 0.5, 0.5])
-    args = (values, indices, weights)
-    actual = reduce.mode(*args)
-    assert np.allclose(actual, 1.0)
-    # The weights shouldn't be mutated!
-    assert np.allclose(weights, 0.5)
-
-    values = np.array([-1, 1, 1, 3, 4])
-    indices = np.array([1, 2, 3])
-    weights = np.array([1.0, 1.0, 1.0])
-    args = (values, indices, weights)
-    actual = reduce.mode(*args)
-    assert np.allclose(actual, 1.0)
-
-    values = np.array([99, 1, 2, 3, 4, 5, 6, 7, 8])
-    indices = np.array([4, 5, 6])
-    weights = np.array([0.5, 0.5, 0.5])
-    args = (values, indices, weights)
-    actual = reduce.mode(*args)
-    assert np.allclose(actual, 4)
-    assert np.allclose(weights, 0.5)
+    assert ~np.isnan(actual)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_median(args):
     actual = reduce.median(*args)
     assert np.allclose(actual, 1.0)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_conductance(args):
     actual = reduce.conductance(*args)
     assert np.allclose(actual, 1.5)
 
 
+@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_max_overlap(args):
     actual = reduce.max_overlap(*args)
-    assert np.allclose(actual, 0.0)
+    # It returns the last not-nan value.
+    assert ~np.isnan(actual)
+
 
+def test_max_overlap_extra():
     values = np.array([0.0, 1.0, 2.0, np.nan])
-    indices = np.array([0, 1, 2, 3])
     weights = np.array([0.5, 1.5, 0.5, 2.5])
-    args = (values, indices, weights)
+    workspace = np.empty_like(weights)
+    args = (values, weights, workspace)
     actual = reduce.max_overlap(*args)
     assert np.allclose(actual, 1.0)
+
+
+def test_mode_extra():
+    values = np.array([0.0, 1.0, 1.0, 2.0, np.nan])
+    weights = np.array([0.5, 0.5, 0.5, 0.5, 0.5])
+    workspace = np.empty_like(weights)
+    args = (values, weights, workspace)
+    actual = reduce.mode(*args)
+    assert np.allclose(actual, 1.0)
+    # The weights shouldn't be mutated!
+    assert np.allclose(weights, 0.5)
+
+    values = np.array([1, 1, 3])
+    weights = np.array([1.0, 1.0, 1.0])
+    workspace = np.empty_like(weights)
+    args = (values, weights, workspace)
+    actual = reduce.mode(*args)
+    assert np.allclose(actual, 1.0)
+
+    values = np.array([4, 5, 6])
+    weights = np.array([0.5, 0.5, 0.5])
+    workspace = np.empty_like(weights)
+    args = (values, weights, workspace)
+    actual = reduce.mode(*args)
+    # Returns last not-nan value
+    assert np.allclose(actual, 6)
+    assert np.allclose(weights, 0.5)
+
+
+def test_percentile():
+    # Simplified from:
+    # https://github.com/numba/numba/blob/2001717f3321a5082c39c5787676320e699aed12/numba/tests/test_array_reductions.py#L396
+    def func(x, p):
+        p = np.atleast_1d(p)
+        values = x.ravel()
+        weights = np.empty_like(values)
+        work = np.empty_like(values)
+        return np.array([reduce.percentile(values, weights, work, pval) for pval in p])
+
+    q_upper_bound = 100.0
+    x = np.arange(8) * 0.5
+    np.testing.assert_equal(func(x, 0), 0.0)
+    np.testing.assert_equal(func(x, q_upper_bound), 3.5)
+    np.testing.assert_equal(func(x, q_upper_bound / 2), 1.75)
+
+    x = np.arange(12).reshape(3, 4)
+    q = np.array((0.25, 0.5, 1.0)) * q_upper_bound
+    np.testing.assert_equal(func(x, q), [2.75, 5.5, 11.0])
+
+    x = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
+    q = np.array((0.25, 0.50)) * q_upper_bound
+    np.testing.assert_equal(func(x, q).shape, (2,))
+
+    q = np.array((0.25, 0.50, 0.75)) * q_upper_bound
+    np.testing.assert_equal(func(x, q).shape, (3,))
+
+    x = np.arange(12).reshape(3, 4)
+    np.testing.assert_equal(func(x, q_upper_bound / 2), 5.5)
+
+    np.testing.assert_equal(func(np.array([1, 2, 3]), 0), 1)
+
+    a = np.array([2, 3, 4, 1])
+    func(a, [q_upper_bound / 2])
+    np.testing.assert_equal(a, np.array([2, 3, 4, 1]))
+
+
+METHODS = [
+    reduce.mean,
+    reduce.harmonic_mean,
+    reduce.geometric_mean,
+    reduce.sum,
+    reduce.minimum,
+    reduce.maximum,
+    reduce.mode,
+    reduce.first_order_conservative,
+    reduce.conductance,
+    reduce.max_overlap,
+    reduce.median,
+]
+
+
+@pytest.mark.parametrize("f", METHODS)
+def test_weights_all_zeros(f):
+    values = np.ones(5)
+    weights = np.zeros(5)
+    workspace = np.zeros(5)
+    assert np.isnan(f(values, weights, workspace))
+
+
+@pytest.mark.parametrize("f", METHODS)
+def test_weights_all_nan(f):
+    values = np.full(5, np.nan)
+    weights = np.ones(5)
+    workspace = np.zeros(5)
+    assert np.isnan(f(values, weights, workspace))
diff --git a/tests/test_regrid/test_regridder.py b/tests/test_regrid/test_regridder.py
index 0d9e3605b..699e39fdc 100644
--- a/tests/test_regrid/test_regridder.py
+++ b/tests/test_regrid/test_regridder.py
@@ -269,3 +269,17 @@ def test_regridder_daks_arrays(
     )
     result = regridder.regrid(grid_data_dask_source_layered)
     assert result.isel(layer=0).equals(grid_data_dask_expected_layered.isel(layer=0))
+
+
+def test_create_percentile_method():
+    with pytest.raises(ValueError):
+        OverlapRegridder.create_percentile_method(percentile=-1)
+
+    with pytest.raises(ValueError):
+        OverlapRegridder.create_percentile_method(percentile=101)
+
+    median = OverlapRegridder.create_percentile_method(percentile=50)
+    values = np.array([0, 1, 2, 3, 4], dtype=float)
+    weights = np.ones_like(values)
+    workspace = np.zeros_like(values)
+    assert median(values, weights, workspace) == 2
diff --git a/xugrid/regrid/nanpercentile.py b/xugrid/regrid/nanpercentile.py
new file mode 100644
index 000000000..07a0ced4c
--- /dev/null
+++ b/xugrid/regrid/nanpercentile.py
@@ -0,0 +1,97 @@
+"""
+Numba percentile methods allocate continuosly on the heap.
+
+This has significant overhead when calling the reduction method millions of
+times -- as happens when regridding.
+
+This is a simplified port of the percentile helpers:
+# https://github.com/numba/numba/blob/0441bb17c7820efc2eba4fd141b68dac2afa4740/numba/np/arraymath.py#L1595
+"""
+import numba as nb
+import numpy as np
+
+
+@nb.njit(inline="always")
+def nan_le(a, b) -> bool:
+    # Nan-aware <
+    if np.isnan(a):
+        return False
+    elif np.isnan(b):
+        return True
+    else:
+        return a < b
+
+
+@nb.njit
+def _partition(A, low, high):
+    mid = (low + high) >> 1
+    # NOTE: the pattern of swaps below for the pivot choice and the
+    # partitioning gives good results (i.e. regular O(n log n))
+    # on sorted, reverse-sorted, and uniform arrays.  Subtle changes
+    # risk breaking this property.
+
+    # Use median of three {low, middle, high} as the pivot
+    if nan_le(A[mid], A[low]):
+        A[low], A[mid] = A[mid], A[low]
+    if nan_le(A[high], A[mid]):
+        A[high], A[mid] = A[mid], A[high]
+    if nan_le(A[mid], A[low]):
+        A[low], A[mid] = A[mid], A[low]
+    pivot = A[mid]
+
+    A[high], A[mid] = A[mid], A[high]
+    i = low
+    j = high - 1
+    while True:
+        while i < high and nan_le(A[i], pivot):
+            i += 1
+        while j >= low and nan_le(pivot, A[j]):
+            j -= 1
+        if i >= j:
+            break
+        A[i], A[j] = A[j], A[i]
+        i += 1
+        j -= 1
+    # Put the pivot back in its final place (all items before `i`
+    # are smaller than the pivot, all items at/after `i` are larger)
+    A[i], A[high] = A[high], A[i]
+    return i
+
+
+@nb.njit
+def _select(arry, k, low, high):
+    """Select the k'th smallest element in array[low:high + 1]."""
+    i = _partition(arry, low, high)
+    while i != k:
+        if i < k:
+            low = i + 1
+            i = _partition(arry, low, high)
+        else:
+            high = i - 1
+            i = _partition(arry, low, high)
+    return arry[k]
+
+
+@nb.njit
+def _select_two(arry, k, low, high):
+    """
+    Select the k'th and k+1'th smallest elements in array[low:high + 1].
+
+    This is significantly faster than doing two independent selections
+    for k and k+1.
+    """
+    while True:
+        assert high > low  # by construction
+        i = _partition(arry, low, high)
+        if i < k:
+            low = i + 1
+        elif i > k + 1:
+            high = i - 1
+        elif i == k:
+            _select(arry, k + 1, i + 1, high)
+            break
+        else:  # i == k + 1
+            _select(arry, k, low, i - 1)
+            break
+
+    return arry[k], arry[k + 1]
diff --git a/xugrid/regrid/reduce.py b/xugrid/regrid/reduce.py
index f278b80a7..d0be30c94 100644
--- a/xugrid/regrid/reduce.py
+++ b/xugrid/regrid/reduce.py
@@ -1,12 +1,18 @@
 """Contains common reduction methods."""
+
+import math
+from typing import Callable
+
+import numba as nb
 import numpy as np
 
+from xugrid.regrid.nanpercentile import _select_two
+
 
-def mean(values, indices, weights):
+def mean(values, weights, workspace):
     vsum = 0.0
     wsum = 0.0
-    for i, w in zip(indices, weights):
-        v = values[i]
+    for v, w in zip(values, weights):
         if np.isnan(v):
             continue
         vsum += w * v
@@ -17,11 +23,10 @@ def mean(values, indices, weights):
         return vsum / wsum
 
 
-def harmonic_mean(values, indices, weights):
+def harmonic_mean(values, weights, workspace):
     v_agg = 0.0
     w_sum = 0.0
-    for i, w in zip(indices, weights):
-        v = values[i]
+    for v, w in zip(values, weights):
         if np.isnan(v) or v == 0:
             continue
         if w > 0:
@@ -33,21 +38,20 @@ def harmonic_mean(values, indices, weights):
         return w_sum / v_agg
 
 
-def geometric_mean(values, indices, weights):
+def geometric_mean(values, weights, workspace):
     v_agg = 0.0
     w_sum = 0.0
 
     # Compute sum to normalize weights to avoid tiny or huge values in exp
     normsum = 0.0
-    for i, w in zip(indices, weights):
+    for v, w in zip(values, weights):
         normsum += w
     # Early return if no values
     if normsum == 0:
         return np.nan
 
-    for i, w in zip(indices, weights):
+    for v, w in zip(values, weights):
         w = w / normsum
-        v = values[i]
         # Skip if v is NaN or 0.
         if v > 0 and w > 0:
             v_agg += w * np.log(abs(v))
@@ -64,12 +68,11 @@ def geometric_mean(values, indices, weights):
         return np.exp((1.0 / w_sum) * v_agg)
 
 
-def sum(values, indices, weights):
+def sum(values, weights, workspace):
     v_sum = 0.0
     w_sum = 0.0
 
-    for i, w in zip(indices, weights):
-        v = values[i]
+    for v, w in zip(values, weights):
         if np.isnan(v):
             continue
         v_sum += v
@@ -80,68 +83,127 @@ def sum(values, indices, weights):
         return v_sum
 
 
-def minimum(values, indices, weights):
-    vmin = values[indices[0]]
-    for i in indices:
-        v = values[i]
+def minimum(values, weights, workspace):
+    v_min = np.inf
+    w_max = 0.0
+    for v, w in zip(values, weights):
         if np.isnan(v):
             continue
-        if v < vmin:
-            vmin = v
-    return vmin
+        v_min = min(v, v_min)
+        w_max = max(w_max, w)
+    if w_max == 0.0:
+        return np.nan
+    return v_min
 
 
-def maximum(values, indices, weights):
-    vmax = values[indices[0]]
-    for i in indices:
-        v = values[i]
+def maximum(values, weights, workspace):
+    v_max = -np.inf
+    w_max = 0.0
+    for v, w in zip(values, weights):
         if np.isnan(v):
             continue
-        if v > vmax:
-            vmax = v
-    return vmax
+        v_max = max(v, v_max)
+        w_max = max(w_max, w)
+    if w_max == 0.0:
+        return np.nan
+    return v_max
 
 
-def mode(values, indices, weights):
+def mode(values, weights, workspace):
     # Area weighted mode. We use a linear search to accumulate weights, as we
     # generally expect a relatively small number of elements in the indices and
     # weights arrays.
-    accum = weights.copy()
+    accum = workspace
+    accum[: weights.size] = weights[:]
     w_sum = 0
-    for running_total, (i, w) in enumerate(zip(indices, weights)):
-        v = values[i]
+    w_max = 0.0
+    for running_total, (v, w) in enumerate(zip(values, weights)):
         if np.isnan(v):
             continue
+        w_max = max(w, w_max)
         w_sum += 1
         for j in range(running_total):  # Compare with previously found values
             if values[j] == v:  # matches previous value
                 accum[j] += w  # increase previous weight sum
                 break
 
-    if w_sum == 0:  # It skipped everything: only nodata values
+    if w_sum == 0 or w_max == 0.0:
+        # Everything skipped (all nodata), or all weights zero
         return np.nan
     else:  # Find value with highest frequency
         w_max = 0
-        for w_accum, i in zip(accum, indices):
-            if w_accum > w_max:
+        mode_value = values[0]
+        for w_accum, v in zip(accum, values):
+            if w_accum >= w_max and ~np.isnan(v):
                 w_max = w_accum
-                v = values[i]
-        return v
+                mode_value = v
+        return mode_value
+
+
+@nb.njit
+def percentile(values, weights, workspace, p):
+    # This function is a simplified port of:
+    # https://github.com/numba/numba/blob/0441bb17c7820efc2eba4fd141b68dac2afa4740/numba/np/arraymath.py#L1745
+
+    w_max = 0.0
+    for w in weights:
+        w_max = max(w, w_max)
+    if w_max == 0.0:
+        return np.nan
+
+    if p == 0:
+        # Equal to minimum
+        vmin = values[0]
+        for v in values[1:]:
+            if np.isnan(v):
+                continue
+            if v < vmin:
+                vmin = v
+        return vmin
+
+    if p == 100:
+        # Equal to maximum
+        vmax = values[0]
+        for v in values:
+            if np.isnan(v):
+                continue
+            if v > vmax:
+                vmax = v
+        return vmax
+
+    # Everything should've been checked before:
+    #
+    # * a.dtype should be float
+    # * 0 <= q <= 100.
+    #
+    # Filter the NaNs
+
+    n = 0
+    for v in values:
+        if ~np.isnan(v):
+            workspace[n] = v
+            n += 1
 
+    # Early returns
+    if n == 0:
+        return np.nan
+    if n == 1:
+        return workspace[0]
 
-def median(values, indices, weights):
-    # TODO: more efficient implementation?
-    # See: https://github.com/numba/numba/blob/0441bb17c7820efc2eba4fd141b68dac2afa4740/numba/np/arraymath.py#L1693
-    return np.nanpercentile(values[indices], 50)
+    # linear interp between closest ranks
+    rank = 1 + (n - 1) * p / 100.0
+    f = math.floor(rank)
+    m = rank - f
+    lower, upper = _select_two(workspace[:n], k=int(f - 1), low=0, high=(n - 1))
+    return lower * (1 - m) + upper * m
 
 
-def first_order_conservative(values, indices, weights):
+def first_order_conservative(values, weights, workspace):
     # Uses relative weights!
     # Rename to: first order conservative?
     v_agg = 0.0
     w_sum = 0.0
-    for i, w in zip(indices, weights):
-        v = values[i]
+    for v, w in zip(values, weights):
         if np.isnan(v):
             continue
         v_agg += v * w
@@ -155,20 +217,32 @@ def first_order_conservative(values, indices, weights):
 conductance = first_order_conservative
 
 
-def max_overlap(values, indices, weights):
-    max_w = 0.0
+def max_overlap(values, weights, workspace):
+    w_max = 0.0
     v = np.nan
-    for i, w in zip(indices, weights):
-        if w > max_w:
-            max_w = w
-            v_temp = values[i]
-            if np.isnan(v_temp):
-                continue
+    for v_temp, w in zip(values, weights):
+        if w >= w_max and ~np.isnan(v_temp):
+            w_max = w
             v = v_temp
+    if w_max == 0.0:
+        return np.nan
     return v
 
 
-ASBOLUTE_OVERLAP_METHODS = {
+def create_percentile_method(p: float) -> Callable:
+    if not (0.0 <= p <= 100.0):
+        raise ValueError(f"percentile must be in the range [0, 100], received: {p}")
+
+    def f(values, weights, workspace) -> float:
+        return percentile(values, weights, workspace, p)
+
+    return f
+
+
+median = create_percentile_method(50)
+
+
+ABSOLUTE_OVERLAP_METHODS = {
     "mean": mean,
     "harmonic_mean": harmonic_mean,
     "geometric_mean": geometric_mean,
@@ -179,6 +253,8 @@ def max_overlap(values, indices, weights):
     "median": median,
     "max_overlap": max_overlap,
 }
+for p in (5, 10, 25, 50, 75, 90, 95):
+    ABSOLUTE_OVERLAP_METHODS[f"p{p}"] = create_percentile_method(p)
 
 
 RELATIVE_OVERLAP_METHODS = {
diff --git a/xugrid/regrid/regridder.py b/xugrid/regrid/regridder.py
index 104f4cb68..0d128fb24 100644
--- a/xugrid/regrid/regridder.py
+++ b/xugrid/regrid/regridder.py
@@ -1,4 +1,5 @@
 """This module is heavily inspired by xemsf.frontend.py"""
+
 import abc
 from typing import Callable, Optional, Tuple, Union
 
@@ -34,16 +35,31 @@ def make_regrid(func):
     f = numba.njit(func, inline="always")
 
     def _regrid(source: FloatArray, A: MatrixCSR, size: int):
+        # Pre-allocate the output array
         n_extra = source.shape[0]
         out = np.full((n_extra, size), np.nan)
+        # Pre-allocate workspace arrays. Every reduction algorithm should use
+        # no more than the size of indices. Every thread gets it own workspace
+        # row!
+        n_work = np.diff(A.indptr).max()
+        workspace = np.empty((n_extra, 2, n_work), dtype=np.float64)
         for extra_index in numba.prange(n_extra):
             source_flat = source[extra_index]
             for target_index in range(A.n):
                 slice = row_slice(A, target_index)
                 indices = A.indices[slice]
                 weights = A.data[slice]
+
+                # Copy the source data for this row to values.
+                n_value = len(indices)
+                values = workspace[extra_index, 0, :n_value]
+                for i, index in enumerate(indices):
+                    values[i] = source_flat[index]
+
                 if len(indices) > 0:
-                    out[extra_index, target_index] = f(source_flat, indices, weights)
+                    out[extra_index, target_index] = f(
+                        values, weights, workspace[extra_index, 1, :]
+                    )
         return out
 
     return numba.njit(_regrid, parallel=True, cache=True)
@@ -433,10 +449,14 @@ class OverlapRegridder(BaseOverlapRegridder):
     * ``"mode"``
     * ``"median"``
     * ``"max_overlap"``
+    * percentiles 5, 10, 25, 50, 75, 90, 95: as ``"p5"``, ``"p10"``, etc.
 
     Custom aggregation functions are also supported, if they can be compiled by
     Numba. See the User Guide.
 
+    Any percentile method can be created via:
+    ``method = OverlapRegridder.create_percentile_methode(percentile)``
+
     Parameters
     ----------
     source: Ugrid2d, UgridDataArray
@@ -446,7 +466,7 @@ class OverlapRegridder(BaseOverlapRegridder):
     """
 
     _JIT_FUNCTIONS = {
-        k: make_regrid(f) for k, f in reduce.ASBOLUTE_OVERLAP_METHODS.items()
+        k: make_regrid(f) for k, f in reduce.ABSOLUTE_OVERLAP_METHODS.items()
     }
 
     def __init__(
@@ -461,6 +481,10 @@ def __init__(
     def _compute_weights(self, source, target) -> None:
         super()._compute_weights(source, target, relative=False)
 
+    @staticmethod
+    def create_percentile_method(percentile: float) -> Callable:
+        return reduce.create_percentile_method(percentile)
+
     @classmethod
     def from_weights(
         cls,

From d128d9539c15977a73b67277d8e51bc9c0a06576 Mon Sep 17 00:00:00 2001
From: Huite Bootsma <huitebootsma@gmail.com>
Date: Tue, 6 Aug 2024 10:32:29 +0200
Subject: [PATCH 2/4] Make mode and max_overlap give the same answer regardless
 of order Small miscellaneous fixes.

---
 docs/changelog.rst               | 25 +++++++++++++
 examples/overlap_regridder.py    | 30 ++++++++--------
 tests/test_regrid/test_reduce.py | 11 +++---
 xugrid/regrid/reduce.py          | 61 +++++++++++++++++---------------
 xugrid/regrid/regridder.py       | 10 +++++-
 5 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index a23722067..76d87ef02 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -6,6 +6,31 @@ All notable changes to this project will be documented in this file.
 The format is based on `Keep a Changelog`_, and this project adheres to
 `Semantic Versioning`_.
 
+Unreleased
+----------
+
+Fixed
+~~~~~
+
+- The reduction methods for the overlap regridders now behave consistently when
+  all values are NaN or when all weights (overlaps) are zero, and all methods
+  give the same answer irrespective of the order in which the values are
+  encountered.
+  
+Added
+~~~~~
+
+- Percentiles (5, 10, 25, 50, 75, 90, 95) have been added to the
+  :class:`xugrid.OverlapRegridder` as standard available reduction methods
+  (available as ``"p5", "p10"``, etc.). Custom percentile values (e.g. 2.5, 42) can be
+  setup using :meth:`xugrid.OverlapRegridder.create_percentile_method`.
+  
+Changed
+~~~~~~~
+
+- Custom reduction functions provide to the overlap regridders no longer require
+  an ``indices`` argument.
+
 [0.11.0] 2024-08-05
 -------------------
 
diff --git a/examples/overlap_regridder.py b/examples/overlap_regridder.py
index ec78e7a37..55dcfc6e0 100644
--- a/examples/overlap_regridder.py
+++ b/examples/overlap_regridder.py
@@ -113,9 +113,9 @@ def create_grid(bounds, nx, ny):
 #
 # * ``values``: is the array containing the (float) source values.
 # * ``weights``: contains the (float) overlap between the target face and the
-#   source faces. The size of ``weights`` is equal to the size of ``indices``.
-# * ``work``: used as a temporary workspace. Contains one float value for each
-#   index in indices.
+#   source faces. The size of ``weights`` is equal to the size of ``values``.
+# * ``work``: used as a temporary workspace of floats. The size of ``work`` is
+#   equal to the size of ``values``.
 #
 # Xugrid regridder reduction functions are implemented in such a way. For a
 # example, an area weighted sum could be implemented as follows:
@@ -126,7 +126,7 @@ def mean(values, weights, workspace):
     weight_sum = 0.0
     for value, weight in zip(values, weights):
         if ~np.isnan(value):
-            total = +value * weight
+            total += value * weight
             weight_sum += weight
     if weight_sum == 0.0:
         return np.nan
@@ -136,20 +136,24 @@ def mean(values, weights, workspace):
 # %%
 # .. note::
 #    * Each reduction must return a single float.
-#    * Custom reductions methods must be able to deal with NaN values as these
-#      are commonly encountered in datasets as a "no data value".
+#    * Always check for ``np.isnan(value)``: Custom reductions methods must be
+#      able to deal with NaN values as these are commonly encountered in datasets
+#      as a "no data value".
 #    * If Python features are used that are unsupported by Numba, you will get
-#      somewhat obscure errors. In such a case, test your function with
-#      synthetic values for ``values, weights, workspace``.
+#      somewhat obscure errors. In such a case, ``numba.njit`` and test your
+#      function separately with synthetic values for ``values, weights,
+#      workspace``.
 #    * The ``workspace`` array is provided to avoid dynamic memory allocations.
 #      It is a an array of floats with the same size as ``values`` or
 #      ``weights``. You may freely allocate new arrays within the reduction
-#      function but it will impact performance.
+#      function but it will impact performance. (Methods such as mode or median
+#      require a workspace.)
 #    * While we could have implemented a weighted mean as:
 #      ``np.nansum(values * weights) / np.nansum(weights)``, the function above
-#      is efficiently compiled by Numba and does not allocate.
+#      is efficiently compiled by Numba and does not allocate temporary arrays.
 #
-# To use our custom method, we provide at initialization of the OverlapRegridder:
+# To use our custom method, we provide it at initialization of the
+# OverlapRegridder:
 
 regridder = xu.OverlapRegridder(uda, grid, method=mean)
 result = regridder.regrid(uda)
@@ -157,7 +161,7 @@ def mean(values, weights, workspace):
 
 # %%
 # Not every reduction uses the ``weights`` and ``workspace`` arguments. For
-# example, a regular sum:
+# example, a regular sum could only look at the values:
 
 
 def nansum(values, weights, workspace):
@@ -165,8 +169,6 @@ def nansum(values, weights, workspace):
 
 
 # %%
-# Always ensure that the function can deal with NaN values!
-#
 # Custom percentiles
 # ------------------
 #
diff --git a/tests/test_regrid/test_reduce.py b/tests/test_regrid/test_reduce.py
index cd3c104d4..058e04a46 100644
--- a/tests/test_regrid/test_reduce.py
+++ b/tests/test_regrid/test_reduce.py
@@ -56,9 +56,10 @@ def test_maximum(args):
 
 @pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_mode(args):
-    # In case of ties, returns the last not-nan value.
     actual = reduce.mode(*args)
-    assert ~np.isnan(actual)
+    # We have tied frequency (all weights 0.5). In this case we return the
+    # highest value.
+    assert np.allclose(actual, 2.0)
 
 
 @pytest.mark.parametrize("args", [forward_args(), reverse_args()])
@@ -76,8 +77,8 @@ def test_conductance(args):
 @pytest.mark.parametrize("args", [forward_args(), reverse_args()])
 def test_max_overlap(args):
     actual = reduce.max_overlap(*args)
-    # It returns the last not-nan value.
-    assert ~np.isnan(actual)
+    # We have tied overlap (all 0.5). In this case we return the highest value.
+    assert np.allclose(actual, 2.0)
 
 
 def test_max_overlap_extra():
@@ -177,7 +178,7 @@ def test_weights_all_zeros(f):
 
 
 @pytest.mark.parametrize("f", METHODS)
-def test_weights_all_nan(f):
+def test_values_all_nan(f):
     values = np.full(5, np.nan)
     weights = np.ones(5)
     workspace = np.zeros(5)
diff --git a/xugrid/regrid/reduce.py b/xugrid/regrid/reduce.py
index d0be30c94..ea3173449 100644
--- a/xugrid/regrid/reduce.py
+++ b/xugrid/regrid/reduce.py
@@ -83,7 +83,8 @@ def sum(values, weights, workspace):
         return v_sum
 
 
-def minimum(values, weights, workspace):
+@nb.njit(inline="always")
+def _minimum(values, weights):
     v_min = np.inf
     w_max = 0.0
     for v, w in zip(values, weights):
@@ -96,7 +97,12 @@ def minimum(values, weights, workspace):
     return v_min
 
 
-def maximum(values, weights, workspace):
+def minimum(values, weights, workspace):
+    return _minimum(values, weights)
+
+
+@nb.njit(inline="always")
+def _maximum(values, weights):
     v_max = -np.inf
     w_max = 0.0
     for v, w in zip(values, weights):
@@ -109,6 +115,10 @@ def maximum(values, weights, workspace):
     return v_max
 
 
+def maximum(values, weights, workspace):
+    return _maximum(values, weights)
+
+
 def mode(values, weights, workspace):
     # Area weighted mode. We use a linear search to accumulate weights, as we
     # generally expect a relatively small number of elements in the indices and
@@ -130,13 +140,17 @@ def mode(values, weights, workspace):
     if w_sum == 0 or w_max == 0.0:
         # Everything skipped (all nodata), or all weights zero
         return np.nan
-    else:  # Find value with highest frequency
+    else:
+        # Find value with highest frequency.
+        # In case frequencies are equal (a tie), take the largest value.
+        # This ensures the same result irrespective of value ordering.
         w_max = 0
         mode_value = values[0]
         for w_accum, v in zip(accum, values):
-            if w_accum >= w_max and ~np.isnan(v):
-                w_max = w_accum
-                mode_value = v
+            if ~np.isnan(v):
+                if (w_accum > w_max) or (w_accum == w_max and v > mode_value):
+                    w_max = w_accum
+                    mode_value = v
         return mode_value
 
 
@@ -145,6 +159,7 @@ def percentile(values, weights, workspace, p):
     # This function is a simplified port of:
     # https://github.com/numba/numba/blob/0441bb17c7820efc2eba4fd141b68dac2afa4740/numba/np/arraymath.py#L1745
 
+    # Exit early if all weights are 0.
     w_max = 0.0
     for w in weights:
         w_max = max(w, w_max)
@@ -152,24 +167,10 @@ def percentile(values, weights, workspace, p):
         return np.nan
 
     if p == 0:
-        # Equal to minimum
-        vmin = values[0]
-        for v in values[1:]:
-            if np.isnan(v):
-                continue
-            if v < vmin:
-                vmin = v
-        return vmin
+        return _minimum(values, weights)
 
     if p == 100:
-        # Equal to maximum
-        vmax = values[0]
-        for v in values:
-            if np.isnan(v):
-                continue
-            if v > vmax:
-                vmax = v
-        return vmax
+        return _maximum(values, weights)
 
     # Everything should've been checked before:
     #
@@ -219,14 +220,18 @@ def first_order_conservative(values, weights, workspace):
 
 def max_overlap(values, weights, workspace):
     w_max = 0.0
-    v = np.nan
-    for v_temp, w in zip(values, weights):
-        if w >= w_max and ~np.isnan(v_temp):
-            w_max = w
-            v = v_temp
+    v_max = -np.inf
+    # Find value with highest overlap.
+    # In case frequencies are equal (a tie), take the largest value.
+    # This ensures the same result irrespective of value ordering.
+    for v, w in zip(values, weights):
+        if ~np.isnan(v):
+            if (w > w_max) or (w == w_max and v > v_max):
+                w_max = w
+                v_max = v
     if w_max == 0.0:
         return np.nan
-    return v
+    return v_max
 
 
 def create_percentile_method(p: float) -> Callable:
diff --git a/xugrid/regrid/regridder.py b/xugrid/regrid/regridder.py
index 0d128fb24..9b06ee735 100644
--- a/xugrid/regrid/regridder.py
+++ b/xugrid/regrid/regridder.py
@@ -58,7 +58,7 @@ def _regrid(source: FloatArray, A: MatrixCSR, size: int):
 
                 if len(indices) > 0:
                     out[extra_index, target_index] = f(
-                        values, weights, workspace[extra_index, 1, :]
+                        values, weights, workspace[extra_index, 1, :n_value]
                     )
         return out
 
@@ -456,6 +456,7 @@ class OverlapRegridder(BaseOverlapRegridder):
 
     Any percentile method can be created via:
     ``method = OverlapRegridder.create_percentile_methode(percentile)``
+    See the examples.
 
     Parameters
     ----------
@@ -463,6 +464,13 @@ class OverlapRegridder(BaseOverlapRegridder):
     target: Ugrid2d, UgridDataArray
     method: str, function, optional
         Default value is ``"mean"``.
+
+    Examples
+    --------
+    Setup a custom percentile method and apply it:
+
+    >>> p33_3 = OverlapRegridder.create_percentile_method(33.3)
+    >>> regridder = OverlapRegridder(source, target, method=p33_3)
     """
 
     _JIT_FUNCTIONS = {

From ec6ca85cfd76ebbe1957d18a66c991ae498bfbd7 Mon Sep 17 00:00:00 2001
From: Huite Bootsma <huitebootsma@gmail.com>
Date: Tue, 6 Aug 2024 10:40:27 +0200
Subject: [PATCH 3/4] Remove mention of indices in example

---
 examples/overlap_regridder.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/overlap_regridder.py b/examples/overlap_regridder.py
index 55dcfc6e0..e7835158a 100644
--- a/examples/overlap_regridder.py
+++ b/examples/overlap_regridder.py
@@ -109,13 +109,14 @@ def create_grid(bounds, nx, ny):
 # inserted during the ``.regrid`` call and compiled by `Numba`_ for performance.
 #
 # A valid reduction method must be compileable by Numba, and takes exactly three
-# arguments: ``values``, ``indices``, ``weights``.
+# arguments: ``values``, ``weights``, ``workspace``.
 #
 # * ``values``: is the array containing the (float) source values.
 # * ``weights``: contains the (float) overlap between the target face and the
 #   source faces. The size of ``weights`` is equal to the size of ``values``.
-# * ``work``: used as a temporary workspace of floats. The size of ``work`` is
-#   equal to the size of ``values``.
+# * ``workspace``: used as a temporary workspace of floats. The size of ``work`` is
+#   equal to the size of ``values``. (Make sure to zero it beforehand if that's
+#   important to your reduction!)
 #
 # Xugrid regridder reduction functions are implemented in such a way. For a
 # example, an area weighted sum could be implemented as follows:

From abedf0b9182fbb67678f3998c8f0ee8e1f143c9d Mon Sep 17 00:00:00 2001
From: Huite Bootsma <huitebootsma@gmail.com>
Date: Tue, 6 Aug 2024 11:27:18 +0200
Subject: [PATCH 4/4] Address review comments

---
 tests/test_regrid/test_reduce.py | 6 ++----
 xugrid/regrid/nanpercentile.py   | 2 +-
 xugrid/regrid/regridder.py       | 8 +++++++-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/test_regrid/test_reduce.py b/tests/test_regrid/test_reduce.py
index 058e04a46..7d81f7547 100644
--- a/tests/test_regrid/test_reduce.py
+++ b/tests/test_regrid/test_reduce.py
@@ -12,10 +12,8 @@ def forward_args():
 
 
 def reverse_args():
-    values = np.flip(np.array([0.0, 1.0, 2.0, np.nan]))
-    weights = np.array([0.5, 0.5, 0.5, 0.5])
-    work = np.empty_like(weights)
-    return values, weights, work
+    values, weights, work = forward_args()
+    return np.flip(values), weights, work
 
 
 @pytest.mark.parametrize("args", [forward_args(), reverse_args()])
diff --git a/xugrid/regrid/nanpercentile.py b/xugrid/regrid/nanpercentile.py
index 07a0ced4c..1163c3612 100644
--- a/xugrid/regrid/nanpercentile.py
+++ b/xugrid/regrid/nanpercentile.py
@@ -1,5 +1,5 @@
 """
-Numba percentile methods allocate continuosly on the heap.
+Numba percentile methods allocate continuously on the heap.
 
 This has significant overhead when calling the reduction method millions of
 times -- as happens when regridding.
diff --git a/xugrid/regrid/regridder.py b/xugrid/regrid/regridder.py
index 9b06ee735..bb587ec30 100644
--- a/xugrid/regrid/regridder.py
+++ b/xugrid/regrid/regridder.py
@@ -467,10 +467,16 @@ class OverlapRegridder(BaseOverlapRegridder):
 
     Examples
     --------
+    Create an OverlapRegridder to regrid with mean:
+
+    >>> regridder = OverlapRegridder(source_grid, target_grid, method="mean")
+    >>> regridder.regrid(source_data)
+
     Setup a custom percentile method and apply it:
 
     >>> p33_3 = OverlapRegridder.create_percentile_method(33.3)
-    >>> regridder = OverlapRegridder(source, target, method=p33_3)
+    >>> regridder = OverlapRegridder(source_grid, target_grid, method=p33_3)
+    >>> regridder.regrid(source_data)
     """
 
     _JIT_FUNCTIONS = {