Refactor reduce module; improve performance for mode, median #270

Merged (4 commits) on Aug 6, 2024
docs/changelog.rst (25 additions, 0 deletions)
All notable changes to this project will be documented in this file.
The format is based on `Keep a Changelog`_, and this project adheres to
`Semantic Versioning`_.

Unreleased
----------

Fixed
~~~~~

- The reduction methods for the overlap regridders now behave consistently when
all values are NaN or when all weights (overlaps) are zero, and all methods
give the same answer irrespective of the order in which the values are
encountered.

Added
~~~~~

- Percentiles (5, 10, 25, 50, 75, 90, 95) have been added to the
:class:`xugrid.OverlapRegridder` as standard available reduction methods
(available as ``"p5", "p10"``, etc.). Custom percentile values (e.g. 2.5, 42) can be
  set up using :meth:`xugrid.OverlapRegridder.create_percentile_method`.

Changed
~~~~~~~

- Custom reduction functions provided to the overlap regridders no longer
  require an ``indices`` argument.

[0.11.0] 2024-08-05
-------------------

examples/overlap_regridder.py (50 additions, 24 deletions)
# A valid reduction method must be compileable by Numba, and takes exactly three
# arguments: ``values``, ``weights``, ``workspace``.
#
# * ``values``: the array containing the (float) source values.
# * ``weights``: contains the (float) overlap between the target face and the
#   source faces. The size of ``weights`` is equal to the size of ``values``.
# * ``workspace``: a temporary workspace of floats. The size of ``workspace``
#   is equal to the size of ``values``.
#
# Xugrid regridder reduction functions are implemented in such a way. For
# example, an area-weighted mean could be implemented as follows:


def mean(values, weights, workspace):
    total = 0.0
    weight_sum = 0.0
    for value, weight in zip(values, weights):
        if not np.isnan(value):
            total += value * weight
            weight_sum += weight
    if weight_sum == 0.0:
        return np.nan
    return total / weight_sum
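As a quick sanity check, the weighted mean above can be exercised directly with small synthetic arrays before handing it to the regridder (the function is restated here so the snippet runs standalone; the input values are made up for illustration):

```python
import numpy as np

def mean(values, weights, workspace):
    # Weighted mean that skips NaN entries; returns NaN when no weight remains.
    total = 0.0
    weight_sum = 0.0
    for value, weight in zip(values, weights):
        if not np.isnan(value):
            total += value * weight
            weight_sum += weight
    if weight_sum == 0.0:
        return np.nan
    return total / weight_sum

values = np.array([1.0, 3.0, np.nan])
weights = np.array([0.5, 0.5, 0.5])
workspace = np.empty_like(values)
print(mean(values, weights, workspace))  # 2.0: the NaN entry is ignored
```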


# %%
# .. note::
#    * Always check for ``np.isnan(value)``: custom reduction methods must be
#      able to deal with NaN values, as these are commonly encountered in
#      datasets as a "no data" value.
#    * Each reduction must return a single float.
#    * If Python features are used that are unsupported by Numba, you will get
#      somewhat obscure errors. In such a case, decorate your function with
#      ``numba.njit`` and test it separately with synthetic values for
#      ``values, weights, workspace``.
#    * The ``workspace`` array is provided to avoid dynamic memory
#      allocations. It is an array of floats with the same size as ``values``
#      or ``weights``. You may freely allocate new arrays within the reduction
#      function, but doing so will impact performance. (Methods such as mode
#      or median require a workspace.)
#    * While we could have implemented a weighted mean as
#      ``np.nansum(values * weights) / np.nansum(weights)``, the function
#      above is efficiently compiled by Numba and does not allocate temporary
#      arrays.
#
# To use our custom method, we provide it at initialization of the
# OverlapRegridder:

regridder = xu.OverlapRegridder(uda, grid, method=mean)
result = regridder.regrid(uda)
result.ugrid.plot(vmin=-20, vmax=90, cmap="terrain")
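The note above suggests testing a reduction separately when Numba raises obscure compilation errors. A minimal sketch of that workflow, using a hypothetical ``nanmax`` reduction (an illustration, not part of xugrid), with a fallback so the snippet also runs where Numba is not installed:

```python
import numpy as np

try:
    from numba import njit
except ImportError:  # fall back to plain Python if Numba is absent
    def njit(func):
        return func

@njit
def nanmax(values, weights, workspace):
    # Hypothetical reduction: maximum of the non-NaN values.
    result = -np.inf
    for value in values:
        if not np.isnan(value) and value > result:
            result = value
    if result == -np.inf:
        return np.nan
    return result

# Calling the compiled function directly with synthetic arrays surfaces
# Numba compilation errors outside of the regridder machinery.
values = np.array([0.0, 5.0, np.nan])
weights = np.ones_like(values)
workspace = np.empty_like(values)
print(nanmax(values, weights, workspace))  # 5.0
```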

# %%
# Not every reduction uses the ``weights`` and ``workspace`` arguments. For
# example, a regular sum need only look at the values:


def nansum(values, weights, workspace):
    return np.nansum(values)

# %%
# Custom percentiles
# ------------------
#
# Xugrid provides a number of predefined percentiles (5, 10, 25, 50, 75, 90,
# 95). In case you need a different percentile value, you can use this utility:

p333 = xu.OverlapRegridder.create_percentile_method(33.3)

# %%
# Then, provide it as the regridder method as above:

regridder = xu.OverlapRegridder(uda, grid, method=p333)
result = regridder.regrid(uda)
result.ugrid.plot(vmin=-20, vmax=90, cmap="terrain")

tests/test_regrid/test_reduce.py (125 additions, 33 deletions)
from xugrid.regrid import reduce


def forward_args():
    values = np.array([0.0, 1.0, 2.0, np.nan])
    weights = np.array([0.5, 0.5, 0.5, 0.5])
    work = np.empty_like(weights)
    return values, weights, work


def reverse_args():

Reviewer comment (Contributor): To remove some duplicate code and show the
precise difference between forward and reverse args, you can simplify this
function to:

    def reverse_args():
        values, weights, work = forward_args()
        return np.flip(values), weights, work

(FYI: I wasn't allowed by GitHub to put a multi-line suggestion here, as some
lines are deleted. Therefore put the suggestion just on the function
definition line.)

    values = np.flip(np.array([0.0, 1.0, 2.0, np.nan]))
    weights = np.array([0.5, 0.5, 0.5, 0.5])
    work = np.empty_like(weights)
    return values, weights, work


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_mean(args):
actual = reduce.mean(*args)
assert np.allclose(actual, 1.0)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_harmonic_mean(args):
actual = reduce.harmonic_mean(*args)
assert np.allclose(actual, 1.0 / (0.5 / 1.0 + 0.5 / 2.0))


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_geometric_mean(args):
actual = reduce.geometric_mean(*args)
assert np.allclose(actual, np.sqrt(1.0 * 2.0))


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_sum(args):
actual = reduce.sum(*args)
assert np.allclose(actual, 3.0)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_minimum(args):
actual = reduce.minimum(*args)
assert np.allclose(actual, 0.0)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_maximum(args):
actual = reduce.maximum(*args)
assert np.allclose(actual, 2.0)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_mode(args):
    actual = reduce.mode(*args)
    # We have tied frequency (all weights 0.5). In this case we return the
    # highest value.
    assert np.allclose(actual, 2.0)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_median(args):
actual = reduce.median(*args)
assert np.allclose(actual, 1.0)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_conductance(args):
actual = reduce.conductance(*args)
assert np.allclose(actual, 1.5)


@pytest.mark.parametrize("args", [forward_args(), reverse_args()])
def test_max_overlap(args):
    actual = reduce.max_overlap(*args)
    # We have tied overlap (all 0.5). In this case we return the highest value.
    assert np.allclose(actual, 2.0)


def test_max_overlap_extra():
    values = np.array([0.0, 1.0, 2.0, np.nan])
    weights = np.array([0.5, 1.5, 0.5, 2.5])
    workspace = np.empty_like(weights)
    args = (values, weights, workspace)
    actual = reduce.max_overlap(*args)
    assert np.allclose(actual, 1.0)


def test_mode_extra():
values = np.array([0.0, 1.0, 1.0, 2.0, np.nan])
weights = np.array([0.5, 0.5, 0.5, 0.5, 0.5])
workspace = np.empty_like(weights)
args = (values, weights, workspace)
actual = reduce.mode(*args)
assert np.allclose(actual, 1.0)
# The weights shouldn't be mutated!
assert np.allclose(weights, 0.5)

values = np.array([1, 1, 3])
weights = np.array([1.0, 1.0, 1.0])
workspace = np.empty_like(weights)
args = (values, weights, workspace)
actual = reduce.mode(*args)
assert np.allclose(actual, 1.0)

values = np.array([4, 5, 6])
weights = np.array([0.5, 0.5, 0.5])
workspace = np.empty_like(weights)
args = (values, weights, workspace)
actual = reduce.mode(*args)
# Returns last not-nan value
assert np.allclose(actual, 6)
assert np.allclose(weights, 0.5)


def test_percentile():
# Simplified from:
# https://github.com/numba/numba/blob/2001717f3321a5082c39c5787676320e699aed12/numba/tests/test_array_reductions.py#L396
def func(x, p):
p = np.atleast_1d(p)
values = x.ravel()
weights = np.empty_like(values)
work = np.empty_like(values)
return np.array([reduce.percentile(values, weights, work, pval) for pval in p])

q_upper_bound = 100.0
x = np.arange(8) * 0.5
np.testing.assert_equal(func(x, 0), 0.0)
np.testing.assert_equal(func(x, q_upper_bound), 3.5)
np.testing.assert_equal(func(x, q_upper_bound / 2), 1.75)

x = np.arange(12).reshape(3, 4)
q = np.array((0.25, 0.5, 1.0)) * q_upper_bound
np.testing.assert_equal(func(x, q), [2.75, 5.5, 11.0])

x = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
q = np.array((0.25, 0.50)) * q_upper_bound
np.testing.assert_equal(func(x, q).shape, (2,))

q = np.array((0.25, 0.50, 0.75)) * q_upper_bound
np.testing.assert_equal(func(x, q).shape, (3,))

x = np.arange(12).reshape(3, 4)
np.testing.assert_equal(func(x, q_upper_bound / 2), 5.5)

np.testing.assert_equal(func(np.array([1, 2, 3]), 0), 1)

a = np.array([2, 3, 4, 1])
func(a, [q_upper_bound / 2])
np.testing.assert_equal(a, np.array([2, 3, 4, 1]))


METHODS = [
reduce.mean,
reduce.harmonic_mean,
reduce.geometric_mean,
reduce.sum,
reduce.minimum,
reduce.maximum,
reduce.mode,
reduce.first_order_conservative,
reduce.conductance,
reduce.max_overlap,
reduce.median,
]


@pytest.mark.parametrize("f", METHODS)
def test_weights_all_zeros(f):
values = np.ones(5)
weights = np.zeros(5)
workspace = np.zeros(5)
assert np.isnan(f(values, weights, workspace))


@pytest.mark.parametrize("f", METHODS)
def test_values_all_nan(f):
values = np.full(5, np.nan)
weights = np.ones(5)
workspace = np.zeros(5)
assert np.isnan(f(values, weights, workspace))
tests/test_regrid/test_regridder.py (14 additions, 0 deletions)

def test_regridder_daks_arrays(
)
result = regridder.regrid(grid_data_dask_source_layered)
assert result.isel(layer=0).equals(grid_data_dask_expected_layered.isel(layer=0))


def test_create_percentile_method():
with pytest.raises(ValueError):
OverlapRegridder.create_percentile_method(percentile=-1)

with pytest.raises(ValueError):
OverlapRegridder.create_percentile_method(percentile=101)

median = OverlapRegridder.create_percentile_method(percentile=50)
values = np.array([0, 1, 2, 3, 4], dtype=float)
weights = np.ones_like(values)
workspace = np.zeros_like(values)
assert median(values, weights, workspace) == 2