From 6c499fff1ee1cb2330347bf3586c72a5c84a07b3 Mon Sep 17 00:00:00 2001
From: Deepak Cherian <dcherian@users.noreply.github.com>
Date: Sun, 28 Jul 2024 23:50:13 -0600
Subject: [PATCH] Improve precision for mean, std, var, cumsum. (#90)

* Improve precision for mean, std, var.

np.bincount always accumulates to float64.
So only cast after the division.
---
 numpy_groupies/aggregate_numpy.py    | 28 +++++++++++++++++++---------
 numpy_groupies/tests/test_generic.py | 11 +++++++++++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/numpy_groupies/aggregate_numpy.py b/numpy_groupies/aggregate_numpy.py
index 844f2f2..dfbf11e 100644
--- a/numpy_groupies/aggregate_numpy.py
+++ b/numpy_groupies/aggregate_numpy.py
@@ -149,15 +149,16 @@ def _mean(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
         sums.real = np.bincount(group_idx, weights=a.real, minlength=size)
         sums.imag = np.bincount(group_idx, weights=a.imag, minlength=size)
     else:
-        sums = np.bincount(group_idx, weights=a, minlength=size).astype(
-            dtype, copy=False
-        )
+        sums = np.bincount(group_idx, weights=a, minlength=size)
 
     with np.errstate(divide="ignore", invalid="ignore"):
-        ret = sums.astype(dtype, copy=False) / counts
+        ret = sums / counts
     if not np.isnan(fill_value):
         ret[counts == 0] = fill_value
-    return ret
+    if iscomplexobj(a):
+        return ret
+    else:
+        return ret.astype(dtype, copy=False)
 
 
 def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
@@ -165,7 +166,10 @@ def _sum_of_squres(group_idx, a, size, fill_value, dtype=np.dtype(np.float64)):
     if fill_value != 0:
         counts = np.bincount(group_idx, minlength=size)
         ret[counts == 0] = fill_value
-    return ret
+    if iscomplexobj(a):
+        return ret
+    else:
+        return ret.astype(dtype, copy=False)
 
 
 def _var(
@@ -176,7 +180,7 @@ def _var(
     counts = np.bincount(group_idx, minlength=size)
     sums = np.bincount(group_idx, weights=a, minlength=size)
     with np.errstate(divide="ignore", invalid="ignore"):
-        means = sums.astype(dtype, copy=False) / counts
+        means = sums / counts
         counts = np.where(counts > ddof, counts - ddof, 0)
         ret = (
             np.bincount(group_idx, (a - means[group_idx]) ** 2, minlength=size) / counts
@@ -185,7 +189,10 @@ def _var(
         ret = np.sqrt(ret)  # this is now std not var
     if not np.isnan(fill_value):
         ret[counts == 0] = fill_value
-    return ret
+    if iscomplexobj(a):
+        return ret
+    else:
+        return ret.astype(dtype, copy=False)
 
 
 def _std(group_idx, a, size, fill_value, dtype=np.dtype(np.float64), ddof=0):
@@ -252,7 +259,10 @@ def _cumsum(group_idx, a, size, fill_value=None, dtype=None):
 
     increasing = np.arange(len(a), dtype=int)
     group_starts = _min(group_idx_srt, increasing, size, fill_value=0)[group_idx_srt]
-    a_srt_cumsum += -a_srt_cumsum[group_starts] + a_srt[group_starts]
+    # First subtract large numbers
+    a_srt_cumsum -= a_srt_cumsum[group_starts]
+    # Then add potentially small numbers
+    a_srt_cumsum += a_srt[group_starts]
     return a_srt_cumsum[invsortidx]
 
 
diff --git a/numpy_groupies/tests/test_generic.py b/numpy_groupies/tests/test_generic.py
index 2a2811d..7a8b772 100644
--- a/numpy_groupies/tests/test_generic.py
+++ b/numpy_groupies/tests/test_generic.py
@@ -570,3 +570,14 @@ def test_var_with_nan_fill_value(aggregate_all, ddof, nan_inds, func):
         group_idx, a, axis=-1, fill_value=np.nan, func=func, ddof=ddof
     )
     np.testing.assert_equal(actual, expected)
+
+
+def test_cumsum_accuracy(aggregate_all):
+    array = np.array(
+        [0.00000000e00, 0.00000000e00, 0.00000000e00, 3.27680000e04, 9.99999975e-06]
+    )
+    group_idx = np.array([0, 0, 0, 0, 1])
+
+    actual = aggregate_all(group_idx, array, axis=-1, func="cumsum")
+    expected = array
+    np.testing.assert_allclose(actual, expected)