From cc25f3d30ce6f4e4939d5b913ca1a66ab32ac75e Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 11 Jan 2022 12:28:59 -0600
Subject: [PATCH] Match pandas scalar result types in reductions (#9717)

Moving this casting logic to python and updating it so that integer sum and product operations give back an `int64` and give back the original column dtype in float cases. This is a breaking change.

Closes https://github.com/rapidsai/cudf/issues/8449

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/9717
---
 python/cudf/cudf/_lib/reduce.pyx          | 11 ++++-------
 python/cudf/cudf/core/column/column.py    |  7 +++++++
 python/cudf/cudf/core/column/numerical.py | 11 +++++++++++
 python/cudf/cudf/tests/test_reductions.py |  6 ++----
 4 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index 21a039dbf78..ecb787703d2 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -43,13 +43,10 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
         to the same type as the input column
     """
 
-    col_dtype = incol.dtype
-    if (
-        reduction_op in ['sum', 'sum_of_squares', 'product']
-        and not is_decimal_dtype(col_dtype)
-    ):
-        col_dtype = np.find_common_type([col_dtype], [np.uint64])
-    col_dtype = col_dtype if dtype is None else dtype
+    col_dtype = (
+        dtype if dtype is not None
+        else incol._reduction_result_dtype(reduction_op)
+    )
 
     cdef column_view c_incol_view = incol.view()
     cdef unique_ptr[scalar] c_result
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index a3a8b0c91d1..c1e037499fc 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1235,6 +1235,13 @@ def _process_for_reduction(
             )
         return result_col
 
+    def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
+        """
+        Determine the correct dtype to pass to libcudf based on
+        the input dtype, data dtype, and specific reduction op
+        """
+        return self.dtype
+
     def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
         """
         Copies type metadata from self onto other, returning a new column.
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index c947440edb1..8f0a858ee34 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -641,6 +641,17 @@ def to_pandas(
             pd_series.index = index
         return pd_series
 
+    def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
+        col_dtype = self.dtype
+        if reduction_op in {"sum", "product"}:
+            col_dtype = (
+                col_dtype if col_dtype.kind == "f" else np.dtype("int64")
+            )
+        elif reduction_op == "sum_of_squares":
+            col_dtype = np.find_common_type([col_dtype], [np.dtype("uint64")])
+
+        return col_dtype
+
 
 def _normalize_find_and_replace_input(
     input_column_dtype: DtypeObj, col_to_normalize: Union[ColumnBase, list]
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index 89d665382d3..4ed6448de50 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -30,8 +30,7 @@ def test_sum(dtype, nelem):
     sr = Series(data)
 
     got = sr.sum()
-    expect = dtype(data.sum())
-
+    expect = data.sum()
     significant = 4 if dtype == np.float32 else 6
     np.testing.assert_approx_equal(expect, got, significant=significant)
 
@@ -83,8 +82,7 @@ def test_product(dtype, nelem):
     sr = Series(data)
 
     got = sr.product()
-    expect = np.product(data)
-
+    expect = pd.Series(data).product()
     significant = 4 if dtype == np.float32 else 6
     np.testing.assert_approx_equal(expect, got, significant=significant)