diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index 56bfa0ba332..64634b7a6f9 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -1,4 +1,5 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+import warnings
 
 import cudf
 from cudf.core.buffer import acquire_spill_lock
@@ -26,11 +27,15 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
         A numpy data type to use for the output, defaults
         to the same type as the input column
     """
-
-    col_dtype = (
-        dtype if dtype is not None
-        else incol._reduction_result_dtype(reduction_op)
-    )
+    if dtype is not None:
+        warnings.warn(
+            "dtype is deprecated and will be removed in a future release. "
+            "Cast the result (e.g. .astype) after the operation instead.",
+            FutureWarning
+        )
+        col_dtype = dtype
+    else:
+        col_dtype = incol._reduction_result_dtype(reduction_op)
 
     # check empty case
     if len(incol) <= incol.null_count:
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 9467bbeed15..5e77aa87e4e 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -261,7 +261,7 @@ def all(self, skipna: bool = True) -> bool:
         if self.null_count == self.size:
             return True
 
-        return libcudf.reduce.reduce("all", self, dtype=np.bool_)
+        return libcudf.reduce.reduce("all", self)
 
     def any(self, skipna: bool = True) -> bool:
         # Early exit for fast cases.
@@ -271,7 +271,7 @@ def any(self, skipna: bool = True) -> bool:
         elif skipna and self.null_count == self.size:
             return False
 
-        return libcudf.reduce.reduce("any", self, dtype=np.bool_)
+        return libcudf.reduce.reduce("any", self)
 
     def dropna(self) -> Self:
         if self.has_nulls():
@@ -1305,7 +1305,10 @@ def _reduce(
             skipna=skipna, min_count=min_count
         )
         if isinstance(preprocessed, ColumnBase):
-            return libcudf.reduce.reduce(op, preprocessed, **kwargs)
+            dtype = kwargs.pop("dtype", None)
+            return libcudf.reduce.reduce(
+                op, preprocessed, dtype=dtype, **kwargs
+            )
         return preprocessed
 
     def _process_for_reduction(
@@ -1336,6 +1339,8 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
         Determine the correct dtype to pass to libcudf based on
         the input dtype, data dtype, and specific reduction op
         """
+        if reduction_op in {"any", "all"}:
+            return np.dtype(np.bool_)
         return self.dtype
 
     def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 004a059af95..a4538179415 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -485,13 +485,11 @@ def as_string_column(self) -> cudf.core.column.StringColumn:
                 format = format.split(" ")[0]
         return self.strftime(format)
 
-    def mean(
-        self, skipna=None, min_count: int = 0, dtype=np.float64
-    ) -> ScalarLike:
+    def mean(self, skipna=None, min_count: int = 0) -> ScalarLike:
         return pd.Timestamp(
             cast(
                 "cudf.core.column.NumericalColumn", self.astype("int64")
-            ).mean(skipna=skipna, min_count=min_count, dtype=dtype),
+            ).mean(skipna=skipna, min_count=min_count),
             unit=self.time_unit,
         ).as_unit(self.time_unit)
 
@@ -499,12 +497,11 @@ def std(
         self,
         skipna: bool | None = None,
         min_count: int = 0,
-        dtype: Dtype = np.float64,
         ddof: int = 1,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             cast("cudf.core.column.NumericalColumn", self.astype("int64")).std(
-                skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
+                skipna=skipna, min_count=min_count, ddof=ddof
             )
             * _unit_to_nanoseconds_conversion[self.time_unit],
         ).as_unit(self.time_unit)
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index cea68c88c90..ba080863722 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -395,7 +395,7 @@ def all(self, skipna: bool = True) -> bool:
         if result_col.null_count == result_col.size:
             return True
 
-        return libcudf.reduce.reduce("all", result_col, dtype=np.bool_)
+        return libcudf.reduce.reduce("all", result_col)
 
     def any(self, skipna: bool = True) -> bool:
         # Early exit for fast cases.
@@ -406,7 +406,7 @@ def any(self, skipna: bool = True) -> bool:
         elif skipna and result_col.null_count == result_col.size:
             return False
 
-        return libcudf.reduce.reduce("any", result_col, dtype=np.bool_)
+        return libcudf.reduce.reduce("any", result_col)
 
     @functools.cached_property
     def nan_count(self) -> int:
@@ -684,15 +684,25 @@ def to_pandas(
             return super().to_pandas(nullable=nullable, arrow_type=arrow_type)
 
     def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
-        col_dtype = self.dtype
+        """
+        Return the dtype libcudf should use for the result of a reduction.
+
+        - ``sum``/``product``: floating columns keep their dtype, all
+          other columns use int64.
+        - ``sum_of_squares``: the column dtype promoted with uint64.
+        - ``var``/``std``/``mean``: float64.
+        - anything else: whatever the parent class returns.
+        """
         if reduction_op in {"sum", "product"}:
-            col_dtype = (
-                col_dtype if col_dtype.kind == "f" else np.dtype("int64")
-            )
+            if self.dtype.kind == "f":
+                return self.dtype
+            return np.dtype("int64")
         elif reduction_op == "sum_of_squares":
-            col_dtype = np.result_dtype(col_dtype, np.dtype("uint64"))
+            return np.result_type(self.dtype, np.dtype("uint64"))
+        elif reduction_op in {"var", "std", "mean"}:
+            return np.dtype("float64")
 
-        return col_dtype
+        return super()._reduction_result_dtype(reduction_op)
 
 
 def _normalize_find_and_replace_input(
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 95c78c5efcb..f41010062c8 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -144,32 +144,27 @@ def mean(
         self,
         skipna: bool | None = None,
         min_count: int = 0,
-        dtype=np.float64,
     ):
-        return self._reduce(
-            "mean", skipna=skipna, min_count=min_count, dtype=dtype
-        )
+        return self._reduce("mean", skipna=skipna, min_count=min_count)
 
     def var(
         self,
         skipna: bool | None = None,
         min_count: int = 0,
-        dtype=np.float64,
         ddof=1,
     ):
         return self._reduce(
-            "var", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
+            "var", skipna=skipna, min_count=min_count, ddof=ddof
         )
 
     def std(
         self,
         skipna: bool | None = None,
         min_count: int = 0,
-        dtype=np.float64,
         ddof=1,
     ):
         return self._reduce(
-            "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
+            "std", skipna=skipna, min_count=min_count, ddof=ddof
         )
 
     def median(self, skipna: bool | None = None) -> NumericalBaseColumn:
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 36d7d9f9614..59ea1cc002c 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -287,11 +287,11 @@ def as_timedelta_column(self, dtype: Dtype) -> TimeDeltaColumn:
             return self
         return libcudf.unary.cast(self, dtype=dtype)
 
-    def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta:
+    def mean(self, skipna=None) -> pd.Timedelta:
         return pd.Timedelta(
             cast(
                 "cudf.core.column.NumericalColumn", self.astype("int64")
-            ).mean(skipna=skipna, dtype=dtype),
+            ).mean(skipna=skipna),
             unit=self.time_unit,
         ).as_unit(self.time_unit)
 
@@ -345,12 +345,11 @@ def std(
         self,
         skipna: bool | None = None,
         min_count: int = 0,
-        dtype: Dtype = np.float64,
         ddof: int = 1,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             cast("cudf.core.column.NumericalColumn", self.astype("int64")).std(
-                skipna=skipna, min_count=min_count, ddof=ddof, dtype=dtype
+                skipna=skipna, min_count=min_count, ddof=ddof
             ),
             unit=self.time_unit,
         ).as_unit(self.time_unit)
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index 1247fa362ce..8be6463c699 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -248,16 +248,11 @@ def test_sum_masked(nelem):
 
 def test_sum_boolean():
     s = Series(np.arange(100000))
-    got = (s > 1).sum(dtype=np.int32)
+    got = (s > 1).sum()
     expect = 99998
 
     assert expect == got
 
-    got = (s > 1).sum(dtype=np.bool_)
-    expect = True
-
-    assert expect == got
-
 
 def test_date_minmax():
     np_data = np.random.normal(size=10**3)
@@ -371,3 +366,11 @@ def test_reduction_column_multiindex():
     result = df.mean()
     expected = df.to_pandas().mean()
     assert_eq(result, expected)
+
+
+@pytest.mark.parametrize("op", ["sum", "product"])
+def test_dtype_deprecated(op):
+    ser = cudf.Series(range(5))
+    with pytest.warns(FutureWarning, match="dtype is deprecated"):
+        result = getattr(ser, op)(dtype=np.dtype(np.int8))
+    assert isinstance(result, np.int8)