diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8c9b61a7364..6db093b2dbb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,12 +8,12 @@ repos:
       - id: check-yaml
   # isort should run before black as black sometimes tweaks the isort output
   - repo: https://github.com/PyCQA/isort
-    rev: 5.9.3
+    rev: 5.10.1
     hooks:
       - id: isort
   # https://github.com/python/black#version-control-integration
   - repo: https://github.com/psf/black
-    rev: 21.9b0
+    rev: 21.10b0
     hooks:
       - id: black
       - id: black-jupyter
@@ -22,8 +22,8 @@ repos:
     hooks:
       - id: blackdoc
         exclude: "generate_reductions.py"
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
     hooks:
       - id: flake8
   # - repo: https://github.com/Carreau/velin
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index 0d9ce0d51a3..26738e2d357 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -62,7 +62,7 @@
         "pandas": [""],
         "netcdf4": [""],
         "scipy": [""],
-        "bottleneck": ["", null],
+        "bottleneck": [""],
         "dask": [""],
         "distributed": [""],
         "flox": [""],
diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py
index f89fe7f8eb9..d786c04e852 100644
--- a/asv_bench/benchmarks/dataarray_missing.py
+++ b/asv_bench/benchmarks/dataarray_missing.py
@@ -16,13 +16,6 @@ def make_bench_data(shape, frac_nan, chunks):
     return da


-def requires_bottleneck():
-    try:
-        import bottleneck  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
-
-
 class DataArrayMissingInterpolateNA:
     def setup(self, shape, chunks, limit):
         if chunks is not None:
@@ -46,7 +39,6 @@ def time_interpolate_na(self, shape, chunks, limit):

 class DataArrayMissingBottleneck:
     def setup(self, shape, chunks, limit):
-        requires_bottleneck()
         if chunks is not None:
             requires_dask()
         self.da = make_bench_data(shape, 0.1, chunks)
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
index f0e18bf2153..1d3713f19bf 100644
--- a/asv_bench/benchmarks/rolling.py
+++ b/asv_bench/benchmarks/rolling.py
@@ -36,29 +36,45 @@ def setup(self, *args, **kwargs):
             randn_long, dims="x", coords={"x": np.arange(long_nx) * 0.1}
         )

-    @parameterized(["func", "center"], (["mean", "count"], [True, False]))
-    def time_rolling(self, func, center):
-        getattr(self.ds.rolling(x=window, center=center), func)().load()
-
-    @parameterized(["func", "pandas"], (["mean", "count"], [True, False]))
-    def time_rolling_long(self, func, pandas):
+    @parameterized(
+        ["func", "center", "use_bottleneck"],
+        (["mean", "count"], [True, False], [True, False]),
+    )
+    def time_rolling(self, func, center, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            getattr(self.ds.rolling(x=window, center=center), func)().load()
+
+    @parameterized(
+        ["func", "pandas", "use_bottleneck"],
+        (["mean", "count"], [True, False], [True, False]),
+    )
+    def time_rolling_long(self, func, pandas, use_bottleneck):
         if pandas:
             se = self.da_long.to_series()
             getattr(se.rolling(window=window, min_periods=window), func)()
         else:
-            getattr(self.da_long.rolling(x=window, min_periods=window), func)().load()
-
-    @parameterized(["window_", "min_periods"], ([20, 40], [5, 5]))
-    def time_rolling_np(self, window_, min_periods):
-        self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
-            getattr(np, "nansum")
-        ).load()
-
-    @parameterized(["center", "stride"], ([True, False], [1, 1]))
-    def time_rolling_construct(self, center, stride):
-        self.ds.rolling(x=window, center=center).construct(
-            "window_dim", stride=stride
-        ).sum(dim="window_dim").load()
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            getattr(
+                self.da_long.rolling(x=window, min_periods=window), func
+            )().load()
+
+    @parameterized(
+        ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False])
+    )
+    def time_rolling_np(self, window_, min_periods, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce(
+                getattr(np, "nansum")
+            ).load()
+
+    @parameterized(
+        ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False])
+    )
+    def time_rolling_construct(self, center, stride, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            self.ds.rolling(x=window, center=center).construct(
+                "window_dim", stride=stride
+            ).sum(dim="window_dim").load()


 class RollingDask(Rolling):
@@ -87,24 +103,28 @@ def setup(self, *args, **kwargs):


 class DataArrayRollingMemory(RollingMemory):
-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_ndrolling_reduce(self, func):
-        roll = self.ds.var1.rolling(x=10, y=4)
-        getattr(roll, func)()
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_ndrolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.var1.rolling(x=10, y=4)
+            getattr(roll, func)()

-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_1drolling_reduce(self, func):
-        roll = self.ds.var3.rolling(t=100)
-        getattr(roll, func)()
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_1drolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.var3.rolling(t=100)
+            getattr(roll, func)()


 class DatasetRollingMemory(RollingMemory):
-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_ndrolling_reduce(self, func):
-        roll = self.ds.rolling(x=10, y=4)
-        getattr(roll, func)()
-
-    @parameterized("func", ["sum", "max", "mean"])
-    def peakmem_1drolling_reduce(self, func):
-        roll = self.ds.rolling(t=100)
-        getattr(roll, func)()
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_ndrolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.rolling(x=10, y=4)
+            getattr(roll, func)()
+
+    @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False]))
+    def peakmem_1drolling_reduce(self, func, use_bottleneck):
+        with xr.set_options(use_bottleneck=use_bottleneck):
+            roll = self.ds.rolling(t=100)
+            getattr(roll, func)()
diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
index fc3c457308f..a4ba606feeb 100644
--- a/doc/user-guide/computation.rst
+++ b/doc/user-guide/computation.rst
@@ -107,6 +107,8 @@ Xarray also provides the ``max_gap`` keyword argument to limit the interpolation
 data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na`
 for more.

+.. _agg:
+
 Aggregation
 ===========

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index f499cbe3d21..b66c99d0bcb 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -36,6 +36,8 @@ Bug fixes
 ~~~~~~~~~
 - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`).
   By `Sebastian Weigand `_.
+- Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`) + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ @@ -49,6 +51,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Use ``importlib`` to replace functionality of ``pkg_resources`` in + backend plugins tests. (:pull:`5959`). + By `Kai Mühlbauer `_. + .. _whats-new.0.20.1: diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 32013f1f298..0a9ffcbda22 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -23,15 +23,17 @@ def remove_duplicates(entrypoints): # check if there are multiple entrypoints for the same name unique_entrypoints = [] for name, matches in entrypoints_grouped: - matches = list(matches) + # remove equal entrypoints + matches = list(set(matches)) unique_entrypoints.append(matches[0]) matches_len = len(matches) if matches_len > 1: - selected_module_name = matches[0].module_name - all_module_names = [e.module_name for e in matches] + all_module_names = [e.value.split(":")[0] for e in matches] + selected_module_name = all_module_names[0] warnings.warn( f"Found {matches_len} entrypoints for the engine name {name}:" - f"\n {all_module_names}.\n It will be used: {selected_module_name}.", + f"\n {all_module_names}.\n " + f"The entrypoint {selected_module_name} will be used.", RuntimeWarning, ) return unique_entrypoints diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index ef27413fb5b..c56e76cf5d3 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,19 +1,15 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS -from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset try: import flox @@ -21,85 +17,27 @@ flox = None -class DatasetReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - - -class DatasetGroupByReduce(Protocol): - _obj: T_Dataset - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> T_Dataset: - ... - - -class DataArrayReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_DataArray: - ... 
- - -class DataArrayGroupByReduce(Protocol): - _obj: T_DataArray +class DatasetReductions: + __slots__ = () def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_DataArray: - ... - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> T_DataArray: - ... - - -class DatasetReductions: - __slots__ = () + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -166,11 +104,11 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -237,11 +175,11 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -308,12 +246,12 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -394,12 +332,12 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -480,12 +418,12 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -570,13 +508,13 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -676,13 +614,13 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -782,13 +720,13 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -885,13 +823,13 @@ def std( ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). 
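Note (not part of the patch): the recurring change above, `T_Dataset`/`T_DataArray` giving way to quoted `"Dataset"`/`"DataArray"` returns, works because quoted annotations are forward references that only type checkers resolve, via the `TYPE_CHECKING` import block added at the top of this file. A minimal, self-contained sketch of the pattern; `mypackage.dataset` is a made-up path:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers (mypy, pyright); at runtime
    # this import never executes, so no circular dependency arises.
    from mypackage.dataset import Dataset  # hypothetical module path


class DatasetReductions:
    __slots__ = ()

    def count(self, dim=None) -> "Dataset":
        # The quoted annotation stays a plain string at runtime and is
        # resolved against the guarded import only during type checking.
        raise NotImplementedError()


print(DatasetReductions.count.__annotations__)  # {'return': 'Dataset'}
```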
@@ -988,12 +926,12 @@ def var( ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -1081,12 +1019,24 @@ def median( class DataArrayReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -1147,11 +1097,11 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -1212,11 +1162,11 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -1277,12 +1227,12 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -1355,12 +1305,12 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -1433,12 +1383,12 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -1515,13 +1465,13 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -1611,13 +1561,13 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -1707,13 +1657,13 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). 
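As a reminder of what the `min_count` keyword threaded through `prod` and `sum` above actually does (its full docstring appears later in this diff), here is a small illustration; an example, not part of the patch:

```python
import numpy as np
import xarray as xr

da = xr.DataArray([np.nan, np.nan, 2.0], dims="x")

# skipna=True ignores the NaNs, so the lone valid value is summed.
print(da.sum(skipna=True).item())               # 2.0

# min_count demands at least that many valid values; with only one
# present, requiring two turns the result into NaN instead.
print(da.sum(skipna=True, min_count=2).item())  # nan
```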
@@ -1800,13 +1750,13 @@ def std( ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -1893,12 +1843,12 @@ def var( ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -1976,14 +1926,33 @@ def median( class DatasetGroupByReductions: - __slots__ = () + _obj: "Dataset" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -2067,11 +2036,11 @@ def count( ) def all( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -2155,11 +2124,11 @@ def all( ) def any( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -2243,12 +2212,12 @@ def any( ) def max( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -2349,12 +2318,12 @@ def max( ) def min( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -2455,12 +2424,12 @@ def min( ) def mean( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -2565,13 +2534,13 @@ def mean( ) def prod( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). 
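The `DatasetGroupByReductions` methods being rewritten here are the front-ends for calls such as the following usage sketch, which mirrors the `.groupby("labels")` preamble used by the generated doctests:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"da": ("time", np.array([1.0, 2.0, 3.0, 1.0, 2.0, np.nan]))},
    coords={"labels": ("time", ["a", "b", "c", "c", "b", "a"])},
)

# One aggregated value per unique label; this dispatches to flox when it
# is installed and enabled, and otherwise goes through the reduce() hook
# stubbed out above.
print(ds.groupby("labels").mean())
```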
@@ -2694,13 +2663,13 @@ def prod( ) def sum( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -2823,13 +2792,13 @@ def sum( ) def std( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -2949,13 +2918,13 @@ def std( ) def var( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -3075,12 +3044,12 @@ def var( ) def median( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -3170,14 +3139,33 @@ def median( class DatasetResampleReductions: - __slots__ = () + _obj: "Dataset" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -3261,11 +3249,11 @@ def count( ) def all( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -3349,11 +3337,11 @@ def all( ) def any( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -3437,12 +3425,12 @@ def any( ) def max( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -3543,12 +3531,12 @@ def max( ) def min( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -3649,12 +3637,12 @@ def min( ) def mean( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). 
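Likewise for `DatasetResampleReductions`, matching the `.resample(time="3M")` preamble of the generated doctests; an illustrative call, not part of the patch:

```python
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {"da": ("time", np.arange(12.0))},
    coords={"time": pd.date_range("2001-01-01", freq="M", periods=12)},
)

# Quarterly totals; min_count=1 leaves a quarter as NaN only when no
# valid values fall inside it.
print(ds.resample(time="3M").sum(skipna=True, min_count=1))
```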
@@ -3759,13 +3747,13 @@ def mean( ) def prod( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -3888,13 +3876,13 @@ def prod( ) def sum( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -4017,13 +4005,13 @@ def sum( ) def std( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -4143,13 +4131,13 @@ def std( ) def var( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -4269,12 +4257,12 @@ def var( ) def median( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -4364,14 +4352,33 @@ def median( class DataArrayGroupByReductions: - __slots__ = () + _obj: "DataArray" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -4448,11 +4455,11 @@ def count( ) def all( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -4529,11 +4536,11 @@ def all( ) def any( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -4610,12 +4617,12 @@ def any( ) def max( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). 
@@ -4707,12 +4714,12 @@ def max( ) def min( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -4804,12 +4811,12 @@ def min( ) def mean( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -4905,13 +4912,13 @@ def mean( ) def prod( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -5023,13 +5030,13 @@ def prod( ) def sum( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -5141,13 +5148,13 @@ def sum( ) def std( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -5256,13 +5263,13 @@ def std( ) def var( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -5371,12 +5378,12 @@ def var( ) def median( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -5458,14 +5465,33 @@ def median( class DataArrayResampleReductions: - __slots__ = () + _obj: "DataArray" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -5542,11 +5568,11 @@ def count( ) def all( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). 
@@ -5623,11 +5649,11 @@ def all( ) def any( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -5704,12 +5730,12 @@ def any( ) def max( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -5801,12 +5827,12 @@ def max( ) def min( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -5898,12 +5924,12 @@ def min( ) def mean( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -5999,13 +6025,13 @@ def mean( ) def prod( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -6117,13 +6143,13 @@ def prod( ) def sum( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -6235,13 +6261,13 @@ def sum( ) def std( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -6350,13 +6376,13 @@ def std( ) def var( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -6465,12 +6491,12 @@ def var( ) def median( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). 
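Stepping back, the net effect of the `_reductions.py` rewrite is that the `Protocol` classes are gone: each generated class is now an ordinary mixin whose `reduce` stub the concrete container overrides, and the `arithmetic.py`, `dataarray.py`, and `dataset.py` changes below wire those mixins in. A toy sketch of that shape, with illustrative names only:

```python
from typing import Any, Callable


class Reductions:
    """Mixin of generated methods; the container supplies reduce()."""

    __slots__ = ()

    def reduce(self, func: Callable[..., Any], dim=None, **kwargs: Any):
        raise NotImplementedError()

    def total(self, dim=None, **kwargs: Any):
        # Every generated method funnels through the single reduce() hook.
        return self.reduce(sum, dim=dim, **kwargs)


class Container(Reductions):
    __slots__ = ("data",)

    def __init__(self, data):
        self.data = data

    def reduce(self, func, dim=None, **kwargs):
        # The concrete type decides how a reduction is actually applied.
        return func(self.data)


print(Container([1, 2, 3]).total())  # 6
```

Mixing in a base class that raises `NotImplementedError`, rather than satisfying a `Protocol`, keeps one inheritable implementation surface and drops the `typing_extensions` fallback that the removed `sys.version_info` check needed on older Pythons.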
diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 814e9a59877..bf8d6ccaeb6 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -105,7 +105,6 @@ class VariableArithmetic( class DatasetArithmetic( ImplementsDatasetReduce, - IncludeReduceMethods, IncludeCumMethods, SupportsArithmetic, DatasetOpsMixin, @@ -116,7 +115,6 @@ class DatasetArithmetic( class DataArrayArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, diff --git a/xarray/core/common.py b/xarray/core/common.py index b5dc3bf0e20..2300f3dd8f5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -60,12 +60,14 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + return self.reduce( + func=func, dim=dim, axis=axis, skipna=skipna, **kwargs + ) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) + return self.reduce(func=func, dim=dim, axis=axis, **kwargs) return wrapped_func @@ -98,13 +100,19 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs + func=func, + dim=dim, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) + return self.reduce( + func=func, dim=dim, numeric_only=numeric_only, **kwargs + ) return wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 24e5f5736b0..1b96f22b744 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -35,6 +35,7 @@ utils, weighted, ) +from ._reductions import DataArrayReductions from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( @@ -215,7 +216,9 @@ def __setitem__(self, key, value) -> None: _THIS_ARRAY = ReprObject("") -class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): +class DataArray( + AbstractArray, DataWithCoords, DataArrayArithmetic, DataArrayReductions +): """N-dimensional array with labeled coordinates and dimensions. DataArray provides a wrapper around numpy ndarrays that uses @@ -2652,6 +2655,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e882495dce5..cf52fed6974 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -50,6 +50,7 @@ utils, weighted, ) +from ._reductions import DatasetReductions from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes @@ -574,7 +575,7 @@ def __setitem__(self, key, value) -> None: self.dataset[pos_indexers] = value -class Dataset(DataWithCoords, DatasetArithmetic, Mapping): +class Dataset(DataWithCoords, DatasetReductions, DatasetArithmetic, Mapping): """A multi-dimensional, in memory, array database. 
A dataset resembles an in-memory representation of a NetCDF file, @@ -4999,6 +5000,7 @@ def reduce( self, func: Callable, dim: Union[Hashable, Iterable[Hashable]] = None, + *, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, @@ -5034,7 +5036,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if "axis" in kwargs: + if kwargs.get("axis", None) is not None: raise ValueError( "passing 'axis' to Dataset reduce methods is ambiguous." " Please use 'dim' instead." diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7f8f9802b59..8c0bde3a4f9 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,6 @@ import datetime import warnings +from typing import Any, Callable, Hashable, Sequence, Union import numpy as np import pandas as pd @@ -932,7 +933,15 @@ def _combine(self, applied, shortcut=False): return combined def reduce( - self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, ): """Reduce the items in this group by applying `func` along some dimension(s). @@ -965,11 +974,15 @@ def reduce( if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_array(ar): - return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + return ar.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) @@ -1047,7 +1060,16 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along some dimension(s). 
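The bare `*` inserted into each `reduce` signature in this diff makes everything after `dim` keyword-only, so an `axis` argument can no longer slip in positionally and be misread. Sketched outside xarray:

```python
def reduce(func, dim=None, *, axis=None, keep_attrs=None, keepdims=False):
    return func.__name__, dim, axis


print(reduce(sum, "x", axis=0))  # ('sum', 'x', 0)

try:
    reduce(sum, "x", 0)  # axis passed positionally
except TypeError as err:
    # reduce() takes from 1 to 2 positional arguments but 3 were given
    print(err)
```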
@@ -1079,11 +1101,15 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_dataset(ds): - return ds.reduce(func, dim, keep_attrs, **kwargs) + return ds.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index e2f599e8b4e..ed665ad4048 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,4 +1,5 @@ import warnings +from typing import Any, Callable, Hashable, Sequence, Union from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase @@ -157,7 +158,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): +class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -248,7 +249,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): +class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): @@ -316,7 +317,16 @@ def apply(self, func, args=(), shortcut=None, **kwargs): ) return self.map(func=func, shortcut=shortcut, args=args, **kwargs) - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. @@ -341,4 +351,11 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ - return super().reduce(func, dim, keep_attrs, **kwargs) + return super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c032a781e47..392597f1bda 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -676,87 +676,6 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_docs(): - # with min_count - actual = DataArray.sum.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `sum` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `sum`. - axis : int or sequence of int, optional - Axis(es) over which to apply `sum`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `sum` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. New in version 0.10.8: Added with the default being - None. Changed in version 0.17.0: if specified on an integer array - and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `sum` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - # without min_count - actual = DataArray.std.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `std` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `std`. - axis : int or sequence of int, optional - Axis(es) over which to apply `std`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `std` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `std` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - def test_datetime_to_numeric_datetime64(): times = pd.date_range("2000", periods=5, freq="7D").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 7f77a677d6d..4d1eee6363d 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -1,10 +1,20 @@ +import sys from unittest import mock -import pkg_resources import pytest from xarray.backends import common, plugins +if sys.version_info >= (3, 8): + from importlib.metadata import EntryPoint + + importlib_metadata_mock = "importlib.metadata" +else: + # if the fallback library is missing, we are doomed. 
+ from importlib_metadata import EntryPoint + + importlib_metadata_mock = "importlib_metadata" + class DummyBackendEntrypointArgs(common.BackendEntrypoint): def open_dataset(filename_or_obj, *args): @@ -29,12 +39,12 @@ def open_dataset(self, filename_or_obj, *, decoder): @pytest.fixture def dummy_duplicated_entrypoints(): specs = [ - "engine1 = xarray.tests.test_plugins:backend_1", - "engine1 = xarray.tests.test_plugins:backend_2", - "engine2 = xarray.tests.test_plugins:backend_1", - "engine2 = xarray.tests.test_plugins:backend_2", + ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine1", "xarray.tests.test_plugins:backend_2", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - eps = [pkg_resources.EntryPoint.parse(spec) for spec in specs] + eps = [EntryPoint(name, value, group) for name, value, group in specs] return eps @@ -46,8 +56,10 @@ def test_remove_duplicates(dummy_duplicated_entrypoints) -> None: def test_broken_plugin() -> None: - broken_backend = pkg_resources.EntryPoint.parse( - "broken_backend = xarray.tests.test_plugins:backend_1" + broken_backend = EntryPoint( + "broken_backend", + "xarray.tests.test_plugins:backend_1", + "xarray.backends", ) with pytest.warns(RuntimeWarning) as record: _ = plugins.build_engines([broken_backend]) @@ -68,13 +80,15 @@ def test_remove_duplicates_warnings(dummy_duplicated_entrypoints) -> None: assert "entrypoints" in message1 -@mock.patch("pkg_resources.EntryPoint.load", mock.MagicMock(return_value=None)) +@mock.patch( + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=None) +) def test_backends_dict_from_pkg() -> None: specs = [ - "engine1 = xarray.tests.test_plugins:backend_1", - "engine2 = xarray.tests.test_plugins:backend_2", + ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - entrypoints = [pkg_resources.EntryPoint.parse(spec) for spec in specs] + entrypoints = [EntryPoint(name, value, group) for name, value, group in specs] engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 assert engines.keys() == set(("engine1", "engine2")) @@ -114,12 +128,12 @@ def test_set_missing_parameters_raise_error() -> None: @mock.patch( - "pkg_resources.EntryPoint.load", + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines() -> None: - dummy_pkg_entrypoint = pkg_resources.EntryPoint.parse( - "cfgrib = xarray.tests.test_plugins:backend_1" + dummy_pkg_entrypoint = EntryPoint( + "cfgrib", "xarray.tests.test_plugins:backend_1", "xarray_backends" ) backend_entrypoints = plugins.build_engines([dummy_pkg_entrypoint]) @@ -131,17 +145,13 @@ def test_build_engines() -> None: @mock.patch( - "pkg_resources.EntryPoint.load", + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines_sorted() -> None: dummy_pkg_entrypoints = [ - pkg_resources.EntryPoint.parse( - "dummy2 = xarray.tests.test_plugins:backend_1", - ), - pkg_resources.EntryPoint.parse( - "dummy1 = xarray.tests.test_plugins:backend_1", - ), + EntryPoint("dummy2", "xarray.tests.test_plugins:backend_1", "xarray.backends"), + EntryPoint("dummy1", "xarray.tests.test_plugins:backend_1", "xarray.backends"), ] backend_entrypoints = plugins.build_engines(dummy_pkg_entrypoints) 
backend_entrypoints = list(backend_entrypoints) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 5d4aa2145e1..db4157764ac 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -5,7 +5,7 @@ Usage: python xarray/util/generate_reductions.py > xarray/core/_reductions.py pytest --doctest-modules xarray/core/_reductions.py --accept || true - pytest --doctest-modules xarray/core/_reductions.py --accept + pytest --doctest-modules xarray/core/_reductions.py This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). The second run of pytest is deliberate, since the first will return an error @@ -24,7 +24,6 @@ from . import duck_array_ops from .options import OPTIONS -from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy if TYPE_CHECKING: @@ -36,48 +35,48 @@ except ImportError: flox = None''' -OBJ_PREAMBLE = """ +DEFAULT_PREAMBLE = """ + +class {obj}{cls}Reductions: + __slots__ = () -class {obj}Reductions(): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ... + raise NotImplementedError()""" +GROUPBY_PREAMBLE = """ -class {obj}GroupByReductions(): +class {obj}{cls}Reductions: _obj: "{obj}" def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ... + raise NotImplementedError() def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, ) -> "{obj}": - ...""" + raise NotImplementedError()""" -CLASS_PREAMBLE = """ - -class {obj}{cls}Reductions: - __slots__ = ()""" - TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, @@ -213,6 +212,7 @@ def __init__( docref, docref_description, example_call_preamble, + definition_preamble, see_also_obj=None, ): self.datastructure = datastructure @@ -221,7 +221,7 @@ def __init__( self.docref = docref self.docref_description = docref_description self.example_call_preamble = example_call_preamble - self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) if not see_also_obj: self.see_also_obj = self.datastructure.name else: @@ -245,7 +245,6 @@ def generate_method(self, method): yield TEMPLATE_REDUCTION_SIGNATURE.format( **template_kwargs, extra_kwargs=extra_kwargs, - self_type=self.self_type, ) for text in [ @@ -415,6 +414,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="DataArray", + definition_preamble=DEFAULT_PREAMBLE, ) DataArrayGenerator = GenericReductionGenerator( cls="", @@ -424,6 +424,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="Dataset", + definition_preamble=DEFAULT_PREAMBLE, ) DataArrayGroupByGenerator = GroupByReductionGenerator( @@ -433,6 +434,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + definition_preamble=GROUPBY_PREAMBLE, ) DataArrayResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -441,6 +443,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", 
example_call_preamble='.resample(time="3M")', + definition_preamble=GROUPBY_PREAMBLE, ) DatasetGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", @@ -449,6 +452,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + definition_preamble=GROUPBY_PREAMBLE, ) DatasetResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -457,6 +461,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', + definition_preamble=GROUPBY_PREAMBLE, )
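For reference on the test changes above: `importlib.metadata.EntryPoint` objects are constructed directly from `name`, `value`, and `group`, instead of being parsed out of `pkg_resources` spec strings. A sketch against the stdlib API (Python 3.8+; the `importlib_metadata` backport used on older interpreters mirrors it):

```python
from importlib.metadata import EntryPoint

ep = EntryPoint(
    name="engine1",
    value="xarray.tests.test_plugins:backend_1",
    group="xarray.backends",
)

# remove_duplicates() now derives the module name from the value string.
print(ep.value.split(":")[0])  # xarray.tests.test_plugins

# On the interpreter versions this diff targets, EntryPoint is a named
# tuple, so equal fields mean equal, hashable objects. That is what the
# list(set(matches)) de-duplication in plugins.py relies on.
dup = EntryPoint("engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends")
print(ep == dup, len({ep, dup}))  # True 1
```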