diff --git a/doc/conf.py b/doc/conf.py
index 77387dfd965..93174c6aaec 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -147,14 +147,18 @@
     "matplotlib colormap name": ":doc:`matplotlib colormap name <matplotlib:gallery/color/colormap_reference>`",
     "matplotlib axes object": ":py:class:`matplotlib axes object <matplotlib.axes.Axes>`",
     "colormap": ":py:class:`colormap <matplotlib.colors.Colormap>`",
-    # objects without namespace
+    # objects without namespace: xarray
     "DataArray": "~xarray.DataArray",
     "Dataset": "~xarray.Dataset",
     "Variable": "~xarray.Variable",
+    "DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy",
+    "DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy",
+    # objects without namespace: numpy
     "ndarray": "~numpy.ndarray",
     "MaskedArray": "~numpy.ma.MaskedArray",
     "dtype": "~numpy.dtype",
     "ComplexWarning": "~numpy.ComplexWarning",
+    # objects without namespace: pandas
     "Index": "~pandas.Index",
     "MultiIndex": "~pandas.MultiIndex",
     "CategoricalIndex": "~pandas.CategoricalIndex",
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 7f60da7e1b2..0c21ca07744 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -840,7 +840,7 @@ def apply_ufunc(
         the style of NumPy universal functions [1]_ (if this is not the case,
         set ``vectorize=True``). If this function returns multiple outputs, you
         must set ``output_core_dims`` as well.
-    *args : Dataset, DataArray, GroupBy, Variable, numpy.ndarray, dask.array.Array or scalar
+    *args : Dataset, DataArray, DataArrayGroupBy, DatasetGroupBy, Variable, numpy.ndarray, dask.array.Array or scalar
         Mix of labeled and/or unlabeled arrays to which to apply the function.
     input_core_dims : sequence of sequence, optional
         List of the same length as ``args`` giving the list of core dimensions
@@ -911,16 +911,16 @@ def apply_ufunc(
         - 'allowed': pass dask arrays directly on to ``func``. Prefer this option if
           ``func`` natively supports dask arrays.
         - 'parallelized': automatically parallelize ``func`` if any of the
-          inputs are a dask array by using `dask.array.apply_gufunc`. Multiple output
+          inputs are a dask array by using :py:func:`dask.array.apply_gufunc`. Multiple output
           arguments are supported. Only use this option if ``func`` does not natively
           support dask arrays (e.g. converts them to numpy arrays).
     dask_gufunc_kwargs : dict, optional
-        Optional keyword arguments passed to ``dask.array.apply_gufunc`` if
+        Optional keyword arguments passed to :py:func:`dask.array.apply_gufunc` if
         dask='parallelized'. Possible keywords are ``output_sizes``, ``allow_rechunk``
         and ``meta``.
     output_dtypes : list of dtype, optional
         Optional list of output dtypes. Only used if ``dask='parallelized'`` or
-        vectorize=True.
+        ``vectorize=True``.
     output_sizes : dict, optional
         Optional mapping from dimension names to sizes for outputs. Only used
         if dask='parallelized' and new dimensions (not found on inputs) appear
@@ -928,7 +928,7 @@ def apply_ufunc(
         parameter. It will be removed as direct parameter in a future version.
     meta : optional
         Size-0 object representing the type of array wrapped by dask array. Passed on to
-        ``dask.array.apply_gufunc``. ``meta`` should be given in the
+        :py:func:`dask.array.apply_gufunc`. ``meta`` should be given in the
         ``dask_gufunc_kwargs`` parameter . It will be removed as direct parameter
         a future version.
 
@@ -943,7 +943,7 @@ def apply_ufunc(
     arrays. If ``func`` needs to manipulate a whole xarray object subset to each block
     it is possible to use :py:func:`xarray.map_blocks`.
 
-    Note that due to the overhead ``map_blocks`` is considerably slower than ``apply_ufunc``.
+    Note that, due to the overhead, :py:func:`xarray.map_blocks` is considerably slower than ``apply_ufunc``.
 
     Examples
     --------
@@ -954,7 +954,7 @@ def apply_ufunc(
     ...     return xr.apply_ufunc(func, a, b)
     ...
 
-    You can now apply ``magnitude()`` to ``xr.DataArray`` and ``xr.Dataset``
+    You can now apply ``magnitude()`` to :py:class:`DataArray` and :py:class:`Dataset`
     objects, with automatically preserved dimensions and coordinates, e.g.,
 
     >>> array = xr.DataArray([1, 2, 3], coords=[("x", [0.1, 0.2, 0.3])])
@@ -989,7 +989,7 @@ def apply_ufunc(
     ...     )
     ...
 
-    Inner product over a specific dimension (like ``xr.dot``):
+    Inner product over a specific dimension (like :py:func:`dot`):
 
     >>> def _inner(x, y):
     ...     result = np.matmul(x[..., np.newaxis, :], y[..., :, np.newaxis])
@@ -999,7 +999,7 @@ def apply_ufunc(
     ...     return apply_ufunc(_inner, a, b, input_core_dims=[[dim], [dim]])
     ...
 
-    Stack objects along a new dimension (like ``xr.concat``):
+    Stack objects along a new dimension (like :py:func:`concat`):
 
     >>> def stack(objects, dim, new_coord):
     ...     # note: this version does not stack coordinates
@@ -1034,10 +1034,9 @@ def apply_ufunc(
     ...
 
     Most of NumPy's builtin functions already broadcast their inputs
-    appropriately for use in `apply`. You may find helper functions such as
-    numpy.broadcast_arrays helpful in writing your function. `apply_ufunc` also
-    works well with numba's vectorize and guvectorize. Further explanation with
-    examples are provided in the xarray documentation [3]_.
+    appropriately for use in ``apply_ufunc``. You may find helper functions such as
+    :py:func:`numpy.broadcast_arrays` helpful in writing your function. ``apply_ufunc`` also
+    works well with :py:func:`numba.vectorize` and :py:func:`numba.guvectorize`.
 
     See Also
     --------
@@ -1046,12 +1045,13 @@ def apply_ufunc(
     numba.guvectorize
     dask.array.apply_gufunc
     xarray.map_blocks
+    :ref:`dask.automatic-parallelization`
+        User guide describing :py:func:`apply_ufunc` and :py:func:`map_blocks`.
 
     References
     ----------
     .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html
     .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
-    .. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation
     """
     from .dataarray import DataArray
     from .groupby import GroupBy
diff --git a/xarray/core/options.py b/xarray/core/options.py
index c9e037e6fd6..90018c51807 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -51,8 +51,8 @@ class T_Options(TypedDict):
     "enable_cftimeindex": True,
     "file_cache_maxsize": 128,
     "keep_attrs": "default",
-    "warn_for_unclosed_files": False,
     "use_bottleneck": True,
+    "warn_for_unclosed_files": False,
 }
 
 _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"])
@@ -75,8 +75,8 @@ def _positive_integer(value):
     "enable_cftimeindex": lambda value: isinstance(value, bool),
     "file_cache_maxsize": _positive_integer,
     "keep_attrs": lambda choice: choice in [True, False, "default"],
-    "warn_for_unclosed_files": lambda value: isinstance(value, bool),
     "use_bottleneck": lambda value: isinstance(value, bool),
+    "warn_for_unclosed_files": lambda value: isinstance(value, bool),
 }
 
 
@@ -123,38 +123,16 @@ class set_options:
 
     Parameters
     ----------
-    display_width : int, default: 80
-        Maximum display width for ``repr`` on xarray objects.
-    display_max_rows : int, default: 12
-        Maximum display rows.
-    arithmetic_join : {"inner", "outer", "left", "right", "exact"}
+    arithmetic_join : {"inner", "outer", "left", "right", "exact"}, default: "inner"
         DataArray/Dataset alignment in binary operations.
-    file_cache_maxsize : int, default: 128
-        Maximum number of open files to hold in xarray's
-        global least-recently-usage cached. This should be smaller than
-        your system's per-process file descriptor limit, e.g.,
-        ``ulimit -n`` on Linux.
-    warn_for_unclosed_files : bool, default: False
-        Whether or not to issue a warning when unclosed files are
-        deallocated. This is mostly useful for debugging.
-    cmap_sequential : str or matplotlib.colors.Colormap, default: "viridis"
-        Colormap to use for nondivergent data plots. If string, must be
-        matplotlib built-in colormap. Can also be a Colormap object
-        (e.g. mpl.cm.magma)
     cmap_divergent : str or matplotlib.colors.Colormap, default: "RdBu_r"
         Colormap to use for divergent data plots. If string, must be
         matplotlib built-in colormap. Can also be a Colormap object
         (e.g. mpl.cm.magma)
-    keep_attrs : {"default", True, False}
-        Whether to keep attributes on xarray Datasets/dataarrays after
-        operations. Can be
-
-        * ``True`` : to always keep attrs
-        * ``False`` : to always discard attrs
-        * ``default`` : to use original logic that attrs should only
-          be kept in unambiguous circumstances
-    display_style : {"text", "html"}
-        Display style to use in jupyter for xarray objects.
+    cmap_sequential : str or matplotlib.colors.Colormap, default: "viridis"
+        Colormap to use for nondivergent data plots. If string, must be
+        matplotlib built-in colormap. Can also be a Colormap object
+        (e.g. mpl.cm.magma)
     display_expand_attrs : {"default", True, False}:
         Whether to expand the attributes section for display of
         ``DataArray`` or ``Dataset`` objects. Can be
@@ -183,6 +161,31 @@ class set_options:
         * ``True`` : to always expand data variables
         * ``False`` : to always collapse data variables
         * ``default`` : to expand unless over a pre-defined limit
+    display_max_rows : int, default: 12
+        Maximum display rows.
+    display_style : {"text", "html"}, default: "html"
+        Display style to use in jupyter for xarray objects.
+    display_width : int, default: 80
+        Maximum display width for ``repr`` on xarray objects.
+    file_cache_maxsize : int, default: 128
+        Maximum number of open files to hold in xarray's
+        global least-recently-used cache. This should be smaller than
+        your system's per-process file descriptor limit, e.g.,
+        ``ulimit -n`` on Linux.
+    keep_attrs : {"default", True, False}
+        Whether to keep attributes on xarray Datasets/DataArrays after
+        operations. Can be
+
+        * ``True`` : to always keep attrs
+        * ``False`` : to always discard attrs
+        * ``default`` : to use original logic that attrs should only
+          be kept in unambiguous circumstances
+    use_bottleneck : bool, default: True
+        Whether to use ``bottleneck`` to accelerate 1D reductions and
+        1D rolling reduction operations.
+    warn_for_unclosed_files : bool, default: False
+        Whether or not to issue a warning when unclosed files are
+        deallocated. This is mostly useful for debugging.
 
     Examples
     --------