diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index c38b4f2d11c..1900c208532 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -684,7 +684,6 @@ conventions.decode_cf_variables - coding.variables.UnsignedIntegerCoder coding.variables.CFMaskCoder coding.variables.CFScaleOffsetCoder diff --git a/doc/conf.py b/doc/conf.py index 4f1fc6751d2..93a0e459a33 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -153,6 +153,9 @@ "matplotlib colormap name": ":doc:`matplotlib colormap name `", "matplotlib axes object": ":py:class:`matplotlib axes object `", "colormap": ":py:class:`colormap `", + # xarray terms + "dim name": ":term:`dimension name `", + "var name": ":term:`variable name `", # objects without namespace: xarray "DataArray": "~xarray.DataArray", "Dataset": "~xarray.Dataset", diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c8f3a40e87f..2cf2d5928bf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,8 @@ New Features ~~~~~~~~~~~~ - Make chunk manager an option in ``set_options`` (:pull:`9362`). By `Tom White `_. +- Allow data variable specific ``constant_values`` in the dataset ``pad`` function (:pull:`9353``). + By `Tiago Sanona `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -47,6 +49,9 @@ Bug fixes date "0001-01-01". (:issue:`9108`, :pull:`9116`) By `Spencer Clark `_ and `Deepak Cherian `_. +- Fix issue with passing parameters to ZarrStore.open_store when opening + datatree in zarr format (:issue:`9376`, :pull:`9377`). + By `Alfonso Ladino `_ Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 0da056e8ad2..242507f9c20 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1225,7 +1225,18 @@ def open_datatree( filename_or_obj = _normalize_path(filename_or_obj) if group: parent = NodePath("/") / NodePath(group) - stores = ZarrStore.open_store(filename_or_obj, group=parent) + stores = ZarrStore.open_store( + filename_or_obj, + group=parent, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=False, + chunk_store=chunk_store, + storage_options=storage_options, + stacklevel=stacklevel + 1, + zarr_version=zarr_version, + ) if not stores: ds = open_dataset( filename_or_obj, group=parent, engine="zarr", **kwargs @@ -1233,7 +1244,18 @@ def open_datatree( return DataTree.from_dict({str(parent): ds}) else: parent = NodePath("/") - stores = ZarrStore.open_store(filename_or_obj, group=parent) + stores = ZarrStore.open_store( + filename_or_obj, + group=parent, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=False, + chunk_store=chunk_store, + storage_options=storage_options, + stacklevel=stacklevel + 1, + zarr_version=zarr_version, + ) ds = open_dataset(filename_or_obj, group=parent, engine="zarr", **kwargs) tree_root = DataTree.from_dict({str(parent): ds}) for path_group, store in stores.items(): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 441ddfe7bfd..74916886026 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -261,7 +261,7 @@ def _is_time_like(units): def _check_fill_values(attrs, name, dtype): - """ "Check _FillValue and missing_value if available. + """Check _FillValue and missing_value if available. Return dictionary with raw fill values and set with encoded fill values. 
@@ -298,6 +298,72 @@ def _check_fill_values(attrs, name, dtype): return raw_fill_dict, encoded_fill_values +def _convert_unsigned_fill_value( + name: T_Name, + data: Any, + unsigned: str, + raw_fill_value: Any, + encoded_fill_values: set, +) -> Any: + if data.dtype.kind == "i": + if unsigned == "true": + unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") + transform = partial(np.asarray, dtype=unsigned_dtype) + if raw_fill_value is not None: + new_fill = np.array(raw_fill_value, dtype=data.dtype) + encoded_fill_values.remove(raw_fill_value) + # use view here to prevent OverflowError + encoded_fill_values.add(new_fill.view(unsigned_dtype).item()) + data = lazy_elemwise_func(data, transform, unsigned_dtype) + elif data.dtype.kind == "u": + if unsigned == "false": + signed_dtype = np.dtype(f"i{data.dtype.itemsize}") + transform = partial(np.asarray, dtype=signed_dtype) + data = lazy_elemwise_func(data, transform, signed_dtype) + if raw_fill_value is not None: + new_fill = signed_dtype.type(raw_fill_value) + encoded_fill_values.remove(raw_fill_value) + encoded_fill_values.add(new_fill) + else: + warnings.warn( + f"variable {name!r} has _Unsigned attribute but is not " + "of integer type. Ignoring attribute.", + SerializationWarning, + stacklevel=3, + ) + return data + + +def _encode_unsigned_fill_value( + name: T_Name, + fill_value: Any, + encoded_dtype: np.dtype, +) -> Any: + try: + if hasattr(fill_value, "item"): + # if numpy type, convert to python native integer to determine overflow + # otherwise numpy unsigned ints will silently cast to the signed counterpart + fill_value = fill_value.item() + # passes if provided fill value fits in encoded on-disk type + new_fill = encoded_dtype.type(fill_value) + except OverflowError: + encoded_kind_str = "signed" if encoded_dtype.kind == "i" else "unsigned" + warnings.warn( + f"variable {name!r} will be stored as {encoded_kind_str} integers " + f"but _FillValue attribute can't be represented as a " + f"{encoded_kind_str} integer.", + SerializationWarning, + stacklevel=3, + ) + # user probably provided the fill as the in-memory dtype, + # convert to on-disk type to match CF standard + orig_kind = "u" if encoded_dtype.kind == "i" else "i" + orig_dtype = np.dtype(f"{orig_kind}{encoded_dtype.itemsize}") + # use view here to prevent OverflowError + new_fill = np.array(fill_value, dtype=orig_dtype).view(encoded_dtype).item() + return new_fill + + class CFMaskCoder(VariableCoder): """Mask or unmask fill values according to CF conventions.""" @@ -305,11 +371,14 @@ def encode(self, variable: Variable, name: T_Name = None): dims, data, attrs, encoding = unpack_for_encoding(variable) dtype = np.dtype(encoding.get("dtype", data.dtype)) + # from netCDF best practices + # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data + # "_Unsigned = "true" to indicate that + # integer data should be treated as unsigned" + has_unsigned = encoding.get("_Unsigned") is not None fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - # to properly handle _FillValue/missing_value below [a], [b] - # we need to check if unsigned data is written as signed data - unsigned = encoding.get("_Unsigned") is not None + fill_value = None fv_exists = fv is not None mv_exists = mv is not None @@ -324,23 +393,28 @@ def encode(self, variable: Variable, name: T_Name = None): if fv_exists: # Ensure _FillValue is cast to same dtype as data's - # [a] need to skip this if _Unsigned is available - if not unsigned: - encoding["_FillValue"] = dtype.type(fv) + 
encoding["_FillValue"] = ( + _encode_unsigned_fill_value(name, fv, dtype) + if has_unsigned + else dtype.type(fv) + ) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if mv_exists: # try to use _FillValue, if it exists to align both values # or use missing_value and ensure it's cast to same dtype as data's - # [b] need to provide mv verbatim if _Unsigned is available encoding["missing_value"] = attrs.get( "_FillValue", - (dtype.type(mv) if not unsigned else mv), + ( + _encode_unsigned_fill_value(name, mv, dtype) + if has_unsigned + else dtype.type(mv) + ), ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) # apply fillna - if not pd.isnull(fill_value): + if fill_value is not None and not pd.isnull(fill_value): # special case DateTime to properly handle NaT if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": data = duck_array_ops.where( @@ -349,46 +423,63 @@ def encode(self, variable: Variable, name: T_Name = None): else: data = duck_array_ops.fillna(data, fill_value) + if fill_value is not None and has_unsigned: + pop_to(encoding, attrs, "_Unsigned") + # XXX: Is this actually needed? Doesn't the backend handle this? + data = duck_array_ops.astype(duck_array_ops.around(data), dtype) + attrs["_FillValue"] = fill_value + return Variable(dims, data, attrs, encoding, fastpath=True) def decode(self, variable: Variable, name: T_Name = None): raw_fill_dict, encoded_fill_values = _check_fill_values( variable.attrs, name, variable.dtype ) + if "_Unsigned" not in variable.attrs and not raw_fill_dict: + return variable - if raw_fill_dict: - dims, data, attrs, encoding = unpack_for_decoding(variable) - [ - safe_setitem(encoding, attr, value, name=name) - for attr, value in raw_fill_dict.items() - ] - - if encoded_fill_values: - # special case DateTime to properly handle NaT - dtype: np.typing.DTypeLike - decoded_fill_value: Any - if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": - dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + dims, data, attrs, encoding = unpack_for_decoding(variable) + + # Even if _Unsigned is use, retain on-disk _FillValue + [ + safe_setitem(encoding, attr, value, name=name) + for attr, value in raw_fill_dict.items() + ] + + if "_Unsigned" in attrs: + unsigned = pop_to(attrs, encoding, "_Unsigned") + data = _convert_unsigned_fill_value( + name, + data, + unsigned, + raw_fill_dict.get("_FillValue"), + encoded_fill_values, + ) + + if encoded_fill_values: + # special case DateTime to properly handle NaT + dtype: np.typing.DTypeLike + decoded_fill_value: Any + if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": + dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + else: + if "scale_factor" not in attrs and "add_offset" not in attrs: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) else: - if "scale_factor" not in attrs and "add_offset" not in attrs: - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) - else: - dtype, decoded_fill_value = ( - _choose_float_dtype(data.dtype, attrs), - np.nan, - ) + dtype, decoded_fill_value = ( + _choose_float_dtype(data.dtype, attrs), + np.nan, + ) - transform = partial( - _apply_mask, - encoded_fill_values=encoded_fill_values, - decoded_fill_value=decoded_fill_value, - dtype=dtype, - ) - data = lazy_elemwise_func(data, transform, dtype) + transform = partial( + _apply_mask, + encoded_fill_values=encoded_fill_values, + decoded_fill_value=decoded_fill_value, + dtype=dtype, + ) + data = lazy_elemwise_func(data, transform, dtype) - 
return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable + return Variable(dims, data, attrs, encoding, fastpath=True) def _scale_offset_decoding(data, scale_factor, add_offset, dtype: np.typing.DTypeLike): @@ -506,74 +597,6 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: return variable -class UnsignedIntegerCoder(VariableCoder): - def encode(self, variable: Variable, name: T_Name = None) -> Variable: - # from netCDF best practices - # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data - # "_Unsigned = "true" to indicate that - # integer data should be treated as unsigned" - if variable.encoding.get("_Unsigned", "false") == "true": - dims, data, attrs, encoding = unpack_for_encoding(variable) - - pop_to(encoding, attrs, "_Unsigned") - # we need the on-disk type here - # trying to get it from encoding, resort to an int with the same precision as data.dtype if not available - signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}")) - if "_FillValue" in attrs: - try: - # user provided the on-disk signed fill - new_fill = signed_dtype.type(attrs["_FillValue"]) - except OverflowError: - # user provided the in-memory unsigned fill, convert to signed type - unsigned_dtype = np.dtype(f"u{signed_dtype.itemsize}") - # use view here to prevent OverflowError - new_fill = ( - np.array(attrs["_FillValue"], dtype=unsigned_dtype) - .view(signed_dtype) - .item() - ) - attrs["_FillValue"] = new_fill - data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype) - - return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable - - def decode(self, variable: Variable, name: T_Name = None) -> Variable: - if "_Unsigned" in variable.attrs: - dims, data, attrs, encoding = unpack_for_decoding(variable) - unsigned = pop_to(attrs, encoding, "_Unsigned") - - if data.dtype.kind == "i": - if unsigned == "true": - unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}") - transform = partial(np.asarray, dtype=unsigned_dtype) - if "_FillValue" in attrs: - new_fill = np.array(attrs["_FillValue"], dtype=data.dtype) - # use view here to prevent OverflowError - attrs["_FillValue"] = new_fill.view(unsigned_dtype).item() - data = lazy_elemwise_func(data, transform, unsigned_dtype) - elif data.dtype.kind == "u": - if unsigned == "false": - signed_dtype = np.dtype(f"i{data.dtype.itemsize}") - transform = partial(np.asarray, dtype=signed_dtype) - data = lazy_elemwise_func(data, transform, signed_dtype) - if "_FillValue" in attrs: - new_fill = signed_dtype.type(attrs["_FillValue"]) - attrs["_FillValue"] = new_fill - else: - warnings.warn( - f"variable {name!r} has _Unsigned attribute but is not " - "of integer type. 
Ignoring attribute.", - SerializationWarning, - stacklevel=3, - ) - - return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable - - class DefaultFillvalueCoder(VariableCoder): """Encode default _FillValue if needed.""" diff --git a/xarray/conventions.py b/xarray/conventions.py index d572b215d2d..18a81938225 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -187,7 +187,6 @@ def encode_cf_variable( times.CFTimedeltaCoder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), - variables.UnsignedIntegerCoder(), variables.NativeEnumCoder(), variables.NonStringCoder(), variables.DefaultFillvalueCoder(), @@ -279,7 +278,6 @@ def decode_cf_variable( if mask_and_scale: for coder in [ - variables.UnsignedIntegerCoder(), variables.CFMaskCoder(), variables.CFScaleOffsetCoder(), ]: diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 251edd1fc6f..3b852b962bf 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -352,7 +352,7 @@ def _construct_direct( return obj @classmethod - def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: str) -> Self: + def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: Hashable) -> Self: """Wrap a pandas multi-index as Xarray coordinates (dimension + levels). The returned coordinates can be directly assigned to a diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0b9a085cebc..dbc00a03025 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -163,6 +163,7 @@ ReindexMethodOptions, SideOptions, T_ChunkDimFreq, + T_DatasetPadConstantValues, T_Xarray, ) from xarray.core.weighted import DatasetWeighted @@ -9153,9 +9154,7 @@ def pad( stat_length: ( int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None ) = None, - constant_values: ( - float | tuple[float, float] | Mapping[Any, tuple[float, float]] | None - ) = None, + constant_values: T_DatasetPadConstantValues | None = None, end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, reflect_type: PadReflectOptions = None, keep_attrs: bool | None = None, @@ -9211,17 +9210,19 @@ def pad( (stat_length,) or int is a shortcut for before = after = statistic length for all axes. Default is ``None``, to use the entire axis. - constant_values : scalar, tuple or mapping of hashable to tuple, default: 0 - Used in 'constant'. The values to set the padded values for each - axis. + constant_values : scalar, tuple, mapping of dim name to scalar or tuple, or \ + mapping of var name to scalar, tuple or to mapping of dim name to scalar or tuple, default: None + Used in 'constant'. The values to set the padded values for each data variable / axis. + ``{var_1: {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}, ... + var_M: (before, after)}`` unique pad constants per data variable. ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique pad constants along each dimension. ``((before, after),)`` yields same before and after constants for each dimension. ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for all dimensions. - Default is 0. - end_values : scalar, tuple or mapping of hashable to tuple, default: 0 + Default is ``None``, pads with ``np.nan``. + end_values : scalar, tuple or mapping of hashable to tuple, default: None Used in 'linear_ramp'. The values used for the ending value of the linear_ramp and that will form the edge of the padded array. ``{dim_1: (before_1, after_1), ... 
dim_N: (before_N, after_N)}`` unique @@ -9230,7 +9231,7 @@ def pad( axis. ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for all axes. - Default is 0. + Default is None. reflect_type : {"even", "odd", None}, optional Used in "reflect", and "symmetric". The "even" style is the default with an unaltered reflection around the edge value. For @@ -9304,11 +9305,22 @@ def pad( if not var_pad_width: variables[name] = var elif name in self.data_vars: + if utils.is_dict_like(constant_values): + if name in constant_values.keys(): + filtered_constant_values = constant_values[name] + elif not set(var.dims).isdisjoint(constant_values.keys()): + filtered_constant_values = { + k: v for k, v in constant_values.items() if k in var.dims + } + else: + filtered_constant_values = 0 # TODO: https://github.com/pydata/xarray/pull/9353#discussion_r1724018352 + else: + filtered_constant_values = constant_values variables[name] = var.pad( pad_width=var_pad_width, mode=mode, stat_length=stat_length, - constant_values=constant_values, + constant_values=filtered_constant_values, end_values=end_values, reflect_type=reflect_type, keep_attrs=keep_attrs, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index faeb0c538c3..833466ffe9e 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -23,7 +23,7 @@ from xarray.core.formatting import format_array_flat from xarray.core.indexes import ( PandasIndex, - create_default_index_implicit, + PandasMultiIndex, filter_indexes_from_coords, ) from xarray.core.options import OPTIONS, _get_keep_attrs @@ -54,7 +54,7 @@ from xarray.core.dataset import Dataset from xarray.core.types import GroupIndex, GroupIndices, GroupKey from xarray.core.utils import Frozen - from xarray.groupers import Grouper + from xarray.groupers import EncodedGroups, Grouper def check_reduce_dims(reduce_dims, dimensions): @@ -273,16 +273,19 @@ class ResolvedGrouper(Generic[T_DataWithCoords]): obj: T_DataWithCoords # returned by factorize: - codes: DataArray = field(init=False, repr=False) - full_index: pd.Index = field(init=False, repr=False) - group_indices: GroupIndices = field(init=False, repr=False) - unique_coord: Variable | _DummyGroup = field(init=False, repr=False) + encoded: EncodedGroups = field(init=False, repr=False) - # _ensure_1d: - group1d: T_Group = field(init=False, repr=False) - stacked_obj: T_DataWithCoords = field(init=False, repr=False) - stacked_dim: Hashable | None = field(init=False, repr=False) - inserted_dims: list[Hashable] = field(init=False, repr=False) + @property + def full_index(self) -> pd.Index: + return self.encoded.full_index + + @property + def codes(self) -> DataArray: + return self.encoded.codes + + @property + def unique_coord(self) -> Variable | _DummyGroup: + return self.encoded.unique_coord def __post_init__(self) -> None: # This copy allows the BinGrouper.factorize() method @@ -294,20 +297,13 @@ def __post_init__(self) -> None: self.group = _resolve_group(self.obj, self.group) - ( - self.group1d, - self.stacked_obj, - self.stacked_dim, - self.inserted_dims, - ) = _ensure_1d(group=self.group, obj=self.obj) - - self.factorize() + self.encoded = self.grouper.factorize(self.group) @property def name(self) -> Hashable: """Name for the grouped coordinate after reduction.""" # the name has to come from unique_coord because we need `_bins` suffix for BinGrouper - (name,) = self.unique_coord.dims + (name,) = self.encoded.unique_coord.dims return name @property @@ -317,33 +313,7 @@ def size(self) -> int: def 
__len__(self) -> int: """Number of groups.""" - return len(self.full_index) - - @property - def dims(self): - return self.group1d.dims - - def factorize(self) -> None: - encoded = self.grouper.factorize(self.group1d) - - self.codes = encoded.codes - self.full_index = encoded.full_index - - if encoded.group_indices is not None: - self.group_indices = encoded.group_indices - else: - self.group_indices = tuple( - g - for g in _codes_to_group_indices(self.codes.data, len(self.full_index)) - if g - ) - if encoded.unique_coord is None: - unique_values = self.full_index[np.unique(encoded.codes)] - self.unique_coord = Variable( - dims=self.codes.name, data=unique_values, attrs=self.group.attrs - ) - else: - self.unique_coord = encoded.unique_coord + return len(self.encoded.full_index) def _validate_groupby_squeeze(squeeze: Literal[False]) -> None: @@ -428,31 +398,29 @@ class GroupBy(Generic[T_Xarray]): """ __slots__ = ( - "_full_index", - "_inserted_dims", - "_group", "_group_dim", - "_group_indices", - "_groups", "groupers", "_obj", "_restore_coord_dims", - "_stacked_dim", - "_unique_coord", + # cached properties + "_groups", "_dims", "_sizes", + "_len", # Save unstacked object for flox "_original_obj", - "_original_group", - "_bins", "_codes", + # stack nD vars + "group1d", + "_stacked_dim", + "_inserted_dims", + "encoded", ) _obj: T_Xarray groupers: tuple[ResolvedGrouper] _restore_coord_dims: bool _original_obj: T_Xarray - _original_group: T_Group _group_indices: GroupIndices _codes: DataArray _group_dim: Hashable @@ -460,6 +428,14 @@ class GroupBy(Generic[T_Xarray]): _groups: dict[GroupKey, GroupIndex] | None _dims: tuple[Hashable, ...] | Frozen[Hashable, int] | None _sizes: Mapping[Hashable, int] | None + _len: int + + # _ensure_1d: + group1d: T_Group + _stacked_dim: Hashable | None + _inserted_dims: list[Hashable] + + encoded: EncodedGroups def __init__( self, @@ -479,26 +455,26 @@ def __init__( If True, also restore the dimension order of multi-dimensional coordinates. """ - self.groupers = groupers - self._original_obj = obj + self._restore_coord_dims = restore_coord_dims + self.groupers = groupers - (grouper,) = self.groupers - self._original_group = grouper.group + (grouper,) = groupers + self.encoded = grouper.encoded # specification for the groupby operation - self._obj = grouper.stacked_obj - self._restore_coord_dims = restore_coord_dims - - # These should generalize to multiple groupers - self._group_indices = grouper.group_indices - self._codes = self._maybe_unstack(grouper.codes) + # TODO: handle obj having variables that are not present on any of the groupers + # simple broadcasting fails for ExtensionArrays. 
+ (self.group1d, self._obj, self._stacked_dim, self._inserted_dims) = _ensure_1d( + group=self.encoded.codes, obj=obj + ) + (self._group_dim,) = self.group1d.dims - (self._group_dim,) = grouper.group1d.dims # cached attributes self._groups = None self._dims = None self._sizes = None + self._len = len(self.encoded.full_index) @property def sizes(self) -> Mapping[Hashable, int]: @@ -512,8 +488,7 @@ def sizes(self) -> Mapping[Hashable, int]: Dataset.sizes """ if self._sizes is None: - (grouper,) = self.groupers - index = self._group_indices[0] + index = self.encoded.group_indices[0] self._sizes = self._obj.isel({self._group_dim: index}).sizes return self._sizes @@ -546,24 +521,22 @@ def groups(self) -> dict[GroupKey, GroupIndex]: """ # provided to mimic pandas.groupby if self._groups is None: - (grouper,) = self.groupers - self._groups = dict(zip(grouper.unique_coord.values, self._group_indices)) + self._groups = dict( + zip(self.encoded.unique_coord.data, self.encoded.group_indices) + ) return self._groups def __getitem__(self, key: GroupKey) -> T_Xarray: """ Get DataArray or Dataset corresponding to a particular group label. """ - (grouper,) = self.groupers return self._obj.isel({self._group_dim: self.groups[key]}) def __len__(self) -> int: - (grouper,) = self.groupers - return grouper.size + return self._len def __iter__(self) -> Iterator[tuple[GroupKey, T_Xarray]]: - (grouper,) = self.groupers - return zip(grouper.unique_coord.data, self._iter_grouped()) + return zip(self.encoded.unique_coord.data, self._iter_grouped()) def __repr__(self) -> str: (grouper,) = self.groupers @@ -576,28 +549,20 @@ def __repr__(self) -> str: def _iter_grouped(self) -> Iterator[T_Xarray]: """Iterate over each element in this group""" - (grouper,) = self.groupers - for idx, indices in enumerate(self._group_indices): - yield self._obj.isel({self._group_dim: indices}) + for indices in self.encoded.group_indices: + if indices: + yield self._obj.isel({self._group_dim: indices}) def _infer_concat_args(self, applied_example): - from xarray.groupers import BinGrouper - (grouper,) = self.groupers if self._group_dim in applied_example.dims: - coord = grouper.group1d - positions = self._group_indices + coord = self.group1d + positions = self.encoded.group_indices else: - coord = grouper.unique_coord + coord = self.encoded.unique_coord positions = None (dim,) = coord.dims - if isinstance(grouper.group, _DummyGroup) and not isinstance( - grouper.grouper, BinGrouper - ): - # When binning we actually do set the index - coord = None - coord = getattr(coord, "variable", coord) - return coord, dim, positions + return dim, positions def _binary_op(self, other, f, reflexive=False): from xarray.core.dataarray import DataArray @@ -609,7 +574,7 @@ def _binary_op(self, other, f, reflexive=False): obj = self._original_obj name = grouper.name group = grouper.group - codes = self._codes + codes = self.encoded.codes dims = group.dims if isinstance(group, _DummyGroup): @@ -710,8 +675,8 @@ def _maybe_unstack(self, obj): """This gets called if we are applying on an array with a multidimensional group.""" (grouper,) = self.groupers - stacked_dim = grouper.stacked_dim - inserted_dims = grouper.inserted_dims + stacked_dim = self._stacked_dim + inserted_dims = self._inserted_dims if stacked_dim is not None and stacked_dim in obj.dims: obj = obj.unstack(stacked_dim) for dim in inserted_dims: @@ -797,7 +762,7 @@ def _flox_reduce( output_index = grouper.full_index result = xarray_reduce( obj.drop_vars(non_numeric.keys()), - self._codes, + 
self.encoded.codes, dim=parsed_dim, # pass RangeIndex as a hint to flox that `by` is already factorized expected_groups=(pd.RangeIndex(len(output_index)),), @@ -808,15 +773,27 @@ def _flox_reduce( # we did end up reducing over dimension(s) that are # in the grouped variable - group_dims = grouper.group.dims - if set(group_dims).issubset(set(parsed_dim)): - result = result.assign_coords( - Coordinates( - coords={name: (name, np.array(output_index))}, - indexes={name: PandasIndex(output_index, dim=name)}, + group_dims = set(grouper.group.dims) + new_coords = {} + if group_dims.issubset(set(parsed_dim)): + new_indexes = {} + for grouper in self.groupers: + output_index = grouper.full_index + if isinstance(output_index, pd.RangeIndex): + continue + name = grouper.name + new_coords[name] = IndexVariable( + dims=name, data=np.array(output_index), attrs=grouper.codes.attrs ) - ) - result = result.drop_vars(unindexed_dims) + index_cls = ( + PandasIndex + if not isinstance(output_index, pd.MultiIndex) + else PandasMultiIndex + ) + new_indexes[name] = index_cls(output_index, dim=name) + result = result.assign_coords( + Coordinates(new_coords, new_indexes) + ).drop_vars(unindexed_dims) # broadcast and restore non-numeric data variables (backcompat) for name, var in non_numeric.items(): @@ -986,7 +963,7 @@ def quantile( """ if dim is None: (grouper,) = self.groupers - dim = grouper.group1d.dims + dim = self.group1d.dims # Dataset.quantile does this, do it for flox to ensure same output. q = np.asarray(q, dtype=np.float64) @@ -1038,7 +1015,7 @@ def _first_or_last(self, op, skipna, keep_attrs): if all( isinstance(maybe_slice, slice) and (maybe_slice.stop == maybe_slice.start + 1) - for maybe_slice in self._group_indices + for maybe_slice in self.encoded.group_indices ): # NB. 
this is currently only used for reductions along an existing # dimension @@ -1087,8 +1064,7 @@ class DataArrayGroupByBase(GroupBy["DataArray"], DataArrayGroupbyArithmetic): @property def dims(self) -> tuple[Hashable, ...]: if self._dims is None: - (grouper,) = self.groupers - index = self._group_indices[0] + index = self.encoded.group_indices[0] self._dims = self._obj.isel({self._group_dim: index}).dims return self._dims @@ -1097,8 +1073,7 @@ def _iter_grouped_shortcut(self): metadata """ var = self._obj.variable - (grouper,) = self.groupers - for idx, indices in enumerate(self._group_indices): + for idx, indices in enumerate(self.encoded.group_indices): yield var[{self._group_dim: indices}] def _concat_shortcut(self, applied, dim, positions=None): @@ -1109,14 +1084,12 @@ def _concat_shortcut(self, applied, dim, positions=None): # TODO: benbovy - explicit indexes: this fast implementation doesn't # create an explicit index for the stacked dim coordinate stacked = Variable.concat(applied, dim, shortcut=True) - - (grouper,) = self.groupers - reordered = _maybe_reorder(stacked, dim, positions, N=grouper.group.size) + reordered = _maybe_reorder(stacked, dim, positions, N=self.group1d.size) return self._obj._replace_maybe_drop_dims(reordered) def _restore_dim_order(self, stacked: DataArray) -> DataArray: (grouper,) = self.groupers - group = grouper.group1d + group = self.group1d def lookup_order(dimension): if dimension == grouper.name: @@ -1200,24 +1173,21 @@ def apply(self, func, shortcut=False, args=(), **kwargs): def _combine(self, applied, shortcut=False): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) - coord, dim, positions = self._infer_concat_args(applied_example) + dim, positions = self._infer_concat_args(applied_example) if shortcut: combined = self._concat_shortcut(applied, dim, positions) else: combined = concat(applied, dim) - (grouper,) = self.groupers - combined = _maybe_reorder(combined, dim, positions, N=grouper.group.size) + combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size) if isinstance(combined, type(self._obj)): # only restore dimension order for arrays combined = self._restore_dim_order(combined) # assign coord and index when the applied function does not return that coord - if coord is not None and dim not in applied_example.dims: - index, index_vars = create_default_index_implicit(coord) - indexes = {k: index for k in index_vars} - combined = combined._overwrite_indexes(indexes, index_vars) - combined = self._maybe_restore_empty_groups(combined) + if dim not in applied_example.dims: + combined = combined.assign_coords(self.encoded.coords) combined = self._maybe_unstack(combined) + combined = self._maybe_restore_empty_groups(combined) return combined def reduce( @@ -1297,8 +1267,7 @@ class DatasetGroupByBase(GroupBy["Dataset"], DatasetGroupbyArithmetic): @property def dims(self) -> Frozen[Hashable, int]: if self._dims is None: - (grouper,) = self.groupers - index = self._group_indices[0] + index = self.encoded.group_indices[0] self._dims = self._obj.isel({self._group_dim: index}).dims return FrozenMappingWarningOnValuesAccess(self._dims) @@ -1362,17 +1331,14 @@ def apply(self, func, args=(), shortcut=None, **kwargs): def _combine(self, applied): """Recombine the applied objects like the original.""" applied_example, applied = peek_at(applied) - coord, dim, positions = self._infer_concat_args(applied_example) + dim, positions = self._infer_concat_args(applied_example) combined = concat(applied, 
dim) - (grouper,) = self.groupers - combined = _maybe_reorder(combined, dim, positions, N=grouper.group.size) + combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size) # assign coord when the applied function does not return that coord - if coord is not None and dim not in applied_example.dims: - index, index_vars = create_default_index_implicit(coord) - indexes = {k: index for k in index_vars} - combined = combined._overwrite_indexes(indexes, index_vars) - combined = self._maybe_restore_empty_groups(combined) + if dim not in applied_example.dims: + combined = combined.assign_coords(self.encoded.coords) combined = self._maybe_unstack(combined) + combined = self._maybe_restore_empty_groups(combined) return combined def reduce( diff --git a/xarray/core/types.py b/xarray/core/types.py index 0e432283ba9..3eb97f86c4a 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -243,6 +243,11 @@ def copy( "symmetric", "wrap", ] +T_PadConstantValues = float | tuple[float, float] +T_VarPadConstantValues = T_PadConstantValues | Mapping[Any, T_PadConstantValues] +T_DatasetPadConstantValues = ( + T_VarPadConstantValues | Mapping[Any, T_VarPadConstantValues] +) PadReflectOptions = Literal["even", "odd", None] CFCalendar = Literal[ diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 3cd8e4acbd5..a74fb4d8ce9 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -65,6 +65,7 @@ Self, T_Chunks, T_DuckArray, + T_VarPadConstantValues, ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -1121,9 +1122,14 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): def _pad_options_dim_to_index( self, - pad_option: Mapping[Any, int | tuple[int, int]], + pad_option: Mapping[Any, int | float | tuple[int, int] | tuple[float, float]], fill_with_shape=False, ): + # change number values to a tuple of two of those values + for k, v in pad_option.items(): + if isinstance(v, numbers.Number): + pad_option[k] = (v, v) + if fill_with_shape: return [ (n, n) if d not in pad_option else pad_option[d] @@ -1138,9 +1144,7 @@ def pad( stat_length: ( int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None ) = None, - constant_values: ( - float | tuple[float, float] | Mapping[Any, tuple[float, float]] | None - ) = None, + constant_values: T_VarPadConstantValues | None = None, end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, reflect_type: PadReflectOptions = None, keep_attrs: bool | None = None, @@ -1160,7 +1164,7 @@ def pad( stat_length : int, tuple or mapping of hashable to tuple Used in 'maximum', 'mean', 'median', and 'minimum'. Number of values at edge of each axis used to calculate the statistic value. - constant_values : scalar, tuple or mapping of hashable to tuple + constant_values : scalar, tuple or mapping of hashable to scalar or tuple Used in 'constant'. The values to set the padded values for each axis. 
end_values : scalar, tuple or mapping of hashable to tuple @@ -1207,10 +1211,6 @@ def pad( if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]: stat_length = [(n, n) for n in self.data.shape] # type: ignore[assignment] - # change integer values to a tuple of two of those values and change pad_width to index - for k, v in pad_width.items(): - if isinstance(v, numbers.Number): - pad_width[k] = (v, v) pad_width_by_index = self._pad_options_dim_to_index(pad_width) # create pad_options_kwargs, numpy/dask requires only relevant kwargs to be nonempty diff --git a/xarray/groupers.py b/xarray/groupers.py index 98409dfe542..f70cad655e8 100644 --- a/xarray/groupers.py +++ b/xarray/groupers.py @@ -9,13 +9,14 @@ import datetime from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd from xarray.coding.cftime_offsets import _new_to_legacy_freq from xarray.core import duck_array_ops +from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.groupby import T_Group, _DummyGroup from xarray.core.indexes import safe_cast_to_index @@ -35,7 +36,18 @@ RESAMPLE_DIM = "__resample_dim__" -@dataclass +def _coordinates_from_variable(variable: Variable) -> Coordinates: + from xarray.core.indexes import create_default_index_implicit + + (name,) = variable.dims + new_index, index_vars = create_default_index_implicit(variable) + indexes = {k: new_index for k in index_vars} + new_vars = new_index.create_variables() + new_vars[name].attrs = variable.attrs + return Coordinates(new_vars, indexes) + + +@dataclass(init=False) class EncodedGroups: """ Dataclass for storing intermediate values for GroupBy operation. 
@@ -57,18 +69,49 @@ class EncodedGroups: codes: DataArray full_index: pd.Index - group_indices: GroupIndices | None = field(default=None) - unique_coord: Variable | _DummyGroup | None = field(default=None) - - def __post_init__(self): - assert isinstance(self.codes, DataArray) - if self.codes.name is None: + group_indices: GroupIndices + unique_coord: Variable | _DummyGroup + coords: Coordinates + + def __init__( + self, + codes: DataArray, + full_index: pd.Index, + group_indices: GroupIndices | None = None, + unique_coord: Variable | _DummyGroup | None = None, + coords: Coordinates | None = None, + ): + from xarray.core.groupby import _codes_to_group_indices + + assert isinstance(codes, DataArray) + if codes.name is None: raise ValueError("Please set a name on the array you are grouping by.") - assert isinstance(self.full_index, pd.Index) - assert ( - isinstance(self.unique_coord, Variable | _DummyGroup) - or self.unique_coord is None - ) + self.codes = codes + assert isinstance(full_index, pd.Index) + self.full_index = full_index + + if group_indices is None: + self.group_indices = tuple( + g + for g in _codes_to_group_indices(codes.data.ravel(), len(full_index)) + if g + ) + else: + self.group_indices = group_indices + + if unique_coord is None: + unique_values = full_index[np.unique(codes)] + self.unique_coord = Variable( + dims=codes.name, data=unique_values, attrs=codes.attrs + ) + else: + self.unique_coord = unique_coord + + if coords is None: + assert not isinstance(self.unique_coord, _DummyGroup) + self.coords = _coordinates_from_variable(self.unique_coord) + else: + self.coords = coords class Grouper(ABC): @@ -111,11 +154,14 @@ class UniqueGrouper(Grouper): def group_as_index(self) -> pd.Index: """Caches the group DataArray as a pandas Index.""" if self._group_as_index is None: - self._group_as_index = self.group.to_index() + if self.group.ndim == 1: + self._group_as_index = self.group.to_index() + else: + self._group_as_index = pd.Index(np.array(self.group).ravel()) return self._group_as_index - def factorize(self, group1d: T_Group) -> EncodedGroups: - self.group = group1d + def factorize(self, group: T_Group) -> EncodedGroups: + self.group = group index = self.group_as_index is_unique_and_monotonic = isinstance(self.group, _DummyGroup) or ( @@ -138,14 +184,17 @@ def _factorize_unique(self) -> EncodedGroups: raise ValueError( "Failed to group data. Are you grouping by a variable that is all NaN?" 
) - codes = self.group.copy(data=codes_) + codes = self.group.copy(data=codes_.reshape(self.group.shape)) unique_coord = Variable( dims=codes.name, data=unique_values, attrs=self.group.attrs ) full_index = pd.Index(unique_values) return EncodedGroups( - codes=codes, full_index=full_index, unique_coord=unique_coord + codes=codes, + full_index=full_index, + unique_coord=unique_coord, + coords=_coordinates_from_variable(unique_coord), ) def _factorize_dummy(self) -> EncodedGroups: @@ -156,20 +205,31 @@ def _factorize_dummy(self) -> EncodedGroups: group_indices: GroupIndices = tuple(slice(i, i + 1) for i in range(size)) size_range = np.arange(size) full_index: pd.Index + unique_coord: _DummyGroup | Variable if isinstance(self.group, _DummyGroup): codes = self.group.to_dataarray().copy(data=size_range) unique_coord = self.group full_index = pd.RangeIndex(self.group.size) + coords = Coordinates() else: codes = self.group.copy(data=size_range) unique_coord = self.group.variable.to_base_variable() - full_index = pd.Index(unique_coord.data) + full_index = self.group_as_index + if isinstance(full_index, pd.MultiIndex): + coords = Coordinates.from_pandas_multiindex( + full_index, dim=self.group.name + ) + else: + if TYPE_CHECKING: + assert isinstance(unique_coord, Variable) + coords = _coordinates_from_variable(unique_coord) return EncodedGroups( codes=codes, group_indices=group_indices, full_index=full_index, unique_coord=unique_coord, + coords=coords, ) @@ -231,7 +291,7 @@ def factorize(self, group: T_Group) -> EncodedGroups: data = np.asarray(group.data) # Cast _DummyGroup data to array binned, self.bins = pd.cut( # type: ignore [call-overload] - data, + data.ravel(), bins=self.bins, right=self.right, labels=self.labels, @@ -254,13 +314,18 @@ def factorize(self, group: T_Group) -> EncodedGroups: unique_values = full_index[uniques[uniques != -1]] codes = DataArray( - binned_codes, getattr(group, "coords", None), name=new_dim_name + binned_codes.reshape(group.shape), + getattr(group, "coords", None), + name=new_dim_name, ) unique_coord = Variable( dims=new_dim_name, data=unique_values, attrs=group.attrs ) return EncodedGroups( - codes=codes, full_index=full_index, unique_coord=unique_coord + codes=codes, + full_index=full_index, + unique_coord=unique_coord, + coords=_coordinates_from_variable(unique_coord), ) @@ -373,13 +438,14 @@ def factorize(self, group: T_Group) -> EncodedGroups: unique_coord = Variable( dims=group.name, data=first_items.index, attrs=group.attrs ) - codes = group.copy(data=codes_) + codes = group.copy(data=codes_.reshape(group.shape)) return EncodedGroups( codes=codes, group_indices=group_indices, full_index=full_index, unique_coord=unique_coord, + coords=_coordinates_from_variable(unique_coord), ) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 0caab6e8247..b4d3871c229 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -134,6 +134,7 @@ def _importorskip( has_pint, requires_pint = _importorskip("pint") has_numexpr, requires_numexpr = _importorskip("numexpr") has_flox, requires_flox = _importorskip("flox") +has_pandas_ge_2_1, __ = _importorskip("pandas", "2.1") has_pandas_ge_2_2, __ = _importorskip("pandas", "2.2") has_pandas_3, requires_pandas_3 = _importorskip("pandas", "3.0.0.dev0") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bbe48663c1f..c755924f583 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,6 +54,7 @@ from xarray.conventions import 
encode_dataset_coordinates from xarray.core import indexing from xarray.core.options import set_options +from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type from xarray.tests import ( assert_allclose, @@ -166,7 +167,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset: def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset: encoding = { - "_FillValue": 255, + "_FillValue": -1, "_Unsigned": "true", "dtype": "i1", "add_offset": dtype.type(10), @@ -242,6 +243,32 @@ def create_encoded_signed_masked_scaled_data(dtype: np.dtype) -> Dataset: return Dataset({"x": ("t", sb, attributes)}) +def create_unsigned_false_masked_scaled_data(dtype: np.dtype) -> Dataset: + encoding = { + "_FillValue": 255, + "_Unsigned": "false", + "dtype": "u1", + "add_offset": dtype.type(10), + "scale_factor": dtype.type(0.1), + } + x = np.array([-1.0, 10.1, 22.7, np.nan], dtype=dtype) + return Dataset({"x": ("t", x, {}, encoding)}) + + +def create_encoded_unsigned_false_masked_scaled_data(dtype: np.dtype) -> Dataset: + # These are values as written to the file: the _FillValue will + # be represented in the unsigned form. + attributes = { + "_FillValue": 255, + "_Unsigned": "false", + "add_offset": dtype.type(10), + "scale_factor": dtype.type(0.1), + } + # Create unsigned data corresponding to [-110, 1, 127, 255] signed + sb = np.asarray([146, 1, 127, 255], dtype="u1") + return Dataset({"x": ("t", sb, attributes)}) + + def create_boolean_data() -> Dataset: attributes = {"units": "-"} return Dataset({"x": ("t", [True, False, False, True], attributes)}) @@ -890,6 +917,10 @@ def test_roundtrip_empty_vlen_string_array(self) -> None: create_signed_masked_scaled_data, create_encoded_signed_masked_scaled_data, ), + ( + create_unsigned_false_masked_scaled_data, + create_encoded_unsigned_false_masked_scaled_data, + ), (create_masked_and_scaled_data, create_encoded_masked_and_scaled_data), ], ) @@ -899,9 +930,21 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: pytest.skip("float32 will be treated as float64 in zarr") decoded = decoded_fn(dtype) encoded = encoded_fn(dtype) + if decoded["x"].encoding["dtype"] == "u1" and not ( + self.engine == "netcdf4" + and self.file_format is None + or self.file_format == "NETCDF4" + ): + pytest.skip("uint8 data can't be written to non-NetCDF4 data") + with self.roundtrip(decoded) as actual: for k in decoded.variables: assert decoded.variables[k].dtype == actual.variables[k].dtype + # CF _FillValue is always on-disk type + assert ( + decoded.variables[k].encoding["_FillValue"] + == actual.variables[k].encoding["_FillValue"] + ) assert_allclose(decoded, actual, decode_bytes=False) with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual: @@ -909,11 +952,21 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: # encode. Is that something we want to test for? 
for k in encoded.variables: assert encoded.variables[k].dtype == actual.variables[k].dtype + # CF _FillValue is always on-disk type + assert ( + decoded.variables[k].encoding["_FillValue"] + == actual.variables[k].attrs["_FillValue"] + ) assert_allclose(encoded, actual, decode_bytes=False) with self.roundtrip(encoded, open_kwargs=dict(decode_cf=False)) as actual: for k in encoded.variables: assert encoded.variables[k].dtype == actual.variables[k].dtype + # CF _FillValue is always on-disk type + assert ( + encoded.variables[k].attrs["_FillValue"] + == actual.variables[k].attrs["_FillValue"] + ) assert_allclose(encoded, actual, decode_bytes=False) # make sure roundtrip encoding didn't change the @@ -925,11 +978,33 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype assert_allclose(decoded, actual, decode_bytes=False) - @pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255), -1, 255]) - def test_roundtrip_unsigned(self, fillvalue): + @pytest.mark.parametrize( + ("fill_value", "exp_fill_warning"), + [ + (np.int8(-1), False), + (np.uint8(255), True), + (-1, False), + (255, True), + ], + ) + def test_roundtrip_unsigned(self, fill_value, exp_fill_warning): + @contextlib.contextmanager + def _roundtrip_with_warnings(*args, **kwargs): + is_np2 = module_available("numpy", minversion="2.0.0.dev0") + if exp_fill_warning and is_np2: + warn_checker: contextlib.AbstractContextManager = pytest.warns( + SerializationWarning, + match="_FillValue attribute can't be represented", + ) + else: + warn_checker = contextlib.nullcontext() + with warn_checker: + with self.roundtrip(*args, **kwargs) as actual: + yield actual + # regression/numpy2 test for encoding = { - "_FillValue": fillvalue, + "_FillValue": fill_value, "_Unsigned": "true", "dtype": "i1", } @@ -937,21 +1012,32 @@ def test_roundtrip_unsigned(self, fillvalue): decoded = Dataset({"x": ("t", x, {}, encoding)}) attributes = { - "_FillValue": fillvalue, + "_FillValue": fill_value, "_Unsigned": "true", } # Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned sb = np.asarray([0, 1, 127, -128, -2, -1], dtype="i1") encoded = Dataset({"x": ("t", sb, attributes)}) + unsigned_dtype = np.dtype(f"u{sb.dtype.itemsize}") - with self.roundtrip(decoded) as actual: + with _roundtrip_with_warnings(decoded) as actual: for k in decoded.variables: assert decoded.variables[k].dtype == actual.variables[k].dtype + exp_fv = decoded.variables[k].encoding["_FillValue"] + if exp_fill_warning: + exp_fv = np.array(exp_fv, dtype=unsigned_dtype).view(sb.dtype) + assert exp_fv == actual.variables[k].encoding["_FillValue"] assert_allclose(decoded, actual, decode_bytes=False) - with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual: + with _roundtrip_with_warnings( + decoded, open_kwargs=dict(decode_cf=False) + ) as actual: for k in encoded.variables: assert encoded.variables[k].dtype == actual.variables[k].dtype + exp_fv = encoded.variables[k].attrs["_FillValue"] + if exp_fill_warning: + exp_fv = np.array(exp_fv, dtype=unsigned_dtype).view(sb.dtype) + assert exp_fv == actual.variables[k].attrs["_FillValue"] assert_allclose(encoded, actual, decode_bytes=False) @staticmethod diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6d81d6f5dc8..acb32504948 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -129,7 +129,7 @@ def test_decode_unsigned_from_signed(bits) -> None: encoded = xr.Variable( ("x",), 
original_values.astype(signed_dtype), attrs={"_Unsigned": "true"} ) - coder = variables.UnsignedIntegerCoder() + coder = variables.CFMaskCoder() decoded = coder.decode(encoded) assert decoded.dtype == unsigned_dtype assert decoded.values == original_values @@ -143,7 +143,7 @@ def test_decode_signed_from_unsigned(bits) -> None: encoded = xr.Variable( ("x",), original_values.astype(unsigned_dtype), attrs={"_Unsigned": "false"} ) - coder = variables.UnsignedIntegerCoder() + coder = variables.CFMaskCoder() decoded = coder.decode(encoded) assert decoded.dtype == signed_dtype assert decoded.values == original_values diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fb3d487f2ef..f2e712e334c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6704,18 +6704,80 @@ def test_polyfit_warnings(self) -> None: ds.var1.polyfit("dim2", 10, full=True) assert len(ws) == 1 - def test_pad(self) -> None: - ds = create_test_data(seed=1) - padded = ds.pad(dim2=(1, 1), constant_values=42) - - assert padded["dim2"].shape == (11,) - assert padded["var1"].shape == (8, 11) - assert padded["var2"].shape == (8, 11) - assert padded["var3"].shape == (10, 8) - assert dict(padded.sizes) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + @staticmethod + def _test_data_var_interior( + original_data_var, padded_data_var, padded_dim_name, expected_pad_values + ): + np.testing.assert_equal( + np.unique(padded_data_var.isel({padded_dim_name: [0, -1]})), + expected_pad_values, + ) + np.testing.assert_array_equal( + padded_data_var.isel({padded_dim_name: slice(1, -1)}), original_data_var + ) - np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) - np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + @pytest.mark.parametrize("padded_dim_name", ["dim1", "dim2", "dim3", "time"]) + @pytest.mark.parametrize( + ["constant_values"], + [ + pytest.param(None, id="default"), + pytest.param(42, id="scalar"), + pytest.param((42, 43), id="tuple"), + pytest.param({"dim1": 42, "dim2": 43}, id="per dim scalar"), + pytest.param({"dim1": (42, 43), "dim2": (43, 44)}, id="per dim tuple"), + pytest.param({"var1": 42, "var2": (42, 43)}, id="per var"), + pytest.param({"var1": 42, "dim1": (42, 43)}, id="mixed"), + ], + ) + def test_pad(self, padded_dim_name, constant_values) -> None: + ds = create_test_data(seed=1) + padded = ds.pad({padded_dim_name: (1, 1)}, constant_values=constant_values) + + # test padded dim values and size + for ds_dim_name, ds_dim in ds.sizes.items(): + if ds_dim_name == padded_dim_name: + np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim + 2) + if ds_dim_name in padded.coords: + assert padded[ds_dim_name][[0, -1]].isnull().all() + else: + np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim) + + # check if coord "numbers" with dimention dim3 is paded correctly + if padded_dim_name == "dim3": + assert padded["numbers"][[0, -1]].isnull().all() + # twarning: passes but dtype changes from int to float + np.testing.assert_array_equal(padded["numbers"][1:-1], ds["numbers"]) + + # test if data_vars are paded with correct values + for data_var_name, data_var in padded.data_vars.items(): + if padded_dim_name in data_var.dims: + if utils.is_dict_like(constant_values): + if ( + expected := constant_values.get(data_var_name, None) + ) is not None: + self._test_data_var_interior( + ds[data_var_name], data_var, padded_dim_name, expected + ) + elif ( + expected := constant_values.get(padded_dim_name, None) + ) is not None: + 
self._test_data_var_interior( + ds[data_var_name], data_var, padded_dim_name, expected + ) + else: + self._test_data_var_interior( + ds[data_var_name], data_var, padded_dim_name, 0 + ) + elif constant_values: + self._test_data_var_interior( + ds[data_var_name], data_var, padded_dim_name, constant_values + ) + else: + self._test_data_var_interior( + ds[data_var_name], data_var, padded_dim_name, np.nan + ) + else: + assert_array_equal(data_var, ds[data_var_name]) @pytest.mark.parametrize( ["keep_attrs", "attrs", "expected"], diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 6c9254966d9..7dbb0d5e59c 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -22,6 +22,7 @@ create_test_data, has_cftime, has_flox, + has_pandas_ge_2_1, requires_cftime, requires_dask, requires_flox, @@ -118,6 +119,13 @@ def test_multi_index_groupby_sum() -> None: actual = ds.stack(space=["x", "y"]).groupby("space").sum("z").unstack("space") assert_equal(expected, actual) + if not has_pandas_ge_2_1: + # the next line triggers a mysterious multiindex error on pandas 2.0 + return + + actual = ds.stack(space=["x", "y"]).groupby("space").sum(...).unstack("space") + assert_equal(expected, actual) + def test_groupby_da_datetime() -> None: # test groupby with a DataArray of dtype datetime for GH1132 @@ -806,6 +814,7 @@ def test_groupby_dataset_errors() -> None: data.groupby(data.coords["dim1"].to_index()) # type: ignore[arg-type] +@pytest.mark.parametrize("use_flox", [True, False]) @pytest.mark.parametrize( "by_func", [ @@ -813,7 +822,10 @@ def test_groupby_dataset_errors() -> None: pytest.param(lambda x: {x: UniqueGrouper()}, id="group-by-unique-grouper"), ], ) -def test_groupby_dataset_reduce_ellipsis(by_func) -> None: +@pytest.mark.parametrize("letters_as_coord", [True, False]) +def test_groupby_dataset_reduce_ellipsis( + by_func, use_flox: bool, letters_as_coord: bool +) -> None: data = Dataset( { "xy": (["x", "y"], np.random.randn(3, 4)), @@ -823,13 +835,18 @@ def test_groupby_dataset_reduce_ellipsis(by_func) -> None: } ) + if letters_as_coord: + data = data.set_coords("letters") + expected = data.mean("y") expected["yonly"] = expected["yonly"].variable.set_dims({"x": 3}) gb = data.groupby(by_func("x")) - actual = gb.mean(...) + with xr.set_options(use_flox=use_flox): + actual = gb.mean(...) assert_allclose(expected, actual) - actual = gb.mean("y") + with xr.set_options(use_flox=use_flox): + actual = gb.mean("y") assert_allclose(expected, actual) letters = data["letters"] @@ -841,7 +858,8 @@ def test_groupby_dataset_reduce_ellipsis(by_func) -> None: } ) gb = data.groupby(by_func("letters")) - actual = gb.mean(...) + with xr.set_options(use_flox=use_flox): + actual = gb.mean(...) 
assert_allclose(expected, actual) @@ -1729,7 +1747,7 @@ def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None: rev = array_rev.groupby("idx", squeeze=False) for gb in [fwd, rev]: - assert all([isinstance(elem, slice) for elem in gb._group_indices]) + assert all([isinstance(elem, slice) for elem in gb.encoded.group_indices]) with xr.set_options(use_flox=use_flox): assert_identical(fwd.sum(), array) @@ -2561,3 +2579,29 @@ def factorize(self, group) -> EncodedGroups: obj.groupby("time.year", time=YearGrouper()) with pytest.raises(ValueError): obj.groupby() + + +@pytest.mark.parametrize("use_flox", [True, False]) +def test_weather_data_resample(use_flox): + # from the docs + times = pd.date_range("2000-01-01", "2001-12-31", name="time") + annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) + + base = 10 + 15 * annual_cycle.reshape(-1, 1) + tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3) + tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3) + + ds = xr.Dataset( + { + "tmin": (("time", "location"), tmin_values), + "tmax": (("time", "location"), tmax_values), + }, + { + "time": ("time", times, {"time_key": "time_values"}), + "location": ("location", ["IA", "IN", "IL"], {"loc_key": "loc_value"}), + }, + ) + + with xr.set_options(use_flox=use_flox): + actual = ds.resample(time="1MS").mean() + assert "location" in actual._indexes
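
A minimal decode sketch of the ``_Unsigned``/``_FillValue`` handling that this patch folds into ``CFMaskCoder`` (previously split out into ``UnsignedIntegerCoder``). The variable name, dimension name and byte values below are illustrative only, chosen to mirror the updated roundtrip tests:

    import numpy as np
    import xarray as xr
    from xarray.coding import variables

    # Signed on-disk bytes that CF metadata declares as unsigned; the fill
    # value is stored in its signed (on-disk) representation.
    encoded = xr.Variable(
        ("t",),
        np.array([0, 1, 127, -128, -1], dtype="i1"),
        attrs={"_Unsigned": "true", "_FillValue": np.int8(-1)},
    )

    decoded = variables.CFMaskCoder().decode(encoded, name="x")
    # The -1 fill byte is reinterpreted as 255 and masked, and the data is
    # promoted to float so the fill can become NaN: [0., 1., 127., 128., nan]
    print(decoded.values)

Because both steps now happen in a single coder, the raw on-disk ``_FillValue`` is retained in ``decoded.encoding`` even when ``_Unsigned`` is applied.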
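
The per-variable ``constant_values`` newly accepted by ``Dataset.pad`` can be sketched as follows; the dataset and the variable/dimension names are made up for illustration:

    import numpy as np
    import xarray as xr

    # Toy dataset with two data variables sharing dimension "x".
    ds = xr.Dataset(
        {
            "temperature": ("x", np.array([1.0, 2.0, 3.0])),
            "pressure": ("x", np.array([10.0, 20.0, 30.0])),
        }
    )

    # "temperature" gets a per-dimension (before, after) pair, while
    # "pressure" uses a single scalar on both edges.
    padded = ds.pad(
        x=(1, 1),
        mode="constant",
        constant_values={"temperature": {"x": (0.0, -1.0)}, "pressure": 99.0},
    )

    # temperature -> [ 0.  1.  2.  3. -1.], pressure -> [99. 10. 20. 30. 99.]
    print(padded["temperature"].values, padded["pressure"].values)

Variable names and dimension names may be mixed in the same mapping; when ``constant_values`` is a mapping, data variables not addressed by name or by one of their dimensions currently fall back to ``0`` (see the TODO referenced in ``Dataset.pad``).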
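
The groupby refactor routes all factorization results through ``EncodedGroups``, whose new ``__init__`` derives ``group_indices``, ``unique_coord`` and ``coords`` when they are not supplied. A small sketch with made-up group codes and labels (the exact container types of ``group_indices`` may differ):

    import pandas as pd
    import xarray as xr
    from xarray.groupers import EncodedGroups

    # Hypothetical integer codes assigning each "time" position to one of
    # three groups.
    codes = xr.DataArray([0, 1, 0, 2, 1], dims="time", name="season")
    encoded = EncodedGroups(
        codes=codes, full_index=pd.Index(["DJF", "MAM", "JJA"])
    )

    # Derived in __init__ because they were not passed explicitly:
    print(encoded.group_indices)        # e.g. ([0, 2], [1, 4], [3])
    print(encoded.unique_coord.values)  # ['DJF' 'MAM' 'JJA']
    print(encoded.coords)               # Coordinates built from the labels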