diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
index 4e4f8550e16..3f10a158f91 100644
--- a/ci/requirements/py36-min-all-deps.yml
+++ b/ci/requirements/py36-min-all-deps.yml
@@ -31,6 +31,7 @@ dependencies:
   - numba=0.44
   - numpy=1.14
   - pandas=0.24
+  # - pint  # See py36-min-nep18.yml
   - pip
   - pseudonetcdf=3.0
   - pydap=3.2
diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
index 5b291cf554c..fc9523ce249 100644
--- a/ci/requirements/py36-min-nep18.yml
+++ b/ci/requirements/py36-min-nep18.yml
@@ -2,7 +2,7 @@ name: xarray-tests
 channels:
   - conda-forge
 dependencies:
-  # Optional dependencies that require NEP18, such as sparse,
+  # Optional dependencies that require NEP18, such as sparse and pint,
   # require drastically newer packages than everything else
   - python=3.6
   - coveralls
@@ -10,6 +10,7 @@ dependencies:
   - distributed=2.4
   - numpy=1.17
   - pandas=0.24
+  - pint=0.9  # Actually not enough as it doesn't implement __array_function__ yet!
   - pytest
   - pytest-cov
   - pytest-env
diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
index cc91e8a12da..4d6d778f884 100644
--- a/ci/requirements/py36.yml
+++ b/ci/requirements/py36.yml
@@ -27,8 +27,9 @@ dependencies:
   - numba
   - numpy
   - pandas
+  - pint
   - pip
-  - pseudonetcdf
+  - pseudonetcdf<3.1  # FIXME https://github.com/pydata/xarray/issues/3409
   - pydap
   - pynio
   - pytest
diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
index bf485b59a49..78784b48f3c 100644
--- a/ci/requirements/py37-windows.yml
+++ b/ci/requirements/py37-windows.yml
@@ -27,8 +27,9 @@ dependencies:
   - numba
   - numpy
   - pandas
+  - pint
   - pip
-  - pseudonetcdf
+  - pseudonetcdf<3.1  # FIXME https://github.com/pydata/xarray/issues/3409
   - pydap
   # - pynio  # Not available on Windows
   - pytest
diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
index 5c9a1cec5b5..38e5db641b7 100644
--- a/ci/requirements/py37.yml
+++ b/ci/requirements/py37.yml
@@ -27,8 +27,9 @@ dependencies:
   - numba
   - numpy
   - pandas
+  - pint
   - pip
-  - pseudonetcdf
+  - pseudonetcdf<3.1  # FIXME https://github.com/pydata/xarray/issues/3409
   - pydap
   - pynio
   - pytest
diff --git a/doc/gallery/plot_cartopy_facetgrid.py b/doc/gallery/plot_cartopy_facetgrid.py
index 11db9b800b5..d8f5e73ee56 100644
--- a/doc/gallery/plot_cartopy_facetgrid.py
+++ b/doc/gallery/plot_cartopy_facetgrid.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 ==================================
 Multiple plots and map projections
diff --git a/doc/gallery/plot_colorbar_center.py b/doc/gallery/plot_colorbar_center.py
index 8227dc5ba0c..42d6448adf6 100644
--- a/doc/gallery/plot_colorbar_center.py
+++ b/doc/gallery/plot_colorbar_center.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 ==================
 Centered colormaps
diff --git a/doc/gallery/plot_control_colorbar.py b/doc/gallery/plot_control_colorbar.py
index bd1f2c69a44..8fb8d7f8be6 100644
--- a/doc/gallery/plot_control_colorbar.py
+++ b/doc/gallery/plot_control_colorbar.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 ===========================
 Control the plot's colorbar
diff --git a/doc/gallery/plot_lines_from_2d.py b/doc/gallery/plot_lines_from_2d.py
index 2aebda2f323..1b2845cd8a9 100644
--- a/doc/gallery/plot_lines_from_2d.py
+++ b/doc/gallery/plot_lines_from_2d.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 ==================================
 Multiple lines from a 2d DataArray
diff --git a/doc/gallery/plot_rasterio.py b/doc/gallery/plot_rasterio.py
index d5cbb0700cc..99eb1fd1daf 100644
--- a/doc/gallery/plot_rasterio.py
+++ b/doc/gallery/plot_rasterio.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 .. _recipes.rasterio:
 
diff --git a/doc/gallery/plot_rasterio_rgb.py b/doc/gallery/plot_rasterio_rgb.py
index 4b5b30ea793..758d4cd3c37 100644
--- a/doc/gallery/plot_rasterio_rgb.py
+++ b/doc/gallery/plot_rasterio_rgb.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 .. _recipes.rasterio_rgb:
 
diff --git a/doc/installing.rst b/doc/installing.rst
index b1bf072dbe1..0c5e8916ca3 100644
--- a/doc/installing.rst
+++ b/doc/installing.rst
@@ -66,6 +66,15 @@ For plotting
 Alternative data containers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 - `sparse <https://sparse.pydata.org/>`_: for sparse arrays
+- `pint <https://pint.readthedocs.io/>`_: for units of measure
+
+  .. note::
+
+    At the moment of writing, xarray requires a `highly experimental version of pint
+    <https://github.com/andrewgsavage/pint/pull/6>`_ (install with
+    ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head``).
+    Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken.
+
 - Any numpy-like objects that support
   `NEP-18 <https://numpy.org/neps/nep-0018-array-function-protocol.html>`_.
   Note that while such libraries theoretically should work, they are untested.
@@ -85,7 +94,7 @@ dependencies:
   (`NEP-29 <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_)
 - **pandas:** 12 months
 - **scipy:** 12 months
-- **sparse** and other libraries that rely on
+- **sparse, pint** and other libraries that rely on
   `NEP-18 <https://numpy.org/neps/nep-0018-array-function-protocol.html>`_
   for integration: very latest available versions only, until the technology will have
   matured. This extends to dask when used in conjunction with any of these libraries.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4c2e6659bdb..61dbf291cf8 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -18,6 +18,37 @@ What's New
 v0.14.1 (unreleased)
 --------------------
 
+New Features
+~~~~~~~~~~~~
+- Added integration tests against `pint <https://pint.readthedocs.io/>`_.
+  (:pull:`3238`) by `Justus Magin <https://github.com/keewis>`_.
+
+  .. note::
+
+    At the moment of writing, these tests *as well as the ability to use pint in general*
+    require `a highly experimental version of pint
+    <https://github.com/andrewgsavage/pint/pull/6>`_ (install with
+    ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head``).
+    Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken.
+
+Bug fixes
+~~~~~~~~~
+- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
+  but cloudpickle isn't (:issue:`3401`). By `Rhys Doyle <https://github.com/rdoyle45>`_.
+
+Documentation
+~~~~~~~~~~~~~
+
+- Fix the documentation of :py:meth:`DataArray.resample` and
+  :py:meth:`Dataset.resample` and explicitly state that a
+  datetime-like dimension is required. (:pull:`3400`)
+  By `Justus Magin <https://github.com/keewis>`_.
+
+Internal Changes
+~~~~~~~~~~~~~~~~
+
+- Use Python 3.6 idioms throughout the codebase. (:pull:`3419`)
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 .. _whats-new.0.14.0:
 
diff --git a/setup.cfg b/setup.cfg
index 6293d331477..eee8b2477b2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -73,6 +73,8 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 [mypy-pandas.*]
 ignore_missing_imports = True
+[mypy-pint.*]
+ignore_missing_imports = True
 [mypy-PseudoNetCDF.*]
 ignore_missing_imports = True
 [mypy-pydap.*]
diff --git a/xarray/_version.py b/xarray/_version.py
index 826bf470ca7..0ccb33a5e56 100644
--- a/xarray/_version.py
+++ b/xarray/_version.py
@@ -94,7 +94,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=
             return None, None
     else:
         if verbose:
-            print("unable to find command, tried %s" % (commands,))
+            print(f"unable to find command, tried {commands}")
         return None, None
     stdout = p.communicate()[0].strip()
     if sys.version_info[0] >= 3:
@@ -302,9 +302,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
             if verbose:
                 fmt = "tag '%s' doesn't start with prefix '%s'"
                 print(fmt % (full_tag, tag_prefix))
-            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
-                full_tag,
-                tag_prefix,
+            pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format(
+                full_tag, tag_prefix
             )
             return pieces
         pieces["closest-tag"] = full_tag[len(tag_prefix) :]
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 8f6881b804a..199516116b0 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -718,7 +718,7 @@ def open_mfdataset(
     autoclose=None,
     parallel=False,
     join="outer",
-    **kwargs
+    **kwargs,
 ):
     """Open multiple files as a single dataset.
 
@@ -1258,9 +1258,7 @@ def _validate_append_dim_and_encoding(
         return
     if append_dim:
         if append_dim not in ds.dims:
-            raise ValueError(
-                "{} not a valid dimension in the Dataset".format(append_dim)
-            )
+            raise ValueError(f"{append_dim} not a valid dimension in the Dataset")
     for data_var in ds_to_append:
         if data_var in ds:
             if append_dim is None:
diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py
index a3c1961373a..4967788a1e7 100644
--- a/xarray/backends/file_manager.py
+++ b/xarray/backends/file_manager.py
@@ -83,7 +83,7 @@ def __init__(
         kwargs=None,
         lock=None,
         cache=None,
-        ref_counts=None
+        ref_counts=None,
     ):
         """Initialize a FileManager.
 
@@ -267,7 +267,7 @@ def __setstate__(self, state):
     def __repr__(self):
         args_string = ", ".join(map(repr, self._args))
         if self._mode is not _DEFAULT_MODE:
-            args_string += ", mode={!r}".format(self._mode)
+            args_string += f", mode={self._mode!r}"
         return "{}({!r}, {}, kwargs={})".format(
             type(self).__name__, self._opener, args_string, self._kwargs
         )
diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py
index d0bf790f074..435690f2079 100644
--- a/xarray/backends/locks.py
+++ b/xarray/backends/locks.py
@@ -1,7 +1,7 @@
 import multiprocessing
 import threading
 import weakref
-from typing import Any, MutableMapping
+from typing import Any, MutableMapping, Optional
 
 try:
     from dask.utils import SerializableLock
@@ -62,7 +62,7 @@ def _get_lock_maker(scheduler=None):
     return _LOCK_MAKERS[scheduler]
 
 
-def _get_scheduler(get=None, collection=None):
+def _get_scheduler(get=None, collection=None) -> Optional[str]:
     """Determine the dask scheduler that is being used.
 
     None is returned if no dask scheduler is active.
@@ -86,10 +86,15 @@ def _get_scheduler(get=None, collection=None):
     except (ImportError, AttributeError):
         pass
 
-    if actual_get is dask.multiprocessing.get:
-        return "multiprocessing"
-    else:
-        return "threaded"
+    try:
+        # As of dask=2.6, dask.multiprocessing requires cloudpickle to be installed
+        # Dependency removed in https://github.com/dask/dask/pull/5511
+        if actual_get is dask.multiprocessing.get:
+            return "multiprocessing"
+    except AttributeError:
+        pass
+
+    return "threaded"
 
 
 def get_write_lock(key):
diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py
index 2edcca7c617..0e454ec47de 100644
--- a/xarray/backends/netCDF4_.py
+++ b/xarray/backends/netCDF4_.py
@@ -137,7 +137,7 @@ def _nc4_dtype(var):
     elif var.dtype.kind in ["i", "u", "f", "c", "S"]:
         dtype = var.dtype
     else:
-        raise ValueError("unsupported dtype for netCDF4 variable: {}".format(var.dtype))
+        raise ValueError(f"unsupported dtype for netCDF4 variable: {var.dtype}")
     return dtype
 
 
diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
index 887eafc972e..d26b6ce2ea9 100644
--- a/xarray/backends/netcdf3.py
+++ b/xarray/backends/netcdf3.py
@@ -50,7 +50,7 @@ def coerce_nc3_dtype(arr):
         cast_arr = arr.astype(new_dtype)
         if not (cast_arr == arr).all():
             raise ValueError(
-                "could not safely cast array from dtype %s to %s" % (dtype, new_dtype)
+                f"could not safely cast array from dtype {dtype} to {new_dtype}"
             )
         arr = cast_arr
     return arr
diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py
index ca2bf73d048..7ef4ec66241 100644
--- a/xarray/backends/pydap_.py
+++ b/xarray/backends/pydap_.py
@@ -49,7 +49,7 @@ def _fix_attributes(attributes):
             # dot-separated key
             attributes.update(
                 {
-                    "{}.{}".format(k, k_child): v_child
+                    f"{k}.{k_child}": v_child
                     for k_child, v_child in attributes.pop(k).items()
                 }
             )
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 515d309d75b..af46510f7c4 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -638,7 +638,7 @@ def __apply__(self, other):
 
 
 _FREQUENCY_CONDITION = "|".join(_FREQUENCIES.keys())
-_PATTERN = r"^((?P<multiple>\d+)|())(?P<freq>({}))$".format(_FREQUENCY_CONDITION)
+_PATTERN = fr"^((?P<multiple>\d+)|())(?P<freq>({_FREQUENCY_CONDITION}))$"
 
 
 # pandas defines these offsets as "Tick" objects, which for instance have
@@ -759,9 +759,7 @@ def _generate_range(start, end, periods, offset):
 
             next_date = current + offset
             if next_date <= current:
-                raise ValueError(
-                    "Offset {offset} did not increment date".format(offset=offset)
-                )
+                raise ValueError(f"Offset {offset} did not increment date")
             current = next_date
     else:
         while current >= end:
@@ -769,9 +767,7 @@ def _generate_range(start, end, periods, offset):
 
             next_date = current + offset
             if next_date >= current:
-                raise ValueError(
-                    "Offset {offset} did not decrement date".format(offset=offset)
-                )
+                raise ValueError(f"Offset {offset} did not decrement date")
             current = next_date
 
 
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
index 802dd94f06c..434d55d6569 100644
--- a/xarray/coding/cftimeindex.py
+++ b/xarray/coding/cftimeindex.py
@@ -403,7 +403,7 @@ def shift(self, n, freq):
         from .cftime_offsets import to_offset
 
         if not isinstance(n, int):
-            raise TypeError("'n' must be an int, got {}.".format(n))
+            raise TypeError(f"'n' must be an int, got {n}.")
         if isinstance(freq, timedelta):
             return self + n * freq
         elif isinstance(freq, str):
diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py
index 44d07929e35..6d383fcf318 100644
--- a/xarray/coding/strings.py
+++ b/xarray/coding/strings.py
@@ -228,7 +228,7 @@ def shape(self):
         return self.array.shape[:-1]
 
     def __repr__(self):
-        return "%s(%r)" % (type(self).__name__, self.array)
+        return "{}({!r})".format(type(self).__name__, self.array)
 
     def __getitem__(self, key):
         # require slicing the last dimension completely
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index ed6908117a2..0174088064b 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -286,7 +286,7 @@ def infer_datetime_units(dates):
         # NumPy casting bug: https://github.com/numpy/numpy/issues/11096
         unique_timedeltas = to_timedelta_unboxed(unique_timedeltas)
     units = _infer_time_units_from_diff(unique_timedeltas)
-    return "%s since %s" % (units, reference_date)
+    return f"{units} since {reference_date}"
 
 
 def format_cftime_datetime(date):
@@ -341,7 +341,7 @@ def cftime_to_nptime(times):
 def _cleanup_netcdf_time_units(units):
     delta, ref_date = _unpack_netcdf_time_units(units)
     try:
-        units = "%s since %s" % (delta, format_timestamp(ref_date))
+        units = "{} since {}".format(delta, format_timestamp(ref_date))
     except OutOfBoundsDatetime:
         # don't worry about reifying the units if they're out of bounds
         pass
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index f78502d81be..5f9c8932b6b 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -73,11 +73,8 @@ def __array__(self, dtype=None):
         return self.func(self.array)
 
     def __repr__(self):
-        return "%s(%r, func=%r, dtype=%r)" % (
-            type(self).__name__,
-            self.array,
-            self.func,
-            self.dtype,
+        return "{}({!r}, func={!r}, dtype={!r})".format(
+            type(self).__name__, self.array, self.func, self.dtype
         )
 
 
@@ -113,7 +110,7 @@ def unpack_for_decoding(var):
 
 def safe_setitem(dest, key, value, name=None):
     if key in dest:
-        var_str = " on variable {!r}".format(name) if name else ""
+        var_str = f" on variable {name!r}" if name else ""
         raise ValueError(
             "failed to prevent overwriting existing key {} in attrs{}. "
             "This is probably an encoding field used by xarray to describe "
diff --git a/xarray/convert.py b/xarray/convert.py
index 15d701bce21..4974a55d8e2 100644
--- a/xarray/convert.py
+++ b/xarray/convert.py
@@ -229,16 +229,14 @@ def _iris_cell_methods_to_str(cell_methods_obj):
     """
     cell_methods = []
     for cell_method in cell_methods_obj:
-        names = "".join(["{}: ".format(n) for n in cell_method.coord_names])
+        names = "".join([f"{n}: " for n in cell_method.coord_names])
         intervals = " ".join(
-            ["interval: {}".format(interval) for interval in cell_method.intervals]
-        )
-        comments = " ".join(
-            ["comment: {}".format(comment) for comment in cell_method.comments]
+            [f"interval: {interval}" for interval in cell_method.intervals]
         )
+        comments = " ".join([f"comment: {comment}" for comment in cell_method.comments])
         extra = " ".join([intervals, comments]).strip()
         if extra:
-            extra = " ({})".format(extra)
+            extra = f" ({extra})"
         cell_methods.append(names + cell_method.method + extra)
     return " ".join(cell_methods)
 
@@ -267,11 +265,11 @@ def from_iris(cube):
             dim_coord = cube.coord(dim_coords=True, dimensions=(i,))
             dims.append(_name(dim_coord))
         except iris.exceptions.CoordinateNotFoundError:
-            dims.append("dim_{}".format(i))
+            dims.append(f"dim_{i}")
 
     if len(set(dims)) != len(dims):
         duplicates = [k for k, v in Counter(dims).items() if v > 1]
-        raise ValueError("Duplicate coordinate name {}.".format(duplicates))
+        raise ValueError(f"Duplicate coordinate name {duplicates}.")
 
     coords = {}
 
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 3e53c642bb1..1a33cb955c3 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -64,7 +64,7 @@ def align(
     copy=True,
     indexes=None,
     exclude=frozenset(),
-    fill_value=dtypes.NA
+    fill_value=dtypes.NA,
 ):
     """
     Given any number of Dataset and/or DataArray objects, returns new
@@ -294,9 +294,7 @@ def align(
                 or dim in unlabeled_dim_sizes
             ):
                 if join == "exact":
-                    raise ValueError(
-                        "indexes along dimension {!r} are not equal".format(dim)
-                    )
+                    raise ValueError(f"indexes along dimension {dim!r} are not equal")
                 index = joiner(matching_indexes)
                 joined_indexes[dim] = index
             else:
@@ -402,7 +400,7 @@ def is_alignable(obj):
         copy=copy,
         indexes=indexes,
         exclude=exclude,
-        fill_value=fill_value
+        fill_value=fill_value,
     )
 
     for position, key, aligned_obj in zip(positions, keys, aligned):
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 8313247c743..45d860a1797 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -87,7 +87,7 @@ def wrapped_func(self, dim=None, skipna=None, **kwargs):
                     skipna=skipna,
                     numeric_only=numeric_only,
                     allow_lazy=True,
-                    **kwargs
+                    **kwargs,
                 )
 
         else:
@@ -167,7 +167,7 @@ def _get_axis_num(self: Any, dim: Hashable) -> int:
         try:
             return self.dims.index(dim)
         except ValueError:
-            raise ValueError("%r not found in array dimensions %r" % (dim, self.dims))
+            raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}")
 
     @property
     def sizes(self: Any) -> Mapping[Hashable, int]:
@@ -225,7 +225,7 @@ def __getattr__(self, name: str) -> Any:
                 with suppress(KeyError):
                     return source[name]
         raise AttributeError(
-            "%r object has no attribute %r" % (type(self).__name__, name)
+            "{!r} object has no attribute {!r}".format(type(self).__name__, name)
         )
 
     # This complicated two-method design boosts overall performance of simple operations
@@ -258,7 +258,9 @@ def __setattr__(self, name: str, value: Any) -> None:
         except AttributeError as e:
             # Don't accidentally shadow custom AttributeErrors, e.g.
             # DataArray.dims.setter
-            if str(e) != "%r object has no attribute %r" % (type(self).__name__, name):
+            if str(e) != "{!r} object has no attribute {!r}".format(
+                type(self).__name__, name
+            ):
                 raise
             raise AttributeError(
                 "cannot set attribute %r on a %r object. Use __setitem__ style"
@@ -479,7 +481,7 @@ def pipe(
         self,
         func: Union[Callable[..., T], Tuple[Callable[..., T], str]],
         *args,
-        **kwargs
+        **kwargs,
     ) -> T:
         """
         Apply func(self, *args, **kwargs)
@@ -735,7 +737,7 @@ def rolling(
         dim: Mapping[Hashable, int] = None,
         min_periods: int = None,
         center: bool = False,
-        **window_kwargs: int
+        **window_kwargs: int,
     ):
         """
         Rolling window object.
@@ -799,7 +801,7 @@ def rolling_exp(
         self,
         window: Mapping[Hashable, int] = None,
         window_type: str = "span",
-        **window_kwargs
+        **window_kwargs,
     ):
         """
         Exponentially-weighted moving window.
@@ -840,7 +842,7 @@ def coarsen(
         boundary: str = "exact",
         side: Union[str, Mapping[Hashable, str]] = "left",
         coord_func: str = "mean",
-        **window_kwargs: int
+        **window_kwargs: int,
     ):
         """
         Coarsen object.
@@ -910,17 +912,20 @@ def resample(
         keep_attrs: bool = None,
         loffset=None,
         restore_coord_dims: bool = None,
-        **indexer_kwargs: str
+        **indexer_kwargs: str,
     ):
         """Returns a Resample object for performing resampling operations.
 
-        Handles both downsampling and upsampling. If any intervals contain no
-        values from the original object, they will be given the value ``NaN``.
+        Handles both downsampling and upsampling. The resampled
+        dimension must be a datetime-like coordinate. If any intervals
+        contain no values from the original object, they will be given
+        the value ``NaN``.
 
         Parameters
         ----------
         indexer : {dim: freq}, optional
-            Mapping from the dimension name to resample frequency.
+            Mapping from the dimension name to resample frequency. The
+            dimension must be datetime-like.
         skipna : bool, optional
             Whether to skip missing values when aggregating in downsampling.
         closed : 'left' or 'right', optional
@@ -978,12 +983,18 @@ def resample(
           * time     (time) datetime64[ns] 1999-12-15 1999-12-16 1999-12-17 ...
 
         Limit scope of upsampling method
+
         >>> da.resample(time='1D').nearest(tolerance='1D')
         <xarray.DataArray (time: 337)>
         array([ 0.,  0., nan, ..., nan, 11., 11.])
         Coordinates:
           * time     (time) datetime64[ns] 1999-12-15 1999-12-16 ... 2000-11-15
 
+        See Also
+        --------
+        pandas.Series.resample
+        pandas.DataFrame.resample
+
         References
         ----------
 
@@ -1060,7 +1071,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False):
 
         Returns
         -------
-        Same type as caller.
+        Same xarray type as caller, with dtype float64.
 
         Examples
         --------
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 04dcbb21ff8..1393d76f283 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -110,16 +110,14 @@ def __ne__(self, other):
         return not self == other
 
     def __repr__(self):
-        return "%s(%r, %r)" % (
-            type(self).__name__,
-            list(self.input_core_dims),
-            list(self.output_core_dims),
+        return "{}({!r}, {!r})".format(
+            type(self).__name__, list(self.input_core_dims), list(self.output_core_dims)
         )
 
     def __str__(self):
         lhs = ",".join("({})".format(",".join(dims)) for dims in self.input_core_dims)
         rhs = ",".join("({})".format(",".join(dims)) for dims in self.output_core_dims)
-        return "{}->{}".format(lhs, rhs)
+        return f"{lhs}->{rhs}"
 
     def to_gufunc_string(self):
         """Create an equivalent signature string for a NumPy gufunc.
@@ -355,7 +353,7 @@ def apply_dataset_vfunc(
     dataset_join="exact",
     fill_value=_NO_FILL_VALUE,
     exclude_dims=frozenset(),
-    keep_attrs=False
+    keep_attrs=False,
 ):
     """Apply a variable level function over Dataset, dict of DataArray,
     DataArray, Variable and/or ndarray objects.
@@ -548,7 +546,7 @@ def apply_variable_ufunc(
     dask="forbidden",
     output_dtypes=None,
     output_sizes=None,
-    keep_attrs=False
+    keep_attrs=False,
 ):
     """Apply a ndarray level function over Variable and/or ndarray objects.
     """
@@ -720,7 +718,7 @@ def _apply_blockwise(
         *blockwise_args,
         dtype=dtype,
         concatenate=True,
-        new_axes=output_sizes
+        new_axes=output_sizes,
     )
 
 
@@ -743,7 +741,7 @@ def apply_array_ufunc(func, *args, dask="forbidden"):
         elif dask == "allowed":
             pass
         else:
-            raise ValueError("unknown setting for dask array handling: {}".format(dask))
+            raise ValueError(f"unknown setting for dask array handling: {dask}")
     return func(*args)
 
 
@@ -761,7 +759,7 @@ def apply_ufunc(
     kwargs: Mapping = None,
     dask: str = "forbidden",
     output_dtypes: Sequence = None,
-    output_sizes: Mapping[Any, int] = None
+    output_sizes: Mapping[Any, int] = None,
 ) -> Any:
     """Apply a vectorized function for unlabeled arrays on xarray objects.
 
@@ -1032,7 +1030,7 @@ def earth_mover_distance(first_samples,
             exclude_dims=exclude_dims,
             dataset_join=dataset_join,
             fill_value=dataset_fill_value,
-            keep_attrs=keep_attrs
+            keep_attrs=keep_attrs,
         )
     elif any(isinstance(a, DataArray) for a in args):
         return apply_dataarray_vfunc(
@@ -1041,7 +1039,7 @@ def earth_mover_distance(first_samples,
             signature=signature,
             join=join,
             exclude_dims=exclude_dims,
-            keep_attrs=keep_attrs
+            keep_attrs=keep_attrs,
         )
     elif any(isinstance(a, Variable) for a in args):
         return variables_vfunc(*args)
@@ -1175,7 +1173,7 @@ def dot(*arrays, dims=None, **kwargs):
         *arrays,
         input_core_dims=input_core_dims,
         output_core_dims=output_core_dims,
-        dask="allowed"
+        dask="allowed",
     )
     return result.transpose(*[d for d in all_dims if d in result.dims])
 
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index e98e8a72125..bcab136de8d 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -217,7 +217,7 @@ def process_subset_opt(opt, subset):
             elif opt == "minimal":
                 pass
             else:
-                raise ValueError("unexpected value for %s: %s" % (subset, opt))
+                raise ValueError(f"unexpected value for {subset}: {opt}")
         else:
             invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)]
             if invalid_vars:
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 3ef6f386201..6c0144f55a7 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -504,7 +504,7 @@ def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset:
         """
         if dim is not None and dim not in self.dims:
             raise TypeError(
-                "{} is not a dim. If supplying a ``name``, pass as a kwarg.".format(dim)
+                f"{dim} is not a dim. If supplying a ``name``, pass as a kwarg."
             )
 
         if dim is not None:
@@ -996,7 +996,7 @@ def isel(
         self,
         indexers: Mapping[Hashable, Any] = None,
         drop: bool = False,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by integer indexing
         along the specified dimension(s).
@@ -1016,7 +1016,7 @@ def sel(
         method: str = None,
         tolerance=None,
         drop: bool = False,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by selecting index
         labels along the specified dimension(s).
@@ -1044,14 +1044,14 @@ def sel(
             drop=drop,
             method=method,
             tolerance=tolerance,
-            **indexers_kwargs
+            **indexers_kwargs,
         )
         return self._from_temp_dataset(ds)
 
     def head(
         self,
         indexers: Union[Mapping[Hashable, int], int] = None,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by the the first `n`
         values along the specified dimension(s). Default `n` = 5
@@ -1068,7 +1068,7 @@ def head(
     def tail(
         self,
         indexers: Union[Mapping[Hashable, int], int] = None,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by the the last `n`
         values along the specified dimension(s). Default `n` = 5
@@ -1085,7 +1085,7 @@ def tail(
     def thin(
         self,
         indexers: Union[Mapping[Hashable, int], int] = None,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> "DataArray":
         """Return a new DataArray whose data is given by each `n` value
         along the specified dimension(s). Default `n` = 5
@@ -1230,7 +1230,7 @@ def reindex(
         tolerance=None,
         copy: bool = True,
         fill_value=dtypes.NA,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> "DataArray":
         """Conform this object onto the indexes of another object, filling in
         missing values with ``fill_value``. The default fill value is NaN.
@@ -1293,7 +1293,7 @@ def interp(
         method: str = "linear",
         assume_sorted: bool = False,
         kwargs: Mapping[str, Any] = None,
-        **coords_kwargs: Any
+        **coords_kwargs: Any,
     ) -> "DataArray":
         """ Multidimensional interpolation of variables.
 
@@ -1348,7 +1348,7 @@ def interp(
             method=method,
             kwargs=kwargs,
             assume_sorted=assume_sorted,
-            **coords_kwargs
+            **coords_kwargs,
         )
         return self._from_temp_dataset(ds)
 
@@ -1410,7 +1410,7 @@ def interp_like(
     def rename(
         self,
         new_name_or_name_dict: Union[Hashable, Mapping[Hashable, Hashable]] = None,
-        **names: Hashable
+        **names: Hashable,
     ) -> "DataArray":
         """Returns a new DataArray with renamed coordinates or a new name.
 
@@ -1491,7 +1491,7 @@ def expand_dims(
         self,
         dim: Union[None, Hashable, Sequence[Hashable], Mapping[Hashable, Any]] = None,
         axis=None,
-        **dim_kwargs: Any
+        **dim_kwargs: Any,
     ) -> "DataArray":
         """Return a new object with an additional axis (or axes) inserted at
         the corresponding position in the array shape. The new object is a
@@ -1545,7 +1545,7 @@ def set_index(
         indexes: Mapping[Hashable, Union[Hashable, Sequence[Hashable]]] = None,
         append: bool = False,
         inplace: bool = None,
-        **indexes_kwargs: Union[Hashable, Sequence[Hashable]]
+        **indexes_kwargs: Union[Hashable, Sequence[Hashable]],
     ) -> Optional["DataArray"]:
         """Set DataArray (multi-)indexes using one or more existing
         coordinates.
@@ -1638,7 +1638,7 @@ def reorder_levels(
         self,
         dim_order: Mapping[Hashable, Sequence[int]] = None,
         inplace: bool = None,
-        **dim_order_kwargs: Sequence[int]
+        **dim_order_kwargs: Sequence[int],
     ) -> "DataArray":
         """Rearrange index levels using input order.
 
@@ -1674,7 +1674,7 @@ def reorder_levels(
     def stack(
         self,
         dimensions: Mapping[Hashable, Sequence[Hashable]] = None,
-        **dimensions_kwargs: Sequence[Hashable]
+        **dimensions_kwargs: Sequence[Hashable],
     ) -> "DataArray":
         """
         Stack any number of existing dimensions into a single new dimension.
@@ -1821,7 +1821,7 @@ def to_unstacked_dataset(self, dim, level=0):
 
         idx = self.indexes[dim]
         if not isinstance(idx, pd.MultiIndex):
-            raise ValueError("'{}' is not a stacked coordinate".format(dim))
+            raise ValueError(f"'{dim}' is not a stacked coordinate")
 
         level_number = idx._get_level_number(level)
         variables = idx.levels[level_number]
@@ -1987,7 +1987,7 @@ def interpolate_na(
         limit: int = None,
         use_coordinate: Union[bool, str] = True,
         max_gap: int = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "DataArray":
         """Interpolate values according to different methods.
 
@@ -2045,7 +2045,7 @@ def interpolate_na(
             limit=limit,
             use_coordinate=use_coordinate,
             max_gap=max_gap,
-            **kwargs
+            **kwargs,
         )
 
     def ffill(self, dim: Hashable, limit: int = None) -> "DataArray":
@@ -2121,7 +2121,7 @@ def reduce(
         axis: Union[None, int, Sequence[int]] = None,
         keep_attrs: bool = None,
         keepdims: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "DataArray":
         """Reduce this array by applying `func` along some dimension(s).
 
@@ -2503,7 +2503,7 @@ def _binary_op(
         f: Callable[..., Any],
         reflexive: bool = False,
         join: str = None,  # see xarray.align
-        **ignored_kwargs
+        **ignored_kwargs,
     ) -> Callable[..., "DataArray"]:
         @functools.wraps(f)
         def func(self, other):
@@ -2644,7 +2644,7 @@ def shift(
         self,
         shifts: Mapping[Hashable, int] = None,
         fill_value: Any = dtypes.NA,
-        **shifts_kwargs: int
+        **shifts_kwargs: int,
     ) -> "DataArray":
         """Shift this array by an offset along one or more dimensions.
 
@@ -2693,7 +2693,7 @@ def roll(
         self,
         shifts: Mapping[Hashable, int] = None,
         roll_coords: bool = None,
-        **shifts_kwargs: int
+        **shifts_kwargs: int,
     ) -> "DataArray":
         """Roll this array by an offset along one or more dimensions.
 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 831881d672b..d1e9f0ee8f1 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -1639,18 +1639,16 @@ def info(self, buf=None) -> None:
         lines.append("xarray.Dataset {")
         lines.append("dimensions:")
         for name, size in self.dims.items():
-            lines.append("\t{name} = {size} ;".format(name=name, size=size))
+            lines.append(f"\t{name} = {size} ;")
         lines.append("\nvariables:")
         for name, da in self.variables.items():
             dims = ", ".join(da.dims)
-            lines.append(
-                "\t{type} {name}({dims}) ;".format(type=da.dtype, name=name, dims=dims)
-            )
+            lines.append(f"\t{da.dtype} {name}({dims}) ;")
             for k, v in da.attrs.items():
-                lines.append("\t\t{name}:{k} = {v} ;".format(name=name, k=k, v=v))
+                lines.append(f"\t\t{name}:{k} = {v} ;")
         lines.append("\n// global attributes:")
         for k, v in self.attrs.items():
-            lines.append("\t:{k} = {v} ;".format(k=k, v=v))
+            lines.append(f"\t:{k} = {v} ;")
         lines.append("}")
 
         buf.write("\n".join(lines))
@@ -1732,7 +1730,7 @@ def maybe_chunk(name, var, chunks):
                 chunks = None
             if var.ndim > 0:
                 token2 = tokenize(name, token if token else var._data)
-                name2 = "%s%s-%s" % (name_prefix, name, token2)
+                name2 = f"{name_prefix}{name}-{token2}"
                 return var.chunk(chunks, name=name2, lock=lock)
             else:
                 return var
@@ -2624,7 +2622,7 @@ def _rename_vars(self, name_dict, dims_dict):
             var.dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
             name = name_dict.get(k, k)
             if name in variables:
-                raise ValueError("the new name %r conflicts" % (name,))
+                raise ValueError(f"the new name {name!r} conflicts")
             variables[name] = var
             if k in self._coord_names:
                 coord_names.add(name)
@@ -2932,7 +2930,7 @@ def expand_dims(
             raise ValueError("lengths of dim and axis should be identical.")
         for d in dim:
             if d in self.dims:
-                raise ValueError("Dimension {dim} already exists.".format(dim=d))
+                raise ValueError(f"Dimension {d} already exists.")
             if d in self._variables and not utils.is_scalar(self._variables[d]):
                 raise ValueError(
                     "{dim} already exists as coordinate or"
@@ -3871,7 +3869,7 @@ def interpolate_na(
         limit: int = None,
         use_coordinate: Union[bool, Hashable] = True,
         max_gap: int = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "Dataset":
         """Interpolate values according to different methods.
 
@@ -4728,7 +4726,7 @@ def diff(self, dim, n=1, label="upper"):
         if n == 0:
             return self
         if n < 0:
-            raise ValueError("order `n` must be non-negative but got {}".format(n))
+            raise ValueError(f"order `n` must be non-negative but got {n}")
 
         # prepare slices
         kwargs_start = {dim: slice(None, -1)}
@@ -5131,7 +5129,7 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
         from .variable import Variable
 
         if coord not in self.variables and coord not in self.dims:
-            raise ValueError("Coordinate {} does not exist.".format(coord))
+            raise ValueError(f"Coordinate {coord} does not exist.")
 
         coord_var = self[coord].variable
         if coord_var.ndim != 1:
@@ -5197,7 +5195,7 @@ def _integrate_one(self, coord, datetime_unit=None):
         from .variable import Variable
 
         if coord not in self.variables and coord not in self.dims:
-            raise ValueError("Coordinate {} does not exist.".format(coord))
+            raise ValueError(f"Coordinate {coord} does not exist.")
 
         coord_var = self[coord].variable
         if coord_var.ndim != 1:
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 126168d418b..d943788c434 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -41,7 +41,7 @@ def f(*args, **kwargs):
                 try:
                     wrapped = getattr(dask_module, name)
                 except AttributeError as e:
-                    raise AttributeError("%s: requires dask >=%s" % (e, requires_dask))
+                    raise AttributeError(f"{e}: requires dask >={requires_dask}")
             else:
                 wrapped = getattr(eager_module, name)
             return wrapped(*args, **kwargs)
@@ -257,7 +257,7 @@ def _create_nan_agg_method(name, coerce_strings=False):
 
     def f(values, axis=None, skipna=None, **kwargs):
         if kwargs.pop("out", None) is not None:
-            raise TypeError("`out` is not valid for {}".format(name))
+            raise TypeError(f"`out` is not valid for {name}")
 
         values = asarray(values)
 
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index 0c7f073819d..520fa9b9f1b 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -111,7 +111,7 @@ def format_timestamp(t):
         if time_str == "00:00:00":
             return date_str
         else:
-            return "{}T{}".format(date_str, time_str)
+            return f"{date_str}T{time_str}"
 
 
 def format_timedelta(t, timedelta_format=None):
@@ -140,7 +140,7 @@ def format_item(x, timedelta_format=None, quote_strings=True):
     elif isinstance(x, (str, bytes)):
         return repr(x) if quote_strings else x
     elif isinstance(x, (float, np.float)):
-        return "{:.4}".format(x)
+        return f"{x:.4}"
     else:
         return str(x)
 
@@ -228,11 +228,11 @@ def inline_dask_repr(array):
             meta_repr = _KNOWN_TYPE_REPRS[type(meta)]
         else:
             meta_repr = type(meta).__name__
-        meta_string = ", meta={}".format(meta_repr)
+        meta_string = f", meta={meta_repr}"
     else:
         meta_string = ""
 
-    return "dask.array<chunksize={}{}>".format(chunksize, meta_string)
+    return f"dask.array<chunksize={chunksize}{meta_string}>"
 
 
 def inline_sparse_repr(array):
@@ -262,12 +262,12 @@ def summarize_variable(name, var, col_width, marker=" ", max_width=None):
     """Summarize a variable in one line, e.g., for the Dataset.__repr__."""
     if max_width is None:
         max_width = OPTIONS["display_width"]
-    first_col = pretty_print("  {} {} ".format(marker, name), col_width)
+    first_col = pretty_print(f"  {marker} {name} ", col_width)
     if var.dims:
         dims_str = "({}) ".format(", ".join(map(str, var.dims)))
     else:
         dims_str = ""
-    front_str = "{}{}{} ".format(first_col, dims_str, var.dtype)
+    front_str = f"{first_col}{dims_str}{var.dtype} "
 
     values_width = max_width - len(front_str)
     values_str = inline_variable_array_repr(var, values_width)
@@ -276,7 +276,7 @@ def summarize_variable(name, var, col_width, marker=" ", max_width=None):
 
 
 def _summarize_coord_multiindex(coord, col_width, marker):
-    first_col = pretty_print("  {} {} ".format(marker, coord.name), col_width)
+    first_col = pretty_print(f"  {marker} {coord.name} ", col_width)
     return "{}({}) MultiIndex".format(first_col, str(coord.dims[0]))
 
 
@@ -313,13 +313,13 @@ def summarize_coord(name, var, col_width):
 def summarize_attr(key, value, col_width=None):
     """Summary for __repr__ - use ``X.attrs[key]`` for full value."""
     # Indent key and add ':', then right-pad if col_width is not None
-    k_str = "    {}:".format(key)
+    k_str = f"    {key}:"
     if col_width is not None:
         k_str = pretty_print(k_str, col_width)
     # Replace tabs and newlines, so we print on one line in known width
     v_str = str(value).replace("\t", "\\t").replace("\n", "\\n")
     # Finally, truncate to the desired display width
-    return maybe_truncate("{} {}".format(k_str, v_str), OPTIONS["display_width"])
+    return maybe_truncate(f"{k_str} {v_str}", OPTIONS["display_width"])
 
 
 EMPTY_REPR = "    *empty*"
@@ -351,7 +351,7 @@ def _calculate_col_width(col_items):
 def _mapping_repr(mapping, title, summarizer, col_width=None):
     if col_width is None:
         col_width = _calculate_col_width(mapping)
-    summary = ["{}:".format(title)]
+    summary = [f"{title}:"]
     if mapping:
         summary += [summarizer(k, v, col_width) for k, v in mapping.items()]
     else:
@@ -380,19 +380,19 @@ def coords_repr(coords, col_width=None):
 def indexes_repr(indexes):
     summary = []
     for k, v in indexes.items():
-        summary.append(wrap_indent(repr(v), "{}: ".format(k)))
+        summary.append(wrap_indent(repr(v), f"{k}: "))
     return "\n".join(summary)
 
 
 def dim_summary(obj):
-    elements = ["{}: {}".format(k, v) for k, v in obj.sizes.items()]
+    elements = [f"{k}: {v}" for k, v in obj.sizes.items()]
     return ", ".join(elements)
 
 
 def unindexed_dims_repr(dims, coords):
     unindexed_dims = [d for d in dims if d not in coords]
     if unindexed_dims:
-        dims_str = ", ".join("{}".format(d) for d in unindexed_dims)
+        dims_str = ", ".join(f"{d}" for d in unindexed_dims)
         return "Dimensions without coordinates: " + dims_str
     else:
         return None
@@ -438,13 +438,13 @@ def short_data_repr(array):
         return short_numpy_repr(array)
     else:
         # internal xarray array type
-        return "[{} values with dtype={}]".format(array.size, array.dtype)
+        return f"[{array.size} values with dtype={array.dtype}]"
 
 
 def array_repr(arr):
     # used for DataArray, Variable and IndexVariable
     if hasattr(arr, "name") and arr.name is not None:
-        name_str = "{!r} ".format(arr.name)
+        name_str = f"{arr.name!r} "
     else:
         name_str = ""
 
@@ -503,7 +503,7 @@ def _diff_mapping_repr(a_mapping, b_mapping, compat, title, summarizer, col_widt
     def extra_items_repr(extra_keys, mapping, ab_side):
         extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys]
         if extra_repr:
-            header = "{} only on the {} object:".format(title, ab_side)
+            header = f"{title} only on the {ab_side} object:"
             return [header] + extra_repr
         else:
             return []
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index c52b0dc97e6..52eb17df18d 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -298,7 +298,7 @@ def __init__(
                 )
             group = obj[group]
             if len(group) == 0:
-                raise ValueError("{} must not be empty".format(group.name))
+                raise ValueError(f"{group.name} must not be empty")
 
             if group.name not in obj.coords and group.name in obj.dims:
                 # DummyGroups should not appear on groupby results
@@ -321,7 +321,7 @@ def __init__(
         full_index = None
 
         if bins is not None:
-            if np.isnan(bins).all():
+            if duck_array_ops.isnull(bins).all():
                 raise ValueError("All bin edges are NaN.")
             binned = pd.cut(group.values, bins, **cut_kwargs)
             new_dim_name = group.name + "_bins"
@@ -417,7 +417,7 @@ def __iter__(self):
         return zip(self._unique_coord.values, self._iter_grouped())
 
     def __repr__(self):
-        return "%s, grouped over %r \n%r groups with labels %s." % (
+        return "{}, grouped over {!r} \n{!r} groups with labels {}.".format(
             self.__class__.__name__,
             self._unique_coord.name,
             self._unique_coord.size,
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 010c4818ca5..3d2e634eaa8 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -156,7 +156,7 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
 
             # GH2619. Raise a KeyError if nothing is chosen
             if indexer.dtype.kind == "b" and indexer.sum() == 0:
-                raise KeyError("{} not found".format(label))
+                raise KeyError(f"{label} not found")
 
     elif isinstance(label, tuple) and isinstance(index, pd.MultiIndex):
         if _is_nested_tuple(label):
@@ -352,7 +352,7 @@ class BasicIndexer(ExplicitIndexer):
 
     def __init__(self, key):
         if not isinstance(key, tuple):
-            raise TypeError("key must be a tuple: {!r}".format(key))
+            raise TypeError(f"key must be a tuple: {key!r}")
 
         new_key = []
         for k in key:
@@ -384,7 +384,7 @@ class OuterIndexer(ExplicitIndexer):
 
     def __init__(self, key):
         if not isinstance(key, tuple):
-            raise TypeError("key must be a tuple: {!r}".format(key))
+            raise TypeError(f"key must be a tuple: {key!r}")
 
         new_key = []
         for k in key:
@@ -429,7 +429,7 @@ class VectorizedIndexer(ExplicitIndexer):
 
     def __init__(self, key):
         if not isinstance(key, tuple):
-            raise TypeError("key must be a tuple: {!r}".format(key))
+            raise TypeError(f"key must be a tuple: {key!r}")
 
         new_key = []
         ndim = None
@@ -574,7 +574,9 @@ def __setitem__(self, key, value):
         self.array[full_key] = value
 
     def __repr__(self):
-        return "%s(array=%r, key=%r)" % (type(self).__name__, self.array, self.key)
+        return "{}(array={!r}, key={!r})".format(
+            type(self).__name__, self.array, self.key
+        )
 
 
 class LazilyVectorizedIndexedArray(ExplicitlyIndexedNDArrayMixin):
@@ -625,7 +627,9 @@ def __setitem__(self, key, value):
         )
 
     def __repr__(self):
-        return "%s(array=%r, key=%r)" % (type(self).__name__, self.array, self.key)
+        return "{}(array={!r}, key={!r})".format(
+            type(self).__name__, self.array, self.key
+        )
 
 
 def _wrap_numpy_scalars(array):
@@ -847,7 +851,7 @@ def decompose_indexer(
         return _decompose_vectorized_indexer(indexer, shape, indexing_support)
     if isinstance(indexer, (BasicIndexer, OuterIndexer)):
         return _decompose_outer_indexer(indexer, shape, indexing_support)
-    raise TypeError("unexpected key type: {}".format(indexer))
+    raise TypeError(f"unexpected key type: {indexer}")
 
 
 def _decompose_slice(key, size):
@@ -1413,7 +1417,9 @@ def transpose(self, order) -> pd.Index:
         return self.array  # self.array should be always one-dimensional
 
     def __repr__(self) -> str:
-        return "%s(array=%r, dtype=%r)" % (type(self).__name__, self.array, self.dtype)
+        return "{}(array={!r}, dtype={!r})".format(
+            type(self).__name__, self.array, self.dtype
+        )
 
     def copy(self, deep: bool = True) -> "PandasIndexAdapter":
         # Not the same as just writing `self.array.copy(deep=deep)`, as
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index 012898ac18b..db5ef9531df 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -144,7 +144,9 @@ def unique_variable(
 
 def _assert_compat_valid(compat):
     if compat not in _VALID_COMPAT:
-        raise ValueError("compat=%r invalid: must be %s" % (compat, set(_VALID_COMPAT)))
+        raise ValueError(
+            "compat={!r} invalid: must be {}".format(compat, set(_VALID_COMPAT))
+        )
 
 
 MergeElement = Tuple[Variable, Optional[pd.Index]]
diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py
index df36c98f94c..3fe2c254b0f 100644
--- a/xarray/core/nputils.py
+++ b/xarray/core/nputils.py
@@ -16,7 +16,7 @@
 def _validate_axis(data, axis):
     ndim = data.ndim
     if not -ndim <= axis < ndim:
-        raise IndexError("axis %r out of bounds [-%r, %r)" % (axis, ndim, ndim))
+        raise IndexError(f"axis {axis!r} out of bounds [-{ndim}, {ndim})")
     if axis < 0:
         axis += ndim
     return axis
@@ -190,9 +190,9 @@ def _rolling_window(a, window, axis=-1):
     a = np.swapaxes(a, axis, -1)
 
     if window < 1:
-        raise ValueError("`window` must be at least 1. Given : {}".format(window))
+        raise ValueError(f"`window` must be at least 1. Given : {window}")
     if window > a.shape[-1]:
-        raise ValueError("`window` is too long. Given : {}".format(window))
+        raise ValueError(f"`window` is too long. Given : {window}")
 
     shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
     strides = a.strides + (a.strides[-1],)
diff --git a/xarray/core/options.py b/xarray/core/options.py
index c5086268f48..2f464a33fb1 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -125,7 +125,7 @@ def __init__(self, **kwargs):
                     % (k, set(OPTIONS))
                 )
             if k in _VALIDATORS and not _VALIDATORS[k](v):
-                raise ValueError("option %r given an invalid value: %r" % (k, v))
+                raise ValueError(f"option {k!r} given an invalid value: {v!r}")
             self.old[k] = OPTIONS[k]
         self._apply_update(kwargs)
 
diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
index 48bb9ccfc3d..fbb5ef94ca2 100644
--- a/xarray/core/parallel.py
+++ b/xarray/core/parallel.py
@@ -222,9 +222,8 @@ def _wrapper(func, obj, to_array, args, kwargs):
     indexes.update({k: template.indexes[k] for k in new_indexes})
 
     graph: Dict[Any, Any] = {}
-    gname = "%s-%s" % (
-        dask.utils.funcname(func),
-        dask.base.tokenize(dataset, args, kwargs),
+    gname = "{}-{}".format(
+        dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs)
     )
 
     # map dims to list of chunk indexes
@@ -253,7 +252,7 @@ def _wrapper(func, obj, to_array, args, kwargs):
                 for dim in variable.dims:
                     chunk = chunk[chunk_index_dict[dim]]
 
-                chunk_variable_task = ("%s-%s" % (gname, chunk[0]),) + v
+                chunk_variable_task = (f"{gname}-{chunk[0]}",) + v
                 graph[chunk_variable_task] = (
                     tuple,
                     [variable.dims, chunk, variable.attrs],
@@ -272,7 +271,7 @@ def _wrapper(func, obj, to_array, args, kwargs):
 
                 subset = variable.isel(subsetter)
                 chunk_variable_task = (
-                    "%s-%s" % (gname, dask.base.tokenize(subset)),
+                    "{}-{}".format(gname, dask.base.tokenize(subset)),
                 ) + v
                 graph[chunk_variable_task] = (
                     tuple,
@@ -300,7 +299,7 @@ def _wrapper(func, obj, to_array, args, kwargs):
         for name, variable in template.variables.items():
             if name in indexes:
                 continue
-            gname_l = "%s-%s" % (gname, name)
+            gname_l = f"{gname}-{name}"
             var_key_map[name] = gname_l
 
             key: Tuple[Any, ...] = (gname_l,)
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index c9c5866654d..6befe0b5efc 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -42,7 +42,7 @@ def _check_inplace(inplace: Optional[bool]) -> None:
 
 
 def alias_message(old_name: str, new_name: str) -> str:
-    return "%s has been deprecated. Use %s instead." % (old_name, new_name)
+    return f"{old_name} has been deprecated. Use {new_name} instead."
 
 
 def alias_warning(old_name: str, new_name: str, stacklevel: int = 3) -> None:
@@ -393,7 +393,7 @@ def __contains__(self, key: object) -> bool:
         return key in self.mapping
 
     def __repr__(self) -> str:
-        return "%s(%r)" % (type(self).__name__, self.mapping)
+        return "{}({!r})".format(type(self).__name__, self.mapping)
 
 
 def FrozenDict(*args, **kwargs) -> Frozen:
@@ -430,7 +430,7 @@ def __contains__(self, key: object) -> bool:
         return key in self.mapping
 
     def __repr__(self) -> str:
-        return "%s(%r)" % (type(self).__name__, self.mapping)
+        return "{}({!r})".format(type(self).__name__, self.mapping)
 
 
 class OrderedSet(MutableSet[T]):
@@ -476,7 +476,7 @@ def update(self, values: AbstractSet[T]) -> None:
         self |= values  # type: ignore
 
     def __repr__(self) -> str:
-        return "%s(%r)" % (type(self).__name__, list(self))
+        return "{}({!r})".format(type(self).__name__, list(self))
 
 
 class NdimSizeLenMixin:
@@ -524,7 +524,7 @@ def __getitem__(self: Any, key):
         return self.array[key]
 
     def __repr__(self: Any) -> str:
-        return "%s(array=%r)" % (type(self).__name__, self.array)
+        return "{}(array={!r})".format(type(self).__name__, self.array)
 
 
 class ReprObject:
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index b17597df580..37672cd82d9 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -113,7 +113,7 @@ def as_variable(obj, name=None) -> "Union[Variable, IndexVariable]":
     elif isinstance(obj, (pd.Index, IndexVariable)) and obj.name is not None:
         obj = Variable(obj.name, obj)
     elif isinstance(obj, (set, dict)):
-        raise TypeError("variable %r has invalid type %r" % (name, type(obj)))
+        raise TypeError("variable {!r} has invalid type {!r}".format(name, type(obj)))
     elif name is not None:
         data = as_compatible_data(obj)
         if data.ndim != 1:
@@ -658,7 +658,7 @@ def _broadcast_indexes_vectorized(self, key):
         try:
             variables = _broadcast_compat_variables(*variables)
         except ValueError:
-            raise IndexError("Dimensions of indexers mismatch: {}".format(key))
+            raise IndexError(f"Dimensions of indexers mismatch: {key}")
 
         out_key = [variable.data for variable in variables]
         out_dims = tuple(out_dims_set)
@@ -972,7 +972,7 @@ def chunk(self, chunks=None, name=None, lock=False):
     def isel(
         self: VariableType,
         indexers: Mapping[Hashable, Any] = None,
-        **indexers_kwargs: Any
+        **indexers_kwargs: Any,
     ) -> VariableType:
         """Return a new array indexed along the specified dimension(s).
 
@@ -1417,7 +1417,7 @@ def reduce(
         keep_attrs=None,
         keepdims=False,
         allow_lazy=False,
-        **kwargs
+        **kwargs,
     ):
         """Reduce this array by applying `func` along some dimension(s).
 
@@ -1803,7 +1803,7 @@ def coarsen(self, windows, func, boundary="exact", side="left"):
             name = func
             func = getattr(duck_array_ops, name, None)
             if func is None:
-                raise NameError("{} is not a valid method.".format(name))
+                raise NameError(f"{name} is not a valid method.")
         return type(self)(self.dims, func(reshaped, axis=axes), self._attrs)
 
     def _coarsen_reshape(self, windows, boundary, side):
@@ -1822,7 +1822,7 @@ def _coarsen_reshape(self, windows, boundary, side):
 
         for d, window in windows.items():
             if window <= 0:
-                raise ValueError("window must be > 0. Given {}".format(window))
+                raise ValueError(f"window must be > 0. Given {window}")
 
         variable = self
         for d, window in windows.items():
@@ -2246,7 +2246,7 @@ def assert_unique_multiindex_level_names(variables):
             idx_level_names = var.to_index_variable().level_names
             if idx_level_names is not None:
                 for n in idx_level_names:
-                    level_names[n].append("%r (%s)" % (n, var_name))
+                    level_names[n].append(f"{n!r} ({var_name})")
             if idx_level_names:
                 all_level_names.update(idx_level_names)
 
diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py
index 176f0c504f6..ea037c1a2c2 100644
--- a/xarray/plot/dataset_plot.py
+++ b/xarray/plot/dataset_plot.py
@@ -19,7 +19,7 @@
 
 def _infer_meta_data(ds, x, y, hue, hue_style, add_guide):
     dvars = set(ds.variables.keys())
-    error_msg = " must be one of ({0:s})".format(", ".join(dvars))
+    error_msg = " must be one of ({:s})".format(", ".join(dvars))
 
     if x not in dvars:
         raise ValueError("x" + error_msg)
@@ -148,7 +148,7 @@ def _parse_size(data, norm):
     return pd.Series(sizes)
 
 
-class _Dataset_PlotMethods(object):
+class _Dataset_PlotMethods:
     """
     Enables use of xarray.plot functions as attributes on a Dataset.
     For example, Dataset.plot.scatter
@@ -243,7 +243,7 @@ def _dsplot(plotfunc):
     """
 
     # Build on the original docstring
-    plotfunc.__doc__ = "%s\n%s" % (plotfunc.__doc__, commondoc)
+    plotfunc.__doc__ = f"{plotfunc.__doc__}\n{commondoc}"
 
     @functools.wraps(plotfunc)
     def newplotfunc(
@@ -275,7 +275,7 @@ def newplotfunc(
         colors=None,
         extend=None,
         cmap=None,
-        **kwargs
+        **kwargs,
     ):
 
         _is_facetgrid = kwargs.pop("_is_facetgrid", False)
@@ -310,9 +310,9 @@ def newplotfunc(
                 )
 
             # subset that can be passed to scatter, hist2d
-            cmap_params_subset = dict(
-                (vv, cmap_params[vv]) for vv in ["vmin", "vmax", "norm", "cmap"]
-            )
+            cmap_params_subset = {
+                vv: cmap_params[vv] for vv in ["vmin", "vmax", "norm", "cmap"]
+            }
 
         else:
             cmap_params_subset = {}
@@ -325,7 +325,7 @@ def newplotfunc(
             hue_style=hue_style,
             ax=ax,
             cmap_params=cmap_params_subset,
-            **kwargs
+            **kwargs,
         )
 
         if _is_facetgrid:  # if this was called from Facetgrid.map_dataset,
@@ -380,7 +380,7 @@ def plotmethod(
         colors=None,
         extend=None,
         cmap=None,
-        **kwargs
+        **kwargs,
     ):
         """
         The method should have the same signature as the function.
@@ -436,7 +436,7 @@ def scatter(ds, x, y, ax, **kwargs):
                     data["x"].where(mask, drop=True).values.flatten(),
                     data["y"].where(mask, drop=True).values.flatten(),
                     label=label,
-                    **kwargs
+                    **kwargs,
                 )
             )
 
diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
index 7938f9b027b..a288f195e32 100644
--- a/xarray/plot/plot.py
+++ b/xarray/plot/plot.py
@@ -124,7 +124,7 @@ def plot(
     hue=None,
     rtol=0.01,
     subplot_kws=None,
-    **kwargs
+    **kwargs,
 ):
     """
     Default plot of DataArray using matplotlib.pyplot.
@@ -226,7 +226,7 @@ def line(
     ylim=None,
     add_legend=True,
     _labels=True,
-    **kwargs
+    **kwargs,
 ):
     """
     Line plot of DataArray index against values
@@ -404,7 +404,7 @@ def hist(
     yticks=None,
     xlim=None,
     ylim=None,
-    **kwargs
+    **kwargs,
 ):
     """
     Histogram of DataArray
@@ -584,7 +584,7 @@ def _plot2d(plotfunc):
     """
 
     # Build on the original docstring
-    plotfunc.__doc__ = "%s\n%s" % (plotfunc.__doc__, commondoc)
+    plotfunc.__doc__ = f"{plotfunc.__doc__}\n{commondoc}"
 
     @functools.wraps(plotfunc)
     def newplotfunc(
@@ -621,7 +621,7 @@ def newplotfunc(
         xlim=None,
         ylim=None,
         norm=None,
-        **kwargs
+        **kwargs,
     ):
         # All 2d plots in xarray share this function signature.
         # Method signature below should be consistent.
@@ -734,7 +734,7 @@ def newplotfunc(
             vmin=cmap_params["vmin"],
             vmax=cmap_params["vmax"],
             norm=cmap_params["norm"],
-            **kwargs
+            **kwargs,
         )
 
         # Label the plot with metadata
@@ -808,7 +808,7 @@ def plotmethod(
         xlim=None,
         ylim=None,
         norm=None,
-        **kwargs
+        **kwargs,
     ):
         """
         The method should have the same signature as the function.
diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
index e070ea16855..3b739197fea 100644
--- a/xarray/plot/utils.py
+++ b/xarray/plot/utils.py
@@ -302,7 +302,7 @@ def _infer_xy_labels_3d(darray, x, y, rgb):
         )
     for label in not_none:
         if label not in darray.dims:
-            raise ValueError("%r is not a dimension" % (label,))
+            raise ValueError(f"{label!r} is not a dimension")
 
     # Then calculate rgb dimension if certain and check validity
     could_be_color = [
@@ -693,7 +693,7 @@ def _process_cmap_cbar_kwargs(
     colors=None,
     cbar_kwargs: Union[Iterable[Tuple[str, Any]], Mapping[str, Any]] = None,
     levels=None,
-    **kwargs
+    **kwargs,
 ):
     """
     Parameters
diff --git a/xarray/testing.py b/xarray/testing.py
index eeff4d932b4..5c3ca8a3cca 100644
--- a/xarray/testing.py
+++ b/xarray/testing.py
@@ -120,7 +120,7 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
     if isinstance(a, Variable):
         assert a.dims == b.dims
         allclose = _data_allclose_or_equiv(a.values, b.values, **kwargs)
-        assert allclose, "{}\n{}".format(a.values, b.values)
+        assert allclose, f"{a.values}\n{b.values}"
     elif isinstance(a, DataArray):
         assert_allclose(a.variable, b.variable, **kwargs)
         assert set(a.coords) == set(b.coords)
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index acf8b67effa..88476e5e730 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -44,7 +44,7 @@ def _importorskip(modname, minversion=None):
                 raise ImportError("Minimum version not satisfied")
     except ImportError:
         has = False
-    func = pytest.mark.skipif(not has, reason="requires {}".format(modname))
+    func = pytest.mark.skipif(not has, reason=f"requires {modname}")
     return has, func
 
 
@@ -109,7 +109,7 @@ def raises_regex(error, pattern):
     message = str(excinfo.value)
     if not re.search(pattern, message):
         raise AssertionError(
-            "exception %r did not match pattern %r" % (excinfo.value, pattern)
+            f"exception {excinfo.value!r} did not match pattern {pattern!r}"
         )
 
 
diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py
index 5cd815eebf0..a987d302202 100644
--- a/xarray/tests/test_accessor_str.py
+++ b/xarray/tests/test_accessor_str.py
@@ -503,7 +503,7 @@ def test_slice(dtype):
             expected = xr.DataArray([s[start:stop:step] for s in arr.values])
             assert_equal(result, expected.astype(dtype))
         except IndexError:
-            print("failed on %s:%s:%s" % (start, stop, step))
+            print(f"failed on {start}:{stop}:{step}")
             raise
 
 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index b5421a6bc9f..4bdebe73050 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -92,7 +92,7 @@ def open_example_mfdataset(names, *args, **kwargs):
     return open_mfdataset(
         [os.path.join(os.path.dirname(__file__), "data", name) for name in names],
         *args,
-        **kwargs
+        **kwargs,
     )
 
 
@@ -986,7 +986,7 @@ def test_multiindex_not_implemented(self):
 @contextlib.contextmanager
 def create_tmp_file(suffix=".nc", allow_cleanup_failure=False):
     temp_dir = tempfile.mkdtemp()
-    path = os.path.join(temp_dir, "temp-%s%s" % (next(_counter), suffix))
+    path = os.path.join(temp_dir, "temp-{}{}".format(next(_counter), suffix))
     try:
         yield path
     finally:
@@ -2490,7 +2490,7 @@ def test_open_mfdataset_list_attr():
             f.createDimension("x", 3)
             vlvar = f.createVariable("test_var", np.int32, ("x"))
             # here create an attribute as a list
-            vlvar.test_attr = ["string a {}".format(i), "string b {}".format(i)]
+            vlvar.test_attr = [f"string a {i}", f"string b {i}"]
             vlvar[:] = np.arange(3)
             f.close()
         ds1 = open_dataset(nfiles[0])
@@ -3534,7 +3534,7 @@ def create_tmp_geotiff(
             crs=crs,
             transform=transform,
             dtype=rasterio.float32,
-            **open_kwargs
+            **open_kwargs,
         ) as s:
             for attr, val in additional_attrs.items():
                 setattr(s, attr, val)
@@ -4276,7 +4276,7 @@ def test_use_cftime_standard_calendar_default_out_of_range(calendar, units_year)
 
     x = [0, 1]
     time = [0, 720]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     original = DataArray(x, [("time", time)], name="x")
     original = original.to_dataset()
     for v in ["x", "time"]:
@@ -4307,7 +4307,7 @@ def test_use_cftime_true(calendar, units_year):
 
     x = [0, 1]
     time = [0, 720]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     original = DataArray(x, [("time", time)], name="x")
     original = original.to_dataset()
     for v in ["x", "time"]:
@@ -4365,7 +4365,7 @@ def test_use_cftime_false_standard_calendar_in_range(calendar):
 def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year):
     x = [0, 1]
     time = [0, 720]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     original = DataArray(x, [("time", time)], name="x")
     original = original.to_dataset()
     for v in ["x", "time"]:
@@ -4384,7 +4384,7 @@ def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year):
 def test_use_cftime_false_nonstandard_calendar(calendar, units_year):
     x = [0, 1]
     time = [0, 720]
-    units = "days since {}".format(units_year)
+    units = f"days since {units_year}"
     original = DataArray(x, [("time", time)], name="x")
     original = original.to_dataset()
     for v in ["x", "time"]:
diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index 3be46b68fc4..142769dbbe7 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -202,7 +202,7 @@ def test_to_offset_annual(month_label, month_int, multiple, offset_str):
     if month_label:
         freq = "-".join([freq, month_label])
     if multiple:
-        freq = "{}".format(multiple) + freq
+        freq = f"{multiple}{freq}"
     result = to_offset(freq)
 
     if multiple and month_int:
@@ -230,7 +230,7 @@ def test_to_offset_quarter(month_label, month_int, multiple, offset_str):
     if month_label:
         freq = "-".join([freq, month_label])
     if multiple:
-        freq = "{}".format(multiple) + freq
+        freq = f"{multiple}{freq}"
     result = to_offset(freq)
 
     if multiple and month_int:
diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py
index 10cdd03459c..c9d10ba4eb0 100644
--- a/xarray/tests/test_coding_strings.py
+++ b/xarray/tests/test_coding_strings.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 from contextlib import suppress
 
 import numpy as np
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 45f2ea6e28a..021d76e2b11 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -455,7 +455,7 @@ def test_decode_360_day_calendar():
     calendar = "360_day"
     # ensure leap year doesn't matter
     for year in [2010, 2011, 2012, 2013, 2014]:
-        units = "days since {}-01-01".format(year)
+        units = f"days since {year}-01-01"
         num_times = np.arange(100)
 
         if cftime.__name__ == "cftime":
@@ -884,7 +884,7 @@ def test_use_cftime_default_standard_calendar_out_of_range(calendar, units_year)
     from cftime import num2date
 
     numerical_dates = [0, 1]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     expected = num2date(
         numerical_dates, units, calendar, only_use_cftime_datetimes=True
     )
@@ -901,7 +901,7 @@ def test_use_cftime_default_non_standard_calendar(calendar, units_year):
     from cftime import num2date
 
     numerical_dates = [0, 1]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     expected = num2date(
         numerical_dates, units, calendar, only_use_cftime_datetimes=True
     )
@@ -919,7 +919,7 @@ def test_use_cftime_true(calendar, units_year):
     from cftime import num2date
 
     numerical_dates = [0, 1]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     expected = num2date(
         numerical_dates, units, calendar, only_use_cftime_datetimes=True
     )
@@ -946,7 +946,7 @@ def test_use_cftime_false_standard_calendar_in_range(calendar):
 @pytest.mark.parametrize("units_year", [1500, 2500])
 def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year):
     numerical_dates = [0, 1]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     with pytest.raises(OutOfBoundsDatetime):
         decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False)
 
@@ -955,6 +955,6 @@ def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year):
 @pytest.mark.parametrize("units_year", [1500, 2000, 2500])
 def test_use_cftime_false_non_standard_calendar(calendar, units_year):
     numerical_dates = [0, 1]
-    units = "days since {}-01-01".format(units_year)
+    units = f"days since {units_year}-01-01"
     with pytest.raises(OutOfBoundsDatetime):
         decode_cf_datetime(numerical_dates, units, calendar, use_cftime=False)
diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
index e0058f4001d..383427b479b 100644
--- a/xarray/tests/test_computation.py
+++ b/xarray/tests/test_computation.py
@@ -25,7 +25,7 @@
 
 def assert_identical(a, b):
     if hasattr(a, "identical"):
-        msg = "not identical:\n%r\n%r" % (a, b)
+        msg = f"not identical:\n{a!r}\n{b!r}"
         assert a.identical(b), msg
     else:
         assert_array_equal(a, b)
@@ -376,7 +376,7 @@ def func(*x):
             *objects,
             input_core_dims=[[dim]] * len(objects),
             output_core_dims=[[dim]],
-            exclude_dims={dim}
+            exclude_dims={dim},
         )
         if isinstance(result, (xr.Dataset, xr.DataArray)):
             # note: this will fail if dim is not a coordinate on any input
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 5d80abb4661..42b2a679347 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import contextlib
 import warnings
 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 1398e936f37..d05a02ae705 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -2516,7 +2516,7 @@ def test_groupby_reduce_attrs(self):
 
         for shortcut in [True, False]:
             for keep_attrs in [True, False]:
-                print("shortcut=%s, keep_attrs=%s" % (shortcut, keep_attrs))
+                print(f"shortcut={shortcut}, keep_attrs={keep_attrs}")
                 actual = array.groupby("abc").reduce(
                     np.mean, keep_attrs=keep_attrs, shortcut=shortcut
                 )
@@ -4160,7 +4160,7 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods):
     # Test all bottleneck functions
     rolling_obj = da.rolling(time=7, min_periods=min_periods)
 
-    func_name = "move_{}".format(name)
+    func_name = f"move_{name}"
     actual = getattr(rolling_obj, name)()
     expected = getattr(bn, func_name)(
         da.values, window=7, axis=1, min_count=min_periods
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index a5c9920f1d9..dce417f27f9 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pickle
 import sys
 import warnings
@@ -5391,7 +5390,7 @@ def test_rolling_wrapped_bottleneck(ds, name, center, min_periods, key):
     # Test all bottleneck functions
     rolling_obj = ds.rolling(time=7, min_periods=min_periods)
 
-    func_name = "move_{}".format(name)
+    func_name = f"move_{name}"
     actual = getattr(rolling_obj, name)()
     if key == "z1":  # z1 does not depend on 'Time' axis. Stored as it is.
         expected = ds[key]
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 62ea19be97b..eb073a14aae 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -355,7 +355,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
             # Numpy < 1.13 does not handle object-type array.
             try:
                 if skipna:
-                    expected = getattr(np, "nan{}".format(func))(da.values, axis=axis)
+                    expected = getattr(np, f"nan{func}")(da.values, axis=axis)
                 else:
                     expected = getattr(np, func)(da.values, axis=axis)
 
@@ -400,7 +400,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
         actual = getattr(da, func)(skipna=skipna)
         if dask:
             assert isinstance(da.data, dask_array_type)
-        expected = getattr(np, "nan{}".format(func))(da.values)
+        expected = getattr(np, f"nan{func}")(da.values)
         if actual.dtype == object:
             assert actual.values == np.array(expected)
         else:
diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
index c518f528537..9a1f0bbd975 100644
--- a/xarray/tests/test_formatting.py
+++ b/xarray/tests/test_formatting.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import sys
 from textwrap import dedent
 
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 957d4800c40..be494c4ae2b 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -276,4 +276,20 @@ def test_groupby_grouping_errors():
         dataset.to_array().groupby(dataset.foo * np.nan)
 
 
+def test_groupby_bins_timeseries():
+    """groupby_bins over a datetime coordinate with pd.date_range bin edges."""
+    ds = xr.Dataset()
+    ds["time"] = xr.DataArray(
+        pd.date_range("2010-08-01", "2010-08-15", freq="15min"), dims="time"
+    )
+    # one sample per 15-minute step; the expected sum below counts 96 per 24H bin
+    ds["val"] = xr.DataArray(np.ones(*ds["time"].shape), dims="time")
+    time_bins = pd.date_range(start="2010-08-01", end="2010-08-15", freq="24H")
+    actual = ds.groupby_bins("time", time_bins).sum()
+    # pd.cut(...).categories reproduces the interval labels groupby_bins assigns
+    expected = xr.DataArray(
+        96 * np.ones((14,)),
+        dims=["time_bins"],
+        coords={"time_bins": pd.cut(time_bins, time_bins).categories},
+    ).to_dataset(name="val")
+    assert_identical(actual, expected)
+
+
 # TODO: move other groupby tests from test_dataset and test_dataarray over here
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index e3b29b86e4d..3ac45a9720f 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -1544,7 +1544,7 @@ def test_names_appear_somewhere(self):
         self.darray.name = "testvar"
         self.g.map_dataarray(xplt.contourf, "x", "y")
         for k, ax in zip("abc", self.g.axes.flat):
-            assert "z = {}".format(k) == ax.get_title()
+            assert f"z = {k}" == ax.get_title()
 
         alltxt = text_in_fig()
         assert self.darray.name in alltxt
diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
index 4a0c6c58619..bd26b96f6d4 100644
--- a/xarray/tests/test_sparse.py
+++ b/xarray/tests/test_sparse.py
@@ -65,7 +65,7 @@ def __call__(self, obj):
         return getattr(obj, self.meth)(*self.args, **self.kwargs)
 
     def __repr__(self):
-        return "obj.{}(*{}, **{})".format(self.meth, self.args, self.kwargs)
+        return f"obj.{self.meth}(*{self.args}, **{self.kwargs})"
 
 
 @pytest.mark.parametrize(
diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py
index 9bf84c9edb0..a2eb159f624 100644
--- a/xarray/tests/test_tutorial.py
+++ b/xarray/tests/test_tutorial.py
@@ -17,9 +17,9 @@ def setUp(self):
             os.sep.join(("~", ".xarray_tutorial_data", self.testfile))
         )
         with suppress(OSError):
-            os.remove("{}.nc".format(self.testfilepath))
+            os.remove(f"{self.testfilepath}.nc")
         with suppress(OSError):
-            os.remove("{}.md5".format(self.testfilepath))
+            os.remove(f"{self.testfilepath}.md5")
 
     def test_download_from_github(self):
         ds = tutorial.open_dataset(self.testfile).load()
diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
new file mode 100644
index 00000000000..9d14104bb50
--- /dev/null
+++ b/xarray/tests/test_units.py
@@ -0,0 +1,1634 @@
+import operator
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import xarray as xr
+from xarray.core import formatting
+from xarray.core.npcompat import IS_NEP18_ACTIVE
+
+pint = pytest.importorskip("pint")
+DimensionalityError = pint.errors.DimensionalityError
+
+
+unit_registry = pint.UnitRegistry()
+Quantity = unit_registry.Quantity
+
+pytestmark = [
+    pytest.mark.skipif(
+        not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled"
+    ),
+    # TODO: remove this once pint has a released version with __array_function__
+    pytest.mark.skipif(
+        not hasattr(unit_registry.Quantity, "__array_function__"),
+        reason="pint does not implement __array_function__ yet",
+    ),
+    # pytest.mark.filterwarnings("ignore:::pint[.*]"),
+]
+
+
+def array_extract_units(obj):
+    """Return the units of ``obj`` (or of ``obj.data``), or None if unitless."""
+    # xarray objects expose the wrapped duck array via .data; plain arrays as-is
+    raw = obj.data if hasattr(obj, "data") else obj
+    try:
+        return raw.units
+    except AttributeError:
+        # not a quantity
+        return None
+
+
+def array_strip_units(array):
+    """Return the bare magnitude of a quantity; non-quantities pass through unchanged."""
+    try:
+        return array.magnitude
+    except AttributeError:
+        return array
+
+
+def array_attach_units(data, unit, convert_from=None):
+    """Attach ``unit`` to ``data``, optionally converting from another unit.
+
+    ``unit`` may also be passed as a ``(unit, convert_from)`` tuple. If ``data``
+    is already a Quantity, ``convert_from`` is required: either a
+    ``unit_registry.Unit`` to strip before re-attaching, or exactly ``True`` to
+    convert from the quantity's own (compatible) units. Otherwise raises
+    ``ValueError``.
+    """
+    # allow a single (unit, convert_from) tuple argument
+    try:
+        unit, convert_from = unit
+    except TypeError:
+        pass
+
+    if isinstance(data, Quantity):
+        if not convert_from:
+            # refusing to silently re-unit an already-united quantity
+            raise ValueError(
+                "cannot attach unit {unit} to quantity ({data.units})".format(
+                    unit=unit, data=data
+                )
+            )
+        elif isinstance(convert_from, unit_registry.Unit):
+            data = data.magnitude
+        elif convert_from is True:  # intentionally accept exactly true
+            if data.check(unit):
+                # compatible dimensionality: convert via the original units below
+                convert_from = data.units
+                data = data.magnitude
+            else:
+                raise ValueError(
+                    "cannot convert quantity ({data.units}) to {unit}".format(
+                        unit=unit, data=data
+                    )
+                )
+        else:
+            raise ValueError(
+                "cannot convert from invalid unit {convert_from}".format(
+                    convert_from=convert_from
+                )
+            )
+
+    # to make sure we also encounter the case of "equal if converted"
+    if convert_from is not None:
+        quantity = (data * convert_from).to(
+            unit
+            if isinstance(unit, unit_registry.Unit)
+            else unit_registry.dimensionless
+        )
+    else:
+        try:
+            quantity = data * unit
+        # NOTE(review): np.core._exceptions is a private numpy API — may break
+        # across numpy versions; confirm against the pinned numpy
+        except np.core._exceptions.UFuncTypeError:
+            if unit != 1:
+                raise
+
+            # unit of 1 means "dimensionless": keep the data as-is
+            quantity = data
+
+    return quantity
+
+
+def extract_units(obj):
+    """Map variable/coordinate names of ``obj`` to their units (None if unitless).
+
+    For a Dataset the keys are data_vars and coords names; for a DataArray the
+    array itself is keyed by its name plus its coords; for a bare Quantity the
+    single key ``"<array>"`` is used. Anything else yields an empty dict.
+    """
+    if isinstance(obj, xr.Dataset):
+        vars_units = {
+            name: array_extract_units(value) for name, value in obj.data_vars.items()
+        }
+        coords_units = {
+            name: array_extract_units(value) for name, value in obj.coords.items()
+        }
+
+        units = {**vars_units, **coords_units}
+    elif isinstance(obj, xr.DataArray):
+        vars_units = {obj.name: array_extract_units(obj)}
+        coords_units = {
+            name: array_extract_units(value) for name, value in obj.coords.items()
+        }
+
+        units = {**vars_units, **coords_units}
+    elif isinstance(obj, Quantity):
+        vars_units = {"<array>": array_extract_units(obj)}
+
+        units = {**vars_units}
+    else:
+        units = {}
+
+    return units
+
+
+def strip_units(obj):
+    """Return a copy of ``obj`` with all pint units removed.
+
+    Recurses through Dataset data_vars/coords and DataArray coords; bare
+    quantities are reduced to their magnitude; anything else passes through.
+    """
+    if isinstance(obj, xr.Dataset):
+        data_vars = {name: strip_units(value) for name, value in obj.data_vars.items()}
+        coords = {name: strip_units(value) for name, value in obj.coords.items()}
+
+        new_obj = xr.Dataset(data_vars=data_vars, coords=coords)
+    elif isinstance(obj, xr.DataArray):
+        data = array_strip_units(obj.data)
+        coords = {
+            name: (
+                (value.dims, array_strip_units(value.data))
+                if isinstance(value.data, Quantity)
+                else value  # to preserve multiindexes
+            )
+            for name, value in obj.coords.items()
+        }
+
+        new_obj = xr.DataArray(name=obj.name, data=data, coords=coords, dims=obj.dims)
+    elif hasattr(obj, "magnitude"):
+        # bare pint quantity
+        new_obj = obj.magnitude
+    else:
+        new_obj = obj
+
+    return new_obj
+
+
+def attach_units(obj, units):
+    """Attach units from the ``units`` mapping to ``obj`` (inverse of strip_units).
+
+    ``units`` maps variable/coord names to units; the keys ``"data"`` and
+    ``None`` act as fallbacks for unnamed arrays, and missing entries default
+    to dimensionless (1).
+    """
+    if not isinstance(obj, (xr.DataArray, xr.Dataset)):
+        # bare array: only the "data" fallback applies
+        return array_attach_units(obj, units.get("data", 1))
+
+    if isinstance(obj, xr.Dataset):
+        data_vars = {
+            name: attach_units(value, units) for name, value in obj.data_vars.items()
+        }
+
+        coords = {
+            name: attach_units(value, units) for name, value in obj.coords.items()
+        }
+
+        new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs)
+    else:
+        # try the array name, "data" and None, then fall back to dimensionless
+        data_units = (
+            units.get(obj.name, None)
+            or units.get("data", None)
+            or units.get(None, None)
+            or 1
+        )
+
+        data = array_attach_units(obj.data, data_units)
+
+        coords = {
+            name: (
+                (value.dims, array_attach_units(value.data, units.get(name) or 1))
+                if name in units
+                # to preserve multiindexes
+                else value
+            )
+            for name, value in obj.coords.items()
+        }
+        dims = obj.dims
+        attrs = obj.attrs
+
+        new_obj = xr.DataArray(
+            name=obj.name, data=data, coords=coords, attrs=attrs, dims=dims
+        )
+
+    return new_obj
+
+
+def assert_equal_with_units(a, b):
+    """Assert ``a`` and ``b`` are equal in values AND in units.
+
+    Works like xr.testing.assert_equal, but also explicitly checks units,
+    so it is more like assert_identical.
+    """
+    __tracebackhide__ = True
+
+    if isinstance(a, xr.Dataset) or isinstance(b, xr.Dataset):
+        a_units = extract_units(a)
+        b_units = extract_units(b)
+
+        # compare the unitless values with xarray, the units separately
+        a_without_units = strip_units(a)
+        b_without_units = strip_units(b)
+
+        assert a_without_units.equals(b_without_units), formatting.diff_dataset_repr(
+            a, b, "equals"
+        )
+        assert a_units == b_units
+    else:
+        # unwrap DataArray/Variable down to the duck array
+        a = a if not isinstance(a, (xr.DataArray, xr.Variable)) else a.data
+        b = b if not isinstance(b, (xr.DataArray, xr.Variable)) else b.data
+
+        assert type(a) == type(b) or (
+            isinstance(a, Quantity) and isinstance(b, Quantity)
+        )
+
+        # workaround until pint implements allclose in __array_function__
+        if isinstance(a, Quantity) or isinstance(b, Quantity):
+            assert (
+                hasattr(a, "magnitude") and hasattr(b, "magnitude")
+            ) and np.allclose(a.magnitude, b.magnitude, equal_nan=True)
+            assert (hasattr(a, "units") and hasattr(b, "units")) and a.units == b.units
+        else:
+            assert np.allclose(a, b, equal_nan=True)
+
+
+@pytest.fixture(params=[float, int])
+def dtype(request):
+    # parametrized fixture: every test requesting it runs once per dtype
+    return request.param
+
+
+class method:
+    """Callable test helper that invokes ``obj.<name>(*args, **kwargs)``.
+
+    If ``obj`` has no such method and is not an xarray object, falls back to
+    the same-named module-level numpy function. ``__repr__`` gives readable
+    pytest parametrize ids (``method_<name>``).
+    """
+
+    def __init__(self, name, *args, **kwargs):
+        self.name = name
+        self.args = args
+        self.kwargs = kwargs
+
+    def __call__(self, obj, *args, **kwargs):
+        from collections.abc import Callable
+        from functools import partial
+
+        # call-time args/kwargs extend (and override) the ones bound at init
+        all_args = list(self.args) + list(args)
+        all_kwargs = {**self.kwargs, **kwargs}
+
+        func = getattr(obj, self.name, None)
+        if func is None or not isinstance(func, Callable):
+            # fall back to module level numpy functions if not a xarray object
+            if not isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
+                numpy_func = getattr(np, self.name)
+                func = partial(numpy_func, obj)
+                # remove typical xr args like "dim"
+                exclude_kwargs = ("dim", "dims")
+                all_kwargs = {
+                    key: value
+                    for key, value in all_kwargs.items()
+                    if key not in exclude_kwargs
+                }
+            else:
+                raise AttributeError(f"{obj} has no method named '{self.name}'")
+
+        return func(*all_args, **all_kwargs)
+
+    def __repr__(self):
+        return f"method_{self.name}"
+
+
+class function:
+    """Callable test helper wrapping the numpy function ``np.<name>``.
+
+    ``__repr__`` gives readable pytest parametrize ids (``function_<name>``).
+    """
+
+    def __init__(self, name):
+        self.name = name
+        self.func = getattr(np, name)
+
+    def __call__(self, *args, **kwargs):
+        return self.func(*args, **kwargs)
+
+    def __repr__(self):
+        return f"function_{self.name}"
+
+
+@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like))
+def test_replication(func, dtype):
+    """xr.zeros_like / xr.ones_like should match the numpy equivalents on quantities."""
+    array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s
+    data_array = xr.DataArray(data=array, dims="x")
+
+    # same-named numpy function applied directly to the quantity is the reference
+    numpy_func = getattr(np, func.__name__)
+    expected = xr.DataArray(data=numpy_func(array), dims="x")
+    result = func(data_array)
+
+    assert_equal_with_units(expected, result)
+
+
+@pytest.mark.xfail(
+    reason="np.full_like on Variable strips the unit and pint does not allow mixed args"
+)
+@pytest.mark.parametrize(
+    "unit,error",
+    (
+        pytest.param(1, DimensionalityError, id="no_unit"),
+        pytest.param(
+            unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+        ),
+        pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"),
+        pytest.param(unit_registry.ms, None, id="compatible_unit"),
+        pytest.param(unit_registry.s, None, id="identical_unit"),
+    ),
+)
+def test_replication_full_like(unit, error, dtype):
+    """xr.full_like with a unit-carrying fill_value: error on incompatible units."""
+    array = np.linspace(0, 5, 10) * unit_registry.s
+    data_array = xr.DataArray(data=array, dims="x")
+
+    fill_value = -1 * unit
+    if error is not None:
+        # incompatible fill_value units must raise rather than silently mix
+        with pytest.raises(error):
+            xr.full_like(data_array, fill_value=fill_value)
+    else:
+        result = xr.full_like(data_array, fill_value=fill_value)
+        expected = np.full_like(array, fill_value=fill_value)
+
+        assert_equal_with_units(expected, result)
+
+
+class TestDataArray:
+    @pytest.mark.filterwarnings("error:::pint[.*]")
+    @pytest.mark.parametrize(
+        "variant",
+        (
+            pytest.param(
+                "with_dims",
+                marks=pytest.mark.xfail(reason="units in indexes are not supported"),
+            ),
+            pytest.param("with_coords"),
+            pytest.param("without_coords"),
+        ),
+    )
+    def test_init(self, variant, dtype):
+        array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m
+
+        x = np.arange(len(array)) * unit_registry.s
+        y = x.to(unit_registry.ms)
+
+        variants = {
+            "with_dims": {"x": x},
+            "with_coords": {"y": ("x", y)},
+            "without_coords": {},
+        }
+
+        kwargs = {"data": array, "dims": "x", "coords": variants.get(variant)}
+        data_array = xr.DataArray(**kwargs)
+
+        assert isinstance(data_array.data, Quantity)
+        assert all(
+            {
+                name: isinstance(coord.data, Quantity)
+                for name, coord in data_array.coords.items()
+            }.values()
+        )
+
+    @pytest.mark.filterwarnings("error:::pint[.*]")
+    @pytest.mark.parametrize(
+        "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr"))
+    )
+    @pytest.mark.parametrize(
+        "variant",
+        (
+            pytest.param(
+                "with_dims",
+                marks=pytest.mark.xfail(reason="units in indexes are not supported"),
+            ),
+            pytest.param("with_coords"),
+            pytest.param("without_coords"),
+        ),
+    )
+    def test_repr(self, func, variant, dtype):
+        array = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.m
+        x = np.arange(len(array)) * unit_registry.s
+        y = x.to(unit_registry.ms)
+
+        variants = {
+            "with_dims": {"x": x},
+            "with_coords": {"y": ("x", y)},
+            "without_coords": {},
+        }
+
+        kwargs = {"data": array, "dims": "x", "coords": variants.get(variant)}
+        data_array = xr.DataArray(**kwargs)
+
+        # FIXME: this just checks that the repr does not raise
+        # warnings or errors, but does not check the result
+        func(data_array)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(
+                function("all"),
+                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
+            ),
+            pytest.param(
+                function("any"),
+                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
+            ),
+            pytest.param(
+                function("argmax"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantity with ndarrays in nanops not implemented"
+                ),
+            ),
+            pytest.param(
+                function("argmin"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantity with ndarrays in nanops not implemented"
+                ),
+            ),
+            function("max"),
+            function("mean"),
+            pytest.param(
+                function("median"),
+                marks=pytest.mark.xfail(
+                    reason="np.median on DataArray strips the units"
+                ),
+            ),
+            function("min"),
+            pytest.param(
+                function("prod"),
+                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
+            ),
+            pytest.param(
+                function("sum"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantity with ndarrays in nanops not implemented"
+                ),
+            ),
+            function("std"),
+            function("var"),
+            function("cumsum"),
+            pytest.param(
+                function("cumprod"),
+                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
+            ),
+            pytest.param(
+                method("all"),
+                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
+            ),
+            pytest.param(
+                method("any"),
+                marks=pytest.mark.xfail(reason="not implemented by pint yet"),
+            ),
+            pytest.param(
+                method("argmax"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantities with ndarrays in nanops not implemented"
+                ),
+            ),
+            pytest.param(
+                method("argmin"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantities with ndarrays in nanops not implemented"
+                ),
+            ),
+            method("max"),
+            method("mean"),
+            method("median"),
+            method("min"),
+            pytest.param(
+                method("prod"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantity with ndarrays in nanops not implemented"
+                ),
+            ),
+            pytest.param(
+                method("sum"),
+                marks=pytest.mark.xfail(
+                    reason="comparison of quantity with ndarrays in nanops not implemented"
+                ),
+            ),
+            method("std"),
+            method("var"),
+            method("cumsum"),
+            pytest.param(
+                method("cumprod"),
+                marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"),
+            ),
+        ),
+        ids=repr,
+    )
+    def test_aggregation(self, func, dtype):
+        """Aggregating a quantity-backed DataArray should match pint's result."""
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        # applying the same aggregation to the bare quantity is the reference
+        expected = xr.DataArray(data=func(array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(operator.neg, id="negate"),
+            pytest.param(abs, id="absolute"),
+            pytest.param(
+                np.round,
+                id="round",
+                marks=pytest.mark.xfail(reason="pint does not implement round"),
+            ),
+        ),
+    )
+    def test_unary_operations(self, func, dtype):
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        expected = xr.DataArray(data=func(array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(lambda x: 2 * x, id="multiply"),
+            pytest.param(lambda x: x + x, id="add"),
+            pytest.param(lambda x: x[0] + x, id="add scalar"),
+            pytest.param(
+                lambda x: x.T @ x,
+                id="matrix multiply",
+                marks=pytest.mark.xfail(
+                    reason="pint does not support matrix multiplication yet"
+                ),
+            ),
+        ),
+    )
+    def test_binary_operations(self, func, dtype):
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        expected = xr.DataArray(data=func(array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "comparison",
+        (
+            pytest.param(operator.lt, id="less_than"),
+            pytest.param(operator.ge, id="greater_equal"),
+            pytest.param(operator.eq, id="equal"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, ValueError, id="without_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incorrect_unit"),
+            pytest.param(unit_registry.m, None, id="correct_unit"),
+        ),
+    )
+    def test_comparison_operations(self, comparison, unit, error, dtype):
+        """Comparison operators should behave like comparisons on the raw quantity."""
+        array = (
+            np.array([10.1, 5.2, 6.5, 8.0, 21.3, 7.1, 1.3]).astype(dtype)
+            * unit_registry.m
+        )
+        data_array = xr.DataArray(data=array)
+
+        value = 8
+        to_compare_with = value * unit
+
+        # ordering comparisons raise on incompatible units; equality does not
+        if error is not None and comparison is not operator.eq:
+            with pytest.raises(error):
+                comparison(array, to_compare_with)
+
+            with pytest.raises(error):
+                comparison(data_array, to_compare_with)
+        else:
+            result = comparison(data_array, to_compare_with)
+            # pint compares incompatible quantities as all-False; multiplying by
+            # an all-True boolean array broadcasts a scalar comparison result to
+            # the shape of the array
+            expected = xr.DataArray(
+                data=comparison(array, to_compare_with)
+                * np.ones_like(array, dtype=bool)
+            )
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "units,error",
+        (
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.m, DimensionalityError, id="incorrect unit"),
+            pytest.param(unit_registry.degree, None, id="correct unit"),
+        ),
+    )
+    def test_univariate_ufunc(self, units, error, dtype):
+        array = np.arange(10).astype(dtype) * units
+        data_array = xr.DataArray(data=array)
+
+        if error is not None:
+            with pytest.raises(error):
+                np.sin(data_array)
+        else:
+            expected = xr.DataArray(data=np.sin(array))
+            result = np.sin(data_array)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="pint's implementation of `np.maximum` strips units")
+    def test_bivariate_ufunc(self, dtype):
+        unit = unit_registry.m
+        array = np.arange(10).astype(dtype) * unit
+        data_array = xr.DataArray(data=array)
+
+        expected = xr.DataArray(np.maximum(array, 0 * unit))
+
+        assert_equal_with_units(expected, np.maximum(data_array, 0 * unit))
+        assert_equal_with_units(expected, np.maximum(0 * unit, data_array))
+
+    @pytest.mark.parametrize("property", ("T", "imag", "real"))
+    def test_numpy_properties(self, property, dtype):
+        array = (
+            np.arange(5 * 10).astype(dtype)
+            + 1j * np.linspace(-1, 0, 5 * 10).astype(dtype)
+        ).reshape(5, 10) * unit_registry.s
+        data_array = xr.DataArray(data=array, dims=("x", "y"))
+
+        expected = xr.DataArray(
+            data=getattr(array, property),
+            dims=("x", "y")[:: 1 if property != "T" else -1],
+        )
+        result = getattr(data_array, property)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            method("conj"),
+            method("argsort"),
+            method("conjugate"),
+            method("round"),
+            pytest.param(
+                method("rank", dim="x"),
+                marks=pytest.mark.xfail(reason="pint does not implement rank yet"),
+            ),
+        ),
+        ids=repr,
+    )
+    def test_numpy_methods(self, func, dtype):
+        """Argument-free numpy-style methods should match the raw quantity result."""
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array, dims="x")
+
+        expected = xr.DataArray(func(array), dims="x")
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func", (method("clip", min=3, max=8), method("searchsorted", v=5)), ids=repr
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_numpy_methods_with_args(self, func, unit, error, dtype):
+        """Methods with numeric args should accept them only in compatible units."""
+        array = np.arange(10).astype(dtype) * unit_registry.m
+        data_array = xr.DataArray(data=array)
+
+        scalar_types = (int, float)
+        # attach the parametrized unit to the plain numeric kwargs (clip bounds,
+        # searchsorted value) so they carry units into the call
+        kwargs = {
+            key: (value * unit if isinstance(value, scalar_types) else value)
+            for key, value in func.kwargs.items()
+        }
+
+        if error is not None:
+            with pytest.raises(error):
+                func(data_array, **kwargs)
+        else:
+            expected = func(array, **kwargs)
+            # searchsorted returns plain indices rather than a DataArray
+            if func.name not in ["searchsorted"]:
+                expected = xr.DataArray(data=expected)
+            result = func(data_array, **kwargs)
+
+            if func.name in ["searchsorted"]:
+                assert np.allclose(expected, result)
+            else:
+                assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func", (method("isnull"), method("notnull"), method("count")), ids=repr
+    )
+    def test_missing_value_detection(self, func, dtype):
+        """nan-detection methods should work on a quantity-backed DataArray."""
+        array = (
+            np.array(
+                [
+                    [1.4, 2.3, np.nan, 7.2],
+                    [np.nan, 9.7, np.nan, np.nan],
+                    [2.1, np.nan, np.nan, 4.6],
+                    [9.9, np.nan, 7.2, 9.1],
+                ]
+            )
+            * unit_registry.degK
+        )
+        x = np.arange(array.shape[0]) * unit_registry.m
+        y = np.arange(array.shape[1]) * unit_registry.m
+
+        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
+
+        # the same method on the unit-stripped array is the reference
+        expected = func(strip_units(data_array))
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="ffill and bfill lose units in data")
+    @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr)
+    def test_missing_value_filling(self, func, dtype):
+        array = (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * unit_registry.degK
+        )
+        x = np.arange(len(array))
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        result_without_units = func(strip_units(data_array), dim="x")
+        result = xr.DataArray(
+            data=result_without_units.data * unit_registry.degK,
+            coords={"x": x},
+            dims=["x"],
+        )
+
+        expected = attach_units(
+            func(strip_units(data_array), dim="x"), {"data": unit_registry.degK}
+        )
+        result = func(data_array, dim="x")
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="fillna drops the unit")
+    @pytest.mark.parametrize(
+        "fill_value",
+        (
+            pytest.param(
+                -1,
+                id="python scalar",
+                marks=pytest.mark.xfail(
+                    reason="python scalar cannot be converted using astype()"
+                ),
+            ),
+            pytest.param(np.array(-1), id="numpy scalar"),
+            pytest.param(np.array([-1]), id="numpy array"),
+        ),
+    )
+    def test_fillna(self, fill_value, dtype):
+        """fillna with a quantity fill value should keep units (currently xfail)."""
+        unit = unit_registry.m
+        array = np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit
+        data_array = xr.DataArray(data=array)
+
+        # reference: fill the unit-stripped data, then re-attach the unit
+        expected = attach_units(
+            strip_units(data_array).fillna(value=fill_value), {"data": unit}
+        )
+        result = data_array.fillna(value=fill_value * unit)
+
+        assert_equal_with_units(expected, result)
+
+    def test_dropna(self, dtype):
+        array = (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * unit_registry.m
+        )
+        x = np.arange(len(array))
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        expected = attach_units(
+            strip_units(data_array).dropna(dim="x"), {"data": unit_registry.m}
+        )
+        result = data_array.dropna(dim="x")
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="pint does not implement `numpy.isin`")
+    @pytest.mark.parametrize(
+        "unit",
+        (
+            pytest.param(1, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, id="dimensionless"),
+            pytest.param(unit_registry.s, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, id="compatible_unit"),
+            pytest.param(unit_registry.m, id="same_unit"),
+        ),
+    )
+    def test_isin(self, unit, dtype):
+        """isin should only match when units agree (xfail: pint lacks numpy.isin)."""
+        array = (
+            np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype)
+            * unit_registry.m
+        )
+        data_array = xr.DataArray(data=array, dims="x")
+
+        raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype)
+        values = raw_values * unit
+
+        result_without_units = strip_units(data_array).isin(raw_values)
+        # values in a different unit should never match, so the expected
+        # result becomes all-False
+        if unit != unit_registry.m:
+            result_without_units[:] = False
+        result_with_units = data_array.isin(values)
+
+        assert_equal_with_units(result_without_units, result_with_units)
+
+    @pytest.mark.parametrize(
+        "variant",
+        (
+            pytest.param(
+                "masking",
+                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
+            ),
+            pytest.param(
+                "replacing_scalar",
+                marks=pytest.mark.xfail(reason="scalar not convertible using astype"),
+            ),
+            pytest.param(
+                "replacing_array",
+                marks=pytest.mark.xfail(
+                    reason="replacing using an array drops the units"
+                ),
+            ),
+            pytest.param(
+                "dropping",
+                marks=pytest.mark.xfail(reason="nan not compatible with quantity"),
+            ),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="same_unit"),
+        ),
+    )
+    def test_where(self, variant, unit, error, dtype):
+        def _strip_units(mapping):
+            return {key: array_strip_units(value) for key, value in mapping.items()}
+
+        original_unit = unit_registry.m
+        array = np.linspace(0, 1, 10).astype(dtype) * original_unit
+
+        data_array = xr.DataArray(data=array)
+
+        condition = data_array < 0.5 * original_unit
+        other = np.linspace(-2, -1, 10).astype(dtype) * unit
+        variant_kwargs = {
+            "masking": {"cond": condition},
+            "replacing_scalar": {"cond": condition, "other": -1 * unit},
+            "replacing_array": {"cond": condition, "other": other},
+            "dropping": {"cond": condition, "drop": True},
+        }
+        kwargs = variant_kwargs.get(variant)
+        kwargs_without_units = _strip_units(kwargs)
+
+        if variant not in ("masking", "dropping") and error is not None:
+            with pytest.raises(error):
+                data_array.where(**kwargs)
+        else:
+            expected = attach_units(
+                strip_units(array).where(**kwargs_without_units),
+                {"data": original_unit},
+            )
+            result = data_array.where(**kwargs)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="interpolate strips units")
+    def test_interpolate_na(self, dtype):
+        """interpolate_na should keep the data units (currently strips them)."""
+        array = (
+            np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1])
+            * unit_registry.m
+        )
+        x = np.arange(len(array))
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype)
+
+        # reference: interpolate the unit-stripped data, then re-attach metres
+        expected = attach_units(
+            strip_units(data_array).interpolate_na(dim="x"), {"data": unit_registry.m}
+        )
+        result = data_array.interpolate_na(dim="x")
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="uses DataArray.where, which currently fails")
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_combine_first(self, unit, error, dtype):
+        """combine_first should require compatible units on the other operand."""
+        array = np.zeros(shape=(2, 2), dtype=dtype) * unit_registry.m
+        other_array = np.ones_like(array) * unit
+
+        # the two arrays only partially overlap in their coordinates
+        data_array = xr.DataArray(
+            data=array, coords={"x": ["a", "b"], "y": [-1, 0]}, dims=["x", "y"]
+        )
+        other = xr.DataArray(
+            data=other_array, coords={"x": ["b", "c"], "y": [0, 1]}, dims=["x", "y"]
+        )
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.combine_first(other)
+        else:
+            # reference: combine the unit-stripped arrays, then re-attach metres
+            expected = attach_units(
+                strip_units(data_array).combine_first(strip_units(other)),
+                {"data": unit_registry.m},
+            )
+            result = data_array.combine_first(other)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "unit",
+        (
+            pytest.param(1, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, id="dimensionless"),
+            pytest.param(unit_registry.s, id="incompatible_unit"),
+            pytest.param(
+                unit_registry.cm,
+                id="compatible_unit",
+                marks=pytest.mark.xfail(reason="identical does not check units yet"),
+            ),
+            pytest.param(unit_registry.m, id="identical_unit"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "variation",
+        (
+            "data",
+            pytest.param(
+                "dims", marks=pytest.mark.xfail(reason="units in indexes not supported")
+            ),
+            "coords",
+        ),
+    )
+    @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr)
+    def test_comparisons(self, func, variation, unit, dtype):
+        """equals/identical with the parametrized unit on data, dims or coords."""
+        data = np.linspace(0, 5, 10).astype(dtype)
+        coord = np.arange(len(data)).astype(dtype)
+
+        base_unit = unit_registry.m
+        quantity = data * base_unit
+        x = coord * base_unit
+        y = coord * base_unit
+
+        # which of (data, dim coord, non-dim coord) carries the varied unit
+        units = {
+            "data": (unit, base_unit, base_unit),
+            "dims": (base_unit, unit, base_unit),
+            "coords": (base_unit, base_unit, unit),
+        }
+        data_unit, dim_unit, coord_unit = units.get(variation)
+
+        data_array = xr.DataArray(
+            data=quantity, coords={"x": x, "y": ("x", y)}, dims="x"
+        )
+
+        # build the comparison object by re-attaching (and, where the units are
+        # compatible, converting) the parametrized units
+        other = attach_units(
+            strip_units(data_array),
+            {
+                None: (data_unit, base_unit if quantity.check(data_unit) else None),
+                "x": (dim_unit, base_unit if x.check(dim_unit) else None),
+                "y": (coord_unit, base_unit if y.check(coord_unit) else None),
+            },
+        )
+
+        # TODO: test dim coord once indexes leave units intact
+        # also, express this in terms of calls on the raw data array
+        # and then check the units
+        equal_arrays = (
+            np.all(quantity == other.data)
+            and (np.all(x == other.x.data) or True)  # dims can't be checked yet
+            and np.all(y == other.y.data)
+        )
+        equal_units = (
+            data_unit == unit_registry.m
+            and coord_unit == unit_registry.m
+            and dim_unit == unit_registry.m
+        )
+        # `identical` additionally requires the units to match exactly
+        expected = equal_arrays and (func.name != "identical" or equal_units)
+        result = func(data_array, other)
+
+        assert expected == result
+
+    @pytest.mark.parametrize(
+        "unit",
+        (
+            pytest.param(1, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, id="dimensionless"),
+            pytest.param(unit_registry.s, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, id="compatible_unit"),
+            pytest.param(unit_registry.m, id="identical_unit"),
+        ),
+    )
+    def test_broadcast_equals(self, unit, dtype):
+        """broadcast_equals should compare values after unit-aware conversion."""
+        left_array = np.ones(shape=(2, 2), dtype=dtype) * unit_registry.m
+        # convert to the target unit only when it is compatible with metres
+        right_array = array_attach_units(
+            np.ones(shape=(2,), dtype=dtype),
+            unit,
+            convert_from=unit_registry.m if left_array.check(unit) else None,
+        )
+
+        left = xr.DataArray(data=left_array, dims=("x", "y"))
+        right = xr.DataArray(data=right_array, dims="x")
+
+        # reference: element-wise comparison of the broadcast raw arrays
+        expected = np.all(left_array == right_array[:, None])
+        result = left.broadcast_equals(right)
+
+        assert expected == result
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            method("pipe", lambda da: da * 10),
+            method("assign_coords", y2=("y", np.arange(10) * unit_registry.mm)),
+            method("assign_attrs", attr1="value"),
+            method("rename", x2="x_mm"),
+            method("swap_dims", {"x": "x2"}),
+            method(
+                "expand_dims",
+                dim={"z": np.linspace(10, 20, 12) * unit_registry.s},
+                axis=1,
+            ),
+            method("drop", labels="x"),
+            method("reset_coords", names="x2"),
+            method("copy"),
+            pytest.param(
+                method("astype", np.float32),
+                marks=pytest.mark.xfail(reason="units get stripped"),
+            ),
+            pytest.param(
+                method("item", 1), marks=pytest.mark.xfail(reason="units get stripped")
+            ),
+        ),
+        ids=repr,
+    )
+    def test_content_manipulation(self, func, dtype):
+        """Content-manipulation methods should leave all units untouched."""
+        quantity = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype)
+            * unit_registry.pascal
+        )
+        x = np.arange(quantity.shape[0]) * unit_registry.m
+        y = np.arange(quantity.shape[1]) * unit_registry.m
+        # a second x coordinate in a different (but compatible) unit
+        x2 = x.to(unit_registry.mm)
+
+        data_array = xr.DataArray(
+            name="data",
+            data=quantity,
+            coords={"x": x, "x2": ("x", x2), "y": y},
+            dims=("x", "y"),
+        )
+
+        stripped_kwargs = {
+            key: array_strip_units(value) for key, value in func.kwargs.items()
+        }
+        # reference: run the method on the unit-stripped array, then re-attach
+        # the units; "x_mm" covers the rename case where "x2" changes name
+        expected = attach_units(
+            func(strip_units(data_array), **stripped_kwargs),
+            {
+                "data": quantity.units,
+                "x": x.units,
+                "x_mm": x2.units,
+                "x2": x2.units,
+                "y": y.units,
+            },
+        )
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(
+                method("drop", labels=np.array([1, 5]), dim="x"),
+                marks=pytest.mark.xfail(
+                    reason="selecting using incompatible units does not raise"
+                ),
+            ),
+            pytest.param(method("copy", data=np.arange(20))),
+        ),
+        ids=repr,
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, DimensionalityError, id="no_unit"),
+            pytest.param(
+                unit_registry.dimensionless, DimensionalityError, id="dimensionless"
+            ),
+            pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, KeyError, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_content_manipulation_with_units(self, func, unit, error, dtype):
+        """Methods taking quantity arguments should respect the argument's unit."""
+        quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal
+        x = np.arange(len(quantity)) * unit_registry.m
+
+        data_array = xr.DataArray(name="data", data=quantity, coords={"x": x}, dims="x")
+
+        # attach the parametrized unit to ndarray-valued kwargs only
+        kwargs = {
+            key: (value * unit if isinstance(value, np.ndarray) else value)
+            for key, value in func.kwargs.items()
+        }
+        # the original kwargs are already unit-free
+        stripped_kwargs = func.kwargs
+
+        # drop keeps the original data unit; copy replaces the data, so the
+        # result carries the argument's unit
+        expected = attach_units(
+            func(strip_units(data_array), **stripped_kwargs),
+            {"data": quantity.units if func.name == "drop" else unit, "x": x.units},
+        )
+        if error is not None and func.name == "drop":
+            with pytest.raises(error):
+                func(data_array, **kwargs)
+        else:
+            result = func(data_array, **kwargs)
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "indices",
+        (
+            pytest.param(4, id="single index"),
+            pytest.param([5, 2, 9, 1], id="multiple indices"),
+        ),
+    )
+    def test_isel(self, indices, dtype):
+        array = np.arange(10).astype(dtype) * unit_registry.s
+        x = np.arange(len(array)) * unit_registry.m
+
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        expected = attach_units(
+            strip_units(data_array).isel(x=indices),
+            {"data": unit_registry.s, "x": unit_registry.m},
+        )
+        result = data_array.isel(x=indices)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(
+        reason="xarray does not support duck arrays in dimension coordinates"
+    )
+    @pytest.mark.parametrize(
+        "values",
+        (
+            pytest.param(12, id="single value"),
+            pytest.param([10, 5, 13], id="list of multiple values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "units,error",
+        (
+            pytest.param(1, KeyError, id="no units"),
+            pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
+            pytest.param(unit_registry.degree, KeyError, id="incorrect unit"),
+            pytest.param(unit_registry.s, None, id="correct unit"),
+        ),
+    )
+    def test_sel(self, values, units, error, dtype):
+        """Label-based selection should require the coordinate's unit (xfail)."""
+        array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m
+        x = np.arange(len(array)) * unit_registry.s
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        values_with_units = values * units
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.sel(x=values_with_units)
+        else:
+            # the x coordinate is 0..19, so labels double as positions here
+            result_array = array[values]
+            result_data_array = data_array.sel(x=values_with_units)
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(
+        reason="xarray does not support duck arrays in dimension coordinates"
+    )
+    @pytest.mark.parametrize(
+        "values",
+        (
+            pytest.param(12, id="single value"),
+            pytest.param([10, 5, 13], id="list of multiple values"),
+            pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"),
+        ),
+    )
+    @pytest.mark.parametrize(
+        "units,error",
+        (
+            pytest.param(1, KeyError, id="no units"),
+            pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"),
+            pytest.param(unit_registry.degree, KeyError, id="incorrect unit"),
+            pytest.param(unit_registry.s, None, id="correct unit"),
+        ),
+    )
+    def test_loc(self, values, units, error, dtype):
+        """`.loc` selection should require the coordinate's unit (xfail)."""
+        array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m
+        x = np.arange(len(array)) * unit_registry.s
+        data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"])
+
+        values_with_units = values * units
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.loc[values_with_units]
+        else:
+            # the x coordinate is 0..19, so labels double as positions here
+            result_array = array[values]
+            result_data_array = data_array.loc[values_with_units]
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(reason="tries to coerce using asarray")
+    @pytest.mark.parametrize(
+        "shape",
+        (
+            pytest.param((10, 20), id="nothing squeezable"),
+            pytest.param((10, 20, 1), id="last dimension squeezable"),
+            pytest.param((10, 1, 20), id="middle dimension squeezable"),
+            pytest.param((1, 10, 20), id="first dimension squeezable"),
+            pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"),
+        ),
+    )
+    def test_squeeze(self, shape, dtype):
+        """squeeze should behave like numpy's squeeze while keeping units."""
+        names = "xyzt"
+        # every dimension gets a coordinate; "t" is in seconds, the rest in metres
+        coords = {
+            name: np.arange(length).astype(dtype)
+            * (unit_registry.m if name != "t" else unit_registry.s)
+            for name, length in zip(names, shape)
+        }
+        array = np.arange(10 * 20).astype(dtype).reshape(shape) * unit_registry.J
+        data_array = xr.DataArray(
+            data=array, coords=coords, dims=tuple(names[: len(shape)])
+        )
+
+        result_array = array.squeeze()
+        result_data_array = data_array.squeeze()
+        assert_equal_with_units(result_array, result_data_array)
+
+        # try squeezing the dimensions separately
+        names = tuple(dim for dim, coord in coords.items() if len(coord) == 1)
+        for index, name in enumerate(names):
+            assert_equal_with_units(
+                np.squeeze(array, axis=index), data_array.squeeze(dim=name)
+            )
+
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_interp(self, unit, error):
+        """interp onto quantity coordinates should keep the data units."""
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        new_coords = (np.arange(10) + 0.5) * unit
+        coords = {
+            "x": np.arange(10) * unit_registry.m,
+            "y": np.arange(5) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+
+        # NOTE(review): every parametrization currently passes error=None, so
+        # this branch is never exercised — confirm this is intentional
+        if error is not None:
+            with pytest.raises(error):
+                data_array.interp(x=new_coords)
+        else:
+            new_coords_ = (
+                new_coords.magnitude if hasattr(new_coords, "magnitude") else new_coords
+            )
+            result_array = strip_units(data_array).interp(
+                x=new_coords_ * unit_registry.degK
+            )
+            result_data_array = data_array.interp(x=new_coords)
+
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(reason="tries to coerce using asarray")
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_interp_like(self, unit, error):
+        """interp_like onto another DataArray should keep the data units (xfail)."""
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        coords = {
+            "x": (np.arange(10) + 0.3) * unit_registry.m,
+            "y": (np.arange(5) + 0.3) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+        new_data_array = xr.DataArray(
+            data=np.empty((20, 10)),
+            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
+            dims=("x", "y"),
+        )
+
+        # NOTE(review): every parametrization currently passes error=None, so
+        # this branch is never exercised — confirm this is intentional
+        if error is not None:
+            with pytest.raises(error):
+                data_array.interp_like(new_data_array)
+        else:
+            # reference: interpolate the unit-stripped arrays, then re-attach degK
+            result_array = (
+                xr.DataArray(
+                    data=array.magnitude,
+                    coords={name: value.magnitude for name, value in coords.items()},
+                    dims=("x", "y"),
+                ).interp_like(strip_units(new_data_array))
+                * unit_registry.degK
+            )
+            result_data_array = data_array.interp_like(new_data_array)
+
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(
+        reason="pint does not implement np.result_type in __array_function__ yet"
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_reindex(self, unit, error):
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        new_coords = (np.arange(10) + 0.5) * unit
+        coords = {
+            "x": np.arange(10) * unit_registry.m,
+            "y": np.arange(5) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.interp(x=new_coords)
+        else:
+            result_array = strip_units(data_array).reindex(
+                x=(
+                    new_coords.magnitude
+                    if hasattr(new_coords, "magnitude")
+                    else new_coords
+                )
+                * unit_registry.degK
+            )
+            result_data_array = data_array.reindex(x=new_coords)
+
+            assert_equal_with_units(result_array, result_data_array)
+
+    @pytest.mark.xfail(
+        reason="pint does not implement np.result_type in __array_function__ yet"
+    )
+    @pytest.mark.parametrize(
+        "unit,error",
+        (
+            pytest.param(1, None, id="no_unit"),
+            pytest.param(unit_registry.dimensionless, None, id="dimensionless"),
+            pytest.param(unit_registry.s, None, id="incompatible_unit"),
+            pytest.param(unit_registry.cm, None, id="compatible_unit"),
+            pytest.param(unit_registry.m, None, id="identical_unit"),
+        ),
+    )
+    def test_reindex_like(self, unit, error):
+        """reindex_like onto a target whose coords carry ``unit`` should keep
+        units on the data and on the original coords."""
+        array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK
+        # Offset coords so the integer-valued target grid never matches.
+        coords = {
+            "x": (np.arange(10) + 0.3) * unit_registry.m,
+            "y": (np.arange(5) + 0.3) * unit_registry.m,
+        }
+
+        data_array = xr.DataArray(array, coords=coords, dims=("x", "y"))
+        new_data_array = xr.DataArray(
+            data=np.empty((20, 10)),
+            coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit},
+            dims=("x", "y"),
+        )
+
+        if error is not None:
+            with pytest.raises(error):
+                data_array.reindex_like(new_data_array)
+        else:
+            # Reference: reindex on stripped magnitudes, then re-attach the
+            # original units to the data and both coords.
+            expected = attach_units(
+                strip_units(data_array).reindex_like(strip_units(new_data_array)),
+                {
+                    "data": unit_registry.degK,
+                    "x": unit_registry.m,
+                    "y": unit_registry.m,
+                },
+            )
+            result = data_array.reindex_like(new_data_array)
+
+            assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (method("unstack"), method("reset_index", "z"), method("reorder_levels")),
+        ids=repr,
+    )
+    def test_stacking_stacked(self, func, dtype):
+        """Operations on a stacked DataArray should keep the data's units."""
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
+        )
+        # Dimension coords deliberately carry no units; only the data does.
+        x = np.arange(array.shape[0])
+        y = np.arange(array.shape[1])
+
+        data_array = xr.DataArray(
+            name="data", data=array, coords={"x": x, "y": y}, dims=("x", "y")
+        )
+        stacked = data_array.stack(z=("x", "y"))
+
+        expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m})
+        result = func(stacked)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="indexes strip the label units")
+    def test_to_unstacked_dataset(self, dtype):
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype)
+            * unit_registry.pascal
+        )
+        x = np.arange(array.shape[0]) * unit_registry.m
+        y = np.arange(array.shape[1]) * unit_registry.s
+
+        data_array = xr.DataArray(
+            data=array, coords={"x": x, "y": y}, dims=("x", "y")
+        ).stack(z=("x", "y"))
+
+        func = method("to_unstacked_dataset", dim="z")
+
+        expected = attach_units(
+            func(strip_units(data_array)),
+            {"y": y.units, **dict(zip(x.magnitude, [array.units] * len(y)))},
+        ).rename({elem.magnitude: elem for elem in x})
+        result = func(data_array)
+
+        print(data_array, expected, result, sep="\n")
+
+        assert_equal_with_units(expected, result)
+
+        assert False
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            method("transpose", "y", "x", "z"),
+            method("stack", a=("x", "y")),
+            method("set_index", x="x2"),
+            pytest.param(
+                method("shift", x=2), marks=pytest.mark.xfail(reason="strips units")
+            ),
+            pytest.param(
+                method("roll", x=2, roll_coords=False),
+                marks=pytest.mark.xfail(reason="strips units"),
+            ),
+            method("sortby", "x2"),
+        ),
+        ids=repr,
+    )
+    def test_stacking_reordering(self, func, dtype):
+        """Reordering/stacking operations should keep the data's units."""
+        array = (
+            np.linspace(0, 10, 2 * 5 * 10).reshape(2, 5, 10).astype(dtype)
+            * unit_registry.m
+        )
+        x = np.arange(array.shape[0])
+        y = np.arange(array.shape[1])
+        z = np.arange(array.shape[2])
+        # Non-dimension coord in descending order so sortby has work to do.
+        x2 = np.linspace(0, 1, array.shape[0])[::-1]
+
+        data_array = xr.DataArray(
+            name="data",
+            data=array,
+            coords={"x": x, "y": y, "z": z, "x2": ("x", x2)},
+            dims=("x", "y", "z"),
+        )
+
+        expected = attach_units(
+            func(strip_units(data_array)), {"data": unit_registry.m}
+        )
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            method("diff", dim="x"),
+            method("differentiate", coord="x"),
+            method("integrate", dim="x"),
+            pytest.param(
+                method("quantile", q=[0.25, 0.75]),
+                marks=pytest.mark.xfail(
+                    reason="pint does not implement nanpercentile yet"
+                ),
+            ),
+            pytest.param(
+                method("reduce", func=np.sum, dim="x"),
+                marks=pytest.mark.xfail(reason="strips units"),
+            ),
+            pytest.param(
+                lambda x: x.dot(x),
+                id="method_dot",
+                marks=pytest.mark.xfail(reason="pint does not implement einsum"),
+            ),
+        ),
+        ids=repr,
+    )
+    def test_computation(self, func, dtype):
+        """Computation methods should preserve the units found on the input."""
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
+        )
+
+        x = np.arange(array.shape[0]) * unit_registry.m
+        y = np.arange(array.shape[1]) * unit_registry.s
+
+        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
+        # NOTE(review): the expected units are taken from the *input*, which
+        # is only exact for operations that keep dimensionality (e.g. dot
+        # would square the unit, but that case is xfail'd) — confirm.
+        units = extract_units(data_array)
+
+        expected = attach_units(func(strip_units(data_array)), units)
+        result = func(data_array)
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(
+                method("groupby", "y"), marks=pytest.mark.xfail(reason="strips units")
+            ),
+            pytest.param(
+                method("groupby_bins", "y", bins=4),
+                marks=pytest.mark.xfail(reason="strips units"),
+            ),
+            method("coarsen", y=2),
+            pytest.param(
+                method("rolling", y=3), marks=pytest.mark.xfail(reason="strips units")
+            ),
+            pytest.param(
+                method("rolling_exp", y=3),
+                marks=pytest.mark.xfail(reason="strips units"),
+            ),
+        ),
+        ids=repr,
+    )
+    def test_computation_objects(self, func, dtype):
+        """Grouping/windowing objects reduced with mean() should keep units."""
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
+        )
+
+        x = np.arange(array.shape[0]) * unit_registry.m
+        # Step of 3 s spreads the y values so groupby_bins gets distinct bins.
+        y = np.arange(array.shape[1]) * 3 * unit_registry.s
+
+        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
+        units = extract_units(data_array)
+
+        # The intermediate object (GroupBy, Coarsen, Rolling, ...) is reduced
+        # with mean() so both sides end up as plain DataArrays again.
+        expected = attach_units(func(strip_units(data_array)).mean(), units)
+        result = func(data_array).mean()
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.xfail(reason="strips units")
+    def test_resample(self, dtype):
+        """Resampling along a datetime dim should keep the data's units."""
+        array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m
+
+        # Yearly timestamps, resampled below into 6-month bins.
+        time = pd.date_range("10-09-2010", periods=len(array), freq="1y")
+        data_array = xr.DataArray(data=array, coords={"time": time}, dims="time")
+        units = extract_units(data_array)
+
+        func = method("resample", time="6m")
+
+        expected = attach_units(func(strip_units(data_array)).mean(), units)
+        result = func(data_array).mean()
+
+        assert_equal_with_units(expected, result)
+
+    @pytest.mark.parametrize(
+        "func",
+        (
+            pytest.param(
+                method("assign_coords", {"z": (["x"], np.arange(5) * unit_registry.s)}),
+                marks=pytest.mark.xfail(reason="strips units"),
+            ),
+            pytest.param(method("first")),
+            pytest.param(method("last")),
+            pytest.param(
+                method("quantile", q=[0.25, 0.5, 0.75], dim="x"),
+                marks=pytest.mark.xfail(reason="strips units"),
+            ),
+        ),
+        ids=repr,
+    )
+    def test_grouped_operations(self, func, dtype):
+        """Methods applied to a GroupBy object should keep the data's units."""
+        array = (
+            np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m
+        )
+
+        x = np.arange(array.shape[0]) * unit_registry.m
+        # Step of 3 s gives distinct group labels along y.
+        y = np.arange(array.shape[1]) * 3 * unit_registry.s
+
+        data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y"))
+        units = extract_units(data_array)
+
+        expected = attach_units(func(strip_units(data_array).groupby("y")), units)
+        result = func(data_array.groupby("y"))
+
+        assert_equal_with_units(expected, result)
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index eb6101fe37d..78723eda013 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -215,7 +215,7 @@ def __hash__(self):
                 return hash(self.item)
 
             def __repr__(self):
-                return "%s(item=%r)" % (type(self).__name__, self.item)
+                return "{}(item={!r})".format(type(self).__name__, self.item)
 
         item = HashableItemWrapper((1, 2, 3))
         x = self.cls("x", [item])
diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py
index 7b9ca1878f7..0f6fc3b1334 100644
--- a/xarray/ufuncs.py
+++ b/xarray/ufuncs.py
@@ -55,7 +55,7 @@ def __call__(self, *args, **kwargs):
         f = _dask_or_eager_func(self._name, array_args=slice(len(args)))
         if len(args) > 2 or len(args) == 0:
             raise TypeError(
-                "cannot handle %s arguments for %r" % (len(args), self._name)
+                "cannot handle {} arguments for {!r}".format(len(args), self._name)
             )
         elif len(args) == 1:
             if isinstance(args[0], _xarray_types):
diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py
index 4ba327913bc..0d6d147f0bb 100755
--- a/xarray/util/print_versions.py
+++ b/xarray/util/print_versions.py
@@ -83,7 +83,7 @@ def show_versions(file=sys.stdout):
     try:
         sys_info.extend(netcdf_and_hdf5_versions())
     except Exception as e:
-        print("Error collecting netcdf / hdf5 version: {}".format(e))
+        print(f"Error collecting netcdf / hdf5 version: {e}")
 
     deps = [
         # (MODULE_NAME, f(mod) -> mod version)
@@ -141,11 +141,11 @@ def show_versions(file=sys.stdout):
     print("------------------", file=file)
 
     for k, stat in sys_info:
-        print("%s: %s" % (k, stat), file=file)
+        print(f"{k}: {stat}", file=file)
 
     print("", file=file)
     for k, stat in deps_blob:
-        print("%s: %s" % (k, stat), file=file)
+        print(f"{k}: {stat}", file=file)
 
 
 if __name__ == "__main__":